[makertv] improve extraction

2015-12-21 04:24:58 +01:00 · 2015-12-21 04:24:58 +01:00 · 7cb0952474
parent 78653a33aa
commit 7cb0952474
3 changed files with 22 additions and 7 deletions
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -53,6 +53,7 @@ from .onionstudios import OnionStudiosIE
 from .snagfilms import SnagFilmsEmbedIE
 from .screenwavemedia import ScreenwaveMediaIE
 from .mtv import MTVServicesEmbeddedIE
+from .jwplatform import JWPlatformIE
 class GenericIE(InfoExtractor):
@@ -1787,6 +1788,11 @@ class GenericIE(InfoExtractor):
        if snagfilms_url:
            return self.url_result(snagfilms_url)
+        # Look for JWPlatform embeds
+        jwplatform_url = JWPlatformIE._extract_url(webpage)
+        if jwplatform_url:
+            return self.url_result(jwplatform_url, 'JWPlatform')
        # Look for ScreenwaveMedia embeds
        mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
        if mobj is not None:
--- a/youtube_dl/extractor/jwplatform.py
+++ b/youtube_dl/extractor/jwplatform.py
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
+import re
 from .common import InfoExtractor
 from ..utils import int_or_none
@@ -23,7 +25,7 @@ class JWPlatformIE(InfoExtractor):
    @staticmethod
    def _extract_url(webpage):
        mobj = re.search(
-            r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8}',
+            r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
            webpage)
        if mobj:
            return mobj.group('url')
@@ -42,7 +44,9 @@ class JWPlatformIE(InfoExtractor):
            source_url = self._proto_relative_url(source['file'])
            source_type = source.get('type') or ''
            if source_type == 'application/vnd.apple.mpegurl':
-                formats.extend(self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None))
+                m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)
+                if m3u8_formats:
+                    formats.extend(m3u8_formats)
            elif source_type.startswith('audio'):
                formats.append({
                    'url': source_url,
@@ -57,7 +61,7 @@ class JWPlatformIE(InfoExtractor):
        self._sort_formats(formats)
        return {
-            'id': video_data['mediaid'],
+            'id': video_id,
            'title': video_data['title'],
            'description': video_data.get('description'),
            'thumbnail': self._proto_relative_url(video_data.get('image')),
--- a/youtube_dl/extractor/makertv.py
+++ b/youtube_dl/extractor/makertv.py
@@ -5,12 +5,12 @@ from .common import InfoExtractor
 class MakerTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)?video|http://makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
+    _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
    _TEST = {
        'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
        'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
        'info_dict': {
-            'id': 'brOEcGut',
+            'id': 'Fh3QgymL9gsc',
            'ext': 'mp4',
            'title': 'Maze Runner: The Scorch Trials Official Movie Review',
            'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
@@ -22,6 +22,11 @@ class MakerTVIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        jwplatform_id = self._search_regex([r'jwid="([^"]+)"', r'Maker.jw_id\s*=\s*"([^"]+)";'], webpage, 'jwplatform id')
+        jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
-        return self.url_result('jwplatform:%s' % jwplatform_id, 'JWPlatform')
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'url': 'jwplatform:%s' % jwplatform_id,
+            'ie_key': 'JWPlatform',
+        }