Merge branch 'master' of github.com:rg3/youtube-dl

2013-09-07 22:28:54 +02:00 · 2013-09-07 22:28:54 +02:00 · a27a2470cd
parent 8851a574a3 a7130543fa
commit a27a2470cd
6 changed files with 92 additions and 15 deletions
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -21,14 +21,15 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertEqual(self.matching_ies(url), ie_list)
    def test_youtube_playlist_matching(self):
-        self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
+        assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
-        self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
+        assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
-        self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958'))
+        assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q'))
+        assertPlaylist(u'PL63F0C78739B09958')
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
+        assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC'))
+        assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
+        assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
-        self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M'))
+        assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
        self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
    def test_youtube_matching(self):
        self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
@ -37,13 +38,23 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
    def test_youtube_channel_matching(self):
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
+        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
    def test_youtube_user_matching(self):
        self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
    def test_youtube_feeds(self):
        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
        self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
        self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
        self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
    def test_youtube_show_matching(self):
        self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
    def test_justin_tv_channelid_matching(self):
        self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
        self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -52,6 +52,7 @@ from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mtv import MTVIE
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@ -14,7 +14,7 @@ from ..utils import (
 class DailymotionIE(InfoExtractor):
    """Information Extractor for Dailymotion"""
-    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
+    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
    IE_NAME = u'dailymotion'
    _TEST = {
        u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
@ -33,6 +33,7 @@ class DailymotionIE(InfoExtractor):
        video_id = mobj.group(1).split('_')[0].split('?')[0]
        video_extension = 'mp4'
        url = 'http://www.dailymotion.com/video/%s' % video_id
        # Retrieve video webpage to extract further information
        request = compat_urllib_request.Request(url)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -109,6 +109,11 @@ class GenericIE(InfoExtractor):
        return new_url
    def _real_extract(self, url):
        parsed_url = compat_urlparse.urlparse(url)
        if not parsed_url.scheme:
            self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
            return self.url_result('http://' + url)
        try:
            new_url = self._test_redirect(url)
            if new_url:
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dl/extractor/metacritic.py
@ -0,0 +1,55 @@
 import re
 import xml.etree.ElementTree
 import operator
 from .common import InfoExtractor
 class MetacriticIE(InfoExtractor):
    _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        u'file': u'3698222.mp4',
        u'info_dict': {
            u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            u'duration': 221,
        },
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # The xml is not well formatted, there are raw '&'
        info_xml = self._download_webpage('http://www.metacritic.com/video_data?video=' + video_id,
            video_id, u'Downloading info xml').replace('&', '&amp;')
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
        formats = []
        for videoFile in clip.findall('httpURI/videoFile'):
            rate_str = videoFile.find('rate').text
            video_url = videoFile.find('filePath').text
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': rate_str,
                'rate': int(rate_str),
            })
        formats.sort(key=operator.itemgetter('rate'))
        description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
            webpage, u'description', flags=re.DOTALL)
        info = {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int(clip.find('duration').text),
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -386,7 +386,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
-        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
+        if YoutubePlaylistIE.suitable(url): return False
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
    def report_video_webpage_download(self, video_id):
@ -1015,14 +1015,18 @@ class YoutubeChannelIE(InfoExtractor):
 class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'
    @classmethod
    def suitable(cls, url):
-        if YoutubeIE.suitable(url): return False
+        # Don't return True if the url can be extracted with other youtube
        # extractor, the regex would is too permissive and it would match.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies): return False
        else: return super(YoutubeUserIE, cls).suitable(url)
    def _real_extract(self, url):