[generic] Add support for multiple brightcove URLs (Fixes #2283)

This commit is contained in:
Philipp Hagemeister 2014-02-03 15:19:40 +01:00
parent b0268cb6ce
commit 99877772d0
3 changed files with 36 additions and 12 deletions

View File

@ -34,6 +34,7 @@ from youtube_dl.extractor import (
KhanAcademyIE, KhanAcademyIE,
EveryonesMixtapeIE, EveryonesMixtapeIE,
RutubeChannelIE, RutubeChannelIE,
GenericIE,
) )
@ -229,6 +230,16 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], '1409') self.assertEqual(result['id'], '1409')
self.assertTrue(len(result['entries']) >= 34) self.assertTrue(len(result['entries']) >= 34)
def test_multiple_brightcove_videos(self):
# https://github.com/rg3/youtube-dl/issues/2283
dl = FakeYDL()
ie = GenericIE(dl)
result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
self.assertEqual(len(result['entries']), 3)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor):
@classmethod @classmethod
def _extract_brightcove_url(cls, webpage): def _extract_brightcove_url(cls, webpage):
"""Try to extract the brightcove url from the wepbage, returns None """Try to extract the brightcove url from the webpage, returns None
if it can't be found if it can't be found
""" """
urls = cls._extract_brightcove_urls(webpage)
return urls[0] if urls else None
@classmethod
def _extract_brightcove_urls(cls, webpage):
"""Return a list of all Brightcove URLs from the webpage """
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
if url_m: if url_m:
return url_m.group(1) return [url_m.group(1)]
m_brightcove = re.search( matches = re.findall(
r'''(?sx)<object r'''(?sx)<object
(?: (?:
[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 | [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?</object>''', ).+?</object>''',
webpage) webpage)
if m_brightcove is not None: return [cls._build_brighcove_url(m) for m in matches]
return cls._build_brighcove_url(m_brightcove.group())
else:
return None
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})

View File

@ -234,11 +234,21 @@ class GenericIE(InfoExtractor):
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
# Look for BrightCove: # Look for BrightCove:
bc_url = BrightcoveIE._extract_brightcove_url(webpage) bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
if bc_url is not None: if bc_urls:
self.to_screen('Brightcove video detected.') self.to_screen('Brightcove video detected.')
surl = smuggle_url(bc_url, {'Referer': url}) entries = [{
return self.url_result(surl, 'Brightcove') '_type': 'url',
'url': smuggle_url(bc_url, {'Referer': url}),
'ie_key': 'Brightcove'
} for bc_url in bc_urls]
return {
'_type': 'playlist',
'title': video_title,
'id': video_id,
'entries': entries,
}
# Look for embedded (iframe) Vimeo player # Look for embedded (iframe) Vimeo player
mobj = re.search( mobj = re.search(