[NBC] Enhance extraction of ThePlatform URL (fixes #5470)

This commit is contained in:
Yen Chi Hsuan 2015-05-04 19:09:18 +08:00
parent 957b794c26
commit b9b3ab45ea
1 changed file with 14 additions and 1 deletion

View File

@@ -37,13 +37,26 @@ class NBCIE(InfoExtractor):
},
'skip': 'Only works from US',
},
+{
+'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
+'info_dict': {
+'id': '8iUuyzWDdYUZ',
+'ext': 'flv',
+'title': 'Star Wars Teaser',
+'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
+},
+'skip': 'Only works from US',
+}
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
theplatform_url = self._search_regex(
-'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
+[
+r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
+r'"embedURL"\s*:\s*"([^"]+)"'
+],
webpage, 'theplatform url').replace('_no_endcard', '')
if theplatform_url.startswith('//'):
theplatform_url = 'http:' + theplatform_url