[bbc] Improve work with mediaselection URLs

This commit is contained in:
Sergey M․ 2015-07-30 00:55:06 +06:00
parent 8d42e3501e
commit d12a1a47d5
1 changed file with 33 additions and 17 deletions

View File

@ -14,12 +14,15 @@ from ..utils import (
) )
from ..compat import compat_HTTPError from ..compat import compat_HTTPError
class BBCCoUkIE(InfoExtractor): class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk' IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer' IE_DESC = 'BBC iPlayer'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})' _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
_MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' _MEDIASELECTOR_URLS = [
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
]
_TESTS = [ _TESTS = [
{ {
@ -161,6 +164,10 @@ class BBCCoUkIE(InfoExtractor):
} }
] ]
class MediaSelectionError(Exception):
def __init__(self, id):
self.id = id
def _extract_asx_playlist(self, connection, programme_id): def _extract_asx_playlist(self, connection, programme_id):
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist') asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
return [ref.get('href') for ref in asx.findall('./Entry/ref')] return [ref.get('href') for ref in asx.findall('./Entry/ref')]
@ -211,8 +218,7 @@ class BBCCoUkIE(InfoExtractor):
def _extract_medias(self, media_selection): def _extract_medias(self, media_selection):
error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error') error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
if error is not None: if error is not None:
raise ExtractorError( raise BBCCoUkIE.MediaSelectionError(error.get('id'))
'%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media') return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
def _extract_connections(self, media): def _extract_connections(self, media):
@ -269,17 +275,23 @@ class BBCCoUkIE(InfoExtractor):
] ]
return subtitles return subtitles
def _raise_extractor_error(self, media_selection_error):
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, media_selection_error.id),
expected=True)
def _download_media_selector(self, programme_id): def _download_media_selector(self, programme_id):
last_exception = None
for mediaselector_url in self._MEDIASELECTOR_URLS:
try: try:
return self._download_media_selector_url( return self._download_media_selector_url(
self._MEDIASELECTOR_URL % programme_id, programme_id) mediaselector_url % programme_id, programme_id)
except ExtractorError as e: except BBCCoUkIE.MediaSelectionError as e:
if hasattr(self, '_MEDIASELECTOR_ALT_URL') and str(e) == 'bbc returned error: notukerror': if e.id == 'notukerror':
# notukerror on bbc.com/travel using bbc news mediaselector: fallback to /mediaselector/5/ last_exception = e
return self._download_media_selector_url( continue
self._MEDIASELECTOR_ALT_URL % programme_id, programme_id) self._raise_extractor_error(e)
else: self._raise_extractor_error(last_exception)
raise
def _download_media_selector_url(self, url, programme_id=None): def _download_media_selector_url(self, url, programme_id=None):
try: try:
@ -432,10 +444,14 @@ class BBCIE(BBCCoUkIE):
IE_DESC = 'BBC' IE_DESC = 'BBC'
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)' _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
# fails with notukerror for some videos ( non news sites such as bbc.com/travel ) _MEDIASELECTOR_URLS = [
_MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s' # Provides more formats, namely direct mp4 links, but fails on some videos with
# limited selection of formats but may work where the above does not # notukerror for non UK (?) users (e.g.
_MEDIASELECTOR_ALT_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s' # http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
# Provides fewer formats, but works everywhere for everybody (hopefully)
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
]
_TESTS = [{ _TESTS = [{
# article with multiple videos embedded with data-media-meta containing # article with multiple videos embedded with data-media-meta containing