[youtube] Fall back to the original regex for ytplayer.config

This commit is contained in:
Lukáš Lalinský 2015-11-22 13:49:33 +01:00
parent 3cfd000849
commit 0e49d9a6b0
1 changed file with 31 additions and 8 deletions

View File

@ -674,7 +674,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
{ {
'url': 'http://vid.plus/FlRa-iH7PGw', 'url': 'http://vid.plus/FlRa-iH7PGw',
'only_matching': True, 'only_matching': True,
} },
{
# Title with JS-like syntax "};"
'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
'info_dict': {
'id': 'lsguqyKfVQg',
'ext': 'mp4',
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
'upload_date': '20151119',
'uploader_id': 'IronSoulElf',
'uploader': 'IronSoulElf',
},
'params': {
'skip_download': True,
},
},
] ]
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -858,16 +874,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return {} return {}
return sub_lang_list return sub_lang_list
def _get_ytplayer_config(self, webpage):
    """Locate the ``ytplayer.config`` assignment in *webpage* and decode it.

    Two patterns are tried in order, strictest first. A capture that is
    not valid JSON is skipped rather than raised, so callers can rely on
    the ``None`` result they already check for.

    :param webpage: HTML/JS text of the watch page.
    :return: the decoded config object (callers index it by ``'args'``),
        or ``None`` when no pattern yields parseable JSON.
    """
    patterns = (
        # Preferred form: the object must be followed by ";ytplayer", so a
        # "};" inside a string value (e.g. a video title) does not end the
        # capture early.
        r';ytplayer\.config\s*=\s*({.*?});ytplayer',
        # Original, looser form: ends at the first "};". It can truncate
        # the object when a string value itself contains "};".
        r';ytplayer\.config\s*=\s*({.*?});',
    )
    for pattern in patterns:
        config = self._search_regex(pattern, webpage, 'ytconfig.player', default=None)
        if config is None:
            # No match for this pattern (default=None presumably suppresses
            # the helper's usual failure path); try the next one.
            continue
        try:
            return json.loads(uppercase_escape(config))
        except ValueError:
            # A truncated or otherwise malformed capture must not abort
            # extraction; fall through to the next pattern. On Python 3,
            # json.JSONDecodeError is a ValueError subclass.
            continue
    return None
def _get_automatic_captions(self, video_id, webpage): def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an """We need the webpage for getting the captions url, pass it as an
argument to speed up the process.""" argument to speed up the process."""
self.to_screen('%s: Looking for automatic captions' % video_id) self.to_screen('%s: Looking for automatic captions' % video_id)
mobj = re.search(r';ytplayer.config = ({.*?});', webpage) player_config = self._get_ytplayer_config(webpage)
err_msg = 'Couldn\'t find automatic captions for %s' % video_id err_msg = 'Couldn\'t find automatic captions for %s' % video_id
if mobj is None: if player_config is None:
self._downloader.report_warning(err_msg) self._downloader.report_warning(err_msg)
return {} return {}
player_config = json.loads(mobj.group(1))
try: try:
args = player_config['args'] args = player_config['args']
caption_url = args['ttsurl'] caption_url = args['ttsurl']
@ -1074,10 +1099,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
age_gate = False age_gate = False
video_info = None video_info = None
# Try looking directly into the video webpage # Try looking directly into the video webpage
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});ytplayer', video_webpage) ytplayer_config = self._get_ytplayer_config(video_webpage)
if mobj: if ytplayer_config is not None:
json_code = uppercase_escape(mobj.group(1))
ytplayer_config = json.loads(json_code)
args = ytplayer_config['args'] args = ytplayer_config['args']
if args.get('url_encoded_fmt_stream_map'): if args.get('url_encoded_fmt_stream_map'):
# Convert to the same format returned by compat_parse_qs # Convert to the same format returned by compat_parse_qs