From bae1404893341ed89f4c9b556aa4068c13ed9f7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 18 Apr 2017 22:21:38 +0700 Subject: [PATCH] [extractor/common] Add support for video of WebPage context in _json_ld (closes #12778) --- youtube_dl/extractor/common.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 12e010a0d..61d97ab72 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -976,6 +976,22 @@ class InfoExtractor(object): return info if isinstance(json_ld, dict): json_ld = [json_ld] + + def extract_video_object(e): + assert e['@type'] == 'VideoObject' + info.update({ + 'url': e.get('contentUrl'), + 'title': unescapeHTML(e.get('name')), + 'description': unescapeHTML(e.get('description')), + 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'), + 'duration': parse_duration(e.get('duration')), + 'timestamp': unified_timestamp(e.get('uploadDate')), + 'filesize': float_or_none(e.get('contentSize')), + 'tbr': int_or_none(e.get('bitrate')), + 'width': int_or_none(e.get('width')), + 'height': int_or_none(e.get('height')), + }) + for e in json_ld: if e.get('@context') == 'http://schema.org': item_type = e.get('@type') @@ -1000,18 +1016,11 @@ class InfoExtractor(object): 'description': unescapeHTML(e.get('articleBody')), }) elif item_type == 'VideoObject': - info.update({ - 'url': e.get('contentUrl'), - 'title': unescapeHTML(e.get('name')), - 'description': unescapeHTML(e.get('description')), - 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'), - 'duration': parse_duration(e.get('duration')), - 'timestamp': unified_timestamp(e.get('uploadDate')), - 'filesize': float_or_none(e.get('contentSize')), - 'tbr': int_or_none(e.get('bitrate')), - 'width': int_or_none(e.get('width')), - 'height': int_or_none(e.get('height')), - }) + extract_video_object(e) + elif item_type == 'WebPage': + video = e.get('video') + if isinstance(video, dict) and video.get('@type') == 'VideoObject': + extract_video_object(video) break return dict((k, v) for k, v in info.items() if v is not None)