@@ -37,7 +37,7 @@ | |||
"writeinfojson": true, | |||
"writesubtitles": false, | |||
"allsubtitles": false, | |||
"listssubtitles": false, | |||
"listsubtitles": false, | |||
"socket_timeout": 20, | |||
"fixup": "never" | |||
} |
@@ -919,6 +919,76 @@ class TestYoutubeDL(unittest.TestCase): | |||
self.assertEqual(downloaded['extractor'], 'testex') | |||
self.assertEqual(downloaded['extractor_key'], 'TestEx') | |||
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064 | |||
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self): | |||
class _YDL(YDL): | |||
def __init__(self, *args, **kwargs): | |||
super(_YDL, self).__init__(*args, **kwargs) | |||
def trouble(self, s, tb=None): | |||
pass | |||
ydl = _YDL({ | |||
'format': 'extra', | |||
'ignoreerrors': True, | |||
}) | |||
class VideoIE(InfoExtractor): | |||
_VALID_URL = r'video:(?P<id>\d+)' | |||
def _real_extract(self, url): | |||
video_id = self._match_id(url) | |||
formats = [{ | |||
'format_id': 'default', | |||
'url': 'url:', | |||
}] | |||
if video_id == '0': | |||
raise ExtractorError('foo') | |||
if video_id == '2': | |||
formats.append({ | |||
'format_id': 'extra', | |||
'url': TEST_URL, | |||
}) | |||
return { | |||
'id': video_id, | |||
'title': 'Video %s' % video_id, | |||
'formats': formats, | |||
} | |||
class PlaylistIE(InfoExtractor): | |||
_VALID_URL = r'playlist:' | |||
def _entries(self): | |||
for n in range(3): | |||
video_id = compat_str(n) | |||
yield { | |||
'_type': 'url_transparent', | |||
'ie_key': VideoIE.ie_key(), | |||
'id': video_id, | |||
'url': 'video:%s' % video_id, | |||
'title': 'Video Transparent %s' % video_id, | |||
} | |||
def _real_extract(self, url): | |||
return self.playlist_result(self._entries()) | |||
ydl.add_info_extractor(VideoIE(ydl)) | |||
ydl.add_info_extractor(PlaylistIE(ydl)) | |||
info = ydl.extract_info('playlist:') | |||
entries = info['entries'] | |||
self.assertEqual(len(entries), 3) | |||
self.assertTrue(entries[0] is None) | |||
self.assertTrue(entries[1] is None) | |||
self.assertEqual(len(ydl.downloaded_info_dicts), 1) | |||
downloaded = ydl.downloaded_info_dicts[0] | |||
self.assertEqual(entries[2], downloaded) | |||
self.assertEqual(downloaded['url'], TEST_URL) | |||
self.assertEqual(downloaded['title'], 'Video Transparent 2') | |||
self.assertEqual(downloaded['id'], '2') | |||
self.assertEqual(downloaded['extractor'], 'Video') | |||
self.assertEqual(downloaded['extractor_key'], 'Video') | |||
if __name__ == '__main__': | |||
unittest.main() |
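For context, the test above depends on per-entry error handling inside YoutubeDL (the __handle_extraction_exceptions decorator introduced further down). A minimal, illustrative sketch of that pattern — made-up names, not the real youtube-dl internals — assuming only standard Python:

# Illustrative sketch: a failing playlist entry becomes None instead of
# aborting the whole playlist, mirroring what the test asserts.
class EntryError(Exception):
    pass


def handle_entry_errors(func):
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except EntryError as e:
            print('ERROR: %s' % e)  # report, as with --ignore-errors, but keep going
            return None
    return wrapper


class MiniDownloader(object):
    @handle_entry_errors
    def process_entry(self, entry):
        if entry.get('broken'):
            raise EntryError('could not extract %s' % entry['id'])
        return entry

    def process_playlist(self, entries):
        return [self.process_entry(e) for e in entries]


results = MiniDownloader().process_playlist(
    [{'id': '0', 'broken': True}, {'id': '1', 'broken': True}, {'id': '2'}])
assert results[:2] == [None, None] and results[2]['id'] == '2'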
@@ -830,34 +830,23 @@ class YoutubeDL(object): | |||
'and will probably not work.') | |||
try: | |||
try: | |||
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url) | |||
except (AssertionError, IndexError, AttributeError): | |||
temp_id = None | |||
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}): | |||
self.to_screen("[%s] %s: has already been recorded in archive" % ( | |||
ie_key, temp_id)) | |||
break | |||
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url) | |||
except (AssertionError, IndexError, AttributeError): | |||
temp_id = None | |||
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}): | |||
self.to_screen("[%s] %s: has already been recorded in archive" % ( | |||
ie_key, temp_id)) | |||
break | |||
ie_result = ie.extract(url) | |||
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) | |||
break | |||
if isinstance(ie_result, list): | |||
# Backwards compatibility: old IE result format | |||
ie_result = { | |||
'_type': 'compat_list', | |||
'entries': ie_result, | |||
} | |||
if info_dict: | |||
if info_dict.get('id'): | |||
ie_result['id'] = info_dict['id'] | |||
if info_dict.get('title'): | |||
ie_result['title'] = info_dict['title'] | |||
self.add_default_extra_info(ie_result, ie, url) | |||
if process: | |||
return self.process_ie_result(ie_result, download, extra_info) | |||
else: | |||
return ie_result | |||
return self.__extract_info(url, ie, download, extra_info, process, info_dict) | |||
else: | |||
self.report_error('no suitable InfoExtractor for URL %s' % url) | |||
def __handle_extraction_exceptions(func): | |||
def wrapper(self, *args, **kwargs): | |||
try: | |||
return func(self, *args, **kwargs) | |||
except GeoRestrictedError as e: | |||
msg = e.msg | |||
if e.countries: | |||
@@ -865,20 +854,38 @@ class YoutubeDL(object): | |||
map(ISO3166Utils.short2full, e.countries)) | |||
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' | |||
self.report_error(msg) | |||
break | |||
except ExtractorError as e: # An error we somewhat expected | |||
self.report_error(compat_str(e), e.format_traceback()) | |||
break | |||
except MaxDownloadsReached: | |||
raise | |||
except Exception as e: | |||
if self.params.get('ignoreerrors', False): | |||
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) | |||
break | |||
else: | |||
raise | |||
return wrapper | |||
@__handle_extraction_exceptions | |||
def __extract_info(self, url, ie, download, extra_info, process, info_dict): | |||
ie_result = ie.extract(url) | |||
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) | |||
return | |||
if isinstance(ie_result, list): | |||
# Backwards compatibility: old IE result format | |||
ie_result = { | |||
'_type': 'compat_list', | |||
'entries': ie_result, | |||
} | |||
if info_dict: | |||
if info_dict.get('id'): | |||
ie_result['id'] = info_dict['id'] | |||
if info_dict.get('title'): | |||
ie_result['title'] = info_dict['title'] | |||
self.add_default_extra_info(ie_result, ie, url) | |||
if process: | |||
return self.process_ie_result(ie_result, download, extra_info) | |||
else: | |||
self.report_error('no suitable InfoExtractor for URL %s' % url) | |||
return ie_result | |||
def add_default_extra_info(self, ie_result, ie, url): | |||
self.add_extra_info(ie_result, { | |||
@@ -1057,9 +1064,8 @@ class YoutubeDL(object): | |||
self.to_screen('[download] ' + reason) | |||
continue | |||
entry_result = self.process_ie_result(entry, | |||
download=download, | |||
extra_info=extra) | |||
entry_result = self.__process_iterable_entry(entry, download, extra) | |||
# TODO: skip failed (empty) entries? | |||
playlist_results.append(entry_result) | |||
ie_result['entries'] = playlist_results | |||
self.to_screen('[download] Finished downloading playlist: %s' % playlist) | |||
@@ -1088,6 +1094,11 @@ class YoutubeDL(object): | |||
else: | |||
raise Exception('Invalid result type: %s' % result_type) | |||
@__handle_extraction_exceptions | |||
def __process_iterable_entry(self, entry, download, extra_info): | |||
return self.process_ie_result( | |||
entry, download=download, extra_info=extra_info) | |||
def _build_format_filter(self, filter_spec): | |||
" Returns a function to filter the formats according to the filter_spec " | |||
@@ -2345,7 +2345,7 @@ except ImportError: # Python <3.4 | |||
# HTMLParseError has been deprecated in Python 3.3 and removed in | |||
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible | |||
# and uniform cross-version exceptiong handling | |||
# and uniform cross-version exception handling | |||
class compat_HTMLParseError(Exception): | |||
pass | |||
@@ -109,7 +109,9 @@ class HttpFD(FileDownloader): | |||
try: | |||
ctx.data = self.ydl.urlopen(request) | |||
except (compat_urllib_error.URLError, ) as err: | |||
if isinstance(err.reason, socket.timeout): | |||
# reason may not be available, e.g. for urllib2.HTTPError on python 2.6 | |||
reason = getattr(err, 'reason', None) | |||
if isinstance(reason, socket.timeout): | |||
raise RetryDownload(err) | |||
raise err | |||
# When trying to resume, Content-Range HTTP header of response has to be checked | |||
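A short, standalone illustration of the defensive pattern in this hunk (the exception classes below are stand-ins, not the real urllib types): reading reason with getattr keeps the timeout check from raising on exception objects that lack the attribute.

import socket


class FakeHTTPError(Exception):
    """Stand-in for an exception type that carries no 'reason' attribute."""


class FakeTimeoutError(Exception):
    reason = socket.timeout()


def should_retry(err):
    # mirror the hunk above: fall back to None instead of touching err.reason directly
    reason = getattr(err, 'reason', None)
    return isinstance(reason, socket.timeout)


assert should_retry(FakeHTTPError('404')) is False  # no AttributeError raised
assert should_retry(FakeTimeoutError()) is True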
@@ -0,0 +1,103 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
from .common import InfoExtractor | |||
from .youtube import YoutubeIE | |||
from .vimeo import VimeoIE | |||
from ..utils import ( | |||
int_or_none, | |||
parse_iso8601, | |||
update_url_query, | |||
) | |||
class AmaraIE(InfoExtractor): | |||
_VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)' | |||
_TESTS = [{ | |||
# Youtube | |||
'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video', | |||
'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae', | |||
'info_dict': { | |||
'id': 'h6ZuVdvYnfE', | |||
'ext': 'mp4', | |||
'title': 'Why jury trials are becoming less common', | |||
'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1', | |||
'thumbnail': r're:^https?://.*\.jpg$', | |||
'subtitles': dict, | |||
'upload_date': '20160813', | |||
'uploader': 'PBS NewsHour', | |||
'uploader_id': 'PBSNewsHour', | |||
'timestamp': 1549639570, | |||
} | |||
}, { | |||
# Vimeo | |||
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011', | |||
'md5': '99392c75fa05d432a8f11df03612195e', | |||
'info_dict': { | |||
'id': '18622084', | |||
'ext': 'mov', | |||
'title': 'Vimeo at CES 2011!', | |||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', | |||
'thumbnail': r're:^https?://.*\.jpg$', | |||
'subtitles': dict, | |||
'timestamp': 1294763658, | |||
'upload_date': '20110111', | |||
'uploader': 'Sam Morrill', | |||
'uploader_id': 'sammorrill' | |||
} | |||
}, { | |||
# Direct Link | |||
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/', | |||
'md5': 'd3970f08512738ee60c5807311ff5d3f', | |||
'info_dict': { | |||
'id': 's8KL7I3jLmh6', | |||
'ext': 'mp4', | |||
'title': 'The danger of a single story', | |||
'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23', | |||
'thumbnail': r're:^https?://.*\.jpg$', | |||
'subtitles': dict, | |||
'upload_date': '20091007', | |||
'timestamp': 1254942511, | |||
} | |||
}] | |||
def _real_extract(self, url): | |||
video_id = self._match_id(url) | |||
meta = self._download_json( | |||
'https://amara.org/api/videos/%s/' % video_id, | |||
video_id, query={'format': 'json'}) | |||
title = meta['title'] | |||
video_url = meta['all_urls'][0] | |||
subtitles = {} | |||
for language in (meta.get('languages') or []): | |||
subtitles_uri = language.get('subtitles_uri') | |||
if not (subtitles_uri and language.get('published')): | |||
continue | |||
subtitle = subtitles.setdefault(language.get('code') or 'en', []) | |||
for f in ('json', 'srt', 'vtt'): | |||
subtitle.append({ | |||
'ext': f, | |||
'url': update_url_query(subtitles_uri, {'format': f}), | |||
}) | |||
info = { | |||
'url': video_url, | |||
'id': video_id, | |||
'subtitles': subtitles, | |||
'title': title, | |||
'description': meta.get('description'), | |||
'thumbnail': meta.get('thumbnail'), | |||
'duration': int_or_none(meta.get('duration')), | |||
'timestamp': parse_iso8601(meta.get('created')), | |||
} | |||
for ie in (YoutubeIE, VimeoIE): | |||
if ie.suitable(video_url): | |||
info.update({ | |||
'_type': 'url_transparent', | |||
'ie_key': ie.ie_key(), | |||
}) | |||
break | |||
return info |
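A hedged usage sketch for the new extractor (needs network access; the URL comes from the tests above): metadata and subtitles are taken from the Amara API, and videos hosted on YouTube or Vimeo are handed over via a url_transparent result so those extractors supply the formats.

import youtube_dl

with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
    info = ydl.extract_info(
        'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/',
        download=False)
    # title/subtitles come from Amara even though the media is served by YouTube
    print(info.get('title'), sorted((info.get('subtitles') or {}).keys()))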
@@ -147,7 +147,7 @@ class BrightcoveLegacyIE(InfoExtractor): | |||
] | |||
@classmethod | |||
def _build_brighcove_url(cls, object_str): | |||
def _build_brightcove_url(cls, object_str): | |||
""" | |||
Build a Brightcove url from a xml string containing | |||
<object class="BrightcoveExperience">{params}</object> | |||
@@ -217,7 +217,7 @@ class BrightcoveLegacyIE(InfoExtractor): | |||
return cls._make_brightcove_url(params) | |||
@classmethod | |||
def _build_brighcove_url_from_js(cls, object_js): | |||
def _build_brightcove_url_from_js(cls, object_js): | |||
# The layout of JS is as follows: | |||
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) { | |||
# // build Brightcove <object /> XML | |||
@@ -272,12 +272,12 @@ class BrightcoveLegacyIE(InfoExtractor): | |||
).+?>\s*</object>''', | |||
webpage) | |||
if matches: | |||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches])) | |||
return list(filter(None, [cls._build_brightcove_url(m) for m in matches])) | |||
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage) | |||
if matches: | |||
return list(filter(None, [ | |||
cls._build_brighcove_url_from_js(custom_bc) | |||
cls._build_brightcove_url_from_js(custom_bc) | |||
for custom_bc in matches])) | |||
return [src for _, src in re.findall( | |||
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)] | |||
@@ -1664,7 +1664,7 @@ class InfoExtractor(object): | |||
# just the media without qualities renditions. | |||
# Fortunately, master playlist can be easily distinguished from media | |||
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4] | |||
# master playlist tags MUST NOT appear in a media playist and vice versa. | |||
# master playlist tags MUST NOT appear in a media playlist and vice versa. | |||
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every | |||
# media playlist and MUST NOT appear in master playlist thus we can | |||
# clearly detect media playlist with this criterion. | |||
@@ -7,7 +7,7 @@ from .dplay import DPlayIE | |||
class DiscoveryNetworksDeIE(DPlayIE): | |||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)' | |||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)' | |||
_TESTS = [{ | |||
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', | |||
@@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE): | |||
}, { | |||
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/', | |||
'only_matching': True, | |||
}] | |||
def _real_extract(self, url): | |||
@@ -60,7 +60,7 @@ class EuropaIE(InfoExtractor): | |||
title = get_item('title', preferred_langs) or video_id | |||
description = get_item('description', preferred_langs) | |||
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail') | |||
thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail') | |||
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date')) | |||
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration')) | |||
view_count = int_or_none(xpath_text(playlist, './info/views', 'views')) | |||
@@ -85,7 +85,7 @@ class EuropaIE(InfoExtractor): | |||
'id': video_id, | |||
'title': title, | |||
'description': description, | |||
'thumbnail': thumbnmail, | |||
'thumbnail': thumbnail, | |||
'upload_date': upload_date, | |||
'duration': duration, | |||
'view_count': view_count, | |||
@@ -36,6 +36,7 @@ from .afreecatv import AfreecaTVIE | |||
from .airmozilla import AirMozillaIE | |||
from .aljazeera import AlJazeeraIE | |||
from .alphaporno import AlphaPornoIE | |||
from .amara import AmaraIE | |||
from .alura import ( | |||
AluraIE, | |||
AluraCourseIE | |||
@@ -1507,7 +1508,6 @@ from .youtube import ( | |||
YoutubeIE, | |||
YoutubeFavouritesIE, | |||
YoutubeHistoryIE, | |||
YoutubeLiveIE, | |||
YoutubeTabIE, | |||
YoutubePlaylistIE, | |||
YoutubeRecommendedIE, | |||
@@ -211,7 +211,7 @@ class FranceTVIE(InfoExtractor): | |||
'id': video_id, | |||
'title': self._live_title(title) if is_live else title, | |||
'description': clean_html(info.get('synopsis')), | |||
'thumbnail': urljoin('http://pluzz.francetv.fr', info.get('image')), | |||
'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')), | |||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')), | |||
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])), | |||
'is_live': is_live, | |||
@@ -842,7 +842,7 @@ class GenericIE(InfoExtractor): | |||
'skip_download': True, | |||
} | |||
}, | |||
# MTVSercices embed | |||
# MTVServices embed | |||
{ | |||
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html', | |||
'md5': 'ca1aef97695ef2c1d6973256a57e5252', | |||
@@ -3,11 +3,13 @@ from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
from ..compat import compat_parse_qs | |||
from ..utils import ( | |||
determine_ext, | |||
ExtractorError, | |||
int_or_none, | |||
lowercase_escape, | |||
try_get, | |||
update_url_query, | |||
) | |||
@@ -38,21 +40,10 @@ class GoogleDriveIE(InfoExtractor): | |||
# video can't be watched anonymously due to view count limit reached, | |||
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046) | |||
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view', | |||
'md5': 'bfbd670d03a470bb1e6d4a257adec12e', | |||
'info_dict': { | |||
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ', | |||
'ext': 'mp4', | |||
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4', | |||
} | |||
'only_matching': True, | |||
}, { | |||
# video id is longer than 28 characters | |||
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit', | |||
'info_dict': { | |||
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ', | |||
'ext': 'mp4', | |||
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4', | |||
'duration': 189, | |||
}, | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28', | |||
@@ -171,23 +162,21 @@ class GoogleDriveIE(InfoExtractor): | |||
def _real_extract(self, url): | |||
video_id = self._match_id(url) | |||
webpage = self._download_webpage( | |||
'http://docs.google.com/file/d/%s' % video_id, video_id) | |||
video_info = compat_parse_qs(self._download_webpage( | |||
'https://drive.google.com/get_video_info', | |||
video_id, query={'docid': video_id})) | |||
def get_value(key): | |||
return try_get(video_info, lambda x: x[key][0]) | |||
title = self._search_regex( | |||
r'"title"\s*,\s*"([^"]+)', webpage, 'title', | |||
default=None) or self._og_search_title(webpage) | |||
duration = int_or_none(self._search_regex( | |||
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', | |||
default=None)) | |||
reason = get_value('reason') | |||
title = get_value('title') | |||
if not title and reason: | |||
raise ExtractorError(reason, expected=True) | |||
formats = [] | |||
fmt_stream_map = self._search_regex( | |||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, | |||
'fmt stream map', default='').split(',') | |||
fmt_list = self._search_regex( | |||
r'"fmt_list"\s*,\s*"([^"]+)', webpage, | |||
'fmt_list', default='').split(',') | |||
fmt_stream_map = (get_value('fmt_stream_map') or '').split(',') | |||
fmt_list = (get_value('fmt_list') or '').split(',') | |||
if fmt_stream_map and fmt_list: | |||
resolutions = {} | |||
for fmt in fmt_list: | |||
@@ -257,19 +246,14 @@ class GoogleDriveIE(InfoExtractor): | |||
if urlh and urlh.headers.get('Content-Disposition'): | |||
add_source_format(urlh) | |||
if not formats: | |||
reason = self._search_regex( | |||
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) | |||
if reason: | |||
raise ExtractorError(reason, expected=True) | |||
if not formats and reason: | |||
raise ExtractorError(reason, expected=True) | |||
self._sort_formats(formats) | |||
hl = self._search_regex( | |||
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None) | |||
hl = get_value('hl') | |||
subtitles_id = None | |||
ttsurl = self._search_regex( | |||
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None) | |||
ttsurl = get_value('ttsurl') | |||
if ttsurl: | |||
# the video Id for subtitles will be the last value in the ttsurl | |||
# query string | |||
@@ -281,8 +265,8 @@ class GoogleDriveIE(InfoExtractor): | |||
return { | |||
'id': video_id, | |||
'title': title, | |||
'thumbnail': self._og_search_thumbnail(webpage, default=None), | |||
'duration': duration, | |||
'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id, | |||
'duration': int_or_none(get_value('length_seconds')), | |||
'formats': formats, | |||
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl), | |||
'automatic_captions': self.extract_automatic_captions( | |||
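For context on the rewrite above: get_video_info answers with a URL-encoded query string, which is parsed into a dict of lists and then read field by field. A self-contained sketch of that step (the sample payload is invented):

try:
    from urllib.parse import parse_qs  # Python 3
except ImportError:
    from urlparse import parse_qs  # Python 2

sample = 'title=Example+clip&length_seconds=189&fmt_list=18%2F640x360'
video_info = parse_qs(sample)


def get_value(key):
    # same idea as the get_value() helper above: first value or None
    values = video_info.get(key)
    return values[0] if values else None


assert get_value('title') == 'Example clip'
assert get_value('length_seconds') == '189'
assert get_value('missing') is None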
@@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE): | |||
def _extract_rtmp_video(self, webpage): | |||
# The server URL is hardcoded | |||
video_url = 'rtmpe://video.infoq.com/cfx/st/' | |||
video_url = 'rtmpe://videof.infoq.com/cfx/st/' | |||
# Extract video URL | |||
encoded_id = self._search_regex( | |||
@@ -86,17 +86,18 @@ class InfoQIE(BokeCCBaseIE): | |||
return [{ | |||
'format_id': 'http_video', | |||
'url': http_video_url, | |||
'http_headers': {'Referer': 'https://www.infoq.com/'}, | |||
}] | |||
def _extract_http_audio(self, webpage, video_id): | |||
fields = self._hidden_inputs(webpage) | |||
fields = self._form_hidden_inputs('mp3Form', webpage) | |||
http_audio_url = fields.get('filename') | |||
if not http_audio_url: | |||
return [] | |||
# base URL is found in the Location header in the response returned by | |||
# GET https://www.infoq.com/mp3download.action?filename=... when logged in. | |||
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url) | |||
http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url) | |||
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage)) | |||
# audio file seem to be missing some times even if there is a download link | |||
@@ -64,7 +64,7 @@ class KUSIIE(InfoExtractor): | |||
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000) | |||
description = xpath_text(doc, 'ABSTRACT') | |||
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME') | |||
createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate')) | |||
creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate')) | |||
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content') | |||
formats = [] | |||
@@ -84,5 +84,5 @@ class KUSIIE(InfoExtractor): | |||
'duration': duration, | |||
'formats': formats, | |||
'thumbnail': thumbnail, | |||
'timestamp': createtion_time, | |||
'timestamp': creation_time, | |||
} |
@@ -33,7 +33,7 @@ class NprIE(InfoExtractor): | |||
}, | |||
}], | |||
}, { | |||
# mutlimedia, not media title | |||
# multimedia, not media title | |||
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert', | |||
'info_dict': { | |||
'id': '533198237', | |||
@@ -477,7 +477,7 @@ class PBSIE(InfoExtractor): | |||
if media_id: | |||
return media_id, presumptive_id, upload_date, description | |||
# Fronline video embedded via flp | |||
# Frontline video embedded via flp | |||
video_id = self._search_regex( | |||
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None) | |||
if video_id: | |||
@@ -16,8 +16,9 @@ from ..utils import ( | |||
GeoRestrictedError, | |||
int_or_none, | |||
parse_duration, | |||
remove_start, | |||
strip_or_none, | |||
unescapeHTML, | |||
try_get, | |||
unified_strdate, | |||
unified_timestamp, | |||
update_url_query, | |||
@@ -67,7 +68,7 @@ class RaiBaseIE(InfoExtractor): | |||
# This does not imply geo restriction (e.g. | |||
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html) | |||
if media_url == 'http://download.rai.it/video_no_available.mp4': | |||
if '/video_no_available.mp4' in media_url: | |||
continue | |||
ext = determine_ext(media_url) | |||
@@ -122,27 +123,8 @@ class RaiBaseIE(InfoExtractor): | |||
class RaiPlayIE(RaiBaseIE): | |||
_VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.(?:html|json))' % RaiBaseIE._UUID_RE | |||
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE | |||
_TESTS = [{ | |||
'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter', | |||
'md5': '340aa3b7afb54bfd14a8c11786450d76', | |||
'info_dict': { | |||
'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66', | |||
'ext': 'mp4', | |||
'title': 'La Casa Bianca', | |||
'alt_title': 'S2016 - Puntata del 23/10/2016', | |||
'description': 'md5:a09d45890850458077d1f68bb036e0a5', | |||
'thumbnail': r're:^https?://.*\.jpg$', | |||
'uploader': 'Rai 3', | |||
'creator': 'Rai 3', | |||
'duration': 3278, | |||
'timestamp': 1477764300, | |||
'upload_date': '20161029', | |||
'series': 'La Casa Bianca', | |||
'season': '2016', | |||
}, | |||
'skip': 'This content is not available', | |||
}, { | |||
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', | |||
'md5': '8970abf8caf8aef4696e7b1f2adfc696', | |||
'info_dict': { | |||
@@ -166,10 +148,10 @@ class RaiPlayIE(RaiBaseIE): | |||
}] | |||
def _real_extract(self, url): | |||
url, video_id = re.match(self._VALID_URL, url).groups() | |||
base, video_id = re.match(self._VALID_URL, url).groups() | |||
media = self._download_json( | |||
url.replace('.html', '.json'), video_id, 'Downloading video JSON') | |||
base + '.json', video_id, 'Downloading video JSON') | |||
title = media['name'] | |||
video = media['video'] | |||
@@ -195,7 +177,8 @@ class RaiPlayIE(RaiBaseIE): | |||
season = media.get('season') | |||
info = { | |||
'id': video_id, | |||
'id': remove_start(media.get('id'), 'ContentItem-') or video_id, | |||
'display_id': video_id, | |||
'title': self._live_title(title) if relinker_info.get( | |||
'is_live') else title, | |||
'alt_title': strip_or_none(media.get('subtitle')), | |||
@@ -217,16 +200,16 @@ class RaiPlayIE(RaiBaseIE): | |||
return info | |||
class RaiPlayLiveIE(RaiBaseIE): | |||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)' | |||
_TEST = { | |||
class RaiPlayLiveIE(RaiPlayIE): | |||
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))' | |||
_TESTS = [{ | |||
'url': 'http://www.raiplay.it/dirette/rainews24', | |||
'info_dict': { | |||
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', | |||
'display_id': 'rainews24', | |||
'ext': 'mp4', | |||
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | |||
'description': 'md5:6eca31500550f9376819f174e5644754', | |||
'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497', | |||
'uploader': 'Rai News 24', | |||
'creator': 'Rai News 24', | |||
'is_live': True, | |||
@@ -234,58 +217,50 @@ class RaiPlayLiveIE(RaiBaseIE): | |||
'params': { | |||
'skip_download': True, | |||
}, | |||
} | |||
def _real_extract(self, url): | |||
display_id = self._match_id(url) | |||
webpage = self._download_webpage(url, display_id) | |||
video_id = self._search_regex( | |||
r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE, | |||
webpage, 'content id') | |||
return { | |||
'_type': 'url_transparent', | |||
'ie_key': RaiPlayIE.ie_key(), | |||
'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id, | |||
'id': video_id, | |||
'display_id': display_id, | |||
} | |||
}] | |||
class RaiPlayPlaylistIE(InfoExtractor): | |||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)' | |||
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))' | |||
_TESTS = [{ | |||
'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/', | |||
'info_dict': { | |||
'id': 'nondirloalmiocapo', | |||
'title': 'Non dirlo al mio capo', | |||
'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', | |||
'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b', | |||
}, | |||
'playlist_mincount': 12, | |||
}] | |||
def _real_extract(self, url): | |||
playlist_id = self._match_id(url) | |||
base, playlist_id = re.match(self._VALID_URL, url).groups() | |||
webpage = self._download_webpage(url, playlist_id) | |||
title = self._html_search_meta( | |||
('programma', 'nomeProgramma'), webpage, 'title') | |||
description = unescapeHTML(self._html_search_meta( | |||
('description', 'og:description'), webpage, 'description')) | |||
program = self._download_json( | |||
base + '.json', playlist_id, 'Downloading program JSON') | |||
entries = [] | |||
for mobj in re.finditer( | |||
r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1', | |||
webpage): | |||
video_url = urljoin(url, mobj.group('path')) | |||
entries.append(self.url_result( | |||
video_url, ie=RaiPlayIE.ie_key(), | |||
video_id=RaiPlayIE._match_id(video_url))) | |||
return self.playlist_result(entries, playlist_id, title, description) | |||
for b in (program.get('blocks') or []): | |||
for s in (b.get('sets') or []): | |||
s_id = s.get('id') | |||
if not s_id: | |||
continue | |||
medias = self._download_json( | |||
'%s/%s.json' % (base, s_id), s_id, | |||
'Downloading content set JSON', fatal=False) | |||
if not medias: | |||
continue | |||
for m in (medias.get('items') or []): | |||
path_id = m.get('path_id') | |||
if not path_id: | |||
continue | |||
video_url = urljoin(url, path_id) | |||
entries.append(self.url_result( | |||
video_url, ie=RaiPlayIE.ie_key(), | |||
video_id=RaiPlayIE._match_id(video_url))) | |||
return self.playlist_result( | |||
entries, playlist_id, program.get('name'), | |||
try_get(program, lambda x: x['program_info']['description'])) | |||
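A minimal sketch of the traversal the rewritten RaiPlayPlaylistIE._real_extract performs (the JSON layout is taken from the code above; fetch_set_json is a stand-in for _download_json):

def collect_path_ids(program, fetch_set_json):
    """Walk programme JSON: blocks -> sets -> per-set JSON whose 'items' carry path_id."""
    path_ids = []
    for block in (program.get('blocks') or []):
        for s in (block.get('sets') or []):
            set_id = s.get('id')
            if not set_id:
                continue
            medias = fetch_set_json(set_id) or {}
            for item in (medias.get('items') or []):
                path_id = item.get('path_id')
                if path_id:
                    path_ids.append(path_id)
    return path_ids


# toy data mirroring the structure handled above
program = {'blocks': [{'sets': [{'id': 'set-1'}, {}]}]}
assert collect_path_ids(
    program, lambda set_id: {'items': [{'path_id': '/video/example.json'}]}
) == ['/video/example.json']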
class RaiIE(RaiBaseIE): | |||
@@ -328,19 +303,6 @@ class RaiIE(RaiBaseIE): | |||
'duration': 2214, | |||
'upload_date': '20161103', | |||
} | |||
}, { | |||
# drawMediaRaiTV(...) | |||
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', | |||
'md5': '2dd727e61114e1ee9c47f0da6914e178', | |||
'info_dict': { | |||
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af', | |||
'ext': 'mp4', | |||
'title': 'Il pacco', | |||
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', | |||
'thumbnail': r're:^https?://.*\.jpg$', | |||
'upload_date': '20141221', | |||
}, | |||
'skip': 'This content is not available', | |||
}, { | |||
# initEdizione('ContentItem-...' | |||
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', | |||
@@ -352,18 +314,6 @@ class RaiIE(RaiBaseIE): | |||
'upload_date': '20170401', | |||
}, | |||
'skip': 'Changes daily', | |||
}, { | |||
# HDS live stream with only relinker URL | |||
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews', | |||
'info_dict': { | |||
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc', | |||
'ext': 'flv', | |||
'title': 'EuroNews', | |||
}, | |||
'params': { | |||
'skip_download': True, | |||
}, | |||
'skip': 'This content is available only in Italy', | |||
}, { | |||
# HLS live stream with ContentItem in og:url | |||
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html', | |||
@@ -473,7 +423,7 @@ class RaiIE(RaiBaseIE): | |||
except ExtractorError: | |||
pass | |||
relinker_url = self._search_regex( | |||
relinker_url = self._proto_relative_url(self._search_regex( | |||
r'''(?x) | |||
(?: | |||
var\s+videoURL| | |||
@@ -485,7 +435,7 @@ class RaiIE(RaiBaseIE): | |||
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\? | |||
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1 | |||
''', | |||
webpage, 'relinker URL', group='url') | |||
webpage, 'relinker URL', group='url')) | |||
relinker_info = self._extract_relinker_info( | |||
urljoin(url, relinker_url), video_id) | |||
@@ -649,7 +649,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): | |||
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): | |||
def _extract_playlist(self, base_url, playlist_id, playlist_title): | |||
# Per the SoundCloud documentation, the maximum limit for a linked partioning query is 200. | |||
# Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200. | |||
# https://developers.soundcloud.com/blog/offset-pagination-deprecated | |||
COMMON_QUERY = { | |||
'limit': 200, | |||
@@ -9,6 +9,7 @@ from ..utils import ( | |||
determine_ext, | |||
dict_get, | |||
int_or_none, | |||
unified_timestamp, | |||
str_or_none, | |||
strip_or_none, | |||
try_get, | |||
@@ -44,7 +45,8 @@ class SVTBaseIE(InfoExtractor): | |||
'format_id': player_type, | |||
'url': vurl, | |||
}) | |||
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): | |||
rights = try_get(video_info, lambda x: x['rights'], dict) or {} | |||
if not formats and rights.get('geoBlockedSweden'): | |||
self.raise_geo_restricted( | |||
'This video is only available in Sweden', | |||
countries=self._GEO_COUNTRIES) | |||
@@ -70,6 +72,7 @@ class SVTBaseIE(InfoExtractor): | |||
episode = video_info.get('episodeTitle') | |||
episode_number = int_or_none(video_info.get('episodeNumber')) | |||
timestamp = unified_timestamp(rights.get('validFrom')) | |||
duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration'))) | |||
age_limit = None | |||
adult = dict_get( | |||
@@ -84,6 +87,7 @@ class SVTBaseIE(InfoExtractor): | |||
'formats': formats, | |||
'subtitles': subtitles, | |||
'duration': duration, | |||
'timestamp': timestamp, | |||
'age_limit': age_limit, | |||
'series': series, | |||
'season_number': season_number, | |||
@@ -136,26 +140,39 @@ class SVTPlayIE(SVTPlayBaseIE): | |||
IE_DESC = 'SVT Play and Öppet arkiv' | |||
_VALID_URL = r'''(?x) | |||
(?: | |||
svt:(?P<svt_id>[^/?#&]+)| | |||
(?: | |||
svt:| | |||
https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/ | |||
) | |||
(?P<svt_id>[^/?#&]+)| | |||
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+) | |||
) | |||
''' | |||
_TESTS = [{ | |||
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', | |||
'md5': '2b6704fe4a28801e1a098bbf3c5ac611', | |||
'url': 'https://www.svtplay.se/video/26194546/det-har-ar-himlen', | |||
'md5': '2382036fd6f8c994856c323fe51c426e', | |||
'info_dict': { | |||
'id': '5996901', | |||
'id': 'jNwpV9P', | |||
'ext': 'mp4', | |||
'title': 'Flygplan till Haile Selassie', | |||
'duration': 3527, | |||
'thumbnail': r're:^https?://.*[\.-]jpg$', | |||
'title': 'Det här är himlen', | |||
'timestamp': 1586044800, | |||
'upload_date': '20200405', | |||
'duration': 3515, | |||
'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$', | |||
'age_limit': 0, | |||
'subtitles': { | |||
'sv': [{ | |||
'ext': 'wsrt', | |||
'ext': 'vtt', | |||
}] | |||
}, | |||
}, | |||
'params': { | |||
'format': 'bestvideo', | |||
# skip for now: the download test asserts that a segment is > 10000 bytes, but SVT uses | |||
# init segments that are smaller | |||
# AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B | |||
'skip_download': True, | |||
}, | |||
}, { | |||
# geo restricted to Sweden | |||
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', | |||
@@ -172,6 +189,12 @@ class SVTPlayIE(SVTPlayBaseIE): | |||
}, { | |||
'url': 'svt:14278044', | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/', | |||
'only_matching': True, | |||
}, { | |||
'url': 'svt:eWv5MLX', | |||
'only_matching': True, | |||
}] | |||
def _adjust_title(self, info): | |||
@@ -236,7 +259,10 @@ class SVTPlayIE(SVTPlayBaseIE): | |||
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'), | |||
webpage, 'video id') | |||
return self._extract_by_video_id(svt_id, webpage) | |||
info_dict = self._extract_by_video_id(svt_id, webpage) | |||
info_dict['thumbnail'] = thumbnail | |||
return info_dict | |||
class SVTSeriesIE(SVTPlayBaseIE): | |||
@@ -360,7 +386,7 @@ class SVTPageIE(InfoExtractor): | |||
@classmethod | |||
def suitable(cls, url): | |||
return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url) | |||
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url) | |||
def _real_extract(self, url): | |||
path, display_id = re.match(self._VALID_URL, url).groups() | |||
@@ -86,7 +86,7 @@ class TagesschauPlayerIE(InfoExtractor): | |||
# return self._extract_via_api(kind, video_id) | |||
# JSON api does not provide some audio formats (e.g. ogg) thus | |||
# extractiong audio via webpage | |||
# extracting audio via webpage | |||
webpage = self._download_webpage(url, video_id) | |||
@@ -208,7 +208,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): | |||
if m: | |||
return [m.group('url')] | |||
# Are whitesapces ignored in URLs? | |||
# Are whitespaces ignored in URLs? | |||
# https://github.com/ytdl-org/youtube-dl/issues/12044 | |||
matches = re.findall( | |||
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) | |||
@@ -56,9 +56,9 @@ class TurnerBaseIE(AdobePassIE): | |||
content_id = xpath_text(video_data, 'contentId') or video_id | |||
# rtmp_src = xpath_text(video_data, 'akamai/src') | |||
# if rtmp_src: | |||
# splited_rtmp_src = rtmp_src.split(',') | |||
# if len(splited_rtmp_src) == 2: | |||
# rtmp_src = splited_rtmp_src[1] | |||
# split_rtmp_src = rtmp_src.split(',') | |||
# if len(split_rtmp_src) == 2: | |||
# rtmp_src = split_rtmp_src[1] | |||
# aifp = xpath_text(video_data, 'akamai/aifp', default='') | |||
urls = [] | |||
@@ -1,6 +1,7 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import base64 | |||
import hashlib | |||
import hmac | |||
import itertools | |||
@@ -9,6 +10,10 @@ import re | |||
import time | |||
from .common import InfoExtractor | |||
from ..compat import ( | |||
compat_parse_qs, | |||
compat_urllib_parse_urlparse, | |||
) | |||
from ..utils import ( | |||
ExtractorError, | |||
int_or_none, | |||
@@ -166,19 +171,20 @@ class VikiIE(VikiBaseIE): | |||
}, { | |||
# episode | |||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', | |||
'md5': '5fa476a902e902783ac7a4d615cdbc7a', | |||
'md5': '94e0e34fd58f169f40c184f232356cfe', | |||
'info_dict': { | |||
'id': '44699v', | |||
'ext': 'mp4', | |||
'title': 'Boys Over Flowers - Episode 1', | |||
'description': 'md5:b89cf50038b480b88b5b3c93589a9076', | |||
'duration': 4204, | |||
'duration': 4172, | |||
'timestamp': 1270496524, | |||
'upload_date': '20100405', | |||
'uploader': 'group8', | |||
'like_count': int, | |||
'age_limit': 13, | |||
} | |||
}, | |||
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'], | |||
}, { | |||
# youtube external | |||
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1', | |||
@@ -195,14 +201,15 @@ class VikiIE(VikiBaseIE): | |||
'uploader_id': 'ad14065n', | |||
'like_count': int, | |||
'age_limit': 13, | |||
} | |||
}, | |||
'skip': 'Page not found!', | |||
}, { | |||
'url': 'http://www.viki.com/player/44699v', | |||
'only_matching': True, | |||
}, { | |||
# non-English description | |||
'url': 'http://www.viki.com/videos/158036v-love-in-magic', | |||
'md5': '1713ae35df5a521b31f6dc40730e7c9c', | |||
'md5': 'adf9e321a0ae5d0aace349efaaff7691', | |||
'info_dict': { | |||
'id': '158036v', | |||
'ext': 'mp4', | |||
@@ -218,71 +225,11 @@ class VikiIE(VikiBaseIE): | |||
def _real_extract(self, url): | |||
video_id = self._match_id(url) | |||
video = self._call_api( | |||
'videos/%s.json' % video_id, video_id, 'Downloading video JSON') | |||
streams = self._call_api( | |||
'videos/%s/streams.json' % video_id, video_id, | |||
'Downloading video streams JSON') | |||
formats = [] | |||
for format_id, stream_dict in streams.items(): | |||
height = int_or_none(self._search_regex( | |||
r'^(\d+)[pP]$', format_id, 'height', default=None)) | |||
for protocol, format_dict in stream_dict.items(): | |||
# rtmps URLs does not seem to work | |||
if protocol == 'rtmps': | |||
continue | |||
format_url = format_dict.get('url') | |||
format_drms = format_dict.get('drms') | |||
format_stream_id = format_dict.get('id') | |||
if format_id == 'm3u8': | |||
m3u8_formats = self._extract_m3u8_formats( | |||
format_url, video_id, 'mp4', | |||
entry_protocol='m3u8_native', | |||
m3u8_id='m3u8-%s' % protocol, fatal=False) | |||
# Despite CODECS metadata in m3u8 all video-only formats | |||
# are actually video+audio | |||
for f in m3u8_formats: | |||
if f.get('acodec') == 'none' and f.get('vcodec') != 'none': | |||
f['acodec'] = None | |||
formats.extend(m3u8_formats) | |||
elif format_id == 'mpd': | |||
mpd_formats = self._extract_mpd_formats( | |||
format_url, video_id, | |||
mpd_id='mpd-%s' % protocol, fatal=False) | |||
formats.extend(mpd_formats) | |||
elif format_id == 'mpd': | |||
formats.extend(mpd_formats) | |||
elif format_url.startswith('rtmp'): | |||
mobj = re.search( | |||
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$', | |||
format_url) | |||
if not mobj: | |||
continue | |||
formats.append({ | |||
'format_id': 'rtmp-%s' % format_id, | |||
'ext': 'flv', | |||
'url': mobj.group('url'), | |||
'play_path': mobj.group('playpath'), | |||
'app': mobj.group('app'), | |||
'page_url': url, | |||
'drms': format_drms, | |||
'stream_id': format_stream_id, | |||
}) | |||
else: | |||
urlh = self._request_webpage( | |||
HEADRequest(format_url), video_id, 'Checking file size', fatal=False) | |||
formats.append({ | |||
'url': format_url, | |||
'format_id': '%s-%s' % (format_id, protocol), | |||
'height': height, | |||
'drms': format_drms, | |||
'stream_id': format_stream_id, | |||
'filesize': int_or_none(urlh.headers.get('Content-Length')), | |||
}) | |||
self._sort_formats(formats) | |||
resp = self._download_json( | |||
'https://www.viki.com/api/videos/' + video_id, | |||
video_id, 'Downloading video JSON', | |||
headers={'x-viki-app-ver': '4.0.57'}) | |||
video = resp['video'] | |||
self._check_errors(video) | |||
@@ -342,12 +289,84 @@ class VikiIE(VikiBaseIE): | |||
'subtitles': subtitles, | |||
} | |||
if 'external' in streams: | |||
result.update({ | |||
'_type': 'url_transparent', | |||
'url': streams['external']['url'], | |||
}) | |||
return result | |||
formats = [] | |||
def add_format(format_id, format_dict, protocol='http'): | |||
# rtmps URLs do not seem to work | |||
if protocol == 'rtmps': | |||
return | |||
format_url = format_dict.get('url') | |||
if not format_url: | |||
return | |||
format_drms = format_dict.get('drms') | |||
format_stream_id = format_dict.get('id') | |||
qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query) | |||
stream = qs.get('stream', [None])[0] | |||
if stream: | |||
format_url = base64.b64decode(stream).decode() | |||
if format_id in ('m3u8', 'hls'): | |||
m3u8_formats = self._extract_m3u8_formats( | |||
format_url, video_id, 'mp4', | |||
entry_protocol='m3u8_native', | |||
m3u8_id='m3u8-%s' % protocol, fatal=False) | |||
# Despite CODECS metadata in m3u8 all video-only formats | |||
# are actually video+audio | |||
for f in m3u8_formats: | |||
if '_drm/index_' in f['url']: | |||
continue | |||
if f.get('acodec') == 'none' and f.get('vcodec') != 'none': | |||
f['acodec'] = None | |||
formats.append(f) | |||
elif format_id in ('mpd', 'dash'): | |||
formats.extend(self._extract_mpd_formats( | |||
format_url, video_id, 'mpd-%s' % protocol, fatal=False)) | |||
elif format_url.startswith('rtmp'): | |||
mobj = re.search( | |||
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$', | |||
format_url) | |||
if not mobj: | |||
return | |||
formats.append({ | |||
'format_id': 'rtmp-%s' % format_id, | |||
'ext': 'flv', | |||
'url': mobj.group('url'), | |||
'play_path': mobj.group('playpath'), | |||
'app': mobj.group('app'), | |||
'page_url': url, | |||
'drms': format_drms, | |||
'stream_id': format_stream_id, | |||
}) | |||
else: | |||
urlh = self._request_webpage( | |||
HEADRequest(format_url), video_id, 'Checking file size', fatal=False) | |||
formats.append({ | |||
'url': format_url, | |||
'format_id': '%s-%s' % (format_id, protocol), | |||
'height': int_or_none(self._search_regex( | |||
r'^(\d+)[pP]$', format_id, 'height', default=None)), | |||
'drms': format_drms, | |||
'stream_id': format_stream_id, | |||
'filesize': int_or_none(urlh.headers.get('Content-Length')), | |||
}) | |||
for format_id, format_dict in (resp.get('streams') or {}).items(): | |||
add_format(format_id, format_dict) | |||
if not formats: | |||
streams = self._call_api( | |||
'videos/%s/streams.json' % video_id, video_id, | |||
'Downloading video streams JSON') | |||
if 'external' in streams: | |||
result.update({ | |||
'_type': 'url_transparent', | |||
'url': streams['external']['url'], | |||
}) | |||
return result | |||
for format_id, stream_dict in streams.items(): | |||
for protocol, format_dict in stream_dict.items(): | |||
add_format(format_id, format_dict, protocol) | |||
self._sort_formats(formats) | |||
result['formats'] = formats | |||
return result | |||
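A hedged sketch of the stream-parameter handling inside add_format above: some stream URLs carry the real media URL base64-encoded in their stream query parameter, and it is decoded before deciding whether the format is HLS, DASH, RTMP or plain HTTP (the sample URL below is fabricated):

import base64

try:
    from urllib.parse import parse_qs, urlparse, urlencode  # Python 3
except ImportError:
    from urlparse import parse_qs, urlparse  # Python 2
    from urllib import urlencode


def resolve_stream_url(format_url):
    qs = parse_qs(urlparse(format_url).query)
    stream = qs.get('stream', [None])[0]
    if stream:
        return base64.b64decode(stream).decode()
    return format_url


encoded = base64.b64encode(b'https://example.com/index.m3u8').decode()
test_url = 'https://www.viki.com/player?' + urlencode({'stream': encoded})
assert resolve_stream_url(test_url) == 'https://example.com/index.m3u8'
assert resolve_stream_url('https://example.com/direct.mp4') == 'https://example.com/direct.mp4'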
@@ -922,7 +922,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor): | |||
}] | |||
_PAGE_SIZE = 100 | |||
def _fetch_page(self, album_id, authorizaion, hashed_pass, page): | |||
def _fetch_page(self, album_id, authorization, hashed_pass, page): | |||
api_page = page + 1 | |||
query = { | |||
'fields': 'link,uri', | |||
@@ -934,7 +934,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor): | |||
videos = self._download_json( | |||
'https://api.vimeo.com/albums/%s/videos' % album_id, | |||
album_id, 'Downloading page %d' % api_page, query=query, headers={ | |||
'Authorization': 'jwt ' + authorizaion, | |||
'Authorization': 'jwt ' + authorization, | |||
})['data'] | |||
for video in videos: | |||
link = video.get('link') | |||
@@ -54,17 +54,17 @@ class XiamiBaseIE(InfoExtractor): | |||
def _decrypt(origin): | |||
n = int(origin[0]) | |||
origin = origin[1:] | |||
short_lenth = len(origin) // n | |||
long_num = len(origin) - short_lenth * n | |||
short_length = len(origin) // n | |||
long_num = len(origin) - short_length * n | |||
l = tuple() | |||
for i in range(0, n): | |||
length = short_lenth | |||
length = short_length | |||
if i < long_num: | |||
length += 1 | |||
l += (origin[0:length], ) | |||
origin = origin[length:] | |||
ans = '' | |||
for i in range(0, short_lenth + 1): | |||
for i in range(0, short_length + 1): | |||
for j in range(0, n): | |||
if len(l[j]) > i: | |||
ans += l[j][i] | |||
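The _decrypt helper above is a small columnar transposition: the leading character is the row count n, the rest is split into n chunks (the first long_num chunks get one extra character), and the plaintext is read off column by column. A worked toy example of the same scheme:

def toy_decrypt(origin):
    n = int(origin[0])
    origin = origin[1:]
    short_length = len(origin) // n
    long_num = len(origin) - short_length * n
    chunks = []
    for i in range(n):
        length = short_length + (1 if i < long_num else 0)
        chunks.append(origin[:length])
        origin = origin[length:]
    ans = ''
    for i in range(short_length + 1):  # read column-wise across the chunks
        for chunk in chunks:
            if len(chunk) > i:
                ans += chunk[i]
    return ans


# '2' -> two rows; the chunks 'hlo' and 'el' read column by column give 'hello'
assert toy_decrypt('2hloel') == 'hello'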
@@ -306,6 +306,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | |||
}, | |||
} | |||
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' | |||
def _call_api(self, ep, query, video_id): | |||
data = self._DEFAULT_API_DATA.copy() | |||
data.update(query) | |||
@@ -322,8 +324,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | |||
def _extract_yt_initial_data(self, video_id, webpage): | |||
return self._parse_json( | |||
self._search_regex( | |||
r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;', | |||
webpage, 'yt initial data'), | |||
(r'%s\s*\n' % self._YT_INITIAL_DATA_RE, | |||
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'), | |||
video_id) | |||
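For context on the two-pattern lookup above: ytInitialData can itself contain the character sequence };, so the non-greedy pattern alone may stop too early; preferring a match that ends at a line break and falling back to the plain pattern avoids that (see issue 27093). A standalone illustration:

import json
import re

YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

page = 'var ytInitialData = {"a": "contains };", "b": 2};\nvar other = 1;'

data = None
for pattern in (YT_INITIAL_DATA_RE + r'\s*\n', YT_INITIAL_DATA_RE):
    mobj = re.search(pattern, page)
    if mobj:
        data = json.loads(mobj.group(1))
        break

# the newline-anchored variant matched past the embedded "};"
assert data == {'a': 'contains };', 'b': 2}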
@@ -1089,6 +1091,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||
'skip_download': True, | |||
}, | |||
}, | |||
{ | |||
# with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093) | |||
'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no', | |||
'info_dict': { | |||
'id': 'CHqg6qOn4no', | |||
'ext': 'mp4', | |||
'title': 'Part 77 Sort a list of simple types in c#', | |||
'description': 'md5:b8746fa52e10cdbf47997903f13b20dc', | |||
'upload_date': '20130831', | |||
'uploader_id': 'kudvenkat', | |||
'uploader': 'kudvenkat', | |||
}, | |||
'params': { | |||
'skip_download': True, | |||
}, | |||
}, | |||
] | |||
def __init__(self, *args, **kwargs): | |||
@@ -2138,6 +2156,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||
formats.append(a_format) | |||
else: | |||
error_message = extract_unavailable_message() | |||
if not error_message: | |||
reason_list = try_get( | |||
player_response, | |||
lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'], | |||
list) or [] | |||
for reason in reason_list: | |||
if not isinstance(reason, dict): | |||
continue | |||
reason_text = try_get(reason, lambda x: x['text'], compat_str) | |||
if reason_text: | |||
if not error_message: | |||
error_message = '' | |||
error_message += reason_text | |||
if error_message: | |||
error_message = clean_html(error_message) | |||
if not error_message: | |||
error_message = clean_html(try_get( | |||
player_response, lambda x: x['playabilityStatus']['reason'], | |||
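A small sketch of the error-message handling added above: the playabilityStatus error screen delivers its subreason as a list of runs, each a dict with a text key, which are concatenated into one message (the sample payload below is invented):

def extract_subreason(player_response):
    try:
        runs = player_response['playabilityStatus']['errorScreen'][
            'playerErrorMessageRenderer']['subreason']['runs']
    except (KeyError, TypeError):
        return None
    parts = [r['text'] for r in runs if isinstance(r, dict) and r.get('text')]
    return ''.join(parts) or None


sample = {'playabilityStatus': {'errorScreen': {'playerErrorMessageRenderer': {
    'subreason': {'runs': [{'text': 'This video is '}, {'text': 'unavailable in your country.'}]}}}}}
assert extract_subreason(sample) == 'This video is unavailable in your country.'
assert extract_subreason({}) is None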
@@ -2319,8 +2352,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||
def _extract_count(count_name): | |||
return str_to_int(self._search_regex( | |||
r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' | |||
% re.escape(count_name), | |||
(r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name), | |||
r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)), | |||
video_webpage, count_name, default=None)) | |||
like_count = _extract_count('like') | |||
@@ -2613,13 +2646,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | |||
}, | |||
'playlist_mincount': 138, | |||
}, { | |||
'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA', | |||
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA', | |||
'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://music.youtube.com/channel/UCT-K0qO8z6NzWrywqefBPBQ', | |||
'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', | |||
'only_matching': True, | |||
}, { | |||
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', | |||
@@ -2666,7 +2699,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | |||
}, | |||
'playlist_mincount': 11, | |||
}, { | |||
'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU', | |||
'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', | |||
'only_matching': True, | |||
}, { | |||
# Playlist URL that does not actually serve a playlist | |||
@@ -2698,14 +2731,59 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | |||
}, { | |||
'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', | |||
'only_matching': True, | |||
}] | |||
@classmethod | |||
def suitable(cls, url): | |||
IGNORE = (YoutubeLiveIE,) | |||
return ( | |||
False if any(ie.suitable(url) for ie in IGNORE) | |||
else super(YoutubeTabIE, cls).suitable(url)) | |||
}, { | |||
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', | |||
'info_dict': { | |||
'id': '9Auq9mYxFEE', | |||
'ext': 'mp4', | |||
'title': 'Watch Sky News live', | |||
'uploader': 'Sky News', | |||
'uploader_id': 'skynews', | |||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews', | |||
'upload_date': '20191102', | |||
'description': 'md5:78de4e1c2359d0ea3ed829678e38b662', | |||
'categories': ['News & Politics'], | |||
'tags': list, | |||
'like_count': int, | |||
'dislike_count': int, | |||
}, | |||
'params': { | |||
'skip_download': True, | |||
}, | |||
}, { | |||
'url': 'https://www.youtube.com/user/TheYoungTurks/live', | |||
'info_dict': { | |||
'id': 'a48o2S1cPoo', | |||
'ext': 'mp4', | |||
'title': 'The Young Turks - Live Main Show', | |||
'uploader': 'The Young Turks', | |||
'uploader_id': 'TheYoungTurks', | |||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', | |||
'upload_date': '20150715', | |||
'license': 'Standard YouTube License', | |||
'description': 'md5:438179573adcdff3c97ebb1ee632b891', | |||
'categories': ['News & Politics'], | |||
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], | |||
'like_count': int, | |||
'dislike_count': int, | |||
}, | |||
'params': { | |||
'skip_download': True, | |||
}, | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://www.youtube.com/c/CommanderVideoHq/live', | |||
'only_matching': True, | |||
}, | |||
# TODO | |||
# { | |||
# 'url': 'https://www.youtube.com/TheYoungTurks/live', | |||
# 'only_matching': True, | |||
# } | |||
] | |||
def _extract_channel_id(self, webpage): | |||
channel_id = self._html_search_meta( | |||
@@ -3147,7 +3225,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | |||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) | |||
webpage = self._download_webpage(url, item_id) | |||
identity_token = self._search_regex( | |||
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage, | |||
r'\bID_TOKEN["\']\s*:\s/l*["\'](.+?)["\']', webpage, | |||
'identity token', default=None) | |||
data = self._extract_yt_initial_data(item_id, webpage) | |||
tabs = try_get( | |||
@@ -3158,7 +3236,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | |||
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict) | |||
if playlist: | |||
return self._extract_from_playlist(item_id, data, playlist) | |||
# Fallback to video extraction if no playlist alike page is recognized | |||
# Fallback to video extraction if no playlist alike page is recognized. | |||
# First check for the current video then try the v attribute of URL query. | |||
video_id = try_get( | |||
data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'], | |||
compat_str) or video_id | |||
if video_id: | |||
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id) | |||
# Failed to recognize | |||
@@ -3279,58 +3361,6 @@ class YoutubeYtUserIE(InfoExtractor): | |||
ie=YoutubeTabIE.ie_key(), video_id=user_id) | |||
class YoutubeLiveIE(YoutubeBaseInfoExtractor): | |||
IE_DESC = 'YouTube.com live streams' | |||
_VALID_URL = r'(?P<base_url>%s)/live' % YoutubeTabIE._VALID_URL | |||
IE_NAME = 'youtube:live' | |||
_TESTS = [{ | |||
'url': 'https://www.youtube.com/user/TheYoungTurks/live', | |||
'info_dict': { | |||
'id': 'a48o2S1cPoo', | |||
'ext': 'mp4', | |||
'title': 'The Young Turks - Live Main Show', | |||
'uploader': 'The Young Turks', | |||
'uploader_id': 'TheYoungTurks', | |||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', | |||
'upload_date': '20150715', | |||
'license': 'Standard YouTube License', | |||
'description': 'md5:438179573adcdff3c97ebb1ee632b891', | |||
'categories': ['News & Politics'], | |||
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], | |||
'like_count': int, | |||
'dislike_count': int, | |||
}, | |||
'params': { | |||
'skip_download': True, | |||
}, | |||
}, { | |||
'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://www.youtube.com/c/CommanderVideoHq/live', | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://www.youtube.com/TheYoungTurks/live', | |||
'only_matching': True, | |||
}] | |||
def _real_extract(self, url): | |||
mobj = re.match(self._VALID_URL, url) | |||
channel_id = mobj.group('id') | |||
base_url = mobj.group('base_url') | |||
webpage = self._download_webpage(url, channel_id, fatal=False) | |||
if webpage: | |||
page_type = self._og_search_property( | |||
'type', webpage, 'page type', default='') | |||
video_id = self._html_search_meta( | |||
'videoId', webpage, 'video id', default=None) | |||
if page_type.startswith('video') and video_id and re.match( | |||
r'^[0-9A-Za-z_-]{11}$', video_id): | |||
return self.url_result(video_id, YoutubeIE.ie_key()) | |||
return self.url_result(base_url) | |||
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor): | |||
IE_DESC = 'YouTube.com searches' | |||
# there doesn't appear to be a real limit, for example if you search for | |||
@@ -2460,7 +2460,7 @@ class XAttrMetadataError(YoutubeDLError): | |||
# Parsing code and msg | |||
if (self.code in (errno.ENOSPC, errno.EDQUOT) | |||
or 'No space left' in self.msg or 'Disk quota excedded' in self.msg): | |||
or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg): | |||
self.reason = 'NO_SPACE' | |||
elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: | |||
self.reason = 'VALUE_TOO_LONG' | |||
@@ -4215,10 +4215,10 @@ def parse_codecs(codecs_str): | |||
# http://tools.ietf.org/html/rfc6381 | |||
if not codecs_str: | |||
return {} | |||
splited_codecs = list(filter(None, map( | |||
split_codecs = list(filter(None, map( | |||
lambda str: str.strip(), codecs_str.strip().strip(',').split(',')))) | |||
vcodec, acodec = None, None | |||
for full_codec in splited_codecs: | |||
for full_codec in split_codecs: | |||
codec = full_codec.split('.')[0] | |||
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'): | |||
if not vcodec: | |||
@@ -4229,10 +4229,10 @@ def parse_codecs(codecs_str): | |||
else: | |||
write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr) | |||
if not vcodec and not acodec: | |||
if len(splited_codecs) == 2: | |||
if len(split_codecs) == 2: | |||
return { | |||
'vcodec': splited_codecs[0], | |||
'acodec': splited_codecs[1], | |||
'vcodec': split_codecs[0], | |||
'acodec': split_codecs[1], | |||
} | |||
else: | |||
return { | |||
@@ -5471,7 +5471,7 @@ def encode_base_n(num, n, table=None): | |||
def decode_packed_codes(code): | |||
mobj = re.search(PACKED_CODES_RE, code) | |||
obfucasted_code, base, count, symbols = mobj.groups() | |||
obfuscated_code, base, count, symbols = mobj.groups() | |||
base = int(base) | |||
count = int(count) | |||
symbols = symbols.split('|') | |||
@@ -5484,7 +5484,7 @@ def decode_packed_codes(code): | |||
return re.sub( | |||
r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)], | |||
obfucasted_code) | |||
obfuscated_code) | |||
def caesar(s, alphabet, shift): | |||