1
0
Fork 0
mirror of https://github.com/blackjack4494/yt-dlc.git synced 2024-12-21 23:33:10 +00:00

Updated to release 2020.11.21.1

This commit is contained in:
pukkandan 2020-11-21 20:20:42 +05:30
parent 3462ffa892
commit a0566bbf5c
29 changed files with 559 additions and 360 deletions

View file

@ -37,7 +37,7 @@
"writeinfojson": true, "writeinfojson": true,
"writesubtitles": false, "writesubtitles": false,
"allsubtitles": false, "allsubtitles": false,
"listssubtitles": false, "listsubtitles": false,
"socket_timeout": 20, "socket_timeout": 20,
"fixup": "never" "fixup": "never"
} }

View file

@ -919,6 +919,76 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(downloaded['extractor'], 'testex') self.assertEqual(downloaded['extractor'], 'testex')
self.assertEqual(downloaded['extractor_key'], 'TestEx') self.assertEqual(downloaded['extractor_key'], 'TestEx')
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
class _YDL(YDL):
def __init__(self, *args, **kwargs):
super(_YDL, self).__init__(*args, **kwargs)
def trouble(self, s, tb=None):
pass
ydl = _YDL({
'format': 'extra',
'ignoreerrors': True,
})
class VideoIE(InfoExtractor):
_VALID_URL = r'video:(?P<id>\d+)'
def _real_extract(self, url):
video_id = self._match_id(url)
formats = [{
'format_id': 'default',
'url': 'url:',
}]
if video_id == '0':
raise ExtractorError('foo')
if video_id == '2':
formats.append({
'format_id': 'extra',
'url': TEST_URL,
})
return {
'id': video_id,
'title': 'Video %s' % video_id,
'formats': formats,
}
class PlaylistIE(InfoExtractor):
_VALID_URL = r'playlist:'
def _entries(self):
for n in range(3):
video_id = compat_str(n)
yield {
'_type': 'url_transparent',
'ie_key': VideoIE.ie_key(),
'id': video_id,
'url': 'video:%s' % video_id,
'title': 'Video Transparent %s' % video_id,
}
def _real_extract(self, url):
return self.playlist_result(self._entries())
ydl.add_info_extractor(VideoIE(ydl))
ydl.add_info_extractor(PlaylistIE(ydl))
info = ydl.extract_info('playlist:')
entries = info['entries']
self.assertEqual(len(entries), 3)
self.assertTrue(entries[0] is None)
self.assertTrue(entries[1] is None)
self.assertEqual(len(ydl.downloaded_info_dicts), 1)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(entries[2], downloaded)
self.assertEqual(downloaded['url'], TEST_URL)
self.assertEqual(downloaded['title'], 'Video Transparent 2')
self.assertEqual(downloaded['id'], '2')
self.assertEqual(downloaded['extractor'], 'Video')
self.assertEqual(downloaded['extractor_key'], 'Video')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -829,7 +829,6 @@ class YoutubeDL(object):
self.report_warning('The program functionality for this site has been marked as broken, ' self.report_warning('The program functionality for this site has been marked as broken, '
'and will probably not work.') 'and will probably not work.')
try:
try: try:
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url) temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
except (AssertionError, IndexError, AttributeError): except (AssertionError, IndexError, AttributeError):
@ -839,9 +838,38 @@ class YoutubeDL(object):
ie_key, temp_id)) ie_key, temp_id))
break break
return self.__extract_info(url, ie, download, extra_info, process, info_dict)
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
def __handle_extraction_exceptions(func):
def wrapper(self, *args, **kwargs):
try:
return func(self, *args, **kwargs)
except GeoRestrictedError as e:
msg = e.msg
if e.countries:
msg += '\nThis video is available in %s.' % ', '.join(
map(ISO3166Utils.short2full, e.countries))
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg)
except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback())
except MaxDownloadsReached:
raise
except Exception as e:
if self.params.get('ignoreerrors', False):
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
else:
raise
return wrapper
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
ie_result = ie.extract(url) ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
break return
if isinstance(ie_result, list): if isinstance(ie_result, list):
# Backwards compatibility: old IE result format # Backwards compatibility: old IE result format
ie_result = { ie_result = {
@ -858,27 +886,6 @@ class YoutubeDL(object):
return self.process_ie_result(ie_result, download, extra_info) return self.process_ie_result(ie_result, download, extra_info)
else: else:
return ie_result return ie_result
except GeoRestrictedError as e:
msg = e.msg
if e.countries:
msg += '\nThis video is available in %s.' % ', '.join(
map(ISO3166Utils.short2full, e.countries))
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg)
break
except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback())
break
except MaxDownloadsReached:
raise
except Exception as e:
if self.params.get('ignoreerrors', False):
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
break
else:
raise
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
def add_default_extra_info(self, ie_result, ie, url): def add_default_extra_info(self, ie_result, ie, url):
self.add_extra_info(ie_result, { self.add_extra_info(ie_result, {
@ -1057,9 +1064,8 @@ class YoutubeDL(object):
self.to_screen('[download] ' + reason) self.to_screen('[download] ' + reason)
continue continue
entry_result = self.process_ie_result(entry, entry_result = self.__process_iterable_entry(entry, download, extra)
download=download, # TODO: skip failed (empty) entries?
extra_info=extra)
playlist_results.append(entry_result) playlist_results.append(entry_result)
ie_result['entries'] = playlist_results ie_result['entries'] = playlist_results
self.to_screen('[download] Finished downloading playlist: %s' % playlist) self.to_screen('[download] Finished downloading playlist: %s' % playlist)
@ -1088,6 +1094,11 @@ class YoutubeDL(object):
else: else:
raise Exception('Invalid result type: %s' % result_type) raise Exception('Invalid result type: %s' % result_type)
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
return self.process_ie_result(
entry, download=download, extra_info=extra_info)
def _build_format_filter(self, filter_spec): def _build_format_filter(self, filter_spec):
" Returns a function to filter the formats according to the filter_spec " " Returns a function to filter the formats according to the filter_spec "

View file

@ -2345,7 +2345,7 @@ except ImportError: # Python <3.4
# HTMLParseError has been deprecated in Python 3.3 and removed in # HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exceptiong handling # and uniform cross-version exception handling
class compat_HTMLParseError(Exception): class compat_HTMLParseError(Exception):
pass pass

View file

@ -109,7 +109,9 @@ class HttpFD(FileDownloader):
try: try:
ctx.data = self.ydl.urlopen(request) ctx.data = self.ydl.urlopen(request)
except (compat_urllib_error.URLError, ) as err: except (compat_urllib_error.URLError, ) as err:
if isinstance(err.reason, socket.timeout): # reason may not be available, e.g. for urllib2.HTTPError on python 2.6
reason = getattr(err, 'reason', None)
if isinstance(reason, socket.timeout):
raise RetryDownload(err) raise RetryDownload(err)
raise err raise err
# When trying to resume, Content-Range HTTP header of response has to be checked # When trying to resume, Content-Range HTTP header of response has to be checked

View file

@ -0,0 +1,103 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
from .vimeo import VimeoIE
from ..utils import (
int_or_none,
parse_iso8601,
update_url_query,
)
class AmaraIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
_TESTS = [{
# Youtube
'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
'info_dict': {
'id': 'h6ZuVdvYnfE',
'ext': 'mp4',
'title': 'Why jury trials are becoming less common',
'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
'thumbnail': r're:^https?://.*\.jpg$',
'subtitles': dict,
'upload_date': '20160813',
'uploader': 'PBS NewsHour',
'uploader_id': 'PBSNewsHour',
'timestamp': 1549639570,
}
}, {
# Vimeo
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
'md5': '99392c75fa05d432a8f11df03612195e',
'info_dict': {
'id': '18622084',
'ext': 'mov',
'title': 'Vimeo at CES 2011!',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
'subtitles': dict,
'timestamp': 1294763658,
'upload_date': '20110111',
'uploader': 'Sam Morrill',
'uploader_id': 'sammorrill'
}
}, {
# Direct Link
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
'md5': 'd3970f08512738ee60c5807311ff5d3f',
'info_dict': {
'id': 's8KL7I3jLmh6',
'ext': 'mp4',
'title': 'The danger of a single story',
'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
'thumbnail': r're:^https?://.*\.jpg$',
'subtitles': dict,
'upload_date': '20091007',
'timestamp': 1254942511,
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
meta = self._download_json(
'https://amara.org/api/videos/%s/' % video_id,
video_id, query={'format': 'json'})
title = meta['title']
video_url = meta['all_urls'][0]
subtitles = {}
for language in (meta.get('languages') or []):
subtitles_uri = language.get('subtitles_uri')
if not (subtitles_uri and language.get('published')):
continue
subtitle = subtitles.setdefault(language.get('code') or 'en', [])
for f in ('json', 'srt', 'vtt'):
subtitle.append({
'ext': f,
'url': update_url_query(subtitles_uri, {'format': f}),
})
info = {
'url': video_url,
'id': video_id,
'subtitles': subtitles,
'title': title,
'description': meta.get('description'),
'thumbnail': meta.get('thumbnail'),
'duration': int_or_none(meta.get('duration')),
'timestamp': parse_iso8601(meta.get('created')),
}
for ie in (YoutubeIE, VimeoIE):
if ie.suitable(video_url):
info.update({
'_type': 'url_transparent',
'ie_key': ie.ie_key(),
})
break
return info

View file

@ -147,7 +147,7 @@ class BrightcoveLegacyIE(InfoExtractor):
] ]
@classmethod @classmethod
def _build_brighcove_url(cls, object_str): def _build_brightcove_url(cls, object_str):
""" """
Build a Brightcove url from a xml string containing Build a Brightcove url from a xml string containing
<object class="BrightcoveExperience">{params}</object> <object class="BrightcoveExperience">{params}</object>
@ -217,7 +217,7 @@ class BrightcoveLegacyIE(InfoExtractor):
return cls._make_brightcove_url(params) return cls._make_brightcove_url(params)
@classmethod @classmethod
def _build_brighcove_url_from_js(cls, object_js): def _build_brightcove_url_from_js(cls, object_js):
# The layout of JS is as follows: # The layout of JS is as follows:
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) { # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
# // build Brightcove <object /> XML # // build Brightcove <object /> XML
@ -272,12 +272,12 @@ class BrightcoveLegacyIE(InfoExtractor):
).+?>\s*</object>''', ).+?>\s*</object>''',
webpage) webpage)
if matches: if matches:
return list(filter(None, [cls._build_brighcove_url(m) for m in matches])) return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage) matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
if matches: if matches:
return list(filter(None, [ return list(filter(None, [
cls._build_brighcove_url_from_js(custom_bc) cls._build_brightcove_url_from_js(custom_bc)
for custom_bc in matches])) for custom_bc in matches]))
return [src for _, src in re.findall( return [src for _, src in re.findall(
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)] r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]

View file

@ -1664,7 +1664,7 @@ class InfoExtractor(object):
# just the media without qualities renditions. # just the media without qualities renditions.
# Fortunately, master playlist can be easily distinguished from media # Fortunately, master playlist can be easily distinguished from media
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4] # playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
# master playlist tags MUST NOT appear in a media playist and vice versa. # master playlist tags MUST NOT appear in a media playlist and vice versa.
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every # As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
# media playlist and MUST NOT appear in master playlist thus we can # media playlist and MUST NOT appear in master playlist thus we can
# clearly detect media playlist with this criterion. # clearly detect media playlist with this criterion.

View file

@ -7,7 +7,7 @@ from .dplay import DPlayIE
class DiscoveryNetworksDeIE(DPlayIE): class DiscoveryNetworksDeIE(DPlayIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
}, { }, {
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -60,7 +60,7 @@ class EuropaIE(InfoExtractor):
title = get_item('title', preferred_langs) or video_id title = get_item('title', preferred_langs) or video_id
description = get_item('description', preferred_langs) description = get_item('description', preferred_langs)
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail') thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date')) upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration')) duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
view_count = int_or_none(xpath_text(playlist, './info/views', 'views')) view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
@ -85,7 +85,7 @@ class EuropaIE(InfoExtractor):
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnmail, 'thumbnail': thumbnail,
'upload_date': upload_date, 'upload_date': upload_date,
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,

View file

@ -36,6 +36,7 @@ from .afreecatv import AfreecaTVIE
from .airmozilla import AirMozillaIE from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE from .alphaporno import AlphaPornoIE
from .amara import AmaraIE
from .alura import ( from .alura import (
AluraIE, AluraIE,
AluraCourseIE AluraCourseIE
@ -1507,7 +1508,6 @@ from .youtube import (
YoutubeIE, YoutubeIE,
YoutubeFavouritesIE, YoutubeFavouritesIE,
YoutubeHistoryIE, YoutubeHistoryIE,
YoutubeLiveIE,
YoutubeTabIE, YoutubeTabIE,
YoutubePlaylistIE, YoutubePlaylistIE,
YoutubeRecommendedIE, YoutubeRecommendedIE,

View file

@ -211,7 +211,7 @@ class FranceTVIE(InfoExtractor):
'id': video_id, 'id': video_id,
'title': self._live_title(title) if is_live else title, 'title': self._live_title(title) if is_live else title,
'description': clean_html(info.get('synopsis')), 'description': clean_html(info.get('synopsis')),
'thumbnail': urljoin('http://pluzz.francetv.fr', info.get('image')), 'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')), 'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])), 'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
'is_live': is_live, 'is_live': is_live,

View file

@ -842,7 +842,7 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
} }
}, },
# MTVSercices embed # MTVServices embed
{ {
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html', 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
'md5': 'ca1aef97695ef2c1d6973256a57e5252', 'md5': 'ca1aef97695ef2c1d6973256a57e5252',

View file

@ -3,11 +3,13 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
lowercase_escape, lowercase_escape,
try_get,
update_url_query, update_url_query,
) )
@ -38,21 +40,10 @@ class GoogleDriveIE(InfoExtractor):
# video can't be watched anonymously due to view count limit reached, # video can't be watched anonymously due to view count limit reached,
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046) # but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view', 'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
'md5': 'bfbd670d03a470bb1e6d4a257adec12e', 'only_matching': True,
'info_dict': {
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
'ext': 'mp4',
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
}
}, { }, {
# video id is longer than 28 characters # video id is longer than 28 characters
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit', 'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
'info_dict': {
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
'ext': 'mp4',
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
'duration': 189,
},
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28', 'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
@ -171,23 +162,21 @@ class GoogleDriveIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( video_info = compat_parse_qs(self._download_webpage(
'http://docs.google.com/file/d/%s' % video_id, video_id) 'https://drive.google.com/get_video_info',
video_id, query={'docid': video_id}))
title = self._search_regex( def get_value(key):
r'"title"\s*,\s*"([^"]+)', webpage, 'title', return try_get(video_info, lambda x: x[key][0])
default=None) or self._og_search_title(webpage)
duration = int_or_none(self._search_regex( reason = get_value('reason')
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', title = get_value('title')
default=None)) if not title and reason:
raise ExtractorError(reason, expected=True)
formats = [] formats = []
fmt_stream_map = self._search_regex( fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, fmt_list = (get_value('fmt_list') or '').split(',')
'fmt stream map', default='').split(',')
fmt_list = self._search_regex(
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
'fmt_list', default='').split(',')
if fmt_stream_map and fmt_list: if fmt_stream_map and fmt_list:
resolutions = {} resolutions = {}
for fmt in fmt_list: for fmt in fmt_list:
@ -257,19 +246,14 @@ class GoogleDriveIE(InfoExtractor):
if urlh and urlh.headers.get('Content-Disposition'): if urlh and urlh.headers.get('Content-Disposition'):
add_source_format(urlh) add_source_format(urlh)
if not formats: if not formats and reason:
reason = self._search_regex(
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
if reason:
raise ExtractorError(reason, expected=True) raise ExtractorError(reason, expected=True)
self._sort_formats(formats) self._sort_formats(formats)
hl = self._search_regex( hl = get_value('hl')
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
subtitles_id = None subtitles_id = None
ttsurl = self._search_regex( ttsurl = get_value('ttsurl')
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
if ttsurl: if ttsurl:
# the video Id for subtitles will be the last value in the ttsurl # the video Id for subtitles will be the last value in the ttsurl
# query string # query string
@ -281,8 +265,8 @@ class GoogleDriveIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'thumbnail': self._og_search_thumbnail(webpage, default=None), 'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
'duration': duration, 'duration': int_or_none(get_value('length_seconds')),
'formats': formats, 'formats': formats,
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl), 'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
'automatic_captions': self.extract_automatic_captions( 'automatic_captions': self.extract_automatic_captions(

View file

@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE):
def _extract_rtmp_video(self, webpage): def _extract_rtmp_video(self, webpage):
# The server URL is hardcoded # The server URL is hardcoded
video_url = 'rtmpe://video.infoq.com/cfx/st/' video_url = 'rtmpe://videof.infoq.com/cfx/st/'
# Extract video URL # Extract video URL
encoded_id = self._search_regex( encoded_id = self._search_regex(
@ -86,17 +86,18 @@ class InfoQIE(BokeCCBaseIE):
return [{ return [{
'format_id': 'http_video', 'format_id': 'http_video',
'url': http_video_url, 'url': http_video_url,
'http_headers': {'Referer': 'https://www.infoq.com/'},
}] }]
def _extract_http_audio(self, webpage, video_id): def _extract_http_audio(self, webpage, video_id):
fields = self._hidden_inputs(webpage) fields = self._form_hidden_inputs('mp3Form', webpage)
http_audio_url = fields.get('filename') http_audio_url = fields.get('filename')
if not http_audio_url: if not http_audio_url:
return [] return []
# base URL is found in the Location header in the response returned by # base URL is found in the Location header in the response returned by
# GET https://www.infoq.com/mp3download.action?filename=... when logged in. # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url) http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage)) http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
# audio file seem to be missing some times even if there is a download link # audio file seem to be missing some times even if there is a download link

View file

@ -64,7 +64,7 @@ class KUSIIE(InfoExtractor):
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000) duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
description = xpath_text(doc, 'ABSTRACT') description = xpath_text(doc, 'ABSTRACT')
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME') thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate')) creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content') quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
formats = [] formats = []
@ -84,5 +84,5 @@ class KUSIIE(InfoExtractor):
'duration': duration, 'duration': duration,
'formats': formats, 'formats': formats,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'timestamp': createtion_time, 'timestamp': creation_time,
} }

View file

@ -33,7 +33,7 @@ class NprIE(InfoExtractor):
}, },
}], }],
}, { }, {
# mutlimedia, not media title # multimedia, not media title
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert', 'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
'info_dict': { 'info_dict': {
'id': '533198237', 'id': '533198237',

View file

@ -477,7 +477,7 @@ class PBSIE(InfoExtractor):
if media_id: if media_id:
return media_id, presumptive_id, upload_date, description return media_id, presumptive_id, upload_date, description
# Fronline video embedded via flp # Frontline video embedded via flp
video_id = self._search_regex( video_id = self._search_regex(
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None) r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
if video_id: if video_id:

View file

@ -16,8 +16,9 @@ from ..utils import (
GeoRestrictedError, GeoRestrictedError,
int_or_none, int_or_none,
parse_duration, parse_duration,
remove_start,
strip_or_none, strip_or_none,
unescapeHTML, try_get,
unified_strdate, unified_strdate,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
@ -67,7 +68,7 @@ class RaiBaseIE(InfoExtractor):
# This does not imply geo restriction (e.g. # This does not imply geo restriction (e.g.
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html) # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
if media_url == 'http://download.rai.it/video_no_available.mp4': if '/video_no_available.mp4' in media_url:
continue continue
ext = determine_ext(media_url) ext = determine_ext(media_url)
@ -122,27 +123,8 @@ class RaiBaseIE(InfoExtractor):
class RaiPlayIE(RaiBaseIE): class RaiPlayIE(RaiBaseIE):
_VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.(?:html|json))' % RaiBaseIE._UUID_RE _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
_TESTS = [{ _TESTS = [{
'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
'md5': '340aa3b7afb54bfd14a8c11786450d76',
'info_dict': {
'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
'ext': 'mp4',
'title': 'La Casa Bianca',
'alt_title': 'S2016 - Puntata del 23/10/2016',
'description': 'md5:a09d45890850458077d1f68bb036e0a5',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Rai 3',
'creator': 'Rai 3',
'duration': 3278,
'timestamp': 1477764300,
'upload_date': '20161029',
'series': 'La Casa Bianca',
'season': '2016',
},
'skip': 'This content is not available',
}, {
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
'md5': '8970abf8caf8aef4696e7b1f2adfc696', 'md5': '8970abf8caf8aef4696e7b1f2adfc696',
'info_dict': { 'info_dict': {
@ -166,10 +148,10 @@ class RaiPlayIE(RaiBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
url, video_id = re.match(self._VALID_URL, url).groups() base, video_id = re.match(self._VALID_URL, url).groups()
media = self._download_json( media = self._download_json(
url.replace('.html', '.json'), video_id, 'Downloading video JSON') base + '.json', video_id, 'Downloading video JSON')
title = media['name'] title = media['name']
video = media['video'] video = media['video']
@ -195,7 +177,8 @@ class RaiPlayIE(RaiBaseIE):
season = media.get('season') season = media.get('season')
info = { info = {
'id': video_id, 'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
'display_id': video_id,
'title': self._live_title(title) if relinker_info.get( 'title': self._live_title(title) if relinker_info.get(
'is_live') else title, 'is_live') else title,
'alt_title': strip_or_none(media.get('subtitle')), 'alt_title': strip_or_none(media.get('subtitle')),
@ -217,16 +200,16 @@ class RaiPlayIE(RaiBaseIE):
return info return info
class RaiPlayLiveIE(RaiBaseIE): class RaiPlayLiveIE(RaiPlayIE):
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)' _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
_TEST = { _TESTS = [{
'url': 'http://www.raiplay.it/dirette/rainews24', 'url': 'http://www.raiplay.it/dirette/rainews24',
'info_dict': { 'info_dict': {
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
'display_id': 'rainews24', 'display_id': 'rainews24',
'ext': 'mp4', 'ext': 'mp4',
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:6eca31500550f9376819f174e5644754', 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497',
'uploader': 'Rai News 24', 'uploader': 'Rai News 24',
'creator': 'Rai News 24', 'creator': 'Rai News 24',
'is_live': True, 'is_live': True,
@ -234,58 +217,50 @@ class RaiPlayLiveIE(RaiBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
} }]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
webpage, 'content id')
return {
'_type': 'url_transparent',
'ie_key': RaiPlayIE.ie_key(),
'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
'id': video_id,
'display_id': display_id,
}
class RaiPlayPlaylistIE(InfoExtractor): class RaiPlayPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)' _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))'
_TESTS = [{ _TESTS = [{
'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/', 'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
'info_dict': { 'info_dict': {
'id': 'nondirloalmiocapo', 'id': 'nondirloalmiocapo',
'title': 'Non dirlo al mio capo', 'title': 'Non dirlo al mio capo',
'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
}, },
'playlist_mincount': 12, 'playlist_mincount': 12,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) base, playlist_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, playlist_id) program = self._download_json(
base + '.json', playlist_id, 'Downloading program JSON')
title = self._html_search_meta(
('programma', 'nomeProgramma'), webpage, 'title')
description = unescapeHTML(self._html_search_meta(
('description', 'og:description'), webpage, 'description'))
entries = [] entries = []
for mobj in re.finditer( for b in (program.get('blocks') or []):
r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1', for s in (b.get('sets') or []):
webpage): s_id = s.get('id')
video_url = urljoin(url, mobj.group('path')) if not s_id:
continue
medias = self._download_json(
'%s/%s.json' % (base, s_id), s_id,
'Downloading content set JSON', fatal=False)
if not medias:
continue
for m in (medias.get('items') or []):
path_id = m.get('path_id')
if not path_id:
continue
video_url = urljoin(url, path_id)
entries.append(self.url_result( entries.append(self.url_result(
video_url, ie=RaiPlayIE.ie_key(), video_url, ie=RaiPlayIE.ie_key(),
video_id=RaiPlayIE._match_id(video_url))) video_id=RaiPlayIE._match_id(video_url)))
return self.playlist_result(entries, playlist_id, title, description) return self.playlist_result(
entries, playlist_id, program.get('name'),
try_get(program, lambda x: x['program_info']['description']))
class RaiIE(RaiBaseIE): class RaiIE(RaiBaseIE):
@ -328,19 +303,6 @@ class RaiIE(RaiBaseIE):
'duration': 2214, 'duration': 2214,
'upload_date': '20161103', 'upload_date': '20161103',
} }
}, {
# drawMediaRaiTV(...)
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
'md5': '2dd727e61114e1ee9c47f0da6914e178',
'info_dict': {
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
'ext': 'mp4',
'title': 'Il pacco',
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20141221',
},
'skip': 'This content is not available',
}, { }, {
# initEdizione('ContentItem-...' # initEdizione('ContentItem-...'
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', 'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
@ -352,18 +314,6 @@ class RaiIE(RaiBaseIE):
'upload_date': '20170401', 'upload_date': '20170401',
}, },
'skip': 'Changes daily', 'skip': 'Changes daily',
}, {
# HDS live stream with only relinker URL
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
'info_dict': {
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
'ext': 'flv',
'title': 'EuroNews',
},
'params': {
'skip_download': True,
},
'skip': 'This content is available only in Italy',
}, { }, {
# HLS live stream with ContentItem in og:url # HLS live stream with ContentItem in og:url
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html', 'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
@ -473,7 +423,7 @@ class RaiIE(RaiBaseIE):
except ExtractorError: except ExtractorError:
pass pass
relinker_url = self._search_regex( relinker_url = self._proto_relative_url(self._search_regex(
r'''(?x) r'''(?x)
(?: (?:
var\s+videoURL| var\s+videoURL|
@ -485,7 +435,7 @@ class RaiIE(RaiBaseIE):
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\? //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1 (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
''', ''',
webpage, 'relinker URL', group='url') webpage, 'relinker URL', group='url'))
relinker_info = self._extract_relinker_info( relinker_info = self._extract_relinker_info(
urljoin(url, relinker_url), video_id) urljoin(url, relinker_url), video_id)

View file

@ -649,7 +649,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
def _extract_playlist(self, base_url, playlist_id, playlist_title): def _extract_playlist(self, base_url, playlist_id, playlist_title):
# Per the SoundCloud documentation, the maximum limit for a linked partioning query is 200. # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
# https://developers.soundcloud.com/blog/offset-pagination-deprecated # https://developers.soundcloud.com/blog/offset-pagination-deprecated
COMMON_QUERY = { COMMON_QUERY = {
'limit': 200, 'limit': 200,

View file

@ -9,6 +9,7 @@ from ..utils import (
determine_ext, determine_ext,
dict_get, dict_get,
int_or_none, int_or_none,
unified_timestamp,
str_or_none, str_or_none,
strip_or_none, strip_or_none,
try_get, try_get,
@ -44,7 +45,8 @@ class SVTBaseIE(InfoExtractor):
'format_id': player_type, 'format_id': player_type,
'url': vurl, 'url': vurl,
}) })
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): rights = try_get(video_info, lambda x: x['rights'], dict) or {}
if not formats and rights.get('geoBlockedSweden'):
self.raise_geo_restricted( self.raise_geo_restricted(
'This video is only available in Sweden', 'This video is only available in Sweden',
countries=self._GEO_COUNTRIES) countries=self._GEO_COUNTRIES)
@ -70,6 +72,7 @@ class SVTBaseIE(InfoExtractor):
episode = video_info.get('episodeTitle') episode = video_info.get('episodeTitle')
episode_number = int_or_none(video_info.get('episodeNumber')) episode_number = int_or_none(video_info.get('episodeNumber'))
timestamp = unified_timestamp(rights.get('validFrom'))
duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration'))) duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
age_limit = None age_limit = None
adult = dict_get( adult = dict_get(
@ -84,6 +87,7 @@ class SVTBaseIE(InfoExtractor):
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'duration': duration, 'duration': duration,
'timestamp': timestamp,
'age_limit': age_limit, 'age_limit': age_limit,
'series': series, 'series': series,
'season_number': season_number, 'season_number': season_number,
@ -136,26 +140,39 @@ class SVTPlayIE(SVTPlayBaseIE):
IE_DESC = 'SVT Play and Öppet arkiv' IE_DESC = 'SVT Play and Öppet arkiv'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: (?:
svt:(?P<svt_id>[^/?#&]+)| (?:
svt:|
https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
)
(?P<svt_id>[^/?#&]+)|
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+) https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
) )
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', 'url': 'https://www.svtplay.se/video/26194546/det-har-ar-himlen',
'md5': '2b6704fe4a28801e1a098bbf3c5ac611', 'md5': '2382036fd6f8c994856c323fe51c426e',
'info_dict': { 'info_dict': {
'id': '5996901', 'id': 'jNwpV9P',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Flygplan till Haile Selassie', 'title': 'Det här är himlen',
'duration': 3527, 'timestamp': 1586044800,
'thumbnail': r're:^https?://.*[\.-]jpg$', 'upload_date': '20200405',
'duration': 3515,
'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
'age_limit': 0, 'age_limit': 0,
'subtitles': { 'subtitles': {
'sv': [{ 'sv': [{
'ext': 'wsrt', 'ext': 'vtt',
}] }]
}, },
}, },
'params': {
'format': 'bestvideo',
# skip for now due to download test asserts that segment is > 10000 bytes and svt uses
# init segments that are smaller
# AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B
'skip_download': True,
},
}, { }, {
# geo restricted to Sweden # geo restricted to Sweden
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', 'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
@ -172,6 +189,12 @@ class SVTPlayIE(SVTPlayBaseIE):
}, { }, {
'url': 'svt:14278044', 'url': 'svt:14278044',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
'only_matching': True,
}, {
'url': 'svt:eWv5MLX',
'only_matching': True,
}] }]
def _adjust_title(self, info): def _adjust_title(self, info):
@ -236,7 +259,10 @@ class SVTPlayIE(SVTPlayBaseIE):
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'), r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
webpage, 'video id') webpage, 'video id')
return self._extract_by_video_id(svt_id, webpage) info_dict = self._extract_by_video_id(svt_id, webpage)
info_dict['thumbnail'] = thumbnail
return info_dict
class SVTSeriesIE(SVTPlayBaseIE): class SVTSeriesIE(SVTPlayBaseIE):
@ -360,7 +386,7 @@ class SVTPageIE(InfoExtractor):
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url) return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
path, display_id = re.match(self._VALID_URL, url).groups() path, display_id = re.match(self._VALID_URL, url).groups()

View file

@ -86,7 +86,7 @@ class TagesschauPlayerIE(InfoExtractor):
# return self._extract_via_api(kind, video_id) # return self._extract_via_api(kind, video_id)
# JSON api does not provide some audio formats (e.g. ogg) thus # JSON api does not provide some audio formats (e.g. ogg) thus
# extractiong audio via webpage # extracting audio via webpage
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)

View file

@ -208,7 +208,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
if m: if m:
return [m.group('url')] return [m.group('url')]
# Are whitesapces ignored in URLs? # Are whitespaces ignored in URLs?
# https://github.com/ytdl-org/youtube-dl/issues/12044 # https://github.com/ytdl-org/youtube-dl/issues/12044
matches = re.findall( matches = re.findall(
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)

View file

@ -56,9 +56,9 @@ class TurnerBaseIE(AdobePassIE):
content_id = xpath_text(video_data, 'contentId') or video_id content_id = xpath_text(video_data, 'contentId') or video_id
# rtmp_src = xpath_text(video_data, 'akamai/src') # rtmp_src = xpath_text(video_data, 'akamai/src')
# if rtmp_src: # if rtmp_src:
# splited_rtmp_src = rtmp_src.split(',') # split_rtmp_src = rtmp_src.split(',')
# if len(splited_rtmp_src) == 2: # if len(split_rtmp_src) == 2:
# rtmp_src = splited_rtmp_src[1] # rtmp_src = split_rtmp_src[1]
# aifp = xpath_text(video_data, 'akamai/aifp', default='') # aifp = xpath_text(video_data, 'akamai/aifp', default='')
urls = [] urls = []

View file

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import hashlib import hashlib
import hmac import hmac
import itertools import itertools
@ -9,6 +10,10 @@ import re
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -166,19 +171,20 @@ class VikiIE(VikiBaseIE):
}, { }, {
# episode # episode
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
'md5': '5fa476a902e902783ac7a4d615cdbc7a', 'md5': '94e0e34fd58f169f40c184f232356cfe',
'info_dict': { 'info_dict': {
'id': '44699v', 'id': '44699v',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Boys Over Flowers - Episode 1', 'title': 'Boys Over Flowers - Episode 1',
'description': 'md5:b89cf50038b480b88b5b3c93589a9076', 'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
'duration': 4204, 'duration': 4172,
'timestamp': 1270496524, 'timestamp': 1270496524,
'upload_date': '20100405', 'upload_date': '20100405',
'uploader': 'group8', 'uploader': 'group8',
'like_count': int, 'like_count': int,
'age_limit': 13, 'age_limit': 13,
} },
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
}, { }, {
# youtube external # youtube external
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1', 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
@ -195,14 +201,15 @@ class VikiIE(VikiBaseIE):
'uploader_id': 'ad14065n', 'uploader_id': 'ad14065n',
'like_count': int, 'like_count': int,
'age_limit': 13, 'age_limit': 13,
} },
'skip': 'Page not found!',
}, { }, {
'url': 'http://www.viki.com/player/44699v', 'url': 'http://www.viki.com/player/44699v',
'only_matching': True, 'only_matching': True,
}, { }, {
# non-English description # non-English description
'url': 'http://www.viki.com/videos/158036v-love-in-magic', 'url': 'http://www.viki.com/videos/158036v-love-in-magic',
'md5': '1713ae35df5a521b31f6dc40730e7c9c', 'md5': 'adf9e321a0ae5d0aace349efaaff7691',
'info_dict': { 'info_dict': {
'id': '158036v', 'id': '158036v',
'ext': 'mp4', 'ext': 'mp4',
@ -218,71 +225,11 @@ class VikiIE(VikiBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video = self._call_api( resp = self._download_json(
'videos/%s.json' % video_id, video_id, 'Downloading video JSON') 'https://www.viki.com/api/videos/' + video_id,
video_id, 'Downloading video JSON',
streams = self._call_api( headers={'x-viki-app-ver': '4.0.57'})
'videos/%s/streams.json' % video_id, video_id, video = resp['video']
'Downloading video streams JSON')
formats = []
for format_id, stream_dict in streams.items():
height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None))
for protocol, format_dict in stream_dict.items():
# rtmps URLs does not seem to work
if protocol == 'rtmps':
continue
format_url = format_dict.get('url')
format_drms = format_dict.get('drms')
format_stream_id = format_dict.get('id')
if format_id == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4',
entry_protocol='m3u8_native',
m3u8_id='m3u8-%s' % protocol, fatal=False)
# Despite CODECS metadata in m3u8 all video-only formats
# are actually video+audio
for f in m3u8_formats:
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
f['acodec'] = None
formats.extend(m3u8_formats)
elif format_id == 'mpd':
mpd_formats = self._extract_mpd_formats(
format_url, video_id,
mpd_id='mpd-%s' % protocol, fatal=False)
formats.extend(mpd_formats)
elif format_id == 'mpd':
formats.extend(mpd_formats)
elif format_url.startswith('rtmp'):
mobj = re.search(
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
format_url)
if not mobj:
continue
formats.append({
'format_id': 'rtmp-%s' % format_id,
'ext': 'flv',
'url': mobj.group('url'),
'play_path': mobj.group('playpath'),
'app': mobj.group('app'),
'page_url': url,
'drms': format_drms,
'stream_id': format_stream_id,
})
else:
urlh = self._request_webpage(
HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
formats.append({
'url': format_url,
'format_id': '%s-%s' % (format_id, protocol),
'height': height,
'drms': format_drms,
'stream_id': format_stream_id,
'filesize': int_or_none(urlh.headers.get('Content-Length')),
})
self._sort_formats(formats)
self._check_errors(video) self._check_errors(video)
@ -342,6 +289,73 @@ class VikiIE(VikiBaseIE):
'subtitles': subtitles, 'subtitles': subtitles,
} }
formats = []
def add_format(format_id, format_dict, protocol='http'):
# rtmps URLs does not seem to work
if protocol == 'rtmps':
return
format_url = format_dict.get('url')
if not format_url:
return
format_drms = format_dict.get('drms')
format_stream_id = format_dict.get('id')
qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
stream = qs.get('stream', [None])[0]
if stream:
format_url = base64.b64decode(stream).decode()
if format_id in ('m3u8', 'hls'):
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4',
entry_protocol='m3u8_native',
m3u8_id='m3u8-%s' % protocol, fatal=False)
# Despite CODECS metadata in m3u8 all video-only formats
# are actually video+audio
for f in m3u8_formats:
if '_drm/index_' in f['url']:
continue
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
f['acodec'] = None
formats.append(f)
elif format_id in ('mpd', 'dash'):
formats.extend(self._extract_mpd_formats(
format_url, video_id, 'mpd-%s' % protocol, fatal=False))
elif format_url.startswith('rtmp'):
mobj = re.search(
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
format_url)
if not mobj:
return
formats.append({
'format_id': 'rtmp-%s' % format_id,
'ext': 'flv',
'url': mobj.group('url'),
'play_path': mobj.group('playpath'),
'app': mobj.group('app'),
'page_url': url,
'drms': format_drms,
'stream_id': format_stream_id,
})
else:
urlh = self._request_webpage(
HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
formats.append({
'url': format_url,
'format_id': '%s-%s' % (format_id, protocol),
'height': int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)),
'drms': format_drms,
'stream_id': format_stream_id,
'filesize': int_or_none(urlh.headers.get('Content-Length')),
})
for format_id, format_dict in (resp.get('streams') or {}).items():
add_format(format_id, format_dict)
if not formats:
streams = self._call_api(
'videos/%s/streams.json' % video_id, video_id,
'Downloading video streams JSON')
if 'external' in streams: if 'external' in streams:
result.update({ result.update({
'_type': 'url_transparent', '_type': 'url_transparent',
@ -349,6 +363,11 @@ class VikiIE(VikiBaseIE):
}) })
return result return result
for format_id, stream_dict in streams.items():
for protocol, format_dict in stream_dict.items():
add_format(format_id, format_dict, protocol)
self._sort_formats(formats)
result['formats'] = formats result['formats'] = formats
return result return result

View file

@ -922,7 +922,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
}] }]
_PAGE_SIZE = 100 _PAGE_SIZE = 100
def _fetch_page(self, album_id, authorizaion, hashed_pass, page): def _fetch_page(self, album_id, authorization, hashed_pass, page):
api_page = page + 1 api_page = page + 1
query = { query = {
'fields': 'link,uri', 'fields': 'link,uri',
@ -934,7 +934,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
videos = self._download_json( videos = self._download_json(
'https://api.vimeo.com/albums/%s/videos' % album_id, 'https://api.vimeo.com/albums/%s/videos' % album_id,
album_id, 'Downloading page %d' % api_page, query=query, headers={ album_id, 'Downloading page %d' % api_page, query=query, headers={
'Authorization': 'jwt ' + authorizaion, 'Authorization': 'jwt ' + authorization,
})['data'] })['data']
for video in videos: for video in videos:
link = video.get('link') link = video.get('link')

View file

@ -54,17 +54,17 @@ class XiamiBaseIE(InfoExtractor):
def _decrypt(origin): def _decrypt(origin):
n = int(origin[0]) n = int(origin[0])
origin = origin[1:] origin = origin[1:]
short_lenth = len(origin) // n short_length = len(origin) // n
long_num = len(origin) - short_lenth * n long_num = len(origin) - short_length * n
l = tuple() l = tuple()
for i in range(0, n): for i in range(0, n):
length = short_lenth length = short_length
if i < long_num: if i < long_num:
length += 1 length += 1
l += (origin[0:length], ) l += (origin[0:length], )
origin = origin[length:] origin = origin[length:]
ans = '' ans = ''
for i in range(0, short_lenth + 1): for i in range(0, short_length + 1):
for j in range(0, n): for j in range(0, n):
if len(l[j]) > i: if len(l[j]) > i:
ans += l[j][i] ans += l[j][i]

View file

@ -306,6 +306,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
}, },
} }
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
def _call_api(self, ep, query, video_id): def _call_api(self, ep, query, video_id):
data = self._DEFAULT_API_DATA.copy() data = self._DEFAULT_API_DATA.copy()
data.update(query) data.update(query)
@ -322,8 +324,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_yt_initial_data(self, video_id, webpage): def _extract_yt_initial_data(self, video_id, webpage):
return self._parse_json( return self._parse_json(
self._search_regex( self._search_regex(
r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;', (r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
webpage, 'yt initial data'), self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
video_id) video_id)
@ -1089,6 +1091,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
# with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093)
'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
'info_dict': {
'id': 'CHqg6qOn4no',
'ext': 'mp4',
'title': 'Part 77 Sort a list of simple types in c#',
'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
'upload_date': '20130831',
'uploader_id': 'kudvenkat',
'uploader': 'kudvenkat',
},
'params': {
'skip_download': True,
},
},
] ]
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -2138,6 +2156,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
formats.append(a_format) formats.append(a_format)
else: else:
error_message = extract_unavailable_message() error_message = extract_unavailable_message()
if not error_message:
reason_list = try_get(
player_response,
lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
list) or []
for reason in reason_list:
if not isinstance(reason, dict):
continue
reason_text = try_get(reason, lambda x: x['text'], compat_str)
if reason_text:
if not error_message:
error_message = ''
error_message += reason_text
if error_message:
error_message = clean_html(error_message)
if not error_message: if not error_message:
error_message = clean_html(try_get( error_message = clean_html(try_get(
player_response, lambda x: x['playabilityStatus']['reason'], player_response, lambda x: x['playabilityStatus']['reason'],
@ -2319,8 +2352,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_count(count_name): def _extract_count(count_name):
return str_to_int(self._search_regex( return str_to_int(self._search_regex(
r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
% re.escape(count_name), r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
video_webpage, count_name, default=None)) video_webpage, count_name, default=None))
like_count = _extract_count('like') like_count = _extract_count('like')
@ -2613,13 +2646,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
}, },
'playlist_mincount': 138, 'playlist_mincount': 138,
}, { }, {
'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA', 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA', 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://music.youtube.com/channel/UCT-K0qO8z6NzWrywqefBPBQ', 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True, 'only_matching': True,
}, { }, {
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
@ -2666,7 +2699,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
}, },
'playlist_mincount': 11, 'playlist_mincount': 11,
}, { }, {
'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU', 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'only_matching': True, 'only_matching': True,
}, { }, {
# Playlist URL that does not actually serve a playlist # Playlist URL that does not actually serve a playlist
@ -2698,14 +2731,59 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
}, { }, {
'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
'only_matching': True, 'only_matching': True,
}] }, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
@classmethod 'info_dict': {
def suitable(cls, url): 'id': '9Auq9mYxFEE',
IGNORE = (YoutubeLiveIE,) 'ext': 'mp4',
return ( 'title': 'Watch Sky News live',
False if any(ie.suitable(url) for ie in IGNORE) 'uploader': 'Sky News',
else super(YoutubeTabIE, cls).suitable(url)) 'uploader_id': 'skynews',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
'upload_date': '20191102',
'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
'categories': ['News & Politics'],
'tags': list,
'like_count': int,
'dislike_count': int,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.youtube.com/user/TheYoungTurks/live',
'info_dict': {
'id': 'a48o2S1cPoo',
'ext': 'mp4',
'title': 'The Young Turks - Live Main Show',
'uploader': 'The Young Turks',
'uploader_id': 'TheYoungTurks',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
'upload_date': '20150715',
'license': 'Standard YouTube License',
'description': 'md5:438179573adcdff3c97ebb1ee632b891',
'categories': ['News & Politics'],
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
'like_count': int,
'dislike_count': int,
},
'params': {
'skip_download': True,
},
'only_matching': True,
}, {
'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
'only_matching': True,
}, {
'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
'only_matching': True,
},
# TODO
# {
# 'url': 'https://www.youtube.com/TheYoungTurks/live',
# 'only_matching': True,
# }
]
def _extract_channel_id(self, webpage): def _extract_channel_id(self, webpage):
channel_id = self._html_search_meta( channel_id = self._html_search_meta(
@ -3147,7 +3225,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
webpage = self._download_webpage(url, item_id) webpage = self._download_webpage(url, item_id)
identity_token = self._search_regex( identity_token = self._search_regex(
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage, r'\bID_TOKEN["\']\s*:\s/l*["\'](.+?)["\']', webpage,
'identity token', default=None) 'identity token', default=None)
data = self._extract_yt_initial_data(item_id, webpage) data = self._extract_yt_initial_data(item_id, webpage)
tabs = try_get( tabs = try_get(
@ -3158,7 +3236,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict) data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
if playlist: if playlist:
return self._extract_from_playlist(item_id, data, playlist) return self._extract_from_playlist(item_id, data, playlist)
# Fallback to video extraction if no playlist alike page is recognized # Fallback to video extraction if no playlist alike page is recognized.
# First check for the current video then try the v attribute of URL query.
video_id = try_get(
data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
compat_str) or video_id
if video_id: if video_id:
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id) return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
# Failed to recognize # Failed to recognize
@ -3279,58 +3361,6 @@ class YoutubeYtUserIE(InfoExtractor):
ie=YoutubeTabIE.ie_key(), video_id=user_id) ie=YoutubeTabIE.ie_key(), video_id=user_id)
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com live streams'
_VALID_URL = r'(?P<base_url>%s)/live' % YoutubeTabIE._VALID_URL
IE_NAME = 'youtube:live'
_TESTS = [{
'url': 'https://www.youtube.com/user/TheYoungTurks/live',
'info_dict': {
'id': 'a48o2S1cPoo',
'ext': 'mp4',
'title': 'The Young Turks - Live Main Show',
'uploader': 'The Young Turks',
'uploader_id': 'TheYoungTurks',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
'upload_date': '20150715',
'license': 'Standard YouTube License',
'description': 'md5:438179573adcdff3c97ebb1ee632b891',
'categories': ['News & Politics'],
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
'like_count': int,
'dislike_count': int,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
'only_matching': True,
}, {
'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
'only_matching': True,
}, {
'url': 'https://www.youtube.com/TheYoungTurks/live',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
channel_id = mobj.group('id')
base_url = mobj.group('base_url')
webpage = self._download_webpage(url, channel_id, fatal=False)
if webpage:
page_type = self._og_search_property(
'type', webpage, 'page type', default='')
video_id = self._html_search_meta(
'videoId', webpage, 'video id', default=None)
if page_type.startswith('video') and video_id and re.match(
r'^[0-9A-Za-z_-]{11}$', video_id):
return self.url_result(video_id, YoutubeIE.ie_key())
return self.url_result(base_url)
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor): class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com searches' IE_DESC = 'YouTube.com searches'
# there doesn't appear to be a real limit, for example if you search for # there doesn't appear to be a real limit, for example if you search for

View file

@ -2460,7 +2460,7 @@ class XAttrMetadataError(YoutubeDLError):
# Parsing code and msg # Parsing code and msg
if (self.code in (errno.ENOSPC, errno.EDQUOT) if (self.code in (errno.ENOSPC, errno.EDQUOT)
or 'No space left' in self.msg or 'Disk quota excedded' in self.msg): or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
self.reason = 'NO_SPACE' self.reason = 'NO_SPACE'
elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
self.reason = 'VALUE_TOO_LONG' self.reason = 'VALUE_TOO_LONG'
@ -4215,10 +4215,10 @@ def parse_codecs(codecs_str):
# http://tools.ietf.org/html/rfc6381 # http://tools.ietf.org/html/rfc6381
if not codecs_str: if not codecs_str:
return {} return {}
splited_codecs = list(filter(None, map( split_codecs = list(filter(None, map(
lambda str: str.strip(), codecs_str.strip().strip(',').split(',')))) lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
vcodec, acodec = None, None vcodec, acodec = None, None
for full_codec in splited_codecs: for full_codec in split_codecs:
codec = full_codec.split('.')[0] codec = full_codec.split('.')[0]
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'): if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
if not vcodec: if not vcodec:
@ -4229,10 +4229,10 @@ def parse_codecs(codecs_str):
else: else:
write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr) write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
if not vcodec and not acodec: if not vcodec and not acodec:
if len(splited_codecs) == 2: if len(split_codecs) == 2:
return { return {
'vcodec': splited_codecs[0], 'vcodec': split_codecs[0],
'acodec': splited_codecs[1], 'acodec': split_codecs[1],
} }
else: else:
return { return {
@ -5471,7 +5471,7 @@ def encode_base_n(num, n, table=None):
def decode_packed_codes(code): def decode_packed_codes(code):
mobj = re.search(PACKED_CODES_RE, code) mobj = re.search(PACKED_CODES_RE, code)
obfucasted_code, base, count, symbols = mobj.groups() obfuscated_code, base, count, symbols = mobj.groups()
base = int(base) base = int(base)
count = int(count) count = int(count)
symbols = symbols.split('|') symbols = symbols.split('|')
@ -5484,7 +5484,7 @@ def decode_packed_codes(code):
return re.sub( return re.sub(
r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)], r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
obfucasted_code) obfuscated_code)
def caesar(s, alphabet, shift): def caesar(s, alphabet, shift):