From 6410992da9cb472391f03f7a5a8c69ebeb52a1c8 Mon Sep 17 00:00:00 2001 From: Tom-Oliver Heidel Date: Mon, 26 Oct 2020 05:08:28 +0100 Subject: [PATCH 01/30] [skip travis] updated readme replaced download links added 'how to update' --- README.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index af65e9606..9d40d2631 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ youtube-dlc is a fork of youtube-dl with the intention of getting features teste - [VIDEO SELECTION](#video-selection-1) # INSTALLATION +[How to update](#update) **All Platforms** Preferred way using pip: @@ -46,16 +47,16 @@ You may want to use `python3` instead of `python` **UNIX** (Linux, macOS, etc.) Using wget: - sudo wget https://github.com/blackjack4494/youtube-dlc/releases/latest/download/youtube-dlc -O /usr/local/bin/youtube-dlc + sudo wget https://github.com/blackjack4494/yt-dlc/releases/latest/download/youtube-dlc -O /usr/local/bin/youtube-dlc sudo chmod a+rx /usr/local/bin/youtube-dlc Using curl: - sudo curl -L https://github.com/blackjack4494/youtube-dlc/releases/latest/download/youtube-dlc -o /usr/local/bin/youtube-dlc + sudo curl -L https://github.com/blackjack4494/yt-dlc/releases/latest/download/youtube-dlc -o /usr/local/bin/youtube-dlc sudo chmod a+rx /usr/local/bin/youtube-dlc -**Windows** users can download [youtube-dlc.exe](https://github.com/blackjack4494/youtube-dlc/releases/latest/download/youtube-dlc.exe) (**do not** put in `C:\Windows\System32`!). +**Windows** users can download [youtube-dlc.exe](https://github.com/blackjack4494/yt-dlc/releases/latest/download/youtube-dlc.exe) (**do not** put in `C:\Windows\System32`!). **Compile** To build the Windows executable yourself (without version info!) @@ -77,6 +78,10 @@ Then simply type this make +# UPDATE +**DO NOT UPDATE using `-U` !** Instead, download the binaries again or, when installed with pip, use pip as described above when installing. 
+I will add some memorable short links to the binaries so you can download them easier. + # DESCRIPTION **youtube-dlc** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like. From 5fec75c81cf1a83fac9b35af08dc6750dba0e07b Mon Sep 17 00:00:00 2001 From: nixxo Date: Mon, 26 Oct 2020 13:36:29 +0100 Subject: [PATCH 02/30] [mtv] Fix a missing match_id Fix a problem introduced in 320724f964f09a5e1f08edd246464db4f0d297f9 where is extracted the ID from the url with self._match_id but the problem is that ID is not always present in the url passed so the title should be extracted as proposed by the fix (and like is done in _real_extract (see line 337)) --- youtube_dlc/extractor/mtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dlc/extractor/mtv.py b/youtube_dlc/extractor/mtv.py index 6b3658397..eaf43429f 100644 --- a/youtube_dlc/extractor/mtv.py +++ b/youtube_dlc/extractor/mtv.py @@ -300,7 +300,7 @@ class MTVServicesInfoExtractor(InfoExtractor): except RegexNotFoundError: mgid = None - title = self._match_id(url) + title = url_basename(url) try: window_data = self._parse_json(self._search_regex( From 9754a441e39208b2453631de6e7b60bedd971384 Mon Sep 17 00:00:00 2001 From: bopol Date: Tue, 27 Oct 2020 12:10:52 +0100 Subject: [PATCH 03/30] use webarchive to fix a dead link in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9d40d2631..5532cd720 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ youtube-dlc - download videos from youtube.com or other video platforms. 
-youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://github.com/ytdl-org/youtube-dl/issues/26462) +youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462) - [INSTALLATION](#installation) - [DESCRIPTION](#description) From 87ab4fb11a70c61e46cd7ee642f830b475b89c93 Mon Sep 17 00:00:00 2001 From: Dan Walker Date: Tue, 27 Oct 2020 06:33:47 -0700 Subject: [PATCH 04/30] Added DRM logic In the event when there are no available sources due to DRM controlled sources, return a DRM error and don't proceed with trying. #28 reports an ExtractorError "No video formats found", which is true because the formats list is empty; however, it is empty because all sources are locked. This provides a more informative message to the end-user. # TESTING Tried the URL provided in #28 and confirmed a DRM message is returned. 
--- youtube_dlc/extractor/brightcove.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dlc/extractor/brightcove.py b/youtube_dlc/extractor/brightcove.py index 2aa9f4782..638673c31 100644 --- a/youtube_dlc/extractor/brightcove.py +++ b/youtube_dlc/extractor/brightcove.py @@ -471,12 +471,17 @@ class BrightcoveNewIE(AdobePassIE): title = json_data['name'].strip() formats = [] + sources_num = len(json_data.get('sources')) + key_systems_present = 0 for source in json_data.get('sources', []): container = source.get('container') ext = mimetype2ext(source.get('type')) src = source.get('src') - # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object - if ext == 'ism' or container == 'WVM' or source.get('key_systems'): + # https://apis.support.brightcove.com/playback/references/playback-api-video-fields-reference.html + if source.get('key_systems'): + key_systems_present += 1 + continue + elif ext == 'ism' or container == 'WVM': continue elif ext == 'm3u8' or container == 'M2TS': if not src: @@ -533,6 +538,10 @@ class BrightcoveNewIE(AdobePassIE): 'format_id': build_format_id('rtmp'), }) formats.append(f) + + if sources_num == key_systems_present: + raise ExtractorError('This video is DRM protected', expected=True) + if not formats: # for sonyliv.com DRM protected videos s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl') From fb9c36c45f629d4b76a609f05a19a5bef1c7fbad Mon Sep 17 00:00:00 2001 From: amigatomte <48889381+amigatomte@users.noreply.github.com> Date: Tue, 27 Oct 2020 15:27:51 +0100 Subject: [PATCH 05/30] Update to reflect website changes. 
--- youtube_dlc/extractor/urplay.py | 49 +++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/youtube_dlc/extractor/urplay.py b/youtube_dlc/extractor/urplay.py index 6030b7cb5..4bc2b78fb 100644 --- a/youtube_dlc/extractor/urplay.py +++ b/youtube_dlc/extractor/urplay.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import unified_timestamp +import re class URPlayIE(InfoExtractor): @@ -13,10 +14,10 @@ class URPlayIE(InfoExtractor): 'info_dict': { 'id': '203704', 'ext': 'mp4', - 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd', + 'title': 'Om vetenskap, kritiskt tänkande och motstånd', 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a', - 'timestamp': 1513512768, - 'upload_date': '20171217', + 'timestamp': 1513292400, + 'upload_date': '20171214', }, }, { 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde', @@ -37,35 +38,41 @@ class URPlayIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - urplayer_data = self._parse_json(self._search_regex( - r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id) + urplayer_data = re.sub("&quot;", "\"", self._search_regex( + r'components\/Player\/Player\" data-react-props=\"({.+?})\"', + webpage, 'urplayer data')) + urplayer_data = self._parse_json(urplayer_data, video_id) + for i in range(len(urplayer_data['accessibleEpisodes'])): + if urplayer_data.get('accessibleEpisodes', {})[i].get('id') == int(video_id): + urplayer_data = urplayer_data['accessibleEpisodes'][i] + break + host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] - formats = [] - for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)): - file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr) - if 
file_http: + urplayer_streams = urplayer_data.get("streamingInfo") + for quality in ('sd'), ('hd'): + location = (urplayer_streams.get("raw", {}).get(quality, {}).get("location") + or urplayer_streams.get("sweComplete", {}).get(quality, {}).get("location")) + if location: formats.extend(self._extract_wowza_formats( - 'http://%s/%splaylist.m3u8' % (host, file_http), video_id, skip_protocols=['rtmp', 'rtsp'])) + 'http://%s/%s/playlist.m3u8' % (host, location), video_id, + skip_protocols=['f4m', 'rtmp', 'rtsp'])) self._sort_formats(formats) - subtitles = {} - for subtitle in urplayer_data.get('subtitles', []): - subtitle_url = subtitle.get('file') - kind = subtitle.get('kind') - if not subtitle_url or (kind and kind != 'captions'): - continue - subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({ - 'url': subtitle_url, + subs = urplayer_streams.get("sweComplete", {}).get("tt", {}).get("location") + if subs: + subtitles.setdefault('Svenska', []).append({ + 'url': subs, }) return { 'id': video_id, 'title': urplayer_data['title'], 'description': self._og_search_description(webpage), - 'thumbnail': urplayer_data.get('image'), - 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')), - 'series': urplayer_data.get('series_title'), + 'thumbnail': urplayer_data.get('image', {}).get('1280x720'), + 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), + webpage, 'timestamp')), + 'series': urplayer_data.get('seriesTitle'), 'subtitles': subtitles, 'formats': formats, } From 6f8557ec4db627bdd2fda4f47bc2492a04ce5d0d Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 27 Oct 2020 16:49:42 +0100 Subject: [PATCH 06/30] [skip travis] add note to remove tvland. 
--- youtube_dlc/extractor/tvland.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dlc/extractor/tvland.py b/youtube_dlc/extractor/tvland.py index 791144128..225b6b078 100644 --- a/youtube_dlc/extractor/tvland.py +++ b/youtube_dlc/extractor/tvland.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals from .spike import ParamountNetworkIE +# TODO: Remove - Reason not used anymore - Service moved to youtube + class TVLandIE(ParamountNetworkIE): IE_NAME = 'tvland.com' From 67b19799a5cc8ab24aa48de66cea4e2ad41315a8 Mon Sep 17 00:00:00 2001 From: Peter Oettig Date: Tue, 27 Oct 2020 20:39:49 +0100 Subject: [PATCH 07/30] Fixed problem with JS player URL The JS player URL could not be found anymore, possibly because of a change on Youtubes side. --- youtube_dlc/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index 4fb49b864..ccfaa733d 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -2051,7 +2051,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if cipher: if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True): - ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")' + ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))' jsplayer_url_json = self._search_regex( ASSETS_RE, embed_webpage if age_gate else video_webpage, From 48aac9fc867087095079dee966a1511730d01f6d Mon Sep 17 00:00:00 2001 From: insaneracist Date: Tue, 27 Oct 2020 19:21:34 -0700 Subject: [PATCH 08/30] [bandcamp] restore album downloads flake8 conform --- youtube_dlc/extractor/bandcamp.py | 183 +++++++++++++++------------- youtube_dlc/extractor/extractors.py | 2 +- 2 files changed, 102 insertions(+), 83 deletions(-) diff --git a/youtube_dlc/extractor/bandcamp.py b/youtube_dlc/extractor/bandcamp.py index 9dbafe86d..8a37e1b94 100644 --- a/youtube_dlc/extractor/bandcamp.py +++ b/youtube_dlc/extractor/bandcamp.py 
@@ -25,7 +25,45 @@ from ..utils import ( ) -class BandcampIE(InfoExtractor): +class BandcampBaseIE(InfoExtractor): + """Provide base functions for Bandcamp extractors""" + + def _extract_json_from_html_data_attribute(self, webpage, suffix, video_id): + json_string = self._html_search_regex( + r' data-%s="([^"]*)' % suffix, + webpage, '%s json' % suffix, default='{}') + + return self._parse_json(json_string, video_id) + + def _parse_json_track(self, json): + formats = [] + file_ = json.get('file') + if isinstance(file_, dict): + for format_id, format_url in file_.items(): + if not url_or_none(format_url): + continue + ext, abr_str = format_id.split('-', 1) + formats.append({ + 'format_id': format_id, + 'url': self._proto_relative_url(format_url, 'http:'), + 'ext': ext, + 'vcodec': 'none', + 'acodec': ext, + 'abr': int_or_none(abr_str), + }) + + return { + 'duration': float_or_none(json.get('duration')), + 'id': str_or_none(json.get('track_id') or json.get('id')), + 'title': json.get('title'), + 'title_link': json.get('title_link'), + 'number': int_or_none(json.get('track_num')), + 'formats': formats + } + + +class BandcampIE(BandcampBaseIE): + IE_NAME = "Bandcamp:track" _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)' _TESTS = [{ 'url': 'http://youtube-dlc.bandcamp.com/track/youtube-dlc-test-song', @@ -85,52 +123,32 @@ class BandcampIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) title = mobj.group('title') + url_track_title = title webpage = self._download_webpage(url, title) thumbnail = self._html_search_meta('og:image', webpage, default=None) - track_id = None - track = None - track_number = None - duration = None + json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", url_track_title) + json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", url_track_title) - formats = [] - trackinfo_block = self._html_search_regex( - 
r'trackinfo(?:["\']|"):\[\s*({.+?})\s*\],(?:["\']|")', - webpage, 'track info', default='{}') + json_tracks = json_tralbum.get('trackinfo') + if not json_tracks: + raise ExtractorError('Could not extract track') - track_info = self._parse_json(trackinfo_block, title) - if track_info: - file_ = track_info.get('file') - if isinstance(file_, dict): - for format_id, format_url in file_.items(): - if not url_or_none(format_url): - continue - ext, abr_str = format_id.split('-', 1) - formats.append({ - 'format_id': format_id, - 'url': self._proto_relative_url(format_url, 'http:'), - 'ext': ext, - 'vcodec': 'none', - 'acodec': ext, - 'abr': int_or_none(abr_str), - }) + track = self._parse_json_track(json_tracks[0]) + artist = json_tralbum.get('artist') + album_title = json_embed.get('album_title') - track_id = str_or_none(track_info.get('track_id') or track_info.get('id')) - track_number = int_or_none(track_info.get('track_num')) - duration = float_or_none(track_info.get('duration')) + json_album = json_tralbum.get('packages') + if json_album: + json_album = json_album[0] + album_publish_date = json_album.get('album_publish_date') + album_release_date = json_album.get('album_release_date') + else: + album_publish_date = None + album_release_date = json_tralbum.get('album_release_date') - def extract(key): - data = self._html_search_regex( - r',(["\']|")%s\1:\1(?P<value>(?:\\\1|((?!\1).))+)\1' % key, - webpage, key, default=None, group='value') - return data.replace(r'\"', '"').replace('\\\\', '\\') if data else data - - track = extract('title') - artist = extract('artist') - album = extract('album_title') - timestamp = unified_timestamp( - extract('publish_date') or extract('album_publish_date')) - release_date = unified_strdate(extract('album_release_date')) + timestamp = unified_timestamp(json_tralbum.get('current', {}).get('publish_date') or album_publish_date) + release_date = unified_strdate(album_release_date) download_link = self._search_regex( 
r'freeDownloadPage(?:["\']|"):\s*(["\']|")(?P<url>(?:(?!\1).)+)\1', webpage, @@ -155,8 +173,6 @@ class BandcampIE(InfoExtractor): if info: downloads = info.get('downloads') if isinstance(downloads, dict): - if not track: - track = info.get('title') if not artist: artist = info.get('artist') if not thumbnail: @@ -190,7 +206,7 @@ class BandcampIE(InfoExtractor): retry_url = url_or_none(stat.get('retry_url')) if not retry_url: continue - formats.append({ + track['formats'].append({ 'url': self._proto_relative_url(retry_url, 'http:'), 'ext': download_formats.get(format_id), 'format_id': format_id, @@ -199,32 +215,37 @@ class BandcampIE(InfoExtractor): 'vcodec': 'none', }) - self._sort_formats(formats) + self._sort_formats(track['formats']) - title = '%s - %s' % (artist, track) if artist else track - - if not duration: - duration = float_or_none(self._html_search_meta( - 'duration', webpage, default=None)) + title = '%s - %s' % (artist, track.get('title')) if artist else track.get('title') return { - 'id': track_id, - 'title': title, - 'thumbnail': thumbnail, - 'uploader': artist, - 'timestamp': timestamp, - 'release_date': release_date, - 'duration': duration, - 'track': track, - 'track_number': track_number, - 'track_id': track_id, + 'album': album_title, 'artist': artist, - 'album': album, - 'formats': formats, + 'duration': track['duration'], + 'formats': track['formats'], + 'id': track['id'], + 'release_date': release_date, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'title': title, + 'track': track['title'], + 'track_id': track['id'], + 'track_number': track['number'], + 'uploader': artist } -class BandcampAlbumIE(InfoExtractor): +class BandcampAlbumTrackIE(BandcampIE): + IE_NAME = "Bandcamp:albumtrack" + """Hack class to force album downloads to have prefixed track numbers by default""" + def _real_extract(self, url): + data = super()._real_extract(url) + data['title'] = '{:02d} - {} - {}'.format(data['track_number'], data['artist'], data['track']) + 
return data + + +class BandcampAlbumIE(BandcampBaseIE): IE_NAME = 'Bandcamp:album' _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?' @@ -305,34 +326,32 @@ class BandcampAlbumIE(InfoExtractor): album_id = mobj.group('album_id') playlist_id = album_id or uploader_id webpage = self._download_webpage(url, playlist_id) - track_elements = re.findall( - r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage) - if not track_elements: - raise ExtractorError('The page doesn\'t contain any tracks') + + json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", playlist_id) + json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", playlist_id) + + json_tracks = json_tralbum.get('trackinfo') + if not json_tracks: + raise ExtractorError('Could not extract album tracks') + + album_title = json_embed.get('album_title') + # Only tracks with duration info have songs + tracks = [self._parse_json_track(track) for track in json_tracks] entries = [ self.url_result( - compat_urlparse.urljoin(url, t_path), - ie=BandcampIE.ie_key(), - video_title=self._search_regex( - r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', - elem_content, 'track title', fatal=False)) - for elem_content, t_path in track_elements - if self._html_search_meta('duration', elem_content, default=None)] - - title = self._html_search_regex( - r'album_title\s*(?:"|["\']):\s*("|["\'])(?P<album>(?:\\\1|((?!\1).))+)\1', - webpage, 'title', fatal=False, group='album') - - if title: - title = title.replace(r'\"', '"') + compat_urlparse.urljoin(url, track['title_link']), + ie=BandcampAlbumTrackIE.ie_key(), + video_title=track['title']) + for track in tracks + if track.get('duration')] return { '_type': 'playlist', 'uploader_id': uploader_id, 'id': playlist_id, - 'title': title, - 'entries': entries, + 'title': album_title, + 'entries': entries } diff --git a/youtube_dlc/extractor/extractors.py 
b/youtube_dlc/extractor/extractors.py index d31edd7c8..fbd4ed1e3 100644 --- a/youtube_dlc/extractor/extractors.py +++ b/youtube_dlc/extractor/extractors.py @@ -84,7 +84,7 @@ from .awaan import ( ) from .azmedien import AZMedienIE from .baidu import BaiduVideoIE -from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE +from .bandcamp import BandcampIE, BandcampAlbumTrackIE, BandcampAlbumIE, BandcampWeeklyIE from .bbc import ( BBCCoUkIE, BBCCoUkArticleIE, From 3467b3e28f30bf53ca8355361806bbc74ccf2435 Mon Sep 17 00:00:00 2001 From: Unknown <blackjack4494@web.de> Date: Wed, 28 Oct 2020 12:18:04 +0100 Subject: [PATCH 09/30] [skip travis][bandcamp] fix minor typo in tests --- youtube_dlc/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dlc/extractor/bandcamp.py b/youtube_dlc/extractor/bandcamp.py index 8a37e1b94..dbf96f33d 100644 --- a/youtube_dlc/extractor/bandcamp.py +++ b/youtube_dlc/extractor/bandcamp.py @@ -66,7 +66,7 @@ class BandcampIE(BandcampBaseIE): IE_NAME = "Bandcamp:track" _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)' _TESTS = [{ - 'url': 'http://youtube-dlc.bandcamp.com/track/youtube-dlc-test-song', + 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', 'md5': 'c557841d5e50261777a6585648adf439', 'info_dict': { 'id': '1812978515', From 079a941282648b0278933e57adc4a77c3e95b86e Mon Sep 17 00:00:00 2001 From: Unknown <blackjack4494@web.de> Date: Wed, 28 Oct 2020 12:25:49 +0100 Subject: [PATCH 10/30] [mtv] add match_id reminder --- youtube_dlc/extractor/mtv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dlc/extractor/mtv.py b/youtube_dlc/extractor/mtv.py index eaf43429f..feb442377 100644 --- a/youtube_dlc/extractor/mtv.py +++ b/youtube_dlc/extractor/mtv.py @@ -300,6 +300,7 @@ class MTVServicesInfoExtractor(InfoExtractor): except RegexNotFoundError: mgid = None + # TODO: ideally use self._match_id(url) title = url_basename(url) try: From 
6c6ee4905f386f4280d9c07de50eafd3a797c306 Mon Sep 17 00:00:00 2001 From: nixxo <c.nixxo@gmail.com> Date: Wed, 28 Oct 2020 13:48:22 +0100 Subject: [PATCH 11/30] [mtv] proposed fix --- youtube_dlc/extractor/mtv.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dlc/extractor/mtv.py b/youtube_dlc/extractor/mtv.py index feb442377..04cc95b6a 100644 --- a/youtube_dlc/extractor/mtv.py +++ b/youtube_dlc/extractor/mtv.py @@ -289,7 +289,7 @@ class MTVServicesInfoExtractor(InfoExtractor): return mgid - def _extract_mgid(self, webpage, url, data_zone=None): + def _extract_mgid(self, webpage, url, title=None, data_zone=None): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -300,8 +300,8 @@ class MTVServicesInfoExtractor(InfoExtractor): except RegexNotFoundError: mgid = None - # TODO: ideally use self._match_id(url) - title = url_basename(url) + if not title: + title = url_basename(url) try: window_data = self._parse_json(self._search_regex( @@ -337,7 +337,7 @@ class MTVServicesInfoExtractor(InfoExtractor): def _real_extract(self, url): title = url_basename(url) webpage = self._download_webpage(url, title) - mgid = self._extract_mgid(webpage, url) + mgid = self._extract_mgid(webpage, url, title=title) videos_info = self._get_videos_info(mgid, url=url) return videos_info From cf553deceb0e4d8f1fa6c66a7eabfe9a0f04343c Mon Sep 17 00:00:00 2001 From: Tom-Oliver Heidel <github@tom-oliver.eu> Date: Wed, 28 Oct 2020 15:18:13 +0100 Subject: [PATCH 12/30] [skip travis] update travis badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5532cd720..08bddaa18 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![Build Status](https://travis-ci.com/blackjack4494/youtube-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/youtube-dlc) +[![Build 
Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc) [![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc) [![Downloads](https://pepy.tech/badge/youtube-dlc)](https://pepy.tech/project/youtube-dlc) From ccec6955f34ce9933e48562e8538bbf98247c050 Mon Sep 17 00:00:00 2001 From: insaneracist <insaneracist@cyberdude.com> Date: Wed, 28 Oct 2020 08:12:58 -0700 Subject: [PATCH 13/30] [bandcamp] fix failing test. remove subclass hack --- youtube_dlc/extractor/bandcamp.py | 11 +---------- youtube_dlc/extractor/extractors.py | 2 +- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/youtube_dlc/extractor/bandcamp.py b/youtube_dlc/extractor/bandcamp.py index dbf96f33d..7d29481c0 100644 --- a/youtube_dlc/extractor/bandcamp.py +++ b/youtube_dlc/extractor/bandcamp.py @@ -236,15 +236,6 @@ class BandcampIE(BandcampBaseIE): } -class BandcampAlbumTrackIE(BandcampIE): - IE_NAME = "Bandcamp:albumtrack" - """Hack class to force album downloads to have prefixed track numbers by default""" - def _real_extract(self, url): - data = super()._real_extract(url) - data['title'] = '{:02d} - {} - {}'.format(data['track_number'], data['artist'], data['track']) - return data - - class BandcampAlbumIE(BandcampBaseIE): IE_NAME = 'Bandcamp:album' _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?' 
@@ -341,7 +332,7 @@ class BandcampAlbumIE(BandcampBaseIE): entries = [ self.url_result( compat_urlparse.urljoin(url, track['title_link']), - ie=BandcampAlbumTrackIE.ie_key(), + ie=BandcampIE.ie_key(), video_title=track['title']) for track in tracks if track.get('duration')] diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py index fbd4ed1e3..d31edd7c8 100644 --- a/youtube_dlc/extractor/extractors.py +++ b/youtube_dlc/extractor/extractors.py @@ -84,7 +84,7 @@ from .awaan import ( ) from .azmedien import AZMedienIE from .baidu import BaiduVideoIE -from .bandcamp import BandcampIE, BandcampAlbumTrackIE, BandcampAlbumIE, BandcampWeeklyIE +from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE from .bbc import ( BBCCoUkIE, BBCCoUkArticleIE, From c3c18d7b8a035c0099499147be5fcfe5f603e072 Mon Sep 17 00:00:00 2001 From: nixxo <c.nixxo@gmail.com> Date: Wed, 28 Oct 2020 16:55:58 +0100 Subject: [PATCH 14/30] [skyitalia] Add new extractor --- youtube_dlc/extractor/extractors.py | 4 + youtube_dlc/extractor/skyitalia.py | 119 ++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 youtube_dlc/extractor/skyitalia.py diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py index d31edd7c8..a0c7d0f42 100644 --- a/youtube_dlc/extractor/extractors.py +++ b/youtube_dlc/extractor/extractors.py @@ -1037,6 +1037,10 @@ from .sky import ( SkyNewsIE, SkySportsIE, ) +from .skyitalia import ( + SkyArteItaliaIE, + SkyItaliaIE, +) from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE diff --git a/youtube_dlc/extractor/skyitalia.py b/youtube_dlc/extractor/skyitalia.py new file mode 100644 index 000000000..d9c35c3a1 --- /dev/null +++ b/youtube_dlc/extractor/skyitalia.py @@ -0,0 +1,119 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class 
SkyItaliaBaseIE(InfoExtractor): + _GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}' + _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk' + _RES = { + 'low': [426, 240], + 'med': [640, 360], + 'high': [854, 480], + 'hd': [1280, 720] + } + + def _extract_video_id(self, url): + webpage = self._download_webpage(url, 'skyitalia') + video_id = self._html_search_regex( + [r'data-videoid=\"(\d+)\"', + r'http://player\.sky\.it/social\?id=(\d+)\&'], + webpage, 'video_id') + if video_id: + return video_id + raise ExtractorError('Video ID not found.') + + def _get_formats(self, video_id, token=_TOKEN): + data_url = self._GET_VIDEO_DATA.replace('{id}', video_id) + data_url = data_url.replace('{token}', token) + video_data = self._parse_json( + self._download_webpage(data_url, video_id), + video_id) + + formats = [] + for q, r in self._RES.items(): + key = 'web_' + q + '_url' + if key not in video_data: + continue + formats.append({ + 'url': video_data[key], + 'format_id': q, + 'width': r[0], + 'height': r[1] + }) + + self._sort_formats(formats) + title = video_data.get('title') + thumb = video_data.get('thumb') + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumb, + 'formats': formats + } + + def _real_extract(self, url): + video_id = self._match_id(url) + if video_id == 'None': + video_id = self._extract_video_id(url) + return self._get_formats(video_id, self._TOKEN) + + +class SkyItaliaIE(SkyItaliaBaseIE): + IE_NAME = 'sky.it' + _VALID_URL = r'''(?x)https?:// + (?P<ie>sport|tg24|video) + \.sky\.it/(?:.+?) + (?P<id>[0-9]{6})? 
+ (?:$|\?)''' + + _TESTS = [{ + 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162', + 'md5': '9c03b590b06e5952d8051f0e02b0feca', + 'info_dict': { + 'id': '616162', + 'ext': 'mp4', + 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', + } + }, { + 'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta', + 'md5': '9c03b590b06e5952d8051f0e02b0feca', + 'info_dict': { + 'id': '616162', + 'ext': 'mp4', + 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', + } + }, { + 'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi', + 'md5': 'caa25e62dadb529bc5e0b078da99f854', + 'info_dict': { + 'id': '615904', + 'ext': 'mp4', + 'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg', + } + }, { + 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api', + 'only_matching': True, + }] + + +class SkyArteItaliaIE(SkyItaliaBaseIE): + IE_NAME = 'arte.sky.it' + _VALID_URL = r'https?://arte\.sky\.it/video/.+?(?P<id>[0-9]{6})?$' + _TEST = { + 'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/', + 'md5': '2f22513a89f45142f2746f878d690647', + 'info_dict': { + 'id': '612888', + 'ext': 'mp4', + 'title': 'I maestri del cinema Federico Felini', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg', + } + } + _TOKEN = 
'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd' From 0b72c2bc31a3400134182154f23fdd494c838a5a Mon Sep 17 00:00:00 2001 From: nixxo <c.nixxo@gmail.com> Date: Wed, 28 Oct 2020 17:04:36 +0100 Subject: [PATCH 15/30] [skyitalia] removed arbitrary parameter --- youtube_dlc/extractor/skyitalia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dlc/extractor/skyitalia.py b/youtube_dlc/extractor/skyitalia.py index d9c35c3a1..262701c6b 100644 --- a/youtube_dlc/extractor/skyitalia.py +++ b/youtube_dlc/extractor/skyitalia.py @@ -25,7 +25,7 @@ class SkyItaliaBaseIE(InfoExtractor): return video_id raise ExtractorError('Video ID not found.') - def _get_formats(self, video_id, token=_TOKEN): + def _get_formats(self, video_id, token): data_url = self._GET_VIDEO_DATA.replace('{id}', video_id) data_url = data_url.replace('{token}', token) video_data = self._parse_json( From 81a20463a44d1039729ed5611d39d0bcb4abeb73 Mon Sep 17 00:00:00 2001 From: nixxo <c.nixxo@gmail.com> Date: Wed, 28 Oct 2020 17:06:49 +0100 Subject: [PATCH 16/30] [skyitalia] moved token --- youtube_dlc/extractor/skyitalia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dlc/extractor/skyitalia.py b/youtube_dlc/extractor/skyitalia.py index 262701c6b..a4e894044 100644 --- a/youtube_dlc/extractor/skyitalia.py +++ b/youtube_dlc/extractor/skyitalia.py @@ -7,7 +7,6 @@ from ..utils import ExtractorError class SkyItaliaBaseIE(InfoExtractor): _GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}' - _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk' _RES = { 'low': [426, 240], 'med': [640, 360], @@ -101,6 +100,7 @@ class SkyItaliaIE(SkyItaliaBaseIE): 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api', 'only_matching': True, }] + _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk' class SkyArteItaliaIE(SkyItaliaBaseIE): From 
576d233fe67641da24b15751fc2a0e5c1e787034 Mon Sep 17 00:00:00 2001 From: insaneracist <insaneracist@cyberdude.com> Date: Wed, 28 Oct 2020 13:48:55 -0700 Subject: [PATCH 17/30] [xtube] fix extractor (#17) --- youtube_dlc/extractor/xtube.py | 47 +++++++++++++++------------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/youtube_dlc/extractor/xtube.py b/youtube_dlc/extractor/xtube.py index 01b253dcb..081c5e2e7 100644 --- a/youtube_dlc/extractor/xtube.py +++ b/youtube_dlc/extractor/xtube.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, js_to_json, orderedSet, @@ -33,27 +34,11 @@ class XTubeIE(InfoExtractor): 'title': 'strange erotica', 'description': 'contains:an ET kind of thing', 'uploader': 'greenshowers', - 'duration': 450, + 'duration': 449, 'view_count': int, 'comment_count': int, 'age_limit': 18, } - }, { - # FLV videos with duplicated formats - 'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752', - 'md5': 'a406963eb349dd43692ec54631efd88b', - 'info_dict': { - 'id': '9299752', - 'display_id': 'A-Super-Run-Part-1-YT', - 'ext': 'flv', - 'title': 'A Super Run - Part 1 (YT)', - 'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616', - 'uploader': 'tshirtguy59', - 'duration': 579, - 'view_count': int, - 'comment_count': int, - 'age_limit': 18, - }, }, { # new URL schema 'url': 'http://www.xtube.com/video-watch/strange-erotica-625837', @@ -89,16 +74,24 @@ class XTubeIE(InfoExtractor): title, thumbnail, duration = [None] * 3 - config = self._parse_json(self._search_regex( - r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config', - default='{}'), video_id, transform_source=js_to_json, fatal=False) - if config: - config = config.get('mainRoll') - if isinstance(config, dict): - title = config.get('title') - thumbnail = config.get('poster') - duration = int_or_none(config.get('duration')) - sources = config.get('sources') or config.get('format') + json_config_string 
= self._search_regex( + r'playerConf=({.+?}),loaderConf', + webpage, 'config', default=None) + if not json_config_string: + raise ExtractorError("Could not extract video player data") + + json_config_string = json_config_string.replace("!0", "true").replace("!1", "false") + + config = self._parse_json(json_config_string, video_id, transform_source=js_to_json, fatal=False) + if not config: + raise ExtractorError("Could not extract video player data") + + config = config.get('mainRoll') + if isinstance(config, dict): + title = config.get('title') + thumbnail = config.get('poster') + duration = int_or_none(config.get('duration')) + sources = config.get('sources') or config.get('format') if not isinstance(sources, dict): sources = self._parse_json(self._search_regex( From 139e10ad9815f134588d89d27f570a3ee54f79dc Mon Sep 17 00:00:00 2001 From: insaneracist <insaneracist@cyberdude.com> Date: Wed, 28 Oct 2020 16:55:47 -0700 Subject: [PATCH 18/30] [newgrounds] fix: video download --- youtube_dlc/extractor/newgrounds.py | 103 +++++++++++++++++++--------- 1 file changed, 69 insertions(+), 34 deletions(-) diff --git a/youtube_dlc/extractor/newgrounds.py b/youtube_dlc/extractor/newgrounds.py index 82e7cf522..b9f01235f 100644 --- a/youtube_dlc/extractor/newgrounds.py +++ b/youtube_dlc/extractor/newgrounds.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, extract_attributes, int_or_none, parse_duration, @@ -20,22 +21,22 @@ class NewgroundsIE(InfoExtractor): 'info_dict': { 'id': '549479', 'ext': 'mp3', - 'title': 'B7 - BusMode', + 'title': 'Burn7 - B7 - BusMode', 'uploader': 'Burn7', 'timestamp': 1378878540, 'upload_date': '20130911', 'duration': 143, }, }, { - 'url': 'https://www.newgrounds.com/portal/view/673111', - 'md5': '3394735822aab2478c31b1004fe5e5bc', + 'url': 'https://www.newgrounds.com/portal/view/1', + 'md5': 'fbfb40e2dc765a7e830cb251d370d981', 'info_dict': { - 'id': '673111', + 'id': '1', 'ext': 'mp4', - 'title': 
'Dancin', - 'uploader': 'Squirrelman82', - 'timestamp': 1460256780, - 'upload_date': '20160410', + 'title': 'Brian-Beaton - Scrotum 1', + 'uploader': 'Brian-Beaton', + 'timestamp': 955064100, + 'upload_date': '20000406', }, }, { # source format unavailable, additional mp4 formats @@ -43,7 +44,7 @@ class NewgroundsIE(InfoExtractor): 'info_dict': { 'id': '689400', 'ext': 'mp4', - 'title': 'ZTV News Episode 8', + 'title': 'Bennettthesage - ZTV News Episode 8', 'uploader': 'BennettTheSage', 'timestamp': 1487965140, 'upload_date': '20170224', @@ -55,42 +56,73 @@ class NewgroundsIE(InfoExtractor): def _real_extract(self, url): media_id = self._match_id(url) - + formats = [] + uploader = None webpage = self._download_webpage(url, media_id) title = self._html_search_regex( r'<title>([^>]+)', webpage, 'title') - media_url = self._parse_json(self._search_regex( - r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id) + media_url_string = self._search_regex( + r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None, fatal=False) - formats = [{ - 'url': media_url, - 'format_id': 'source', - 'quality': 1, - }] + if media_url_string: + media_url = self._parse_json(media_url_string, media_id) + formats = [{ + 'url': media_url, + 'format_id': 'source', + 'quality': 1, + }] - max_resolution = int_or_none(self._search_regex( - r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution', - default=None)) - if max_resolution: - url_base = media_url.rpartition('.')[0] - for resolution in (360, 720, 1080): - if resolution > max_resolution: - break - formats.append({ - 'url': '%s.%dp.mp4' % (url_base, resolution), - 'format_id': '%dp' % resolution, - 'height': resolution, - }) + max_resolution = int_or_none(self._search_regex( + r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution', + default=None)) + if max_resolution: + url_base = media_url.rpartition('.')[0] + for resolution in (360, 720, 1080): + if resolution > max_resolution: + break + formats.append({ + 'url': 
'%s.%dp.mp4' % (url_base, resolution), + 'format_id': '%dp' % resolution, + 'height': resolution, + }) + else: + video_id = int_or_none(self._search_regex( + r'data-movie-id=\\"([0-9]+)\\"', webpage, '')) + if not video_id: + raise ExtractorError('Could not extract media data') + + url_video_data = 'https://www.newgrounds.com/portal/video/%s' % video_id + headers = { + 'Accept': 'application/json', + 'Referer': url, + 'X-Requested-With': 'XMLHttpRequest' + } + json_video = self._download_json(url_video_data, video_id, headers=headers, fatal=False) + if not json_video: + raise ExtractorError('Could not fetch media data') + + uploader = json_video.get('author') + title = json_video.get('title') + media_formats = json_video.get('sources', []) + for media_format in media_formats: + media_sources = media_formats[media_format] + for source in media_sources: + formats.append({ + 'format_id': media_format, + 'quality': int_or_none(media_format[:-1]), + 'url': source.get('src') + }) self._check_formats(formats, media_id) self._sort_formats(formats) - uploader = self._html_search_regex( - (r'(?s)]*>(.+?).*?\s*Author\s*', - r'(?:Author|Writer)\s*]+>([^<]+)'), webpage, 'uploader', - fatal=False) + if not uploader: + uploader = self._html_search_regex( + (r'(?s)]*>(.+?).*?\s*(?:Author|Artist)\s*', + r'(?:Author|Writer)\s*]+>([^<]+)'), webpage, 'uploader', + fatal=False) timestamp = unified_timestamp(self._html_search_regex( (r'
\s*Uploaded\s*
\s*
([^<]+
\s*
[^<]+)', @@ -109,6 +141,9 @@ class NewgroundsIE(InfoExtractor): if '
Song' in webpage: formats[0]['vcodec'] = 'none' + if uploader: + title = "%s - %s" % (uploader, title) + return { 'id': media_id, 'title': title, From 0704d2224b328caeafbce6a029904472628d12bd Mon Sep 17 00:00:00 2001 From: Unknown Date: Thu, 29 Oct 2020 01:56:55 +0100 Subject: [PATCH 19/30] [core] be able to hand over id and title using url_result --- youtube_dlc/YoutubeDL.py | 9 +++++++-- youtube_dlc/extractor/bandcamp.py | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py index fc351db0d..f959a4e47 100644 --- a/youtube_dlc/YoutubeDL.py +++ b/youtube_dlc/YoutubeDL.py @@ -801,7 +801,7 @@ class YoutubeDL(object): for key, value in extra_info.items(): info_dict.setdefault(key, value) - def extract_info(self, url, download=True, ie_key=None, extra_info={}, + def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={}, process=True, force_generic_extractor=False): ''' Returns a list with a dictionary for each video we find. 
@@ -836,6 +836,11 @@ class YoutubeDL(object): '_type': 'compat_list', 'entries': ie_result, } + if info_dict: + if info_dict.get('id'): + ie_result['id'] = info_dict['id'] + if info_dict.get('title'): + ie_result['title'] = info_dict['title'] self.add_default_extra_info(ie_result, ie, url) if process: return self.process_ie_result(ie_result, download, extra_info) @@ -898,7 +903,7 @@ class YoutubeDL(object): # We have to add extra_info to the results because it may be # contained in a playlist return self.extract_info(ie_result['url'], - download, + download, info_dict=ie_result, ie_key=ie_result.get('ie_key'), extra_info=extra_info) elif result_type == 'url_transparent': diff --git a/youtube_dlc/extractor/bandcamp.py b/youtube_dlc/extractor/bandcamp.py index 7d29481c0..0e7492764 100644 --- a/youtube_dlc/extractor/bandcamp.py +++ b/youtube_dlc/extractor/bandcamp.py @@ -332,7 +332,7 @@ class BandcampAlbumIE(BandcampBaseIE): entries = [ self.url_result( compat_urlparse.urljoin(url, track['title_link']), - ie=BandcampIE.ie_key(), + ie=BandcampIE.ie_key(), video_id=track['id'], video_title=track['title']) for track in tracks if track.get('duration')] From 4932ba4aecf653166f04211680a48624b48f030f Mon Sep 17 00:00:00 2001 From: Unknown Date: Thu, 29 Oct 2020 02:57:43 +0100 Subject: [PATCH 20/30] [yt_live_chat] deactivate for now. 
--- youtube_dlc/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index ccfaa733d..5fd22081a 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -1366,14 +1366,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': ext, }) sub_lang_list[lang] = sub_formats - if has_live_chat_replay: + """ if has_live_chat_replay: sub_lang_list['live_chat'] = [ { 'video_id': video_id, 'ext': 'json', 'protocol': 'youtube_live_chat_replay', }, - ] + ] """ if not sub_lang_list: self._downloader.report_warning('video doesn\'t have subtitles') return {} From bb8a73a0e2b5e4118a32dee9e3c30737107ed90b Mon Sep 17 00:00:00 2001 From: bopol Date: Wed, 28 Oct 2020 21:57:58 +0100 Subject: [PATCH 21/30] [nitter] Add new extractor --- docs/supportedsites.md | 1 + youtube_dlc/extractor/extractors.py | 1 + youtube_dlc/extractor/nitter.py | 167 ++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+) create mode 100644 youtube_dlc/extractor/nitter.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index c46d122ff..3b98e7a12 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -582,6 +582,7 @@ - **niconico**: ニコニコ動画 - **NiconicoPlaylist** - **Nintendo** + - **Nitter** - **njoy**: N-JOY - **njoy:embed** - **NJPWWorld**: 新日本プロレスワールド diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py index d31edd7c8..1dc2ab34c 100644 --- a/youtube_dlc/extractor/extractors.py +++ b/youtube_dlc/extractor/extractors.py @@ -751,6 +751,7 @@ from .ninecninemedia import NineCNineMediaIE from .ninegag import NineGagIE from .ninenow import NineNowIE from .nintendo import NintendoIE +from .nitter import NitterIE from .njpwworld import NJPWWorldIE from .nobelprize import NobelPrizeIE from .noco import NocoIE diff --git a/youtube_dlc/extractor/nitter.py b/youtube_dlc/extractor/nitter.py new file mode 100644 index 
000000000..3191543ed --- /dev/null +++ b/youtube_dlc/extractor/nitter.py @@ -0,0 +1,167 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + parse_count, + unified_strdate, + unified_timestamp, + remove_end, + determine_ext, +) +import re + + +class NitterIE(InfoExtractor): + # Taken from https://github.com/zedeus/nitter/wiki/Instances + INSTANCES = ('nitter.net', + 'nitter.snopyta.org', + 'nitter.42l.fr', + 'nitter.nixnet.services', + 'nitter.13ad.de', + 'nitter.pussthecat.org', + 'nitter.mastodont.cat', + 'nitter.dark.fail', + 'nitter.tedomum.net', + 'nitter.cattube.org', + 'nitter.fdn.fr', + 'nitter.1d4.us', + 'nitter.kavin.rocks', + 'tweet.lambda.dance', + 'nitter.cc', + 'nitter.weaponizedhumiliation.com', + '3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion', + 'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion', + 'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion') + + _INSTANCES_RE = '(?:' + '|'.join([re.escape(instance) for instance in INSTANCES]) + ')' + _VALID_URL = r'https?://%(instance)s/(?P.+)/status/(?P[0-9]+)(#.)?' % {'instance': _INSTANCES_RE} + current_instance = INSTANCES[0] # the test and official instance + _TESTS = [ + { + # GIF (wrapped in mp4) + 'url': 'https://' + current_instance + '/firefox/status/1314279897502629888#m', + 'info_dict': { + 'id': '1314279897502629888', + 'ext': 'mp4', + 'title': 'Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg #UnfckTheInternet', + 'description': 'You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. Report harmful YouTube recommendations so others can avoid watching them. 
➡️ https://mzl.la/3iFIiyg #UnfckTheInternet', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Firefox 🔥', + 'uploader_id': 'firefox', + 'uploader_url': 'https://' + current_instance + '/firefox', + 'upload_date': '20201008', + 'timestamp': 1602183720, + }, + }, { # normal video + 'url': 'https://' + current_instance + '/Le___Doc/status/1299715685392756737#m', + 'info_dict': { + 'id': '1299715685392756737', + 'ext': 'mp4', + 'title': 'Le Doc - "Je ne prédis jamais rien" D Raoult, Août 2020...', + 'description': '"Je ne prédis jamais rien" D Raoult, Août 2020...', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Le Doc', + 'uploader_id': 'Le___Doc', + 'uploader_url': 'https://' + current_instance + '/Le___Doc', + 'upload_date': '20200829', + 'timestamp': 1598711341, + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + }, + }, { # video embed in a "Streaming Political Ads" box + 'url': 'https://' + current_instance + '/mozilla/status/1321147074491092994#m', + 'info_dict': { + 'id': '1321147074491092994', + 'ext': 'mp4', + 'title': "Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. Learn more ➡️ https://mzl.la/StreamingAds", + 'description': "Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? This isn't a real political ad, but if you're watching streaming TV in the U.S., chances are you've seen quite a few. 
Learn more ➡️ https://mzl.la/StreamingAds", + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Mozilla', + 'uploader_id': 'mozilla', + 'uploader_url': 'https://' + current_instance + '/mozilla', + 'upload_date': '20201027', + 'timestamp': 1603820982 + }, + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + parsed_url = compat_urlparse.urlparse(url) + base_url = parsed_url.scheme + '://' + parsed_url.netloc + + self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on') + webpage = self._download_webpage(url, video_id) + + video_url = base_url + self._html_search_regex(r'(?:]+data-url|]+src)="([^"]+)"', webpage, 'video url') + ext = determine_ext(video_url) + + if ext == 'unknown_video': + formats = self._extract_m3u8_formats(video_url, video_id, ext='mp4') + else: + formats = [{ + 'url': video_url, + 'ext': ext + }] + + title = ( + self._og_search_description(webpage).replace('\n', ' ') + or self._html_search_regex(r'
]+title="([^"]+)"', webpage, 'uploader name', fatal=False)) + + if uploader_id: + uploader_url = base_url + '/' + uploader_id + + uploader = self._html_search_regex(r']+title="([^"]+)"', webpage, 'uploader name', fatal=False) + + if uploader: + title = uploader + ' - ' + title + + view_count = parse_count(self._html_search_regex(r']+class="icon-play[^>]*>\s([^<]+)
', webpage, 'view count', fatal=False)) + like_count = parse_count(self._html_search_regex(r']+class="icon-heart[^>]*>\s([^<]+)', webpage, 'like count', fatal=False)) + repost_count = parse_count(self._html_search_regex(r']+class="icon-retweet[^>]*>\s([^<]+)', webpage, 'repost count', fatal=False)) + comment_count = parse_count(self._html_search_regex(r']+class="icon-comment[^>]*>\s([^<]+)', webpage, 'repost count', fatal=False)) + + thumbnail = base_url + (self._html_search_meta('og:image', webpage, 'thumbnail url') + or self._html_search_regex(r']+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)) + + thumbnail = remove_end(thumbnail, '%3Asmall') # if parsed with regex, it should contain this + + thumbnails = [] + thumbnail_ids = ('thumb', 'small', 'large', 'medium', 'orig') + for id in thumbnail_ids: + thumbnails.append({ + 'id': id, + 'url': thumbnail + '%3A' + id, + }) + + date = self._html_search_regex(r']+class="tweet-date"[^>]*>]+title="([^"]+)"', webpage, 'upload date', fatal=False) + upload_date = unified_strdate(date) + timestamp = unified_timestamp(date) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'uploader': uploader, + 'timestamp': timestamp, + 'uploader_id': uploader_id, + 'uploader_url': uploader_url, + 'view_count': view_count, + 'like_count': like_count, + 'repost_count': repost_count, + 'comment_count': comment_count, + 'formats': formats, + 'thumbnails': thumbnails, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + } From 920ad13673b4f60274fe132bf17d8019011dfc9b Mon Sep 17 00:00:00 2001 From: nixxo Date: Thu, 29 Oct 2020 13:37:07 +0100 Subject: [PATCH 22/30] [skyitalia] fixed coding conventions --- youtube_dlc/extractor/skyitalia.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dlc/extractor/skyitalia.py b/youtube_dlc/extractor/skyitalia.py index a4e894044..3c7bd465d 100644 --- a/youtube_dlc/extractor/skyitalia.py +++ b/youtube_dlc/extractor/skyitalia.py @@ 
-33,11 +33,11 @@ class SkyItaliaBaseIE(InfoExtractor): formats = [] for q, r in self._RES.items(): - key = 'web_' + q + '_url' + key = 'web_%s_url' % q if key not in video_data: continue formats.append({ - 'url': video_data[key], + 'url': video_data.get(key), 'format_id': q, 'width': r[0], 'height': r[1] From 9322f1162de5d5f4fc7e911acdce782ccb943712 Mon Sep 17 00:00:00 2001 From: insaneracist Date: Thu, 29 Oct 2020 14:37:06 -0700 Subject: [PATCH 23/30] [youtube] fix: extract artist metadata from ytInitialData (#49) --- youtube_dlc/extractor/youtube.py | 46 ++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index 5fd22081a..c75a7edae 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -1406,6 +1406,44 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return self._parse_json( uppercase_escape(config), video_id, fatal=False) + def _get_music_metadata_from_yt_initial(self, yt_initial): + music_metadata = [] + key_map = { + 'Album': 'album', + 'Artist': 'artist', + 'Song': 'track' + } + contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents']) + if type(contents) is list: + for content in contents: + music_track = {} + if type(content) is not dict: + continue + videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer']) + if type(videoSecondaryInfoRenderer) is not dict: + continue + rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows']) + if type(rows) is not list: + continue + for row in rows: + metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer']) + if type(metadataRowRenderer) is not dict: + continue + key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText']) + value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \ + 
try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text']) + if type(key) is not str or type(value) is not str: + continue + if key in key_map: + if key_map[key] in music_track: + # we've started on a new track + music_metadata.append(music_track) + music_track = {} + music_track[key_map[key]] = value + if len(music_track.keys()): + music_metadata.append(music_track) + return music_metadata + def _get_automatic_captions(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an argument to speed up the process.""" @@ -2328,6 +2366,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if release_year: release_year = int(release_year) + yt_initial = self._get_yt_initial_data(video_id, video_webpage) + if yt_initial: + music_metadata = self._get_music_metadata_from_yt_initial(yt_initial) + if len(music_metadata): + album = music_metadata[0].get('album') + artist = music_metadata[0].get('artist') + track = music_metadata[0].get('track') + m_episode = re.search( r']+id="watch7-headline"[^>]*>\s*]*>.*?>(?P[^<]+)\s*S(?P\d+)\s*•\s*E(?P\d+)', video_webpage) From 3086aa194fcacd4e12e9b266ec046c991bd07f2a Mon Sep 17 00:00:00 2001 From: Dan Walker Date: Fri, 30 Oct 2020 08:31:34 -0700 Subject: [PATCH 24/30] Added Comcast_SSO fix This fix had been proposed on yt-dl for a lengthy period of time but was never merged. It has been thoroughly tested by a large section of the community. 
--- youtube_dlc/extractor/adobepass.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dlc/extractor/adobepass.py b/youtube_dlc/extractor/adobepass.py index 38dca1b0a..649f9940f 100644 --- a/youtube_dlc/extractor/adobepass.py +++ b/youtube_dlc/extractor/adobepass.py @@ -1438,6 +1438,13 @@ class AdobePassIE(InfoExtractor): provider_redirect_page, 'oauth redirect') self._download_webpage( oauth_redirect_url, video_id, 'Confirming auto login') + elif 'automatically signed in with' in provider_redirect_page: + # Seems like comcast is rolling up new way of automatically signing customers + oauth_redirect_url = self._html_search_regex( + r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page, + 'oauth redirect (signed)') + # Just need to process the request. No useful data comes back + self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login') else: if '
Date: Fri, 30 Oct 2020 21:29:21 +0100 Subject: [PATCH 25/30] Only use video id to find metadata --- youtube_dlc/extractor/netzkino.py | 47 ++++++++++++++++--------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/youtube_dlc/extractor/netzkino.py b/youtube_dlc/extractor/netzkino.py index aec3026b1..3d1a06d0b 100644 --- a/youtube_dlc/extractor/netzkino.py +++ b/youtube_dlc/extractor/netzkino.py @@ -13,17 +13,16 @@ from ..utils import ( class NetzkinoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P[^/]+)/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/[^/]+/(?P[^/]+)' - _TEST = { - 'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond', + _TESTS = [{ + 'url': 'https://www.netzkino.de/#!/scifikino/rakete-zum-mond', 'md5': '92a3f8b76f8d7220acce5377ea5d4873', 'info_dict': { 'id': 'rakete-zum-mond', 'ext': 'mp4', - 'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)', - 'comments': 'mincount:3', - 'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28', + 'title': 'Rakete zum Mond \u2013 Jules Verne', + 'description': 'md5:f0a8024479618ddbfa450ff48ffa6c60', 'upload_date': '20120813', 'thumbnail': r're:https?://.*\.jpg$', 'timestamp': 1344858571, @@ -32,17 +31,30 @@ class NetzkinoIE(InfoExtractor): 'params': { 'skip_download': 'Download only works from Germany', } - } + }, { + 'url': 'https://www.netzkino.de/#!/filme/dr-jekyll-mrs-hyde-2', + 'md5': 'c7728b2dadd04ff6727814847a51ef03', + 'info_dict': { + 'id': 'dr-jekyll-mrs-hyde-2', + 'ext': 'mp4', + 'title': 'Dr. Jekyll & Mrs. 
Hyde 2', + 'description': 'md5:c2e9626ebd02de0a794b95407045d186', + 'upload_date': '20190130', + 'thumbnail': r're:https?://.*\.jpg$', + 'timestamp': 1548849437, + 'age_limit': 18, + }, + 'params': { + 'skip_download': 'Download only works from Germany', + } + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - category_id = mobj.group('category') video_id = mobj.group('id') - api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id - api_info = self._download_json(api_url, video_id) - info = next( - p for p in api_info['posts'] if p['slug'] == video_id) + api_url = 'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/%s.json?d=www' % video_id + info = self._download_json(api_url, video_id) custom_fields = info['custom_fields'] production_js = self._download_webpage( @@ -67,23 +79,12 @@ class NetzkinoIE(InfoExtractor): } for key, tpl in templates.items()] self._sort_formats(formats) - comments = [{ - 'timestamp': parse_iso8601(c.get('date'), delimiter=' '), - 'id': c['id'], - 'author': c['name'], - 'html': c['content'], - 'parent': 'root' if c.get('parent', 0) == 0 else c['parent'], - } for c in info.get('comments', [])] - return { 'id': video_id, 'formats': formats, - 'comments': comments, 'title': info['title'], 'age_limit': int_or_none(custom_fields.get('FSK')[0]), 'timestamp': parse_iso8601(info.get('date'), delimiter=' '), 'description': clean_html(info.get('content')), 'thumbnail': info.get('thumbnail'), - 'playlist_title': api_info.get('title'), - 'playlist_id': category_id, } From 9f448fcb269d52e757999cba6dab3ff7046d2c19 Mon Sep 17 00:00:00 2001 From: Unknown Date: Sat, 31 Oct 2020 05:46:51 +0100 Subject: [PATCH 26/30] [core/yt_live_chat] live_chat is back. 
dl() new parameter --- youtube_dlc/YoutubeDL.py | 7 +++++-- youtube_dlc/downloader/common.py | 24 ++++++++++++++++-------- youtube_dlc/extractor/youtube.py | 4 ++-- youtube_dlc/options.py | 2 +- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py index f959a4e47..360595918 100644 --- a/youtube_dlc/YoutubeDL.py +++ b/youtube_dlc/YoutubeDL.py @@ -1857,13 +1857,13 @@ class YoutubeDL(object): self.report_error('Cannot write annotations file: ' + annofn) return - def dl(name, info): + def dl(name, info, subtitle=False): fd = get_suitable_downloader(info, self.params)(self, self.params) for ph in self._progress_hooks: fd.add_progress_hook(ph) if self.params.get('verbose'): self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) - return fd.download(name, info) + return fd.download(name, info, subtitle) subtitles_are_requested = any([self.params.get('writesubtitles', False), self.params.get('writeautomaticsub')]) @@ -1891,6 +1891,8 @@ class YoutubeDL(object): return else: try: + dl(sub_filename, sub_info, subtitle=True) + ''' if self.params.get('sleep_interval_subtitles', False): dl(sub_filename, sub_info) else: @@ -1898,6 +1900,7 @@ class YoutubeDL(object): sub_info['url'], info_dict['id'], note=False).read() with io.open(encodeFilename(sub_filename), 'wb') as subfile: subfile.write(sub_data) + ''' except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_warning('Unable to download subtitle for "%s": %s' % (sub_lang, error_to_compat_str(err))) diff --git a/youtube_dlc/downloader/common.py b/youtube_dlc/downloader/common.py index 31c286458..14bd322b4 100644 --- a/youtube_dlc/downloader/common.py +++ b/youtube_dlc/downloader/common.py @@ -326,7 +326,7 @@ class FileDownloader(object): """Report it was impossible to resume download.""" self.to_screen('[download] Unable to resume') - def download(self, 
filename, info_dict): + def download(self, filename, info_dict, subtitle=False): """Download to a filename using the info from info_dict Return True on success and False otherwise """ @@ -353,15 +353,23 @@ class FileDownloader(object): }) return True - min_sleep_interval = self.params.get('sleep_interval') - if min_sleep_interval: - max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) + if subtitle is False: + min_sleep_interval = self.params.get('sleep_interval') + if min_sleep_interval: + max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) + sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) + self.to_screen( + '[download] Sleeping %s seconds...' % ( + int(sleep_interval) if sleep_interval.is_integer() + else '%.2f' % sleep_interval)) + time.sleep(sleep_interval) + else: + sleep_interval_sub = self.params.get('sleep_interval_subtitles') self.to_screen( '[download] Sleeping %s seconds...' 
% ( - int(sleep_interval) if sleep_interval.is_integer() - else '%.2f' % sleep_interval)) - time.sleep(sleep_interval) + int(sleep_interval_sub))) + time.sleep(sleep_interval_sub) + return self.real_download(filename, info_dict) diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index 5fd22081a..ccfaa733d 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -1366,14 +1366,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': ext, }) sub_lang_list[lang] = sub_formats - """ if has_live_chat_replay: + if has_live_chat_replay: sub_lang_list['live_chat'] = [ { 'video_id': video_id, 'ext': 'json', 'protocol': 'youtube_live_chat_replay', }, - ] """ + ] if not sub_lang_list: self._downloader.report_warning('video doesn\'t have subtitles') return {} diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py index 1d7a7fed2..66b45220c 100644 --- a/youtube_dlc/options.py +++ b/youtube_dlc/options.py @@ -582,7 +582,7 @@ def parseOpts(overrideArguments=None): 'along with --min-sleep-interval.')) workarounds.add_option( '--sleep-subtitles', - dest='sleep_interval_subtitles', action='store_true', default=False, + dest='sleep_interval_subtitles', action='store_true', default=0, help='Enforce sleep interval on subtitles as well') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') From 989188476804cccd2c4c858571c80f1e93a5afc3 Mon Sep 17 00:00:00 2001 From: Tom-Oliver Heidel Date: Sat, 31 Oct 2020 07:21:58 +0100 Subject: [PATCH 27/30] [skip travis] half done workflow --- .github/workflows/build.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8db7e92f2..538740355 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -57,7 +57,7 @@ jobs: id: sha2_file env: SHA2: ${{ hashFiles('youtube-dlc') }} - run: echo "::set-output name=sha2_unix::${env:SHA2}" + run: echo 
"::set-output name=sha2_unix::$SHA2" - name: Install dependencies for pypi run: | python -m pip install --upgrade pip @@ -98,12 +98,12 @@ jobs: upload_url: ${{ needs.build_unix.outputs.upload_url }} asset_path: ./dist/youtube-dlc.exe asset_name: youtube-dlc.exe - asset_content_type: application/octet-stream + asset_content_type: application/vnd.microsoft.portable-executable - name: Get SHA2-256SUMS for youtube-dlc.exe id: sha2_file_win env: - SHA2: ${{ hashFiles('dist/youtube-dlc.exe') }} - run: echo "::set-output name=sha2_windows::${env:SHA2}" + SHA2_win: ${{ hashFiles('dist/youtube-dlc.exe') }} + run: echo "::set-output name=sha2_windows::$SHA2_win" build_windows32: @@ -133,12 +133,12 @@ jobs: upload_url: ${{ needs.build_unix.outputs.upload_url }} asset_path: ./dist/youtube-dlc_x86.exe asset_name: youtube-dlc_x86.exe - asset_content_type: application/octet-stream + asset_content_type: application/vnd.microsoft.portable-executable - name: Get SHA2-256SUMS for youtube-dlc_x86.exe id: sha2_file_win32 env: - SHA2: ${{ hashFiles('dist/youtube-dlc_x86.exe') }} - run: echo "::set-output name=sha2_windows32::${env:SHA2}" + SHA2_win32: ${{ hashFiles('dist/youtube-dlc_x86.exe') }} + run: echo "::set-output name=sha2_windows32::$SHA2_win32" - name: Make SHA2-256SUMS file env: SHA2_WINDOWS: ${{ needs.build_windows.outputs.sha2_windows }} From da6403d340bce709ca226fc3e195749032b02d88 Mon Sep 17 00:00:00 2001 From: Tom-Oliver Heidel Date: Sat, 31 Oct 2020 08:36:14 +0100 Subject: [PATCH 28/30] [skip travis] finalised workflow --- .github/workflows/build.yml | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 538740355..f5d94dc49 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -146,6 +146,18 @@ jobs: SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }} YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }} run: | - echo "$SHA2_WINDOWS 
youtube-dlc.exe" > SHA2-256SUMS - echo "$SHA2_WINDOWS32 youtube-dlc32.exe" > SHA2-256SUMS - echo "$SHA2_UNIX youtube-dlc" >> SHA2-256SUMS + echo "version:$YTDLC_VERSION" >> SHA2-256SUMS + echo "youtube-dlc.exe:$SHA2_WINDOWS" >> SHA2-256SUMS + echo "youtube-dlc32.exe:$SHA2_WINDOWS32" >> SHA2-256SUMS + echo "youtube-dlc:$SHA2_UNIX" >> SHA2-256SUMS + + - name: Upload 256SUMS file + id: upload-sums + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.build_unix.outputs.upload_url }} + asset_path: ./SHA2-256SUMS + asset_name: SHA2-256SUMS + asset_content_type: text/plain From fa57af1ef333b11630ba6ae4353a94ea118883d4 Mon Sep 17 00:00:00 2001 From: Unknown Date: Sat, 31 Oct 2020 08:57:55 +0100 Subject: [PATCH 29/30] flake8. added sha256 check to updater. --- youtube_dlc/YoutubeDL.py | 2 +- youtube_dlc/downloader/common.py | 2 -- youtube_dlc/update.py | 20 ++++++++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py index 360595918..dd55ba0f2 100644 --- a/youtube_dlc/YoutubeDL.py +++ b/youtube_dlc/YoutubeDL.py @@ -1872,7 +1872,7 @@ class YoutubeDL(object): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE subtitles = info_dict['requested_subtitles'] - ie = self.get_info_extractor(info_dict['extractor_key']) + # ie = self.get_info_extractor(info_dict['extractor_key']) for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) diff --git a/youtube_dlc/downloader/common.py b/youtube_dlc/downloader/common.py index 14bd322b4..460364a0b 100644 --- a/youtube_dlc/downloader/common.py +++ b/youtube_dlc/downloader/common.py @@ -369,8 +369,6 @@ class FileDownloader(object): '[download] Sleeping %s seconds...' 
% ( int(sleep_interval_sub))) time.sleep(sleep_interval_sub) - - return self.real_download(filename, info_dict) def real_download(self, filename, info_dict): diff --git a/youtube_dlc/update.py b/youtube_dlc/update.py index e49e09c17..b358e902b 100644 --- a/youtube_dlc/update.py +++ b/youtube_dlc/update.py @@ -37,10 +37,26 @@ def update_self(to_screen, verbose, opener): JSON_URL = UPDATE_URL + 'versions.json' UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) + def sha256sum(): + h = hashlib.sha256() + b = bytearray(128 * 1024) + mv = memoryview(b) + with open(os.path.realpath(sys.executable), 'rb', buffering=0) as f: + for n in iter(lambda: f.readinto(mv), 0): + h.update(mv[:n]) + return h.hexdigest() + + to_screen('Current Build Hash %s' % sha256sum()) + if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, 'frozen'): to_screen('It looks like you installed youtube-dlc with a package manager, pip, setup.py or a tarball. Please use that to update.') return + # compiled file.exe can find itself by + # to_screen(os.path.basename(sys.executable)) + # and path to py or exe + # to_screen(os.path.realpath(sys.executable)) + # Check if there is a new version try: newversion = opener.open(VERSION_URL).read().decode('utf-8').strip() @@ -48,6 +64,7 @@ def update_self(to_screen, verbose, opener): if verbose: to_screen(encode_compat_str(traceback.format_exc())) to_screen('ERROR: can\'t find the current version. 
Please try again later.') + to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest') return if newversion == __version__: to_screen('youtube-dlc is up-to-date (' + __version__ + ')') @@ -61,6 +78,7 @@ def update_self(to_screen, verbose, opener): if verbose: to_screen(encode_compat_str(traceback.format_exc())) to_screen('ERROR: can\'t obtain versions info. Please try again later.') + to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest') return if 'signature' not in versions_info: to_screen('ERROR: the versions file is not signed or corrupted. Aborting.') @@ -109,6 +127,7 @@ def update_self(to_screen, verbose, opener): if verbose: to_screen(encode_compat_str(traceback.format_exc())) to_screen('ERROR: unable to download latest version') + to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest') return newcontent_hash = hashlib.sha256(newcontent).hexdigest() @@ -155,6 +174,7 @@ start /b "" cmd /c del "%%~f0"&exit /b" if verbose: to_screen(encode_compat_str(traceback.format_exc())) to_screen('ERROR: unable to download latest version') + to_screen('Visit https://github.com/blackjack4494/yt-dlc/releases/latest') return newcontent_hash = hashlib.sha256(newcontent).hexdigest() From 7fb5f2f29d99fa269988c6586558c7e9d21e432d Mon Sep 17 00:00:00 2001 From: Unknown Date: Sat, 31 Oct 2020 09:26:04 +0100 Subject: [PATCH 30/30] [skip travis] templates and urls --- .github/ISSUE_TEMPLATE/1_broken_site.md | 10 +++++----- .github/ISSUE_TEMPLATE/2_site_support_request.md | 8 ++++---- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 6 +++--- .github/ISSUE_TEMPLATE/4_bug_report.md | 12 ++++++------ .github/ISSUE_TEMPLATE/5_feature_request.md | 6 +++--- .github/ISSUE_TEMPLATE/6_question.md | 6 +++--- .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md | 6 +++--- .../ISSUE_TEMPLATE_tmpl/2_site_support_request.md | 6 +++--- .../ISSUE_TEMPLATE_tmpl/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md | 8 ++++---- 
.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md | 4 ++-- youtube_dlc/utils.py | 4 ++-- 12 files changed, 40 insertions(+), 40 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index bf4251004..32c14aa85 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -21,15 +21,15 @@ assignees: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dlc version **2020.10.26** +- [ ] I've verified that I'm running youtube-dlc version **2020.10.31** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -44,7 +44,7 @@ Add the `-v` flag to your command line you run youtube-dlc with (`youtube-dlc -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dlc version 2020.10.26 + [debug] youtube-dlc version 2020.10.31 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index 889005097..fe1aade05 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -21,15 +21,15 @@ assignees: '' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dlcc version **2020.10.26** +- [ ] I've verified that I'm running youtube-dlcc version **2020.10.31** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - 
[ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index e5d714388..cddb81dda 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -21,13 +21,13 @@ assignees: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dlc version **2020.10.26** +- [ ] I've verified that I'm running youtube-dlc version **2020.10.31** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 9de52f98c..920ae8dbc 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -21,16 +21,16 @@ assignees: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dlc version **2020.10.26** +- [ ] I've verified that I'm running youtube-dlc version **2020.10.31** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -46,7 +46,7 @@ Add the `-v` flag to your command line you run youtube-dlc with (`youtube-dlc -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dlc version 2020.10.26 + [debug] youtube-dlc version 2020.10.31 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md 
index 86fac96dd..7cc390f58 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -21,13 +21,13 @@ assignees: '' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dlc version **2020.10.26** +- [ ] I've verified that I'm running youtube-dlc version **2020.10.31** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/6_question.md b/.github/ISSUE_TEMPLATE/6_question.md index 034a9c5ac..3c3ae0f3b 100644 --- a/.github/ISSUE_TEMPLATE/6_question.md +++ b/.github/ISSUE_TEMPLATE/6_question.md @@ -21,8 +21,8 @@ assignees: '' @@ -34,7 +34,7 @@ Carefully read and work through this check list in order to prevent the most com ## Question WRITE QUESTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md index 8f9bb2c33..3fe4d6968 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md @@ -18,10 +18,10 @@ title: '' diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md index 9748afd4d..aad8fa054 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md @@ -19,10 +19,10 @@ labels: 'site-support-request' diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md index f274e8aeb..2fb82f828 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md @@ -18,8 +18,8 @@ title: '' diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md index 788f1c9a1..b7bebf8ab 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md @@ -18,11 +18,11 @@ title: '' diff --git 
a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md index 9b3b8c3bf..99592f79d 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md @@ -19,8 +19,8 @@ labels: 'request' diff --git a/youtube_dlc/utils.py b/youtube_dlc/utils.py index 54a4ea2aa..f5dc1bdaf 100644 --- a/youtube_dlc/utils.py +++ b/youtube_dlc/utils.py @@ -2320,8 +2320,8 @@ def bug_reports_message(): if ytdl_is_updateable(): update_cmd = 'type youtube-dlc -U to update' else: - update_cmd = 'see https://yt-dl.org/update on how to update' - msg = '; please report this issue on https://yt-dl.org/bug .' + update_cmd = 'see https://github.com/blackjack4494/yt-dlc on how to update' + msg = '; please report this issue on https://github.com/blackjack4494/yt-dlc .' msg += ' Make sure you are using the latest version; %s.' % update_cmd msg += ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.' return msg