From 5d3a0e794b50a7f2524bdf37a886e0f436eb2f14 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 25 Jun 2021 19:35:41 +0530 Subject: [PATCH] Add `--extractor-args` to pass extractor-specific arguments --- README.md | 32 ++++++++++++++++++------------- yt_dlp/YoutubeDL.py | 9 +++++++-- yt_dlp/__init__.py | 1 + yt_dlp/extractor/common.py | 5 +++++ yt_dlp/extractor/youtube.py | 38 +++++++++++++++++++------------------ yt_dlp/options.py | 23 +++++++++++++++++----- 6 files changed, 70 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 273f83b3b..152c23c30 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [Format Selection examples](#format-selection-examples) * [MODIFYING METADATA](#modifying-metadata) * [Modifying metadata examples](#modifying-metadata-examples) +* [EXTRACTOR ARGUMENTS](#extractor-arguments) * [PLUGINS](#plugins) * [DEPRECATED OPTIONS](#deprecated-options) * [MORE](#more) @@ -433,7 +434,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t --downloader-args NAME:ARGS Give these arguments to the external downloader. Specify the downloader name and the arguments separated by a colon ":". You - can use this option multiple times + can use this option multiple times to give + different arguments to different downloaders (Alias: --external-downloader-args) ## Filesystem Options: @@ -816,18 +818,10 @@ Then simply run `make`. 
You can also run `make yt-dlp` instead to compile only t --no-hls-split-discontinuity Do not split HLS playlists to different formats at discontinuities such as ad breaks (default) - --youtube-include-dash-manifest Download the DASH manifests and related - data on YouTube videos (default) - (Alias: --no-youtube-skip-dash-manifest) - --youtube-skip-dash-manifest Do not download the DASH manifests and - related data on YouTube videos - (Alias: --no-youtube-include-dash-manifest) - --youtube-include-hls-manifest Download the HLS manifests and related data - on YouTube videos (default) - (Alias: --no-youtube-skip-hls-manifest) - --youtube-skip-hls-manifest Do not download the HLS manifests and - related data on YouTube videos - (Alias: --no-youtube-include-hls-manifest) + --extractor-args KEY:ARGS Pass these arguments to the extractor. See + "EXTRACTOR ARGUMENTS" for details. You can + use this option multiple times to give + different arguments to different extractors # CONFIGURATION @@ -1331,6 +1325,14 @@ $ yt-dlp --parse-metadata 'description:(?s)(?P.+)' --add-metadata ``` +# EXTRACTOR ARGUMENTS + +Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args youtube:skip=dash,hls` + +The following extractors use this feature: +* **youtube** + * `skip`: `hls` or `dash` (or both) to skip download of the respective manifests + # PLUGINS Plugins are loaded from `/ytdlp_plugins//__init__.py`. Currently only `extractor` plugins are supported. Support for `downloader` and `postprocessor` plugins may be added in the future. See [ytdlp_plugins](ytdlp_plugins) for example. 
@@ -1362,6 +1364,10 @@ While these options still work, their use is not recommended since there are oth --list-formats-old --compat-options list-formats (Alias: --no-list-formats-as-table) --list-formats-as-table --compat-options -list-formats [Default] (Alias: --no-list-formats-old) --sponskrub-args ARGS --ppa "sponskrub:ARGS" + --youtube-skip-dash-manifest --extractor-args "youtube:skip=dash" (Alias: --no-youtube-include-dash-manifest) + --youtube-skip-hls-manifest --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest) + --youtube-include-dash-manifest Default (Alias: --no-youtube-skip-dash-manifest) + --youtube-include-hls-manifest Default (Alias: --no-youtube-skip-hls-manifest) --test Used by developers for testing extractors. Not intended for the end user --youtube-print-sig-code Used for testing youtube signatures diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d3e95efab..c67ca8b30 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -420,11 +420,16 @@ class YoutubeDL(object): dynamic_mpd: Whether to process dynamic DASH manifests (default: True) hls_split_discontinuity: Split HLS playlists to different formats at discontinuities such as ad breaks (default: False) - youtube_include_dash_manifest: If True (default), DASH manifests and related + extractor_args: A dictionary of arguments to be passed to the extractors. + See "EXTRACTOR ARGUMENTS" for details. + Eg: {'youtube': {'skip': ['dash', 'hls']}} + youtube_include_dash_manifest: Deprecated - Use extractor_args instead. + If True (default), DASH manifests and related data will be downloaded and processed by extractor. You can reduce network I/O by disabling it if you don't care about DASH. (only for youtube) - youtube_include_hls_manifest: If True (default), HLS manifests and related + youtube_include_hls_manifest: Deprecated - Use extractor_args instead. + If True (default), HLS manifests and related data will be downloaded and processed by extractor. 
You can reduce network I/O by disabling it if you don't care about HLS. (only for youtube) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 21b45db0a..fd7729ee6 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -631,6 +631,7 @@ def _real_main(argv=None): 'include_ads': opts.include_ads, 'default_search': opts.default_search, 'dynamic_mpd': opts.dynamic_mpd, + 'extractor_args': opts.extractor_args, 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest, 'youtube_include_hls_manifest': opts.youtube_include_hls_manifest, 'encoding': opts.encoding, diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d210ec02f..bb9d8fba5 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -70,6 +70,7 @@ from ..utils import ( str_or_none, str_to_int, strip_or_none, + traverse_obj, unescapeHTML, unified_strdate, unified_timestamp, @@ -3567,6 +3568,10 @@ class InfoExtractor(object): else 'public' if all_known else None) + def _configuration_arg(self, key): + return traverse_obj( + self._downloader.params, ('extractor_args', self.ie_key().lower(), key)) + class SearchInfoExtractor(InfoExtractor): """ diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index c16f16165..2c4e9b657 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2119,8 +2119,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): dct['container'] = dct['ext'] + '_dash' formats.append(dct) + skip_manifests = self._configuration_arg('skip') or [] + get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True) + get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True) + for sd in (streaming_data, ytm_streaming_data): - hls_manifest_url = sd.get('hlsManifestUrl') + hls_manifest_url = get_hls and sd.get('hlsManifestUrl') if hls_manifest_url: for f in self._extract_m3u8_formats( hls_manifest_url, video_id, 'mp4', fatal=False): @@ -2130,23 
+2134,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): f['format_id'] = itag formats.append(f) - if self.get_param('youtube_include_dash_manifest', True): - for sd in (streaming_data, ytm_streaming_data): - dash_manifest_url = sd.get('dashManifestUrl') - if dash_manifest_url: - for f in self._extract_mpd_formats( - dash_manifest_url, video_id, fatal=False): - itag = f['format_id'] - if itag in itags: - continue - if itag in itag_qualities: - f['quality'] = q(itag_qualities[itag]) - filesize = int_or_none(self._search_regex( - r'/clen/(\d+)', f.get('fragment_base_url') - or f['url'], 'file size', default=None)) - if filesize: - f['filesize'] = filesize - formats.append(f) + dash_manifest_url = get_dash and sd.get('dashManifestUrl') + if dash_manifest_url: + for f in self._extract_mpd_formats( + dash_manifest_url, video_id, fatal=False): + itag = f['format_id'] + if itag in itags: + continue + if itag in itag_qualities: + f['quality'] = q(itag_qualities[itag]) + filesize = int_or_none(self._search_regex( + r'/clen/(\d+)', f.get('fragment_base_url') + or f['url'], 'file size', default=None)) + if filesize: + f['filesize'] = filesize + formats.append(f) if not formats: if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'): diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 4ad5223fa..5caf4cb53 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -716,7 +716,8 @@ def parseOpts(overrideArguments=None): help=( 'Give these arguments to the external downloader. ' 'Specify the downloader name and the arguments separated by a colon ":". 
' - 'You can use this option multiple times (Alias: --external-downloader-args)')) + 'You can use this option multiple times to give different arguments to different downloaders ' + '(Alias: --external-downloader-args)')) workarounds = optparse.OptionGroup(parser, 'Workarounds') workarounds.add_option( @@ -1343,22 +1344,34 @@ def parseOpts(overrideArguments=None): '--no-hls-split-discontinuity', dest='hls_split_discontinuity', action='store_false', help='Do not split HLS playlists to different formats at discontinuities such as ad breaks (default)') + extractor.add_option( + '--extractor-args', + metavar='KEY:ARGS', dest='extractor_args', default={}, type='str', + action='callback', callback=_dict_from_options_callback, + callback_kwargs={ + 'multiple_keys': False, + 'process': lambda val: dict( + (lambda x: (x[0], x[1].split(',')))(arg.split('=', 1) + ['', '']) for arg in val.split(';')) + }, + help=( + 'Pass these arguments to the extractor. See "EXTRACTOR ARGUMENTS" for details. ' + 'You can use this option multiple times to give different arguments to different extractors')) extractor.add_option( '--youtube-include-dash-manifest', '--no-youtube-skip-dash-manifest', action='store_true', dest='youtube_include_dash_manifest', default=True, - help='Download the DASH manifests and related data on YouTube videos (default) (Alias: --no-youtube-skip-dash-manifest)') + help=optparse.SUPPRESS_HELP) extractor.add_option( '--youtube-skip-dash-manifest', '--no-youtube-include-dash-manifest', action='store_false', dest='youtube_include_dash_manifest', - help='Do not download the DASH manifests and related data on YouTube videos (Alias: --no-youtube-include-dash-manifest)') + help=optparse.SUPPRESS_HELP) extractor.add_option( '--youtube-include-hls-manifest', '--no-youtube-skip-hls-manifest', action='store_true', dest='youtube_include_hls_manifest', default=True, - help='Download the HLS manifests and related data on YouTube videos (default) (Alias: 
--no-youtube-skip-hls-manifest)') + help=optparse.SUPPRESS_HELP) extractor.add_option( '--youtube-skip-hls-manifest', '--no-youtube-include-hls-manifest', action='store_false', dest='youtube_include_hls_manifest', - help='Do not download the HLS manifests and related data on YouTube videos (Alias: --no-youtube-include-hls-manifest)') + help=optparse.SUPPRESS_HELP) parser.add_option_group(general) parser.add_option_group(network)