From eff635394a108b0208abdac52fd14d457e38927e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 2 Apr 2021 22:12:42 +0530 Subject: [PATCH] Ability to select nth best format Eg: -f `bv*.2` * Also cleaned up the related format selection code --- README.md | 6 ++++-- yt_dlp/YoutubeDL.py | 39 +++++++++++++++++++++++---------------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 42609fa2a..59684b3cc 100644 --- a/README.md +++ b/README.md @@ -983,7 +983,7 @@ $ yt-dlp -o - BaW_jenozKc By default, yt-dlp tries to download the best available quality if you **don't** pass any options. This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, if multiple audiostreams is enabled (`--audio-multistreams`), the default format changes to `-f bestvideo+bestaudio/best`. Similarly, if ffmpeg is unavailable, or if you use yt-dlp to stream to `stdout` (`-o -`), the default becomes `-f best/bestvideo+bestaudio`. -The general syntax for format selection is `--f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download. +The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download. **tl;dr:** [navigate me to examples](#format-selection-examples). @@ -1007,7 +1007,9 @@ You can also use special names to select particular edge case formats: - `ba*`, `bestaudio*`: Select the best quality format that contains audio. It may also contain video. Equivalent to `best*[acodec!=none]` - `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]` -For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recomended to never actually use `worst` and related options. 
When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details. +For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps` instead of `-f worst`. See [sorting formats](#sorting-formats) for more details. + +You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream. If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download. 
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d1b6b2056..a77e1fe5e 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1576,21 +1576,25 @@ class YoutubeDL(object): else: format_fallback = False - format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec) - if format_spec_obj is not None: - format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1 - format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False - not_format_type = 'v' if format_type == 'a' else 'a' - format_modified = format_spec_obj.group(3) is not None + mobj = re.match( + r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$', + format_spec) + if mobj is not None: + format_idx = int_or_none(mobj.group('n'), default=1) + format_idx = format_idx - 1 if mobj.group('bw')[0] == 'w' else -format_idx + format_type = (mobj.group('type') or [None])[0] + not_format_type = {'v': 'a', 'a': 'v'}.get(format_type) + format_modified = mobj.group('mod') is not None format_fallback = not format_type and not format_modified # for b, w - filter_f = ((lambda f: f.get(format_type + 'codec') != 'none') - if format_type and format_modified # bv*, ba*, wv*, wa* - else (lambda f: f.get(not_format_type + 'codec') == 'none') - if format_type # bv, ba, wv, wa - else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none') - if not format_modified # b, w - else None) # b*, w* + filter_f = ( + (lambda f: f.get('%scodec' % format_type) != 'none') + if format_type and format_modified # bv*, ba*, wv*, wa* + else (lambda f: f.get('%scodec' % not_format_type) == 'none') + if format_type # bv, ba, wv, wa + else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none') + if not format_modified # b, w + else None) # b*, w* else: format_idx = -1 filter_f = ((lambda f: f.get('ext') == format_spec) @@ -1602,13 +1606,16 @@ class YoutubeDL(object): if not formats: return matches = list(filter(filter_f, formats)) if filter_f is not 
None else formats - if matches: + n = len(matches) + if -n <= format_idx < n: yield matches[format_idx] - elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']): + elif format_fallback and ctx['incomplete_formats']: # for extractors with incomplete formats (audio only (soundcloud) # or video only (imgur)) best/worst will fallback to # best/worst {video,audio}-only format - yield formats[format_idx] + n = len(formats) + if -n <= format_idx < n: + yield formats[format_idx] elif selector.type == MERGE: # + def _merge(formats_pair):