From e625be0d10d96a20702d630dcc88e3269554e172 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 3 May 2021 22:36:03 +0530 Subject: [PATCH] Improve output template internal formatting * Allow slicing lists/strings using `field.start:end:step` * A field can also be used as offset like `field1+num+field2` * A default value can be given using `field|default` * Capture all format strings and set it to `None` if invalid. This prevents invalid fields from causing errors --- README.md | 9 +-- yt_dlp/YoutubeDL.py | 84 ++++++++++++++++------- yt_dlp/postprocessor/execafterdownload.py | 2 +- yt_dlp/utils.py | 8 +-- 4 files changed, 71 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index cef6eaf36..5ae596da7 100644 --- a/README.md +++ b/README.md @@ -842,13 +842,14 @@ The simplest usage of `-o` is not to set any template arguments when downloading It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. The field names themselves (the part inside the parenthesis) can also have some special formatting: -1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s` or `%(upload_date>%Y-%m-%d)s` -2. **Offset numbers**: Numeric fields can have an initial offset specified by using a `+` separator. Eg: `%(playlist_index+10)03d`. This can also be used in conjunction with the date-time formatting. Eg: `%(epoch+-3600>%H-%M-%S)s` -3. 
**Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. Eg: `%(tags.0)s` or `%(subtitles.en.-1.ext)`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields +1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do Python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields +1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d` +1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s` +1. **Default**: A default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s` To summarize, the general syntax for a field is: ``` -%(name[.keys][+offset][>strf])[flags][width][.precision][length]type +%(name[.keys][addition][>strf][|default])[flags][width][.precision][length]type ``` Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation`, `infojson`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video. 
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 0af036458..79ba3ef93 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -843,29 +843,67 @@ class YoutubeDL(object): if sanitize is None: sanitize = lambda k, v: v - # Internal Formatting = name.key1.key2+number>strf - INTERNAL_FORMAT_RE = FORMAT_RE.format( - r'''(?P - (?P\w+(?:\.[-\w]+)*) - (?:\+(?P-?\d+(?:\.\d+)?))? - (?:>(?P.+?))? - )''') - for mobj in re.finditer(INTERNAL_FORMAT_RE, outtmpl): - mobj = mobj.groupdict() - # Object traversal - fields = mobj['fields'].split('.') - final_key = mobj['final_key'] - value = traverse_dict(template_dict, fields) - # Offset the value - if mobj['add']: - value = float_or_none(value) - if value is not None: - value = value + float(mobj['add']) - # Datetime formatting - if mobj['strf_format']: - value = strftime_or_none(value, mobj['strf_format']) - if mobj['type'] in 'crs' and value is not None: # string - value = sanitize('%{}'.format(mobj['type']) % fields[-1], value) + EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P[^)]*)') + # Field is of the form key1.key2... + # where keys (except first) can be string, int or slice + FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*' + INTERNAL_FORMAT_RE = re.compile(r'''(?x) + (?P-)? + (?P{0}) + (?P(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*) + (?:>(?P.+?))? + (?:\|(?P.*?))? 
+ $'''.format(FIELD_RE)) + MATH_OPERATORS_RE = re.compile(r'(?.+]+'), cmd): + if not re.search(FORMAT_RE.format(r'[^)]*'), cmd): if '{}' not in cmd: cmd += ' {}' return cmd.replace('{}', compat_shlex_quote(info['filepath'])) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 08e2d19d2..baa2a415e 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6112,11 +6112,11 @@ def traverse_dict(dictn, keys, casesense=True): key = key.lower() dictn = dictn.get(key) elif isinstance(dictn, (list, tuple, compat_str)): - key, n = int_or_none(key), len(dictn) - if key is not None and -n <= key < n: - dictn = dictn[key] + if ':' in key: + key = slice(*map(int_or_none, key.split(':'))) else: - dictn = None + key = int_or_none(key) + dictn = try_get(dictn, lambda x: x[key]) else: return None return dictn