Fix and refactor `prepare_outtmpl`

The following tests would have failed previously:
%(id)d %(id)r
%(ext)s-%(ext|def)d
%(width|)d
%(id)r %(height)r
%(formats.0)r
%s
This commit is contained in:
pukkandan 2021-06-03 23:30:38 +05:30
parent 9d83ad93d0
commit 752cda3880
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698
6 changed files with 192 additions and 160 deletions

View File

@ -17,7 +17,7 @@ from yt_dlp.compat import compat_str, compat_urllib_error
from yt_dlp.extractor import YoutubeIE
from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.postprocessor.common import PostProcessor
from yt_dlp.utils import ExtractorError, match_filter_func
from yt_dlp.utils import ExtractorError, float_or_none, match_filter_func
TEST_URL = 'http://localhost/sample.mp4'
@ -648,56 +648,95 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(test_dict['extractor'], 'Foo')
self.assertEqual(test_dict['playlist'], 'funny videos')
def test_prepare_filename(self):
info = {
'id': '1234',
'ext': 'mp4',
'width': None,
'height': 1080,
'title1': '$PATH',
'title2': '%PATH%',
'timestamp': 1618488000,
'formats': [{'id': 'id1'}, {'id': 'id2'}]
}
outtmpl_info = {
'id': '1234',
'ext': 'mp4',
'width': None,
'height': 1080,
'title1': '$PATH',
'title2': '%PATH%',
'timestamp': 1618488000,
'duration': 100000,
'playlist_index': 1,
'_last_playlist_index': 100,
'n_entries': 10,
'formats': [{'id': 'id1'}, {'id': 'id2'}, {'id': 'id3'}]
}
def fname(templ, na_placeholder='NA'):
params = {'outtmpl': templ}
if na_placeholder != 'NA':
params['outtmpl_na_placeholder'] = na_placeholder
def test_prepare_outtmpl(self):
def out(tmpl, **params):
params['outtmpl'] = tmpl
ydl = YoutubeDL(params)
return ydl.prepare_filename(info)
self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s'
# Replace missing fields with 'NA' by default
self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4')
# Or by provided placeholder
self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4')
self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4')
self.assertEqual(fname('%(height)s.%(ext)s'), '1080.mp4')
self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4')
self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')
self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4')
self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4')
ydl._num_downloads = 1
outtmpl, tmpl_dict = ydl.prepare_outtmpl(tmpl, self.outtmpl_info)
return outtmpl % tmpl_dict
self.assertEqual(out('%(id)s.%(ext)s'), '1234.mp4')
self.assertEqual(out('%(duration_string)s'), '27:46:40')
self.assertTrue(float_or_none(out('%(epoch)d')))
self.assertEqual(out('%(resolution)s'), '1080p')
self.assertEqual(out('%(playlist_index)s'), '001')
self.assertEqual(out('%(autonumber)s'), '00001')
self.assertEqual(out('%(autonumber+2)03d', autonumber_start=3), '005')
self.assertEqual(out('%(autonumber)s', autonumber_size=3), '001')
self.assertEqual(out('%%'), '%')
self.assertEqual(out('%%%%'), '%%')
self.assertEqual(out('%(invalid@tmpl|def)s', outtmpl_na_placeholder='none'), 'none')
self.assertEqual(out('%()s'), 'NA')
self.assertEqual(out('%s'), '%s')
NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s'
self.assertEqual(out(NA_TEST_OUTTMPL), 'NA-NA-def-1234.mp4')
self.assertEqual(out(NA_TEST_OUTTMPL, outtmpl_na_placeholder='none'), 'none-none-def-1234.mp4')
self.assertEqual(out(NA_TEST_OUTTMPL, outtmpl_na_placeholder=''), '--def-1234.mp4')
FMT_TEST_OUTTMPL = '%%(height)%s.%%(ext)s'
self.assertEqual(out(FMT_TEST_OUTTMPL % 's'), '1080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % 'd'), '1080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % '6d'), ' 1080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % '-6d'), '1080 .mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % '06d'), '001080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % ' 06d'), ' 01080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % ' 06d'), ' 01080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % '0 6d'), ' 01080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % '0 6d'), ' 01080.mp4')
self.assertEqual(out(FMT_TEST_OUTTMPL % ' 0 6d'), ' 01080.mp4')
self.assertEqual(out('%(id)d'), '1234')
self.assertEqual(out('%(id)d %(id)r'), "1234 '1234'")
self.assertEqual(out('%(ext)s-%(ext|def)d'), 'mp4-def')
self.assertEqual(out('%(width|0)04d'), '0000')
self.assertEqual(out('%(width|)d', outtmpl_na_placeholder='none'), '')
FORMATS = self.outtmpl_info['formats']
self.assertEqual(out('%(timestamp+-1000>%H-%M-%S)s'), '11-43-20')
self.assertEqual(out('%(id+1-height+3)05d'), '00158')
self.assertEqual(out('%(width+100)05d'), 'NA')
self.assertEqual(out('%(formats.0)s'), str(FORMATS[0]))
self.assertEqual(out('%(formats.-1.id)s'), str(FORMATS[-1]['id']))
self.assertEqual(out('%(formats.3)s'), 'NA')
self.assertEqual(out('%(formats.:2:-1)r'), repr(FORMATS[:2:-1]))
self.assertEqual(out('%(formats.0.id.-1+id)f'), '1235.000000')
def test_prepare_filename(self):
def fname(templ):
params = {'outtmpl': templ}
ydl = YoutubeDL(params)
return ydl.prepare_filename(self.outtmpl_info)
self.assertEqual(fname('%%'), '%')
self.assertEqual(fname('%%%%'), '%%')
self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
self.assertEqual(fname('%(timestamp+-1000>%H-%M-%S)s'), '11-43-20')
self.assertEqual(fname('%(id+1)05d'), '01235')
self.assertEqual(fname('%(width+100)05d'), 'NA')
self.assertEqual(fname('%(formats.0)s').replace("u", ""), "{'id' - 'id1'}")
self.assertEqual(fname('%(formats.-1.id)s'), 'id2')
self.assertEqual(fname('%(formats.2)s'), 'NA')
self.assertEqual(fname('%(id)r %(height)r'), "'1234' 1080")
self.assertEqual(fname('%(formats.0)r'), "{'id' - 'id1'}")
def test_format_note(self):
ydl = YoutubeDL()

View File

@ -8,7 +8,10 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_shlex_quote
from yt_dlp.postprocessor import (
ExecAfterDownloadPP,
FFmpegThumbnailsConvertorPP,
MetadataFromFieldPP,
MetadataFromTitlePP,
@ -55,3 +58,14 @@ class TestConvertThumbnail(unittest.TestCase):
for _, out in tests:
os.remove(file.format(out))
class TestExecAfterDownload(unittest.TestCase):
def test_parse_cmd(self):
pp = ExecAfterDownloadPP(YoutubeDL(), '')
info = {'filepath': 'file name'}
quoted_filepath = compat_shlex_quote(info['filepath'])
self.assertEqual(pp.parse_cmd('echo', info), 'echo %s' % quoted_filepath)
self.assertEqual(pp.parse_cmd('echo.{}', info), 'echo.%s' % quoted_filepath)
self.assertEqual(pp.parse_cmd('echo "%(filepath)s"', info), 'echo "%s"' % info['filepath'])

View File

@ -64,7 +64,7 @@ from .utils import (
float_or_none,
format_bytes,
format_field,
FORMAT_RE,
STR_FORMAT_RE,
formatSeconds,
GeoRestrictedError,
int_or_none,
@ -815,52 +815,26 @@ class YoutubeDL(object):
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
""" Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
template_dict = dict(info_dict)
info_dict = dict(info_dict)
na = self.params.get('outtmpl_na_placeholder', 'NA')
# duration_string
template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
formatSeconds(info_dict['duration'], '-' if sanitize else ':')
if info_dict.get('duration', None) is not None
else None)
# epoch
template_dict['epoch'] = int(time.time())
# autonumber
autonumber_size = self.params.get('autonumber_size')
if autonumber_size is None:
autonumber_size = 5
template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
# resolution if not defined
if template_dict.get('resolution') is None:
if template_dict.get('width') and template_dict.get('height'):
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
elif template_dict.get('height'):
template_dict['resolution'] = '%sp' % template_dict['height']
elif template_dict.get('width'):
template_dict['resolution'] = '%dx?' % template_dict['width']
info_dict['epoch'] = int(time.time())
info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
if info_dict.get('resolution') is None:
info_dict['resolution'] = self.format_resolution(info_dict, default=None)
# For fields playlist_index and autonumber convert all occurrences
# of %(field)s to %(field)0Nd for backward compatibility
field_size_compat_map = {
'playlist_index': len(str(template_dict.get('_last_playlist_index') or '')),
'autonumber': autonumber_size,
'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
'autonumber': self.params.get('autonumber_size') or 5,
}
FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
if mobj:
outtmpl = re.sub(
FIELD_SIZE_COMPAT_RE,
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
outtmpl)
numeric_fields = list(self._NUMERIC_FIELDS)
if sanitize is None:
sanitize = lambda k, v: v
EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P<key>[^)]*)')
EXTERNAL_FORMAT_RE = STR_FORMAT_RE.format('[^)]*')
# Field is of the form key1.key2...
# where keys (except first) can be string, int or slice
FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
@ -876,71 +850,76 @@ class YoutubeDL(object):
'+': float.__add__,
'-': float.__sub__,
}
for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl):
final_key = outer_mobj.group('key')
str_type = outer_mobj.group('type')
value = None
mobj = re.match(INTERNAL_FORMAT_RE, final_key)
if mobj is not None:
mobj = mobj.groupdict()
# Object traversal
fields = mobj['fields'].split('.')
value = traverse_dict(template_dict, fields)
# Negative
if mobj['negate']:
value = float_or_none(value)
if value is not None:
value *= -1
# Do maths
if mobj['maths']:
value = float_or_none(value)
operator = None
for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]:
if item == '':
value = None
if value is None:
break
if operator:
item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
offset = float_or_none(item)
if offset is None:
offset = float_or_none(traverse_dict(template_dict, item.split('.')))
try:
value = operator(value, multiplier * offset)
except (TypeError, ZeroDivisionError):
value = None
operator = None
else:
operator = MATH_FUNCTIONS[item]
# Datetime formatting
if mobj['strf_format']:
value = strftime_or_none(value, mobj['strf_format'])
# Set default
if value is None and mobj['default'] is not None:
value = mobj['default']
# Sanitize
if str_type in 'crs' and value is not None: # string
value = sanitize('%{}'.format(str_type) % fields[-1], value)
else: # numeric
numeric_fields.append(final_key)
tmpl_dict = {}
def get_value(mdict):
# Object traversal
fields = mdict['fields'].split('.')
value = traverse_dict(info_dict, fields)
# Negative
if mdict['negate']:
value = float_or_none(value)
if value is not None:
template_dict[final_key] = value
if value is not None:
value *= -1
# Do maths
if mdict['maths']:
value = float_or_none(value)
operator = None
for item in MATH_OPERATORS_RE.split(mdict['maths'])[1:]:
if item == '' or value is None:
return None
if operator:
item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
offset = float_or_none(item)
if offset is None:
offset = float_or_none(traverse_dict(info_dict, item.split('.')))
try:
value = operator(value, multiplier * offset)
except (TypeError, ZeroDivisionError):
return None
operator = None
else:
operator = MATH_FUNCTIONS[item]
# Datetime formatting
if mdict['strf_format']:
value = strftime_or_none(value, mdict['strf_format'])
# Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since
# string NA placeholder is returned for missing fields. We will patch
# output template for missing fields to meet string presentation type.
for numeric_field in numeric_fields:
if template_dict.get(numeric_field) is None:
outtmpl = re.sub(
FORMAT_RE.format(re.escape(numeric_field)),
r'%({0})s'.format(numeric_field), outtmpl)
return value
template_dict = collections.defaultdict(lambda: na, (
(k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
for k, v in template_dict.items() if v is not None))
return outtmpl, template_dict
def create_key(outer_mobj):
if not outer_mobj.group('has_key'):
return '%{}'.format(outer_mobj.group(0))
key = outer_mobj.group('key')
fmt = outer_mobj.group('format')
mobj = re.match(INTERNAL_FORMAT_RE, key)
if mobj is None:
value, default = None, na
else:
mobj = mobj.groupdict()
default = mobj['default'] if mobj['default'] is not None else na
value = get_value(mobj)
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
fmt = '0{:d}d'.format(field_size_compat_map[key])
value = default if value is None else value
key += '\0%s' % fmt
if fmt[-1] not in 'crs': # numeric
value = float_or_none(value)
if value is None:
value, fmt = default, 's'
if sanitize:
if fmt[-1] == 'r':
# If value is an object, sanitize might convert it to a string
# So we convert it to repr first
value, fmt = repr(value), '%ss' % fmt[:-1]
value = sanitize(key, value)
tmpl_dict[key] = value
return '%({key}){fmt}'.format(key=key, fmt=fmt)
return re.sub(EXTERNAL_FORMAT_RE, create_key, outtmpl), tmpl_dict
def _prepare_filename(self, info_dict, tmpl_type='default'):
try:
@ -966,7 +945,7 @@ class YoutubeDL(object):
force_ext = OUTTMPL_TYPES.get(tmpl_type)
if force_ext is not None:
filename = replace_extension(filename, force_ext, template_dict.get('ext'))
filename = replace_extension(filename, force_ext, info_dict.get('ext'))
# https://github.com/blackjack4494/youtube-dlc/issues/85
trim_file_name = self.params.get('trim_file_name', False)

View File

@ -1,13 +1,11 @@
from __future__ import unicode_literals
import re
import subprocess
from .common import PostProcessor
from ..compat import compat_shlex_quote
from ..utils import (
encodeArgument,
FORMAT_RE,
PostProcessingError,
)
@ -23,14 +21,14 @@ class ExecAfterDownloadPP(PostProcessor):
return 'Exec'
def parse_cmd(self, cmd, info):
# If no %(key)s is found, replace {} for backard compatibility
if not re.search(FORMAT_RE.format(r'[^)]*'), cmd):
if '{}' not in cmd:
cmd += ' {}'
return cmd.replace('{}', compat_shlex_quote(info['filepath']))
tmpl, tmpl_dict = self._downloader.prepare_outtmpl(cmd, info)
if tmpl_dict: # if there are no replacements, tmpl_dict = {}
return tmpl % tmpl_dict
tmpl, info_copy = self._downloader.prepare_outtmpl(cmd, info)
return tmpl % info_copy
# If no replacements are found, replace {} for backard compatibility
if '{}' not in cmd:
cmd += ' {}'
return cmd.replace('{}', compat_shlex_quote(info['filepath']))
def run(self, info):
cmd = self.parse_cmd(self.exec_cmd, info)

View File

@ -54,9 +54,9 @@ class MetadataFromFieldPP(PostProcessor):
def run(self, info):
for dictn in self._data:
tmpl, info_copy = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
data_to_parse = tmpl % info_copy
self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], tmpl))
tmpl, tmpl_dict = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
data_to_parse = tmpl % tmpl_dict
self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], dictn['tmpl']))
match = re.search(dictn['regex'], data_to_parse)
if match is None:
self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out']))

View File

@ -4393,15 +4393,17 @@ OUTTMPL_TYPES = {
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
FORMAT_RE = r'''(?x)
STR_FORMAT_RE = r'''(?x)
(?<!%)
%
\({0}\) # mapping key
(?:[#0\-+ ]+)? # conversion flags (optional)
(?:\d+)? # minimum field width (optional)
(?:\.\d+)? # precision (optional)
[hlL]? # length modifier (optional)
(?P<type>[diouxXeEfFgGcrs%]) # conversion type
(?P<has_key>\((?P<key>{0})\))? # mapping key
(?P<format>
(?:[#0\-+ ]+)? # conversion flags (optional)
(?:\d+)? # minimum field width (optional)
(?:\.\d+)? # precision (optional)
[hlL]? # length modifier (optional)
[diouxXeEfFgGcrs] # conversion type
)
'''