[youtube] Extract even more thumbnails and reduce testing

* Also fix bug where `_test_url` was being ignored

Ref: https://stackoverflow.com/a/20542029
Related: #340
This commit is contained in:
pukkandan 2021-07-20 03:40:35 +05:30
parent c634ad2a3c
commit cca80fe611
2 changed files with 18 additions and 7 deletions

View File

@ -1949,12 +1949,15 @@ class YoutubeDL(object):
def thumbnail_tester():
if self.params.get('check_formats'):
def to_screen(msg):
return self.to_screen(f'[info] {msg}')
test_all = True
to_screen = lambda msg: self.to_screen(f'[info] {msg}')
else:
test_all = False
to_screen = self.write_debug
def test_thumbnail(t):
if not test_all and not t.get('_test_url'):
return True
to_screen('Testing thumbnail %s' % t['id'])
try:
self.urlopen(HEADRequest(t['url']))

View File

@ -2658,8 +2658,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
owner_profile_url = microformat.get('ownerProfileUrl')
thumbnails = []
thumbnail_types = ['maxresdefault', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default', '1', '2', '3']
for container in (video_details, microformat):
for thumbnail in (try_get(
container,
@ -2684,14 +2682,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
})
# The best resolution thumbnails sometimes do not appear in the webpage
# See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
guaranteed_thumbnail_names = [
'hqdefault', 'hq1', 'hq2', 'hq3', '0',
'mqdefault', 'mq1', 'mq2', 'mq3',
'default', '1', '2', '3'
]
thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
n_thumbnail_names = len(thumbnail_names)
thumbnails.extend({
'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
video_id=video_id, name=name, ext=ext,
webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
'_test_url': True,
} for name in thumbnail_types for ext in ('webp', 'jpg'))
'_test_url': name in hq_thumbnail_names,
} for name in thumbnail_names for ext in ('webp', 'jpg'))
for thumb in thumbnails:
i = next((i for i, t in enumerate(thumbnail_types) if f'/{video_id}/{t}' in thumb['url']), 20)
i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
self._remove_duplicate_formats(thumbnails)