[youtube] Extract more thumbnails

* The thumbnail URLs are hard-coded and their actual existence is tested lazily
* Added option `--no-check-formats` to skip testing them

Closes #340, Related: #402, #337, https://github.com/ytdl-org/youtube-dl/issues/29049
This commit is contained in:
pukkandan 2021-07-15 22:49:59 +05:30
parent d9488f69c1
commit 0ba692acc8
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698
5 changed files with 61 additions and 34 deletions

View File

@ -638,7 +638,9 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
--no-prefer-free-formats Don't give any special preference to free --no-prefer-free-formats Don't give any special preference to free
containers (default) containers (default)
--check-formats Check that the formats selected are --check-formats Check that the formats selected are
actually downloadable (Experimental) actually downloadable
--no-check-formats Do not check that the formats selected are
actually downloadable
-F, --list-formats List all available formats of requested -F, --list-formats List all available formats of requested
videos videos
--merge-output-format FORMAT If a merge is required (e.g. --merge-output-format FORMAT If a merge is required (e.g.

View File

@ -209,6 +209,9 @@ class YoutubeDL(object):
into a single file into a single file
allow_multiple_audio_streams: Allow multiple audio streams to be merged allow_multiple_audio_streams: Allow multiple audio streams to be merged
into a single file into a single file
check_formats: Whether to test if the formats are downloadable.
Can be True (check all), False (check none)
or None (check only if requested by extractor)
paths: Dictionary of output paths. The allowed keys are 'home' paths: Dictionary of output paths. The allowed keys are 'home'
'temp' and the keys of OUTTMPL_TYPES (in utils.py) 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
outtmpl: Dictionary of templates for output names. Allowed keys outtmpl: Dictionary of templates for output names. Allowed keys
@ -1944,15 +1947,24 @@ class YoutubeDL(object):
t.get('id') if t.get('id') is not None else '', t.get('id') if t.get('id') is not None else '',
t.get('url'))) t.get('url')))
def test_thumbnail(t): def thumbnail_tester():
self.to_screen('[info] Testing thumbnail %s' % t['id']) if self.params.get('check_formats'):
try: def to_screen(msg):
self.urlopen(HEADRequest(t['url'])) return self.to_screen(f'[info] {msg}')
except network_exceptions as err: else:
self.to_screen('[info] Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % ( to_screen = self.write_debug
t['id'], t['url'], error_to_compat_str(err)))
return False def test_thumbnail(t):
return True to_screen('Testing thumbnail %s' % t['id'])
try:
self.urlopen(HEADRequest(t['url']))
except network_exceptions as err:
to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
t['id'], t['url'], error_to_compat_str(err)))
return False
return True
return test_thumbnail
for i, t in enumerate(thumbnails): for i, t in enumerate(thumbnails):
if t.get('id') is None: if t.get('id') is None:
@ -1960,8 +1972,11 @@ class YoutubeDL(object):
if t.get('width') and t.get('height'): if t.get('width') and t.get('height'):
t['resolution'] = '%dx%d' % (t['width'], t['height']) t['resolution'] = '%dx%d' % (t['width'], t['height'])
t['url'] = sanitize_url(t['url']) t['url'] = sanitize_url(t['url'])
if self.params.get('check_formats'):
info_dict['thumbnails'] = LazyList(filter(test_thumbnail, thumbnails[::-1])).reverse() if self.params.get('check_formats') is not False:
info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
else:
info_dict['thumbnails'] = thumbnails
def process_video_result(self, info_dict, download=True): def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video' assert info_dict.get('_type', 'video') == 'video'

View File

@ -229,6 +229,7 @@ class InfoExtractor(object):
* "resolution" (optional, string "{width}x{height}", * "resolution" (optional, string "{width}x{height}",
deprecated) deprecated)
* "filesize" (optional, int) * "filesize" (optional, int)
* "_test_url" (optional, bool) - If true, test the URL
thumbnail: Full URL to a video thumbnail image. thumbnail: Full URL to a video thumbnail image.
description: Full video description. description: Full video description.
uploader: Full name of the video uploader. uploader: Full name of the video uploader.

View File

@ -2645,7 +2645,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f['stretched_ratio'] = ratio f['stretched_ratio'] = ratio
break break
category = microformat.get('category') or search_meta('genre')
channel_id = video_details.get('channelId') \
or microformat.get('externalChannelId') \
or search_meta('channelId')
duration = int_or_none(
video_details.get('lengthSeconds')
or microformat.get('lengthSeconds')) \
or parse_duration(search_meta('duration'))
is_live = video_details.get('isLive')
is_upcoming = video_details.get('isUpcoming')
owner_profile_url = microformat.get('ownerProfileUrl')
thumbnails = [] thumbnails = []
thumbnail_types = ['maxresdefault', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default', '1', '2', '3']
for container in (video_details, microformat): for container in (video_details, microformat):
for thumbnail in (try_get( for thumbnail in (try_get(
container, container,
@ -2662,34 +2676,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': thumbnail_url, 'url': thumbnail_url,
'height': int_or_none(thumbnail.get('height')), 'height': int_or_none(thumbnail.get('height')),
'width': int_or_none(thumbnail.get('width')), 'width': int_or_none(thumbnail.get('width')),
'preference': 1 if 'maxresdefault' in thumbnail_url else -1
}) })
thumbnail_url = search_meta(['og:image', 'twitter:image']) thumbnail_url = search_meta(['og:image', 'twitter:image'])
if thumbnail_url: if thumbnail_url:
thumbnails.append({ thumbnails.append({
'url': thumbnail_url, 'url': thumbnail_url,
'preference': 1 if 'maxresdefault' in thumbnail_url else -1
}) })
# All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage # The best resolution thumbnails sometimes do not appear in the webpage
# See: https://github.com/ytdl-org/youtube-dl/issues/29049 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
thumbnails.append({ thumbnails.extend({
'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id, 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
'preference': 1, video_id=video_id, name=name, ext=ext,
}) webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
'_test_url': True,
} for name in thumbnail_types for ext in ('webp', 'jpg'))
for thumb in thumbnails:
i = next((i for i, t in enumerate(thumbnail_types) if f'/{video_id}/{t}' in thumb['url']), 20)
thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
self._remove_duplicate_formats(thumbnails) self._remove_duplicate_formats(thumbnails)
category = microformat.get('category') or search_meta('genre')
channel_id = video_details.get('channelId') \
or microformat.get('externalChannelId') \
or search_meta('channelId')
duration = int_or_none(
video_details.get('lengthSeconds')
or microformat.get('lengthSeconds')) \
or parse_duration(search_meta('duration'))
is_live = video_details.get('isLive')
is_upcoming = video_details.get('isUpcoming')
owner_profile_url = microformat.get('ownerProfileUrl')
info = { info = {
'id': video_id, 'id': video_id,
'title': self._live_title(video_title) if is_live else video_title, 'title': self._live_title(video_title) if is_live else video_title,

View File

@ -524,8 +524,12 @@ def parseOpts(overrideArguments=None):
help="Don't give any special preference to free containers (default)") help="Don't give any special preference to free containers (default)")
video_format.add_option( video_format.add_option(
'--check-formats', '--check-formats',
action='store_true', dest='check_formats', default=False, action='store_true', dest='check_formats', default=None,
help="Check that the formats selected are actually downloadable (Experimental)") help='Check that the formats selected are actually downloadable')
video_format.add_option(
'--no-check-formats',
action='store_false', dest='check_formats',
help='Do not check that the formats selected are actually downloadable')
video_format.add_option( video_format.add_option(
'-F', '--list-formats', '-F', '--list-formats',
action='store_true', dest='listformats', action='store_true', dest='listformats',