mirror of
https://github.com/blackjack4494/yt-dlc.git
synced 2025-01-03 05:36:07 +00:00
[youtube] Make search extraction less dependent on json schema.
If an object looks like a video (it has a `videoId` key), assume that it is.
This commit is contained in:
parent
19f671f88b
commit
e03b4f3e05
1 changed file with 26 additions and 5 deletions
|
@ -3229,16 +3229,37 @@ class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _find_videos_in_json(self, extracted):
|
||||||
|
videos = []
|
||||||
|
|
||||||
|
def _real_find(obj):
|
||||||
|
if obj is None or isinstance(obj, str):
|
||||||
|
return
|
||||||
|
|
||||||
|
if type(obj) is list:
|
||||||
|
for elem in obj:
|
||||||
|
_real_find(elem)
|
||||||
|
|
||||||
|
if type(obj) is dict:
|
||||||
|
if "videoId" in obj:
|
||||||
|
videos.append(obj)
|
||||||
|
return
|
||||||
|
|
||||||
|
for _, o in obj.items():
|
||||||
|
_real_find(o)
|
||||||
|
|
||||||
|
_real_find(extracted)
|
||||||
|
|
||||||
|
return videos
|
||||||
|
|
||||||
def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
|
def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
|
||||||
search_response = self._parse_json(self._search_regex(self._SEARCH_DATA, page, 'ytInitialData'), None)
|
search_response = self._parse_json(self._search_regex(self._SEARCH_DATA, page, 'ytInitialData'), None)
|
||||||
|
|
||||||
result_items = try_get(
|
result_items = self._find_videos_in_json(search_response)
|
||||||
search_response,
|
|
||||||
lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'])
|
|
||||||
|
|
||||||
for plobj in result_items:
|
for plobj in result_items:
|
||||||
video_id = try_get(plobj, lambda x: x['videoRenderer']['videoId'])
|
video_id = try_get(plobj, lambda x: x['videoId'])
|
||||||
video_title = try_get(plobj, lambda x: x['videoRenderer']['title']['runs'][0]['text'])
|
video_title = try_get(plobj, lambda x: x['title']['runs'][0]['text'])
|
||||||
|
|
||||||
if video_id is None or video_title is None:
|
if video_id is None or video_title is None:
|
||||||
# we do not have a videoRenderer or it is empty
|
# we do not have a videoRenderer or it is empty
|
||||||
|
|
Loading…
Reference in a new issue