From 321bf820c577f34593ff0462775e43875c8d886d Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 5 Aug 2020 03:30:10 +0300 Subject: [PATCH] check live chat replay existence --- youtube_dl/YoutubeDL.py | 7 +++--- youtube_dl/extractor/youtube.py | 39 ++++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f9aa91f30..1b8a938e5 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1838,10 +1838,11 @@ class YoutubeDL(object): return else: try: - # TODO does this transfer session...? - # TODO exceptions dl(sub_filename, sub_info) - except (ExtractorError, IOError, OSError, ValueError) as err: + except ( + ExtractorError, IOError, OSError, ValueError, + compat_urllib_error.URLError, + compat_http_client.HTTPException, socket.error) as err: self.report_warning('Unable to download subtitle for "%s": %s' % (sub_lang, error_to_compat_str(err))) continue diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e554702e7..782aba6ff 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1435,7 +1435,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): raise ExtractorError( 'Signature extraction failed: ' + tb, cause=e) - def _get_subtitles(self, video_id, webpage): + def _get_subtitles(self, video_id, webpage, is_live_content): try: subs_doc = self._download_xml( 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, @@ -1462,14 +1462,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': ext, }) sub_lang_list[lang] = sub_formats - # TODO check that live chat replay actually exists - sub_lang_list['live_chat'] = [ - { - 'video_id': video_id, - 'ext': 'json', - 'protocol': 'youtube_live_chat_replay', - }, - ] + if is_live_content: + sub_lang_list['live_chat'] = [ + { + 'video_id': video_id, + 'ext': 'json', + 'protocol': 'youtube_live_chat_replay', + }, + ] if not sub_lang_list: self._downloader.report_warning('video doesn\'t have subtitles') return {} @@ -1493,6 +1493,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return self._parse_json( uppercase_escape(config), video_id, fatal=False) + def _get_yt_initial_data(self, video_id, webpage): + config = self._search_regex( + r'window\["ytInitialData"\]\s*=\s*(.*);', + webpage, 'ytInitialData', default=None) + if config: + return self._parse_json( + uppercase_escape(config), video_id, fatal=False) + def _get_automatic_captions(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an argument to speed up the process.""" @@ -1992,6 +2000,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if is_live is None: is_live = bool_or_none(video_details.get('isLive')) + has_live_chat_replay = False + is_live_content = bool_or_none(video_details.get('isLiveContent')) + if not is_live and is_live_content: + yt_initial_data = self._get_yt_initial_data(video_id, video_webpage) + try: + yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] + has_live_chat_replay = True + except (KeyError, IndexError): + pass + # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True) @@ -2399,7 +2417,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0]))) # subtitles - video_subtitles = self.extract_subtitles(video_id, video_webpage) + video_subtitles = self.extract_subtitles( + video_id, video_webpage, has_live_chat_replay) automatic_captions = self.extract_automatic_captions(video_id, video_webpage) video_duration = try_get(