[Youtube] Improve comment API requests
co-authored by bbepis
This commit is contained in:
parent
0748b3317b
commit
8d0ea5f955
|
@@ -2485,17 +2485,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
ncd = next(search_dict(yt_initial_data, 'nextContinuationData'))
|
ncd = next(search_dict(yt_initial_data, 'nextContinuationData'))
|
||||||
continuations = [(ncd['continuation'], ncd['clickTrackingParams'])]
|
continuations = [ncd['continuation']]
|
||||||
# Handle videos where comments have been disabled entirely
|
# Handle videos where comments have been disabled entirely
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
continuations = []
|
continuations = []
|
||||||
|
|
||||||
def get_continuation(continuation, itct, session_token, replies=False):
|
def get_continuation(continuation, session_token, replies=False):
|
||||||
query = {
|
query = {
|
||||||
'pbj': 1,
|
'pbj': 1,
|
||||||
'ctoken': continuation,
|
'ctoken': continuation,
|
||||||
'continuation': continuation,
|
|
||||||
'itct': itct,
|
|
||||||
}
|
}
|
||||||
if replies:
|
if replies:
|
||||||
query['action_get_comment_replies'] = 1
|
query['action_get_comment_replies'] = 1
|
||||||
|
@@ -2523,23 +2521,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
response_code = handle.getcode()
|
response_code = handle.getcode()
|
||||||
if (response_code == 200):
|
if (response_code == 200):
|
||||||
return self._parse_json(content, video_id)
|
return self._parse_json(content, video_id)
|
||||||
if (response_code == 413): # Sometimes google makes continuations that are too big to be accepted by themselves. Grade A engineering
|
if (response_code == 413):
|
||||||
# self.to_screen(json.dumps(query))
|
|
||||||
# self.to_screen('Google API rate limit detected; waiting 30 seconds before continuing')
|
|
||||||
# time.sleep(30)
|
|
||||||
# continue
|
|
||||||
return None
|
return None
|
||||||
raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
|
raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
|
||||||
|
|
||||||
first_continuation = True
|
first_continuation = True
|
||||||
while continuations:
|
while continuations:
|
||||||
continuation, itct = continuations.pop()
|
continuation, itct = continuations.pop()
|
||||||
comment_response = get_continuation(continuation, itct, xsrf_token)
|
comment_response = get_continuation(continuation, xsrf_token)
|
||||||
if not comment_response:
|
if not comment_response:
|
||||||
continue
|
continue
|
||||||
if list(search_dict(comment_response, 'externalErrorMessage')):
|
if list(search_dict(comment_response, 'externalErrorMessage')):
|
||||||
raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
|
raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
|
||||||
|
|
||||||
|
if 'continuationContents' not in comment_response['response']:
|
||||||
|
# Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
|
||||||
|
continue
|
||||||
|
# not sure if this actually helps
|
||||||
|
if 'xsrf_token' in comment_response:
|
||||||
|
xsrf_token = comment_response['xsrf_token']
|
||||||
|
|
||||||
item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
|
item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
|
||||||
if first_continuation:
|
if first_continuation:
|
||||||
expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
|
expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
|
||||||
|
@@ -2554,7 +2555,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
video_comments.append({
|
video_comments.append({
|
||||||
'id': comment['commentId'],
|
'id': comment['commentId'],
|
||||||
'text': ''.join([c['text'] for c in comment['contentText']['runs']]),
|
'text': ''.join([c['text'] for c in comment['contentText']['runs']]),
|
||||||
'time_text': comment['publishedTimeText']['runs'][0]['text'],
|
'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
|
||||||
'author': comment.get('authorText', {}).get('simpleText', ''),
|
'author': comment.get('authorText', {}).get('simpleText', ''),
|
||||||
'votes': comment.get('voteCount', {}).get('simpleText', '0'),
|
'votes': comment.get('voteCount', {}).get('simpleText', '0'),
|
||||||
'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
|
'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
|
||||||
|
@@ -2563,14 +2564,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
if 'replies' not in meta_comment['commentThreadRenderer']:
|
if 'replies' not in meta_comment['commentThreadRenderer']:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
reply_continuation = meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations'][0]['nextContinuationData']
|
reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
|
||||||
continuation = reply_continuation['continuation']
|
while reply_continuations:
|
||||||
itct = reply_continuation['clickTrackingParams']
|
|
||||||
while True:
|
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
replies_data = get_continuation(continuation, itct, xsrf_token, True)
|
continuation = reply_continuations.pop()
|
||||||
|
replies_data = get_continuation(continuation, xsrf_token, True)
|
||||||
if not replies_data or 'continuationContents' not in replies_data[1]['response']:
|
if not replies_data or 'continuationContents' not in replies_data[1]['response']:
|
||||||
break
|
continue
|
||||||
|
|
||||||
if self._downloader.params.get('verbose', False):
|
if self._downloader.params.get('verbose', False):
|
||||||
self.to_screen('[debug] Comments downloaded (chain %s) %s of ~%s' % (comment['commentId'], len(video_comments), expected_video_comment_count))
|
self.to_screen('[debug] Comments downloaded (chain %s) %s of ~%s' % (comment['commentId'], len(video_comments), expected_video_comment_count))
|
||||||
|
@@ -2580,25 +2580,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
video_comments.append({
|
video_comments.append({
|
||||||
'id': reply_comment['commentId'],
|
'id': reply_comment['commentId'],
|
||||||
'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
|
'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
|
||||||
'time_text': reply_comment['publishedTimeText']['runs'][0]['text'],
|
'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
|
||||||
'author': reply_comment.get('authorText', {}).get('simpleText', ''),
|
'author': reply_comment.get('authorText', {}).get('simpleText', ''),
|
||||||
'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
|
'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
|
||||||
'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
|
'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
|
||||||
'parent': comment['commentId']
|
'parent': comment['commentId']
|
||||||
})
|
})
|
||||||
if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
|
if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
|
||||||
break
|
continue
|
||||||
|
|
||||||
continuation = reply_comment_meta['continuations'][0]['nextContinuationData']['continuation']
|
reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
|
||||||
itct = reply_comment_meta['continuations'][0]['nextContinuationData']['clickTrackingParams']
|
|
||||||
|
|
||||||
self.to_screen('Comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
|
self.to_screen('Comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
|
||||||
|
|
||||||
if 'continuations' in item_section:
|
if 'continuations' in item_section:
|
||||||
new_continuations = [
|
continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
|
||||||
(ncd['nextContinuationData']['continuation'], ncd['nextContinuationData']['clickTrackingParams'])
|
|
||||||
for ncd in item_section['continuations']]
|
|
||||||
continuations += new_continuations
|
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
self.to_screen('Total comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
|
self.to_screen('Total comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
|
||||||
|
|
Loading…
Reference in New Issue