Merge pull request #126 from nao20010128nao/master

[instagram] Fix extractor
This commit is contained in:
Tom-Oliver Heidel 2020-11-30 02:50:39 +01:00 committed by GitHub
commit 929576bb9e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 15 additions and 8 deletions

View File

@ -126,16 +126,23 @@ class InstagramIE(InfoExtractor):
uploader_id, like_count, comment_count, comments, height, uploader_id, like_count, comment_count, comments, height,
width) = [None] * 11 width) = [None] * 11
shared_data = self._parse_json( shared_data = try_get(webpage,
self._search_regex( (lambda x: self._parse_json(
r'window\._sharedData\s*=\s*({.+?});', self._search_regex(
webpage, 'shared data', default='{}'), r'window\.__additionalDataLoaded\(\'/(?:p|tv)/(?:[^/?#&]+)/\',({.+?})\);',
video_id, fatal=False) x, 'additional data', default='{}'),
video_id, fatal=False),
lambda x: self._parse_json(
self._search_regex(
r'window\._sharedData\s*=\s*({.+?});',
x, 'shared data', default='{}'),
video_id, fatal=False)['entry_data']['PostPage'][0]),
None)
if shared_data: if shared_data:
media = try_get( media = try_get(
shared_data, shared_data,
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'], (lambda x: x['graphql']['shortcode_media'],
lambda x: x['entry_data']['PostPage'][0]['media']), lambda x: x['media']),
dict) dict)
if media: if media:
video_url = media.get('video_url') video_url = media.get('video_url')
@ -144,7 +151,7 @@ class InstagramIE(InfoExtractor):
description = try_get( description = try_get(
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
compat_str) or media.get('caption') compat_str) or media.get('caption')
thumbnail = media.get('display_src') thumbnail = media.get('display_src') or media.get('thumbnail_src')
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date')) timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
uploader = media.get('owner', {}).get('full_name') uploader = media.get('owner', {}).get('full_name')
uploader_id = media.get('owner', {}).get('username') uploader_id = media.get('owner', {}).get('username')