From 987d2e079ad0fd45df19b6183d38f83bcd528e9d Mon Sep 17 00:00:00 2001 From: The Hatsune Daishi Date: Fri, 6 Nov 2020 15:15:07 +0900 Subject: [PATCH 1/2] [instagram] Fix extractor --- youtube_dlc/extractor/instagram.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/youtube_dlc/extractor/instagram.py b/youtube_dlc/extractor/instagram.py index b061850a1..bbfe23c76 100644 --- a/youtube_dlc/extractor/instagram.py +++ b/youtube_dlc/extractor/instagram.py @@ -126,16 +126,23 @@ class InstagramIE(InfoExtractor): uploader_id, like_count, comment_count, comments, height, width) = [None] * 11 - shared_data = self._parse_json( - self._search_regex( - r'window\._sharedData\s*=\s*({.+?});', - webpage, 'shared data', default='{}'), - video_id, fatal=False) + shared_data = try_get(webpage, + (lambda x: self._parse_json( + self._search_regex( + r'window\.__additionalDataLoaded\(\'/(?:p|tv)/(?:[^/?#&]+)/\',({.+?})\);', + x, 'additional data', default='{}'), + video_id, fatal=False), + lambda x: self._parse_json( + self._search_regex( + r'window\._sharedData\s*=\s*({.+?});', + x, 'shared data', default='{}'), + video_id, fatal=False)['entry_data']['PostPage'][0]), + None) if shared_data: media = try_get( shared_data, - (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'], - lambda x: x['entry_data']['PostPage'][0]['media']), + (lambda x: x['graphql']['shortcode_media'], + lambda x: x['media']), dict) if media: video_url = media.get('video_url') From a1d6041497c50d59c6d275125d21cd3b613f6a1c Mon Sep 17 00:00:00 2001 From: nao20010128nao Date: Wed, 11 Nov 2020 08:59:09 +0000 Subject: [PATCH 2/2] [instagram] fix thumbnail URL extraction --- youtube_dlc/extractor/instagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dlc/extractor/instagram.py b/youtube_dlc/extractor/instagram.py index bbfe23c76..c3eba0114 100644 --- a/youtube_dlc/extractor/instagram.py +++ b/youtube_dlc/extractor/instagram.py @@ -151,7 +151,7 @@ class InstagramIE(InfoExtractor): description = try_get( media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], compat_str) or media.get('caption') - thumbnail = media.get('display_src') + thumbnail = media.get('display_src') or media.get('thumbnail_src') timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date')) uploader = media.get('owner', {}).get('full_name') uploader_id = media.get('owner', {}).get('username')