Merge branch 'tiktok' of https://github.com/skyme5/youtube-dl into skyme5-tiktok

This commit is contained in:
Tom-Oliver Heidel 2020-09-12 05:49:52 +02:00
commit 6cef08ad00
2 changed files with 103 additions and 108 deletions

View File

@ -1165,10 +1165,7 @@ from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE from .thisoldhouse import ThisOldHouseIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from .tiktok import ( from .tiktok import TikTokIE
TikTokIE,
TikTokUserIE,
)
from .tinypic import TinyPicIE from .tinypic import TinyPicIE
from .tmz import ( from .tmz import (
TMZIE, TMZIE,

View File

@ -1,138 +1,136 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from datetime import datetime
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_str,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
str_or_none, str_or_none,
try_get, try_get
url_or_none,
) )
class TikTokBaseIE(InfoExtractor): class TikTokBaseIE(InfoExtractor):
def _extract_aweme(self, data): def _extract_aweme(self, video_data, webpage):
video = data['video'] video_info = try_get(
description = str_or_none(try_get(data, lambda x: x['desc'])) video_data, lambda x: x['videoData']['itemInfos'], dict)
width = int_or_none(try_get(data, lambda x: video['width'])) author_info = try_get(
height = int_or_none(try_get(data, lambda x: video['height'])) video_data, lambda x: x['videoData']['authorInfos'], dict)
share_info = try_get(video_data, lambda x: x['shareMeta'], dict)
unique_id = str_or_none(author_info.get('uniqueId'))
timestamp = try_get(video_info, lambda x: int(x['createTime']), int)
date = datetime.fromtimestamp(timestamp).strftime('%Y%m%d')
height = try_get(video_info, lambda x: x['video']['videoMeta']['height'], int)
width = try_get(video_info, lambda x: x['video']['videoMeta']['width'], int)
thumbnails = []
thumbnails.append({
'url': video_info.get('thumbnail') or self._og_search_thumbnail(webpage),
'width': width,
'height': height
})
format_urls = set()
formats = [] formats = []
for format_id in ( formats.append({
'play_addr_lowbr', 'play_addr', 'play_addr_h264', 'url': try_get(video_info, lambda x: x['video']['urls'][0]),
'download_addr'): 'ext': 'mp4',
for format in try_get( 'height': height,
video, lambda x: x[format_id]['url_list'], list) or []: 'width': width
format_url = url_or_none(format) })
if not format_url:
continue
if format_url in format_urls:
continue
format_urls.add(format_url)
formats.append({
'url': format_url,
'ext': 'mp4',
'height': height,
'width': width,
})
self._sort_formats(formats)
thumbnail = url_or_none(try_get(
video, lambda x: x['cover']['url_list'][0], compat_str))
uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
timestamp = int_or_none(data.get('create_time'))
comment_count = int_or_none(data.get('comment_count')) or int_or_none(
try_get(data, lambda x: x['statistics']['comment_count']))
repost_count = int_or_none(try_get(
data, lambda x: x['statistics']['share_count']))
aweme_id = data['aweme_id']
return { return {
'id': aweme_id, 'comment_count': int_or_none(video_info.get('commentCount')),
'title': uploader or aweme_id, 'duration': try_get(video_info, lambda x: x['video']['videoMeta']['duration'], int),
'description': description, 'height': height,
'thumbnail': thumbnail, 'id': str_or_none(video_info.get('id')),
'uploader': uploader, 'like_count': int_or_none(video_info.get('diggCount')),
'repost_count': int_or_none(video_info.get('shareCount')),
'thumbnail': try_get(video_info, lambda x: x['covers'][0]),
'timestamp': timestamp, 'timestamp': timestamp,
'comment_count': comment_count, 'width': width,
'repost_count': repost_count, 'title': str_or_none(share_info.get('title')) or self._og_search_title(webpage),
'formats': formats, 'creator': str_or_none(author_info.get('nickName')),
'uploader': unique_id,
'uploader_id': str_or_none(author_info.get('userId')),
'uploader_url': 'https://www.tiktok.com/@' + unique_id,
'thumbnails': thumbnails,
'upload_date': date,
'webpage_url': self._og_search_url(webpage),
'description': str_or_none(video_info.get('text')) or str_or_none(share_info.get('desc')),
'ext': 'mp4',
'formats': formats
} }
class TikTokIE(TikTokBaseIE): class TikTokIE(TikTokBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'https?://www\.tiktok\.com/@[\w\._]+/video/(?P<id>\d+)'
https?://
(?:
(?:m\.)?tiktok\.com/v|
(?:www\.)?tiktok\.com/share/video
)
/(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://m.tiktok.com/v/6606727368545406213.html', 'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
'md5': 'd584b572e92fcd48888051f238022420', 'md5': '34a7543afd5a151b0840ba6736fb633b',
'info_dict': { 'info_dict': {
'id': '6606727368545406213',
'ext': 'mp4',
'title': 'Zureeal',
'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
'thumbnail': r're:^https?://.*~noop.image',
'uploader': 'Zureeal',
'timestamp': 1538248586,
'upload_date': '20180929',
'comment_count': int, 'comment_count': int,
'creator': 'facestoriesbyleenabh',
'description': 'md5:a9f6c0c44a1ff2249cae610372d0ae95',
'duration': 13,
'ext': 'mp4',
'formats': list,
'height': 1280,
'id': '6748451240264420610',
'like_count': int,
'repost_count': int, 'repost_count': int,
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
'thumbnails': list,
'timestamp': 1571246252,
'title': 'facestoriesbyleenabh on TikTok',
'upload_date': '20191016',
'uploader': 'leenabhushan',
'uploader_id': '6691488002098119685',
'uploader_url': r're:https://www.tiktok.com/@leenabhushan',
'webpage_url': r're:https://www.tiktok.com/@leenabhushan/(video/)?6748451240264420610',
'width': 720,
} }
}, { }, {
'url': 'https://www.tiktok.com/share/video/6606727368545406213', 'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
'only_matching': True, 'md5': '06b9800d47d5fe51a19e322dd86e61c9',
'info_dict': {
'comment_count': int,
'creator': 'patroX',
'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
'duration': 27,
'ext': 'mp4',
'formats': list,
'height': 960,
'id': '6742501081818877190',
'like_count': int,
'repost_count': int,
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
'thumbnails': list,
'timestamp': 1569860870,
'title': 'patroX on TikTok',
'upload_date': '20190930',
'uploader': 'patroxofficial',
'uploader_id': '18702747',
'uploader_url': r're:https://www.tiktok.com/@patroxofficial',
'webpage_url': r're:https://www.tiktok.com/@patroxofficial/(video/)?6742501081818877190',
'width': 540,
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(
'https://m.tiktok.com/v/%s.html' % video_id, video_id)
data = self._parse_json(self._search_regex(
r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
return self._extract_aweme(data)
webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
json_string = self._search_regex(
r'id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P<json_string_ld>[^<]+)',
webpage, 'json_string', group='json_string_ld')
json_data = self._parse_json(json_string, video_id)
video_data = try_get(json_data, lambda x: x['props']['pageProps'], expected_type=dict)
class TikTokUserIE(TikTokBaseIE): # Chech statusCode for success
_VALID_URL = r'''(?x) if video_data.get('statusCode') == 0:
https?:// return self._extract_aweme(video_data, webpage)
(?:
(?:m\.)?tiktok\.com/h5/share/usr|
(?:www\.)?tiktok\.com/share/user
)
/(?P<id>\d+)
'''
_TESTS = [{
'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
'info_dict': {
'id': '188294915489964032',
},
'playlist_mincount': 24,
}, {
'url': 'https://www.tiktok.com/share/user/188294915489964032',
'only_matching': True,
}]
def _real_extract(self, url): raise ExtractorError('Video not available', video_id=video_id)
user_id = self._match_id(url)
data = self._download_json(
'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id,
query={'_signature': '_'})
entries = []
for aweme in data['aweme_list']:
try:
entry = self._extract_aweme(aweme)
except ExtractorError:
continue
entry['extractor_key'] = TikTokIE.ie_key()
entries.append(entry)
return self.playlist_result(entries, user_id)