[kakao] improve extraction
- support embed URLs
- support Kakao Legacy vid-based embed URLs
- only extract fields used for extraction
- strip description and extract tags
This commit is contained in:
parent
e993f1a095
commit
274bf5e4c5
|
@ -6,14 +6,15 @@ from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
strip_or_none,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class KakaoIE(InfoExtractor):
|
class KakaoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P<id>\d+|[^?#&]+@my)'
|
||||||
_API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks'
|
_API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
|
'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
|
||||||
|
@ -36,7 +37,7 @@ class KakaoIE(InfoExtractor):
|
||||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
||||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
|
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
|
||||||
'uploader_id': 2653210,
|
'uploader_id': 2653210,
|
||||||
'uploader': '쇼 음악중심',
|
'uploader': '쇼! 음악중심',
|
||||||
'timestamp': 1485684628,
|
'timestamp': 1485684628,
|
||||||
'upload_date': '20170129',
|
'upload_date': '20170129',
|
||||||
}
|
}
|
||||||
|
@ -44,6 +45,8 @@ class KakaoIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
display_id = video_id.rstrip('@my')
|
||||||
|
api_base = self._API_BASE_TMPL % video_id
|
||||||
|
|
||||||
player_header = {
|
player_header = {
|
||||||
'Referer': update_url_query(
|
'Referer': update_url_query(
|
||||||
|
@ -55,20 +58,22 @@ class KakaoIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
QUERY_COMMON = {
|
query = {
|
||||||
'player': 'monet_html5',
|
'player': 'monet_html5',
|
||||||
'referer': url,
|
'referer': url,
|
||||||
'uuid': '',
|
'uuid': '',
|
||||||
'service': 'kakao_tv',
|
'service': 'kakao_tv',
|
||||||
'section': '',
|
'section': '',
|
||||||
'dteType': 'PC',
|
'dteType': 'PC',
|
||||||
|
'fields': ','.join([
|
||||||
|
'-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
|
||||||
|
'description', 'channelId', 'createTime', 'duration', 'playCount',
|
||||||
|
'likeCount', 'commentCount', 'tagList', 'channel', 'name',
|
||||||
|
'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault'])
|
||||||
}
|
}
|
||||||
|
|
||||||
query = QUERY_COMMON.copy()
|
|
||||||
query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList'
|
|
||||||
impress = self._download_json(
|
impress = self._download_json(
|
||||||
'%s/%s/impress' % (self._API_BASE, video_id),
|
api_base + 'impress', display_id, 'Downloading video info',
|
||||||
video_id, 'Downloading video info',
|
|
||||||
query=query, headers=player_header)
|
query=query, headers=player_header)
|
||||||
|
|
||||||
clip_link = impress['clipLink']
|
clip_link = impress['clipLink']
|
||||||
|
@ -78,30 +83,27 @@ class KakaoIE(InfoExtractor):
|
||||||
|
|
||||||
tid = impress.get('tid', '')
|
tid = impress.get('tid', '')
|
||||||
|
|
||||||
query = QUERY_COMMON.copy()
|
|
||||||
query.update({
|
query.update({
|
||||||
|
'fields': '-*,outputList,profile,width,height,label,filesize',
|
||||||
'tid': tid,
|
'tid': tid,
|
||||||
'profile': 'HIGH',
|
'profile': 'HIGH',
|
||||||
})
|
})
|
||||||
raw = self._download_json(
|
raw = self._download_json(
|
||||||
'%s/%s/raw' % (self._API_BASE, video_id),
|
api_base + 'raw', display_id, 'Downloading video formats info',
|
||||||
video_id, 'Downloading video formats info',
|
|
||||||
query=query, headers=player_header)
|
query=query, headers=player_header)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for fmt in raw.get('outputList', []):
|
for fmt in raw.get('outputList', []):
|
||||||
try:
|
try:
|
||||||
profile_name = fmt['profile']
|
profile_name = fmt['profile']
|
||||||
|
query.update({
|
||||||
|
'profile': profile_name,
|
||||||
|
'fields': '-*,url',
|
||||||
|
})
|
||||||
fmt_url_json = self._download_json(
|
fmt_url_json = self._download_json(
|
||||||
'%s/%s/raw/videolocation' % (self._API_BASE, video_id),
|
api_base + 'raw/videolocation', display_id,
|
||||||
video_id,
|
|
||||||
'Downloading video URL for profile %s' % profile_name,
|
'Downloading video URL for profile %s' % profile_name,
|
||||||
query={
|
query=query, headers=player_header, fatal=False)
|
||||||
'service': 'kakao_tv',
|
|
||||||
'section': '',
|
|
||||||
'tid': tid,
|
|
||||||
'profile': profile_name
|
|
||||||
}, headers=player_header, fatal=False)
|
|
||||||
|
|
||||||
if fmt_url_json is None:
|
if fmt_url_json is None:
|
||||||
continue
|
continue
|
||||||
|
@ -134,9 +136,9 @@ class KakaoIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': clip.get('description'),
|
'description': strip_or_none(clip.get('description')),
|
||||||
'uploader': clip_link.get('channel', {}).get('name'),
|
'uploader': clip_link.get('channel', {}).get('name'),
|
||||||
'uploader_id': clip_link.get('channelId'),
|
'uploader_id': clip_link.get('channelId'),
|
||||||
'thumbnails': thumbs,
|
'thumbnails': thumbs,
|
||||||
|
@ -146,4 +148,5 @@ class KakaoIE(InfoExtractor):
|
||||||
'like_count': int_or_none(clip.get('likeCount')),
|
'like_count': int_or_none(clip.get('likeCount')),
|
||||||
'comment_count': int_or_none(clip.get('commentCount')),
|
'comment_count': int_or_none(clip.get('commentCount')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'tags': clip.get('tagList'),
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue