@@ -477,6 +477,7 @@ | |||
- **massengeschmack.tv** | |||
- **MatchTV** | |||
- **MDR**: MDR.DE and KiKA | |||
- **MedalTV** | |||
- **media.ccc.de** | |||
- **media.ccc.de:lists** | |||
- **Medialaan** | |||
@@ -846,6 +847,10 @@ | |||
- **Sport5** | |||
- **SportBox** | |||
- **SportDeutschland** | |||
- **Spreaker** | |||
- **SpreakerPage** | |||
- **SpreakerShow** | |||
- **SpreakerShowPage** | |||
- **SpringboardPlatform** | |||
- **Sprout** | |||
- **sr:mediathek**: Saarländischer Rundfunk | |||
@@ -1064,7 +1069,7 @@ | |||
- **vk:wallpost** | |||
- **vlive** | |||
- **vlive:channel** | |||
- **vlive:playlist** | |||
- **vlive:post** | |||
- **Vodlocker** | |||
- **VODPl** | |||
- **VODPlatform** | |||
@@ -97,12 +97,15 @@ class FragmentFD(FileDownloader): | |||
def _download_fragment(self, ctx, frag_url, info_dict, headers=None): | |||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) | |||
success = ctx['dl'].download(fragment_filename, { | |||
fragment_info_dict = { | |||
'url': frag_url, | |||
'http_headers': headers or info_dict.get('http_headers'), | |||
}) | |||
} | |||
success = ctx['dl'].download(fragment_filename, fragment_info_dict) | |||
if not success: | |||
return False, None | |||
if fragment_info_dict.get('filetime'): | |||
ctx['fragment_filetime'] = fragment_info_dict.get('filetime') | |||
down, frag_sanitized = sanitize_open(fragment_filename, 'rb') | |||
ctx['fragment_filename_sanitized'] = frag_sanitized | |||
frag_content = down.read() | |||
@@ -258,6 +261,13 @@ class FragmentFD(FileDownloader): | |||
downloaded_bytes = ctx['complete_frags_downloaded_bytes'] | |||
else: | |||
self.try_rename(ctx['tmpfilename'], ctx['filename']) | |||
if self.params.get('updatetime', True): | |||
filetime = ctx.get('fragment_filetime') | |||
if filetime: | |||
try: | |||
os.utime(ctx['filename'], (time.time(), filetime)) | |||
except Exception: | |||
pass | |||
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) | |||
self._hook_progress({ | |||
@@ -981,7 +981,7 @@ class BBCIE(BBCCoUkIE): | |||
group_id = self._search_regex( | |||
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX, | |||
webpage, 'group id', default=None) | |||
if playlist_id: | |||
if group_id: | |||
return self.url_result( | |||
'https://www.bbc.co.uk/programmes/%s' % group_id, | |||
ie=BBCCoUkIE.ie_key()) | |||
@@ -1092,10 +1092,26 @@ class BBCIE(BBCCoUkIE): | |||
self._search_regex( | |||
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage, | |||
'bbcthree config', default='{}'), | |||
playlist_id, transform_source=js_to_json, fatal=False) | |||
if bbc3_config: | |||
playlist_id, transform_source=js_to_json, fatal=False) or {} | |||
payload = bbc3_config.get('payload') or {} | |||
if payload: | |||
clip = payload.get('currentClip') or {} | |||
clip_vpid = clip.get('vpid') | |||
clip_title = clip.get('title') | |||
if clip_vpid and clip_title: | |||
formats, subtitles = self._download_media_selector(clip_vpid) | |||
self._sort_formats(formats) | |||
return { | |||
'id': clip_vpid, | |||
'title': clip_title, | |||
'thumbnail': dict_get(clip, ('poster', 'imageUrl')), | |||
'description': clip.get('description'), | |||
'duration': parse_duration(clip.get('duration')), | |||
'formats': formats, | |||
'subtitles': subtitles, | |||
} | |||
bbc3_playlist = try_get( | |||
bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'], | |||
payload, lambda x: x['content']['bbcMedia']['playlist'], | |||
dict) | |||
if bbc3_playlist: | |||
playlist_title = bbc3_playlist.get('title') or playlist_title | |||
@@ -1118,6 +1134,39 @@ class BBCIE(BBCCoUkIE): | |||
return self.playlist_result( | |||
entries, playlist_id, playlist_title, playlist_description) | |||
initial_data = self._parse_json(self._search_regex( | |||
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage, | |||
'preload state', default='{}'), playlist_id, fatal=False) | |||
if initial_data: | |||
def parse_media(media): | |||
if not media: | |||
return | |||
for item in (try_get(media, lambda x: x['media']['items'], list) or []): | |||
item_id = item.get('id') | |||
item_title = item.get('title') | |||
if not (item_id and item_title): | |||
continue | |||
formats, subtitles = self._download_media_selector(item_id) | |||
self._sort_formats(formats) | |||
entries.append({ | |||
'id': item_id, | |||
'title': item_title, | |||
'thumbnail': item.get('holdingImageUrl'), | |||
'formats': formats, | |||
'subtitles': subtitles, | |||
}) | |||
for resp in (initial_data.get('data') or {}).values(): | |||
name = resp.get('name') | |||
if name == 'media-experience': | |||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict)) | |||
elif name == 'article': | |||
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []): | |||
if block.get('type') != 'media': | |||
continue | |||
parse_media(block.get('model')) | |||
return self.playlist_result( | |||
entries, playlist_id, playlist_title, playlist_description) | |||
def extract_all(pattern): | |||
return list(filter(None, map( | |||
lambda s: self._parse_json(s, playlist_id, fatal=False), | |||
@@ -5,10 +5,16 @@ import codecs | |||
import re | |||
from .common import InfoExtractor | |||
from ..compat import ( | |||
compat_chr, | |||
compat_ord, | |||
compat_urllib_parse_unquote, | |||
) | |||
from ..utils import ( | |||
ExtractorError, | |||
float_or_none, | |||
int_or_none, | |||
merge_dicts, | |||
multipart_encode, | |||
parse_duration, | |||
random_birthday, | |||
@@ -107,8 +113,9 @@ class CDAIE(InfoExtractor): | |||
r'Odsłony:(?:\s| )*([0-9]+)', webpage, | |||
'view_count', default=None) | |||
average_rating = self._search_regex( | |||
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)', | |||
webpage, 'rating', fatal=False, group='rating_value') | |||
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)', | |||
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False, | |||
group='rating_value') | |||
info_dict = { | |||
'id': video_id, | |||
@@ -123,6 +130,24 @@ class CDAIE(InfoExtractor): | |||
'age_limit': 18 if need_confirm_age else 0, | |||
} | |||
# Source: https://www.cda.pl/js/player.js?t=1606154898 | |||
def decrypt_file(a): | |||
for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'): | |||
a = a.replace(p, '') | |||
a = compat_urllib_parse_unquote(a) | |||
b = [] | |||
for c in a: | |||
f = compat_ord(c) | |||
b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else compat_chr(f)) | |||
a = ''.join(b) | |||
a = a.replace('.cda.mp4', '') | |||
for p in ('.2cda.pl', '.3cda.pl'): | |||
a = a.replace(p, '.cda.pl') | |||
if '/upstream' in a: | |||
a = a.replace('/upstream', '.mp4/upstream') | |||
return 'https://' + a | |||
return 'https://' + a + '.mp4' | |||
def extract_format(page, version): | |||
json_str = self._html_search_regex( | |||
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page, | |||
@@ -141,6 +166,8 @@ class CDAIE(InfoExtractor): | |||
video['file'] = codecs.decode(video['file'], 'rot_13') | |||
if video['file'].endswith('adc.mp4'): | |||
video['file'] = video['file'].replace('adc.mp4', '.mp4') | |||
elif not video['file'].startswith('http'): | |||
video['file'] = decrypt_file(video['file']) | |||
f = { | |||
'url': video['file'], | |||
} | |||
@@ -179,4 +206,6 @@ class CDAIE(InfoExtractor): | |||
self._sort_formats(formats) | |||
return info_dict | |||
info = self._search_json_ld(webpage, video_id, default={}) | |||
return merge_dicts(info_dict, info) |
@@ -620,6 +620,7 @@ from .markiza import ( | |||
from .massengeschmacktv import MassengeschmackTVIE | |||
from .matchtv import MatchTVIE | |||
from .mdr import MDRIE | |||
from .medaltv import MedalTVIE | |||
from .mediaset import MediasetIE | |||
from .mediasite import ( | |||
MediasiteIE, | |||
@@ -1102,6 +1103,12 @@ from .stitcher import StitcherIE | |||
from .sport5 import Sport5IE | |||
from .sportbox import SportBoxIE | |||
from .sportdeutschland import SportDeutschlandIE | |||
from .spreaker import ( | |||
SpreakerIE, | |||
SpreakerPageIE, | |||
SpreakerShowIE, | |||
SpreakerShowPageIE, | |||
) | |||
from .springboardplatform import SpringboardPlatformIE | |||
from .sprout import SproutIE | |||
from .srgssr import ( | |||
@@ -1395,8 +1402,8 @@ from .vk import ( | |||
) | |||
from .vlive import ( | |||
VLiveIE, | |||
VLivePostIE, | |||
VLiveChannelIE, | |||
VLivePlaylistIE | |||
) | |||
from .vodlocker import VodlockerIE | |||
from .vodpl import VODPlIE | |||
@@ -0,0 +1,131 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
from ..compat import compat_str | |||
from ..utils import ( | |||
ExtractorError, | |||
float_or_none, | |||
int_or_none, | |||
str_or_none, | |||
try_get, | |||
) | |||
class MedalTVIE(InfoExtractor): | |||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)' | |||
_TESTS = [{ | |||
'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr', | |||
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa', | |||
'info_dict': { | |||
'id': '34934644', | |||
'ext': 'mp4', | |||
'title': 'Quad Cold', | |||
'description': 'Medal,https://medal.tv/desktop/', | |||
'uploader': 'MowgliSB', | |||
'timestamp': 1603165266, | |||
'upload_date': '20201020', | |||
'uploader_id': 10619174, | |||
} | |||
}, { | |||
'url': 'https://medal.tv/clips/36787208', | |||
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148', | |||
'info_dict': { | |||
'id': '36787208', | |||
'ext': 'mp4', | |||
'title': 'u tk me i tk u bigger', | |||
'description': 'Medal,https://medal.tv/desktop/', | |||
'uploader': 'Mimicc', | |||
'timestamp': 1605580939, | |||
'upload_date': '20201117', | |||
'uploader_id': 5156321, | |||
} | |||
}] | |||
def _real_extract(self, url): | |||
video_id = self._match_id(url) | |||
webpage = self._download_webpage(url, video_id) | |||
hydration_data = self._parse_json(self._search_regex( | |||
r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>', | |||
webpage, 'hydration data', default='{}'), video_id) | |||
clip = try_get( | |||
hydration_data, lambda x: x['clips'][video_id], dict) or {} | |||
if not clip: | |||
raise ExtractorError( | |||
'Could not find video information.', video_id=video_id) | |||
title = clip['contentTitle'] | |||
source_width = int_or_none(clip.get('sourceWidth')) | |||
source_height = int_or_none(clip.get('sourceHeight')) | |||
aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9 | |||
def add_item(container, item_url, height, id_key='format_id', item_id=None): | |||
item_id = item_id or '%dp' % height | |||
if item_id not in item_url: | |||
return | |||
width = int(round(aspect_ratio * height)) | |||
container.append({ | |||
'url': item_url, | |||
id_key: item_id, | |||
'width': width, | |||
'height': height | |||
}) | |||
formats = [] | |||
thumbnails = [] | |||
for k, v in clip.items(): | |||
if not (v and isinstance(v, compat_str)): | |||
continue | |||
mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k) | |||
if not mobj: | |||
continue | |||
prefix = mobj.group(1) | |||
height = int_or_none(mobj.group(2)) | |||
if prefix == 'contentUrl': | |||
add_item( | |||
formats, v, height or source_height, | |||
item_id=None if height else 'source') | |||
elif prefix == 'thumbnail': | |||
add_item(thumbnails, v, height, 'id') | |||
error = clip.get('error') | |||
if not formats and error: | |||
if error == 404: | |||
raise ExtractorError( | |||
'That clip does not exist.', | |||
expected=True, video_id=video_id) | |||
else: | |||
raise ExtractorError( | |||
'An unknown error occurred ({0}).'.format(error), | |||
video_id=video_id) | |||
self._sort_formats(formats) | |||
# Necessary because the id of the author is not known in advance. | |||
# Won't raise an issue if no profile can be found as this is optional. | |||
author = try_get( | |||
hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {} | |||
author_id = str_or_none(author.get('id')) | |||
author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None | |||
return { | |||
'id': video_id, | |||
'title': title, | |||
'formats': formats, | |||
'thumbnails': thumbnails, | |||
'description': clip.get('contentDescription'), | |||
'uploader': author.get('displayName'), | |||
'timestamp': float_or_none(clip.get('created'), 1000), | |||
'uploader_id': author_id, | |||
'uploader_url': author_url, | |||
'duration': int_or_none(clip.get('videoLengthSeconds')), | |||
'view_count': int_or_none(clip.get('views')), | |||
'like_count': int_or_none(clip.get('likes')), | |||
'comment_count': int_or_none(clip.get('comments')), | |||
} |
@@ -9,6 +9,7 @@ from ..compat import ( | |||
compat_urllib_parse_unquote, | |||
) | |||
from ..utils import ( | |||
determine_ext, | |||
ExtractorError, | |||
int_or_none, | |||
js_to_json, | |||
@@ -16,185 +17,13 @@ from ..utils import ( | |||
parse_age_limit, | |||
parse_duration, | |||
try_get, | |||
url_or_none, | |||
) | |||
class NRKBaseIE(InfoExtractor): | |||
_GEO_COUNTRIES = ['NO'] | |||
_api_host = None | |||
def _real_extract(self, url): | |||
video_id = self._match_id(url) | |||
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS | |||
for api_host in api_hosts: | |||
data = self._download_json( | |||
'http://%s/mediaelement/%s' % (api_host, video_id), | |||
video_id, 'Downloading mediaelement JSON', | |||
fatal=api_host == api_hosts[-1]) | |||
if not data: | |||
continue | |||
self._api_host = api_host | |||
break | |||
title = data.get('fullTitle') or data.get('mainTitle') or data['title'] | |||
video_id = data.get('id') or video_id | |||
entries = [] | |||
conviva = data.get('convivaStatistics') or {} | |||
live = (data.get('mediaElementType') == 'Live' | |||
or data.get('isLive') is True or conviva.get('isLive')) | |||
def make_title(t): | |||
return self._live_title(t) if live else t | |||
media_assets = data.get('mediaAssets') | |||
if media_assets and isinstance(media_assets, list): | |||
def video_id_and_title(idx): | |||
return ((video_id, title) if len(media_assets) == 1 | |||
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx))) | |||
for num, asset in enumerate(media_assets, 1): | |||
asset_url = asset.get('url') | |||
if not asset_url: | |||
continue | |||
formats = self._extract_akamai_formats(asset_url, video_id) | |||
if not formats: | |||
continue | |||
self._sort_formats(formats) | |||
# Some f4m streams may not work with hdcore in fragments' URLs | |||
for f in formats: | |||
extra_param = f.get('extra_param_to_segment_url') | |||
if extra_param and 'hdcore' in extra_param: | |||
del f['extra_param_to_segment_url'] | |||
entry_id, entry_title = video_id_and_title(num) | |||
duration = parse_duration(asset.get('duration')) | |||
subtitles = {} | |||
for subtitle in ('webVtt', 'timedText'): | |||
subtitle_url = asset.get('%sSubtitlesUrl' % subtitle) | |||
if subtitle_url: | |||
subtitles.setdefault('no', []).append({ | |||
'url': compat_urllib_parse_unquote(subtitle_url) | |||
}) | |||
entries.append({ | |||
'id': asset.get('carrierId') or entry_id, | |||
'title': make_title(entry_title), | |||
'duration': duration, | |||
'subtitles': subtitles, | |||
'formats': formats, | |||
}) | |||
if not entries: | |||
media_url = data.get('mediaUrl') | |||
if media_url: | |||
formats = self._extract_akamai_formats(media_url, video_id) | |||
self._sort_formats(formats) | |||
duration = parse_duration(data.get('duration')) | |||
entries = [{ | |||
'id': video_id, | |||
'title': make_title(title), | |||
'duration': duration, | |||
'formats': formats, | |||
}] | |||
if not entries: | |||
MESSAGES = { | |||
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet', | |||
'ProgramRightsHasExpired': 'Programmet har gått ut', | |||
'NoProgramRights': 'Ikke tilgjengelig', | |||
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', | |||
} | |||
message_type = data.get('messageType', '') | |||
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* | |||
if 'IsGeoBlocked' in message_type: | |||
self.raise_geo_restricted( | |||
msg=MESSAGES.get('ProgramIsGeoBlocked'), | |||
countries=self._GEO_COUNTRIES) | |||
raise ExtractorError( | |||
'%s said: %s' % (self.IE_NAME, MESSAGES.get( | |||
message_type, message_type)), | |||
expected=True) | |||
series = conviva.get('seriesName') or data.get('seriesTitle') | |||
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') | |||
season_number = None | |||
episode_number = None | |||
if data.get('mediaElementType') == 'Episode': | |||
_season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \ | |||
data.get('relativeOriginUrl', '') | |||
EPISODENUM_RE = [ | |||
r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.', | |||
r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})', | |||
] | |||
season_number = int_or_none(self._search_regex( | |||
EPISODENUM_RE, _season_episode, 'season number', | |||
default=None, group='season')) | |||
episode_number = int_or_none(self._search_regex( | |||
EPISODENUM_RE, _season_episode, 'episode number', | |||
default=None, group='episode')) | |||
thumbnails = None | |||
images = data.get('images') | |||
if images and isinstance(images, dict): | |||
web_images = images.get('webImages') | |||
if isinstance(web_images, list): | |||
thumbnails = [{ | |||
'url': image['imageUrl'], | |||
'width': int_or_none(image.get('width')), | |||
'height': int_or_none(image.get('height')), | |||
} for image in web_images if image.get('imageUrl')] | |||
description = data.get('description') | |||
category = data.get('mediaAnalytics', {}).get('category') | |||
common_info = { | |||
'description': description, | |||
'series': series, | |||
'episode': episode, | |||
'season_number': season_number, | |||
'episode_number': episode_number, | |||
'categories': [category] if category else None, | |||
'age_limit': parse_age_limit(data.get('legalAge')), | |||
'thumbnails': thumbnails, | |||
} | |||
vcodec = 'none' if data.get('mediaType') == 'Audio' else None | |||
for entry in entries: | |||
entry.update(common_info) | |||
for f in entry['formats']: | |||
f['vcodec'] = vcodec | |||
points = data.get('shortIndexPoints') | |||
if isinstance(points, list): | |||
chapters = [] | |||
for next_num, point in enumerate(points, start=1): | |||
if not isinstance(point, dict): | |||
continue | |||
start_time = parse_duration(point.get('startPoint')) | |||
if start_time is None: | |||
continue | |||
end_time = parse_duration( | |||
data.get('duration') | |||
if next_num == len(points) | |||
else points[next_num].get('startPoint')) | |||
if end_time is None: | |||
continue | |||
chapters.append({ | |||
'start_time': start_time, | |||
'end_time': end_time, | |||
'title': point.get('title'), | |||
}) | |||
if chapters and len(entries) == 1: | |||
entries[0]['chapters'] = chapters | |||
return self.playlist_result(entries, video_id, title, description) | |||
class NRKIE(NRKBaseIE): | |||
_VALID_URL = r'''(?x) | |||
@@ -202,13 +31,13 @@ class NRKIE(NRKBaseIE): | |||
nrk:| | |||
https?:// | |||
(?: | |||
(?:www\.)?nrk\.no/video/PS\*| | |||
(?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)| | |||
v8[-.]psapi\.nrk\.no/mediaelement/ | |||
) | |||
) | |||
(?P<id>[^?#&]+) | |||
(?P<id>[^?\#&]+) | |||
''' | |||
_API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no') | |||
_TESTS = [{ | |||
# video | |||
'url': 'http://www.nrk.no/video/PS*150533', | |||
@@ -240,8 +69,76 @@ class NRKIE(NRKBaseIE): | |||
}, { | |||
'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9', | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533', | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999', | |||
'only_matching': True, | |||
}] | |||
def _extract_from_playback(self, video_id): | |||
manifest = self._download_json( | |||
'http://psapi.nrk.no/playback/manifest/%s' % video_id, | |||
video_id, 'Downloading manifest JSON') | |||
playable = manifest['playable'] | |||
formats = [] | |||
for asset in playable['assets']: | |||
if not isinstance(asset, dict): | |||
continue | |||
if asset.get('encrypted'): | |||
continue | |||
format_url = url_or_none(asset.get('url')) | |||
if not format_url: | |||
continue | |||
if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8': | |||
formats.extend(self._extract_m3u8_formats( | |||
format_url, video_id, 'mp4', entry_protocol='m3u8_native', | |||
m3u8_id='hls', fatal=False)) | |||
self._sort_formats(formats) | |||
data = self._download_json( | |||
'http://psapi.nrk.no/playback/metadata/%s' % video_id, | |||
video_id, 'Downloading metadata JSON') | |||
preplay = data['preplay'] | |||
titles = preplay['titles'] | |||
title = titles['title'] | |||
alt_title = titles.get('subtitle') | |||
description = preplay.get('description') | |||
duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration')) | |||
thumbnails = [] | |||
for image in try_get( | |||
preplay, lambda x: x['poster']['images'], list) or []: | |||
if not isinstance(image, dict): | |||
continue | |||
image_url = url_or_none(image.get('url')) | |||
if not image_url: | |||
continue | |||
thumbnails.append({ | |||
'url': image_url, | |||
'width': int_or_none(image.get('pixelWidth')), | |||
'height': int_or_none(image.get('pixelHeight')), | |||
}) | |||
return { | |||
'id': video_id, | |||
'title': title, | |||
'alt_title': alt_title, | |||
'description': description, | |||
'duration': duration, | |||
'thumbnails': thumbnails, | |||
'formats': formats, | |||
} | |||
def _real_extract(self, url): | |||
video_id = self._match_id(url) | |||
return self._extract_from_playback(video_id) | |||
class NRKTVIE(NRKBaseIE): | |||
IE_DESC = 'NRK TV and NRK Radio' | |||
@@ -380,6 +277,181 @@ class NRKTVIE(NRKBaseIE): | |||
'only_matching': True, | |||
}] | |||
_api_host = None | |||
def _extract_from_mediaelement(self, video_id): | |||
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS | |||
for api_host in api_hosts: | |||
data = self._download_json( | |||
'http://%s/mediaelement/%s' % (api_host, video_id), | |||
video_id, 'Downloading mediaelement JSON', | |||
fatal=api_host == api_hosts[-1]) | |||
if not data: | |||
continue | |||
self._api_host = api_host | |||
break | |||
title = data.get('fullTitle') or data.get('mainTitle') or data['title'] | |||
video_id = data.get('id') or video_id | |||
entries = [] | |||
conviva = data.get('convivaStatistics') or {} | |||
live = (data.get('mediaElementType') == 'Live' | |||
or data.get('isLive') is True or conviva.get('isLive')) | |||
def make_title(t): | |||
return self._live_title(t) if live else t | |||
media_assets = data.get('mediaAssets') | |||
if media_assets and isinstance(media_assets, list): | |||
def video_id_and_title(idx): | |||
return ((video_id, title) if len(media_assets) == 1 | |||
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx))) | |||
for num, asset in enumerate(media_assets, 1): | |||
asset_url = asset.get('url') | |||
if not asset_url: | |||
continue | |||
formats = self._extract_akamai_formats(asset_url, video_id) | |||
if not formats: | |||
continue | |||
self._sort_formats(formats) | |||
# Some f4m streams may not work with hdcore in fragments' URLs | |||
for f in formats: | |||
extra_param = f.get('extra_param_to_segment_url') | |||
if extra_param and 'hdcore' in extra_param: | |||
del f['extra_param_to_segment_url'] | |||
entry_id, entry_title = video_id_and_title(num) | |||
duration = parse_duration(asset.get('duration')) | |||
subtitles = {} | |||
for subtitle in ('webVtt', 'timedText'): | |||
subtitle_url = asset.get('%sSubtitlesUrl' % subtitle) | |||
if subtitle_url: | |||
subtitles.setdefault('no', []).append({ | |||
'url': compat_urllib_parse_unquote(subtitle_url) | |||
}) | |||
entries.append({ | |||
'id': asset.get('carrierId') or entry_id, | |||
'title': make_title(entry_title), | |||
'duration': duration, | |||
'subtitles': subtitles, | |||
'formats': formats, | |||
}) | |||
if not entries: | |||
media_url = data.get('mediaUrl') | |||
if media_url: | |||
formats = self._extract_akamai_formats(media_url, video_id) | |||
self._sort_formats(formats) | |||
duration = parse_duration(data.get('duration')) | |||
entries = [{ | |||
'id': video_id, | |||
'title': make_title(title), | |||
'duration': duration, | |||
'formats': formats, | |||
}] | |||
if not entries: | |||
MESSAGES = { | |||
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet', | |||
'ProgramRightsHasExpired': 'Programmet har gått ut', | |||
'NoProgramRights': 'Ikke tilgjengelig', | |||
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', | |||
} | |||
message_type = data.get('messageType', '') | |||
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* | |||
if 'IsGeoBlocked' in message_type: | |||
self.raise_geo_restricted( | |||
msg=MESSAGES.get('ProgramIsGeoBlocked'), | |||
countries=self._GEO_COUNTRIES) | |||
raise ExtractorError( | |||
'%s said: %s' % (self.IE_NAME, MESSAGES.get( | |||
message_type, message_type)), | |||
expected=True) | |||
series = conviva.get('seriesName') or data.get('seriesTitle') | |||
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') | |||
season_number = None | |||
episode_number = None | |||
if data.get('mediaElementType') == 'Episode': | |||
_season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \ | |||
data.get('relativeOriginUrl', '') | |||
EPISODENUM_RE = [ | |||
r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.', | |||
r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})', | |||
] | |||
season_number = int_or_none(self._search_regex( | |||
EPISODENUM_RE, _season_episode, 'season number', | |||
default=None, group='season')) | |||
episode_number = int_or_none(self._search_regex( | |||
EPISODENUM_RE, _season_episode, 'episode number', | |||
default=None, group='episode')) | |||
thumbnails = None | |||
images = data.get('images') | |||
if images and isinstance(images, dict): | |||
web_images = images.get('webImages') | |||
if isinstance(web_images, list): | |||
thumbnails = [{ | |||
'url': image['imageUrl'], | |||
'width': int_or_none(image.get('width')), | |||
'height': int_or_none(image.get('height')), | |||
} for image in web_images if image.get('imageUrl')] | |||
description = data.get('description') | |||
category = data.get('mediaAnalytics', {}).get('category') | |||
common_info = { | |||
'description': description, | |||
'series': series, | |||
'episode': episode, | |||
'season_number': season_number, | |||
'episode_number': episode_number, | |||
'categories': [category] if category else None, | |||
'age_limit': parse_age_limit(data.get('legalAge')), | |||
'thumbnails': thumbnails, | |||
} | |||
vcodec = 'none' if data.get('mediaType') == 'Audio' else None | |||
for entry in entries: | |||
entry.update(common_info) | |||
for f in entry['formats']: | |||
f['vcodec'] = vcodec | |||
points = data.get('shortIndexPoints') | |||
if isinstance(points, list): | |||
chapters = [] | |||
for next_num, point in enumerate(points, start=1): | |||
if not isinstance(point, dict): | |||
continue | |||
start_time = parse_duration(point.get('startPoint')) | |||
if start_time is None: | |||
continue | |||
end_time = parse_duration( | |||
data.get('duration') | |||
if next_num == len(points) | |||
else points[next_num].get('startPoint')) | |||
if end_time is None: | |||
continue | |||
chapters.append({ | |||
'start_time': start_time, | |||
'end_time': end_time, | |||
'title': point.get('title'), | |||
}) | |||
if chapters and len(entries) == 1: | |||
entries[0]['chapters'] = chapters | |||
return self.playlist_result(entries, video_id, title, description) | |||
def _real_extract(self, url): | |||
video_id = self._match_id(url) | |||
return self._extract_from_mediaelement(video_id) | |||
class NRKTVEpisodeIE(InfoExtractor): | |||
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)' | |||
@@ -0,0 +1,176 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import itertools | |||
from .common import InfoExtractor | |||
from ..compat import compat_str | |||
from ..utils import ( | |||
float_or_none, | |||
int_or_none, | |||
str_or_none, | |||
try_get, | |||
unified_timestamp, | |||
url_or_none, | |||
) | |||
def _extract_episode(data, episode_id=None): | |||
title = data['title'] | |||
download_url = data['download_url'] | |||
series = try_get(data, lambda x: x['show']['title'], compat_str) | |||
uploader = try_get(data, lambda x: x['author']['fullname'], compat_str) | |||
thumbnails = [] | |||
for image in ('image_original', 'image_medium', 'image'): | |||
image_url = url_or_none(data.get('%s_url' % image)) | |||
if image_url: | |||
thumbnails.append({'url': image_url}) | |||
def stats(key): | |||
return int_or_none(try_get( | |||
data, | |||
(lambda x: x['%ss_count' % key], | |||
lambda x: x['stats']['%ss' % key]))) | |||
def duration(key): | |||
return float_or_none(data.get(key), scale=1000) | |||
return { | |||
'id': compat_str(episode_id or data['episode_id']), | |||
'url': download_url, | |||
'display_id': data.get('permalink'), | |||
'title': title, | |||
'description': data.get('description'), | |||
'timestamp': unified_timestamp(data.get('published_at')), | |||
'uploader': uploader, | |||
'uploader_id': str_or_none(data.get('author_id')), | |||
'creator': uploader, | |||
'duration': duration('duration') or duration('length'), | |||
'view_count': stats('play'), | |||
'like_count': stats('like'), | |||
'comment_count': stats('message'), | |||
'format': 'MPEG Layer 3', | |||
'format_id': 'mp3', | |||
'container': 'mp3', | |||
'ext': 'mp3', | |||
'thumbnails': thumbnails, | |||
'series': series, | |||
'extractor_key': SpreakerIE.ie_key(), | |||
} | |||
class SpreakerIE(InfoExtractor): | |||
_VALID_URL = r'''(?x) | |||
https?:// | |||
api\.spreaker\.com/ | |||
(?: | |||
(?:download/)?episode| | |||
v2/episodes | |||
)/ | |||
(?P<id>\d+) | |||
''' | |||
_TESTS = [{ | |||
'url': 'https://api.spreaker.com/episode/12534508', | |||
'info_dict': { | |||
'id': '12534508', | |||
'display_id': 'swm-ep15-how-to-market-your-music-part-2', | |||
'ext': 'mp3', | |||
'title': 'EP:15 | Music Marketing (Likes) - Part 2', | |||
'description': 'md5:0588c43e27be46423e183076fa071177', | |||
'timestamp': 1502250336, | |||
'upload_date': '20170809', | |||
'uploader': 'SWM', | |||
'uploader_id': '9780658', | |||
'duration': 1063.42, | |||
'view_count': int, | |||
'like_count': int, | |||
'comment_count': int, | |||
'series': 'Success With Music (SWM)', | |||
}, | |||
}, { | |||
'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3', | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments', | |||
'only_matching': True, | |||
}] | |||
def _real_extract(self, url): | |||
episode_id = self._match_id(url) | |||
data = self._download_json( | |||
'https://api.spreaker.com/v2/episodes/%s' % episode_id, | |||
episode_id)['response']['episode'] | |||
return _extract_episode(data, episode_id) | |||
class SpreakerPageIE(InfoExtractor): | |||
_VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)' | |||
_TESTS = [{ | |||
'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2', | |||
'only_matching': True, | |||
}] | |||
def _real_extract(self, url): | |||
display_id = self._match_id(url) | |||
webpage = self._download_webpage(url, display_id) | |||
episode_id = self._search_regex( | |||
(r'data-episode_id=["\'](?P<id>\d+)', | |||
r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id') | |||
return self.url_result( | |||
'https://api.spreaker.com/episode/%s' % episode_id, | |||
ie=SpreakerIE.ie_key(), video_id=episode_id) | |||
class SpreakerShowIE(InfoExtractor):
    _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/show/3-ninjas-podcast',
        'info_dict': {
            'id': '4652058',
        },
        'playlist_mincount': 118,
    }]

    def _entries(self, show_id):
        # Page through the show's episode list (100 entries per page) and
        # yield an info dict for every well-formed result until the pager
        # reports its last page or the response stops making sense.
        for page_num in itertools.count(1):
            episodes = self._download_json(
                'https://api.spreaker.com/show/%s/episodes' % show_id,
                show_id, note='Downloading JSON page %d' % page_num, query={
                    'page': page_num,
                    'max_per_page': 100,
                })
            pager = try_get(episodes, lambda x: x['response']['pager'], dict)
            if not pager:
                return
            results = pager.get('results')
            if not isinstance(results, list) or not results:
                return
            for entry in results:
                if isinstance(entry, dict):
                    yield _extract_episode(entry)
            if pager.get('last_page') == page_num:
                return

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        return self.playlist_result(
            self._entries(playlist_id), playlist_id=playlist_id)
class SpreakerShowPageIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/show/success-with-music',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The show landing page embeds the numeric show id; pull it out and
        # delegate to SpreakerShowIE through the API endpoint.
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        show_id = self._search_regex(
            r'show_id\s*:\s*(?P<id>\d+)', page, 'show id')
        api_url = 'https://api.spreaker.com/show/%s' % show_id
        return self.url_result(
            api_url, ie=SpreakerShowIE.ie_key(), video_id=show_id)
@@ -21,6 +21,7 @@ from ..utils import ( | |||
parse_age_limit, | |||
parse_iso8601, | |||
sanitized_Request, | |||
std_headers, | |||
) | |||
@@ -227,8 +228,10 @@ class VikiIE(VikiBaseIE): | |||
resp = self._download_json( | |||
'https://www.viki.com/api/videos/' + video_id, | |||
video_id, 'Downloading video JSON', | |||
headers={'x-viki-app-ver': '4.0.57'}) | |||
video_id, 'Downloading video JSON', headers={ | |||
'x-client-user-agent': std_headers['User-Agent'], | |||
'x-viki-app-ver': '4.0.57', | |||
}) | |||
video = resp['video'] | |||
self._check_errors(video) | |||
@@ -1,55 +1,50 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import re | |||
import time | |||
import itertools | |||
import json | |||
from .common import InfoExtractor | |||
from .naver import NaverBaseIE | |||
from ..compat import compat_str | |||
from ..compat import ( | |||
compat_HTTPError, | |||
compat_str, | |||
) | |||
from ..utils import ( | |||
ExtractorError, | |||
int_or_none, | |||
merge_dicts, | |||
str_or_none, | |||
strip_or_none, | |||
try_get, | |||
urlencode_postdata, | |||
) | |||
class VLiveIE(NaverBaseIE): | |||
class VLiveBaseIE(NaverBaseIE): | |||
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b' | |||
class VLiveIE(VLiveBaseIE): | |||
IE_NAME = 'vlive' | |||
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|post)/(?P<id>(?:\d-)?[0-9]+)' | |||
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)' | |||
_NETRC_MACHINE = 'vlive' | |||
_TESTS = [{ | |||
'url': 'https://www.vlive.tv/video/1326', | |||
'url': 'http://www.vlive.tv/video/1326', | |||
'md5': 'cc7314812855ce56de70a06a27314983', | |||
'info_dict': { | |||
'id': '1326', | |||
'ext': 'mp4', | |||
'title': "[V LIVE] Girl's Day's Broadcast", | |||
'title': "Girl's Day's Broadcast", | |||
'creator': "Girl's Day", | |||
'view_count': int, | |||
'uploader_id': 'muploader_a', | |||
}, | |||
}, | |||
{ | |||
'url': 'https://vlive.tv/post/1-18244258', | |||
'md5': 'cc7314812855ce56de70a06a27314983', | |||
'info_dict': { | |||
'id': '1326', | |||
'ext': 'mp4', | |||
'title': "[V LIVE] Girl's Day's Broadcast", | |||
'creator': "Girl's Day", | |||
'view_count': int, | |||
'uploader_id': 'muploader_a', | |||
}, | |||
}, | |||
{ | |||
'url': 'https://www.vlive.tv/video/16937', | |||
}, { | |||
'url': 'http://www.vlive.tv/video/16937', | |||
'info_dict': { | |||
'id': '16937', | |||
'ext': 'mp4', | |||
'title': '[V LIVE] 첸백시 걍방', | |||
'title': '첸백시 걍방', | |||
'creator': 'EXO', | |||
'view_count': int, | |||
'subtitles': 'mincount:12', | |||
@@ -70,12 +65,15 @@ class VLiveIE(NaverBaseIE): | |||
'subtitles': 'mincount:10', | |||
}, | |||
'skip': 'This video is only available for CH+ subscribers', | |||
}, { | |||
'url': 'https://www.vlive.tv/embed/1326', | |||
'only_matching': True, | |||
}, { | |||
# works only with gcc=KR | |||
'url': 'https://www.vlive.tv/video/225019', | |||
'only_matching': True, | |||
}] | |||
@classmethod | |||
def suitable(cls, url): | |||
return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url) | |||
def _real_initialize(self): | |||
self._login() | |||
@@ -107,118 +105,159 @@ class VLiveIE(NaverBaseIE): | |||
if not is_logged_in(): | |||
raise ExtractorError('Unable to log in', expected=True) | |||
def _real_extract(self, url): | |||
# url may match on a post or a video url with a post_id potentially matching a video_id | |||
working_id = self._match_id(url) | |||
webpage = self._download_webpage(url, working_id) | |||
PARAMS_RE = r'window\.__PRELOADED_STATE__\s*=\s*({.*});?\s*</script>' | |||
PARAMS_FIELD = 'params' | |||
params = self._search_regex( | |||
PARAMS_RE, webpage, PARAMS_FIELD, default='', flags=re.DOTALL) | |||
params = self._parse_json(params, working_id, fatal=False) | |||
video_params = try_get(params, lambda x: x["postDetail"]["post"]["officialVideo"], dict) | |||
if video_params is None: | |||
error = try_get(params, lambda x: x["postDetail"]["error"], dict) | |||
error_data = try_get(error, lambda x: x["data"], dict) | |||
error_video = try_get(error_data, lambda x: x["officialVideo"], dict) | |||
error_msg = try_get(error, lambda x: x["message"], compat_str) | |||
product_type = try_get(error_data, | |||
[lambda x: x["officialVideo"]["productType"], | |||
lambda x: x["board"]["boardType"]], | |||
compat_str) | |||
if error_video is not None: | |||
if product_type in ('VLIVE_PLUS', 'VLIVE+'): | |||
self.raise_login_required('This video is only available with V LIVE+.') | |||
elif error_msg is not None: | |||
raise ExtractorError('V LIVE reported the following error: %s' % error_msg) | |||
else: | |||
raise ExtractorError('Failed to extract video parameters.') | |||
elif 'post' in url: | |||
raise ExtractorError('Url does not appear to be a video post.', expected=True) | |||
else: | |||
raise ExtractorError('Failed to extract video parameters.') | |||
video_id = working_id if 'video' in url else str(video_params["videoSeq"]) | |||
def _call_api(self, path_template, video_id, fields=None): | |||
query = {'appId': self._APP_ID, 'gcc': 'KR'} | |||
if fields: | |||
query['fields'] = fields | |||
try: | |||
return self._download_json( | |||
'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id, | |||
'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0], | |||
headers={'Referer': 'https://www.vlive.tv/'}, query=query) | |||
except ExtractorError as e: | |||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: | |||
self.raise_login_required(json.loads(e.cause.read().decode())['message']) | |||
raise | |||
video_type = video_params["type"] | |||
if video_type in ('VOD'): | |||
encoding_status = video_params["encodingStatus"] | |||
if encoding_status == 'COMPLETE': | |||
return self._replay(video_id, webpage, params, video_params) | |||
else: | |||
raise ExtractorError('VOD encoding not yet complete. Please try again later.', | |||
expected=True) | |||
elif video_type in ('LIVE'): | |||
video_status = video_params["status"] | |||
if video_status in ('RESERVED'): | |||
def _real_extract(self, url): | |||
video_id = self._match_id(url) | |||
post = self._call_api( | |||
'post/v1.0/officialVideoPost-%s', video_id, | |||
'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}') | |||
video = post['officialVideo'] | |||
def get_common_fields(): | |||
channel = post.get('channel') or {} | |||
return { | |||
'title': video.get('title'), | |||
'creator': post.get('author', {}).get('nickname'), | |||
'channel': channel.get('channelName'), | |||
'channel_id': channel.get('channelCode'), | |||
'duration': int_or_none(video.get('playTime')), | |||
'view_count': int_or_none(video.get('playCount')), | |||
'like_count': int_or_none(video.get('likeCount')), | |||
'comment_count': int_or_none(video.get('commentCount')), | |||
} | |||
video_type = video.get('type') | |||
if video_type == 'VOD': | |||
inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey'] | |||
vod_id = video['vodId'] | |||
return merge_dicts( | |||
get_common_fields(), | |||
self._extract_video_info(video_id, vod_id, inkey)) | |||
elif video_type == 'LIVE': | |||
status = video.get('status') | |||
if status == 'ON_AIR': | |||
stream_url = self._call_api( | |||
'old/v3/live/%s/playInfo', | |||
video_id)['result']['adaptiveStreamUrl'] | |||
formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4') | |||
info = get_common_fields() | |||
info.update({ | |||
'title': self._live_title(video['title']), | |||
'id': video_id, | |||
'formats': formats, | |||
'is_live': True, | |||
}) | |||
return info | |||
elif status == 'ENDED': | |||
raise ExtractorError( | |||
'Uploading for replay. Please wait...', expected=True) | |||
elif status == 'RESERVED': | |||
raise ExtractorError('Coming soon!', expected=True) | |||
elif video_status in ('ENDED', 'END'): | |||
raise ExtractorError('Uploading for replay. Please wait...', expected=True) | |||
elif video.get('exposeStatus') == 'CANCEL': | |||
raise ExtractorError( | |||
'We are sorry, but the live broadcast has been canceled.', | |||
expected=True) | |||
else: | |||
return self._live(video_id, webpage, params) | |||
else: | |||
raise ExtractorError('Unknown video type %s' % video_type) | |||
def _get_common_fields(self, webpage, params): | |||
title = self._og_search_title(webpage) | |||
description = self._html_search_meta( | |||
['og:description', 'description', 'twitter:description'], | |||
webpage, 'description', default=None) | |||
creator = (try_get(params, lambda x: x["channel"]["channel"]["channelName"], compat_str) | |||
or self._search_regex(r'on (.*) channel', description or '', 'creator', fatal=False)) | |||
thumbnail = self._og_search_thumbnail(webpage) | |||
return { | |||
'title': title, | |||
'creator': creator, | |||
'thumbnail': thumbnail, | |||
} | |||
def _live(self, video_id, webpage, params): | |||
LIVE_INFO_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/old/v3/live/%s/playInfo' % video_id | |||
play_info = self._download_json(LIVE_INFO_ENDPOINT, video_id, | |||
headers={"referer": "https://www.vlive.tv"}) | |||
raise ExtractorError('Unknown status ' + status) | |||
streams = try_get(play_info, lambda x: x["result"]["streamList"], list) or [] | |||
formats = [] | |||
for stream in streams: | |||
formats.extend(self._extract_m3u8_formats( | |||
stream['serviceUrl'], video_id, 'mp4', | |||
fatal=False, live=True)) | |||
self._sort_formats(formats) | |||
class VLivePostIE(VLiveIE): | |||
IE_NAME = 'vlive:post' | |||
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)' | |||
_TESTS = [{ | |||
# uploadType = SOS | |||
'url': 'https://www.vlive.tv/post/1-20088044', | |||
'info_dict': { | |||
'id': '1-20088044', | |||
'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...', | |||
'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407', | |||
}, | |||
'playlist_count': 3, | |||
}, { | |||
# uploadType = V | |||
'url': 'https://www.vlive.tv/post/1-20087926', | |||
'info_dict': { | |||
'id': '1-20087926', | |||
'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭', | |||
}, | |||
'playlist_count': 1, | |||
}] | |||
_FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s' | |||
_SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo' | |||
_INKEY_TMPL = _FVIDEO_TMPL % 'inKey' | |||
info = self._get_common_fields(webpage, params) | |||
info.update({ | |||
'title': self._live_title(info['title']), | |||
'id': video_id, | |||
'formats': formats, | |||
'is_live': True, | |||
}) | |||
return info | |||
def _real_extract(self, url): | |||
post_id = self._match_id(url) | |||
def _replay(self, video_id, webpage, params, video_params): | |||
long_video_id = video_params["vodId"] | |||
post = self._call_api( | |||
'post/v1.0/post-%s', post_id, | |||
'attachments{video},officialVideo{videoSeq},plainBody,title') | |||
VOD_KEY_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/%s/inkey' % video_id | |||
key_json = self._download_json(VOD_KEY_ENDPOINT, video_id, | |||
headers={"referer": "https://www.vlive.tv"}) | |||
key = key_json["inkey"] | |||
video_seq = str_or_none(try_get( | |||
post, lambda x: x['officialVideo']['videoSeq'])) | |||
if video_seq: | |||
return self.url_result( | |||
'http://www.vlive.tv/video/' + video_seq, | |||
VLiveIE.ie_key(), video_seq) | |||
return merge_dicts( | |||
self._get_common_fields(webpage, params), | |||
self._extract_video_info(video_id, long_video_id, key)) | |||
title = post['title'] | |||
entries = [] | |||
for idx, video in enumerate(post['attachments']['video'].values()): | |||
video_id = video.get('videoId') | |||
if not video_id: | |||
continue | |||
upload_type = video.get('uploadType') | |||
upload_info = video.get('uploadInfo') or {} | |||
entry = None | |||
if upload_type == 'SOS': | |||
download = self._call_api( | |||
self._SOS_TMPL, video_id)['videoUrl']['download'] | |||
formats = [] | |||
for f_id, f_url in download.items(): | |||
formats.append({ | |||
'format_id': f_id, | |||
'url': f_url, | |||
'height': int_or_none(f_id[:-1]), | |||
}) | |||
self._sort_formats(formats) | |||
entry = { | |||
'formats': formats, | |||
'id': video_id, | |||
'thumbnail': upload_info.get('imageUrl'), | |||
} | |||
elif upload_type == 'V': | |||
vod_id = upload_info.get('videoId') | |||
if not vod_id: | |||
continue | |||
inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey'] | |||
entry = self._extract_video_info(video_id, vod_id, inkey) | |||
if entry: | |||
entry['title'] = '%s_part%s' % (title, idx) | |||
entries.append(entry) | |||
return self.playlist_result( | |||
entries, post_id, title, strip_or_none(post.get('plainBody'))) | |||
class VLiveChannelIE(InfoExtractor): | |||
class VLiveChannelIE(VLiveBaseIE): | |||
IE_NAME = 'vlive:channel' | |||
_VALID_URL = r'https?://(?:(?:www|m)\.)?(?:channels\.vlive\.tv/|vlive\.tv/channels?/)(?P<id>[0-9A-Z]+)' | |||
_VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)' | |||
_TESTS = [{ | |||
'url': 'https://channels.vlive.tv/FCD4B', | |||
'url': 'http://channels.vlive.tv/FCD4B', | |||
'info_dict': { | |||
'id': 'FCD4B', | |||
'title': 'MAMAMOO', | |||
@@ -226,63 +265,39 @@ class VLiveChannelIE(InfoExtractor): | |||
'playlist_mincount': 110 | |||
}, { | |||
'url': 'https://www.vlive.tv/channel/FCD4B', | |||
'info_dict': { | |||
'id': 'FCD4B', | |||
'title': 'MAMAMOO', | |||
}, | |||
'playlist_mincount': 110 | |||
'only_matching': True, | |||
}] | |||
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b' | |||
def _call_api(self, path, channel_key_suffix, channel_value, note, query): | |||
q = { | |||
'app_id': self._APP_ID, | |||
'channel' + channel_key_suffix: channel_value, | |||
} | |||
q.update(query) | |||
return self._download_json( | |||
'http://api.vfan.vlive.tv/vproxy/channelplus/' + path, | |||
channel_value, note='Downloading ' + note, query=q)['result'] | |||
def _real_extract(self, url): | |||
channel_code = self._match_id(url) | |||
webpage = self._download_webpage( | |||
'http://channels.vlive.tv/%s/video' % channel_code, channel_code) | |||
app_id = None | |||
app_js_url = self._search_regex( | |||
r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1', | |||
webpage, 'app js', default=None, group='url') | |||
if app_js_url: | |||
app_js = self._download_webpage( | |||
app_js_url, channel_code, 'Downloading app JS', fatal=False) | |||
if app_js: | |||
app_id = self._search_regex( | |||
r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]', | |||
app_js, 'app id', default=None) | |||
app_id = app_id or self._APP_ID | |||
channel_info = self._download_json( | |||
'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode', | |||
channel_code, note='Downloading decode channel code', | |||
query={ | |||
'app_id': app_id, | |||
'channelCode': channel_code, | |||
'_': int(time.time()) | |||
}) | |||
channel_seq = self._call_api( | |||
'decodeChannelCode', 'Code', channel_code, | |||
'decode channel code', {})['channelSeq'] | |||
channel_seq = channel_info['result']['channelSeq'] | |||
channel_name = None | |||
entries = [] | |||
for page_num in itertools.count(1): | |||
video_list = self._download_json( | |||
'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList', | |||
channel_code, note='Downloading channel list page #%d' % page_num, | |||
query={ | |||
'app_id': app_id, | |||
'channelSeq': channel_seq, | |||
video_list = self._call_api( | |||
'getChannelVideoList', 'Seq', channel_seq, | |||
'channel list page #%d' % page_num, { | |||
# Large values of maxNumOfRows (~300 or above) may cause | |||
# empty responses (see [1]), e.g. this happens for [2] that | |||
# has more than 300 videos. | |||
# 1. https://github.com/ytdl-org/youtube-dl/issues/13830 | |||
# 2. http://channels.vlive.tv/EDBF. | |||
'maxNumOfRows': 100, | |||
'_': int(time.time()), | |||
'pageNo': page_num | |||
} | |||
) | |||
@@ -290,11 +305,11 @@ class VLiveChannelIE(InfoExtractor): | |||
if not channel_name: | |||
channel_name = try_get( | |||
video_list, | |||
lambda x: x['result']['channelInfo']['channelName'], | |||
lambda x: x['channelInfo']['channelName'], | |||
compat_str) | |||
videos = try_get( | |||
video_list, lambda x: x['result']['videoList'], list) | |||
video_list, lambda x: x['videoList'], list) | |||
if not videos: | |||
break | |||
@@ -310,79 +325,3 @@ class VLiveChannelIE(InfoExtractor): | |||
return self.playlist_result( | |||
entries, channel_code, channel_name) | |||
class VLivePlaylistIE(InfoExtractor):
    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
    _TESTS = [{
        # regular working playlist
        'url': 'https://www.vlive.tv/video/117956/playlist/117963',
        'info_dict': {
            'id': '117963',
            'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
        },
        'playlist_mincount': 10
    }, {
        # playlist with no playlistVideoSeqs
        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
        'info_dict': {
            'id': '22867',
            'ext': 'mp4',
            'title': '[V LIVE] Valentine Day Message from MINA',
            'creator': 'TWICE',
            'view_count': int
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _build_video_result(self, video_id, message):
        # Fall back to extracting the single referenced video, explaining
        # why on screen.
        self.to_screen(message)
        video_url = self._VIDEO_URL_TEMPLATE % video_id
        return self.url_result(
            video_url, ie=VLiveIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        video_id, playlist_id = re.match(self._VALID_URL, url).group(
            'video_id', 'id')

        # Honor --no-playlist by downloading only the referenced video.
        if self._downloader.params.get('noplaylist'):
            return self._build_video_result(
                video_id,
                'Downloading just video %s because of --no-playlist'
                % video_id)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        webpage = self._download_webpage(
            'http://www.vlive.tv/video/%s/playlist/%s'
            % (video_id, playlist_id), playlist_id)

        # Playlist pages embed the member video ids as a JS array; if that
        # array is missing, the page is effectively a single video.
        raw_item_ids = self._search_regex(
            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
            'playlist video seqs', default=None, fatal=False)
        if not raw_item_ids:
            return self._build_video_result(
                video_id,
                'Downloading just video %s because no playlist was found'
                % video_id)

        item_ids = self._parse_json(raw_item_ids, playlist_id)
        entries = []
        for item_id in item_ids:
            entries.append(self.url_result(
                self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
                video_id=compat_str(item_id)))

        playlist_name = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            webpage, 'playlist title', fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_name)
@@ -1335,44 +1335,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||
return self._parse_json( | |||
uppercase_escape(config), video_id, fatal=False) | |||
def _get_music_metadata_from_yt_initial(self, yt_initial): | |||
music_metadata = [] | |||
key_map = { | |||
'Album': 'album', | |||
'Artist': 'artist', | |||
'Song': 'track' | |||
} | |||
contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents']) | |||
if type(contents) is list: | |||
for content in contents: | |||
music_track = {} | |||
if type(content) is not dict: | |||
continue | |||
videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer']) | |||
if type(videoSecondaryInfoRenderer) is not dict: | |||
continue | |||
rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows']) | |||
if type(rows) is not list: | |||
continue | |||
for row in rows: | |||
metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer']) | |||
if type(metadataRowRenderer) is not dict: | |||
continue | |||
key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText']) | |||
value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \ | |||
try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text']) | |||
if type(key) is not str or type(value) is not str: | |||
continue | |||
if key in key_map: | |||
if key_map[key] in music_track: | |||
# we've started on a new track | |||
music_metadata.append(music_track) | |||
music_track = {} | |||
music_track[key_map[key]] = value | |||
if len(music_track.keys()): | |||
music_metadata.append(music_track) | |||
return music_metadata | |||
def _get_automatic_captions(self, video_id, webpage): | |||
"""We need the webpage for getting the captions url, pass it as an | |||
argument to speed up the process.""" | |||
@@ -2295,7 +2257,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||
# Youtube Music Auto-generated description | |||
release_date = release_year = None | |||
if video_description: | |||
mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description) | |||
mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description) | |||
if mobj: | |||
if not track: | |||
track = mobj.group('track').strip() | |||
@@ -2312,13 +2274,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||
if release_year: | |||
release_year = int(release_year) | |||
yt_initial = self._get_yt_initial_data(video_id, video_webpage) | |||
if yt_initial: | |||
music_metadata = self._get_music_metadata_from_yt_initial(yt_initial) | |||
if len(music_metadata): | |||
album = music_metadata[0].get('album') | |||
artist = music_metadata[0].get('artist') | |||
track = music_metadata[0].get('track') | |||
yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage) | |||
contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or [] | |||
for content in contents: | |||
rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or [] | |||
multiple_songs = False | |||
for row in rows: | |||
if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True: | |||
multiple_songs = True | |||
break | |||
for row in rows: | |||
mrr = row.get('metadataRowRenderer') or {} | |||
mrr_title = try_get( | |||
mrr, lambda x: x['title']['simpleText'], compat_str) | |||
mrr_contents = try_get( | |||
mrr, lambda x: x['contents'][0], dict) or {} | |||
mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str) | |||
if not (mrr_title and mrr_contents_text): | |||
continue | |||
if mrr_title == 'License': | |||
video_license = mrr_contents_text | |||
elif not multiple_songs: | |||
if mrr_title == 'Album': | |||
album = mrr_contents_text | |||
elif mrr_title == 'Artist': | |||
artist = mrr_contents_text | |||
elif mrr_title == 'Song': | |||
track = mrr_contents_text | |||
m_episode = re.search( | |||
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>', | |||