Old extractors left behind: VLivePlaylistIE, YoutubeSearchURLIE, YoutubeShowIE, YoutubeFavouritesIE. If removing old extractors, make the corresponding changes in docs/supportedsites.md and youtube_dlc/extractor/extractors.py.
Not merged: .github/ISSUE_TEMPLATE/1_broken_site.md, .github/ISSUE_TEMPLATE/2_site_support_request.md, .github/ISSUE_TEMPLATE/3_site_feature_request.md, .github/ISSUE_TEMPLATE/4_bug_report.md, .github/ISSUE_TEMPLATE/5_feature_request.md, test/test_all_urls.py, youtube_dlc/version.py, Changelog.
@@ -61,7 +61,7 @@ def build_lazy_ie(ie, name): | |||
return s | |||
# find the correct sorting and add the required base classes so that sublcasses | |||
# find the correct sorting and add the required base classes so that subclasses | |||
# can be correctly created | |||
classes = _ALL_CLASSES[:-1] | |||
ordered_cls = [] | |||
@@ -59,9 +59,9 @@ | |||
- **ARD:mediathek** | |||
- **ARDBetaMediathek** | |||
- **Arkena** | |||
- **arte.tv:+7** | |||
- **arte.tv:embed** | |||
- **arte.tv:playlist** | |||
- **ArteTV** | |||
- **ArteTVEmbed** | |||
- **ArteTVPlaylist** | |||
- **AsianCrush** | |||
- **AsianCrushPlaylist** | |||
- **AtresPlayer** | |||
@@ -424,6 +424,7 @@ | |||
- **la7.it** | |||
- **laola1tv** | |||
- **laola1tv:embed** | |||
- **lbry.tv** | |||
- **LCI** | |||
- **Lcp** | |||
- **LcpPlay** | |||
@@ -835,8 +836,6 @@ | |||
- **SpankBangPlaylist** | |||
- **Spankwire** | |||
- **Spiegel** | |||
- **Spiegel:Article**: Articles on spiegel.de | |||
- **Spiegeltv** | |||
- **sport.francetvinfo.fr** | |||
- **Sport5** | |||
- **SportBox** | |||
@@ -1147,19 +1146,18 @@ | |||
- **YourPorn** | |||
- **YourUpload** | |||
- **youtube**: YouTube.com | |||
- **youtube:channel**: YouTube.com channels | |||
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication) | |||
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication) | |||
- **youtube:live**: YouTube.com live streams | |||
- **youtube:playlist**: YouTube.com playlists | |||
- **youtube:playlists**: YouTube.com user/channel playlists | |||
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication) | |||
- **youtube:search**: YouTube.com searches | |||
- **youtube:search:date**: YouTube.com searches, newest videos first | |||
- **youtube:search_url**: YouTube.com search URLs | |||
- **youtube:show**: YouTube.com (multi-season) shows | |||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) | |||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword) | |||
- **YoutubeYtUser**: YouTube.com user videos (URL or "ytuser" keyword) | |||
- **youtube:tab**: YouTube.com tab | |||
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) | |||
- **Zapiks** | |||
- **Zaq1** | |||
@@ -31,15 +31,17 @@ class TestAllURLsMatching(unittest.TestCase): | |||
def test_youtube_playlist_matching(self): | |||
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) | |||
assertTab = lambda url: self.assertMatch(url, ['youtube:tab']) | |||
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') | |||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585 | |||
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') | |||
assertPlaylist('PL63F0C78739B09958') | |||
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') | |||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') | |||
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') | |||
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668 | |||
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') | |||
assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668 | |||
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) | |||
# Top tracks | |||
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101') | |||
assertTab('https://www.youtube.com/playlist?list=MCUS.20142101') | |||
def test_youtube_matching(self): | |||
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M')) | |||
@@ -50,26 +52,22 @@ class TestAllURLsMatching(unittest.TestCase): | |||
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube']) | |||
def test_youtube_channel_matching(self): | |||
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) | |||
assertChannel = lambda url: self.assertMatch(url, ['youtube:tab']) | |||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') | |||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') | |||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') | |||
def test_youtube_user_matching(self): | |||
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user']) | |||
# def test_youtube_user_matching(self): | |||
# self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab']) | |||
def test_youtube_feeds(self): | |||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater']) | |||
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions']) | |||
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended']) | |||
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites']) | |||
def test_youtube_show_matching(self): | |||
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) | |||
def test_youtube_search_matching(self): | |||
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) | |||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) | |||
# def test_youtube_search_matching(self): | |||
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) | |||
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) | |||
def test_youtube_extract(self): | |||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) | |||
@@ -937,6 +937,28 @@ class TestUtil(unittest.TestCase): | |||
self.assertEqual(d['x'], 1) | |||
self.assertEqual(d['y'], 'a') | |||
# Just drop ! prefix for now though this results in a wrong value | |||
on = js_to_json('''{ | |||
a: !0, | |||
b: !1, | |||
c: !!0, | |||
d: !!42.42, | |||
e: !!![], | |||
f: !"abc", | |||
g: !"", | |||
!42: 42 | |||
}''') | |||
self.assertEqual(json.loads(on), { | |||
'a': 0, | |||
'b': 1, | |||
'c': 0, | |||
'd': 42.42, | |||
'e': [], | |||
'f': "abc", | |||
'g': "", | |||
'42': 42 | |||
}) | |||
on = js_to_json('["abc", "def",]') | |||
self.assertEqual(json.loads(on), ['abc', 'def']) | |||
@@ -994,6 +1016,12 @@ class TestUtil(unittest.TestCase): | |||
on = js_to_json('{42:4.2e1}') | |||
self.assertEqual(json.loads(on), {'42': 42.0}) | |||
on = js_to_json('{ "0x40": "0x40" }') | |||
self.assertEqual(json.loads(on), {'0x40': '0x40'}) | |||
on = js_to_json('{ "040": "040" }') | |||
self.assertEqual(json.loads(on), {'040': '040'}) | |||
def test_js_to_json_malformed(self): | |||
self.assertEqual(js_to_json('42a1'), '42"a1"') | |||
self.assertEqual(js_to_json('42a-1'), '42"a"-1') | |||
@@ -275,7 +275,7 @@ class AfreecaTVIE(InfoExtractor): | |||
video_element = video_xml.findall(compat_xpath('./track/video'))[-1] | |||
if video_element is None or video_element.text is None: | |||
raise ExtractorError( | |||
'Video %s video does not exist' % video_id, expected=True) | |||
'Video %s does not exist' % video_id, expected=True) | |||
video_url = video_element.text.strip() | |||
@@ -4,23 +4,57 @@ from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
from ..compat import compat_str | |||
from ..compat import ( | |||
compat_str, | |||
compat_urlparse, | |||
) | |||
from ..utils import ( | |||
ExtractorError, | |||
int_or_none, | |||
qualities, | |||
try_get, | |||
unified_strdate, | |||
url_or_none, | |||
) | |||
# There are different sources of video in arte.tv, the extraction process | |||
# is different for each one. The videos usually expire in 7 days, so we can't | |||
# add tests. | |||
class ArteTVBaseIE(InfoExtractor): | |||
def _extract_from_json_url(self, json_url, video_id, lang, title=None): | |||
info = self._download_json(json_url, video_id) | |||
_ARTE_LANGUAGES = 'fr|de|en|es|it|pl' | |||
_API_BASE = 'https://api.arte.tv/api/player/v1' | |||
class ArteTVIE(ArteTVBaseIE): | |||
_VALID_URL = r'''(?x) | |||
https?:// | |||
(?: | |||
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos| | |||
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s) | |||
) | |||
/(?P<id>\d{6}-\d{3}-[AF]) | |||
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES} | |||
_TESTS = [{ | |||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', | |||
'info_dict': { | |||
'id': '088501-000-A', | |||
'ext': 'mp4', | |||
'title': 'Mexico: Stealing Petrol to Survive', | |||
'upload_date': '20190628', | |||
}, | |||
}, { | |||
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/', | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A', | |||
'only_matching': True, | |||
}] | |||
def _real_extract(self, url): | |||
mobj = re.match(self._VALID_URL, url) | |||
video_id = mobj.group('id') | |||
lang = mobj.group('lang') or mobj.group('lang_2') | |||
info = self._download_json( | |||
'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id) | |||
player_info = info['videoJsonPlayer'] | |||
vsr = try_get(player_info, lambda x: x['VSR'], dict) | |||
@@ -37,18 +71,11 @@ class ArteTVBaseIE(InfoExtractor): | |||
if not upload_date_str: | |||
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0] | |||
title = (player_info.get('VTI') or title or player_info['VID']).strip() | |||
title = (player_info.get('VTI') or player_info['VID']).strip() | |||
subtitle = player_info.get('VSU', '').strip() | |||
if subtitle: | |||
title += ' - %s' % subtitle | |||
info_dict = { | |||
'id': player_info['VID'], | |||
'title': title, | |||
'description': player_info.get('VDE'), | |||
'upload_date': unified_strdate(upload_date_str), | |||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), | |||
} | |||
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ']) | |||
LANGS = { | |||
@@ -65,6 +92,10 @@ class ArteTVBaseIE(InfoExtractor): | |||
formats = [] | |||
for format_id, format_dict in vsr.items(): | |||
f = dict(format_dict) | |||
format_url = url_or_none(f.get('url')) | |||
streamer = f.get('streamer') | |||
if not format_url and not streamer: | |||
continue | |||
versionCode = f.get('versionCode') | |||
l = re.escape(langcode) | |||
@@ -107,6 +138,16 @@ class ArteTVBaseIE(InfoExtractor): | |||
else: | |||
lang_pref = -1 | |||
media_type = f.get('mediaType') | |||
if media_type == 'hls': | |||
m3u8_formats = self._extract_m3u8_formats( | |||
format_url, video_id, 'mp4', entry_protocol='m3u8_native', | |||
m3u8_id=format_id, fatal=False) | |||
for m3u8_format in m3u8_formats: | |||
m3u8_format['language_preference'] = lang_pref | |||
formats.extend(m3u8_formats) | |||
continue | |||
format = { | |||
'format_id': format_id, | |||
'preference': -10 if f.get('videoFormat') == 'M3U8' else None, | |||
@@ -118,7 +159,7 @@ class ArteTVBaseIE(InfoExtractor): | |||
'quality': qfunc(f.get('quality')), | |||
} | |||
if f.get('mediaType') == 'rtmp': | |||
if media_type == 'rtmp': | |||
format['url'] = f['streamer'] | |||
format['play_path'] = 'mp4:' + f['url'] | |||
format['ext'] = 'flv' | |||
@@ -127,56 +168,50 @@ class ArteTVBaseIE(InfoExtractor): | |||
formats.append(format) | |||
self._check_formats(formats, video_id) | |||
self._sort_formats(formats) | |||
info_dict['formats'] = formats | |||
return info_dict | |||
return { | |||
'id': player_info.get('VID') or video_id, | |||
'title': title, | |||
'description': player_info.get('VDE'), | |||
'upload_date': unified_strdate(upload_date_str), | |||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), | |||
'formats': formats, | |||
} | |||
class ArteTVPlus7IE(ArteTVBaseIE): | |||
IE_NAME = 'arte.tv:+7' | |||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])' | |||
class ArteTVEmbedIE(InfoExtractor): | |||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+' | |||
_TESTS = [{ | |||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', | |||
'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A', | |||
'info_dict': { | |||
'id': '088501-000-A', | |||
'id': '100605-013-A', | |||
'ext': 'mp4', | |||
'title': 'Mexico: Stealing Petrol to Survive', | |||
'upload_date': '20190628', | |||
'title': 'United we Stream November Lockdown Edition #13', | |||
'description': 'md5:be40b667f45189632b78c1425c7c2ce1', | |||
'upload_date': '20201116', | |||
}, | |||
}, { | |||
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A', | |||
'only_matching': True, | |||
}] | |||
def _real_extract(self, url): | |||
lang, video_id = re.match(self._VALID_URL, url).groups() | |||
return self._extract_from_json_url( | |||
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id), | |||
video_id, lang) | |||
class ArteTVEmbedIE(ArteTVPlus7IE): | |||
IE_NAME = 'arte.tv:embed' | |||
_VALID_URL = r'''(?x) | |||
https://www\.arte\.tv | |||
/player/v3/index\.php\?json_url= | |||
(?P<json_url> | |||
https?://api\.arte\.tv/api/player/v1/config/ | |||
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF]) | |||
) | |||
''' | |||
_TESTS = [] | |||
@staticmethod | |||
def _extract_urls(webpage): | |||
return [url for _, url in re.findall( | |||
r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1', | |||
webpage)] | |||
def _real_extract(self, url): | |||
json_url, lang, video_id = re.match(self._VALID_URL, url).groups() | |||
return self._extract_from_json_url(json_url, video_id, lang) | |||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) | |||
json_url = qs['json_url'][0] | |||
video_id = ArteTVIE._match_id(json_url) | |||
return self.url_result( | |||
json_url, ie=ArteTVIE.ie_key(), video_id=video_id) | |||
class ArteTVPlaylistIE(ArteTVBaseIE): | |||
IE_NAME = 'arte.tv:playlist' | |||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})' | |||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES | |||
_TESTS = [{ | |||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', | |||
'info_dict': { | |||
@@ -185,17 +220,35 @@ class ArteTVPlaylistIE(ArteTVBaseIE): | |||
'description': 'md5:d322c55011514b3a7241f7fb80d494c2', | |||
}, | |||
'playlist_mincount': 6, | |||
}, { | |||
'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/', | |||
'only_matching': True, | |||
}] | |||
def _real_extract(self, url): | |||
lang, playlist_id = re.match(self._VALID_URL, url).groups() | |||
collection = self._download_json( | |||
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos' | |||
% (lang, playlist_id), playlist_id) | |||
'%s/collectionData/%s/%s?source=videos' | |||
% (self._API_BASE, lang, playlist_id), playlist_id) | |||
entries = [] | |||
for video in collection['videos']: | |||
if not isinstance(video, dict): | |||
continue | |||
video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl')) | |||
if not video_url: | |||
continue | |||
video_id = video.get('programId') | |||
entries.append({ | |||
'_type': 'url_transparent', | |||
'url': video_url, | |||
'id': video_id, | |||
'title': video.get('title'), | |||
'alt_title': video.get('subtitle'), | |||
'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)), | |||
'duration': int_or_none(video.get('durationSeconds')), | |||
'view_count': int_or_none(video.get('views')), | |||
'ie_key': ArteTVIE.ie_key(), | |||
}) | |||
title = collection.get('title') | |||
description = collection.get('shortDescription') or collection.get('teaserText') | |||
entries = [ | |||
self._extract_from_json_url( | |||
video['jsonUrl'], video.get('programId') or playlist_id, lang) | |||
for video in collection['videos'] if video.get('jsonUrl')] | |||
return self.playlist_result(entries, playlist_id, title, description) |
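A minimal sketch of how the reworked ArteTVIE maps a page URL to the player API request. The regex, language list and API base are copied from this diff; the standalone-script shape and the sample URL (taken from the _TESTS entry) are just for illustration.

```python
import re

_ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
_API_BASE = 'https://api.arte.tv/api/player/v1'
_VALID_URL = r'''(?x)
    https?://
    (?:
        (?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
        api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
    )
    /(?P<id>\d{6}-\d{3}-[AF])
''' % {'langs': _ARTE_LANGUAGES}

mobj = re.match(_VALID_URL, 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/')
video_id = mobj.group('id')                        # '088501-000-A'
lang = mobj.group('lang') or mobj.group('lang_2')  # 'en'
# _real_extract then downloads this JSON config instead of a pre-built json_url:
print('%s/config/%s/%s' % (_API_BASE, lang, video_id))
# -> https://api.arte.tv/api/player/v1/config/en/088501-000-A
```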
@@ -1,3 +1,4 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import random | |||
@@ -5,10 +6,7 @@ import re | |||
import time | |||
from .common import InfoExtractor | |||
from ..compat import ( | |||
compat_str, | |||
compat_urlparse, | |||
) | |||
from ..compat import compat_str | |||
from ..utils import ( | |||
ExtractorError, | |||
float_or_none, | |||
@@ -17,71 +15,32 @@ from ..utils import ( | |||
parse_filesize, | |||
str_or_none, | |||
try_get, | |||
unescapeHTML, | |||
update_url_query, | |||
unified_strdate, | |||
unified_timestamp, | |||
url_or_none, | |||
urljoin, | |||
) | |||
class BandcampBaseIE(InfoExtractor): | |||
"""Provide base functions for Bandcamp extractors""" | |||
def _extract_json_from_html_data_attribute(self, webpage, suffix, video_id): | |||
json_string = self._html_search_regex( | |||
r' data-%s="([^"]*)' % suffix, | |||
webpage, '%s json' % suffix, default='{}') | |||
return self._parse_json(json_string, video_id) | |||
def _parse_json_track(self, json): | |||
formats = [] | |||
file_ = json.get('file') | |||
if isinstance(file_, dict): | |||
for format_id, format_url in file_.items(): | |||
if not url_or_none(format_url): | |||
continue | |||
ext, abr_str = format_id.split('-', 1) | |||
formats.append({ | |||
'format_id': format_id, | |||
'url': self._proto_relative_url(format_url, 'http:'), | |||
'ext': ext, | |||
'vcodec': 'none', | |||
'acodec': ext, | |||
'abr': int_or_none(abr_str), | |||
}) | |||
return { | |||
'duration': float_or_none(json.get('duration')), | |||
'id': str_or_none(json.get('track_id') or json.get('id')), | |||
'title': json.get('title'), | |||
'title_link': json.get('title_link'), | |||
'number': int_or_none(json.get('track_num')), | |||
'formats': formats | |||
} | |||
class BandcampIE(BandcampBaseIE): | |||
IE_NAME = "Bandcamp:track" | |||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)' | |||
class BandcampIE(InfoExtractor): | |||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)' | |||
_TESTS = [{ | |||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', | |||
'md5': 'c557841d5e50261777a6585648adf439', | |||
'info_dict': { | |||
'id': '1812978515', | |||
'ext': 'mp3', | |||
'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", | |||
'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭", | |||
'duration': 9.8485, | |||
'uploader': "youtube-dl \"'/\\\u00e4\u21ad", | |||
'timestamp': 1354224127, | |||
'uploader': 'youtube-dl "\'/\\ä↭', | |||
'upload_date': '20121129', | |||
'timestamp': 1354224127, | |||
}, | |||
'_skip': 'There is a limit of 200 free downloads / month for the test song' | |||
}, { | |||
# free download | |||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle', | |||
'md5': '5d92af55811e47f38962a54c30b07ef0', | |||
'info_dict': { | |||
'id': '2650410135', | |||
'ext': 'aiff', | |||
@@ -120,52 +79,59 @@ class BandcampIE(BandcampBaseIE): | |||
}, | |||
}] | |||
def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True): | |||
return self._parse_json(self._html_search_regex( | |||
r'data-%s=(["\'])({.+?})\1' % attr, webpage, | |||
attr + ' data', group=2), video_id, fatal=fatal) | |||
def _real_extract(self, url): | |||
mobj = re.match(self._VALID_URL, url) | |||
title = mobj.group('title') | |||
url_track_title = title | |||
title = self._match_id(url) | |||
webpage = self._download_webpage(url, title) | |||
thumbnail = self._html_search_meta('og:image', webpage, default=None) | |||
json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", url_track_title) | |||
json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", url_track_title) | |||
json_tracks = json_tralbum.get('trackinfo') | |||
if not json_tracks: | |||
raise ExtractorError('Could not extract track') | |||
track = self._parse_json_track(json_tracks[0]) | |||
artist = json_tralbum.get('artist') | |||
album_title = json_embed.get('album_title') | |||
json_album = json_tralbum.get('packages') | |||
if json_album: | |||
json_album = json_album[0] | |||
album_publish_date = json_album.get('album_publish_date') | |||
album_release_date = json_album.get('album_release_date') | |||
else: | |||
album_publish_date = None | |||
album_release_date = json_tralbum.get('album_release_date') | |||
timestamp = unified_timestamp(json_tralbum.get('current', {}).get('publish_date') or album_publish_date) | |||
release_date = unified_strdate(album_release_date) | |||
download_link = self._search_regex( | |||
r'freeDownloadPage(?:["\']|"):\s*(["\']|")(?P<url>(?:(?!\1).)+)\1', webpage, | |||
'download link', default=None, group='url') | |||
tralbum = self._extract_data_attr(webpage, title) | |||
thumbnail = self._og_search_thumbnail(webpage) | |||
track_id = None | |||
track = None | |||
track_number = None | |||
duration = None | |||
formats = [] | |||
track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict) | |||
if track_info: | |||
file_ = track_info.get('file') | |||
if isinstance(file_, dict): | |||
for format_id, format_url in file_.items(): | |||
if not url_or_none(format_url): | |||
continue | |||
ext, abr_str = format_id.split('-', 1) | |||
formats.append({ | |||
'format_id': format_id, | |||
'url': self._proto_relative_url(format_url, 'http:'), | |||
'ext': ext, | |||
'vcodec': 'none', | |||
'acodec': ext, | |||
'abr': int_or_none(abr_str), | |||
}) | |||
track = track_info.get('title') | |||
track_id = str_or_none( | |||
track_info.get('track_id') or track_info.get('id')) | |||
track_number = int_or_none(track_info.get('track_num')) | |||
duration = float_or_none(track_info.get('duration')) | |||
embed = self._extract_data_attr(webpage, title, 'embed', False) | |||
current = tralbum.get('current') or {} | |||
artist = embed.get('artist') or current.get('artist') or tralbum.get('artist') | |||
timestamp = unified_timestamp( | |||
current.get('publish_date') or tralbum.get('album_publish_date')) | |||
download_link = tralbum.get('freeDownloadPage') | |||
if download_link: | |||
track_id = self._search_regex( | |||
r'\?id=(?P<id>\d+)&', | |||
download_link, 'track id') | |||
track_id = compat_str(tralbum['id']) | |||
download_webpage = self._download_webpage( | |||
download_link, track_id, 'Downloading free downloads page') | |||
blob = self._parse_json( | |||
self._search_regex( | |||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage, | |||
'blob', group='blob'), | |||
track_id, transform_source=unescapeHTML) | |||
blob = self._extract_data_attr(download_webpage, track_id, 'blob') | |||
info = try_get( | |||
blob, (lambda x: x['digital_items'][0], | |||
@@ -173,6 +139,8 @@ class BandcampIE(BandcampBaseIE): | |||
if info: | |||
downloads = info.get('downloads') | |||
if isinstance(downloads, dict): | |||
if not track: | |||
track = info.get('title') | |||
if not artist: | |||
artist = info.get('artist') | |||
if not thumbnail: | |||
@@ -206,7 +174,7 @@ class BandcampIE(BandcampBaseIE): | |||
retry_url = url_or_none(stat.get('retry_url')) | |||
if not retry_url: | |||
continue | |||
track['formats'].append({ | |||
formats.append({ | |||
'url': self._proto_relative_url(retry_url, 'http:'), | |||
'ext': download_formats.get(format_id), | |||
'format_id': format_id, | |||
@@ -215,30 +183,34 @@ class BandcampIE(BandcampBaseIE): | |||
'vcodec': 'none', | |||
}) | |||
self._sort_formats(track['formats']) | |||
self._sort_formats(formats) | |||
title = '%s - %s' % (artist, track.get('title')) if artist else track.get('title') | |||
title = '%s - %s' % (artist, track) if artist else track | |||
if not duration: | |||
duration = float_or_none(self._html_search_meta( | |||
'duration', webpage, default=None)) | |||
return { | |||
'album': album_title, | |||
'artist': artist, | |||
'duration': track['duration'], | |||
'formats': track['formats'], | |||
'id': track['id'], | |||
'release_date': release_date, | |||
'id': track_id, | |||
'title': title, | |||
'thumbnail': thumbnail, | |||
'uploader': artist, | |||
'timestamp': timestamp, | |||
'title': title, | |||
'track': track['title'], | |||
'track_id': track['id'], | |||
'track_number': track['number'], | |||
'uploader': artist | |||
'release_date': unified_strdate(tralbum.get('album_release_date')), | |||
'duration': duration, | |||
'track': track, | |||
'track_number': track_number, | |||
'track_id': track_id, | |||
'artist': artist, | |||
'album': embed.get('album_title'), | |||
'formats': formats, | |||
} | |||
class BandcampAlbumIE(BandcampBaseIE): | |||
class BandcampAlbumIE(BandcampIE): | |||
IE_NAME = 'Bandcamp:album' | |||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?' | |||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?' | |||
_TESTS = [{ | |||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', | |||
@@ -248,7 +220,10 @@ class BandcampAlbumIE(BandcampBaseIE): | |||
'info_dict': { | |||
'id': '1353101989', | |||
'ext': 'mp3', | |||
'title': 'Intro', | |||
'title': 'Blazo - Intro', | |||
'timestamp': 1311756226, | |||
'upload_date': '20110727', | |||
'uploader': 'Blazo', | |||
} | |||
}, | |||
{ | |||
@@ -256,7 +231,10 @@ class BandcampAlbumIE(BandcampBaseIE): | |||
'info_dict': { | |||
'id': '38097443', | |||
'ext': 'mp3', | |||
'title': 'Kero One - Keep It Alive (Blazo remix)', | |||
'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)', | |||
'timestamp': 1311757238, | |||
'upload_date': '20110727', | |||
'uploader': 'Blazo', | |||
} | |||
}, | |||
], | |||
@@ -292,6 +270,7 @@ class BandcampAlbumIE(BandcampBaseIE): | |||
'title': '"Entropy" EP', | |||
'uploader_id': 'jstrecords', | |||
'id': 'entropy-ep', | |||
'description': 'md5:0ff22959c943622972596062f2f366a5', | |||
}, | |||
'playlist_mincount': 3, | |||
}, { | |||
@@ -301,6 +280,7 @@ class BandcampAlbumIE(BandcampBaseIE): | |||
'id': 'we-are-the-plague', | |||
'title': 'WE ARE THE PLAGUE', | |||
'uploader_id': 'insulters', | |||
'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f', | |||
}, | |||
'playlist_count': 2, | |||
}] | |||
@@ -312,41 +292,34 @@ class BandcampAlbumIE(BandcampBaseIE): | |||
else super(BandcampAlbumIE, cls).suitable(url)) | |||
def _real_extract(self, url): | |||
mobj = re.match(self._VALID_URL, url) | |||
uploader_id = mobj.group('subdomain') | |||
album_id = mobj.group('album_id') | |||
uploader_id, album_id = re.match(self._VALID_URL, url).groups() | |||
playlist_id = album_id or uploader_id | |||
webpage = self._download_webpage(url, playlist_id) | |||
json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", playlist_id) | |||
json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", playlist_id) | |||
json_tracks = json_tralbum.get('trackinfo') | |||
if not json_tracks: | |||
raise ExtractorError('Could not extract album tracks') | |||
album_title = json_embed.get('album_title') | |||
tralbum = self._extract_data_attr(webpage, playlist_id) | |||
track_info = tralbum.get('trackinfo') | |||
if not track_info: | |||
raise ExtractorError('The page doesn\'t contain any tracks') | |||
# Only tracks with duration info have songs | |||
tracks = [self._parse_json_track(track) for track in json_tracks] | |||
entries = [ | |||
self.url_result( | |||
compat_urlparse.urljoin(url, track['title_link']), | |||
ie=BandcampIE.ie_key(), video_id=track['id'], | |||
video_title=track['title']) | |||
for track in tracks | |||
if track.get('duration')] | |||
urljoin(url, t['title_link']), BandcampIE.ie_key(), | |||
str_or_none(t.get('track_id') or t.get('id')), t.get('title')) | |||
for t in track_info | |||
if t.get('duration')] | |||
current = tralbum.get('current') or {} | |||
return { | |||
'_type': 'playlist', | |||
'uploader_id': uploader_id, | |||
'id': playlist_id, | |||
'title': album_title, | |||
'entries': entries | |||
'title': current.get('title'), | |||
'description': current.get('about'), | |||
'entries': entries, | |||
} | |||
class BandcampWeeklyIE(InfoExtractor): | |||
class BandcampWeeklyIE(BandcampIE): | |||
IE_NAME = 'Bandcamp:weekly' | |||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)' | |||
_TESTS = [{ | |||
@@ -361,29 +334,23 @@ class BandcampWeeklyIE(InfoExtractor): | |||
'release_date': '20170404', | |||
'series': 'Bandcamp Weekly', | |||
'episode': 'Magic Moments', | |||
'episode_number': 208, | |||
'episode_id': '224', | |||
} | |||
}, | |||
'params': { | |||
'format': 'opus-lo', | |||
}, | |||
}, { | |||
'url': 'https://bandcamp.com/?blah/blah@&show=228', | |||
'only_matching': True | |||
}] | |||
def _real_extract(self, url): | |||
video_id = self._match_id(url) | |||
webpage = self._download_webpage(url, video_id) | |||
blob = self._parse_json( | |||
self._search_regex( | |||
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage, | |||
'blob', group='blob'), | |||
video_id, transform_source=unescapeHTML) | |||
show_id = self._match_id(url) | |||
webpage = self._download_webpage(url, show_id) | |||
show = blob['bcw_show'] | |||
blob = self._extract_data_attr(webpage, show_id, 'blob') | |||
# This is desired because any invalid show id redirects to `bandcamp.com` | |||
# which happens to expose the latest Bandcamp Weekly episode. | |||
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id) | |||
show = blob['bcw_data'][show_id] | |||
formats = [] | |||
for format_id, format_url in show['audio_stream'].items(): | |||
@@ -408,20 +375,8 @@ class BandcampWeeklyIE(InfoExtractor): | |||
if subtitle: | |||
title += ' - %s' % subtitle | |||
episode_number = None | |||
seq = blob.get('bcw_seq') | |||
if seq and isinstance(seq, list): | |||
try: | |||
episode_number = next( | |||
int_or_none(e.get('episode_number')) | |||
for e in seq | |||
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id) | |||
except StopIteration: | |||
pass | |||
return { | |||
'id': video_id, | |||
'id': show_id, | |||
'title': title, | |||
'description': show.get('desc') or show.get('short_desc'), | |||
'duration': float_or_none(show.get('audio_duration')), | |||
@@ -429,7 +384,6 @@ class BandcampWeeklyIE(InfoExtractor): | |||
'release_date': unified_strdate(show.get('published_date')), | |||
'series': 'Bandcamp Weekly', | |||
'episode': show.get('subtitle'), | |||
'episode_number': episode_number, | |||
'episode_id': compat_str(video_id), | |||
'episode_id': show_id, | |||
'formats': formats | |||
} |
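The Bandcamp rewrite folds the old BandcampBaseIE helpers into a single _extract_data_attr that reads JSON straight out of data-tralbum / data-embed / data-blob attributes. A rough standalone sketch of that pattern: the regex is the one added in this diff, while the sample HTML fragment below is made up purely to show the shape of the data.

```python
import json
import re

def extract_data_attr(webpage, attr='tralbum'):
    # Same pattern as BandcampIE._extract_data_attr (group 2 is the JSON blob).
    m = re.search(r'data-%s=(["\'])({.+?})\1' % attr, webpage)
    return json.loads(m.group(2)) if m else None

# Hypothetical page fragment; real pages HTML-escape the quotes, which the
# extractor handles via _html_search_regex/_parse_json.
sample = '<script data-tralbum=\'{"artist": "youtube-dl", "trackinfo": [{"title": "test song", "duration": 9.8485}]}\'></script>'
tralbum = extract_data_attr(sample)
print(tralbum['trackinfo'][0]['title'])  # test song
```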
@@ -1,6 +1,7 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
from ..utils import smuggle_url | |||
@@ -38,7 +39,7 @@ class CNBCIE(InfoExtractor): | |||
class CNBCVideoIE(InfoExtractor): | |||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)' | |||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)' | |||
_TEST = { | |||
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', | |||
'info_dict': { | |||
@@ -56,11 +57,15 @@ class CNBCVideoIE(InfoExtractor): | |||
} | |||
def _real_extract(self, url): | |||
display_id = self._match_id(url) | |||
webpage = self._download_webpage(url, display_id) | |||
video_id = self._search_regex( | |||
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id, | |||
'video id') | |||
path, display_id = re.match(self._VALID_URL, url).groups() | |||
video_id = self._download_json( | |||
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={ | |||
'query': '''{ | |||
page(path: "%s") { | |||
vcpsId | |||
} | |||
}''' % path, | |||
})['data']['page']['vcpsId'] | |||
return self.url_result( | |||
'http://video.cnbc.com/gallery/?video=%s' % video_id, | |||
'http://video.cnbc.com/gallery/?video=%d' % video_id, | |||
CNBCIE.ie_key()) |
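For reference, the new CNBCVideoIE no longer scrapes content_id out of the page; it asks CNBC's GraphQL endpoint for the page's vcpsId and hands the result to CNBCIE. A sketch of just the request construction (endpoint, query-string parameter and response path are taken from the diff; no request is actually made here):

```python
from urllib.parse import urlencode

path = '/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html'
graphql_query = '{\n  page(path: "%s") {\n    vcpsId\n  }\n}' % path
request_url = 'https://webql-redesign.cnbcfm.com/graphql?' + urlencode({'query': graphql_query})
print(request_url)
# The extractor reads resp['data']['page']['vcpsId'] from the JSON response and
# delegates to CNBCIE via http://video.cnbc.com/gallery/?video=<vcpsId>.
```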
@@ -1456,9 +1456,10 @@ class InfoExtractor(object): | |||
try: | |||
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers) | |||
return True | |||
except ExtractorError: | |||
except ExtractorError as e: | |||
self.to_screen( | |||
'%s: %s URL is invalid, skipping' % (video_id, item)) | |||
'%s: %s URL is invalid, skipping: %s' | |||
% (video_id, item, error_to_compat_str(e.cause))) | |||
return False | |||
def http_scheme(self): | |||
@@ -16,6 +16,8 @@ from ..utils import ( | |||
mimetype2ext, | |||
orderedSet, | |||
parse_iso8601, | |||
strip_or_none, | |||
try_get, | |||
) | |||
@@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor): | |||
'uploader': 'gq', | |||
'upload_date': '20170321', | |||
'timestamp': 1490126427, | |||
'description': 'How much grimmer would things be if these people were competent?', | |||
}, | |||
}, { | |||
# JS embed | |||
@@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor): | |||
'title': '3D printed TSA Travel Sentry keys really do open TSA locks', | |||
'uploader': 'arstechnica', | |||
'upload_date': '20150916', | |||
'timestamp': 1442434955, | |||
'timestamp': 1442434920, | |||
} | |||
}, { | |||
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player', | |||
@@ -196,6 +199,13 @@ class CondeNastIE(InfoExtractor): | |||
}) | |||
self._sort_formats(formats) | |||
subtitles = {} | |||
for t, caption in video_info.get('captions', {}).items(): | |||
caption_url = caption.get('src') | |||
if not (t in ('vtt', 'srt', 'tml') and caption_url): | |||
continue | |||
subtitles.setdefault('en', []).append({'url': caption_url}) | |||
return { | |||
'id': video_id, | |||
'formats': formats, | |||
@@ -208,6 +218,7 @@ class CondeNastIE(InfoExtractor): | |||
'season': video_info.get('season_title'), | |||
'timestamp': parse_iso8601(video_info.get('premiere_date')), | |||
'categories': video_info.get('categories'), | |||
'subtitles': subtitles, | |||
} | |||
def _real_extract(self, url): | |||
@@ -225,8 +236,16 @@ class CondeNastIE(InfoExtractor): | |||
if url_type == 'series': | |||
return self._extract_series(url, webpage) | |||
else: | |||
params = self._extract_video_params(webpage, display_id) | |||
info = self._search_json_ld( | |||
webpage, display_id, fatal=False) | |||
video = try_get(self._parse_json(self._search_regex( | |||
r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage, | |||
'preload state', '{}'), display_id), | |||
lambda x: x['transformed']['video']) | |||
if video: | |||
params = {'videoId': video['id']} | |||
info = {'description': strip_or_none(video.get('description'))} | |||
else: | |||
params = self._extract_video_params(webpage, display_id) | |||
info = self._search_json_ld( | |||
webpage, display_id, fatal=False) | |||
info.update(self._extract_video(params)) | |||
return info |
@@ -62,7 +62,7 @@ from .ard import ( | |||
ARDMediathekIE, | |||
) | |||
from .arte import ( | |||
ArteTVPlus7IE, | |||
ArteTVIE, | |||
ArteTVEmbedIE, | |||
ArteTVPlaylistIE, | |||
) | |||
@@ -542,6 +542,7 @@ from .laola1tv import ( | |||
EHFTVIE, | |||
ITTFIE, | |||
) | |||
from .lbry import LBRYIE | |||
from .lci import LCIIE | |||
from .lcp import ( | |||
LcpPlayIE, | |||
@@ -1079,8 +1080,7 @@ from .spankbang import ( | |||
SpankBangPlaylistIE, | |||
) | |||
from .spankwire import SpankwireIE | |||
from .spiegel import SpiegelIE, SpiegelArticleIE | |||
from .spiegeltv import SpiegeltvIE | |||
from .spiegel import SpiegelIE | |||
from .spike import ( | |||
BellatorIE, | |||
ParamountNetworkIE, | |||
@@ -1505,12 +1505,11 @@ from .yourporn import YourPornIE | |||
from .yourupload import YourUploadIE | |||
from .youtube import ( | |||
YoutubeIE, | |||
YoutubeChannelIE, | |||
YoutubeFavouritesIE, | |||
YoutubeHistoryIE, | |||
YoutubeLiveIE, | |||
YoutubeTabIE, | |||
YoutubePlaylistIE, | |||
YoutubePlaylistsIE, | |||
YoutubeRecommendedIE, | |||
YoutubeSearchDateIE, | |||
YoutubeSearchIE, | |||
@@ -1519,7 +1518,7 @@ from .youtube import ( | |||
YoutubeSubscriptionsIE, | |||
YoutubeTruncatedIDIE, | |||
YoutubeTruncatedURLIE, | |||
YoutubeUserIE, | |||
YoutubeYtUserIE, | |||
YoutubeWatchLaterIE, | |||
) | |||
from .zapiks import ZapiksIE | |||
@@ -17,6 +17,7 @@ from ..utils import ( | |||
parse_duration, | |||
try_get, | |||
url_or_none, | |||
urljoin, | |||
) | |||
from .dailymotion import DailymotionIE | |||
@@ -128,18 +129,38 @@ class FranceTVIE(InfoExtractor): | |||
is_live = None | |||
formats = [] | |||
for video in info['videos']: | |||
if video['statut'] != 'ONLINE': | |||
videos = [] | |||
for video in (info.get('videos') or []): | |||
if video.get('statut') != 'ONLINE': | |||
continue | |||
video_url = video['url'] | |||
if not video.get('url'): | |||
continue | |||
videos.append(video) | |||
if not videos: | |||
for device_type in ['desktop', 'mobile']: | |||
fallback_info = self._download_json( | |||
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id, | |||
video_id, 'Downloading fallback %s video JSON' % device_type, query={ | |||
'device_type': device_type, | |||
'browser': 'chrome', | |||
}, fatal=False) | |||
if fallback_info and fallback_info.get('video'): | |||
videos.append(fallback_info['video']) | |||
formats = [] | |||
for video in videos: | |||
video_url = video.get('url') | |||
if not video_url: | |||
continue | |||
if is_live is None: | |||
is_live = (try_get( | |||
video, lambda x: x['plages_ouverture'][0]['direct'], | |||
bool) is True) or '/live.francetv.fr/' in video_url | |||
format_id = video['format'] | |||
video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True | |||
or video.get('is_live') is True | |||
or '/live.francetv.fr/' in video_url) | |||
format_id = video.get('format') | |||
ext = determine_ext(video_url) | |||
if ext == 'f4m': | |||
if georestricted: | |||
@@ -154,6 +175,9 @@ class FranceTVIE(InfoExtractor): | |||
sign(video_url, format_id), video_id, 'mp4', | |||
entry_protocol='m3u8_native', m3u8_id=format_id, | |||
fatal=False)) | |||
elif ext == 'mpd': | |||
formats.extend(self._extract_mpd_formats( | |||
sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False)) | |||
elif video_url.startswith('rtmp'): | |||
formats.append({ | |||
'url': video_url, | |||
@@ -166,6 +190,7 @@ class FranceTVIE(InfoExtractor): | |||
'url': video_url, | |||
'format_id': format_id, | |||
}) | |||
self._sort_formats(formats) | |||
title = info['titre'] | |||
@@ -185,10 +210,10 @@ class FranceTVIE(InfoExtractor): | |||
return { | |||
'id': video_id, | |||
'title': self._live_title(title) if is_live else title, | |||
'description': clean_html(info['synopsis']), | |||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']), | |||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']), | |||
'timestamp': int_or_none(info['diffusion']['timestamp']), | |||
'description': clean_html(info.get('synopsis')), | |||
'thumbnail': urljoin('http://pluzz.francetv.fr', info.get('image')), | |||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')), | |||
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])), | |||
'is_live': is_live, | |||
'formats': formats, | |||
'subtitles': subtitles, | |||
@@ -91,6 +91,7 @@ from .piksel import PikselIE | |||
from .videa import VideaIE | |||
from .twentymin import TwentyMinutenIE | |||
from .ustream import UstreamIE | |||
from .arte import ArteTVEmbedIE | |||
from .videopress import VideoPressIE | |||
from .rutube import RutubeIE | |||
from .limelight import LimelightBaseIE | |||
@@ -2760,11 +2761,9 @@ class GenericIE(InfoExtractor): | |||
return self.url_result(ustream_url, UstreamIE.ie_key()) | |||
# Look for embedded arte.tv player | |||
mobj = re.search( | |||
r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"', | |||
webpage) | |||
if mobj is not None: | |||
return self.url_result(mobj.group('url'), 'ArteTVEmbed') | |||
arte_urls = ArteTVEmbedIE._extract_urls(webpage) | |||
if arte_urls: | |||
return self.playlist_from_matches(arte_urls, video_id, video_title) | |||
# Look for embedded francetv player | |||
mobj = re.search( | |||
@@ -150,7 +150,7 @@ class IqiyiSDKInterpreter(object): | |||
elif function in other_functions: | |||
other_functions[function]() | |||
else: | |||
raise ExtractorError('Unknown funcion %s' % function) | |||
raise ExtractorError('Unknown function %s' % function) | |||
return sdk.target | |||
@@ -0,0 +1,88 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import json | |||
from .common import InfoExtractor | |||
from ..compat import compat_str | |||
from ..utils import ( | |||
determine_ext, | |||
ExtractorError, | |||
int_or_none, | |||
mimetype2ext, | |||
try_get, | |||
) | |||
class LBRYIE(InfoExtractor): | |||
IE_NAME = 'lbry.tv' | |||
_VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[0-9a-zA-Z-]+:[0-9a-z]+/[0-9a-zA-Z().-]+:[0-9a-z])' | |||
_TESTS = [{ | |||
# Video | |||
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1', | |||
'md5': '65bd7ec1f6744ada55da8e4c48a2edf9', | |||
'info_dict': { | |||
'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d', | |||
'ext': 'mp4', | |||
'title': 'First day in LBRY? Start HERE!', | |||
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51', | |||
'timestamp': 1595694354, | |||
'upload_date': '20200725', | |||
} | |||
}, { | |||
# Audio | |||
'url': 'https://lbry.tv/@LBRYFoundation:0/Episode-1:e', | |||
'md5': 'c94017d3eba9b49ce085a8fad6b98d00', | |||
'info_dict': { | |||
'id': 'e7d93d772bd87e2b62d5ab993c1c3ced86ebb396', | |||
'ext': 'mp3', | |||
'title': 'The LBRY Foundation Community Podcast Episode 1 - Introduction, Streaming on LBRY, Transcoding', | |||
'description': 'md5:661ac4f1db09f31728931d7b88807a61', | |||
'timestamp': 1591312601, | |||
'upload_date': '20200604', | |||
} | |||
}, { | |||
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e', | |||
'only_matching': True, | |||
}] | |||
def _call_api_proxy(self, method, display_id, params): | |||
return self._download_json( | |||
'https://api.lbry.tv/api/v1/proxy', display_id, | |||
headers={'Content-Type': 'application/json-rpc'}, | |||
data=json.dumps({ | |||
'method': method, | |||
'params': params, | |||
}).encode())['result'] | |||
def _real_extract(self, url): | |||
display_id = self._match_id(url).replace(':', '#') | |||
uri = 'lbry://' + display_id | |||
result = self._call_api_proxy( | |||
'resolve', display_id, {'urls': [uri]})[uri] | |||
result_value = result['value'] | |||
if result_value.get('stream_type') not in ('video', 'audio'): | |||
raise ExtractorError('Unsupported URL', expected=True) | |||
streaming_url = self._call_api_proxy( | |||
'get', display_id, {'uri': uri})['streaming_url'] | |||
source = result_value.get('source') or {} | |||
media = result_value.get('video') or result_value.get('audio') or {} | |||
signing_channel = result_value.get('signing_channel') or {} | |||
return { | |||
'id': result['claim_id'], | |||
'title': result_value['title'], | |||
'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str), | |||
'description': result_value.get('description'), | |||
'license': result_value.get('license'), | |||
'timestamp': int_or_none(result.get('timestamp')), | |||
'tags': result_value.get('tags'), | |||
'width': int_or_none(media.get('width')), | |||
'height': int_or_none(media.get('height')), | |||
'duration': int_or_none(media.get('duration')), | |||
'channel': signing_channel.get('name'), | |||
'channel_id': signing_channel.get('claim_id'), | |||
'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')), | |||
'filesize': int_or_none(source.get('size')), | |||
'url': streaming_url, | |||
} |
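The new lbry.tv extractor resolves a claim through the api.lbry.tv JSON-RPC proxy and then asks the same proxy for a streaming URL. A minimal sketch of that flow outside youtube-dlc: the endpoint, method names, header and the ':' to '#' display-id rewrite come from the extractor above, but this performs live requests against a 2020-era service, so treat it as illustrative only.

```python
import json
from urllib.request import Request, urlopen

def call_api_proxy(method, params):
    # Mirrors LBRYIE._call_api_proxy: JSON-RPC style POST to the lbry.tv proxy.
    req = Request(
        'https://api.lbry.tv/api/v1/proxy',
        data=json.dumps({'method': method, 'params': params}).encode(),
        headers={'Content-Type': 'application/json-rpc'})
    return json.load(urlopen(req))['result']

# URL path '@Mantega:1/First-day-LBRY:1' with ':' replaced by '#', as in _real_extract.
display_id = '@Mantega#1/First-day-LBRY#1'
uri = 'lbry://' + display_id
result = call_api_proxy('resolve', {'urls': [uri]})[uri]
streaming_url = call_api_proxy('get', {'uri': uri})['streaming_url']
print(result['value']['title'], streaming_url)
```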
@@ -5,28 +5,26 @@ import re | |||
from .common import InfoExtractor | |||
from ..utils import ( | |||
determine_ext, | |||
int_or_none, | |||
parse_duration, | |||
remove_end, | |||
clean_html, | |||
merge_dicts, | |||
) | |||
class LRTIE(InfoExtractor): | |||
IE_NAME = 'lrt.lt' | |||
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)' | |||
_VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))' | |||
_TESTS = [{ | |||
# m3u8 download | |||
'url': 'http://www.lrt.lt/mediateka/irasas/54391/', | |||
'md5': 'fe44cf7e4ab3198055f2c598fc175cb0', | |||
'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene', | |||
'md5': '85cb2bb530f31d91a9c65b479516ade4', | |||
'info_dict': { | |||
'id': '54391', | |||
'id': '2000127261', | |||
'ext': 'mp4', | |||
'title': 'Septynios Kauno dienos', | |||
'description': 'md5:24d84534c7dc76581e59f5689462411a', | |||
'duration': 1783, | |||
'view_count': int, | |||
'like_count': int, | |||
'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė', | |||
'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa', | |||
'duration': 3035, | |||
'timestamp': 1604079000, | |||
'upload_date': '20201030', | |||
}, | |||
}, { | |||
# direct mp3 download | |||
@@ -43,52 +41,35 @@ class LRTIE(InfoExtractor): | |||
}, | |||
}] | |||
def _extract_js_var(self, webpage, var_name, default): | |||
return self._search_regex( | |||
r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name, | |||
webpage, var_name.replace('_', ' '), default, group=2) | |||
def _real_extract(self, url): | |||
video_id = self._match_id(url) | |||
path, video_id = re.match(self._VALID_URL, url).groups() | |||
webpage = self._download_webpage(url, video_id) | |||
title = remove_end(self._og_search_title(webpage), ' - LRT') | |||
formats = [] | |||
for _, file_url in re.findall( | |||
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage): | |||
ext = determine_ext(file_url) | |||
if ext not in ('m3u8', 'mp3'): | |||
continue | |||
# mp3 served as m3u8 produces stuttered media file | |||
if ext == 'm3u8' and '.mp3' in file_url: | |||
continue | |||
if ext == 'm3u8': | |||
formats.extend(self._extract_m3u8_formats( | |||
file_url, video_id, 'mp4', entry_protocol='m3u8_native', | |||
fatal=False)) | |||
elif ext == 'mp3': | |||
formats.append({ | |||
'url': file_url, | |||
'vcodec': 'none', | |||
}) | |||
self._sort_formats(formats) | |||
media_url = self._extract_js_var(webpage, 'main_url', path) | |||
media = self._download_json(self._extract_js_var( | |||
webpage, 'media_info_url', | |||
'https://www.lrt.lt/servisai/stream_url/vod/media_info/'), | |||
video_id, query={'url': media_url}) | |||
jw_data = self._parse_jwplayer_data( | |||
media['playlist_item'], video_id, base_url=url) | |||
thumbnail = self._og_search_thumbnail(webpage) | |||
description = self._og_search_description(webpage) | |||
duration = parse_duration(self._search_regex( | |||
r'var\s+record_len\s*=\s*(["\'])(?P<duration>[0-9]+:[0-9]+:[0-9]+)\1', | |||
webpage, 'duration', default=None, group='duration')) | |||
json_ld_data = self._search_json_ld(webpage, video_id) | |||
view_count = int_or_none(self._html_search_regex( | |||
r'<div[^>]+class=(["\']).*?record-desc-seen.*?\1[^>]*>(?P<count>.+?)</div>', | |||
webpage, 'view count', fatal=False, group='count')) | |||
like_count = int_or_none(self._search_regex( | |||
r'<span[^>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<', | |||
webpage, 'like count', fatal=False, group='count')) | |||
tags = [] | |||
for tag in (media.get('tags') or []): | |||
tag_name = tag.get('name') | |||
if not tag_name: | |||
continue | |||
tags.append(tag_name) | |||
return { | |||
'id': video_id, | |||
'title': title, | |||
'formats': formats, | |||
'thumbnail': thumbnail, | |||
'description': description, | |||
'duration': duration, | |||
'view_count': view_count, | |||
'like_count': like_count, | |||
clean_info = { | |||
'description': clean_html(media.get('content')), | |||
'tags': tags, | |||
} | |||
return merge_dicts(clean_info, jw_data, json_ld_data) |
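The reworked lrt.lt extractor stops scraping file: entries and instead reads two JavaScript variables from the page, then fetches the stream_url media-info service and feeds media['playlist_item'] to _parse_jwplayer_data. A small sketch of just the _extract_js_var step: the regex is the one added in this diff, and the inline-script sample below is hypothetical.

```python
import re

def extract_js_var(webpage, var_name, default=None):
    # Same pattern as LRTIE._extract_js_var in the diff above.
    m = re.search(r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name, webpage)
    return m.group(2) if m else default

# Hypothetical inline script, shaped like the variables the extractor looks for.
page = ("var main_url = '/mediateka/irasas/2000127261'; "
        "var media_info_url = 'https://www.lrt.lt/servisai/stream_url/vod/media_info/';")
media_url = extract_js_var(page, 'main_url')
media_info_url = extract_js_var(page, 'media_info_url')
# The extractor then downloads media_info_url with query {'url': media_url}.
print(media_info_url, media_url)
```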
@@ -1,10 +1,16 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
from ..utils import merge_dicts | |||
from ..utils import ( | |||
clean_html, | |||
dict_get, | |||
float_or_none, | |||
int_or_none, | |||
merge_dicts, | |||
parse_duration, | |||
try_get, | |||
) | |||
class MallTVIE(InfoExtractor): | |||
@@ -17,7 +23,7 @@ class MallTVIE(InfoExtractor): | |||
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', | |||
'ext': 'mp4', | |||
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?', | |||
'description': 'md5:25fc0ec42a72ba602b602c683fa29deb', | |||
'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35', | |||
'duration': 216, | |||
'timestamp': 1538870400, | |||
'upload_date': '20181007', | |||
@@ -37,20 +43,46 @@ class MallTVIE(InfoExtractor): | |||
webpage = self._download_webpage( | |||
url, display_id, headers=self.geo_verification_headers()) | |||
SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b' | |||
video = self._parse_json(self._search_regex( | |||
r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);', | |||
webpage, 'video object'), display_id) | |||
video_source = video['VideoSource'] | |||
video_id = self._search_regex( | |||
SOURCE_RE, webpage, 'video id', group='id') | |||
r'/([\da-z]+)/index\b', video_source, 'video id') | |||
formats = self._extract_m3u8_formats( | |||
video_source + '.m3u8', video_id, 'mp4', 'm3u8_native') | |||
self._sort_formats(formats) | |||
subtitles = {} | |||
for s in (video.get('Subtitles') or {}): | |||
s_url = s.get('Url') | |||
if not s_url: | |||
continue | |||
subtitles.setdefault(s.get('Language') or 'cz', []).append({ | |||
'url': s_url, | |||
}) | |||
entity_counts = video.get('EntityCounts') or {} | |||
media = self._parse_html5_media_entries( | |||
url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id, | |||
m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0] | |||
def get_count(k): | |||
v = entity_counts.get(k + 's') or {} | |||
return int_or_none(dict_get(v, ('Count', 'StrCount'))) | |||
info = self._search_json_ld(webpage, video_id, default={}) | |||
return merge_dicts(media, info, { | |||
return merge_dicts({ | |||
'id': video_id, | |||
'display_id': display_id, | |||
'title': self._og_search_title(webpage, default=None) or display_id, | |||
'description': self._og_search_description(webpage, default=None), | |||
'thumbnail': self._og_search_thumbnail(webpage, default=None), | |||
}) | |||
'title': video.get('Title'), | |||
'description': clean_html(video.get('Description')), | |||
'thumbnail': video.get('ThumbnailUrl'), | |||
'formats': formats, | |||
'subtitles': subtitles, | |||
'duration': int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration')), | |||
'view_count': get_count('View'), | |||
'like_count': get_count('Like'), | |||
'dislike_count': get_count('Dislike'), | |||
'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])), | |||
'comment_count': get_count('Comment'), | |||
}, info) |
@@ -17,9 +17,8 @@ from ..utils import ( | |||
class MGTVIE(InfoExtractor): | |||
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html' | |||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html' | |||
IE_DESC = '芒果TV' | |||
_GEO_COUNTRIES = ['CN'] | |||
_TESTS = [{ | |||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html', | |||
@@ -34,14 +33,18 @@ class MGTVIE(InfoExtractor): | |||
}, { | |||
'url': 'http://www.mgtv.com/b/301817/3826653.html', | |||
'only_matching': True, | |||
}, { | |||
'url': 'https://w.mgtv.com/b/301817/3826653.html', | |||
'only_matching': True, | |||
}] | |||
def _real_extract(self, url): | |||
video_id = self._match_id(url) | |||
tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1] | |||
try: | |||
api_data = self._download_json( | |||
'https://pcweb.api.mgtv.com/player/video', video_id, query={ | |||
'tk2': base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1], | |||
'tk2': tk2, | |||
'video_id': video_id, | |||
}, headers=self.geo_verification_headers())['data'] | |||
except ExtractorError as e: | |||
@@ -56,6 +59,7 @@ class MGTVIE(InfoExtractor): | |||
stream_data = self._download_json( | |||
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={ | |||
'pm2': api_data['atc']['pm2'], | |||
'tk2': tk2, | |||
'video_id': video_id, | |||
}, headers=self.geo_verification_headers())['data'] | |||
stream_domain = stream_data['stream_domain'][0] | |||
@@ -403,6 +403,18 @@ class MTVIE(MTVServicesInfoExtractor): | |||
'only_matching': True, | |||
}] | |||
@staticmethod | |||
def extract_child_with_type(parent, t): | |||
children = parent['children'] | |||
return next(c for c in children if c.get('type') == t) | |||
def _extract_mgid(self, webpage): | |||
data = self._parse_json(self._search_regex( | |||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None) | |||
main_container = self.extract_child_with_type(data, 'MainContainer') | |||
video_player = self.extract_child_with_type(main_container, 'VideoPlayer') | |||
return video_player['props']['media']['video']['config']['uri'] | |||
class MTVJapanIE(MTVServicesInfoExtractor): | |||
IE_NAME = 'mtvjapan' | |||
@@ -10,7 +10,6 @@ from .adobepass import AdobePassIE | |||
from ..compat import compat_urllib_parse_unquote | |||
from ..utils import ( | |||
int_or_none, | |||
js_to_json, | |||
parse_duration, | |||
smuggle_url, | |||
try_get, | |||
@@ -394,8 +393,8 @@ class NBCNewsIE(ThePlatformIE): | |||
webpage = self._download_webpage(url, video_id) | |||
data = self._parse_json(self._search_regex( | |||
r'window\.__data\s*=\s*({.+});', webpage, | |||
'bootstrap json'), video_id, js_to_json) | |||
r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>', | |||
webpage, 'bootstrap json'), video_id)['props']['initialState'] | |||
video_data = try_get(data, lambda x: x['video']['current'], dict) | |||
if not video_data: | |||
video_data = data['article']['content'][0]['primaryMedia']['video'] | |||
@@ -82,6 +82,29 @@ class NDRIE(NDRBaseIE): | |||
'params': { | |||
'skip_download': True, | |||
}, | |||
}, { | |||
# with subtitles | |||
'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html', | |||
'info_dict': { | |||
'id': 'extra18674', | |||
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring', | |||
'ext': 'mp4', | |||
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring', | |||
'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6', | |||
'uploader': 'ndrtv', | |||
'upload_date': '20201113', | |||
'duration': 1749, | |||
'subtitles': { | |||
'de': [{ | |||
'ext': 'ttml', | |||
'url': r're:^https://www\.ndr\.de.+', | |||
}], | |||
}, | |||
}, | |||
'params': { | |||
'skip_download': True, | |||
}, | |||
'expected_warnings': ['Unable to download f4m manifest'], | |||
}, { | |||
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html', | |||
'only_matching': True, | |||
@@ -242,6 +265,20 @@ class NDREmbedBaseIE(InfoExtractor): | |||
'preference': quality_key(thumbnail.get('quality')), | |||
}) | |||
subtitles = {} | |||
tracks = config.get('tracks') | |||
if tracks and isinstance(tracks, list): | |||
for track in tracks: | |||
if not isinstance(track, dict): | |||
continue | |||
track_url = urljoin(url, track.get('src')) | |||
if not track_url: | |||
continue | |||
subtitles.setdefault(track.get('srclang') or 'de', []).append({ | |||
'url': track_url, | |||
'ext': 'ttml', | |||
}) | |||
return { | |||
'id': video_id, | |||
'title': title, | |||
@@ -251,6 +288,7 @@ class NDREmbedBaseIE(InfoExtractor): | |||
'duration': duration, | |||
'thumbnails': thumbnails, | |||
'formats': formats, | |||
'subtitles': subtitles, | |||
} | |||
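
The NDR embed hunks above add a subtitles step: TTML tracks from the player config are collected into a language-to-track mapping and returned alongside the formats. A minimal sketch of that loop against a made-up config dict (the standard-library urljoin stands in for the project's helper, which additionally returns None for empty input):

    from urllib.parse import urljoin  # stand-in for youtube_dlc.utils.urljoin

    url = 'https://www.ndr.de/fernsehen/some-video.html'
    # Made-up player config; only the fields read by the loop are shown.
    config = {'tracks': [{'src': '/untertitel/extra18674.xml', 'srclang': 'de'}]}

    subtitles = {}
    for track in config.get('tracks') or []:
        if not isinstance(track, dict) or not track.get('src'):
            continue
        subtitles.setdefault(track.get('srclang') or 'de', []).append({
            'url': urljoin(url, track['src']),
            'ext': 'ttml',
        })
    print(subtitles)  # {'de': [{'url': 'https://www.ndr.de/untertitel/extra18674.xml', 'ext': 'ttml'}]}
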
@@ -17,7 +17,7 @@ from ..utils import ( | |||
int_or_none, | |||
parse_duration, | |||
strip_or_none, | |||
try_get, | |||
unescapeHTML, | |||
unified_strdate, | |||
unified_timestamp, | |||
update_url_query, | |||
@@ -30,7 +30,6 @@ class RaiBaseIE(InfoExtractor): | |||
_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' | |||
_GEO_COUNTRIES = ['IT'] | |||
_GEO_BYPASS = False | |||
_BASE_URL = 'https://www.raiplay.it' | |||
def _extract_relinker_info(self, relinker_url, video_id): | |||
if not re.match(r'https?://', relinker_url): | |||
@@ -123,19 +122,40 @@ class RaiBaseIE(InfoExtractor): | |||
class RaiPlayIE(RaiBaseIE): | |||
_VALID_URL = r'(?P<url>(?P<base>https?://(?:www\.)?raiplay\.it/.+?-)(?P<id>%s)(?P<ext>\.(?:html|json)))' % RaiBaseIE._UUID_RE | |||
_VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.(?:html|json))' % RaiBaseIE._UUID_RE | |||
_TESTS = [{ | |||
'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter', | |||
'md5': '340aa3b7afb54bfd14a8c11786450d76', | |||
'info_dict': { | |||
'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66', | |||
'ext': 'mp4', | |||
'title': 'La Casa Bianca', | |||
'alt_title': 'S2016 - Puntata del 23/10/2016', | |||
'description': 'md5:a09d45890850458077d1f68bb036e0a5', | |||
'thumbnail': r're:^https?://.*\.jpg$', | |||
'uploader': 'Rai 3', | |||
'creator': 'Rai 3', | |||
'duration': 3278, | |||
'timestamp': 1477764300, | |||
'upload_date': '20161029', | |||
'series': 'La Casa Bianca', | |||
'season': '2016', | |||
}, | |||
'skip': 'This content is not available', | |||
}, { | |||
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', | |||
'md5': '8970abf8caf8aef4696e7b1f2adfc696', | |||
'info_dict': { | |||
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', | |||
'ext': 'mp4', | |||
'title': 'Report del 07/04/2014', | |||
'alt_title': 'St 2013/14 - Espresso nel caffè - 07/04/2014 ', | |||
'alt_title': 'St 2013/14 - Espresso nel caffè - 07/04/2014', | |||
'description': 'md5:d730c168a58f4bb35600fc2f881ec04e', | |||
'thumbnail': r're:^https?://.*\.jpg$', | |||
'uploader': 'Rai Gulp', | |||
'duration': 6160, | |||
'series': 'Report', | |||
'season': '2013/14', | |||
}, | |||
'params': { | |||
'skip_download': True, | |||
@@ -146,11 +166,10 @@ class RaiPlayIE(RaiBaseIE): | |||
}] | |||
def _real_extract(self, url): | |||
mobj = re.match(self._VALID_URL, url) | |||
url, base, video_id, ext = mobj.group('url', 'base', 'id', 'ext') | |||
url, video_id = re.match(self._VALID_URL, url).groups() | |||
media = self._download_json( | |||
'%s%s.json' % (base, video_id), video_id, 'Downloading video JSON') | |||
url.replace('.html', '.json'), video_id, 'Downloading video JSON') | |||
title = media['name'] | |||
video = media['video'] | |||
@@ -159,34 +178,38 @@ class RaiPlayIE(RaiBaseIE): | |||
self._sort_formats(relinker_info['formats']) | |||
thumbnails = [] | |||
if 'images' in media: | |||
for _, value in media.get('images').items(): | |||
if value: | |||
thumbnails.append({ | |||
'url': urljoin(RaiBaseIE._BASE_URL, value.replace('[RESOLUTION]', '600x400')) | |||
}) | |||
for _, value in media.get('images', {}).items(): | |||
if value: | |||
thumbnails.append({ | |||
'url': urljoin(url, value), | |||
}) | |||
timestamp = unified_timestamp(try_get( | |||
media, lambda x: x['availabilities'][0]['start'], compat_str)) | |||
date_published = media.get('date_published') | |||
time_published = media.get('time_published') | |||
if date_published and time_published: | |||
date_published += ' ' + time_published | |||
subtitles = self._extract_subtitles(url, video.get('subtitles')) | |||
program_info = media.get('program_info') or {} | |||
season = media.get('season') | |||
info = { | |||
'id': video_id, | |||
'title': self._live_title(title) if relinker_info.get( | |||
'is_live') else title, | |||
'alt_title': media.get('subtitle'), | |||
'alt_title': strip_or_none(media.get('subtitle')), | |||
'description': media.get('description'), | |||
'uploader': strip_or_none(media.get('channel')), | |||
'creator': strip_or_none(media.get('editor')), | |||
'creator': strip_or_none(media.get('editor') or None), | |||
'duration': parse_duration(video.get('duration')), | |||
'timestamp': timestamp, | |||
'timestamp': unified_timestamp(date_published), | |||
'thumbnails': thumbnails, | |||
'series': try_get( | |||
media, lambda x: x['isPartOf']['name'], compat_str), | |||
'season_number': int_or_none(try_get( | |||
media, lambda x: x['isPartOf']['numeroStagioni'])), | |||
'season': media.get('stagione') or None, | |||
'series': program_info.get('name'), | |||
'season_number': int_or_none(season), | |||
'season': season if (season and not season.isdigit()) else None, | |||
'episode': media.get('episode_title'), | |||
'episode_number': int_or_none(media.get('episode')), | |||
'subtitles': subtitles, | |||
} | |||
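
One detail worth noting in the RaiPlay rewrite above: the season value from the JSON can be either a plain number ('3') or a label such as '2013/14', and the new code routes it to season_number or season accordingly. A small sketch of that rule (the season_fields helper is mine, for illustration only):

    def season_fields(season):
        # Numeric values become season_number; anything else (e.g. '2013/14')
        # is kept as the season name, mirroring the info dict above.
        return {
            'season_number': int(season) if season and season.isdigit() else None,
            'season': season if (season and not season.isdigit()) else None,
        }

    print(season_fields('3'))        # {'season_number': 3, 'season': None}
    print(season_fields('2013/14'))  # {'season_number': None, 'season': '2013/14'}
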
@@ -203,7 +226,7 @@ class RaiPlayLiveIE(RaiBaseIE): | |||
'display_id': 'rainews24', | |||
'ext': 'mp4', | |||
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | |||
'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497', | |||
'description': 'md5:6eca31500550f9376819f174e5644754', | |||
'uploader': 'Rai News 24', | |||
'creator': 'Rai News 24', | |||
'is_live': True, | |||
@@ -216,32 +239,20 @@ class RaiPlayLiveIE(RaiBaseIE): | |||
def _real_extract(self, url): | |||
display_id = self._match_id(url) | |||
media = self._download_json( | |||
'%s.json' % urljoin(RaiBaseIE._BASE_URL, 'dirette/' + display_id), | |||
display_id, 'Downloading channel JSON') | |||
title = media['name'] | |||
video = media['video'] | |||
video_id = media['id'].replace('ContentItem-', '') | |||
webpage = self._download_webpage(url, display_id) | |||
relinker_info = self._extract_relinker_info(video['content_url'], video_id) | |||
self._sort_formats(relinker_info['formats']) | |||
video_id = self._search_regex( | |||
r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE, | |||
webpage, 'content id') | |||
info = { | |||
return { | |||
'_type': 'url_transparent', | |||
'ie_key': RaiPlayIE.ie_key(), | |||
'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id, | |||
'id': video_id, | |||
'display_id': display_id, | |||
'title': self._live_title(title) if relinker_info.get( | |||
'is_live') else title, | |||
'alt_title': media.get('subtitle'), | |||
'description': media.get('description'), | |||
'uploader': strip_or_none(media.get('channel')), | |||
'creator': strip_or_none(media.get('editor')), | |||
'duration': parse_duration(video.get('duration')), | |||
} | |||
info.update(relinker_info) | |||
return info | |||
class RaiPlayPlaylistIE(InfoExtractor): | |||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)' | |||
@@ -250,7 +261,7 @@ class RaiPlayPlaylistIE(InfoExtractor): | |||
'info_dict': { | |||
'id': 'nondirloalmiocapo', | |||
'title': 'Non dirlo al mio capo', | |||
'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b', | |||
'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', | |||
}, | |||
'playlist_mincount': 12, | |||
}] | |||
@@ -258,25 +269,21 @@ class RaiPlayPlaylistIE(InfoExtractor): | |||
def _real_extract(self, url): | |||
playlist_id = self._match_id(url) | |||
media = self._download_json( | |||
'%s.json' % urljoin(RaiBaseIE._BASE_URL, 'programmi/' + playlist_id), | |||
playlist_id, 'Downloading program JSON') | |||
title = media['name'] | |||
description = media['program_info']['description'] | |||
webpage = self._download_webpage(url, playlist_id) | |||
content_sets = [s['id'] for b in media['blocks'] for s in b['sets']] | |||
title = self._html_search_meta( | |||
('programma', 'nomeProgramma'), webpage, 'title') | |||
description = unescapeHTML(self._html_search_meta( | |||
('description', 'og:description'), webpage, 'description')) | |||
entries = [] | |||
for cs in content_sets: | |||
medias = self._download_json( | |||
'%s/%s.json' % (urljoin(RaiBaseIE._BASE_URL, 'programmi/' + playlist_id), cs), | |||
cs, 'Downloading content set JSON') | |||
for m in medias['items']: | |||
video_url = urljoin(url, m['path_id']) | |||
entries.append(self.url_result( | |||
video_url, ie=RaiPlayIE.ie_key(), | |||
video_id=RaiPlayIE._match_id(video_url))) | |||
for mobj in re.finditer( | |||
r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1', | |||
webpage): | |||
video_url = urljoin(url, mobj.group('path')) | |||
entries.append(self.url_result( | |||
video_url, ie=RaiPlayIE.ie_key(), | |||
video_id=RaiPlayIE._match_id(video_url))) | |||
return self.playlist_result(entries, playlist_id, title, description) | |||
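
The playlist extractor above collects entries by scanning the programme page for /raiplay/video/ anchors. A standalone sketch of just that scraping step, with a made-up page snippet (urljoin again stands in for the project's helper):

    import re
    from urllib.parse import urljoin  # stand-in for the project's urljoin helper

    url = 'https://www.raiplay.it/programmi/nondirloalmiocapo/'
    # Made-up snippet; a real programme page contains many such anchors.
    webpage = '<a class="card" href="/raiplay/video/2016/12/Puntata-1-aabbccdd-0000-1111-2222-333344445555.html">Puntata 1</a>'

    entries = []
    for mobj in re.finditer(
            r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1', webpage):
        entries.append(urljoin(url, mobj.group('path')))
    print(entries)
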
@@ -294,7 +301,8 @@ class RaiIE(RaiBaseIE): | |||
'thumbnail': r're:^https?://.*\.jpg$', | |||
'duration': 1758, | |||
'upload_date': '20140612', | |||
} | |||
}, | |||
'skip': 'This content is available only in Italy', | |||
}, { | |||
# with ContentItem in many metas | |||
'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html', | |||
@@ -320,6 +328,19 @@ class RaiIE(RaiBaseIE): | |||
'duration': 2214, | |||
'upload_date': '20161103', | |||
} | |||
}, { | |||
# drawMediaRaiTV(...) | |||
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', | |||
'md5': '2dd727e61114e1ee9c47f0da6914e178', | |||
'info_dict': { | |||
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af', | |||
'ext': 'mp4', | |||
'title': 'Il pacco', | |||
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', | |||
'thumbnail': r're:^https?://.*\.jpg$', | |||
'upload_date': '20141221', | |||
}, | |||
'skip': 'This content is not available', | |||
}, { | |||
# initEdizione('ContentItem-...' | |||
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', | |||
@@ -331,6 +352,18 @@ class RaiIE(RaiBaseIE): | |||
'upload_date': '20170401', | |||
}, | |||
'skip': 'Changes daily', | |||
}, { | |||
# HDS live stream with only relinker URL | |||
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews', | |||
'info_dict': { | |||
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc', | |||
'ext': 'flv', | |||
'title': 'EuroNews', | |||
}, | |||
'params': { | |||
'skip_download': True, | |||
}, | |||
'skip': 'This content is available only in Italy', | |||
}, { | |||
# HLS live stream with ContentItem in og:url | |||
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html', | |||
@@ -1,9 +1,15 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
from ..utils import ( | |||
determine_ext, | |||
float_or_none, | |||
int_or_none, | |||
unified_timestamp, | |||
urlencode_postdata, | |||
url_or_none, | |||
) | |||
class ServusIE(InfoExtractor): | |||
@@ -12,20 +18,29 @@ class ServusIE(InfoExtractor): | |||
(?:www\.)? | |||
(?: | |||
servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)| | |||
servustv\.com/videos | |||
(?:servustv|pm-wissen)\.com/videos | |||
) | |||
/(?P<id>[aA]{2}-\w+|\d+-\d+) | |||
''' | |||
_TESTS = [{ | |||
# new URL schema | |||
'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/', | |||
'md5': '3e1dd16775aa8d5cbef23628cfffc1f4', | |||
'md5': '60474d4c21f3eb148838f215c37f02b9', | |||
'info_dict': { | |||
'id': 'AA-1T6VBU5PW1W12', | |||