Browse Source

Merge pull request #245 from pukkandan/merge-main

Merge youtube-dl and fix Youtube Feeds
pull/100/merge
Tom-Oliver Heidel 1 year ago
committed by GitHub
parent
commit
ef5a4db06c
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
66 changed files with 3617 additions and 2252 deletions
  1. +1
    -1
      devscripts/make_lazy_extractors.py
  2. +23
    -15
      docs/supportedsites.md
  3. +1
    -1
      test/parameters.json
  4. +70
    -0
      test/test_YoutubeDL.py
  5. +21
    -19
      test/test_all_urls.py
  6. +28
    -0
      test/test_utils.py
  7. +45
    -34
      youtube_dlc/YoutubeDL.py
  8. +1
    -1
      youtube_dlc/compat.py
  9. +12
    -2
      youtube_dlc/downloader/fragment.py
  10. +3
    -1
      youtube_dlc/downloader/http.py
  11. +1
    -1
      youtube_dlc/extractor/afreecatv.py
  12. +103
    -0
      youtube_dlc/extractor/amara.py
  13. +110
    -57
      youtube_dlc/extractor/arte.py
  14. +111
    -157
      youtube_dlc/extractor/bandcamp.py
  15. +53
    -4
      youtube_dlc/extractor/bbc.py
  16. +98
    -0
      youtube_dlc/extractor/box.py
  17. +4
    -4
      youtube_dlc/extractor/brightcove.py
  18. +32
    -3
      youtube_dlc/extractor/cda.py
  19. +12
    -7
      youtube_dlc/extractor/cnbc.py
  20. +31
    -3
      youtube_dlc/extractor/common.py
  21. +23
    -4
      youtube_dlc/extractor/condenast.py
  22. +4
    -1
      youtube_dlc/extractor/discoverynetworks.py
  23. +2
    -2
      youtube_dlc/extractor/europa.py
  24. +21
    -9
      youtube_dlc/extractor/extractors.py
  25. +3
    -0
      youtube_dlc/extractor/franceinter.py
  26. +36
    -11
      youtube_dlc/extractor/francetv.py
  27. +5
    -6
      youtube_dlc/extractor/generic.py
  28. +21
    -37
      youtube_dlc/extractor/googledrive.py
  29. +4
    -3
      youtube_dlc/extractor/infoq.py
  30. +1
    -1
      youtube_dlc/extractor/iqiyi.py
  31. +2
    -2
      youtube_dlc/extractor/kusi.py
  32. +91
    -0
      youtube_dlc/extractor/lbry.py
  33. +36
    -55
      youtube_dlc/extractor/lrt.py
  34. +46
    -14
      youtube_dlc/extractor/malltv.py
  35. +131
    -0
      youtube_dlc/extractor/medaltv.py
  36. +7
    -3
      youtube_dlc/extractor/mgtv.py
  37. +12
    -0
      youtube_dlc/extractor/mtv.py
  38. +2
    -3
      youtube_dlc/extractor/nbc.py
  39. +38
    -0
      youtube_dlc/extractor/ndr.py
  40. +1
    -1
      youtube_dlc/extractor/npr.py
  41. +248
    -176
      youtube_dlc/extractor/nrk.py
  42. +38
    -0
      youtube_dlc/extractor/nytimes.py
  43. +1
    -1
      youtube_dlc/extractor/pbs.py
  44. +201
    -0
      youtube_dlc/extractor/pinterest.py
  45. +64
    -81
      youtube_dlc/extractor/rai.py
  46. +67
    -0
      youtube_dlc/extractor/rumble.py
  47. +95
    -16
      youtube_dlc/extractor/servus.py
  48. +1
    -1
      youtube_dlc/extractor/soundcloud.py
  49. +24
    -129
      youtube_dlc/extractor/spiegel.py
  50. +176
    -0
      youtube_dlc/extractor/spreaker.py
  51. +37
    -11
      youtube_dlc/extractor/svt.py
  52. +1
    -1
      youtube_dlc/extractor/tagesschau.py
  53. +1
    -1
      youtube_dlc/extractor/theplatform.py
  54. +3
    -3
      youtube_dlc/extractor/turner.py
  55. +7
    -4
      youtube_dlc/extractor/twentythreevideo.py
  56. +51
    -26
      youtube_dlc/extractor/urplay.py
  57. +16
    -66
      youtube_dlc/extractor/usanetwork.py
  58. +5
    -2
      youtube_dlc/extractor/ustream.py
  59. +98
    -76
      youtube_dlc/extractor/viki.py
  60. +9
    -6
      youtube_dlc/extractor/vimeo.py
  61. +185
    -246
      youtube_dlc/extractor/vlive.py
  62. +4
    -4
      youtube_dlc/extractor/xiami.py
  63. +11
    -20
      youtube_dlc/extractor/xtube.py
  64. +3
    -4
      youtube_dlc/extractor/youporn.py
  65. +1008
    -900
      youtube_dlc/extractor/youtube.py
  66. +17
    -16
      youtube_dlc/utils.py

+ 1
- 1
devscripts/make_lazy_extractors.py View File

@@ -61,7 +61,7 @@ def build_lazy_ie(ie, name):
return s


# find the correct sorting and add the required base classes so that sublcasses
# find the correct sorting and add the required base classes so that subclasses
# can be correctly created
classes = _ALL_CLASSES[:-1]
ordered_cls = []


+ 23
- 15
docs/supportedsites.md View File

@@ -59,9 +59,9 @@
- **ARD:mediathek**
- **ARDBetaMediathek**
- **Arkena**
- **arte.tv:+7**
- **arte.tv:embed**
- **arte.tv:playlist**
- **ArteTV**
- **ArteTVEmbed**
- **ArteTVPlaylist**
- **AsianCrush**
- **AsianCrushPlaylist**
- **AtresPlayer**
@@ -111,6 +111,7 @@
- **Bloomberg**
- **BokeCC**
- **BostonGlobe**
- **Box**
- **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk
- **BravoTV**
@@ -158,6 +159,7 @@
- **Chilloutzone**
- **chirbit**
- **chirbit:profile**
- **cielotv.it**
- **Cinchcast**
- **Cinemax**
- **CiscoLiveSearch**
@@ -425,6 +427,7 @@
- **la7.it**
- **laola1tv**
- **laola1tv:embed**
- **lbry.tv**
- **LCI**
- **Lcp**
- **LcpPlay**
@@ -475,6 +478,7 @@
- **massengeschmack.tv**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **MedalTV**
- **media.ccc.de**
- **media.ccc.de:lists**
- **Medialaan**
@@ -618,6 +622,7 @@
- **Nuvid**
- **NYTimes**
- **NYTimesArticle**
- **NYTimesCooking**
- **NZZ**
- **ocw.mit.edu**
- **OdaTV**
@@ -670,6 +675,8 @@
- **PicartoVod**
- **Piksel**
- **Pinkbike**
- **Pinterest**
- **PinterestCollection**
- **Pladform**
- **Platzi**
- **PlatziCourse**
@@ -766,6 +773,7 @@
- **RTVNH**
- **RTVS**
- **RUHD**
- **RumbleEmbed**
- **rutube**: Rutube videos
- **rutube:channel**: Rutube channels
- **rutube:embed**: Rutube embedded videos
@@ -836,12 +844,14 @@
- **SpankBangPlaylist**
- **Spankwire**
- **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de
- **Spiegeltv**
- **sport.francetvinfo.fr**
- **Sport5**
- **SportBox**
- **SportDeutschland**
- **Spreaker**
- **SpreakerPage**
- **SpreakerShow**
- **SpreakerShowPage**
- **SpringboardPlatform**
- **Sprout**
- **sr:mediathek**: Saarländischer Rundfunk
@@ -945,6 +955,7 @@
- **TV2DKBornholmPlay**
- **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+
- **tv8.it**
- **TVA**
- **TVANouvelles**
- **TVANouvellesArticle**
@@ -1059,7 +1070,7 @@
- **vk:wallpost**
- **vlive**
- **vlive:channel**
- **vlive:playlist**
- **vlive:post**
- **Vodlocker**
- **VODPl**
- **VODPlatform**
@@ -1148,20 +1159,17 @@
- **YourPorn**
- **YourUpload**
- **youtube**: YouTube.com
- **youtube:channel**: YouTube.com channels
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
- **youtube:favorites**: YouTube.com liked videos, ":ytfav" for short (requires authentication)
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
- **youtube:live**: YouTube.com live streams
- **youtube:playlist**: YouTube.com playlists
- **youtube:playlists**: YouTube.com user/channel playlists
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
- **youtube:search**: YouTube.com searches, "ytsearch" keyword
- **youtube:search:date**: YouTube.com searches, newest videos first, "ytsearchdate" keyword
- **youtube:search_url**: YouTube.com search URLs
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:subscriptions**: YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)
- **youtube:tab**: YouTube.com tab
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **YoutubeYtUser**: YouTube.com user videos, URL or "ytuser" keyword
- **Zapiks**
- **Zaq1**
- **Zattoo**


+ 1
- 1
test/parameters.json View File

@@ -37,7 +37,7 @@
"writeinfojson": true,
"writesubtitles": false,
"allsubtitles": false,
"listssubtitles": false,
"listsubtitles": false,
"socket_timeout": 20,
"fixup": "never"
}

+ 70
- 0
test/test_YoutubeDL.py View File

@@ -919,6 +919,76 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(downloaded['extractor'], 'testex')
self.assertEqual(downloaded['extractor_key'], 'TestEx')

# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):

class _YDL(YDL):
def __init__(self, *args, **kwargs):
super(_YDL, self).__init__(*args, **kwargs)

def trouble(self, s, tb=None):
pass

ydl = _YDL({
'format': 'extra',
'ignoreerrors': True,
})

class VideoIE(InfoExtractor):
_VALID_URL = r'video:(?P<id>\d+)'

def _real_extract(self, url):
video_id = self._match_id(url)
formats = [{
'format_id': 'default',
'url': 'url:',
}]
if video_id == '0':
raise ExtractorError('foo')
if video_id == '2':
formats.append({
'format_id': 'extra',
'url': TEST_URL,
})
return {
'id': video_id,
'title': 'Video %s' % video_id,
'formats': formats,
}

class PlaylistIE(InfoExtractor):
_VALID_URL = r'playlist:'

def _entries(self):
for n in range(3):
video_id = compat_str(n)
yield {
'_type': 'url_transparent',
'ie_key': VideoIE.ie_key(),
'id': video_id,
'url': 'video:%s' % video_id,
'title': 'Video Transparent %s' % video_id,
}

def _real_extract(self, url):
return self.playlist_result(self._entries())

ydl.add_info_extractor(VideoIE(ydl))
ydl.add_info_extractor(PlaylistIE(ydl))
info = ydl.extract_info('playlist:')
entries = info['entries']
self.assertEqual(len(entries), 3)
self.assertTrue(entries[0] is None)
self.assertTrue(entries[1] is None)
self.assertEqual(len(ydl.downloaded_info_dicts), 1)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(entries[2], downloaded)
self.assertEqual(downloaded['url'], TEST_URL)
self.assertEqual(downloaded['title'], 'Video Transparent 2')
self.assertEqual(downloaded['id'], '2')
self.assertEqual(downloaded['extractor'], 'Video')
self.assertEqual(downloaded['extractor_key'], 'Video')


if __name__ == '__main__':
unittest.main()

+ 21
- 19
test/test_all_urls.py View File

@@ -31,45 +31,47 @@ class TestAllURLsMatching(unittest.TestCase):

def test_youtube_playlist_matching(self):
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
assertTab = lambda url: self.assertMatch(url, ['youtube:tab'])
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
assertPlaylist('PL63F0C78739B09958')
assertTab('https://www.youtube.com/AsapSCIENCE')
assertTab('https://www.youtube.com/embedded')
assertTab('https://www.youtube.com/feed') # Own channel's home page
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
# Top tracks
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
assertTab('https://www.youtube.com/playlist?list=MCUS.20142101')

def test_youtube_matching(self):
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
# self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) # /v/ is no longer valid
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])

def test_youtube_channel_matching(self):
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
assertChannel = lambda url: self.assertMatch(url, ['youtube:tab'])
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')

def test_youtube_user_matching(self):
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
# def test_youtube_user_matching(self):
# self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])

def test_youtube_feeds(self):
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])

def test_youtube_show_matching(self):
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])

def test_youtube_search_matching(self):
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])
self.assertMatch('https://www.youtube.com/feed/history', ['youtube:tab'])
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])

# def test_youtube_search_matching(self):
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])

def test_youtube_extract(self):
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)


+ 28
- 0
test/test_utils.py View File

@@ -937,6 +937,28 @@ class TestUtil(unittest.TestCase):
self.assertEqual(d['x'], 1)
self.assertEqual(d['y'], 'a')

# Just drop ! prefix for now though this results in a wrong value
on = js_to_json('''{
a: !0,
b: !1,
c: !!0,
d: !!42.42,
e: !!![],
f: !"abc",
g: !"",
!42: 42
}''')
self.assertEqual(json.loads(on), {
'a': 0,
'b': 1,
'c': 0,
'd': 42.42,
'e': [],
'f': "abc",
'g': "",
'42': 42
})

on = js_to_json('["abc", "def",]')
self.assertEqual(json.loads(on), ['abc', 'def'])

@@ -994,6 +1016,12 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{42:4.2e1}')
self.assertEqual(json.loads(on), {'42': 42.0})

on = js_to_json('{ "0x40": "0x40" }')
self.assertEqual(json.loads(on), {'0x40': '0x40'})

on = js_to_json('{ "040": "040" }')
self.assertEqual(json.loads(on), {'040': '040'})

def test_js_to_json_malformed(self):
self.assertEqual(js_to_json('42a1'), '42"a1"')
self.assertEqual(js_to_json('42a-1'), '42"a"-1')


+ 45
- 34
youtube_dlc/YoutubeDL.py View File

@@ -830,34 +830,23 @@ class YoutubeDL(object):
'and will probably not work.')

try:
try:
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
except (AssertionError, IndexError):
temp_id = None
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
self.to_screen("[%s] %s: has already been recorded in archive" % (
ie_key, temp_id))
break
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
except (AssertionError, IndexError, AttributeError):
temp_id = None
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
self.to_screen("[%s] %s: has already been recorded in archive" % (
ie_key, temp_id))
break

ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
break
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
ie_result = {
'_type': 'compat_list',
'entries': ie_result,
}
if info_dict:
if info_dict.get('id'):
ie_result['id'] = info_dict['id']
if info_dict.get('title'):
ie_result['title'] = info_dict['title']
self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
return self.__extract_info(url, ie, download, extra_info, process, info_dict)

else:
self.report_error('no suitable InfoExtractor for URL %s' % url)

def __handle_extraction_exceptions(func):
def wrapper(self, *args, **kwargs):
try:
return func(self, *args, **kwargs)
except GeoRestrictedError as e:
msg = e.msg
if e.countries:
@@ -865,20 +854,38 @@ class YoutubeDL(object):
map(ISO3166Utils.short2full, e.countries))
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg)
break
except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback())
break
except MaxDownloadsReached:
raise
except Exception as e:
if self.params.get('ignoreerrors', False):
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
break
else:
raise
return wrapper

@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
return
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
ie_result = {
'_type': 'compat_list',
'entries': ie_result,
}
if info_dict:
if info_dict.get('id'):
ie_result['id'] = info_dict['id']
if info_dict.get('title'):
ie_result['title'] = info_dict['title']
self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
return ie_result

def add_default_extra_info(self, ie_result, ie, url):
self.add_extra_info(ie_result, {
@@ -1057,9 +1064,8 @@ class YoutubeDL(object):
self.to_screen('[download] ' + reason)
continue

entry_result = self.process_ie_result(entry,
download=download,
extra_info=extra)
entry_result = self.__process_iterable_entry(entry, download, extra)
# TODO: skip failed (empty) entries?
playlist_results.append(entry_result)
ie_result['entries'] = playlist_results
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
@@ -1088,6 +1094,11 @@ class YoutubeDL(object):
else:
raise Exception('Invalid result type: %s' % result_type)

@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
return self.process_ie_result(
entry, download=download, extra_info=extra_info)

def _build_format_filter(self, filter_spec):
" Returns a function to filter the formats according to the filter_spec "



+ 1
- 1
youtube_dlc/compat.py View File

@@ -2345,7 +2345,7 @@ except ImportError: # Python <3.4

# HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exceptiong handling
# and uniform cross-version exception handling
class compat_HTMLParseError(Exception):
pass



+ 12
- 2
youtube_dlc/downloader/fragment.py View File

@@ -97,12 +97,15 @@ class FragmentFD(FileDownloader):

def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
success = ctx['dl'].download(fragment_filename, {
fragment_info_dict = {
'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'),
})
}
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False, None
if fragment_info_dict.get('filetime'):
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read()
@@ -258,6 +261,13 @@ class FragmentFD(FileDownloader):
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
else:
self.try_rename(ctx['tmpfilename'], ctx['filename'])
if self.params.get('updatetime', True):
filetime = ctx.get('fragment_filetime')
if filetime:
try:
os.utime(ctx['filename'], (time.time(), filetime))
except Exception:
pass
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))

self._hook_progress({


+ 3
- 1
youtube_dlc/downloader/http.py View File

@@ -109,7 +109,9 @@ class HttpFD(FileDownloader):
try:
ctx.data = self.ydl.urlopen(request)
except (compat_urllib_error.URLError, ) as err:
if isinstance(err.reason, socket.timeout):
# reason may not be available, e.g. for urllib2.HTTPError on python 2.6
reason = getattr(err, 'reason', None)
if isinstance(reason, socket.timeout):
raise RetryDownload(err)
raise err
# When trying to resume, Content-Range HTTP header of response has to be checked


+ 1
- 1
youtube_dlc/extractor/afreecatv.py View File

@@ -275,7 +275,7 @@ class AfreecaTVIE(InfoExtractor):
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
if video_element is None or video_element.text is None:
raise ExtractorError(
'Video %s video does not exist' % video_id, expected=True)
'Video %s does not exist' % video_id, expected=True)

video_url = video_element.text.strip()



+ 103
- 0
youtube_dlc/extractor/amara.py View File

@@ -0,0 +1,103 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from .youtube import YoutubeIE
from .vimeo import VimeoIE
from ..utils import (
int_or_none,
parse_iso8601,
update_url_query,
)


class AmaraIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
_TESTS = [{
# Youtube
'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
'info_dict': {
'id': 'h6ZuVdvYnfE',
'ext': 'mp4',
'title': 'Why jury trials are becoming less common',
'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
'thumbnail': r're:^https?://.*\.jpg$',
'subtitles': dict,
'upload_date': '20160813',
'uploader': 'PBS NewsHour',
'uploader_id': 'PBSNewsHour',
'timestamp': 1549639570,
}
}, {
# Vimeo
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
'md5': '99392c75fa05d432a8f11df03612195e',
'info_dict': {
'id': '18622084',
'ext': 'mov',
'title': 'Vimeo at CES 2011!',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
'subtitles': dict,
'timestamp': 1294763658,
'upload_date': '20110111',
'uploader': 'Sam Morrill',
'uploader_id': 'sammorrill'
}
}, {
# Direct Link
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
'md5': 'd3970f08512738ee60c5807311ff5d3f',
'info_dict': {
'id': 's8KL7I3jLmh6',
'ext': 'mp4',
'title': 'The danger of a single story',
'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
'thumbnail': r're:^https?://.*\.jpg$',
'subtitles': dict,
'upload_date': '20091007',
'timestamp': 1254942511,
}
}]

def _real_extract(self, url):
video_id = self._match_id(url)
meta = self._download_json(
'https://amara.org/api/videos/%s/' % video_id,
video_id, query={'format': 'json'})
title = meta['title']
video_url = meta['all_urls'][0]

subtitles = {}
for language in (meta.get('languages') or []):
subtitles_uri = language.get('subtitles_uri')
if not (subtitles_uri and language.get('published')):
continue
subtitle = subtitles.setdefault(language.get('code') or 'en', [])
for f in ('json', 'srt', 'vtt'):
subtitle.append({
'ext': f,
'url': update_url_query(subtitles_uri, {'format': f}),
})

info = {
'url': video_url,
'id': video_id,
'subtitles': subtitles,
'title': title,
'description': meta.get('description'),
'thumbnail': meta.get('thumbnail'),
'duration': int_or_none(meta.get('duration')),
'timestamp': parse_iso8601(meta.get('created')),
}

for ie in (YoutubeIE, VimeoIE):
if ie.suitable(video_url):
info.update({
'_type': 'url_transparent',
'ie_key': ie.ie_key(),
})
break

return info

+ 110
- 57
youtube_dlc/extractor/arte.py View File

@@ -4,23 +4,57 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
from ..compat import compat_str
from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import (
ExtractorError,
int_or_none,
qualities,
try_get,
unified_strdate,
url_or_none,
)

# There are different sources of video in arte.tv, the extraction process
# is different for each one. The videos usually expire in 7 days, so we can't
# add tests.


class ArteTVBaseIE(InfoExtractor):
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
info = self._download_json(json_url, video_id)
_ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
_API_BASE = 'https://api.arte.tv/api/player/v1'


class ArteTVIE(ArteTVBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
)
/(?P<id>\d{6}-\d{3}-[AF])
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'info_dict': {
'id': '088501-000-A',
'ext': 'mp4',
'title': 'Mexico: Stealing Petrol to Survive',
'upload_date': '20190628',
},
}, {
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
'only_matching': True,
}, {
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True,
}]

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
lang = mobj.group('lang') or mobj.group('lang_2')

info = self._download_json(
'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
player_info = info['videoJsonPlayer']

vsr = try_get(player_info, lambda x: x['VSR'], dict)
@@ -37,18 +71,11 @@ class ArteTVBaseIE(InfoExtractor):
if not upload_date_str:
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]

title = (player_info.get('VTI') or title or player_info['VID']).strip()
title = (player_info.get('VTI') or player_info['VID']).strip()
subtitle = player_info.get('VSU', '').strip()
if subtitle:
title += ' - %s' % subtitle

info_dict = {
'id': player_info['VID'],
'title': title,
'description': player_info.get('VDE'),
'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
}
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])

LANGS = {
@@ -65,6 +92,10 @@ class ArteTVBaseIE(InfoExtractor):
formats = []
for format_id, format_dict in vsr.items():
f = dict(format_dict)
format_url = url_or_none(f.get('url'))
streamer = f.get('streamer')
if not format_url and not streamer:
continue
versionCode = f.get('versionCode')
l = re.escape(langcode)

@@ -107,6 +138,16 @@ class ArteTVBaseIE(InfoExtractor):
else:
lang_pref = -1

media_type = f.get('mediaType')
if media_type == 'hls':
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
for m3u8_format in m3u8_formats:
m3u8_format['language_preference'] = lang_pref
formats.extend(m3u8_formats)
continue

format = {
'format_id': format_id,
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
@@ -118,7 +159,7 @@ class ArteTVBaseIE(InfoExtractor):
'quality': qfunc(f.get('quality')),
}

if f.get('mediaType') == 'rtmp':
if media_type == 'rtmp':
format['url'] = f['streamer']
format['play_path'] = 'mp4:' + f['url']
format['ext'] = 'flv'
@@ -127,56 +168,50 @@ class ArteTVBaseIE(InfoExtractor):

formats.append(format)

self._check_formats(formats, video_id)
self._sort_formats(formats)

info_dict['formats'] = formats
return info_dict

return {
'id': player_info.get('VID') or video_id,
'title': title,
'description': player_info.get('VDE'),
'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
'formats': formats,
}

class ArteTVPlus7IE(ArteTVBaseIE):
IE_NAME = 'arte.tv:+7'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'

class ArteTVEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
'info_dict': {
'id': '088501-000-A',
'id': '100605-013-A',
'ext': 'mp4',
'title': 'Mexico: Stealing Petrol to Survive',
'upload_date': '20190628',
'title': 'United we Stream November Lockdown Edition #13',
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
'upload_date': '20201116',
},
}, {
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True,
}]

def _real_extract(self, url):
lang, video_id = re.match(self._VALID_URL, url).groups()
return self._extract_from_json_url(
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
video_id, lang)


class ArteTVEmbedIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:embed'
_VALID_URL = r'''(?x)
https://www\.arte\.tv
/player/v3/index\.php\?json_url=
(?P<json_url>
https?://api\.arte\.tv/api/player/v1/config/
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
)
'''

_TESTS = []
@staticmethod
def _extract_urls(webpage):
return [url for _, url in re.findall(
r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
webpage)]

def _real_extract(self, url):
json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
return self._extract_from_json_url(json_url, video_id, lang)
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
json_url = qs['json_url'][0]
video_id = ArteTVIE._match_id(json_url)
return self.url_result(
json_url, ie=ArteTVIE.ie_key(), video_id=video_id)


class ArteTVPlaylistIE(ArteTVBaseIE):
IE_NAME = 'arte.tv:playlist'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'

_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
'info_dict': {
@@ -185,17 +220,35 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
},
'playlist_mincount': 6,
}, {
'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
'only_matching': True,
}]

def _real_extract(self, url):
lang, playlist_id = re.match(self._VALID_URL, url).groups()
collection = self._download_json(
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
% (lang, playlist_id), playlist_id)
'%s/collectionData/%s/%s?source=videos'
% (self._API_BASE, lang, playlist_id), playlist_id)
entries = []
for video in collection['videos']:
if not isinstance(video, dict):
continue
video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
if not video_url:
continue
video_id = video.get('programId')
entries.append({
'_type': 'url_transparent',
'url': video_url,
'id': video_id,
'title': video.get('title'),
'alt_title': video.get('subtitle'),
'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
'duration': int_or_none(video.get('durationSeconds')),
'view_count': int_or_none(video.get('views')),
'ie_key': ArteTVIE.ie_key(),
})
title = collection.get('title')
description = collection.get('shortDescription') or collection.get('teaserText')
entries = [
self._extract_from_json_url(
video['jsonUrl'], video.get('programId') or playlist_id, lang)
for video in collection['videos'] if video.get('jsonUrl')]
return self.playlist_result(entries, playlist_id, title, description)

+ 111
- 157
youtube_dlc/extractor/bandcamp.py View File

@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals

import random
@@ -5,10 +6,7 @@ import re
import time

from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urlparse,
)
from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
@@ -17,71 +15,32 @@ from ..utils import (
parse_filesize,
str_or_none,
try_get,
unescapeHTML,
update_url_query,
unified_strdate,
unified_timestamp,
url_or_none,
urljoin,
)


class BandcampBaseIE(InfoExtractor):
"""Provide base functions for Bandcamp extractors"""

def _extract_json_from_html_data_attribute(self, webpage, suffix, video_id):
json_string = self._html_search_regex(
r' data-%s="([^"]*)' % suffix,
webpage, '%s json' % suffix, default='{}')

return self._parse_json(json_string, video_id)

def _parse_json_track(self, json):
formats = []
file_ = json.get('file')
if isinstance(file_, dict):
for format_id, format_url in file_.items():
if not url_or_none(format_url):
continue
ext, abr_str = format_id.split('-', 1)
formats.append({
'format_id': format_id,
'url': self._proto_relative_url(format_url, 'http:'),
'ext': ext,
'vcodec': 'none',
'acodec': ext,
'abr': int_or_none(abr_str),
})

return {
'duration': float_or_none(json.get('duration')),
'id': str_or_none(json.get('track_id') or json.get('id')),
'title': json.get('title'),
'title_link': json.get('title_link'),
'number': int_or_none(json.get('track_num')),
'formats': formats
}


class BandcampIE(BandcampBaseIE):
IE_NAME = "Bandcamp:track"
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
class BandcampIE(InfoExtractor):
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
'md5': 'c557841d5e50261777a6585648adf439',
'info_dict': {
'id': '1812978515',
'ext': 'mp3',
'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭",
'duration': 9.8485,
'uploader': "youtube-dl \"'/\\\u00e4\u21ad",
'timestamp': 1354224127,
'uploader': 'youtube-dl "\'/\\ä↭',
'upload_date': '20121129',
'timestamp': 1354224127,
},
'_skip': 'There is a limit of 200 free downloads / month for the test song'
}, {
# free download
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
'md5': '5d92af55811e47f38962a54c30b07ef0',
'info_dict': {
'id': '2650410135',
'ext': 'aiff',
@@ -120,52 +79,59 @@ class BandcampIE(BandcampBaseIE):
},
}]

def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
return self._parse_json(self._html_search_regex(
r'data-%s=(["\'])({.+?})\1' % attr, webpage,
attr + ' data', group=2), video_id, fatal=fatal)

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
url_track_title = title
title = self._match_id(url)
webpage = self._download_webpage(url, title)
thumbnail = self._html_search_meta('og:image', webpage, default=None)

json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", url_track_title)
json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", url_track_title)

json_tracks = json_tralbum.get('trackinfo')
if not json_tracks:
raise ExtractorError('Could not extract track')

track = self._parse_json_track(json_tracks[0])
artist = json_tralbum.get('artist')
album_title = json_embed.get('album_title')

json_album = json_tralbum.get('packages')
if json_album:
json_album = json_album[0]
album_publish_date = json_album.get('album_publish_date')
album_release_date = json_album.get('album_release_date')
else:
album_publish_date = None
album_release_date = json_tralbum.get('album_release_date')

timestamp = unified_timestamp(json_tralbum.get('current', {}).get('publish_date') or album_publish_date)
release_date = unified_strdate(album_release_date)

download_link = self._search_regex(
r'freeDownloadPage(?:["\']|&quot;):\s*(["\']|&quot;)(?P<url>(?:(?!\1).)+)\1', webpage,
'download link', default=None, group='url')
tralbum = self._extract_data_attr(webpage, title)
thumbnail = self._og_search_thumbnail(webpage)

track_id = None
track = None
track_number = None
duration = None

formats = []
track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict)
if track_info:
file_ = track_info.get('file')
if isinstance(file_, dict):
for format_id, format_url in file_.items():
if not url_or_none(format_url):
continue
ext, abr_str = format_id.split('-', 1)
formats.append({
'format_id': format_id,
'url': self._proto_relative_url(format_url, 'http:'),
'ext': ext,
'vcodec': 'none',
'acodec': ext,
'abr': int_or_none(abr_str),
})
track = track_info.get('title')
track_id = str_or_none(
track_info.get('track_id') or track_info.get('id'))
track_number = int_or_none(track_info.get('track_num'))
duration = float_or_none(track_info.get('duration'))

embed = self._extract_data_attr(webpage, title, 'embed', False)
current = tralbum.get('current') or {}
artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
timestamp = unified_timestamp(
current.get('publish_date') or tralbum.get('album_publish_date'))

download_link = tralbum.get('freeDownloadPage')
if download_link:
track_id = self._search_regex(
r'\?id=(?P<id>\d+)&',
download_link, 'track id')
track_id = compat_str(tralbum['id'])

download_webpage = self._download_webpage(
download_link, track_id, 'Downloading free downloads page')

blob = self._parse_json(
self._search_regex(
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
'blob', group='blob'),
track_id, transform_source=unescapeHTML)
blob = self._extract_data_attr(download_webpage, track_id, 'blob')

info = try_get(
blob, (lambda x: x['digital_items'][0],
@@ -173,6 +139,8 @@ class BandcampIE(BandcampBaseIE):
if info:
downloads = info.get('downloads')
if isinstance(downloads, dict):
if not track:
track = info.get('title')
if not artist:
artist = info.get('artist')
if not thumbnail:
@@ -206,7 +174,7 @@ class BandcampIE(BandcampBaseIE):
retry_url = url_or_none(stat.get('retry_url'))
if not retry_url:
continue
track['formats'].append({
formats.append({
'url': self._proto_relative_url(retry_url, 'http:'),
'ext': download_formats.get(format_id),
'format_id': format_id,
@@ -215,30 +183,34 @@ class BandcampIE(BandcampBaseIE):
'vcodec': 'none',
})

self._sort_formats(track['formats'])
self._sort_formats(formats)

title = '%s - %s' % (artist, track.get('title')) if artist else track.get('title')
title = '%s - %s' % (artist, track) if artist else track

if not duration:
duration = float_or_none(self._html_search_meta(
'duration', webpage, default=None))

return {
'album': album_title,
'artist': artist,
'duration': track['duration'],
'formats': track['formats'],
'id': track['id'],
'release_date': release_date,
'id': track_id,
'title': title,
'thumbnail': thumbnail,
'uploader': artist,
'timestamp': timestamp,
'title': title,
'track': track['title'],
'track_id': track['id'],
'track_number': track['number'],
'uploader': artist
'release_date': unified_strdate(tralbum.get('album_release_date')),
'duration': duration,
'track': track,
'track_number': track_number,
'track_id': track_id,
'artist': artist,
'album': embed.get('album_title'),
'formats': formats,
}


class BandcampAlbumIE(BandcampBaseIE):
class BandcampAlbumIE(BandcampIE):
IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'

_TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -248,7 +220,10 @@ class BandcampAlbumIE(BandcampBaseIE):
'info_dict': {
'id': '1353101989',
'ext': 'mp3',
'title': 'Intro',
'title': 'Blazo - Intro',
'timestamp': 1311756226,
'upload_date': '20110727',
'uploader': 'Blazo',
}
},
{
@@ -256,7 +231,10 @@ class BandcampAlbumIE(BandcampBaseIE):
'info_dict': {
'id': '38097443',
'ext': 'mp3',
'title': 'Kero One - Keep It Alive (Blazo remix)',
'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
'timestamp': 1311757238,
'upload_date': '20110727',
'uploader': 'Blazo',
}
},
],
@@ -292,6 +270,7 @@ class BandcampAlbumIE(BandcampBaseIE):
'title': '"Entropy" EP',
'uploader_id': 'jstrecords',
'id': 'entropy-ep',
'description': 'md5:0ff22959c943622972596062f2f366a5',
},
'playlist_mincount': 3,
}, {
@@ -301,6 +280,7 @@ class BandcampAlbumIE(BandcampBaseIE):
'id': 'we-are-the-plague',
'title': 'WE ARE THE PLAGUE',
'uploader_id': 'insulters',
'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f',
},
'playlist_count': 2,
}]
@@ -312,41 +292,34 @@ class BandcampAlbumIE(BandcampBaseIE):
else super(BandcampAlbumIE, cls).suitable(url))

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
uploader_id = mobj.group('subdomain')
album_id = mobj.group('album_id')
uploader_id, album_id = re.match(self._VALID_URL, url).groups()
playlist_id = album_id or uploader_id
webpage = self._download_webpage(url, playlist_id)

json_tralbum = self._extract_json_from_html_data_attribute(webpage, "tralbum", playlist_id)
json_embed = self._extract_json_from_html_data_attribute(webpage, "embed", playlist_id)

json_tracks = json_tralbum.get('trackinfo')
if not json_tracks:
raise ExtractorError('Could not extract album tracks')

album_title = json_embed.get('album_title')

tralbum = self._extract_data_attr(webpage, playlist_id)
track_info = tralbum.get('trackinfo')
if not track_info:
raise ExtractorError('The page doesn\'t contain any tracks')
# Only tracks with duration info have songs
tracks = [self._parse_json_track(track) for track in json_tracks]
entries = [
self.url_result(
compat_urlparse.urljoin(url, track['title_link']),
ie=BandcampIE.ie_key(), video_id=track['id'],
video_title=track['title'])
for track in tracks
if track.get('duration')]
urljoin(url, t['title_link']), BandcampIE.ie_key(),
str_or_none(t.get('track_id') or t.get('id')), t.get('title'))
for t in track_info
if t.get('duration')]

current = tralbum.get('current') or {}

return {
'_type': 'playlist',
'uploader_id': uploader_id,
'id': playlist_id,
'title': album_title,
'entries': entries
'title': current.get('title'),
'description': current.get('about'),
'entries': entries,
}


class BandcampWeeklyIE(InfoExtractor):
class BandcampWeeklyIE(BandcampIE):
IE_NAME = 'Bandcamp:weekly'
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
_TESTS = [{
@@ -361,29 +334,23 @@ class BandcampWeeklyIE(InfoExtractor):
'release_date': '20170404',
'series': 'Bandcamp Weekly',
'episode': 'Magic Moments',
'episode_number': 208,
'episode_id': '224',
}
},
'params': {
'format': 'opus-lo',
},
}, {
'url': 'https://bandcamp.com/?blah/blah@&show=228',
'only_matching': True
}]

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

blob = self._parse_json(
self._search_regex(
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
'blob', group='blob'),
video_id, transform_source=unescapeHTML)
show_id = self._match_id(url)
webpage = self._download_webpage(url, show_id)

show = blob['bcw_show']
blob = self._extract_data_attr(webpage, show_id, 'blob')

# This is desired because any invalid show id redirects to `bandcamp.com`
# which happens to expose the latest Bandcamp Weekly episode.
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
show = blob['bcw_data'][show_id]

formats = []
for format_id, format_url in show['audio_stream'].items():
@@ -408,20 +375,8 @@ class BandcampWeeklyIE(InfoExtractor):
if subtitle:
title += ' - %s' % subtitle

episode_number = None
seq = blob.get('bcw_seq')

if seq and isinstance(seq, list):
try:
episode_number = next(
int_or_none(e.get('episode_number'))
for e in seq
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
except StopIteration:
pass

return {
'id': video_id,
'id': show_id,
'title': title,
'description': show.get('desc') or show.get('short_desc'),
'duration': float_or_none(show.get('audio_duration')),
@@ -429,7 +384,6 @@ class BandcampWeeklyIE(InfoExtractor):
'release_date': unified_strdate(show.get('published_date')),
'series': 'Bandcamp Weekly',
'episode': show.get('subtitle'),
'episode_number': episode_number,
'episode_id': compat_str(video_id),
'episode_id': show_id,
'formats': formats
}

+ 53
- 4
youtube_dlc/extractor/bbc.py View File

@@ -981,7 +981,7 @@ class BBCIE(BBCCoUkIE):
group_id = self._search_regex(
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
webpage, 'group id', default=None)
if playlist_id:
if group_id:
return self.url_result(
'https://www.bbc.co.uk/programmes/%s' % group_id,
ie=BBCCoUkIE.ie_key())
@@ -1092,10 +1092,26 @@ class BBCIE(BBCCoUkIE):
self._search_regex(
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
'bbcthree config', default='{}'),
playlist_id, transform_source=js_to_json, fatal=False)
if bbc3_config:
playlist_id, transform_source=js_to_json, fatal=False) or {}
payload = bbc3_config.get('payload') or {}
if payload:
clip = payload.get('currentClip') or {}
clip_vpid = clip.get('vpid')
clip_title = clip.get('title')
if clip_vpid and clip_title:
formats, subtitles = self._download_media_selector(clip_vpid)
self._sort_formats(formats)
return {
'id': clip_vpid,
'title': clip_title,
'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
'description': clip.get('description'),
'duration': parse_duration(clip.get('duration')),
'formats': formats,
'subtitles': subtitles,
}
bbc3_playlist = try_get(
bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
payload, lambda x: x['content']['bbcMedia']['playlist'],
dict)
if bbc3_playlist:
playlist_title = bbc3_playlist.get('title') or playlist_title
@@ -1118,6 +1134,39 @@ class BBCIE(BBCCoUkIE):
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)

initial_data = self._parse_json(self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
'preload state', default='{}'), playlist_id, fatal=False)
if initial_data:
def parse_media(media):
if not media:
return
for item in (try_get(media, lambda x: x['media']['items'], list) or []):
item_id = item.get('id')
item_title = item.get('title')
if not (item_id and item_title):
continue
formats, subtitles = self._download_media_selector(item_id)
self._sort_formats(formats)
entries.append({
'id': item_id,
'title': item_title,
'thumbnail': item.get('holdingImageUrl'),
'formats': formats,
'subtitles': subtitles,
})
for resp in (initial_data.get('data') or {}).values():
name = resp.get('name')
if name == 'media-experience':
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
elif name == 'article':
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
if block.get('type') != 'media':
continue
parse_media(block.get('model'))
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)

def extract_all(pattern):
return list(filter(None, map(
lambda s: self._parse_json(s, playlist_id, fatal=False),


+ 98
- 0
youtube_dlc/extractor/box.py View File

@@ -0,0 +1,98 @@
# coding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
determine_ext,
parse_iso8601,
# try_get,
update_url_query,
)


class BoxIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
_TEST = {
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
'info_dict': {
'id': '510727257538',
'ext': 'mp4',
'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
'uploader': 'MLS Video',
'timestamp': 1566320259,
'upload_date': '20190820',
'uploader_id': '235196876',
}
}

def _real_extract(self, url):
shared_name, file_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, file_id)
request_token = self._parse_json(self._search_regex(
r'Box\.config\s*=\s*({.+?});', webpage,
'Box config'), file_id)['requestToken']
access_token = self._download_json(
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
'Downloading token JSON metadata',
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
'Content-Type': 'application/json',
'X-Request-Token': request_token,
'X-Box-EndUser-API': 'sharedName=' + shared_name,
})[file_id]['read']
shared_link = 'https://app.box.com/s/' + shared_name
f = self._download_json(
'https://api.box.com/2.0/files/' + file_id, file_id,
'Downloading file JSON metadata', headers={
'Authorization': 'Bearer ' + access_token,
'BoxApi': 'shared_link=' + shared_link,
'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats
}, query={
'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
})
title = f['name']

query = {
'access_token': access_token,
'shared_link': shared_link
}

formats = []

# for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []):
# entry_url_template = try_get(
# entry, lambda x: x['content']['url_template'])
# if not entry_url_template:
# continue
# representation = entry.get('representation')
# if representation == 'dash':
# TODO: append query to every fragment URL
# formats.extend(self._extract_mpd_formats(
# entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
# file_id, query=query))

authenticated_download_url = f.get('authenticated_download_url')
if authenticated_download_url and f.get('is_download_available'):
formats.append({
'ext': f.get('extension') or determine_ext(title),
'filesize': f.get('size'),
'format_id': 'download',
'url': update_url_query(authenticated_download_url, query),
})

self._sort_formats(formats)

creator = f.get('created_by') or {}

return {
'id': file_id,
'title': title,
'formats': formats,
'description': f.get('description') or None,
'uploader': creator.get('name'),
'timestamp': parse_iso8601(f.get('created_at')),
'uploader_id': creator.get('id'),
}

+ 4
- 4
youtube_dlc/extractor/brightcove.py View File

@@ -147,7 +147,7 @@ class BrightcoveLegacyIE(InfoExtractor):
]

@classmethod
def _build_brighcove_url(cls, object_str):
def _build_brightcove_url(cls, object_str):
"""
Build a Brightcove url from a xml string containing
<object class="BrightcoveExperience">{params}</object>
@@ -217,7 +217,7 @@ class BrightcoveLegacyIE(InfoExtractor):
return cls._make_brightcove_url(params)

@classmethod
def _build_brighcove_url_from_js(cls, object_js):
def _build_brightcove_url_from_js(cls, object_js):
# The layout of JS is as follows:
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
# // build Brightcove <object /> XML
@@ -272,12 +272,12 @@ class BrightcoveLegacyIE(InfoExtractor):
).+?>\s*</object>''',
webpage)
if matches:
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))

matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
if matches:
return list(filter(None, [
cls._build_brighcove_url_from_js(custom_bc)
cls._build_brightcove_url_from_js(custom_bc)
for custom_bc in matches]))
return [src for _, src in re.findall(
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]


+ 32
- 3
youtube_dlc/extractor/cda.py View File

@@ -5,10 +5,16 @@ import codecs
import re

from .common import InfoExtractor
from ..compat import (
compat_chr,
compat_ord,
compat_urllib_parse_unquote,
)
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
merge_dicts,
multipart_encode,
parse_duration,
random_birthday,
@@ -107,8 +113,9 @@ class CDAIE(InfoExtractor):
r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
'view_count', default=None)
average_rating = self._search_regex(
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
webpage, 'rating', fatal=False, group='rating_value')
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
group='rating_value')

info_dict = {
'id': video_id,
@@ -123,6 +130,24 @@ class CDAIE(InfoExtractor):
'age_limit': 18 if need_confirm_age else 0,
}

# Source: https://www.cda.pl/js/player.js?t=1606154898
def decrypt_file(a):
for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
a = a.replace(p, '')
a = compat_urllib_parse_unquote(a)
b = []
for c in a:
f = compat_ord(c)
b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else compat_chr(f))
a = ''.join(b)
a = a.replace('.cda.mp4', '')
for p in ('.2cda.pl', '.3cda.pl'):
a = a.replace(p, '.cda.pl')
if '/upstream' in a:
a = a.replace('/upstream', '.mp4/upstream')
return 'https://' + a
return 'https://' + a + '.mp4'

def extract_format(page, version):
json_str = self._html_search_regex(
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
@@ -141,6 +166,8 @@ class CDAIE(InfoExtractor):
video['file'] = codecs.decode(video['file'], 'rot_13')
if video['file'].endswith('adc.mp4'):
video['file'] = video['file'].replace('adc.mp4', '.mp4')
elif not video['file'].startswith('http'):
video['file'] = decrypt_file(video['file'])
f = {
'url': video['file'],
}
@@ -179,4 +206,6 @@ class CDAIE(InfoExtractor):

self._sort_formats(formats)

return info_dict
info = self._search_json_ld(webpage, video_id, default={})

return merge_dicts(info_dict, info)

+ 12
- 7
youtube_dlc/extractor/cnbc.py View File

@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import smuggle_url
@@ -38,7 +39,7 @@ class CNBCIE(InfoExtractor):


class CNBCVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
_TEST = {
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
'info_dict': {
@@ -56,11 +57,15 @@ class CNBCVideoIE(InfoExtractor):
}

def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
'video id')
path, display_id = re.match(self._VALID_URL, url).groups()
video_id = self._download_json(
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
'query': '''{
page(path: "%s") {
vcpsId
}
}''' % path,
})['data']['page']['vcpsId']
return self.url_result(
'http://video.cnbc.com/gallery/?video=%s' % video_id,
'http://video.cnbc.com/gallery/?video=%d' % video_id,
CNBCIE.ie_key())

+ 31
- 3
youtube_dlc/extractor/common.py View File

@@ -1456,9 +1456,10 @@ class InfoExtractor(object):
try:
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
return True
except ExtractorError:
except ExtractorError as e:
self.to_screen(
'%s: %s URL is invalid, skipping' % (video_id, item))
'%s: %s URL is invalid, skipping: %s'
% (video_id, item, error_to_compat_str(e.cause)))
return False

def http_scheme(self):
@@ -1663,7 +1664,7 @@ class InfoExtractor(object):
# just the media without qualities renditions.
# Fortunately, master playlist can be easily distinguished from media
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
# master playlist tags MUST NOT appear in a media playist and vice versa.
# master playlist tags MUST NOT appear in a media playlist and vice versa.
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
# media playlist and MUST NOT appear in master playlist thus we can
# clearly detect media playlist with this criterion.
@@ -2596,6 +2597,7 @@ class InfoExtractor(object):

def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
formats = []

hdcore_sign = 'hdcore=3.7.0'
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
hds_host = hosts.get('hds')
@@ -2608,6 +2610,7 @@ class InfoExtractor(object):
for entry in f4m_formats:
entry.update({'extra_param_to_segment_url': hdcore_sign})
formats.extend(f4m_formats)

m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
hls_host = hosts.get('hls')
if hls_host:
@@ -2615,6 +2618,31 @@ class InfoExtractor(object):
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))

http_host = hosts.get('http')
if http_host and 'hdnea=' not in manifest_url:
REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+'
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
qualities_length = len(qualities)
if len(formats) in (qualities_length + 1, qualities_length * 2 + 1):
i = 0
http_formats = []
for f in formats:
if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none':
for protocol in ('http', 'https'):
http_f = f.copy()
del http_f['manifest_url']
http_url = re.sub(
REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url'])
http_f.update({
'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
'url': http_url,
'protocol': protocol,
})
http_formats.append(http_f)
i += 1
formats.extend(http_formats)

return formats

def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):


+ 23
- 4
youtube_dlc/extractor/condenast.py View File

@@ -16,6 +16,8 @@ from ..utils import (
mimetype2ext,
orderedSet,
parse_iso8601,
strip_or_none,
try_get,
)


@@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor):
'uploader': 'gq',
'upload_date': '20170321',
'timestamp': 1490126427,
'description': 'How much grimmer would things be if these people were competent?',
},
}, {
# JS embed
@@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor):
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
'uploader': 'arstechnica',
'upload_date': '20150916',
'timestamp': 1442434955,
'timestamp': 1442434920,
}
}, {
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
@@ -196,6 +199,13 @@ class CondeNastIE(InfoExtractor):
})
self._sort_formats(formats)

subtitles = {}
for t, caption in video_info.get('captions', {}).items():
caption_url = caption.get('src')
if not (t in ('vtt', 'srt', 'tml') and caption_url):
continue
subtitles.setdefault('en', []).append({'url': caption_url})

return {
'id': video_id,
'formats': formats,
@@ -208,6 +218,7 @@ class CondeNastIE(InfoExtractor):
'season': video_info.get('season_title'),
'timestamp': parse_iso8601(video_info.get('premiere_date')),
'categories': video_info.get('categories'),
'subtitles': subtitles,
}

def _real_extract(self, url):
@@ -225,8 +236,16 @@ class CondeNastIE(InfoExtractor):
if url_type == 'series':
return self._extract_series(url, webpage)
else:
params = self._extract_video_params(webpage, display_id)
info = self._search_json_ld(
webpage, display_id, fatal=False)
video = try_get(self._parse_json(self._search_regex(
r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
'preload state', '{}'), display_id),
lambda x: x['transformed']['video'])
if video:
params = {'videoId': video['id']}
info = {'description': strip_or_none(video.get('description'))}
else:
params = self._extract_video_params(webpage, display_id)
info = self._search_json_ld(
webpage, display_id, fatal=False)
info.update(self._extract_video(params))
return info

+ 4
- 1
youtube_dlc/extractor/discoverynetworks.py View File

@@ -7,7 +7,7 @@ from .dplay import DPlayIE


class DiscoveryNetworksDeIE(DPlayIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'

_TESTS = [{
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
@@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
}, {
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
'only_matching': True,
}, {
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
'only_matching': True,
}]

def _real_extract(self, url):


+ 2
- 2
youtube_dlc/extractor/europa.py View File

@@ -60,7 +60,7 @@ class EuropaIE(InfoExtractor):

title = get_item('title', preferred_langs) or video_id
description = get_item('description', preferred_langs)
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail')
thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
@@ -85,7 +85,7 @@ class EuropaIE(InfoExtractor):
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnmail,
'thumbnail': thumbnail,
'upload_date': upload_date,
'duration': duration,
'view_count': view_count,


+ 21
- 9
youtube_dlc/extractor/extractors.py View File

@@ -36,6 +36,7 @@ from .afreecatv import AfreecaTVIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
from .amara import AmaraIE
from .alura import (
AluraIE,
AluraCourseIE
@@ -62,7 +63,7 @@ from .ard import (
ARDMediathekIE,
)
from .arte import (
ArteTVPlus7IE,
ArteTVIE,
ArteTVEmbedIE,
ArteTVPlaylistIE,
)
@@ -129,6 +130,7 @@ from .blinkx import BlinkxIE
from .bloomberg import BloombergIE
from .bokecc import BokeCCIE
from .bostonglobe import BostonGlobeIE
from .box import BoxIE
from .bpb import BpbIE
from .br import (
BRIE,
@@ -546,6 +548,7 @@ from .laola1tv import (
EHFTVIE,
ITTFIE,
)
from .lbry import LBRYIE
from .lci import LCIIE
from .lcp import (
LcpPlayIE,
@@ -621,6 +624,7 @@ from .markiza import (
from .massengeschmacktv import MassengeschmackTVIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .medaltv import MedalTVIE
from .mediaset import MediasetIE
from .mediasite import (
MediasiteIE,
@@ -803,6 +807,7 @@ from .ntvru import NTVRuIE