1
0
Fork 0
mirror of https://github.com/blackjack4494/yt-dlc.git synced 2024-12-21 23:33:10 +00:00

Merge branch 'master' of https://github.com/blackjack4494/yt-dlc into fix-tmz

This commit is contained in:
Diego Fernando Rodríguez Varón 2020-11-14 09:40:51 -05:00
commit a2044d57ca
17 changed files with 607 additions and 364 deletions

View file

@ -82,7 +82,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'
python-version: '3.8'
- name: Install Requirements
run: pip install pyinstaller
- name: Bump version
@ -109,14 +109,14 @@ jobs:
runs-on: windows-latest
needs: build_unix
needs: [build_unix, build_windows]
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.5.4 32-Bit
- name: Set up Python 3.4.4 32-Bit
uses: actions/setup-python@v2
with:
python-version: '3.5.4'
python-version: '3.4.4'
architecture: 'x86'
- name: Install Requirements for 32 Bit
run: pip install pyinstaller==3.5
@ -146,10 +146,10 @@ jobs:
SHA2_UNIX: ${{ needs.build_unix.outputs.sha2_unix }}
YTDLC_VERSION: ${{ needs.build_unix.outputs.ytdlc_version }}
run: |
echo "version:$YTDLC_VERSION" >> SHA2-256SUMS
echo "youtube-dlc.exe:$SHA2_WINDOWS" >> SHA2-256SUMS
echo "youtube-dlc32.exe:$SHA2_WINDOWS32" >> SHA2-256SUMS
echo "youtube-dlc:$SHA2_UNIX" >> SHA2-256SUMS
echo "version:${env:YTDLC_VERSION}" >> SHA2-256SUMS
echo "youtube-dlc.exe:${env:SHA2_WINDOWS}" >> SHA2-256SUMS
echo "youtube-dlc_x86.exe:${env:SHA2_WINDOWS32}" >> SHA2-256SUMS
echo "youtube-dlc:${env:SHA2_UNIX}" >> SHA2-256SUMS
- name: Upload 256SUMS file
id: upload-sums

View file

@ -1,15 +1,15 @@
[![Build Status](https://travis-ci.com/blackjack4494/yt-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/yt-dlc)
[![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc)
[![Downloads](https://pepy.tech/badge/youtube-dlc)](https://pepy.tech/project/youtube-dlc)
[![Gitter chat](https://img.shields.io/gitter/room/youtube-dlc/community)](https://gitter.im/youtube-dlc)
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/youtube-dlc/blob/master/LICENSE)
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/blackjack4494/yt-dlc/blob/master/LICENSE)
youtube-dlc - download videos from youtube.com or other video platforms.
youtube-dlc is a fork of youtube-dl with the intention of getting features tested by the community merged in the tool faster, since youtube-dl's development seems to be slowing down. (https://web.archive.org/web/20201014194602/https://github.com/ytdl-org/youtube-dl/issues/26462)
- [INSTALLATION](#installation)
- [UPDATE](#update)
- [DESCRIPTION](#description)
- [OPTIONS](#options)
- [Network Options:](#network-options)
@ -44,6 +44,10 @@ You may want to use `python3` instead of `python`
python -m pip install --upgrade youtube-dlc
If you want to install the current master branch
python -m pip install git+https://github.com/blackjack4494/yt-dlc
**UNIX** (Linux, macOS, etc.)
Using wget:

View file

@ -66,7 +66,7 @@ setup(
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
# long_description_content_type="text/markdown",
url="https://github.com/blackjack4494/youtube-dlc",
url="https://github.com/blackjack4494/yt-dlc",
packages=find_packages(exclude=("youtube_dl","test",)),
#packages=[
# 'youtube_dlc',

View file

@ -364,8 +364,10 @@ class FileDownloader(object):
else '%.2f' % sleep_interval))
time.sleep(sleep_interval)
else:
if self.params.get('sleep_interval_subtitles') > 0:
sleep_interval_sub = 0
if type(self.params.get('sleep_interval_subtitles')) is int:
sleep_interval_sub = self.params.get('sleep_interval_subtitles')
if sleep_interval_sub > 0:
self.to_screen(
'[download] Sleeping %s seconds...' % (
sleep_interval_sub))

View file

@ -115,8 +115,10 @@ class CurlFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename]
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
cmd += self._valueless_option('--silent', 'noprogress')
cmd += self._valueless_option('--verbose', 'verbose')
@ -150,8 +152,9 @@ class AxelFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-o', tmpfilename]
for key, val in info_dict['http_headers'].items():
cmd += ['-H', '%s: %s' % (key, val)]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['-H', '%s: %s' % (key, val)]
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@ -162,8 +165,9 @@ class WgetFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--limit-rate', 'ratelimit')
retry = self._option('--tries', 'retries')
if len(retry) == 2:
@ -189,8 +193,9 @@ class Aria2cFD(ExternalFD):
if dn:
cmd += ['--dir', dn]
cmd += ['--out', os.path.basename(tmpfilename)]
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
@ -206,8 +211,10 @@ class HttpieFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
for key, val in info_dict['http_headers'].items():
cmd += ['%s:%s' % (key, val)]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['%s:%s' % (key, val)]
return cmd
@ -253,7 +260,7 @@ class FFmpegFD(ExternalFD):
# if end_time:
# args += ['-t', compat_str(end_time - start_time)]
if info_dict['http_headers'] and re.match(r'^https?://', url):
if info_dict.get('http_headers') is not None and re.match(r'^https?://', url):
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
headers = handle_youtubedl_headers(info_dict['http_headers'])

View file

@ -82,7 +82,10 @@ class YoutubeLiveChatReplayFD(FragmentFD):
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
processed_fragment.extend(
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
try:
continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
except KeyError:
continuation_id = None
self._append_fragment(ctx, processed_fragment)

View file

@ -1175,6 +1175,7 @@ from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE
from .thisvid import ThisVidIE
from .threeqsdn import ThreeQSDNIE
from .tiktok import TikTokIE
from .tinypic import TinyPicIE
@ -1541,4 +1542,5 @@ from .zattoo import (
)
from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import ZingMp3IE
from .zoom import ZoomIE
from .zype import ZypeIE

View file

@ -36,6 +36,9 @@ class LA7IE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
if not url.startswith('http'):
url = '%s//%s' % (self.http_scheme(), url)
webpage = self._download_webpage(url, video_id)
player_data = self._search_regex(

View file

@ -12,6 +12,7 @@ from ..utils import (
parse_duration,
remove_end,
try_get,
urljoin,
)
@ -93,6 +94,14 @@ class MailRuIE(InfoExtractor):
{
'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html',
'only_matching': True,
},
{
'url': 'https://my.mail.ru/mail/cloud-strife/video/embed/Games/2009',
'only_matching': True,
},
{
'url': 'https://videoapi.my.mail.ru/videos/embed/mail/cloud-strife/Games/2009.html',
'only_matching': True,
}
]
@ -110,7 +119,7 @@ class MailRuIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
page_config = self._parse_json(self._search_regex([
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
r'(?s)"video":\s*(\{.+?\}),'],
r'(?s)"video":\s*({.+?}),'],
webpage, 'page config', default='{}'), video_id, fatal=False)
if page_config:
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') or page_config.get('metadataUrl')
@ -121,7 +130,7 @@ class MailRuIE(InfoExtractor):
# fix meta_url if missing the host address
if re.match(r'^\/\+\/', meta_url):
meta_url = 'https://my.mail.ru' + meta_url
meta_url = urljoin('https://my.mail.ru', meta_url)
if meta_url:
video_data = self._download_json(

View file

@ -13,6 +13,7 @@ class SkyItaliaBaseIE(InfoExtractor):
'high': [854, 480],
'hd': [1280, 720]
}
_GEO_BYPASS = False
def _extract_video_id(self, url):
webpage = self._download_webpage(url, 'skyitalia')
@ -43,6 +44,9 @@ class SkyItaliaBaseIE(InfoExtractor):
'height': r[1]
})
if not formats and video_data.get('geob') == 1:
self.raise_geo_restricted(countries=['IT'])
self._sort_formats(formats)
title = video_data.get('title')
thumb = video_data.get('thumb')

View file

@ -0,0 +1,97 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class ThisVidIE(InfoExtractor):
    """Extractor for thisvid.com video pages and embed pages.

    The page's player configuration exposes an obfuscated ``video_url``
    (prefixed with ``function/0/``) together with a ``license_code``; the
    playable URL is recovered by passing both to ``getrealurl``.
    """
    _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+/?)'
    _TESTS = [{
        'url': 'https://thisvid.com/videos/french-boy-pantsed/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }, {
        'url': 'https://thisvid.com/embed/2400174/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        Returns a dict with ``id``, ``display_id``, ``title``, ``url``,
        ``thumbnail`` and ``age_limit``. ``thumbnail`` and ``display_id`` are
        optional and may be None when they cannot be found in the page.
        """
        main_id = self._match_id(url)
        webpage = self._download_webpage(url, main_id)

        # URL decryptor was reversed from version 4.0.4, later verified working with 5.2.0 and may change in the future.
        kvs_version = self._html_search_regex(r'<script [^>]+?src="https://thisvid\.com/player/kt_player\.js\?v=(\d+(\.\d+)+)">', webpage, 'kvs_version', fatal=False)
        # The lookup is non-fatal, so the version may be None; only warn about
        # a major version change when a version was actually found.
        if kvs_version and not kvs_version.startswith("5."):
            self.report_warning("Major version change (" + kvs_version + ") in player engine--Download may fail.")

        title = self._html_search_regex(r'<title>(?:Video: )?(.+?)(?: - (?:\w+ porn at )?ThisVid(?:.com| tube))?</title>', webpage, 'title')

        # video_id, video_url and license_code from the 'flashvars' JSON object:
        video_id = self._html_search_regex(r"video_id: '([0-9]+)',", webpage, 'video_id')
        video_url = self._html_search_regex(r"video_url: '(function/0/.+?)',", webpage, 'video_url')
        license_code = self._html_search_regex(r"license_code: '([0-9$]{16})',", webpage, 'license_code')
        thumbnail = self._html_search_regex(r"preview_url: '((?:https?:)?//media.thisvid.com/.+?.jpg)',", webpage, 'thumbnail', fatal=False)
        # The thumbnail is optional (non-fatal lookup); only complete a
        # protocol-relative URL when one was found.
        if thumbnail and thumbnail.startswith("//"):
            thumbnail = "https:" + thumbnail

        if re.match(self._VALID_URL, url).group('type') == "videos":
            display_id = main_id
        else:
            # Embed URLs only carry the numeric id; take the display id from
            # the canonical link. _VALID_URL has two named groups, so the 'id'
            # group must be requested explicitly (otherwise the first group,
            # 'type', would be returned).
            display_id = self._search_regex(
                r'<link rel="canonical" href="' + self._VALID_URL + r'">',
                webpage, 'display_id', fatal=False, group='id')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'url': getrealurl(video_url, license_code),
            'thumbnail': thumbnail,
            'age_limit': 18,
        }
def getrealurl(video_url, license_code):
    """Recover the real media URL from an obfuscated player URL.

    The first two '/'-separated components of *video_url* are dropped. Of the
    remaining components, the one at index 5 has its first 32 characters
    permuted back into place using the token derived from *license_code*;
    anything after those 32 characters is kept unchanged.

    Returns the remaining components re-joined with '/'.
    """
    segments = video_url.split('/')[2:]
    token = getlicensetoken(license_code)
    scrambled = segments[5]
    magic = list(scrambled[:32])

    # Walk from the last position to the first; each position is swapped with
    # a partner whose index depends on the sum of the token digits from that
    # position onwards.
    for pos in reversed(range(len(magic))):
        partner = (pos + sum(int(digit) for digit in token[pos:])) % 32
        magic[pos], magic[partner] = magic[partner], magic[pos]

    segments[5] = "".join(magic) + scrambled[32:]
    return "/".join(segments)
def getlicensetoken(license):
    """Derive the digit string used to unscramble a video URL.

    *license* is the page's license code (digits and '$'). A key is computed
    from the code with '$' removed and every '0' replaced by '1': four times
    the absolute difference between its front and back halves, which overlap
    by one character at the centre. Each key digit is then added, modulo 10,
    to four consecutive digits of the original *license*.

    Returns the resulting digits concatenated into one string.
    """
    digits = license.replace("$", "").replace("0", "1")
    center = len(digits) // 2
    front = int(digits[:center + 1])
    back = int(digits[center:])
    key = str(4 * abs(front - back))

    # Offsets start at 1, so index 0 of the original license string is never
    # read.
    return "".join(
        str((int(license[pos + step]) + int(key[pos])) % 10)
        for pos in range(center + 1)
        for step in range(1, 5)
    )

View file

@ -308,17 +308,26 @@ class VikiIE(VikiBaseIE):
'url': thumbnail.get('url'),
})
new_video = self._download_json(
'https://www.viki.com/api/videos/%s' % video_id, video_id,
'Downloading new video JSON to get subtitles', headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
subtitles = {}
for sub in new_video.get('streamSubtitles').get('dash'):
subtitles[sub.get('srclang')] = [{
'ext': 'vtt',
'url': sub.get('src'),
'completion': sub.get('percentage'),
}]
try:
# New way to fetch subtitles
new_video = self._download_json(
'https://www.viki.com/api/videos/%s' % video_id, video_id,
'Downloading new video JSON to get subtitles', headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
for sub in new_video.get('streamSubtitles').get('dash'):
subtitles[sub.get('srclang')] = [{
'ext': 'vtt',
'url': sub.get('src'),
'completion': sub.get('percentage'),
}]
except AttributeError:
# fall-back to the old way if there isn't a streamSubtitles attribute
for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
subtitles[subtitle_lang] = [{
'ext': subtitles_format,
'url': self._prepare_call(
'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
} for subtitles_format in ('srt', 'vtt')]
result = {
'id': video_id,

View file

@ -11,7 +11,6 @@ from ..compat import compat_str
from ..utils import (
ExtractorError,
merge_dicts,
remove_start,
try_get,
urlencode_postdata,
)
@ -19,10 +18,10 @@ from ..utils import (
class VLiveIE(NaverBaseIE):
IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|post)/(?P<id>(?:\d-)?[0-9]+)'
_NETRC_MACHINE = 'vlive'
_TESTS = [{
'url': 'http://www.vlive.tv/video/1326',
'url': 'https://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': {
'id': '1326',
@ -32,8 +31,21 @@ class VLiveIE(NaverBaseIE):
'view_count': int,
'uploader_id': 'muploader_a',
},
}, {
'url': 'http://www.vlive.tv/video/16937',
},
{
'url': 'https://vlive.tv/post/1-18244258',
'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': {
'id': '1326',
'ext': 'mp4',
'title': "[V LIVE] Girl's Day's Broadcast",
'creator': "Girl's Day",
'view_count': int,
'uploader_id': 'muploader_a',
},
},
{
'url': 'https://www.vlive.tv/video/16937',
'info_dict': {
'id': '16937',
'ext': 'mp4',
@ -96,50 +108,69 @@ class VLiveIE(NaverBaseIE):
raise ExtractorError('Unable to log in', expected=True)
def _real_extract(self, url):
video_id = self._match_id(url)
# url may match on a post or a video url with a post_id potentially matching a video_id
working_id = self._match_id(url)
webpage = self._download_webpage(url, working_id)
webpage = self._download_webpage(
'https://www.vlive.tv/video/%s' % video_id, video_id)
PARAMS_RE = r'window\.__PRELOADED_STATE__\s*=\s*({.*});?\s*</script>'
PARAMS_FIELD = 'params'
VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
VIDEO_PARAMS_FIELD = 'video params'
params = self._search_regex(
PARAMS_RE, webpage, PARAMS_FIELD, default='', flags=re.DOTALL)
params = self._parse_json(params, working_id, fatal=False)
params = self._parse_json(self._search_regex(
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
transform_source=lambda s: '[' + s + ']', fatal=False)
video_params = try_get(params, lambda x: x["postDetail"]["post"]["officialVideo"], dict)
if not params or len(params) < 7:
params = self._search_regex(
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]
if video_params is None:
error = try_get(params, lambda x: x["postDetail"]["error"], dict)
error_data = try_get(error, lambda x: x["data"], dict)
error_video = try_get(error_data, lambda x: x["officialVideo"], dict)
error_msg = try_get(error, lambda x: x["message"], compat_str)
product_type = try_get(error_data,
[lambda x: x["officialVideo"]["productType"],
lambda x: x["board"]["boardType"]],
compat_str)
status, long_video_id, key = params[2], params[5], params[6]
status = remove_start(status, 'PRODUCT_')
if error_video is not None:
if product_type in ('VLIVE_PLUS', 'VLIVE+'):
self.raise_login_required('This video is only available with V LIVE+.')
elif error_msg is not None:
raise ExtractorError('V LIVE reported the following error: %s' % error_msg)
else:
raise ExtractorError('Failed to extract video parameters.')
elif 'post' in url:
raise ExtractorError('Url does not appear to be a video post.', expected=True)
else:
raise ExtractorError('Failed to extract video parameters.')
if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
return self._live(video_id, webpage)
elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
return self._replay(video_id, webpage, long_video_id, key)
video_id = working_id if 'video' in url else str(video_params["videoSeq"])
if status == 'LIVE_END':
raise ExtractorError('Uploading for replay. Please wait...',
expected=True)
elif status == 'COMING_SOON':
raise ExtractorError('Coming soon!', expected=True)
elif status == 'CANCELED':
raise ExtractorError('We are sorry, '
'but the live broadcast has been canceled.',
expected=True)
elif status == 'ONLY_APP':
raise ExtractorError('Unsupported video type', expected=True)
video_type = video_params["type"]
if video_type in ('VOD'):
encoding_status = video_params["encodingStatus"]
if encoding_status == 'COMPLETE':
return self._replay(video_id, webpage, params, video_params)
else:
raise ExtractorError('VOD encoding not yet complete. Please try again later.',
expected=True)
elif video_type in ('LIVE'):
video_status = video_params["status"]
if video_status in ('RESERVED'):
raise ExtractorError('Coming soon!', expected=True)
elif video_status in ('ENDED', 'END'):
raise ExtractorError('Uploading for replay. Please wait...', expected=True)
else:
return self._live(video_id, webpage, params)
else:
raise ExtractorError('Unknown status %s' % status)
raise ExtractorError('Unknown video type %s' % video_type)
def _get_common_fields(self, webpage):
def _get_common_fields(self, webpage, params):
title = self._og_search_title(webpage)
creator = self._html_search_regex(
r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)',
webpage, 'creator', fatal=False)
description = self._html_search_meta(
['og:description', 'description', 'twitter:description'],
webpage, 'description', default=None)
creator = (try_get(params, lambda x: x["channel"]["channel"]["channelName"], compat_str)
or self._search_regex(r'on (.*) channel', description or '', 'creator', fatal=False))
thumbnail = self._og_search_thumbnail(webpage)
return {
'title': title,
@ -147,24 +178,21 @@ class VLiveIE(NaverBaseIE):
'thumbnail': thumbnail,
}
def _live(self, video_id, webpage):
init_page = self._download_init_page(video_id)
def _live(self, video_id, webpage, params):
LIVE_INFO_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/old/v3/live/%s/playInfo' % video_id
play_info = self._download_json(LIVE_INFO_ENDPOINT, video_id,
headers={"referer": "https://www.vlive.tv"})
live_params = self._search_regex(
r'"liveStreamInfo"\s*:\s*(".*"),',
init_page, 'live stream info')
live_params = self._parse_json(live_params, video_id)
live_params = self._parse_json(live_params, video_id)
streams = try_get(play_info, lambda x: x["result"]["streamList"], list) or []
formats = []
for vid in live_params.get('resolutions', []):
for stream in streams:
formats.extend(self._extract_m3u8_formats(
vid['cdnUrl'], video_id, 'mp4',
m3u8_id=vid.get('name'),
stream['serviceUrl'], video_id, 'mp4',
fatal=False, live=True))
self._sort_formats(formats)
info = self._get_common_fields(webpage)
info = self._get_common_fields(webpage, params)
info.update({
'title': self._live_title(info['title']),
'id': video_id,
@ -173,44 +201,37 @@ class VLiveIE(NaverBaseIE):
})
return info
def _replay(self, video_id, webpage, long_video_id, key):
if '' in (long_video_id, key):
init_page = self._download_init_page(video_id)
video_info = self._parse_json(self._search_regex(
(r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'),
video_id)
if video_info.get('status') == 'NEED_CHANNEL_PLUS':
self.raise_login_required(
'This video is only available for CH+ subscribers')
long_video_id, key = video_info['vid'], video_info['inkey']
def _replay(self, video_id, webpage, params, video_params):
long_video_id = video_params["vodId"]
VOD_KEY_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/%s/inkey' % video_id
key_json = self._download_json(VOD_KEY_ENDPOINT, video_id,
headers={"referer": "https://www.vlive.tv"})
key = key_json["inkey"]
return merge_dicts(
self._get_common_fields(webpage),
self._get_common_fields(webpage, params),
self._extract_video_info(video_id, long_video_id, key))
def _download_init_page(self, video_id):
return self._download_webpage(
'https://www.vlive.tv/video/init/view',
video_id, note='Downloading live webpage',
data=urlencode_postdata({'videoSeq': video_id}),
headers={
'Referer': 'https://www.vlive.tv/video/%s' % video_id,
'Content-Type': 'application/x-www-form-urlencoded'
})
class VLiveChannelIE(InfoExtractor):
IE_NAME = 'vlive:channel'
_VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)'
_TEST = {
'url': 'http://channels.vlive.tv/FCD4B',
_VALID_URL = r'https?://(?:(?:www|m)\.)?(?:channels\.vlive\.tv/|vlive\.tv/channels?/)(?P<id>[0-9A-Z]+)'
_TESTS = [{
'url': 'https://channels.vlive.tv/FCD4B',
'info_dict': {
'id': 'FCD4B',
'title': 'MAMAMOO',
},
'playlist_mincount': 110
}
}, {
'url': 'https://www.vlive.tv/channel/FCD4B',
'info_dict': {
'id': 'FCD4B',
'title': 'MAMAMOO',
},
'playlist_mincount': 110
}]
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
def _real_extract(self, url):

View file

@ -36,6 +36,7 @@ from ..utils import (
get_element_by_attribute,
get_element_by_id,
int_or_none,
js_to_json,
mimetype2ext,
orderedSet,
parse_codecs,
@ -70,6 +71,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_LOGIN_REQUIRED = False
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
_INITIAL_DATA_RE = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
_YTCFG_DATA_RE = r"ytcfg.set\(({.*?})\)"
_YOUTUBE_CLIENT_HEADERS = {
'x-youtube-client-name': '1',
@ -274,11 +277,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _download_webpage_handle(self, *args, **kwargs):
query = kwargs.get('query', {}).copy()
query['disable_polymer'] = 'true'
kwargs['query'] = query
return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
*args, **compat_kwargs(kwargs))
def _get_yt_initial_data(self, video_id, webpage):
config = self._search_regex(
(r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
webpage, 'ytInitialData', default=None)
if config:
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
def _real_initialize(self):
if self._downloader is None:
return
@ -288,15 +299,61 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
# Extract entries from page with "Load more" button
def _entries(self, page, playlist_id):
more_widget_html = content_html = page
for page_num in itertools.count(1):
for entry in self._process_page(content_html):
def _find_entries_in_json(self, extracted):
    """Collect entry objects and the continuation object from parsed JSON.

    Walks *extracted* (nested lists and dicts) depth-first. A dict accepted by
    ``self._is_entry`` is recorded as an entry and not descended into. A dict
    holding a 'continuationCommand' key is remembered as the continuation
    (the last one seen wins) and not descended into either.

    Returns a tuple ``(entries, continuation)``.
    """
    found_entries = []
    state = {}

    def _walk(node):
        # None and strings are leaves with nothing to inspect.
        if node is None or isinstance(node, str):
            return

        if type(node) is list:
            for item in node:
                _walk(item)
        elif type(node) is dict:
            if self._is_entry(node):
                found_entries.append(node)
            elif 'continuationCommand' in node:
                state['continuation'] = node
            else:
                for child in node.values():
                    _walk(child)

    _walk(extracted)

    # presumably try_get yields None when no continuation was recorded -- verify against utils
    return found_entries, try_get(state, lambda x: x["continuation"])
def _entries(self, page, playlist_id, max_pages=None):
seen = []
yt_conf = {}
for m in re.finditer(self._YTCFG_DATA_RE, page):
parsed = self._parse_json(m.group(1), playlist_id,
transform_source=js_to_json, fatal=False)
if parsed:
yt_conf.update(parsed)
data_json = self._parse_json(self._search_regex(self._INITIAL_DATA_RE, page, 'ytInitialData'), None)
for page_num in range(1, max_pages + 1) if max_pages is not None else itertools.count(1):
entries, continuation = self._find_entries_in_json(data_json)
processed = self._process_entries(entries, seen)
if not processed:
break
for entry in processed:
yield entry
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
if not mobj:
if not continuation or not yt_conf:
break
continuation_token = try_get(continuation, lambda x: x['continuationCommand']['token'])
continuation_url = try_get(continuation, lambda x: x['commandMetadata']['webCommandMetadata']['apiUrl'])
if not continuation_token or not continuation_url:
break
count = 0
@ -305,12 +362,23 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
try:
# Downloading page may result in intermittent 5xx HTTP error
# that is usually worked around with a retry
more = self._download_json(
'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
'Downloading page #%s%s'
% (page_num, ' (retry #%d)' % count if count else ''),
data_json = self._download_json(
'https://www.youtube.com%s' % continuation_url,
playlist_id,
'Downloading continuation page #%s%s' % (page_num, ' (retry #%d)' % count if count else ''),
transform_source=uppercase_escape,
headers=self._YOUTUBE_CLIENT_HEADERS)
query={
'key': try_get(yt_conf, lambda x: x['INNERTUBE_API_KEY'])
},
data=str(json.dumps({
'context': try_get(yt_conf, lambda x: x['INNERTUBE_CONTEXT']),
'continuation': continuation_token
})).encode(encoding='UTF-8', errors='strict'),
headers={
'Content-Type': 'application/json'
}
)
break
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
@ -319,31 +387,30 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
continue
raise
content_html = more['content_html']
if not content_html.strip():
# Some webpages show a "Load more" button but they don't
# have more videos
break
more_widget_html = more['load_more_widget_html']
def _extract_title(self, renderer):
    """Return the title text of *renderer*.

    The text of the first title run is preferred; when that lookup gives
    nothing (or an empty value), the title's 'simpleText' is used instead.
    """
    return (
        try_get(renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        or try_get(renderer, lambda x: x['title']['simpleText'], compat_str))
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
def _process_page(self, content):
for video_id, video_title in self.extract_videos_from_page(content):
yield self.url_result(video_id, 'Youtube', video_id, video_title)
def _is_entry(self, obj):
return 'videoId' in obj
def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
for mobj in re.finditer(video_re, page):
# The link with index 0 is not the first video of the playlist (not sure if still actual)
if 'index' in mobj.groupdict() and mobj.group('id') == '0':
def _process_entries(self, entries, seen):
ids_in_page = []
titles_in_page = []
for renderer in entries:
video_id = try_get(renderer, lambda x: x['videoId'])
video_title = self._extract_title(renderer)
if video_id is None or video_title is None:
# we do not have a videoRenderer or title extraction broke
continue
video_id = mobj.group('id')
video_title = unescapeHTML(
mobj.group('title')) if 'title' in mobj.groupdict() else None
if video_title:
video_title = video_title.strip()
if video_title == '► Play all':
video_title = None
video_title = video_title.strip()
try:
idx = ids_in_page.index(video_id)
if video_title and not titles_in_page[idx]:
@ -352,19 +419,17 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
ids_in_page.append(video_id)
titles_in_page.append(video_title)
def extract_videos_from_page(self, page):
ids_in_page = []
titles_in_page = []
self.extract_videos_from_page_impl(
self._VIDEO_RE, page, ids_in_page, titles_in_page)
return zip(ids_in_page, titles_in_page)
for video_id, video_title in zip(ids_in_page, titles_in_page):
yield self.url_result(video_id, 'Youtube', video_id, video_title)
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
def _process_page(self, content):
for playlist_id in orderedSet(re.findall(
r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
content)):
def _is_entry(self, obj):
return 'playlistId' in obj
def _process_entries(self, entries, seen):
for playlist_id in orderedSet(try_get(r, lambda x: x['playlistId']) for r in entries):
yield self.url_result(
'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
@ -1390,6 +1455,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# https://github.com/ytdl-org/youtube-dl/pull/7599)
r';ytplayer\.config\s*=\s*({.+?});ytplayer',
r';ytplayer\.config\s*=\s*({.+?});',
r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'
)
config = self._search_regex(
patterns, webpage, 'ytplayer.config', default=None)
@ -1397,15 +1463,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
def _get_yt_initial_data(self, video_id, webpage):
config = self._search_regex(
(r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
webpage, 'ytInitialData', default=None)
if config:
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
def _get_music_metadata_from_yt_initial(self, yt_initial):
music_metadata = []
key_map = {
@ -1454,10 +1511,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._downloader.report_warning(err_msg)
return {}
try:
args = player_config['args']
caption_url = args.get('ttsurl')
if caption_url:
if "args" in player_config and "ttsurl" in player_config["args"]:
args = player_config['args']
caption_url = args['ttsurl']
timestamp = args['timestamp']
# We get the available subtitles
list_params = compat_urllib_parse_urlencode({
'type': 'list',
@ -1513,40 +1571,50 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return captions
# New captions format as of 22.06.2017
player_response = args.get('player_response')
if player_response and isinstance(player_response, compat_str):
player_response = self._parse_json(
player_response, video_id, fatal=False)
if player_response:
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
caption_tracks = renderer['captionTracks']
for caption_track in caption_tracks:
if 'kind' not in caption_track:
# not an automatic transcription
continue
base_url = caption_track['baseUrl']
sub_lang_list = []
for lang in renderer['translationLanguages']:
lang_code = lang.get('languageCode')
if lang_code:
sub_lang_list.append(lang_code)
return make_captions(base_url, sub_lang_list)
if "args" in player_config:
player_response = player_config["args"].get('player_response')
else:
# New player system (ytInitialPlayerResponse) as of October 2020
player_response = player_config
self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
return {}
# Some videos don't provide ttsurl but rather caption_tracks and
# caption_translation_languages (e.g. 20LmZk1hakA)
# Not used anymore as of 22.06.2017
caption_tracks = args['caption_tracks']
caption_translation_languages = args['caption_translation_languages']
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
sub_lang_list = []
for lang in caption_translation_languages.split(','):
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
sub_lang = lang_qs.get('lc', [None])[0]
if sub_lang:
sub_lang_list.append(sub_lang)
return make_captions(caption_url, sub_lang_list)
if player_response:
if isinstance(player_response, compat_str):
player_response = self._parse_json(
player_response, video_id, fatal=False)
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
caption_tracks = renderer['captionTracks']
for caption_track in caption_tracks:
if 'kind' not in caption_track:
# not an automatic transcription
continue
base_url = caption_track['baseUrl']
sub_lang_list = []
for lang in renderer['translationLanguages']:
lang_code = lang.get('languageCode')
if lang_code:
sub_lang_list.append(lang_code)
return make_captions(base_url, sub_lang_list)
self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
return {}
if "args" in player_config:
args = player_config["args"]
# Some videos don't provide ttsurl but rather caption_tracks and
# caption_translation_languages (e.g. 20LmZk1hakA)
# Not used anymore as of 22.06.2017
caption_tracks = args['caption_tracks']
caption_translation_languages = args['caption_translation_languages']
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
sub_lang_list = []
for lang in caption_translation_languages.split(','):
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
sub_lang = lang_qs.get('lc', [None])[0]
if sub_lang:
sub_lang_list.append(sub_lang)
return make_captions(caption_url, sub_lang_list)
# An extractor error can be raised by the download process if there are
# no automatic captions but there are subtitles
except (KeyError, IndexError, ExtractorError):
@ -1822,21 +1890,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config:
args = ytplayer_config['args']
if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
add_dash_mpd(video_info)
# Rental video is not rented but preview is available (e.g.
# https://www.youtube.com/watch?v=yYr8q0y5Jfg,
# https://github.com/ytdl-org/youtube-dl/issues/10532)
if not video_info and args.get('ypc_vid'):
return self.url_result(
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
if args.get('livestream') == '1' or args.get('live_playback') == 1:
is_live = True
if not player_response:
player_response = extract_player_response(args.get('player_response'), video_id)
args = ytplayer_config.get("args")
if args is not None:
if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
add_dash_mpd(video_info)
# Rental video is not rented but preview is available (e.g.
# https://www.youtube.com/watch?v=yYr8q0y5Jfg,
# https://github.com/ytdl-org/youtube-dl/issues/10532)
if not video_info and args.get('ypc_vid'):
return self.url_result(
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
if args.get('livestream') == '1' or args.get('live_playback') == 1:
is_live = True
if not player_response:
player_response = extract_player_response(args.get('player_response'), video_id)
elif not player_response:
player_response = ytplayer_config
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
add_dash_mpd_pr(player_response)
else:
@ -1866,8 +1937,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
age_gate = False
# Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config:
args = ytplayer_config['args']
args = ytplayer_config.get("args")
if args is not None:
if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
@ -1882,6 +1953,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
is_live = True
if not player_response:
player_response = extract_player_response(args.get('player_response'), video_id)
elif not player_response:
player_response = ytplayer_config
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
add_dash_mpd_pr(player_response)
@ -2614,6 +2687,12 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
_VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
_VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
IE_NAME = 'youtube:playlist'
_YTM_PLAYLIST_PREFIX = 'RDCLAK5uy_'
_YTM_CHANNEL_INFO = {
'uploader': 'Youtube Music',
'uploader_id': 'music', # or "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
'uploader_url': 'https://www.youtube.com/music'
}
_TESTS = [{
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'info_dict': {
@ -2811,10 +2890,21 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
return zip(ids_in_page, titles_in_page)
def _extract_mix_ids_from_yt_initial(self, yt_initial):
    """Return the video ids listed in the watch-page playlist panel.

    ``yt_initial`` is the parsed ``ytInitialData`` object. The panel entries
    are read from
    ``contents.twoColumnWatchNextResults.playlist.playlist.contents``; each
    entry contributes its ``playlistPanelVideoRenderer.videoId`` when that
    lookup yields a non-empty value. An empty list is returned when the
    panel is absent or empty.
    """
    panel_items = try_get(
        yt_initial,
        lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'],
        list) or []
    candidate_ids = (
        try_get(entry, lambda x: x['playlistPanelVideoRenderer']['videoId'], compat_str)
        for entry in panel_items)
    # Entries without a usable videoId are dropped.
    return [found_id for found_id in candidate_ids if found_id]
def _extract_mix(self, playlist_id):
# The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id
ids = []
yt_initial = None
last_id = playlist_id[-11:]
for n in itertools.count(1):
url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
@ -2824,6 +2914,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
r'''(?xs)data-video-username=".*?".*?
href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
webpage))
# if no ids in html of page, try using embedded json
if (len(new_ids) == 0):
yt_initial = self._get_yt_initial_data(playlist_id, webpage)
if yt_initial:
new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)
# Fetch new pages until all the videos are repeated, it seems that
# there are always 51 unique videos.
new_ids = [_id for _id in new_ids if _id not in ids]
@ -2841,6 +2938,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
or search_title('title'))
title = clean_html(title_span)
if not title:
title = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['title'], compat_str)
return self.playlist_result(url_results, playlist_id, title)
def _extract_playlist(self, playlist_id):
@ -2902,6 +3002,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'uploader_id': uploader_id,
'uploader_url': uploader_url,
})
if playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
playlist.update(self._YTM_CHANNEL_INFO)
return has_videos, playlist
@ -2932,8 +3034,10 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
return video
if playlist_id.startswith(('RD', 'UL', 'PU')):
# Mixes require a custom extraction process
return self._extract_mix(playlist_id)
if not playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
# Mixes require a custom extraction process,
# Youtube Music playlists act like normal playlists (with randomized order)
return self._extract_mix(playlist_id)
has_videos, playlist = self._extract_playlist(playlist_id)
if has_videos or not video_id:
@ -3192,11 +3296,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
}]
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com searches'
# there doesn't appear to be a real limit, for example if you search for
# 'python' you get more than 8.000.000 results
@ -3293,11 +3393,10 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
_SEARCH_PARAMS = 'CAI%3D'
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com search URLs'
IE_NAME = 'youtube:search_url'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
_SEARCH_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
_TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5,
@ -3309,63 +3408,20 @@ class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
'only_matching': True,
}]
def _find_videos_in_json(self, extracted):
videos = []
def _process_json_dict(self, obj, videos, c):
if "videoId" in obj:
videos.append(obj)
return
def _real_find(obj):
if obj is None or isinstance(obj, str):
return
if type(obj) is list:
for elem in obj:
_real_find(elem)
if type(obj) is dict:
if "videoId" in obj:
videos.append(obj)
return
for _, o in obj.items():
_real_find(o)
_real_find(extracted)
return videos
def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
search_response = self._parse_json(self._search_regex(self._SEARCH_DATA, page, 'ytInitialData'), None)
result_items = self._find_videos_in_json(search_response)
for renderer in result_items:
video_id = try_get(renderer, lambda x: x['videoId'])
video_title = try_get(renderer, lambda x: x['title']['runs'][0]['text']) or try_get(renderer, lambda x: x['title']['simpleText'])
if video_id is None or video_title is None:
# we do not have a videoRenderer or title extraction broke
continue
video_title = video_title.strip()
try:
idx = ids_in_page.index(video_id)
if video_title and not titles_in_page[idx]:
titles_in_page[idx] = video_title
except ValueError:
ids_in_page.append(video_id)
titles_in_page.append(video_title)
def extract_videos_from_page(self, page):
ids_in_page = []
titles_in_page = []
self.extract_videos_from_page_impl(page, ids_in_page, titles_in_page)
return zip(ids_in_page, titles_in_page)
if "nextContinuationData" in obj:
c["continuation"] = obj["nextContinuationData"]
return
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
query = compat_urllib_parse_unquote_plus(mobj.group('query'))
webpage = self._download_webpage(url, query)
return self.playlist_result(self._process_page(webpage), playlist_title=query)
return self.playlist_result(self._entries(webpage, query, max_pages=5), playlist_title=query)
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
@ -3387,14 +3443,12 @@ class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
'https://www.youtube.com/show/%s/playlists' % playlist_id)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
class YoutubeFeedsInfoExtractor(YoutubePlaylistBaseInfoExtractor):
"""
Base class for feed extractors
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
"""
_LOGIN_REQUIRED = True
_FEED_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
_YTCFG_DATA = r"ytcfg.set\(({.*?})\)"
@property
def IE_NAME(self):
@ -3403,96 +3457,35 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
def _real_initialize(self):
self._login()
def _find_videos_in_json(self, extracted):
videos = []
c = {}
def _process_entries(self, entries, seen):
new_info = []
for v in entries:
v_id = try_get(v, lambda x: x['videoId'])
if not v_id:
continue
def _real_find(obj):
if obj is None or isinstance(obj, str):
return
have_video = False
for old in seen:
if old['videoId'] == v_id:
have_video = True
break
if type(obj) is list:
for elem in obj:
_real_find(elem)
if not have_video:
new_info.append(v)
if type(obj) is dict:
if "videoId" in obj:
videos.append(obj)
return
if not new_info:
return
if "nextContinuationData" in obj:
c["continuation"] = obj["nextContinuationData"]
return
for _, o in obj.items():
_real_find(o)
_real_find(extracted)
return videos, try_get(c, lambda x: x["continuation"])
def _entries(self, page):
info = []
yt_conf = self._parse_json(self._search_regex(self._YTCFG_DATA, page, 'ytcfg.set', default="null"), None, fatal=False)
search_response = self._parse_json(self._search_regex(self._FEED_DATA, page, 'ytInitialData'), None)
for page_num in itertools.count(1):
video_info, continuation = self._find_videos_in_json(search_response)
new_info = []
for v in video_info:
v_id = try_get(v, lambda x: x['videoId'])
if not v_id:
continue
have_video = False
for old in info:
if old['videoId'] == v_id:
have_video = True
break
if not have_video:
new_info.append(v)
if not new_info:
break
info.extend(new_info)
for video in new_info:
yield self.url_result(try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(), video_title=try_get(video, lambda x: x['title']['runs'][0]['text']) or try_get(video, lambda x: x['title']['simpleText']))
if not continuation or not yt_conf:
break
search_response = self._download_json(
'https://www.youtube.com/browse_ajax', self._PLAYLIST_TITLE,
'Downloading page #%s' % page_num,
transform_source=uppercase_escape,
query={
"ctoken": try_get(continuation, lambda x: x["continuation"]),
"continuation": try_get(continuation, lambda x: x["continuation"]),
"itct": try_get(continuation, lambda x: x["clickTrackingParams"])
},
headers={
"X-YouTube-Client-Name": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_NAME"]),
"X-YouTube-Client-Version": try_get(yt_conf, lambda x: x["INNERTUBE_CONTEXT_CLIENT_VERSION"]),
"X-Youtube-Identity-Token": try_get(yt_conf, lambda x: x["ID_TOKEN"]),
"X-YouTube-Device": try_get(yt_conf, lambda x: x["DEVICE"]),
"X-YouTube-Page-CL": try_get(yt_conf, lambda x: x["PAGE_CL"]),
"X-YouTube-Page-Label": try_get(yt_conf, lambda x: x["PAGE_BUILD_LABEL"]),
"X-YouTube-Variants-Checksum": try_get(yt_conf, lambda x: x["VARIANTS_CHECKSUM"]),
})
seen.extend(new_info)
for video in new_info:
yield self.url_result(try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(), video_title=self._extract_title(video))
def _real_extract(self, url):
page = self._download_webpage(
'https://www.youtube.com/feed/%s' % self._FEED_NAME,
self._PLAYLIST_TITLE)
return self.playlist_result(
self._entries(page), playlist_title=self._PLAYLIST_TITLE)
return self.playlist_result(self._entries(page, self._PLAYLIST_TITLE),
playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):

View file

@ -0,0 +1,82 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
url_or_none,
parse_filesize,
urlencode_postdata
)
class ZoomIE(InfoExtractor):
    """Extractor for Zoom cloud recordings served from zoom.us.

    Handles ``/rec/play``, ``/rec/share`` and ``/recording/play|share`` URLs
    on ``zoom.us`` and its subdomains, including password-protected
    recordings (password supplied through the ``videopassword`` param).
    """
    IE_NAME = 'zoom'
    # The host has to be zoom.us or one of its subdomains. Dots are escaped
    # and the subdomain part cannot contain '/', so look-alike hosts or
    # paths that merely contain "zoom?us" no longer match.
    _VALID_URL = r'https://(?:[^/.]+\.)*zoom\.us/rec(?:ording)?/(?:play|share)/(?P<id>[A-Za-z0-9\-_.]+)'
    _TEST = {
        'url': 'https://zoom.us/recording/play/SILVuCL4bFtRwWTtOCFQQxAsBQsJljFtm9e4Z_bvo-A8B-nzUSYZRNuPl3qW5IGK',
        # md5 describes the downloaded file, so it belongs at the test level
        # rather than inside info_dict.
        'md5': '031a5b379f1547a8b29c5c4c837dccf2',
        'info_dict': {
            'title': "GAZ Transformational Tuesdays W/ Landon & Stapes",
            'id': "SILVuCL4bFtRwWTtOCFQQxAsBQsJljFtm9e4Z_bvo-A8B-nzUSYZRNuPl3qW5IGK",
            'ext': "mp4"
        }
    }

    @staticmethod
    def _base_url(url):
        """Return the scheme + host prefix of ``url`` up to and including ``zoom.us/``."""
        return url.split("zoom.us")[0] + "zoom.us/"

    def _real_extract(self, url):
        """Download the recording page and build the info dict for it.

        Raises ExtractorError when the recording is password protected and no
        (valid) password was given, or when no usable video URL is found.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # A password form in the page means the recording is locked: validate
        # the password, then fetch the page again to get the player data.
        password_form = self._search_regex(
            r'<form[^>]+?id="(password_form)"', webpage,
            'password field', default=None)
        if password_form is not None:
            self._verify_video_password(url, display_id, webpage)
            webpage = self._download_webpage(url, display_id)

        # Stop at the first closing quote instead of the last one on the line.
        video_url = url_or_none(self._search_regex(
            r"viewMp4Url: \'([^\']+)\'", webpage, 'video url'))
        if not video_url:
            # Never hand a format without a URL to the downloader.
            raise ExtractorError('Unable to extract a valid video URL')

        title = self._html_search_regex(
            [r"topic: \"(.*)\",", r"<title>(.*) - Zoom</title>"],
            webpage, 'title')
        # "Resolvtions" is the spelling these patterns look for in the page.
        res_width = self._search_regex(
            r"viewResolvtionsWidth: (\d*)", webpage, 'res width', fatal=False)
        res_height = self._search_regex(
            r"viewResolvtionsHeight: (\d*)", webpage, 'res height', fatal=False)
        file_size = parse_filesize(self._search_regex(
            r"fileSize: \'(.+)\'", webpage, 'fileSize', fatal=False))

        formats = [{
            'url': video_url,
            'width': int_or_none(res_width),
            'height': int_or_none(res_height),
            'http_headers': {
                'Accept': 'video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5',
                'Referer': self._base_url(url),
            },
            'ext': "mp4",
            'filesize_approx': int_or_none(file_size),
        }]
        self._sort_formats(formats)

        return {
            'id': display_id,
            'title': title,
            'formats': formats
        }

    def _verify_video_password(self, url, video_id, webpage):
        """Submit the user-supplied password for a protected recording.

        Reads the password from the ``videopassword`` downloader param and
        posts it, together with the ``meetId`` found in ``webpage``, to the
        ``rec/validate_meet_passwd`` endpoint of the same host.

        Raises ExtractorError (expected) when no password was supplied or the
        endpoint reports a non-zero ``errorCode``.
        """
        password = self._downloader.params.get('videopassword')
        if password is None:
            raise ExtractorError(
                'This video is protected by a password, use the --video-password option',
                expected=True)

        meet_id = self._search_regex(
            r'<input[^>]+?id="meetId" value="([^\"]+)"', webpage, 'meetId')
        data = urlencode_postdata({
            'id': meet_id,
            'passwd': password,
            'action': "viewdetailedpage",
            'recaptcha': ""
        })
        validation_url = self._base_url(url) + "rec/validate_meet_passwd"
        validation_response = self._download_json(
            validation_url, video_id,
            note='Validating Password...',
            errnote='Wrong password?',
            data=data)

        # A missing errorCode is treated like a failure instead of crashing
        # with a KeyError on an unexpected response shape.
        if validation_response.get('errorCode') != 0:
            raise ExtractorError(
                'Login failed, %s said: %r' % (
                    self.IE_NAME, validation_response.get('errorMessage')),
                expected=True)

View file

@ -412,7 +412,9 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
for lang, sub_info in subtitles.items():
sub_ext = sub_info['ext']
if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
if sub_ext == 'json':
self._downloader.to_screen('[ffmpeg] JSON subtitles cannot be embedded')
elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
sub_langs.append(lang)
sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
else:
@ -643,13 +645,18 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
self._downloader.to_screen(
'[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
continue
elif ext == 'json':
self._downloader.to_screen(
'[ffmpeg] You have requested to convert json subtitles into another format, '
'which is currently not possible')
continue
old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
sub_filenames.append(old_file)
new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
if ext in ('dfxp', 'ttml', 'tt'):
self._downloader.report_warning(
'You have requested to convert dfxp (TTML) subtitles into another format, '
'[ffmpeg] You have requested to convert dfxp (TTML) subtitles into another format, '
'which results in style information loss')
dfxp_file = old_file

View file

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2020.10.25'
__version__ = '2020.11.11-2'