Compare commits

...

9 Commits

Author SHA1 Message Date
Tom-Oliver Heidel 98e248faa4
Merge pull request #106 from diegorodriguezv/fix-tmz
[TMZ] Fix TMZ.com extractor
2020-12-02 01:46:46 +01:00
Unknown 40ec740f7b [yt] temporary fix for automatic captions - disable json3 2020-12-02 01:44:14 +01:00
Tom-Oliver Heidel 8662875551
Merge pull request #270 from nixxo/skyit
[Skyit] Replace old skyitalia extractor
2020-12-02 01:31:23 +01:00
nixxo 4f618e64f5 [skyit] removed old skyitalia extractor 2020-12-01 12:10:50 +01:00
Remita Amine 12300fa45a [skyit] add support for multiple Sky Italia websites(closes #26629) 2020-12-01 12:09:45 +01:00
Diego Fernando Rodríguez Varón d71eb83b05 Extract embedded youtube and twitter videos 2020-11-19 23:51:43 -05:00
Diego Fernando Rodríguez Varón a2044d57ca Merge branch 'master' of https://github.com/blackjack4494/yt-dlc into fix-tmz 2020-11-14 09:40:51 -05:00
Diego Fernando Rodríguez Varón fff5071112 [TMZ] Add support for new page structure using JSON-LD 2020-11-08 15:36:41 -05:00
Diego Fernando Rodríguez Varón 60351178a5 [TMZ] Fix TMZ.com extractor 2020-11-01 21:25:34 -05:00
5 changed files with 399 additions and 179 deletions

View File

@ -1052,6 +1052,16 @@ from .shared import (
from .showroomlive import ShowRoomLiveIE from .showroomlive import ShowRoomLiveIE
from .sina import SinaIE from .sina import SinaIE
from .sixplay import SixPlayIE from .sixplay import SixPlayIE
from .skyit import (
SkyItPlayerIE,
SkyItVideoIE,
SkyItVideoLiveIE,
SkyItIE,
SkyItAcademyIE,
SkyItArteIE,
CieloTVItIE,
TV8ItIE,
)
from .skylinewebcams import SkylineWebcamsIE from .skylinewebcams import SkylineWebcamsIE
from .skynewsarabia import ( from .skynewsarabia import (
SkyNewsArabiaIE, SkyNewsArabiaIE,
@ -1061,10 +1071,6 @@ from .sky import (
SkyNewsIE, SkyNewsIE,
SkySportsIE, SkySportsIE,
) )
from .skyitalia import (
SkyArteItaliaIE,
SkyItaliaIE,
)
from .slideshare import SlideshareIE from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE from .slutload import SlutloadIE
@ -1207,10 +1213,7 @@ from .thisvid import ThisVidIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from .tiktok import TikTokIE from .tiktok import TikTokIE
from .tinypic import TinyPicIE from .tinypic import TinyPicIE
from .tmz import ( from .tmz import TMZIE
TMZIE,
TMZArticleIE,
)
from .tnaflix import ( from .tnaflix import (
TNAFlixNetworkEmbedIE, TNAFlixNetworkEmbedIE,
TNAFlixIE, TNAFlixIE,

View File

@ -0,0 +1,239 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
    dict_get,
    ExtractorError,
    int_or_none,
    parse_duration,
    unified_timestamp,
)
class SkyItPlayerIE(InfoExtractor):
    """Extractor for the embeddable Sky Italia player (player.sky.it).

    The other extractors in this module derive from this class: they
    either hand a numeric video id back to this extractor through
    ``_player_url_result`` or reuse ``_parse_video`` directly.
    """

    IE_NAME = 'player.sky.it'
    _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
    _GEO_BYPASS = False
    # Value substituted for the ``domain`` query parameter of _PLAYER_TMPL;
    # subclasses override it. It is also the key looked up in _TOKEN_MAP.
    _DOMAIN = 'sky'
    _PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
    # http://static.sky.it/static/skyplayer/conf.json
    # (presumably where these per-domain API tokens come from -- verify)
    _TOKEN_MAP = {
        'cielo': 'Hh9O7M8ks5yi6nSROL7bKYz933rdf3GhwZlTLMgvy4Q',
        'hotclub': 'kW020K2jq2lk2eKRJD2vWEg832ncx2EivZlTLQput2C',
        'mtv8': 'A5Nn9GGb326CI7vP5e27d7E4PIaQjota',
        'salesforce': 'C6D585FD1615272C98DE38235F38BD86',
        'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE',
        'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk',
        'skyacademy': 'A6LAn7EkO2Q26FRy0IAMBekX6jzDXYL3',
        'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd',
        'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp',
    }

    def _player_url_result(self, video_id):
        """Return a url_result that delegates ``video_id`` to SkyItPlayerIE.

        The player URL is built from _PLAYER_TMPL with this class's
        _DOMAIN, so subclasses select their token simply by overriding
        _DOMAIN.
        """
        return self.url_result(
            self._PLAYER_TMPL % (video_id, self._DOMAIN),
            SkyItPlayerIE.ie_key(), video_id)

    def _parse_video(self, video, video_id):
        """Turn an API response dict into an info dict.

        ``video`` is the JSON object returned by the getVideoData or
        getLivestream endpoint. A ``type`` of ``'live'`` selects the live
        field names (``streaming_url`` / ``geoblock``); anything else uses
        the on-demand ones (``hls_url`` / ``geob``).

        Raises KeyError when ``video`` has no ``title``, reports a geo
        restriction when the stream URL is missing and the geo flag is
        set, and raises ExtractorError when the stream URL is missing for
        any other reason.
        """
        title = video['title']
        is_live = video.get('type') == 'live'
        hls_url = video.get(('streaming' if is_live else 'hls') + '_url')
        if not hls_url:
            if video.get('geoblock' if is_live else 'geob'):
                self.raise_geo_restricted(countries=['IT'])
            # Without this guard a missing URL would be passed on as None
            # and fail deep inside the format extraction helpers.
            raise ExtractorError(
                'No stream URL found in video data', video_id=video_id)

        if is_live:
            formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
        else:
            formats = self._extract_akamai_formats(
                hls_url, video_id, {'http': 'videoplatform.sky.it'})
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'formats': formats,
            'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
            'description': video.get('short_desc') or None,
            'timestamp': unified_timestamp(video.get('create_date')),
            'duration': int_or_none(video.get('duration_sec')) or parse_duration(video.get('duration')),
            'is_live': is_live,
        }

    def _real_extract(self, url):
        """Fetch the video data for a player URL and parse it."""
        video_id = self._match_id(url)
        # The optional ``domain`` query parameter picks the API token;
        # unknown or absent domains fall back to the 'sky' token.
        domain = compat_parse_qs(compat_urllib_parse_urlparse(
            url).query).get('domain', [None])[0]
        token = dict_get(self._TOKEN_MAP, (domain, 'sky'))
        video = self._download_json(
            'https://apid.sky.it/vdp/v1/getVideoData',
            video_id, query={
                'caller': 'sky',
                'id': video_id,
                'token': token
            }, headers=self.geo_verification_headers())
        return self._parse_video(video, video_id)
class SkyItVideoIE(SkyItPlayerIE):
    """Extractor for video pages on video/masterchef/xfactor.sky.it.

    The numeric id is the trailing part of the URL slug, so no page
    download is needed; the id is handed straight to the player extractor.
    """

    IE_NAME = 'video.sky.it'
    _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227',
            'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
            'info_dict': {
                'id': '631227',
                'ext': 'mp4',
                'title': 'Uomo ucciso da uno squalo in Australia',
                'timestamp': 1606036192,
                'upload_date': '20201122',
            }
        },
        {
            'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820',
            'only_matching': True,
        },
        {
            'url': 'https://masterchef.sky.it/video/masterchef-9-cosa-e-successo-nella-prima-puntata-562831',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate the id matched from ``url`` to the player extractor."""
        return self._player_url_result(self._match_id(url))
class SkyItVideoLiveIE(SkyItPlayerIE):
    """Extractor for live channels under video.sky.it/diretta/."""

    IE_NAME = 'video.sky.it:live'
    _VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://video.sky.it/diretta/tg24',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'description': 'Guarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve the channel page to an asset id and parse its stream.

        The asset id is read from the ``__NEXT_DATA__`` JSON blob embedded
        in the page and then passed to the getLivestream endpoint.
        """
        channel = self._match_id(url)
        page = self._download_webpage(url, channel)
        next_data = self._parse_json(
            self._search_regex(
                r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
                page, 'next data'),
            channel)
        content = next_data['props']['initialState']['livePage']['content']
        asset_id = compat_str(content['asset_id'])
        stream = self._download_json(
            'https://apid.sky.it/vdp/v1/getLivestream',
            asset_id, query={'id': asset_id})
        return self._parse_video(stream, asset_id)
class SkyItIE(SkyItPlayerIE):
    """Extractor for dated article pages on sport.sky.it and tg24.sky.it.

    Subclasses adapt it to other sites by overriding _VALID_URL, _DOMAIN
    and _VIDEO_ID_REGEX.
    """

    IE_NAME = 'sky.it'
    _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    # Pattern whose first group captures the numeric video id in the page.
    _VIDEO_ID_REGEX = r'data-videoid="(\d+)"'
    _TESTS = [
        {
            'url': 'https://sport.sky.it/calcio/serie-a/2020/11/21/juventus-cagliari-risultato-gol',
            'info_dict': {
                'id': '631201',
                'ext': 'mp4',
                'title': 'Un rosso alla violenza: in campo per i diritti delle donne',
                'upload_date': '20201121',
                'timestamp': 1605995753,
            },
            'expected_warnings': ['Unable to download f4m manifest'],
        },
        {
            'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo',
            'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
            'info_dict': {
                'id': '631227',
                'ext': 'mp4',
                'title': 'Uomo ucciso da uno squalo in Australia',
                'timestamp': 1606036192,
                'upload_date': '20201122',
            },
        },
    ]

    def _real_extract(self, url):
        """Find the video id in the page and delegate to the player."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        return self._player_url_result(
            self._search_regex(self._VIDEO_ID_REGEX, page, 'video id'))
class SkyItAcademyIE(SkyItIE):
    """SkyItIE variant for dated article pages on skyacademy.it."""

    IE_NAME = 'skyacademy.it'
    _DOMAIN = 'skyacademy'
    _VALID_URL = r'https?://(?:www\.)?skyacademy\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    # This site carries the video id in an element id attribute.
    _VIDEO_ID_REGEX = r'id="news-videoId_(\d+)"'
    _TESTS = [
        {
            'url': 'https://www.skyacademy.it/eventi-speciali/2019/07/05/a-lezione-di-cinema-con-sky-academy-/',
            'md5': 'ced5c26638b7863190cbc44dd6f6ba08',
            'info_dict': {
                'id': '523458',
                'ext': 'mp4',
                'title': 'Sky Academy "The Best CineCamp 2019"',
                'timestamp': 1562843784,
                'upload_date': '20190711',
            }
        },
    ]
class SkyItArteIE(SkyItIE):
    """SkyItIE variant for video pages on arte.sky.it."""

    IE_NAME = 'arte.sky.it'
    _DOMAIN = 'skyarte'
    _VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)'
    # The id is taken from the src of the embedded player.sky.it iframe.
    _VIDEO_ID_REGEX = r'(?s)<iframe[^>]+src="(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'
    _TESTS = [
        {
            'url': 'https://arte.sky.it/video/serie-musei-venezia-collezionismo-12-novembre/',
            'md5': '515aee97b87d7a018b6c80727d3e7e17',
            'info_dict': {
                'id': '627926',
                'ext': 'mp4',
                'title': "Musei Galleria Franchetti alla Ca' d'Oro Palazzo Grimani",
                'upload_date': '20201106',
                'timestamp': 1604664493,
            }
        },
    ]
class CieloTVItIE(SkyItIE):
    """SkyItIE variant for video pages on cielotv.it."""

    IE_NAME = 'cielotv.it'
    _DOMAIN = 'cielo'
    _VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html'
    # The id is read from a ``videoId = "..."`` assignment in the page.
    _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"'
    _TESTS = [
        {
            'url': 'https://www.cielotv.it/video/Il-lunedi-e-sempre-un-dramma.html',
            'md5': 'c4deed77552ba901c2a0d9258320304b',
            'info_dict': {
                'id': '499240',
                'ext': 'mp4',
                'title': 'Il lunedì è sempre un dramma',
                'upload_date': '20190329',
                'timestamp': 1553862178,
            }
        },
    ]
class TV8ItIE(SkyItVideoIE):
    """SkyItVideoIE variant for tv8.it/showvideo/<id> pages.

    The numeric id is part of the URL, so the inherited extraction applies
    unchanged; only the player domain differs.
    """

    IE_NAME = 'tv8.it'
    _DOMAIN = 'mtv8'
    _VALID_URL = r'https?://tv8\.it/showvideo/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://tv8.it/showvideo/630529/ogni-mattina-ucciso-asino-di-andrea-lo-cicero/18-11-2020/',
            'md5': '9ab906a3f75ea342ed928442f9dabd21',
            'info_dict': {
                'id': '630529',
                'ext': 'mp4',
                'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero',
                'timestamp': 1605721374,
                'upload_date': '20201118',
            }
        },
    ]

View File

@ -1,123 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError
class SkyItaliaBaseIE(InfoExtractor):
    """Shared logic for the Sky Italia extractors.

    Concrete subclasses supply ``_VALID_URL`` and the API ``_TOKEN``.
    """

    _GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}'
    # Quality label -> [width, height] reported for that rendition.
    _RES = {
        'low': [426, 240],
        'med': [640, 360],
        'high': [854, 480],
        'hd': [1280, 720]
    }
    _GEO_BYPASS = False

    def _extract_video_id(self, url):
        """Download ``url`` and return the video id found in the page.

        Raises ExtractorError when the search yields a falsy result.
        """
        page = self._download_webpage(url, 'skyitalia')
        patterns = [
            r'data-videoid=\"(\d+)\"',
            r'http://player\.sky\.it/social\?id=(\d+)\&',
        ]
        found = self._html_search_regex(patterns, page, 'video_id')
        if not found:
            raise ExtractorError('Video ID not found.')
        return found

    def _get_formats(self, video_id, token):
        """Query the video data API and build the info dict.

        One format is emitted per quality in ``_RES`` whose
        ``web_<quality>_url`` key is present in the response.
        """
        endpoint = self._GET_VIDEO_DATA.replace(
            '{id}', video_id).replace('{token}', token)
        video_data = self._parse_json(
            self._download_webpage(endpoint, video_id), video_id)

        formats = [
            {
                'url': video_data.get('web_%s_url' % quality),
                'format_id': quality,
                'width': width,
                'height': height
            }
            for quality, (width, height) in self._RES.items()
            if 'web_%s_url' % quality in video_data
        ]

        # No usable rendition plus the geo flag set: report the restriction.
        if not formats and video_data.get('geob') == 1:
            self.raise_geo_restricted(countries=['IT'])
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_data.get('title'),
            'thumbnail': video_data.get('thumb'),
            'formats': formats
        }

    def _real_extract(self, url):
        """Resolve the video id from the URL or the page, then extract."""
        matched = self._match_id(url)
        # The id group is optional in the subclasses' patterns; presumably
        # _match_id stringifies a missing group to 'None' -- TODO confirm.
        video_id = self._extract_video_id(url) if matched == 'None' else matched
        return self._get_formats(video_id, self._TOKEN)
class SkyItaliaIE(SkyItaliaBaseIE):
    """Extractor for sport.sky.it, tg24.sky.it and video.sky.it pages."""

    IE_NAME = 'sky.it'
    _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk'
    # Verbose-mode pattern: the whitespace inside the literal is ignored.
    # The six-digit id at the end of the path is optional.
    _VALID_URL = r'''(?x)https?://
                    (?P<ie>sport|tg24|video)
                    \.sky\.it/(?:.+?)
                    (?P<id>[0-9]{6})?
                    (?:$|\?)'''
    _TESTS = [
        {
            'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162',
            'md5': '9c03b590b06e5952d8051f0e02b0feca',
            'info_dict': {
                'id': '616162',
                'ext': 'mp4',
                'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
                'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
            }
        },
        {
            'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta',
            'md5': '9c03b590b06e5952d8051f0e02b0feca',
            'info_dict': {
                'id': '616162',
                'ext': 'mp4',
                'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere',
                'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg',
            }
        },
        {
            'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi',
            'md5': 'caa25e62dadb529bc5e0b078da99f854',
            'info_dict': {
                'id': '615904',
                'ext': 'mp4',
                'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti',
                'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg',
            }
        },
        {
            'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api',
            'only_matching': True,
        },
    ]
class SkyArteItaliaIE(SkyItaliaBaseIE):
    """Extractor for video pages on arte.sky.it."""

    IE_NAME = 'arte.sky.it'
    _TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd'
    # The trailing six-digit id is optional.
    _VALID_URL = r'https?://arte\.sky\.it/video/.+?(?P<id>[0-9]{6})?$'
    _TEST = {
        'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/',
        'md5': '2f22513a89f45142f2746f878d690647',
        'info_dict': {
            'id': '612888',
            'ext': 'mp4',
            'title': 'I maestri del cinema Federico Felini',
            'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg',
        }
    }

View File

@ -1,56 +1,157 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
ExtractorError,
get_element_by_attribute,
)
class TMZIE(InfoExtractor): class TMZIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/?#]+)' _VALID_URL = r"https?://(?:www\.)?tmz\.com/.*"
_TESTS = [{ _TESTS = [
'url': 'http://www.tmz.com/videos/0_okj015ty/', {
'md5': '4d22a51ef205b6c06395d8394f72d560', "url": "http://www.tmz.com/videos/0-cegprt2p/",
'info_dict': { "info_dict": {
'id': '0_okj015ty', "id": "http://www.tmz.com/videos/0-cegprt2p/",
'ext': 'mp4', "ext": "mp4",
'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!', "title": "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet",
'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?', "description": "Harvey talks about Director Comeys decision not to prosecute Hillary Clinton.",
'timestamp': 1394747163, "timestamp": 1467831837,
'uploader_id': 'batchUser', "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
'upload_date': '20140313', "upload_date": "20160706",
} },
}, { },
'url': 'http://www.tmz.com/videos/0-cegprt2p/', {
'only_matching': True, "url": "https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/",
}] "info_dict": {
"id": "https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/",
"ext": "mp4",
"title": "Angry Bagel Shop Guy Says He Doesn't Trust Women",
"description": "The enraged man who went viral for ranting about women on dating sites before getting ragdolled in a bagel shop is defending his misogyny ... he says it's women's fault in the first place.",
"timestamp": 1562889485,
"uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
"upload_date": "20190711",
},
},
{
"url": "http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert",
"md5": "5429c85db8bde39a473a56ca8c4c5602",
"info_dict": {
"id": "http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert",
"ext": "mp4",
"title": "Bobby Brown Tells Crowd ... Bobbi Kristina is Awake",
"description": 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
"timestamp": 1429467813,
"uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
"upload_date": "20150419",
},
},
{
"url": "http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/",
"info_dict": {
"id": "http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/",
"ext": "mp4",
"title": "Patti LaBelle -- Goes Nuclear On Stripping Fan",
"description": "Patti LaBelle made it known loud and clear last night ... NO "
"ONE gets on her stage and strips down.",
"timestamp": 1442683746,
"uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
"upload_date": "20150919",
},
},
{
"url": "http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/",
"info_dict": {
"id": "http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/",
"ext": "mp4",
"title": "NBA's Adam Silver -- Blake Griffin's a Great Guy ... He'll Learn from This",
"description": "Two pretty parts of this video with NBA Commish Adam Silver.",
"timestamp": 1454010989,
"uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
"upload_date": "20160128",
},
},
{
"url": "http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/",
"info_dict": {
"id": "http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/",
"ext": "mp4",
"title": "Trump Star Vandal -- I'm Not Afraid of Donald or the Cops!",
"description": "James Otis is the the guy who took a pickaxe to Donald Trump's star on the Walk of Fame, and he tells TMZ .. he's ready and willing to go to jail for the crime.",
"timestamp": 1477500095,
"uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
"upload_date": "20161026",
},
},
{
"url": "https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/",
"info_dict": {
"id": "https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/",
"ext": "mp4",
"title": "Cops Use Billy Clubs Against Pro-Trump and Anti-Fascist "
"Demonstrators",
"description": "Beverly Hills may be an omen of what's coming next week, "
"because things got crazy on the streets and cops started "
"swinging their billy clubs at both Anti-Fascist and Pro-Trump "
"demonstrators.",
"timestamp": 1604182772,
"uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
"upload_date": "20201031",
},
},
{
"url": "https://www.tmz.com/2020/11/05/gervonta-davis-car-crash-hit-and-run-police/",
"info_dict": {
"id": "Dddb6IGe-ws",
"ext": "mp4",
"title": "SICK LAMBO GERVONTA DAVIS IN HIS NEW RIDE RIGHT AFTER KO AFTER LEO EsNews Boxing",
"uploader": "ESNEWS",
"description": "md5:49675bc58883ccf80474b8aa701e1064",
"upload_date": "20201101",
"uploader_id": "ESNEWS",
},
},
{
"url": "https://www.tmz.com/2020/11/19/conor-mcgregor-dustin-poirier-contract-fight-ufc-257-fight-island/",
"info_dict": {
"id": "1329450007125225473",
"ext": "mp4",
"title": "TheMacLife - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.",
"uploader": "TheMacLife",
"description": "md5:56e6009bbc3d12498e10d08a8e1f1c69",
"upload_date": "20201119",
"uploader_id": "Maclifeofficial",
"timestamp": 1605800556,
},
},
]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url).replace('-', '_') webpage = self._download_webpage(url, url)
return self.url_result('kaltura:591531:%s' % video_id, 'Kaltura', video_id) jsonld = self._search_json_ld(webpage, url)
if not jsonld or "url" not in jsonld:
# try to extract from YouTube Player API
class TMZArticleIE(InfoExtractor): # see https://developers.google.com/youtube/iframe_api_reference#Video_Queueing_Functions
_VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?' match_obj = re.search(r'\.cueVideoById\(\s*(?P<quote>[\'"])(?P<id>.*?)(?P=quote)', webpage)
_TEST = { if match_obj:
'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert', res = self.url_result(match_obj.group("id"))
'md5': '3316ff838ae5bb7f642537825e1e90d2', return res
'info_dict': { # try to extract from twitter
'id': '0_6snoelag', blockquote_el = get_element_by_attribute("class", "twitter-tweet", webpage)
'ext': 'mov', if blockquote_el:
'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake', matches = re.findall(
'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."', r'<a[^>]+href=\s*(?P<quote>[\'"])(?P<link>.*?)(?P=quote)',
'timestamp': 1429467813, blockquote_el)
'upload_date': '20150419', if matches:
'uploader_id': 'batchUser', for _, match in matches:
} if "/status/" in match:
} res = self.url_result(match)
return res
def _real_extract(self, url): raise ExtractorError("No video found!")
video_id = self._match_id(url) if id not in jsonld:
jsonld["id"] = url
webpage = self._download_webpage(url, video_id) return jsonld
embedded_video_info = self._parse_json(self._html_search_regex(
r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'),
video_id)
return self.url_result(
'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])

View File

@ -506,7 +506,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
'397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
} }
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') # TODO 'json3' raising issues with automatic captions
_GEO_BYPASS = False _GEO_BYPASS = False