Merge pull request #106 from diegorodriguezv/fix-tmz

[TMZ] Fix TMZ.com extractor
This commit is contained in:
Tom-Oliver Heidel 2020-12-02 01:46:46 +01:00 committed by GitHub
commit 98e248faa4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 149 additions and 51 deletions

View File

@ -1213,10 +1213,7 @@ from .thisvid import ThisVidIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from .tiktok import TikTokIE from .tiktok import TikTokIE
from .tinypic import TinyPicIE from .tinypic import TinyPicIE
from .tmz import ( from .tmz import TMZIE
TMZIE,
TMZArticleIE,
)
from .tnaflix import ( from .tnaflix import (
TNAFlixNetworkEmbedIE, TNAFlixNetworkEmbedIE,
TNAFlixIE, TNAFlixIE,

View File

@ -1,56 +1,157 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
ExtractorError,
get_element_by_attribute,
)
class TMZIE(InfoExtractor):
    """Extractor for videos published on tmz.com pages.

    Matches any tmz.com URL.  The video is taken from the page's JSON-LD
    metadata when that carries a media URL; otherwise extraction is
    delegated to whichever extractor handles the YouTube video or tweet
    embedded in the page.
    """

    # No ``id`` group: the page URL itself is used as the video id.
    _VALID_URL = r"https?://(?:www\.)?tmz\.com/.*"
    _TESTS = [
        {
            "url": "http://www.tmz.com/videos/0-cegprt2p/",
            "info_dict": {
                "id": "http://www.tmz.com/videos/0-cegprt2p/",
                "ext": "mp4",
                "title": "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet",
                "description": "Harvey talks about Director Comeys decision not to prosecute Hillary Clinton.",
                "timestamp": 1467831837,
                "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
                "upload_date": "20160706",
            },
        },
        {
            "url": "https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/",
            "info_dict": {
                "id": "https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/",
                "ext": "mp4",
                "title": "Angry Bagel Shop Guy Says He Doesn't Trust Women",
                "description": "The enraged man who went viral for ranting about women on dating sites before getting ragdolled in a bagel shop is defending his misogyny ... he says it's women's fault in the first place.",
                "timestamp": 1562889485,
                "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
                "upload_date": "20190711",
            },
        },
        {
            "url": "http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert",
            "md5": "5429c85db8bde39a473a56ca8c4c5602",
            "info_dict": {
                "id": "http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert",
                "ext": "mp4",
                "title": "Bobby Brown Tells Crowd ... Bobbi Kristina is Awake",
                "description": 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
                "timestamp": 1429467813,
                "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
                "upload_date": "20150419",
            },
        },
        {
            "url": "http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/",
            "info_dict": {
                "id": "http://www.tmz.com/2015/09/19/patti-labelle-concert-fan-stripping-kicked-out-nicki-minaj/",
                "ext": "mp4",
                "title": "Patti LaBelle -- Goes Nuclear On Stripping Fan",
                "description": "Patti LaBelle made it known loud and clear last night ... NO "
                "ONE gets on her stage and strips down.",
                "timestamp": 1442683746,
                "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
                "upload_date": "20150919",
            },
        },
        {
            "url": "http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/",
            "info_dict": {
                "id": "http://www.tmz.com/2016/01/28/adam-silver-sting-drake-blake-griffin/",
                "ext": "mp4",
                "title": "NBA's Adam Silver -- Blake Griffin's a Great Guy ... He'll Learn from This",
                "description": "Two pretty parts of this video with NBA Commish Adam Silver.",
                "timestamp": 1454010989,
                "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
                "upload_date": "20160128",
            },
        },
        {
            "url": "http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/",
            "info_dict": {
                "id": "http://www.tmz.com/2016/10/27/donald-trump-star-vandal-arrested-james-otis/",
                "ext": "mp4",
                "title": "Trump Star Vandal -- I'm Not Afraid of Donald or the Cops!",
                "description": "James Otis is the the guy who took a pickaxe to Donald Trump's star on the Walk of Fame, and he tells TMZ .. he's ready and willing to go to jail for the crime.",
                "timestamp": 1477500095,
                "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
                "upload_date": "20161026",
            },
        },
        {
            "url": "https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/",
            "info_dict": {
                "id": "https://www.tmz.com/videos/2020-10-31-103120-beverly-hills-protest-4878209/",
                "ext": "mp4",
                "title": "Cops Use Billy Clubs Against Pro-Trump and Anti-Fascist "
                "Demonstrators",
                "description": "Beverly Hills may be an omen of what's coming next week, "
                "because things got crazy on the streets and cops started "
                "swinging their billy clubs at both Anti-Fascist and Pro-Trump "
                "demonstrators.",
                "timestamp": 1604182772,
                "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}",
                "upload_date": "20201031",
            },
        },
        {
            "url": "https://www.tmz.com/2020/11/05/gervonta-davis-car-crash-hit-and-run-police/",
            "info_dict": {
                "id": "Dddb6IGe-ws",
                "ext": "mp4",
                "title": "SICK LAMBO GERVONTA DAVIS IN HIS NEW RIDE RIGHT AFTER KO AFTER LEO EsNews Boxing",
                "uploader": "ESNEWS",
                "description": "md5:49675bc58883ccf80474b8aa701e1064",
                "upload_date": "20201101",
                "uploader_id": "ESNEWS",
            },
        },
        {
            "url": "https://www.tmz.com/2020/11/19/conor-mcgregor-dustin-poirier-contract-fight-ufc-257-fight-island/",
            "info_dict": {
                "id": "1329450007125225473",
                "ext": "mp4",
                "title": "TheMacLife - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.",
                "uploader": "TheMacLife",
                "description": "md5:56e6009bbc3d12498e10d08a8e1f1c69",
                "upload_date": "20201119",
                "uploader_id": "Maclifeofficial",
                "timestamp": 1605800556,
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict (or a delegating url_result) for a TMZ page.

        Sources are tried in this order:

        1. the page's JSON-LD metadata, when it contains a ``url`` entry;
        2. a YouTube video queued through the IFrame Player API
           (``cueVideoById``);
        3. a status link inside an embedded ``twitter-tweet`` element.

        Raises ExtractorError when none of them yields a video.
        """
        # The page URL doubles as the video id for download/log messages.
        webpage = self._download_webpage(url, url)
        jsonld = self._search_json_ld(webpage, url)

        if jsonld and "url" in jsonld:
            # Fall back to the page URL only when the metadata carries no
            # usable id.  The key must be the *string* "id": testing the
            # builtin ``id`` function is always true and would overwrite
            # any id that is present.
            if not jsonld.get("id"):
                jsonld["id"] = url
            return jsonld

        # try to extract from YouTube Player API
        # see https://developers.google.com/youtube/iframe_api_reference#Video_Queueing_Functions
        youtube_match = re.search(
            r'\.cueVideoById\(\s*(?P<quote>[\'"])(?P<id>.*?)(?P=quote)',
            webpage)
        if youtube_match:
            return self.url_result(youtube_match.group("id"))

        # try to extract from twitter
        tweet_html = get_element_by_attribute("class", "twitter-tweet", webpage)
        if tweet_html:
            # findall yields (quote, link) tuples, one per capture group.
            links = re.findall(
                r'<a[^>]+href=\s*(?P<quote>[\'"])(?P<link>.*?)(?P=quote)',
                tweet_html)
            for _, link in links:
                # Only links to an individual tweet are followed.
                if "/status/" in link:
                    return self.url_result(link)

        raise ExtractorError("No video found!")