From 94db1f7f3b7269d5843b815ef2aa5b71d0361e6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Oct 2018 23:53:39 +0700 Subject: [PATCH] [cnbc] Simplify extraction (closes #14280, closes #17110) --- youtube_dl/extractor/cnbc.py | 29 ++++++++++------------------- youtube_dl/extractor/extractors.py | 2 +- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index 35c0b6124..81b0c9fc4 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -3,10 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - js_to_json, - smuggle_url, -) +from ..utils import smuggle_url class CNBCIE(InfoExtractor): @@ -40,36 +37,30 @@ class CNBCIE(InfoExtractor): } -class CNBCNewIE(InfoExtractor): - IE_NAME = 'CNBC:new' - _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video.*/(?P<id>[^.]+)' +class CNBCVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)' _TEST = { 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', 'info_dict': { 'id': '7000031301', 'ext': 'mp4', - 'title': 'Trump: I don\'t necessarily agree with raising rates', + 'title': "Trump: I don't necessarily agree with raising rates", 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', 'timestamp': 1531958400, 'upload_date': '20180719', 'uploader': 'NBCU-CNBC', }, 'params': { - # m3u8 download 'skip_download': True, }, } - CNBC_URL_TEMPLATE = 'http://video.cnbc.com/gallery/?video=%s' - def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._parse_json( - self._search_regex( - r'(?s).*<script[^>]*>.*?({.+?content_id.+?}).*?</script>', - webpage, display_id), - display_id, transform_source=js_to_json - )['content_id'] - - return self.url_result(self.CNBC_URL_TEMPLATE % video_id, 'CNBC') + video_id = 
self._search_regex( + r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id, + 'video id') + return self.url_result( + 'http://video.cnbc.com/gallery/?video=%s' % video_id, + CNBCIE.ie_key()) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 93574907b..d96e23905 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -211,7 +211,7 @@ from .clyp import ClypIE from .cmt import CMTIE from .cnbc import ( CNBCIE, - CNBCNewIE, + CNBCVideoIE, ) from .cnn import ( CNNIE,