From 789b7774a771335c7d0b42c834195bef2e8617c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 May 2018 21:58:55 +0700 Subject: [PATCH] [businessinsider] Add extractor (closes #16387, closes #16388, closes #16389) --- youtube_dl/extractor/businessinsider.py | 42 +++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 15 --------- 3 files changed, 43 insertions(+), 15 deletions(-) create mode 100644 youtube_dl/extractor/businessinsider.py diff --git a/youtube_dl/extractor/businessinsider.py b/youtube_dl/extractor/businessinsider.py new file mode 100644 index 000000000..dfcf9bc6b --- /dev/null +++ b/youtube_dl/extractor/businessinsider.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .jwplatform import JWPlatformIE + + +class BusinessInsiderIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6', + 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', + 'info_dict': { + 'id': 'hZRllCfw', + 'ext': 'mp4', + 'title': "Here's how much radiation you're exposed to in everyday life", + 'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd', + 'upload_date': '20170709', + 'timestamp': 1499606400, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/', + 'only_matching': True, + }, { + 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + jwplatform_id = self._search_regex( + (r'data-media-id=["\']([a-zA-Z0-9]{8})', + r'id=["\']jwplayer_([a-zA-Z0-9]{8})', + r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})'), + webpage, 'jwplatform id') + return self.url_result( + 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(), + video_id=video_id) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a00e003c2..f03f98a6c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -137,6 +137,7 @@ from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) +from .businessinsider import BusinessInsiderIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 532c995f5..76852f9dc 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1472,21 +1472,6 @@ class GenericIE(InfoExtractor): }, 'expected_warnings': ['Failed to parse JSON Expecting value'], }, - # Ooyala embed - { - 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T', - 'info_dict': { - 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs', - 'ext': 'mp4', - 'description': 'Index/Match versus VLOOKUP.', - 'title': 'This is what separates the Excel masters from the wannabes', - 'duration': 191.933, - }, - 'params': { - # m3u8 downloads - 'skip_download': True, - } - }, # Brightcove URL in single quotes { 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',