From 47c914f9954f6a88a9cd56f487f493e08eb78765 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 17 Dec 2016 18:47:49 +0700 Subject: [PATCH] [ondemandkorea] Fix extraction (closes #10772) --- youtube_dl/extractor/ondemandkorea.py | 52 ++++++++++++++------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py index 125c310c8..c3e830c23 100644 --- a/youtube_dl/extractor/ondemandkorea.py +++ b/youtube_dl/extractor/ondemandkorea.py @@ -1,14 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -import json -import re - -from .common import InfoExtractor -from ..utils import ExtractorError +from .jwplatform import JWPlatformBaseIE +from ..utils import ( + ExtractorError, + js_to_json, +) -class OnDemandKoreaIE(InfoExtractor): +class OnDemandKoreaIE(JWPlatformBaseIE): _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P[^/]+)\.html' _TEST = { 'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html', @@ -29,30 +29,32 @@ class OnDemandKoreaIE(InfoExtractor): if not webpage: # Page sometimes returns captcha page with HTTP 403 - raise ExtractorError('Unable to access page. You may have been blocked.', expected=True) + raise ExtractorError( + 'Unable to access page. You may have been blocked.', + expected=True) if 'msg_block_01.png' in webpage: - raise ExtractorError('This content is not available in your region.', expected=True) - + self.raise_geo_restricted( + 'This content is not available in your region') + if 'This video is only available to ODK PLUS members.' in webpage: - raise ExtractorError('This video is only available to ODK PLUS members.', expected=True) + raise ExtractorError( + 'This video is only available to ODK PLUS members.', + expected=True) title = self._og_search_title(webpage) - thumbnail = self._og_search_thumbnail(webpage) - manifest_url = self._search_regex(r'file:\s"(https?://[\S].+?/manifest\.m3u8)', webpage, 'manifest') - formats = self._extract_m3u8_formats(manifest_url, video_id, 'mp4', m3u8_id='hls') - self._sort_formats(formats) + jw_config = self._parse_json( + self._search_regex( + r'(?s)jwplayer\(([\'"])(?:(?!\1).)+\1\)\.setup\s*\((?P.+?)\);', + webpage, 'jw config', group='options'), + video_id, transform_source=js_to_json) + info = self._parse_jwplayer_data( + jw_config, video_id, require_title=False, m3u8_id='hls', + base_url=url) - subs = re.findall(r'file:\s\'(?P[^\']+\.vtt)\',\s+label:\s+\'(?P[^\']+)\'', webpage) - subtitles = {} - for sub in subs: - subtitles[sub[1]] = [{'url': 'http://www.ondemandkorea.com' + sub[0], 'ext': sub[0][-3:]}] - - return { - 'id': video_id, + info.update({ 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - 'subtitles': subtitles, - } + 'thumbnail': self._og_search_thumbnail(webpage), + }) + return info