[ondemandkorea] Add extractor

This commit is contained in:
ping 2016-09-27 13:29:21 +08:00 committed by Sergey M․
parent 0ae9560eea
commit 594601f545
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 59 additions and 0 deletions

View File

@@ -662,6 +662,7 @@ from .nzz import NZZIE
from .odatv import OdaTVIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
from .ondemandkorea import OnDemandKoreaIE
from .onet import (
    OnetIE,
    OnetChannelIE,

View File

@@ -0,0 +1,58 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class OnDemandKoreaIE(InfoExtractor):
    """Extractor for ``ondemandkorea.com/<slug>.html`` video pages.

    The ``<slug>`` part of the URL (everything between the host and the
    ``.html`` suffix) is used as the video id.
    """

    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
    _TEST = {
        'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
        'info_dict': {
            'id': 'ask-us-anything-e43',
            'ext': 'mp4',
            'title': 'Ask Us Anything : E43',
            # Raw string: ``\.`` is not a valid string escape, so a plain
            # literal emits an invalid-escape warning on newer Pythons.
            # The runtime value is unchanged.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': 'm3u8 download'
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the page at *url*.

        Returns a dict with ``id``, ``title``, ``thumbnail``, ``formats``
        (taken from the HLS manifest referenced in the page) and
        ``subtitles`` (WebVTT tracks keyed by the label found in the page).

        Raises ExtractorError (``expected=True``) when the page could not
        be downloaded, when it contains the region-block image marker, or
        when it contains the ODK PLUS members-only notice.
        """
        video_id = self._match_id(url)
        # Non-fatal download: a failure yields a falsy value, which is
        # turned into a more helpful message just below.
        webpage = self._download_webpage(url, video_id, fatal=False)

        if not webpage:
            # Page sometimes returns captcha page with HTTP 403
            raise ExtractorError(
                'Unable to access page. You may have been blocked.',
                expected=True)

        if 'msg_block_01.png' in webpage:
            raise ExtractorError(
                'This content is not available in your region.',
                expected=True)

        if 'This video is only available to ODK PLUS members.' in webpage:
            raise ExtractorError(
                'This video is only available to ODK PLUS members.',
                expected=True)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # The player setup in the page references the HLS manifest as
        # file: "http(s)://.../manifest.m3u8".
        manifest_url = self._search_regex(
            r'file:\s"(https?://[\S].+?/manifest\.m3u8)',
            webpage, 'manifest')
        formats = self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4', m3u8_id='hls')
        self._sort_formats(formats)

        # Subtitle tracks appear as  file: '<path>.vtt', label: '<label>'.
        # Tracks are accumulated per label so that a repeated label does
        # not silently discard an earlier track.
        subtitles = {}
        for sub_path, label in re.findall(
                r'file:\s\'(?P<file>[^\']+\.vtt)\',\s+label:\s+\'(?P<lang>[^\']+)\'',
                webpage):
            subtitles.setdefault(label, []).append({
                # Paths are prefixed with the site origin; this presumes
                # they are site-relative -- TODO confirm against live pages.
                'url': 'http://www.ondemandkorea.com' + sub_path,
                # The pattern only matches paths ending in ".vtt".
                'ext': 'vtt',
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': subtitles,
        }