yt-dlc/youtube_dl/extractor/onionstudios.py

79 lines
2.6 KiB
Python
Raw Normal View History

2015-06-24 17:12:13 +00:00
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
2016-04-16 19:53:13 +00:00
from ..utils import (
determine_ext,
int_or_none,
2016-07-05 17:04:42 +00:00
float_or_none,
mimetype2ext,
2016-04-16 19:53:13 +00:00
)
2015-06-24 17:12:13 +00:00
class OnionStudiosIE(InfoExtractor):
    """Information extractor for onionstudios.com video pages and embeds.

    Matches both ``/videos/<slug>-<id>`` pages and ``/embed?id=<id>`` player
    URLs; the numeric id is captured as the ``id`` group of ``_VALID_URL``.
    """

    _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'
    _TESTS = [{
        'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
        'md5': 'e49f947c105b8a78a675a0ee1bddedfe',
        'info_dict': {
            'id': '2937',
            'ext': 'mp4',
            'title': 'Hannibal charges forward, stops for a cocktail',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'The A.V. Club',
            'uploader_id': 'the-av-club',
        },
    }, {
        'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Find an Onion Studios embed in a page.

        Searches ``webpage`` for the first ``<iframe>`` whose quoted ``src``
        points at ``onionstudios.com/embed`` (scheme optional) and returns
        that URL, or ``None`` when no such iframe is present.
        """
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/embed.+?)\1', webpage)
        if mobj:
            return mobj.group('url')
        return None

    def _real_extract(self, url):
        """Build the info dict for the video identified by ``url``.

        Downloads ``/video/<id>.json``, turns each entry of its ``sources``
        list into one or more formats, and returns the metadata dict.
        Raises ``KeyError`` if the JSON has no ``title``.
        """
        video_id = self._match_id(url)

        video_data = self._download_json(
            'http://www.onionstudios.com/video/%s.json' % video_id, video_id)

        title = video_data['title']

        formats = []
        # `or []` also covers an explicit JSON null, which a plain
        # .get('sources', []) default would let through as None.
        for source in video_data.get('sources') or []:
            source_url = source.get('url')
            if not source_url:
                continue
            # Prefer the declared MIME type; fall back to the URL's extension.
            ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
            else:
                tbr = int_or_none(source.get('bitrate'))
                formats.append({
                    # e.g. "mp4-1500", or just "mp4" when no bitrate is given
                    'format_id': ext + ('-%d' % tbr if tbr else ''),
                    'url': source_url,
                    'width': int_or_none(source.get('width')),
                    'tbr': tbr,
                    'ext': ext,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': video_data.get('poster_url'),
            'uploader': video_data.get('channel_name'),
            'uploader_id': video_data.get('channel_slug'),
            # 1000 is float_or_none's scale, not dict.get's default: the old
            # `video_data.get('duration', 1000)` reported a made-up duration
            # of 1000 whenever the field was absent.
            # NOTE(review): presumably the API reports milliseconds - confirm.
            'duration': float_or_none(video_data.get('duration'), 1000),
            'tags': video_data.get('tags'),
            'formats': formats,
        }