mirror of
https://github.com/blackjack4494/yt-dlc.git
synced 2025-01-03 05:36:07 +00:00
[ruutu] Improve, make more robust and fix python 2.6 support
This commit is contained in:
parent
717b0239fd
commit
9414338a48
1 changed file with 79 additions and 50 deletions
|
@ -3,88 +3,117 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_urlparse
|
from ..compat import compat_urllib_parse_urlparse
|
||||||
import re
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
xpath_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class RuutuIE(InfoExtractor):
|
class RuutuIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(www\.)?ruutu\.fi/ohjelmat/(?:[^/]+/)?(?P<id>.*)$'
|
_VALID_URL = r'http://(?:www\.)?ruutu\.fi/ohjelmat/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.ruutu.fi/ohjelmat/oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
|
'url': 'http://www.ruutu.fi/ohjelmat/oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
|
||||||
'md5': 'ab2093f39be1ca8581963451b3c0234f',
|
'md5': 'ab2093f39be1ca8581963451b3c0234f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
|
'id': '2058907',
|
||||||
|
'display_id': 'oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
|
'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
|
||||||
'description': 'Toinen toistaan huikeampia ohjelmaideoita ja täysin päätöntä sekoilua? No sitä juuri nimenomaan. Metro Helsingin Iltapäivän vieraaksi saapui Tuomas Kauhanen ja he Petra Kalliomaan kanssa keskustelivat hieman ennen lähetyksen alkua, mutta kamerat olivatkin jo päällä.',
|
'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 114,
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'format': 'http-1000',
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.ruutu.fi/ohjelmat/superpesis/superpesis-katso-koko-kausi-ruudussa',
|
'url': 'http://www.ruutu.fi/ohjelmat/superpesis/superpesis-katso-koko-kausi-ruudussa',
|
||||||
'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
|
'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'superpesis-katso-koko-kausi-ruudussa',
|
'id': '2057306',
|
||||||
|
'display_id': 'superpesis-katso-koko-kausi-ruudussa',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Superpesis: katso koko kausi Ruudussa',
|
'title': 'Superpesis: katso koko kausi Ruudussa',
|
||||||
'description': 'Huippujännittävän Superpesiksen suoria ottelulähetyksiä seurataan Ruudussa kauden alusta viimeiseen finaaliin asti. Katso lisätiedot osoitteesta ruutu.fi/superpesis.',
|
'description': 'md5:44c44a99fdbe5b380ab74ebd75f0af77',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 40,
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'format': 'http-1000',
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
video_id = mobj.group('id')
|
webpage = self._download_webpage(url, display_id)
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
media_id = self._html_search_regex(r'data-media-id="(\d+)"', webpage, 'media_id')
|
video_id = self._search_regex(
|
||||||
media_json = self._parse_json(self._search_regex(r'jQuery.extend\([^,]+, (.*)\);', webpage, 'media_data'), video_id)
|
r'data-media-id="(\d+)"', webpage, 'media id')
|
||||||
xml_url = media_json['ruutuplayer']['xmlUrl'].replace('{ID}', media_id)
|
|
||||||
media_xml = self._download_xml(xml_url, media_id)
|
video_xml_url = None
|
||||||
|
|
||||||
|
media_data = self._search_regex(
|
||||||
|
r'jQuery\.extend\([^,]+,\s*(.+?)\);', webpage,
|
||||||
|
'media data', default=None)
|
||||||
|
if media_data:
|
||||||
|
media_json = self._parse_json(media_data, display_id, fatal=False)
|
||||||
|
if media_json:
|
||||||
|
xml_url = media_json.get('ruutuplayer', {}).get('xmlUrl')
|
||||||
|
if xml_url:
|
||||||
|
video_xml_url = xml_url.replace('{ID}', video_id)
|
||||||
|
|
||||||
|
if not video_xml_url:
|
||||||
|
video_xml_url = 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id
|
||||||
|
|
||||||
|
video_xml = self._download_xml(video_xml_url, video_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
parsed_urls = []
|
processed_urls = []
|
||||||
for fmt in media_xml.findall('.//Clip//'):
|
|
||||||
url = fmt.text
|
|
||||||
if not fmt.tag.endswith('File') or url in parsed_urls or \
|
|
||||||
'NOT_USED' in url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if url.endswith('m3u8'):
|
def extract_formats(node):
|
||||||
formats.extend(self._extract_m3u8_formats(url, media_id, m3u8_id='hls'))
|
for child in node:
|
||||||
parsed_urls.append(url)
|
if child.tag.endswith('Files'):
|
||||||
elif url.endswith('f4m'):
|
extract_formats(child)
|
||||||
formats.extend(self._extract_f4m_formats(url, media_id, f4m_id='hds'))
|
elif child.tag.endswith('File'):
|
||||||
parsed_urls.append(url)
|
video_url = child.text
|
||||||
else:
|
if not video_url or video_url in processed_urls or 'NOT_USED' in video_url:
|
||||||
if not fmt.tag.startswith('HTTP'):
|
return
|
||||||
continue
|
processed_urls.append(video_url)
|
||||||
proto = compat_urllib_parse_urlparse(url).scheme
|
ext = determine_ext(video_url)
|
||||||
width_str, height_str = fmt.get('resolution').split('x')
|
if ext == 'm3u8':
|
||||||
tbr = int(fmt.get('bitrate', 0))
|
formats.extend(self._extract_m3u8_formats(
|
||||||
formats.append({
|
video_url, video_id, 'mp4', m3u8_id='hls'))
|
||||||
'format_id': '%s-%d' % (proto, tbr),
|
elif ext == 'f4m':
|
||||||
'url': url,
|
formats.extend(self._extract_f4m_formats(
|
||||||
'width': int(width_str),
|
video_url, video_id, f4m_id='hds'))
|
||||||
'height': int(height_str),
|
else:
|
||||||
'tbr': tbr,
|
proto = compat_urllib_parse_urlparse(video_url).scheme
|
||||||
'ext': url.rsplit('.', 1)[-1],
|
if not child.tag.startswith('HTTP') and proto != 'rtmp':
|
||||||
'live': True,
|
continue
|
||||||
'protocol': proto,
|
preference = -1 if proto == 'rtmp' else 1
|
||||||
})
|
label = child.get('label')
|
||||||
|
tbr = int_or_none(child.get('bitrate'))
|
||||||
|
width, height = [int_or_none(x) for x in child.get('resolution', '').split('x')]
|
||||||
|
formats.append({
|
||||||
|
'format_id': '%s-%s' % (proto, label if label else tbr),
|
||||||
|
'url': video_url,
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'tbr': tbr,
|
||||||
|
'preference': preference,
|
||||||
|
})
|
||||||
|
|
||||||
|
extract_formats(video_xml.find('./Clip'))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'formats': formats,
|
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'duration': int(media_xml.find('.//Runtime').text),
|
'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')),
|
||||||
'age_limit': int(media_xml.find('.//AgeLimit').text),
|
'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue