# coding: utf-8
# Provenance: youtube-dl commit d66d43c5547daf4fc1a269824a8432477fbb099d
# by Remita Amine (Sat, 25 Mar 2017 18:13:13 +0100),
# subject "[atvat] Add new extractor(closes #5325)".
# This module is youtube_dl/extractor/atvat.py. The same commit registers it
# in youtube_dl/extractor/extractors.py by adding
#     from .atvat import ATVAtIE
# between the `.atttechchannel` and `.audimedia` imports.
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    int_or_none,
    unescapeHTML,
)


class ATVAtIE(InfoExtractor):
    """Extractor for atv.at video pages.

    The page embeds the player configuration as HTML-escaped JSON in the
    ``data-jsb`` attribute of an element whose class contains
    ``jsb_video/FlashPlayer``. The ``config.initial_video`` object of that
    JSON describes one video made of several parts; the result is a
    ``multi_video`` with one entry per part.
    """

    # The named group ``id`` is what ``_match_id`` reads; it captures the
    # trailing ``d<digits>`` / ``v<digits>`` path segment.
    _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
    _TESTS = [{
        'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
        'md5': 'c3b6b975fb3150fc628572939df205f2',
        'info_dict': {
            'id': '1698447',
            'ext': 'mp4',
            'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
        }
    }, {
        'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
        'only_matching': True,
    }]

    def _extract_part_formats(self, part, part_id):
        """Return the sorted format list built from ``part['sources']``.

        Sources without a ``src`` are skipped. ``.m3u8`` sources are expanded
        through ``_extract_m3u8_formats`` (non-fatal); any other source
        becomes a single format whose ``format_id`` is the source's
        ``delivery`` value.
        """
        formats = []
        # ``or []`` also covers an explicit JSON null, not just a missing key.
        for source in part.get('sources') or []:
            source_url = source.get('src')
            if not source_url:
                continue
            if determine_ext(source_url) == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    source_url, part_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'format_id': source.get('delivery'),
                    'url': source_url,
                })
        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Download the page at *url* and return a ``multi_video`` result.

        Raises ``KeyError`` if the player JSON lacks ``config``,
        ``initial_video``, ``id`` or ``title`` (or a part lacks ``id`` /
        ``title``); helper methods may raise their own extraction errors.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The attribute value is HTML-escaped JSON, so unescape before parsing.
        player_json = unescapeHTML(self._search_regex(
            r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
            webpage, 'player data'))
        video_data = self._parse_json(
            player_json, display_id)['config']['initial_video']

        video_id = video_data['id']
        video_title = video_data['title']

        parts = []
        # ``or []`` also covers an explicit JSON null, not just a missing key.
        for part in video_data.get('parts') or []:
            part_id = part['id']
            parts.append({
                'id': part_id,
                'title': part['title'],
                'thumbnail': part.get('preview_image_url'),
                'duration': int_or_none(part.get('duration')),
                'is_live': part.get('is_livestream'),
                'formats': self._extract_part_formats(part, part_id),
            })

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': video_title,
            'entries': parts,
        }