[npo:radio] Move extractor to common npo place and add extractor for fragments

2015-02-13 01:36:54 +06:00 · 2015-02-13 01:36:54 +06:00 · 171ca612af
parent c3d64fc1b3
commit 171ca612af
3 changed files with 83 additions and 42 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -318,9 +318,10 @@ from .nowvideo import NowVideoIE
 from .npo import (
    NPOIE,
    NPOLiveIE,
    NPORadioIE,
    NPORadioFragmentIE,
    TegenlichtVproIE,
 )
 from .nporadio import NPORadioIE
 from .nrk import (
    NRKIE,
    NRKTVIE,
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 from .subtitles import SubtitlesInfoExtractor
 from .common import InfoExtractor
 from ..utils import (
    fix_xml_ampersands,
    parse_duration,
@ -22,7 +23,7 @@ class NPOBaseIE(SubtitlesInfoExtractor):
 class NPOIE(NPOBaseIE):
    IE_NAME = 'npo.nl'
-    _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'
    _TESTS = [
        {
@ -185,7 +186,7 @@ class NPOIE(NPOBaseIE):
 class NPOLiveIE(NPOBaseIE):
    IE_NAME = 'npo.nl:live'
-    _VALID_URL = r'https?://www\.npo\.nl/live/(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)'
    _TEST = {
        'url': 'http://www.npo.nl/live/npo-1',
@ -260,6 +261,84 @@ class NPOLiveIE(NPOBaseIE):
        }
 class NPORadioIE(InfoExtractor):
    """Extractor for npo.nl radio channel pages (``/radio/<channel>``).

    The channel name is read from a single-quoted ``data-channel``
    attribute and the stream description (JSON) from a single-quoted
    ``data-streams`` attribute on the downloaded page.
    """

    IE_NAME = 'npo.nl:radio'
    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'

    _TEST = {
        'url': 'http://www.npo.nl/radio/radio-1',
        'info_dict': {
            'id': 'radio-1',
            'ext': 'mp3',
            'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }

    @staticmethod
    def _html_get_attribute_regex(attribute):
        """Return a pattern whose first group is the single-quoted value of ``attribute``."""
        pattern_template = r'{0}\s*=\s*\'([^\']+)\''
        return pattern_template.format(attribute)

    def _real_extract(self, url):
        """Download the channel page at ``url`` and build its info dict."""
        channel_id = self._match_id(url)
        page = self._download_webpage(url, channel_id)

        # Channel name as published in the page's data-channel attribute.
        channel_name = self._html_search_regex(
            self._html_get_attribute_regex('data-channel'), page, 'title')

        # data-streams holds a JSON object; 'url' is required, 'codec' optional.
        raw_streams = self._html_search_regex(
            self._html_get_attribute_regex('data-streams'), page, 'data-streams')
        stream_info = self._parse_json(raw_streams, channel_id)
        audio_codec = stream_info.get('codec')

        # The codec name doubles as the file extension.
        return {
            'id': channel_id,
            'url': stream_info['url'],
            'title': self._live_title(channel_name),
            'acodec': audio_codec,
            'ext': audio_codec,
            'is_live': True,
        }
 class NPORadioFragmentIE(InfoExtractor):
    """Extractor for npo.nl radio fragment pages (``/radio/<channel>/fragment/<id>``).

    The title is taken from the ``title`` attribute of the link pointing at
    the fragment itself; the audio URL is the raw value of the page's
    single-quoted ``data-streams`` attribute.
    """

    IE_NAME = 'npo.nl:radio:fragment'
    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
        'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
        'info_dict': {
            'id': '174356',
            'ext': 'mp3',
            'title': 'Jubileumconcert Willeke Alberti',
        },
    }

    def _real_extract(self, url):
        """Download the fragment page at ``url`` and build its info dict."""
        fragment_id = self._match_id(url)
        page = self._download_webpage(url, fragment_id)

        # The anchor that links to this very fragment carries its title.
        title_pattern = r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % fragment_id
        fragment_title = self._html_search_regex(title_pattern, page, 'title')

        # Here data-streams is used verbatim as the media URL (no JSON parsing).
        stream_url = self._search_regex(
            r"data-streams='([^']+)'", page, 'audio url')

        return {
            'id': fragment_id,
            'url': stream_url,
            'title': fragment_title,
        }
 class TegenlichtVproIE(NPOIE):
    IE_NAME = 'tegenlicht.vpro.nl'
    _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
--- a/youtube_dl/extractor/nporadio.py
+++ b/youtube_dl/extractor/nporadio.py
@ -1,39 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 from .common import InfoExtractor
 class NPORadioIE(InfoExtractor):
    """Extractor for npo.nl radio pages (everything after ``/radio/`` is the id).

    Reads the channel name from the page's single-quoted ``data-channel``
    attribute and the stream description (JSON with ``codec`` and ``url``)
    from its single-quoted ``data-streams`` attribute.
    """

    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>.*)'

    _TEST = {
        'url': 'http://www.npo.nl/radio/radio-1',
        'info_dict': {
            'id': 'radio-1',
            'ext': 'mp3',
            'title': 'NPO Radio 1',
        }
    }

    def _html_get_attribute_regex(self, attribute):
        """Return a pattern whose first group is the single-quoted value of ``attribute``."""
        pattern_template = r'{0}\s*=\s*\'([^\']+)\''
        return pattern_template.format(attribute)

    def _real_extract(self, url):
        """Download the page at ``url`` and build its info dict."""
        channel_id = self._match_id(url)
        page = self._download_webpage(url, channel_id)

        channel_name = self._html_search_regex(
            self._html_get_attribute_regex('data-channel'), page, 'title')

        # data-streams carries a JSON object describing the stream.
        raw_streams = self._html_search_regex(
            self._html_get_attribute_regex('data-streams'), page, 'data-streams')
        stream_info = json.loads(raw_streams)

        # The codec name is used directly as the file extension.
        return {
            'id': channel_id,
            'title': channel_name,
            'ext': stream_info['codec'],
            'url': stream_info['url']
        }