[br] add support for BR Mediathek videos (fixes #14560) (fixes #14788)

2017-11-29 14:21:38 +01:00 · 2017-11-29 14:21:38 +01:00 · 07cf18b9c5
parent 5f699251e9
commit 07cf18b9c5
2 changed files with 147 additions and 4 deletions
--- a/youtube_dl/extractor/br.py
+++ b/youtube_dl/extractor/br.py
@ -1,20 +1,23 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    parse_duration,
    parse_iso8601,
    xpath_element,
    xpath_text,
 )
 class BRIE(InfoExtractor):
-    IE_DESC = 'Bayerischer Rundfunk Mediathek'
+    IE_DESC = 'Bayerischer Rundfunk'
    _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
    _TESTS = [
@ -123,10 +126,10 @@ class BRIE(InfoExtractor):
        for asset in assets.findall('asset'):
            format_url = xpath_text(asset, ['downloadUrl', 'url'])
            asset_type = asset.get('type')
-            if asset_type == 'HDS':
+            if asset_type.startswith('HDS'):
                formats.extend(self._extract_f4m_formats(
                    format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False))
-            elif asset_type == 'HLS':
+            elif asset_type.startswith('HLS'):
                formats.extend(self._extract_m3u8_formats(
                    format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False))
            else:
@ -169,3 +172,140 @@ class BRIE(InfoExtractor):
        } for variant in variants.findall('variant') if xpath_text(variant, 'url')]
        thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
        return thumbnails
 class BRMediathekIE(InfoExtractor):
    """Extractor for single clips of the BR Mediathek.

    The clip id (``av:`` followed by 24 hex digits) is taken from the page
    URL and used in one GraphQL request that returns the clip's metadata,
    video files, caption files and teaser images.
    """

    IE_DESC = 'Bayerischer Rundfunk Mediathek'
    _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})'
    _TESTS = [{
        'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
        'md5': 'fdc3d485835966d1622587d08ba632ec',
        'info_dict': {
            'id': 'av:5a1e6a6e8fce6d001871cc8e',
            'ext': 'mp4',
            'title': 'Die Sendung vom 28.11.2017',
            'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
            'timestamp': 1511942766,
            'upload_date': '20171129',
        }
    }]

    @staticmethod
    def _connection_nodes(connection):
        """Yield every non-empty ``node`` of an ``{edges: [{node: ...}]}`` dict.

        A missing or ``None`` connection, ``edges`` list, edge or node is
        skipped instead of raising: JSON ``null`` decodes to ``None``, which
        a ``dict.get(key, default)`` fallback does not cover.
        """
        for edge in (connection or {}).get('edges') or []:
            node = (edge or {}).get('node')
            if node:
                yield node

    def _real_extract(self, url):
        """Build the info dict for the clip referenced by ``url``.

        Returns a dict with ``id``, ``title``, ``description``, ``duration``,
        ``timestamp``, ``age_limit``, ``formats``, ``subtitles`` and
        ``thumbnails``.

        Raises ExtractorError when the response carries no clip object, and
        KeyError when the clip has no ``title`` (the only mandatory field).
        """
        clip_id = self._match_id(url)

        # clip_id matched [0-9a-f]{24} behind a fixed "av:" prefix, so it can
        # be interpolated into the query text without escaping.
        response = self._download_json(
            'https://proxy-base.master.mango.express/graphql',
            clip_id, data=json.dumps({
                "query": """{
  viewer {
    clip(id: "%s") {
      title
      description
      duration
      createdAt
      ageRestriction
      videoFiles {
        edges {
          node {
            publicLocation
            fileSize
            videoProfile {
              width
              height
              bitrate
              encoding
            }
          }
        }
      }
      captionFiles {
        edges {
          node {
            publicLocation
          }
        }
      }
      teaserImages {
        edges {
          node {
            imageFiles {
              edges {
                node {
                  publicLocation
                  width
                  height
                }
              }
            }
          }
        }
      }
    }
  }
 }""" % clip_id}).encode(), headers={
                'Content-Type': 'application/json',
            })

        # Any level of the envelope may be absent or null; report that as an
        # extraction failure rather than a bare TypeError/KeyError.
        clip = ((response.get('data') or {}).get('viewer') or {}).get('clip')
        if not clip:
            raise ExtractorError('Unable to find clip data for %s' % clip_id)

        title = clip['title']

        formats = []
        for node in self._connection_nodes(clip.get('videoFiles')):
            n_url = node.get('publicLocation')
            if not n_url:
                continue
            ext = determine_ext(n_url)
            if ext == 'm3u8':
                # Playlist URL: let the HLS helper expand it into formats.
                formats.extend(self._extract_m3u8_formats(
                    n_url, clip_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                # Direct file: describe it from the attached video profile.
                video_profile = node.get('videoProfile') or {}
                tbr = int_or_none(video_profile.get('bitrate'))
                format_id = 'http'
                if tbr:
                    format_id += '-%d' % tbr
                formats.append({
                    'format_id': format_id,
                    'url': n_url,
                    'width': int_or_none(video_profile.get('width')),
                    'height': int_or_none(video_profile.get('height')),
                    'tbr': tbr,
                    'filesize': int_or_none(node.get('fileSize')),
                })
        self._sort_formats(formats)

        subtitles = {}
        for node in self._connection_nodes(clip.get('captionFiles')):
            n_url = node.get('publicLocation')
            if not n_url:
                continue
            # The query requests no language for captions; they are all
            # filed under 'de' (presumably always German -- TODO confirm).
            subtitles.setdefault('de', []).append({
                'url': n_url,
            })

        thumbnails = []
        # Teaser images are a connection whose nodes each hold another
        # connection of concrete image files.
        for teaser in self._connection_nodes(clip.get('teaserImages')):
            for node in self._connection_nodes(teaser.get('imageFiles')):
                n_url = node.get('publicLocation')
                if not n_url:
                    continue
                thumbnails.append({
                    'url': n_url,
                    'width': int_or_none(node.get('width')),
                    'height': int_or_none(node.get('height')),
                })

        return {
            'id': clip_id,
            'title': title,
            'description': clip.get('description'),
            'duration': int_or_none(clip.get('duration')),
            'timestamp': parse_iso8601(clip.get('createdAt')),
            'age_limit': int_or_none(clip.get('ageRestriction')),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -127,7 +127,10 @@ from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
 from .bostonglobe import BostonGlobeIE
 from .bpb import BpbIE
-from .br import BRIE
+from .br import (
    BRIE,
    BRMediathekIE,
 )
 from .bravotv import BravoTVIE
 from .breakcom import BreakIE
 from .brightcove import (