[funk] Fix extraction and rework extractors (closes #15792)

This commit is contained in:
Sergey M․ 2018-03-08 03:17:46 +07:00
parent d91dd0ce19
commit 690404a6f8
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 84 additions and 22 deletions

View File

@ -385,7 +385,10 @@ from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .freshlive import FreshLiveIE
from .funimation import FunimationIE
from .funk import FunkIE
from .funk import (
FunkMixIE,
FunkChannelIE,
)
from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
from .fxnetworks import FXNetworksIE

View File

@ -1,43 +1,102 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .nexx import NexxIE
from ..utils import extract_attributes
from ..utils import int_or_none
class FunkIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P<id>[^?/#]+)'
class FunkBaseIE(InfoExtractor):
def _make_url_result(self, video):
return {
'_type': 'url_transparent',
'url': 'nexx:741:%s' % video['sourceId'],
'ie_key': NexxIE.ie_key(),
'id': video['sourceId'],
'title': video.get('title'),
'description': video.get('description'),
'duration': int_or_none(video.get('duration')),
'season_number': int_or_none(video.get('seasonNr')),
'episode_number': int_or_none(video.get('episodeNr')),
}
class FunkMixIE(FunkBaseIE):
_VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/',
'md5': '4d40974481fa3475f8bccfd20c5361f8',
'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
'md5': '8edf617c2f2b7c9847dfda313f199009',
'info_dict': {
'id': '716599',
'id': '123748',
'ext': 'mp4',
'title': 'Neue Rechte Welle',
'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69',
'timestamp': 1501337639,
'upload_date': '20170729',
'title': '"Die realste Kifferdoku aller Zeiten"',
'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
'timestamp': 1490274721,
'upload_date': '20170323',
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
mix_id = mobj.group('id')
alias = mobj.group('alias')
lists = self._download_json(
'https://www.funk.net/api/v3.1/curation/curatedLists/',
mix_id, headers={
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
'Referer': url,
}, query={
'size': 100,
})['result']['lists']
metas = next(
l for l in lists
if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
video = next(
meta['videoDataDelegate']
for meta in metas if meta.get('alias') == alias)
return self._make_url_result(video)
class FunkChannelIE(FunkBaseIE):
_VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
'info_dict': {
'id': '1155821',
'ext': 'mp4',
'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
'timestamp': 1514507395,
'upload_date': '20171229',
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
}, {
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/',
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
channel_id = mobj.group('id')
alias = mobj.group('alias')
webpage = self._download_webpage(url, video_id)
results = self._download_json(
'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
headers={
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
'Referer': url,
}, query={
'channelId': channel_id,
'size': 100,
})['result']
domain_id = NexxIE._extract_domain_id(webpage) or '741'
nexx_id = extract_attributes(self._search_regex(
r'(<div[^>]id=["\']mediaplayer-funk[^>]+>)',
webpage, 'media player'))['data-id']
video = next(r for r in results if r.get('alias') == alias)
return self.url_result(
'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
video_id=nexx_id)
return self._make_url_result(video)