Fixed the 4tube.com extractor to pull metadata from the associated JavaScript rather than from the HTML of the desired page.
This commit is contained in:
parent
c725e2c8b0
commit
b90b0c4ffa
|
@ -45,11 +45,9 @@ class FourTubeIE(InfoExtractor):
|
||||||
'uploadDate', webpage))
|
'uploadDate', webpage))
|
||||||
thumbnail = self._html_search_meta('thumbnailUrl', webpage)
|
thumbnail = self._html_search_meta('thumbnailUrl', webpage)
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
r'<a class="img-avatar" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
|
r'<a class="img-avatar" href="[^"]+/users/([^/"]+)" title="Go to [^"]+ page">', webpage, 'uploader id')
|
||||||
webpage, 'uploader id')
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'<a class="img-avatar" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
|
r'<a class="img-avatar" href="[^"]+/users/[^/"]+" title="Go to ([^"]+) page">', webpage, 'uploader')
|
||||||
webpage, 'uploader')
|
|
||||||
|
|
||||||
categories_html = self._search_regex(
|
categories_html = self._search_regex(
|
||||||
r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="list">(.*?)</ul>',
|
r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="list">(.*?)</ul>',
|
||||||
|
@ -68,9 +66,12 @@ class FourTubeIE(InfoExtractor):
|
||||||
webpage, 'like count', fatal=False))
|
webpage, 'like count', fatal=False))
|
||||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||||
|
|
||||||
|
player_url = self._search_regex(r'<script id="playerembed" src="([^"]+)">',webpage,'player javascript')
|
||||||
|
player_js = self._download_webpage(player_url,video_id,'Downloading player Javascript')
|
||||||
|
|
||||||
params_js = self._search_regex(
|
params_js = self._search_regex(
|
||||||
r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)',
|
r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)',
|
||||||
webpage, 'initialization parameters'
|
player_js, 'initialization parameters'
|
||||||
)
|
)
|
||||||
params = self._parse_json('[%s]' % params_js, video_id)
|
params = self._parse_json('[%s]' % params_js, video_id)
|
||||||
media_id = params[0]
|
media_id = params[0]
|
||||||
|
|
Loading…
Reference in New Issue