[nuvid] Improve extraction
This commit is contained in:
parent
09ffa08ba1
commit
3048e82a94
|
@ -3,6 +3,11 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
parse_duration,
|
||||||
|
unified_strdate,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class NuvidIE(InfoExtractor):
|
class NuvidIE(InfoExtractor):
|
||||||
|
@ -13,8 +18,10 @@ class NuvidIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1310741',
|
'id': '1310741',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
"title": "Horny babes show their awesome bodeis and",
|
'title': 'Horny babes show their awesome bodeis and',
|
||||||
"age_limit": 18,
|
'duration': 129,
|
||||||
|
'upload_date': '20140508',
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,27 +29,41 @@ class NuvidIE(InfoExtractor):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
murl = url.replace('://www.', '://m.')
|
formats = []
|
||||||
webpage = self._download_webpage(murl, video_id)
|
|
||||||
|
|
||||||
|
for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
|
||||||
|
request = compat_urllib_request.Request(
|
||||||
|
'http://m.nuvid.com/play/%s' % video_id)
|
||||||
|
request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
request, video_id, 'Downloading %s page' % format_id)
|
||||||
|
video_url = self._html_search_regex(
|
||||||
|
r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
})
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>',
|
r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
|
||||||
webpage, 'title').strip()
|
|
||||||
|
|
||||||
url_end = self._html_search_regex(
|
|
||||||
r'href="(/[^"]+)"[^>]*data-link_type="mp4"',
|
|
||||||
webpage, 'video_url')
|
|
||||||
video_url = 'http://m.nuvid.com' + url_end
|
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
|
r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
|
||||||
webpage, 'thumbnail URL', fatal=False)
|
webpage, 'thumbnail URL', fatal=False)
|
||||||
|
duration = parse_duration(self._html_search_regex(
|
||||||
|
r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False))
|
||||||
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
|
r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': 'http://m.nuvid.com/%s' % thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'upload_date': upload_date,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
Loading…
Reference in New Issue