[spankbang] Fix formats extraction (closes #15727)

This commit is contained in:
Sergey M․ 2018-03-02 23:39:20 +07:00
parent b871d7e954
commit 7773a92800
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed file with 35 additions and 15 deletions

View File

@@ -3,7 +3,12 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import (
ExtractorError,
parse_duration,
parse_resolution,
str_to_int,
)
class SpankBangIE(InfoExtractor): class SpankBangIE(InfoExtractor):
@@ -15,7 +20,7 @@ class SpankBangIE(InfoExtractor):
'id': '3vvn', 'id': '3vvn',
'ext': 'mp4', 'ext': 'mp4',
'title': 'fantasy solo', 'title': 'fantasy solo',
'description': 'Watch fantasy solo free HD porn video - 05 minutes - Babe,Masturbation,Solo,Toy - dillion harper masturbates on a bed free adult movies sexy clips.', 'description': 'dillion harper masturbates on a bed',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'silly2587', 'uploader': 'silly2587',
'age_limit': 18, 'age_limit': 18,
@@ -32,36 +37,49 @@ class SpankBangIE(InfoExtractor):
# mobile page # mobile page
'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name', 'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
'only_matching': True, 'only_matching': True,
}, {
# 4k
'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id, headers={
'Cookie': 'country=US'
})
if re.search(r'<[^>]+\bid=["\']video_removed', webpage): if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
raise ExtractorError( raise ExtractorError(
'Video %s is not available' % video_id, expected=True) 'Video %s is not available' % video_id, expected=True)
stream_key = self._html_search_regex( formats = []
r'''var\s+stream_key\s*=\s*['"](.+?)['"]''', for mobj in re.finditer(
webpage, 'stream key') r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
webpage):
formats = [{ format_id, format_url = mobj.group('id', 'url')
'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height), f = parse_resolution(format_id)
'ext': 'mp4', f.update({
'format_id': '%sp' % height, 'url': format_url,
'height': int(height), 'format_id': format_id,
} for height in re.findall(r'<(?:span|li|p)[^>]+[qb]_(\d+)p', webpage)] })
self._check_formats(formats, video_id) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
title = self._html_search_regex( title = self._html_search_regex(
r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title') r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
description = self._og_search_description(webpage) description = self._search_regex(
r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
webpage, 'description', fatal=False)
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
uploader = self._search_regex( uploader = self._search_regex(
r'class="user"[^>]*><img[^>]+>([^<]+)', r'class="user"[^>]*><img[^>]+>([^<]+)',
webpage, 'uploader', default=None) webpage, 'uploader', default=None)
duration = parse_duration(self._search_regex(
r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex(
r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False))
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)
@@ -71,6 +89,8 @@ class SpankBangIE(InfoExtractor):
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'uploader': uploader, 'uploader': uploader,
'duration': duration,
'view_count': view_count,
'formats': formats, 'formats': formats,
'age_limit': age_limit, 'age_limit': age_limit,
} }