diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 693c16e49..d7685cd87 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1058,7 +1058,10 @@ from .southpark import ( SouthParkEsIE, SouthParkNlIE ) -from .spankbang import SpankBangIE +from .spankbang import ( + SpankBangIE, + SpankBangPlaylistIE, +) from .spankwire import SpankwireIE from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index e48cfff71..fbe6ef31a 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + orderedSet, parse_duration, parse_resolution, str_to_int, @@ -12,7 +13,7 @@ from ..utils import ( class SpankBangIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P[\da-z]+)/(?:video|play|embed)' + _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P[\da-z]+)/(?:video|play|embed)\b' _TESTS = [{ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', 'md5': '1cc433e1d6aa14bc376535b8679302f7', @@ -103,3 +104,33 @@ class SpankBangIE(InfoExtractor): 'formats': formats, 'age_limit': age_limit, } + + +class SpankBangPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P[\da-z]+)/playlist/[^/]+' + _TEST = { + 'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties', + 'info_dict': { + 'id': 'ug0k', + 'title': 'Big Ass Titties', + }, + 'playlist_mincount': 50, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage( + url, playlist_id, headers={'Cookie': 'country=US; mobile=on'}) + + entries = [self.url_result( + 'https://spankbang.com/%s/video' % video_id, + ie=SpankBangIE.ie_key(), video_id=video_id) + for video_id in orderedSet(re.findall( + r']+\bhref=["\']/?([\da-z]+)/play/', webpage))] + + title = self._html_search_regex( + r'

([^<]+)\s+playlist

', webpage, 'playlist title', + fatal=False) + + return self.playlist_result(entries, playlist_id, title)