[bitchute] Fix extraction (closes #18567)

This commit is contained in:
Sergey M․ 2019-01-01 18:12:44 +07:00
parent 9d9daed464
commit 6b688b8942
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed file with 12 additions and 4 deletions

View File

@@ -5,7 +5,10 @@ import itertools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import urlencode_postdata from ..utils import (
orderedSet,
urlencode_postdata,
)
class BitChuteIE(InfoExtractor): class BitChuteIE(InfoExtractor):
@@ -43,10 +46,15 @@ class BitChuteIE(InfoExtractor):
'description', webpage, 'title', 'description', webpage, 'title',
default=None) or self._og_search_description(webpage) default=None) or self._og_search_description(webpage)
format_urls = []
for mobj in re.finditer(
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
format_urls.append(mobj.group('url'))
format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
formats = [ formats = [
{'url': mobj.group('url')} {'url': format_url}
for mobj in re.finditer( for format_url in orderedSet(format_urls)]
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
self._sort_formats(formats) self._sort_formats(formats)
description = self._html_search_regex( description = self._html_search_regex(