From 96bbda4a4da2fc79ae86d558038bb44271a85ecf Mon Sep 17 00:00:00 2001 From: Nial McCallister <48334675+nmcc1212@users.noreply.github.com> Date: Tue, 28 Sep 2021 00:09:26 +0100 Subject: [PATCH] Create streamtape.py --- youtube_dlc/extractor/streamtape.py | 54 +++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 youtube_dlc/extractor/streamtape.py diff --git a/youtube_dlc/extractor/streamtape.py b/youtube_dlc/extractor/streamtape.py new file mode 100644 index 000000000..5b41ea555 --- /dev/null +++ b/youtube_dlc/extractor/streamtape.py @@ -0,0 +1,54 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import js_to_json, urljoin + +# strings are obfuscated by concatenating substrings +split_string_part = r'(?:%s|%s)' % (r'"(?:[^"\\]|\\.)*"', + r"'(?:[^'\\]|\\.)*'") +split_string = r'(?:' + split_string_part + r'(?:\s*\+\s*' + split_string_part + r')*)' +videolink = r"(?:'\+')?".join('videolink') +videolink = r"document\.getElementById\('" + videolink + r"'\)\.innerHTML\s*=\s*(?P" + split_string + r")" + + +class StreamtapeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?streamtape.com/[ev]/(?P[^/?#]+)' + _TESTS = [{ + 'url': 'https://streamtape.com/v/AJD9gAVwMOcXrQ8/Big_Buck_Bunny_Trailer_400p.ogg', + 'md5': '6f7cdddd436852f054728bfd4d3be873', + 'info_dict': { + 'id': 'AJD9gAVwMOcXrQ8', + 'ext': 'mp4', + 'title': 'Big_Buck_Bunny_Trailer_400p.ogg.mp4', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video = self._html_search_regex(videolink, webpage, 'video', group='data') + video = video.split('+') + video = [self._parse_json(v, video_id, js_to_json) for v in video] + video = urljoin(url, ''.join(video)) + + try: + poster = self._html_search_regex(r' id="mainvideo"[^>]* poster="(?P.*?)"', + webpage, 'poster', group='data') + poster = urljoin(url, poster) + except ValueError: + poster = None + + title = self._og_search_title(webpage) + + return { + 'id': video_id, + 'url': video, + 'title': title, + 'thumbnail': poster, + 'age_limit': 18, + 'ext': 'mp4', + }