From e2ba532cee04b6fbf1e536d18666dc4d017f23ee Mon Sep 17 00:00:00 2001 From: Jens Lee Date: Fri, 21 Apr 2023 18:25:09 +0800 Subject: [PATCH] Fixed zimuku that now require anti-captcha --- bazarr/app/config.py | 2 +- bazarr/init.py | 2 +- frontend/src/pages/Settings/Providers/list.ts | 6 +- libs/subliminal_patch/providers/zimuku.py | 101 +++++++++++------- requirements.txt | 3 +- 5 files changed, 71 insertions(+), 43 deletions(-) diff --git a/bazarr/app/config.py b/bazarr/app/config.py index acf5e9104..306c46539 100644 --- a/bazarr/app/config.py +++ b/bazarr/app/config.py @@ -280,7 +280,7 @@ defaults = { 'database': '', 'username': '', 'password': '', - } + }, } settings = SimpleConfigParser(defaults=defaults, interpolation=None) diff --git a/bazarr/init.py b/bazarr/init.py index 3c4f5a621..f0e06416f 100644 --- a/bazarr/init.py +++ b/bazarr/init.py @@ -76,7 +76,7 @@ def is_virtualenv(): # deploy requirements.txt if not args.no_update: try: - import lxml, numpy, webrtcvad, setuptools # noqa E401 + import lxml, numpy, webrtcvad, setuptools, PIL # noqa E401 except ImportError: try: import pip # noqa W0611 diff --git a/frontend/src/pages/Settings/Providers/list.ts b/frontend/src/pages/Settings/Providers/list.ts index 780258764..c266f5203 100644 --- a/frontend/src/pages/Settings/Providers/list.ts +++ b/frontend/src/pages/Settings/Providers/list.ts @@ -448,5 +448,9 @@ export const ProviderList: Readonly = [ description: "Bulgarian Subtitles Provider", }, { key: "yifysubtitles", name: "YIFY Subtitles" }, - { key: "zimuku", description: "Chinese Subtitles Provider" }, + { + key: "zimuku", + name: "Zimuku", + description: "Chinese Subtitles Provider. Anti-captcha required", + }, ]; diff --git a/libs/subliminal_patch/providers/zimuku.py b/libs/subliminal_patch/providers/zimuku.py index 27251e657..1940f0f1c 100644 --- a/libs/subliminal_patch/providers/zimuku.py +++ b/libs/subliminal_patch/providers/zimuku.py @@ -1,11 +1,13 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import +import base64 import io import logging import os import zipfile import re import copy +from PIL import Image try: from urlparse import urljoin @@ -20,6 +22,7 @@ from requests import Session from six import text_type from random import randint +from python_anticaptcha import AnticaptchaClient, ImageToTextTask from subliminal.providers import ParserBeautifulSoup from subliminal_patch.providers import Provider from subliminal.subtitle import ( @@ -39,6 +42,7 @@ language_converters.register('zimuku = subliminal_patch.converters.zimuku:zimuku supported_languages = list(language_converters['zimuku'].to_zimuku.keys()) + class ZimukuSubtitle(Subtitle): """Zimuku Subtitle.""" @@ -80,6 +84,13 @@ class ZimukuSubtitle(Subtitle): return matches +def string_to_hex(s): + val = "" + for i in s: + val += hex(ord(i))[2:] + return val + + class ZimukuProvider(Provider): """Zimuku Provider.""" @@ -87,40 +98,58 @@ class ZimukuProvider(Provider): video_types = (Episode, Movie) logger.info(str(supported_languages)) - server_url = "http://zimuku.org" - search_url = "/search?q={}&security_verify_data={}" - download_url = "http://zimuku.org/" + server_url = "https://so.zimuku.org" + search_url = "/search?q={}" subtitle_class = ZimukuSubtitle def __init__(self): self.session = None - def stringToHex(self, s): - val = "" - for i in s: - val += hex(ord(i))[2:] - return val - vertoken = "" + verify_token = "" + code = "" location_re = re.compile( - r'self\.location = "(.*)" \+ stringToHex\(screendate\)') + r'self\.location = "(.*)" \+ stringToHex\(text\)') + verification_image_re = re.compile(r'') def yunsuo_bypass(self, url, *args, **kwargs): + def parse_verification_image(image_content: str): + + def bmp_to_image(base64_str, img_type='png'): + img_data = base64.b64decode(base64_str) + img = Image.open(io.BytesIO(img_data)) + img = img.convert("RGB") + img_fp = io.BytesIO() + img.save(img_fp, img_type) + img_fp.seek(0) + return img_fp + + fp = bmp_to_image(image_content) + task = ImageToTextTask(fp) + client = AnticaptchaClient(os.environ.get('ANTICAPTCHA_ACCOUNT_KEY')) + job = client.createTask(task) + job.join() + return job.get_captcha_text() + i = -1 while True: i += 1 r = self.session.get(url, *args, **kwargs) - if(r.status_code == 404): + if r.status_code == 404: + # mock js script logic tr = self.location_re.findall(r.text) - self.session.cookies.set("srcurl", self.stringToHex(r.url)) - if(tr): + verification_image = self.verification_image_re.findall(r.text) + self.code = parse_verification_image(verification_image[0]) + self.session.cookies.set("srcurl", string_to_hex(r.url)) + if tr: verify_resp = self.session.get( - self.server_url+tr[0]+self.stringToHex("1920,1080"), allow_redirects=False) - if(verify_resp.status_code == 302 and self.session.cookies.get("security_session_verify") != None): + self.server_url + tr[0] + string_to_hex(self.code), allow_redirects=False) + if verify_resp.status_code == 302 \ + and self.session.cookies.get("security_session_verify") is not None: pass continue if len(self.location_re.findall(r.text)) == 0: - self.vertoken = self.stringToHex("1920,1080") + self.verify_token = string_to_hex(self.code) return r def initialize(self): @@ -147,14 +176,14 @@ class ZimukuProvider(Provider): language = Language("eng") for img in sub.find("td", class_="tac lang").find_all("img"): if ( - "china" in img.attrs["src"] - and "hongkong" in img.attrs["src"] + "china" in img.attrs["src"] + and "hongkong" in img.attrs["src"] ): language = Language("zho").add(Language('zho', 'TW', None)) - logger.debug("language:"+str(language)) + logger.debug("language:" + str(language)) elif ( - "china" in img.attrs["src"] - or "jollyroger" in img.attrs["src"] + "china" in img.attrs["src"] + or "jollyroger" in img.attrs["src"] ): language = Language("zho") elif "hongkong" in img.attrs["src"]: @@ -171,8 +200,6 @@ class ZimukuProvider(Provider): return subs def query(self, keyword, season=None, episode=None, year=None): - if self.vertoken == "": - self.yunsuo_bypass(self.server_url + '/') params = keyword if season: params += ".S{season:02d}".format(season=season) @@ -181,8 +208,8 @@ class ZimukuProvider(Provider): logger.debug("Searching subtitles %r", params) subtitles = [] - search_link = self.server_url + text_type(self.search_url).format(params, self.vertoken) - + search_link = self.server_url + text_type(self.search_url).format(params) + r = self.yunsuo_bypass(search_link, timeout=30) r.raise_for_status() @@ -198,7 +225,7 @@ class ZimukuProvider(Provider): while parts: parts.reverse() redirect_url = urljoin(self.server_url, "".join(parts)) - r = self.query_resp(redirect_url, timeout=30) + r = self.session.get(redirect_url, timeout=30) html = r.content.decode("utf-8", "ignore") parts = re.findall(pattern, html) logger.debug("search url located: " + redirect_url) @@ -267,26 +294,22 @@ class ZimukuProvider(Provider): return subtitles def download_subtitle(self, subtitle): - def _get_archive_dowload_link(yunsuopass, sub_page_link): - r = yunsuopass(sub_page_link) + def _get_archive_download_link(yunsuopass, sub_page_link): + res = yunsuopass(sub_page_link) bs_obj = ParserBeautifulSoup( - r.content.decode("utf-8", "ignore"), ["html.parser"] + res.content.decode("utf-8", "ignore"), ["html.parser"] ) down_page_link = bs_obj.find("a", {"id": "down1"}).attrs["href"] down_page_link = urljoin(sub_page_link, down_page_link) - r = yunsuopass(down_page_link) + res = yunsuopass(down_page_link) bs_obj = ParserBeautifulSoup( - r.content.decode("utf-8", "ignore"), ["html.parser"] + res.content.decode("utf-8", "ignore"), ["html.parser"] ) - download_link = bs_obj.find("a", {"rel": "nofollow"}) - download_link = download_link.attrs["href"] - download_link = urljoin(sub_page_link, download_link) - return download_link + return urljoin(sub_page_link, bs_obj.find("a", {"rel": "nofollow"}).attrs["href"]) # download the subtitle logger.info("Downloading subtitle %r", subtitle) - self.session = subtitle.session - download_link = _get_archive_dowload_link(self.yunsuo_bypass, subtitle.page_link) + download_link = _get_archive_download_link(self.yunsuo_bypass, subtitle.page_link) r = self.yunsuo_bypass(download_link, headers={'Referer': subtitle.page_link}, timeout=30) r.raise_for_status() try: @@ -404,7 +427,7 @@ def _extract_name(name): result = [start, end] start = end end += 1 - new_name = name[result[0] : result[1]] + new_name = name[result[0]: result[1]] new_name = new_name.strip() + suffix return new_name @@ -413,7 +436,7 @@ def num_to_cn(number): """ convert numbers(1-99) to Chinese """ assert number.isdigit() and 1 <= int(number) <= 99 - trans_map = {n: c for n, c in zip(("123456789"), ("一二三四五六七八九"))} + trans_map = {n: c for n, c in zip("123456789", "一二三四五六七八九")} if len(number) == 1: return trans_map[number] diff --git a/requirements.txt b/requirements.txt index c061d866e..5489479b5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ setuptools lxml>=4.3.0 numpy>=1.12.0 -webrtcvad-wheels>=2.0.10 \ No newline at end of file +webrtcvad-wheels>=2.0.10 +Pillow>=9.0.0 \ No newline at end of file