Fixed zimuku, which now requires anti-captcha

This commit is contained in:
Jens Lee 2023-04-21 18:25:09 +08:00 committed by GitHub
parent ac6dddd607
commit e2ba532cee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 71 additions and 43 deletions

View File

@ -280,7 +280,7 @@ defaults = {
'database': '', 'database': '',
'username': '', 'username': '',
'password': '', 'password': '',
} },
} }
settings = SimpleConfigParser(defaults=defaults, interpolation=None) settings = SimpleConfigParser(defaults=defaults, interpolation=None)

View File

@ -76,7 +76,7 @@ def is_virtualenv():
# deploy requirements.txt # deploy requirements.txt
if not args.no_update: if not args.no_update:
try: try:
import lxml, numpy, webrtcvad, setuptools # noqa E401 import lxml, numpy, webrtcvad, setuptools, PIL # noqa E401
except ImportError: except ImportError:
try: try:
import pip # noqa W0611 import pip # noqa W0611

View File

@ -448,5 +448,9 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
description: "Bulgarian Subtitles Provider", description: "Bulgarian Subtitles Provider",
}, },
{ key: "yifysubtitles", name: "YIFY Subtitles" }, { key: "yifysubtitles", name: "YIFY Subtitles" },
{ key: "zimuku", description: "Chinese Subtitles Provider" }, {
key: "zimuku",
name: "Zimuku",
description: "Chinese Subtitles Provider. Anti-captcha required",
},
]; ];

View File

@ -1,11 +1,13 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import from __future__ import absolute_import
import base64
import io import io
import logging import logging
import os import os
import zipfile import zipfile
import re import re
import copy import copy
from PIL import Image
try: try:
from urlparse import urljoin from urlparse import urljoin
@ -20,6 +22,7 @@ from requests import Session
from six import text_type from six import text_type
from random import randint from random import randint
from python_anticaptcha import AnticaptchaClient, ImageToTextTask
from subliminal.providers import ParserBeautifulSoup from subliminal.providers import ParserBeautifulSoup
from subliminal_patch.providers import Provider from subliminal_patch.providers import Provider
from subliminal.subtitle import ( from subliminal.subtitle import (
@ -39,6 +42,7 @@ language_converters.register('zimuku = subliminal_patch.converters.zimuku:zimuku
supported_languages = list(language_converters['zimuku'].to_zimuku.keys()) supported_languages = list(language_converters['zimuku'].to_zimuku.keys())
class ZimukuSubtitle(Subtitle): class ZimukuSubtitle(Subtitle):
"""Zimuku Subtitle.""" """Zimuku Subtitle."""
@ -80,6 +84,13 @@ class ZimukuSubtitle(Subtitle):
return matches return matches
def string_to_hex(s):
    """Return the lowercase hexadecimal code points of ``s`` joined together.

    Every character is converted to the hex form of its code point without
    the ``0x`` prefix and without zero padding, so a character below U+0010
    contributes a single digit (``"\\n"`` becomes ``"a"``) and a character
    above U+00FF contributes more than two digits.

    :param s: text to encode; an empty string yields an empty string.
    :return: the concatenated hex digits as a ``str``.
    """
    # format(n, "x") yields exactly the same digits as hex(n)[2:]; joining once
    # avoids rebuilding the accumulated string on every character.
    return "".join(format(ord(ch), "x") for ch in s)
class ZimukuProvider(Provider): class ZimukuProvider(Provider):
"""Zimuku Provider.""" """Zimuku Provider."""
@ -87,40 +98,58 @@ class ZimukuProvider(Provider):
video_types = (Episode, Movie) video_types = (Episode, Movie)
logger.info(str(supported_languages)) logger.info(str(supported_languages))
server_url = "http://zimuku.org" server_url = "https://so.zimuku.org"
search_url = "/search?q={}&security_verify_data={}" search_url = "/search?q={}"
download_url = "http://zimuku.org/"
subtitle_class = ZimukuSubtitle subtitle_class = ZimukuSubtitle
def __init__(self): def __init__(self):
self.session = None self.session = None
def stringToHex(self, s): verify_token = ""
val = "" code = ""
for i in s:
val += hex(ord(i))[2:]
return val
vertoken = ""
location_re = re.compile( location_re = re.compile(
r'self\.location = "(.*)" \+ stringToHex\(screendate\)') r'self\.location = "(.*)" \+ stringToHex\(text\)')
verification_image_re = re.compile(r'<img.*?src="data:image/bmp;base64,(.*?)".*?>')
def yunsuo_bypass(self, url, *args, **kwargs): def yunsuo_bypass(self, url, *args, **kwargs):
def parse_verification_image(image_content: str):
    """Send a base64-encoded captcha picture to Anti-Captcha and return its text.

    :param image_content: base64 text of the verification image.
    :return: whatever ``job.get_captcha_text()`` reports for the solved task.
    """
    # Decode the picture, normalise it to RGB and re-encode it as PNG in memory.
    raw_bytes = base64.b64decode(image_content)
    picture = Image.open(io.BytesIO(raw_bytes)).convert("RGB")
    png_buffer = io.BytesIO()
    picture.save(png_buffer, 'png')
    # Rewind so the consumer reads the PNG from its first byte.
    png_buffer.seek(0)

    captcha_task = ImageToTextTask(png_buffer)
    # The account key is taken from the process environment.
    solver = AnticaptchaClient(os.environ.get('ANTICAPTCHA_ACCOUNT_KEY'))
    solving_job = solver.createTask(captcha_task)
    # NOTE(review): presumably blocks until the service has an answer - confirm.
    solving_job.join()
    return solving_job.get_captcha_text()
i = -1 i = -1
while True: while True:
i += 1 i += 1
r = self.session.get(url, *args, **kwargs) r = self.session.get(url, *args, **kwargs)
if(r.status_code == 404): if r.status_code == 404:
# mock js script logic
tr = self.location_re.findall(r.text) tr = self.location_re.findall(r.text)
self.session.cookies.set("srcurl", self.stringToHex(r.url)) verification_image = self.verification_image_re.findall(r.text)
if(tr): self.code = parse_verification_image(verification_image[0])
self.session.cookies.set("srcurl", string_to_hex(r.url))
if tr:
verify_resp = self.session.get( verify_resp = self.session.get(
self.server_url+tr[0]+self.stringToHex("1920,1080"), allow_redirects=False) self.server_url + tr[0] + string_to_hex(self.code), allow_redirects=False)
if(verify_resp.status_code == 302 and self.session.cookies.get("security_session_verify") != None): if verify_resp.status_code == 302 \
and self.session.cookies.get("security_session_verify") is not None:
pass pass
continue continue
if len(self.location_re.findall(r.text)) == 0: if len(self.location_re.findall(r.text)) == 0:
self.vertoken = self.stringToHex("1920,1080") self.verify_token = string_to_hex(self.code)
return r return r
def initialize(self): def initialize(self):
@ -147,14 +176,14 @@ class ZimukuProvider(Provider):
language = Language("eng") language = Language("eng")
for img in sub.find("td", class_="tac lang").find_all("img"): for img in sub.find("td", class_="tac lang").find_all("img"):
if ( if (
"china" in img.attrs["src"] "china" in img.attrs["src"]
and "hongkong" in img.attrs["src"] and "hongkong" in img.attrs["src"]
): ):
language = Language("zho").add(Language('zho', 'TW', None)) language = Language("zho").add(Language('zho', 'TW', None))
logger.debug("language:"+str(language)) logger.debug("language:" + str(language))
elif ( elif (
"china" in img.attrs["src"] "china" in img.attrs["src"]
or "jollyroger" in img.attrs["src"] or "jollyroger" in img.attrs["src"]
): ):
language = Language("zho") language = Language("zho")
elif "hongkong" in img.attrs["src"]: elif "hongkong" in img.attrs["src"]:
@ -171,8 +200,6 @@ class ZimukuProvider(Provider):
return subs return subs
def query(self, keyword, season=None, episode=None, year=None): def query(self, keyword, season=None, episode=None, year=None):
if self.vertoken == "":
self.yunsuo_bypass(self.server_url + '/')
params = keyword params = keyword
if season: if season:
params += ".S{season:02d}".format(season=season) params += ".S{season:02d}".format(season=season)
@ -181,8 +208,8 @@ class ZimukuProvider(Provider):
logger.debug("Searching subtitles %r", params) logger.debug("Searching subtitles %r", params)
subtitles = [] subtitles = []
search_link = self.server_url + text_type(self.search_url).format(params, self.vertoken) search_link = self.server_url + text_type(self.search_url).format(params)
r = self.yunsuo_bypass(search_link, timeout=30) r = self.yunsuo_bypass(search_link, timeout=30)
r.raise_for_status() r.raise_for_status()
@ -198,7 +225,7 @@ class ZimukuProvider(Provider):
while parts: while parts:
parts.reverse() parts.reverse()
redirect_url = urljoin(self.server_url, "".join(parts)) redirect_url = urljoin(self.server_url, "".join(parts))
r = self.query_resp(redirect_url, timeout=30) r = self.session.get(redirect_url, timeout=30)
html = r.content.decode("utf-8", "ignore") html = r.content.decode("utf-8", "ignore")
parts = re.findall(pattern, html) parts = re.findall(pattern, html)
logger.debug("search url located: " + redirect_url) logger.debug("search url located: " + redirect_url)
@ -267,26 +294,22 @@ class ZimukuProvider(Provider):
return subtitles return subtitles
def download_subtitle(self, subtitle): def download_subtitle(self, subtitle):
def _get_archive_dowload_link(yunsuopass, sub_page_link): def _get_archive_download_link(yunsuopass, sub_page_link):
r = yunsuopass(sub_page_link) res = yunsuopass(sub_page_link)
bs_obj = ParserBeautifulSoup( bs_obj = ParserBeautifulSoup(
r.content.decode("utf-8", "ignore"), ["html.parser"] res.content.decode("utf-8", "ignore"), ["html.parser"]
) )
down_page_link = bs_obj.find("a", {"id": "down1"}).attrs["href"] down_page_link = bs_obj.find("a", {"id": "down1"}).attrs["href"]
down_page_link = urljoin(sub_page_link, down_page_link) down_page_link = urljoin(sub_page_link, down_page_link)
r = yunsuopass(down_page_link) res = yunsuopass(down_page_link)
bs_obj = ParserBeautifulSoup( bs_obj = ParserBeautifulSoup(
r.content.decode("utf-8", "ignore"), ["html.parser"] res.content.decode("utf-8", "ignore"), ["html.parser"]
) )
download_link = bs_obj.find("a", {"rel": "nofollow"}) return urljoin(sub_page_link, bs_obj.find("a", {"rel": "nofollow"}).attrs["href"])
download_link = download_link.attrs["href"]
download_link = urljoin(sub_page_link, download_link)
return download_link
# download the subtitle # download the subtitle
logger.info("Downloading subtitle %r", subtitle) logger.info("Downloading subtitle %r", subtitle)
self.session = subtitle.session download_link = _get_archive_download_link(self.yunsuo_bypass, subtitle.page_link)
download_link = _get_archive_dowload_link(self.yunsuo_bypass, subtitle.page_link)
r = self.yunsuo_bypass(download_link, headers={'Referer': subtitle.page_link}, timeout=30) r = self.yunsuo_bypass(download_link, headers={'Referer': subtitle.page_link}, timeout=30)
r.raise_for_status() r.raise_for_status()
try: try:
@ -404,7 +427,7 @@ def _extract_name(name):
result = [start, end] result = [start, end]
start = end start = end
end += 1 end += 1
new_name = name[result[0] : result[1]] new_name = name[result[0]: result[1]]
new_name = new_name.strip() + suffix new_name = new_name.strip() + suffix
return new_name return new_name
@ -413,7 +436,7 @@ def num_to_cn(number):
""" convert numbers(1-99) to Chinese """ """ convert numbers(1-99) to Chinese """
assert number.isdigit() and 1 <= int(number) <= 99 assert number.isdigit() and 1 <= int(number) <= 99
trans_map = {n: c for n, c in zip(("123456789"), ("一二三四五六七八九"))} trans_map = {n: c for n, c in zip("123456789", "一二三四五六七八九")}
if len(number) == 1: if len(number) == 1:
return trans_map[number] return trans_map[number]

View File

@ -1,4 +1,5 @@
setuptools setuptools
lxml>=4.3.0 lxml>=4.3.0
numpy>=1.12.0 numpy>=1.12.0
webrtcvad-wheels>=2.0.10 webrtcvad-wheels>=2.0.10
Pillow>=9.0.0