From 25894c477fc05ecbf5952b95675ef80de16a84b9 Mon Sep 17 00:00:00 2001 From: Vitiko <59455966+vitiko98@users.noreply.github.com> Date: Sun, 28 Mar 2021 14:32:21 -0400 Subject: [PATCH] Added Subsynchro provider --- README.md | 1 + frontend/src/Settings/Providers/list.ts | 1 + libs/subliminal_patch/providers/subdivx.py | 21 ++- libs/subliminal_patch/providers/subsynchro.py | 158 ++++++++++++++++++ 4 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 libs/subliminal_patch/providers/subsynchro.py diff --git a/README.md b/README.md index afaad0d29..a61f6e7d3 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,7 @@ If you need something that is not already part of Bazarr, feel free to create a * Subscene * Subscenter * Subsunacs.net +* SubSynchro * subtitri.id.lv * Subtitulamos.tv * Sucha diff --git a/frontend/src/Settings/Providers/list.ts b/frontend/src/Settings/Providers/list.ts index 2f3574e3b..9eb85d7b5 100644 --- a/frontend/src/Settings/Providers/list.ts +++ b/frontend/src/Settings/Providers/list.ts @@ -160,6 +160,7 @@ export const ProviderList: Readonly = [ name: "Subsunacs.net", description: "Bulgarian Subtitles Provider", }, + { key: "subsynchro", description: "French Subtitles Provider" }, { key: "subtitri", name: "subtitri.id.lv", diff --git a/libs/subliminal_patch/providers/subdivx.py b/libs/subliminal_patch/providers/subdivx.py index dbe3a6913..c66c5a0a8 100644 --- a/libs/subliminal_patch/providers/subdivx.py +++ b/libs/subliminal_patch/providers/subdivx.py @@ -3,6 +3,7 @@ from __future__ import absolute_import import io import logging import os +import re import time import zipfile @@ -22,6 +23,13 @@ from subliminal_patch.subtitle import Subtitle from subliminal_patch.providers import Provider from guessit import guessit + +CLEAN_TITLE_RES = [ + (r"subt[ií]tulos de", ""), + (r"´|`", "'"), + (r" {2,}", " "), +] + logger = logging.getLogger(__name__) @@ -170,7 +178,7 @@ class SubdivxSubtitlesProvider(Provider): title_soup, body_soup = title_soups[subtitle], body_soups[subtitle] # title - title = title_soup.find("a").text.replace("Subtitulos de ", "") + title = self._clean_title(title_soup.find("a").text) # filter by year if video.year and str(video.year) not in title: @@ -214,6 +222,17 @@ class SubdivxSubtitlesProvider(Provider): raise APIThrottled("Download link not found") + @staticmethod + def _clean_title(title): + """ + Normalize apostrophes and spaces to avoid matching problems + (e.g. Subtitulos de Carlito´s Way -> Carlito's Way) + """ + for og, new in CLEAN_TITLE_RES: + title = re.sub(og, new, title, flags=re.IGNORECASE) + + return title + @staticmethod def _check_response(response): if response.status_code != 200: diff --git a/libs/subliminal_patch/providers/subsynchro.py b/libs/subliminal_patch/providers/subsynchro.py new file mode 100644 index 000000000..d3feab733 --- /dev/null +++ b/libs/subliminal_patch/providers/subsynchro.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +import io +import logging +import os + +from zipfile import ZipFile +from requests import Session +from guessit import guessit + +from subliminal import Movie +from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending +from subliminal_patch.exceptions import APIThrottled +from subliminal_patch.providers import Provider +from subliminal_patch.subtitle import Subtitle, guess_matches +from subzero.language import Language + +logger = logging.getLogger(__name__) + +SERVER_URL = "https://www.subsynchro.com/include/ajax/subMarin.php" +PAGE_URL = "https://www.subsynchro.com" + + +class SubsynchroSubtitle(Subtitle): + provider_name = "subsynchro" + hash_verifiable = False + + def __init__( + self, + language, + release_info, + filename, + download_url, + file_type, + matches, + ): + super(SubsynchroSubtitle, self).__init__( + language, hearing_impaired=False, page_link=download_url + ) + self.download_url = download_url + self.language = language + self.file_type = file_type + self.release_info = release_info + self.filename = filename + self.release_info = ( + release_info if len(release_info) > len(filename) else filename + ) + self.found_matches = matches + + @property + def id(self): + return self.download_url + + def get_matches(self, video): + self.found_matches |= guess_matches( + video, + guessit( + self.filename, + ), + ) + self.found_matches |= guess_matches( + video, + guessit( + self.release_info, + ), + ) + return self.found_matches + + +class SubsynchroProvider(Provider): + """Subsynchro Provider""" + + languages = {Language.fromalpha2(l) for l in ["fr"]} + language_list = list(languages) + video_types = (Movie,) + + def initialize(self): + self.session = Session() + self.session.headers = {"User-Agent": "Bazarr", "Referer": PAGE_URL} + + def terminate(self): + self.session.close() + + def query(self, languages, video): + language = self.language_list[0] + + params = {"title": video.title, "year": video.year} + + logger.debug("Searching subtitles from params: %s", params) + + result = self.session.get(SERVER_URL, params=params, timeout=10) + result.raise_for_status() + + subtitles = [] + results = result.json() or {} + + status_ = results.get("status") + + if status_ != 200: + logger.debug(f"No subtitles found (status {status_})") + return subtitles + + for i in results.get("data", []): + matches = set() + if any( + video.title.lower() in title.lower() + for title in (i.get("titre", "n/a"), i.get("titre_original", "n/a")) + ): + # Year is already set on query + matches.update(["title", "year"]) + + subtitles.append( + SubsynchroSubtitle( + language, + i.get("release", "n/a"), + i.get("filename", "n/a"), + i.get("telechargement"), + i.get("fichier"), + matches, + ) + ) + return subtitles + + def list_subtitles(self, video, languages): + return self.query(languages, video) + + def get_file(self, archive): + for name in archive.namelist(): + if os.path.split(name)[-1].startswith("."): + continue + + if not name.lower().endswith(SUBTITLE_EXTENSIONS): + continue + + logger.debug(f"Returning from archive: {name}") + return archive.read(name) + + raise APIThrottled("Can not find the subtitle in the zip file") + + def download_subtitle(self, subtitle): + logger.debug(f"Downloading subtitle {subtitle.download_url}") + + response = self.session.get( + subtitle.download_url, allow_redirects=True, timeout=10 + ) + response.raise_for_status() + + if subtitle.file_type.endswith(".zip"): + logger.debug("Zip file found") + subtitle_ = self.get_file(ZipFile(io.BytesIO(response.content))) + + elif subtitle.file_type.endswith(".srt"): + logger.debug("Srt file found") + subtitle_ = response.content + + else: + raise APIThrottled(f"Unknown file type: {subtitle.file_type}") + + subtitle.content = fix_line_ending(subtitle_)