From d4203ee7cb511feb520b603fea875f9d3451af06 Mon Sep 17 00:00:00 2001 From: silentcommitter <116306456+silentcommitter@users.noreply.github.com> Date: Sat, 22 Oct 2022 21:26:28 +0200 Subject: [PATCH] Subf2m provider improvements (#1973) * subf2m provider: add more languages * subf2m provider: use urllib parse rather than string replacement * subf2m provider: change movie title matching to match exact year and use similarity based title matching * subf2m provider: change tvshow title matching to match exact season and use similarity based title matching * no log: Subf2m Provider: add tests * Subf2m Provider: add serbian support Co-authored-by: Vitiko --- libs/subliminal_patch/providers/subf2m.py | 80 +++++++++++++++++++---- tests/subliminal_patch/test_subf2m.py | 37 +++++++++-- 2 files changed, 98 insertions(+), 19 deletions(-) diff --git a/libs/subliminal_patch/providers/subf2m.py b/libs/subliminal_patch/providers/subf2m.py index e6d8c1bc9..f4cf35e2d 100644 --- a/libs/subliminal_patch/providers/subf2m.py +++ b/libs/subliminal_patch/providers/subf2m.py @@ -2,10 +2,13 @@ import functools import logging +import urllib.parse +import re from bs4 import BeautifulSoup as bso from guessit import guessit from requests import Session +from difflib import SequenceMatcher from subliminal_patch.core import Episode from subliminal_patch.core import Movie from subliminal_patch.exceptions import APIThrottled @@ -82,12 +85,37 @@ _LANGUAGE_MAP = { "dutch": "dut", "hebrew": "heb", "indonesian": "ind", + "danish": "dan", + "norwegian": "nor", + "bengali": "ben", + "bulgarian": "bul", + "croatian": "hrv", + "swedish": "swe", + "vietnamese": "vie", + "czech": "cze", + "finnish": "fin", + "french": "fre", + "german": "ger", + "greek": "gre", + "hungarian": "hun", + "icelandic": "ice", + "japanese": "jpn", + "macedonian": "mac", + "malay": "may", + "polish": "pol", + "romanian": "rum", + "russian": "rus", + "serbian": "srp", + "thai": "tha", + "turkish": "tur", } class Subf2mProvider(Provider): provider_name = "subf2m" + _movie_title_regex = re.compile(r"^(.+?)( \((\d{4})\))?$") + _tv_show_title_regex = re.compile(r"^(.+?) - (.*?) season( \((\d{4})\))?$") _supported_languages = {} _supported_languages["brazillian-portuguese"] = Language("por", "BR") @@ -112,7 +140,7 @@ class Subf2mProvider(Provider): def _gen_results(self, query): req = self._session.get( - f"{_BASE_URL}/subtitles/searchbytitle?query={query.replace(' ', '+')}&l=", + f"{_BASE_URL}/subtitles/searchbytitle?query={urllib.parse.quote(query)}&l=", stream=True, ) text = "\n".join(line for line in req.iter_lines(decode_unicode=True) if line) @@ -123,35 +151,61 @@ class Subf2mProvider(Provider): def _search_movie(self, title, year): title = title.lower() - year = f"({year})" + year = str(year) found_movie = None + results = [] for result in self._gen_results(title): text = result.text.lower() - if title.lower() in text and year in text: - found_movie = result.get("href") - logger.debug("Movie found: %s", found_movie) - break + match = self._movie_title_regex.match(text) + if not match: + continue + match_title = match.group(1) + match_year = match.group(3) + if year == match_year: + results.append( + { + "href": result.get("href"), + "similarity": SequenceMatcher(None, title, match_title).ratio(), + } + ) + if results: + results.sort(key=lambda x: x["similarity"], reverse=True) + found_movie = results[0]["href"] + logger.debug("Movie found: %s", results[0]) return found_movie def _search_tv_show_season(self, title, season): try: - season_str = f"{_SEASONS[season - 1]} Season" + season_str = _SEASONS[season - 1].lower() except IndexError: logger.debug("Season number not supported: %s", season) return None - expected_result = f"{title} - {season_str}".lower() - found_tv_show_season = None + results = [] for result in self._gen_results(title): - if expected_result in result.text.lower(): - found_tv_show_season = result.get("href") - logger.debug("TV Show season found: %s", found_tv_show_season) - break + text = result.text.lower() + match = self._tv_show_title_regex.match(text) + if not match: + continue + match_title = match.group(1) + match_season = match.group(2) + if season_str == match_season: + results.append( + { + "href": result.get("href"), + "similarity": SequenceMatcher(None, title, match_title).ratio(), + } + ) + + if results: + results.sort(key=lambda x: x["similarity"], reverse=True) + found_tv_show_season = results[0]["href"] + logger.debug("TV Show season found: %s", results[0]) return found_tv_show_season diff --git a/tests/subliminal_patch/test_subf2m.py b/tests/subliminal_patch/test_subf2m.py index 49651f3d5..0e1b70121 100644 --- a/tests/subliminal_patch/test_subf2m.py +++ b/tests/subliminal_patch/test_subf2m.py @@ -5,20 +5,45 @@ from subliminal_patch.providers.subf2m import Subf2mSubtitle from subzero.language import Language -def test_search_movie(movies): - movie = movies["dune"] +@pytest.mark.parametrize( + "title,year,expected_url", + [ + ( + "Dead Man's Chest", + 2006, + "/subtitles/pirates-of-the-caribbean-2-dead-mans-chest", + ), + ("Dune", 2021, "/subtitles/dune-2021"), + ("Cure", 1997, "/subtitles/cure-kyua"), + ], +) +def test_search_movie(movies, title, year, expected_url): + movie = list(movies.values())[0] + movie.title = title + movie.year = year with Subf2mProvider() as provider: result = provider._search_movie(movie.title, movie.year) - assert result == "/subtitles/dune-2021" + assert result == expected_url -def test_search_tv_show_season(episodes): - episode = episodes["breaking_bad_s01e01"] +@pytest.mark.parametrize( + "title,season,expected_url", + [ + ("Breaking Bad", 1, "/subtitles/breaking-bad-first-season"), + ("House Of The Dragon", 1, "/subtitles/house-of-the-dragon-first-season"), + ("The Bear", 1, "/subtitles/the-bear-first-season"), + ], +) +def test_search_tv_show_season(episodes, title, season, expected_url): + episode = list(episodes.values())[0] + episode.name = title + episode.series = title + episode.season = season with Subf2mProvider() as provider: result = provider._search_tv_show_season(episode.series, episode.season) - assert result == "/subtitles/breaking-bad-first-season" + assert result == expected_url @pytest.mark.parametrize("language", [Language.fromalpha2("en"), Language("por", "BR")])