Fixed: Mis-classification of releases as being Czech (#3378)

After the changes from PR #2948, the regex was too eager to match any
substring contain "SK", such as in "MASK". Fix by requiring word
separation around it, as was already the case with the "CZ" token.
This commit is contained in:
Václav Slavík 2019-02-24 18:00:04 +01:00 committed by Leonardo Galli
parent 53f49f3b07
commit 264629cfa5
2 changed files with 2 additions and 1 deletions

View File

@ -49,6 +49,7 @@ namespace NzbDrone.Core.Test.ParserTests
[TestCase("The Danish Girl 2015", Language.English)]
[TestCase("Nocturnal Animals (2016) MULTi VFQ English [1080p] BluRay x264-PopHD", Language.English, Language.French)]
[TestCase("Wonder.Woman.2017.720p.BluRay.DD5.1.x264-TayTO.CZ-FTU", Language.Czech)]
[TestCase("Fantastic.Beasts.The.Crimes.Of.Grindelwald.2018.2160p.WEBRip.x265.10bit.HDR.DD5.1-GASMASK", Language.English)]
public void should_parse_language(string postTitle, params Language[] languages)
{
var movieInfo = Parser.Parser.ParseMovieTitle(postTitle, true);

View File

@ -13,7 +13,7 @@ namespace NzbDrone.Core.Parser
{
private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(LanguageParser));
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR|VO|VFF|VFQ|VF2|TRUEFRENCH)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<czech>\bCZ|SK\b)",
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR|VO|VFF|VFQ|VF2|TRUEFRENCH)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<czech>\b(?:CZ|SK)\b)",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex SubtitleLanguageRegex = new Regex(".+?[-_. ](?<iso_code>[a-z]{2,3})$", RegexOptions.Compiled | RegexOptions.IgnoreCase);