Fixed: Parsing of more anime releases with Chinese and English titles

Closes #4531
This commit is contained in:
Mark McDowall 2021-11-28 15:03:04 -08:00
parent d600e2e3fb
commit 61633ab074
2 changed files with 6 additions and 1 deletions

View File

@ -42,6 +42,8 @@ namespace NzbDrone.Core.Test.ParserTests
[TestCase("[悠哈璃羽字幕社&拉斯观测组&LoliHouse] : Alicization / Anime Series: Title - 17 [WebRip 1080p HEVC-10bit AAC][]", "Anime Series: Title", "&&LoliHouse", 17)]
[TestCase("[ZERO字幕組]·Anime-Series Title[11][BIG5][1080p]", "Anime-Series Title", "ZERO字幕組", 11)]
[TestCase("[Lilith-Raws] II 簿 - Grace note- / Anime-Series Title - 04 [BiliBili][WEB-DL][1080p][AVC AAC][CHT][MKV]", "Anime-Series Title", "Lilith-Raws", 4)]
[TestCase("[NC-Raws] / Anime-Series Title - 07 [B-Global][WEB-DL][1080p][AVC AAC][CHS_CHT_ENG_TH_SRT][MKV]", "Anime-Series Title", "NC-Raws", 7)]
[TestCase("[NC-Raws] ANIME-SERIES TITLE / Anime-Series Title - 07 [Baha][WEB-DL][1080p][AVC AAC][CHT][MP4]", "Anime-Series Title", "NC-Raws", 7)]
public void should_parse_unbracketed_chinese_anime_releases(string postTitle, string title, string subgroup, int absoluteEpisodeNumber)
{
postTitle = XmlCleaner.ReplaceUnicode(postTitle);

View File

@ -27,7 +27,10 @@ namespace NzbDrone.Core.Parser
new RegexReplace(@"^\[(?<subgroup>[^\]]*?(?:LoliHouse|ZERO|Lilith-Raws)[^\]]*?)\](?<title>[^\[\]]+?)(?: - (?<episode>[0-9-]+)\s*|\[第?(?<episode>[0-9]+(?:-[0-9]+)?)话?(?:END|完)?\])\[", "[${subgroup}][${title}][${episode}][", RegexOptions.Compiled),
// Most Chinese anime releases contain additional brackets/separators for chinese and non-chinese titles, remove junk and replace with normal anime pattern
new RegexReplace(@"^\[(?<subgroup>[^\]]+)\](?:\s?★[^\[ -]+\s?)?\[?(?:(?<chinesetitle>[^\]]*?[\u4E00-\u9FCC][^\]]*?)(?:\]\[|\s*[_/·]\s*))?(?<title>[^\]]+?)\]?(?:\[\d{4}\])?\[第?(?<episode>[0-9]+(?:-[0-9]+)?)话?(?:END|完)?\]", "[${subgroup}] ${title} - ${episode} ", RegexOptions.Compiled)
new RegexReplace(@"^\[(?<subgroup>[^\]]+)\](?:\s?★[^\[ -]+\s?)?\[?(?:(?<chinesetitle>[^\]]*?[\u4E00-\u9FCC][^\]]*?)(?:\]\[|\s*[_/·]\s*))?(?<title>[^\]]+?)\]?(?:\[\d{4}\])?\[第?(?<episode>[0-9]+(?:-[0-9]+)?)话?(?:END|完)?\]", "[${subgroup}] ${title} - ${episode} ", RegexOptions.Compiled),
// Some Chinese anime releases contain both Chinese and English titles, remove the Chinese title and replace with normal anime pattern
new RegexReplace(@"^\[(?<subgroup>[^\]]+)\](?:\s)(?:(?<chinesetitle>[^\]]*?[\u4E00-\u9FCC][^\]]*?)(?:\s/\s))(?<title>[^\]]+?)(?:[- ]+)(?<episode>[0-9]+(?:-[0-9]+)?)话?(?:END|完)?", "[${subgroup}] ${title} - ${episode} ", RegexOptions.Compiled)
};
private static readonly Regex[] ReportTitleRegex = new[]