From 86e709e922029c11f0ce74d6916f41fa7a8371da Mon Sep 17 00:00:00 2001 From: Mark McDowall Date: Fri, 22 Apr 2011 17:20:58 -0700 Subject: [PATCH] Normalize REGEX will remove more words and will leave digits for now. --- NzbDrone.Core.Test/ParserTest.cs | 9 ++++++--- NzbDrone.Core/Parser.cs | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/NzbDrone.Core.Test/ParserTest.cs b/NzbDrone.Core.Test/ParserTest.cs index 56685fa7e..1163457b9 100644 --- a/NzbDrone.Core.Test/ParserTest.cs +++ b/NzbDrone.Core.Test/ParserTest.cs @@ -25,7 +25,7 @@ namespace NzbDrone.Core.Test [Row("The.Girls.Next.Door.S03E06.DVDRip.XviD-WiDE", "The.Girls.Next.Door", 3, 6)] [Row("Degrassi.S10E27.WS.DSR.XviD-2HD", "Degrassi", 10, 27)] [Row("Parenthood.2010.S02E14.HDTV.XviD-LOL", "Parenthood", 2, 14)] - [Row("Hawaii Five 0 S01E19 720p WEB DL DD5 1 H 264 NT", "Hawaii Five", 1, 19)] + [Row("Hawaii Five 0 S01E19 720p WEB DL DD5 1 H 264 NT", "Hawaii Five0", 1, 19)] [Row("The Event S01E14 A Message Back 720p WEB DL DD5 1 H264 SURFER", "The Event", 1, 14)] [Row("Adam Hills In Gordon St Tonight S01E07 WS PDTV XviD FUtV", "Adam Hills In Gordon St Tonight", 1, 7)] [Row("Adam Hills In Gordon St Tonight S01E07 WS PDTV XviD FUtV", "Adam Hills In Gordon St Tonight", 1, 7)] @@ -133,8 +133,11 @@ namespace NzbDrone.Core.Test [Test] [Row("the")] - [Row("And")] - [Row("Or")] + [Row("and")] + [Row("or")] + [Row("a")] + [Row("an")] + [Row("of")] public void Normalize_removed_common_words(string word) { var dirtyFormat = new[] diff --git a/NzbDrone.Core/Parser.cs b/NzbDrone.Core/Parser.cs index 2c9a5a5cb..ef5cb08b5 100644 --- a/NzbDrone.Core/Parser.cs +++ b/NzbDrone.Core/Parser.cs @@ -34,7 +34,7 @@ namespace NzbDrone.Core RegexOptions.Compiled), }; - private static readonly Regex NormalizeRegex = new Regex(@"((\s|^)the(\s|$))|((\s|^)and(\s|$))|[^a-z]", + private static readonly Regex NormalizeRegex = new Regex(@"((^|\W)(a|an|the|and|or|of)($|\W))|\W", RegexOptions.IgnoreCase | RegexOptions.Compiled); ///