From 474d48602a5e9daf2e41c3c1460db2d0c9237c64 Mon Sep 17 00:00:00 2001 From: Mark McDowall Date: Sun, 28 Apr 2013 20:11:43 -0700 Subject: [PATCH] Fixed parsing tests, now with more regex --- .../ParserTests/ParserFixture.cs | 50 +++++++----------- NzbDrone.Core/Parser/Parser.cs | 51 +++++++++++++------ 2 files changed, 54 insertions(+), 47 deletions(-) diff --git a/NzbDrone.Core.Test/ParserTests/ParserFixture.cs b/NzbDrone.Core.Test/ParserTests/ParserFixture.cs index 99a9a0ded..376c07f43 100644 --- a/NzbDrone.Core.Test/ParserTests/ParserFixture.cs +++ b/NzbDrone.Core.Test/ParserTests/ParserFixture.cs @@ -50,7 +50,7 @@ namespace NzbDrone.Core.Test.ParserTests [TestCase("House - S06E13 - 5 to 9 [DVD]", "House", 6, 13)] [TestCase("The Mentalist - S02E21 - 18-5-4", "The Mentalist", 2, 21)] [TestCase("Breaking.In.S01E07.21.0.Jump.Street.720p.WEB-DL.DD5.1.h.264-KiNGS", "Breaking In", 1, 7)] - [TestCase("CSI525", "CSI", 5, 25)] + [TestCase("CSI.525", "CSI", 5, 25)] [TestCase("King of the Hill - 10x12 - 24 Hour Propane People [SDTV]", "King of the Hill", 10, 12)] [TestCase("Brew Masters S01E06 3 Beers For Batali DVDRip XviD SPRiNTER", "Brew Masters", 1, 6)] [TestCase("24 7 Flyers Rangers Road to the NHL Winter Classic Part01 720p HDTV x264 ORENJI", "24 7 Flyers Rangers Road to the NHL Winter Classic", 1, 1)] @@ -111,7 +111,7 @@ namespace NzbDrone.Core.Test.ParserTests [Test] public void unparsable_path_should_report_the_path() { - Parser.Parser.ParsePath("C:\\").Should().BeNull(); + Parser.Parser.ParsePath("C:\\SOMETHING 12345.avi").Should().BeNull(); MockedRestProvider.Verify(c => c.PostData(It.IsAny(), It.IsAny()), Times.Exactly(2)); @@ -121,7 +121,7 @@ namespace NzbDrone.Core.Test.ParserTests [Test] public void unparsable_title_should_report_title() { - const string TITLE = "SOMETHING"; + const string TITLE = "SOMETHING 12345"; Parser.Parser.ParseTitle(TITLE).Should().BeNull(); @@ -179,13 +179,13 @@ namespace NzbDrone.Core.Test.ParserTests } - [TestCase("Conan {year} 
{day} {month} Emma Roberts HDTV XviD BFF")] - [TestCase("The Tonight Show With Jay Leno {year} {day} {month} 1080i HDTV DD5 1 MPEG2 TrollHD")] - [TestCase("The.Daily.Show.{year}.{day}.{month}.Johnny.Knoxville.iTouch-MW")] - [TestCase("The Daily Show - {year}-{day}-{month} - Gov. Deval Patrick")] - [TestCase("{year}.{day}.{month} - Denis Leary - HD TV.mkv")] - [TestCase("The Tonight Show with Jay Leno - {year}-{day}-{month} - Larry David, \"Bachelorette\" Ashley Hebert, Pitbull with Ne-Yo")] - [TestCase("2020.NZ.{year}.{day}.{month}.PDTV.XviD-C4TV")] + [TestCase("Conan {year} {month} {day} Emma Roberts HDTV XviD BFF")] + [TestCase("The Tonight Show With Jay Leno {year} {month} {day} 1080i HDTV DD5 1 MPEG2 TrollHD")] + [TestCase("The.Daily.Show.{year}.{month}.{day}.Johnny.Knoxville.iTouch-MW")] + [TestCase("The Daily Show - {year}-{month}-{day} - Gov. Deval Patrick")] + [TestCase("{year}.{month}.{day} - Denis Leary - HD TV.mkv")] + [TestCase("The Tonight Show with Jay Leno - {year}-{month}-{day} - Larry David, \"Bachelorette\" Ashley Hebert, Pitbull with Ne-Yo")] + [TestCase("2020.NZ.{year}.{month}.{day}.PDTV.XviD-C4TV")] public void should_not_accept_ancient_daily_series(string title) { var yearTooLow = title.Expand(new { year = 1950, month = 10, day = 14 }); @@ -193,18 +193,18 @@ namespace NzbDrone.Core.Test.ParserTests } - [TestCase("Conan {year} {day} {month} Emma Roberts HDTV XviD BFF")] + [TestCase("Conan {year} {month} {day} Emma Roberts HDTV XviD BFF")] [TestCase("The Tonight Show With Jay Leno {year} {day} {month} 1080i HDTV DD5 1 MPEG2 TrollHD")] - [TestCase("The.Daily.Show.{year}.{day}.{month}.Johnny.Knoxville.iTouch-MW")] - [TestCase("The Daily Show - {year}-{day}-{month} - Gov. 
Deval Patrick")] - [TestCase("{year}.{day}.{month} - Denis Leary - HD TV.mkv")] - [TestCase("The Tonight Show with Jay Leno - {year}-{day}-{month} - Larry David, \"Bachelorette\" Ashley Hebert, Pitbull with Ne-Yo")] - [TestCase("2020.NZ.{year}.{day}.{month}.PDTV.XviD-C4TV")] + [TestCase("The.Daily.Show.{year}.{month}.{day}.Johnny.Knoxville.iTouch-MW")] + [TestCase("The Daily Show - {year}-{month}-{day} - Gov. Deval Patrick")] + [TestCase("{year}.{month}.{day} - Denis Leary - HD TV.mkv")] + [TestCase("The Tonight Show with Jay Leno - {year}-{month}-{day} - Larry David, \"Bachelorette\" Ashley Hebert, Pitbull with Ne-Yo")] + [TestCase("2020.NZ.{year}.{month}.{day}.PDTV.XviD-C4TV")] public void should_not_accept_future_dates(string title) { var twoDaysFromNow = DateTime.Now.AddDays(2); - var validDate = title.Expand(new { year = twoDaysFromNow.Year, month = twoDaysFromNow.Month, day = twoDaysFromNow.Day }); + var validDate = title.Expand(new { year = twoDaysFromNow.Year, month = twoDaysFromNow.Month.ToString("00"), day = twoDaysFromNow.Day }); Parser.Parser.ParseTitle(validDate).Should().BeNull(); } @@ -392,14 +392,6 @@ namespace NzbDrone.Core.Test.ParserTests result.Should().BeNull(); } - [TestCase("Fussball Bundesliga 10e2011e30 Spieltag FC Bayern Muenchen vs Bayer 04 Leverkusen German WS dTV XviD WoGS")] - public void unparsable_should_log_error_but_not_throw(string title) - { - Parser.Parser.ParseTitle(title); - ExceptionVerification.IgnoreWarns(); - ExceptionVerification.ExpectedErrors(1); - } - [TestCase("[112461]-[FULL]-[#a.b.teevee@EFNet]-[ 666.Park.Avenue.S01E03.720p.HDTV.X264-DIMENSION ]-[02/31] - \"the.devils.address.103.720p-dimension.par2\" yEnc", "666.Park.Avenue.S01E03.720p.HDTV.X264-DIMENSION")] [TestCase("[112438]-[FULL]-[#a.b.teevee@EFNet]-[ Downton_Abbey.3x05.HDTV_x264-FoV ]-[01/26] - \"downton_abbey.3x05.hdtv_x264-fov.nfo\" yEnc", "Downton_Abbey.3x05.HDTV_x264-FoV")] [TestCase("[ 21154 ] - [ TrollHD ] - [ 00/73 ] - \"MythBusters S03E20 Escape Slide 
Parachute 1080i HDTV-UPSCALE DD5.1 MPEG2-TrollHD.nzb\" yEnc", "MythBusters S03E20 Escape Slide Parachute 1080i HDTV-UPSCALE DD5.1 MPEG2-TrollHD.nzb")] @@ -408,19 +400,13 @@ namespace NzbDrone.Core.Test.ParserTests BasicRssParser.ParseHeader(title).Should().Be(expected); } - [TestCase("password - \"bdc435cb-93c4-4902-97ea-ca00568c3887.337\" yEnc")] - public void should_not_parse_encypted_posts(string title) - { - Parser.Parser.ParseTitle(title).Should().BeNull(); - ExceptionVerification.IgnoreWarns(); - } - [TestCase("76El6LcgLzqb426WoVFg1vVVVGx4uCYopQkfjmLe")] [TestCase("Vrq6e1Aba3U amCjuEgV5R2QvdsLEGYF3YQAQkw8")] [TestCase("TDAsqTea7k4o6iofVx3MQGuDK116FSjPobMuh8oB")] [TestCase("yp4nFodAAzoeoRc467HRh1mzuT17qeekmuJ3zFnL")] [TestCase("oxXo8S2272KE1 lfppvxo3iwEJBrBmhlQVK1gqGc")] [TestCase("dPBAtu681Ycy3A4NpJDH6kNVQooLxqtnsW1Umfiv")] + [TestCase("password - \"bdc435cb-93c4-4902-97ea-ca00568c3887.337\" yEnc")] public void should_not_parse_crap(string title) { Parser.Parser.ParseTitle(title).Should().BeNull(); diff --git a/NzbDrone.Core/Parser/Parser.cs b/NzbDrone.Core/Parser/Parser.cs index ffd4db16b..0d9f339a6 100644 --- a/NzbDrone.Core/Parser/Parser.cs +++ b/NzbDrone.Core/Parser/Parser.cs @@ -18,50 +18,54 @@ namespace NzbDrone.Core.Parser private static readonly Regex[] ReportTitleRegex = new[] { //Episodes with airdate - new Regex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>[0-1][0-9])\W+(?<airday>[0-3][0-9])\W?(?!\\)", + new Regex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>[0-1][0-9])\W+(?<airday>[0-3][0-9])(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Multi-Part episodes without a title (S01E05.S01E06) - new Regex(@"^(?:\W*S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]){1,2}(?<episode>\d{1,2}(?!\d+)))+){2,}\W?(?!\\)", + new Regex(@"^(?:\W*S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]){1,2}(?<episode>\d{1,2}(?!\d+)))+){2,}(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Multi-episode Repeated (S01E05 - S01E06, 
1x05 - 1x06, etc) - new Regex(@"^(?<title>.+?)(?:\W+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]){1,2}(?<episode>\d{1,2}(?!\d+)))+){2,}\W?(?!\\)", + new Regex(@"^(?<title>.+?)(?:(\W|_)+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]){1,2}(?<episode>\d{1,2}(?!\d+)))+){2,}(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Episodes without a title, Single (S01E05, 1x05) AND Multi (S01E04E05, 1x04x05, etc) - new Regex(@"^(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)\W?(?!\\)", + new Regex(@"^(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc) - new Regex(@"^(?<title>.+?)(?:\W+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)\W?(?!\\)", + new Regex(@"^(?<title>.+?)(?:(\W|_)+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)\W?(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Episodes over 99 (3-digits or more) (S01E105, S01E105E106, etc) - new Regex(@"^(?<title>.*?)(?:\W?S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]){1,2}(?<episode>\d+))+)+\W?(?!\\)", + new Regex(@"^(?<title>.*?)(?:\W?S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]){1,2}(?<episode>(?<!\d+)\d{3}))+)+(\W|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), new Regex(@"^(?:S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:\-|[ex]|\W[ex])(?<episode>\d{2}(?!\d+)))+\W*)+\W?(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc) - new Regex(@"^(?<title>.+?)(?:\W+S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)\W?(?!\\)", + new 
Regex(@"^(?<title>.+?)(?:\W+S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(\W+|_|$)(?!\\)", + RegexOptions.IgnoreCase | RegexOptions.Compiled), + + //Episodes with single digit episode number (S01E1, S01E5E6, etc) + new Regex(@"^(?<title>.*?)(?:\W?S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]){1,2}(?<episode>\d{1}))+)+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Supports 103/113 naming - new Regex(@"^(?<title>.+?)?(?:\W?(?<season>(?<!\d+)\d{1})(?<episode>\d{2}(?!p|i|\d+)))+\W?(?!\\)", + new Regex(@"^(?<title>.+?)?(?:\W?(?<season>(?<!\d+)\d{1})(?<episode>\d{2}(?!p|i|\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Mini-Series, treated as season 1, episodes are labelled as Part01, Part 01, Part.1 - new Regex(@"^(?<title>.+?)(?:\W+(?:(?:Part\W?|(?<!\d+\W+)e)(?<episode>\d{1,2}(?!\d+)))+)\W?(?!\\)", + new Regex(@"^(?<title>.+?)(?:\W+(?:(?:Part\W?|(?<!\d+\W+)e)(?<episode>\d{1,2}(?!\d+)))+)(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Supports 1103/1113 naming - new Regex(@"^(?<title>.+?)?(?:\W?(?<season>(?<!\d+|\(|\[)\d{2})(?<episode>\d{2}(?!p|i|\d+|\)|\])))+\W?(?!\\)", + new Regex(@"^(?<title>.+?)?(?:\W?(?<season>(?<!\d+|\(|\[)\d{2})(?<episode>\d{2}(?!p|i|\d+|\)|\]|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Supports Season only releases - new Regex(@"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{1,2}(?!\d+))\W?(?<extras>EXTRAS|SUBPACK)?(?!\\)", + new Regex(@"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{1,2}(?!\d+))(\W+|_|$)(?<extras>EXTRAS|SUBPACK)?(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled) }; @@ -71,8 +75,6 @@ namespace NzbDrone.Core.Parser private static readonly Regex SimpleTitleRegex = new Regex(@"480[i|p]|720[i|p]|1080[i|p]|[x|h|x\s|h\s]264|DD\W?5\W1|\<|\>|\?|\*|\:|\||""", RegexOptions.IgnoreCase | RegexOptions.Compiled); - - private static readonly Regex MultiPartCleanupRegex = new 
Regex(@"\(\d+\)$", RegexOptions.Compiled); private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>ita|italian)|(?<german>german\b)|(?<flemish>flemish)|(?<greek>greek)(?:\W|_)", RegexOptions.IgnoreCase | RegexOptions.Compiled); @@ -101,6 +103,8 @@ namespace NzbDrone.Core.Parser { try { + if (!ValidateBeforeParsing(title)) return null; + Logger.Trace("Parsing string '{0}'", title); var simpleTitle = SimpleTitleRegex.Replace(title, String.Empty); @@ -110,11 +114,13 @@ namespace NzbDrone.Core.Parser if (match.Count != 0) { + Logger.Trace("Matching Regex: '{0}'", regex.ToString()); + var result = ParseMatchCollection(match); if (result != null) { //Check if episode is in the future (most likley a parse error) - if (result.AirDate > DateTime.Now.AddDays(1).Date) + if (result.AirDate > DateTime.Now.AddDays(1).Date || result.AirDate < new DateTime(1970, 1, 1)) break; result.Language = ParseLanguage(title); @@ -454,6 +460,22 @@ namespace NzbDrone.Core.Parser return Language.English; } + private static bool ValidateBeforeParsing(string title) + { + if (title.ToLower().Contains("password") && title.ToLower().Contains("yenc")) + { + Logger.Trace(""); + return false; + } + + if (!title.Any(Char.IsLetterOrDigit) || (!title.Any(Char.IsPunctuation) && !title.Any(Char.IsWhiteSpace))) + { + return false; + } + + return true; + } + public static string NormalizeTitle(string title) { long number = 0; @@ -470,6 +492,5 @@ namespace NzbDrone.Core.Parser //this will remove (1),(2) from the end of multi part episodes. return MultiPartCleanupRegex.Replace(title, string.Empty).Trim(); } - } } \ No newline at end of file