FIxed parsing tests, now with more regex

This commit is contained in:
Mark McDowall 2013-04-28 20:11:43 -07:00
parent 9345211536
commit 474d48602a
2 changed files with 54 additions and 47 deletions

View File

@ -50,7 +50,7 @@ namespace NzbDrone.Core.Test.ParserTests
[TestCase("House - S06E13 - 5 to 9 [DVD]", "House", 6, 13)]
[TestCase("The Mentalist - S02E21 - 18-5-4", "The Mentalist", 2, 21)]
[TestCase("Breaking.In.S01E07.21.0.Jump.Street.720p.WEB-DL.DD5.1.h.264-KiNGS", "Breaking In", 1, 7)]
[TestCase("CSI525", "CSI", 5, 25)]
[TestCase("CSI.525", "CSI", 5, 25)]
[TestCase("King of the Hill - 10x12 - 24 Hour Propane People [SDTV]", "King of the Hill", 10, 12)]
[TestCase("Brew Masters S01E06 3 Beers For Batali DVDRip XviD SPRiNTER", "Brew Masters", 1, 6)]
[TestCase("24 7 Flyers Rangers Road to the NHL Winter Classic Part01 720p HDTV x264 ORENJI", "24 7 Flyers Rangers Road to the NHL Winter Classic", 1, 1)]
@ -111,7 +111,7 @@ namespace NzbDrone.Core.Test.ParserTests
[Test]
public void unparsable_path_should_report_the_path()
{
Parser.Parser.ParsePath("C:\\").Should().BeNull();
Parser.Parser.ParsePath("C:\\SOMETHING 12345.avi").Should().BeNull();
MockedRestProvider.Verify(c => c.PostData(It.IsAny<string>(), It.IsAny<ParseErrorReport>()), Times.Exactly(2));
@ -121,7 +121,7 @@ namespace NzbDrone.Core.Test.ParserTests
[Test]
public void unparsable_title_should_report_title()
{
const string TITLE = "SOMETHING";
const string TITLE = "SOMETHING 12345";
Parser.Parser.ParseTitle(TITLE).Should().BeNull();
@ -179,13 +179,13 @@ namespace NzbDrone.Core.Test.ParserTests
}
[TestCase("Conan {year} {day} {month} Emma Roberts HDTV XviD BFF")]
[TestCase("The Tonight Show With Jay Leno {year} {day} {month} 1080i HDTV DD5 1 MPEG2 TrollHD")]
[TestCase("The.Daily.Show.{year}.{day}.{month}.Johnny.Knoxville.iTouch-MW")]
[TestCase("The Daily Show - {year}-{day}-{month} - Gov. Deval Patrick")]
[TestCase("{year}.{day}.{month} - Denis Leary - HD TV.mkv")]
[TestCase("The Tonight Show with Jay Leno - {year}-{day}-{month} - Larry David, \"Bachelorette\" Ashley Hebert, Pitbull with Ne-Yo")]
[TestCase("2020.NZ.{year}.{day}.{month}.PDTV.XviD-C4TV")]
[TestCase("Conan {year} {month} {day} Emma Roberts HDTV XviD BFF")]
[TestCase("The Tonight Show With Jay Leno {year} {month} {day} 1080i HDTV DD5 1 MPEG2 TrollHD")]
[TestCase("The.Daily.Show.{year}.{month}.{day}.Johnny.Knoxville.iTouch-MW")]
[TestCase("The Daily Show - {year}-{month}-{day} - Gov. Deval Patrick")]
[TestCase("{year}.{month}.{day} - Denis Leary - HD TV.mkv")]
[TestCase("The Tonight Show with Jay Leno - {year}-{month}-{day} - Larry David, \"Bachelorette\" Ashley Hebert, Pitbull with Ne-Yo")]
[TestCase("2020.NZ.{year}.{month}.{day}.PDTV.XviD-C4TV")]
public void should_not_accept_ancient_daily_series(string title)
{
var yearTooLow = title.Expand(new { year = 1950, month = 10, day = 14 });
@ -193,18 +193,18 @@ namespace NzbDrone.Core.Test.ParserTests
}
[TestCase("Conan {year} {day} {month} Emma Roberts HDTV XviD BFF")]
[TestCase("Conan {year} {month} {day} Emma Roberts HDTV XviD BFF")]
[TestCase("The Tonight Show With Jay Leno {year} {day} {month} 1080i HDTV DD5 1 MPEG2 TrollHD")]
[TestCase("The.Daily.Show.{year}.{day}.{month}.Johnny.Knoxville.iTouch-MW")]
[TestCase("The Daily Show - {year}-{day}-{month} - Gov. Deval Patrick")]
[TestCase("{year}.{day}.{month} - Denis Leary - HD TV.mkv")]
[TestCase("The Tonight Show with Jay Leno - {year}-{day}-{month} - Larry David, \"Bachelorette\" Ashley Hebert, Pitbull with Ne-Yo")]
[TestCase("2020.NZ.{year}.{day}.{month}.PDTV.XviD-C4TV")]
[TestCase("The.Daily.Show.{year}.{month}.{day}.Johnny.Knoxville.iTouch-MW")]
[TestCase("The Daily Show - {year}-{month}-{day} - Gov. Deval Patrick")]
[TestCase("{year}.{month}.{day} - Denis Leary - HD TV.mkv")]
[TestCase("The Tonight Show with Jay Leno - {year}-{month}-{day} - Larry David, \"Bachelorette\" Ashley Hebert, Pitbull with Ne-Yo")]
[TestCase("2020.NZ.{year}.{month}.{day}.PDTV.XviD-C4TV")]
public void should_not_accept_future_dates(string title)
{
var twoDaysFromNow = DateTime.Now.AddDays(2);
var validDate = title.Expand(new { year = twoDaysFromNow.Year, month = twoDaysFromNow.Month, day = twoDaysFromNow.Day });
var validDate = title.Expand(new { year = twoDaysFromNow.Year, month = twoDaysFromNow.Month.ToString("00"), day = twoDaysFromNow.Day });
Parser.Parser.ParseTitle(validDate).Should().BeNull();
}
@ -392,14 +392,6 @@ namespace NzbDrone.Core.Test.ParserTests
result.Should().BeNull();
}
[TestCase("Fussball Bundesliga 10e2011e30 Spieltag FC Bayern Muenchen vs Bayer 04 Leverkusen German WS dTV XviD WoGS")]
public void unparsable_should_log_error_but_not_throw(string title)
{
Parser.Parser.ParseTitle(title);
ExceptionVerification.IgnoreWarns();
ExceptionVerification.ExpectedErrors(1);
}
[TestCase("[112461]-[FULL]-[#a.b.teevee@EFNet]-[ 666.Park.Avenue.S01E03.720p.HDTV.X264-DIMENSION ]-[02/31] - \"the.devils.address.103.720p-dimension.par2\" yEnc", "666.Park.Avenue.S01E03.720p.HDTV.X264-DIMENSION")]
[TestCase("[112438]-[FULL]-[#a.b.teevee@EFNet]-[ Downton_Abbey.3x05.HDTV_x264-FoV ]-[01/26] - \"downton_abbey.3x05.hdtv_x264-fov.nfo\" yEnc", "Downton_Abbey.3x05.HDTV_x264-FoV")]
[TestCase("[ 21154 ] - [ TrollHD ] - [ 00/73 ] - \"MythBusters S03E20 Escape Slide Parachute 1080i HDTV-UPSCALE DD5.1 MPEG2-TrollHD.nzb\" yEnc", "MythBusters S03E20 Escape Slide Parachute 1080i HDTV-UPSCALE DD5.1 MPEG2-TrollHD.nzb")]
@ -408,19 +400,13 @@ namespace NzbDrone.Core.Test.ParserTests
BasicRssParser.ParseHeader(title).Should().Be(expected);
}
[TestCase("password - \"bdc435cb-93c4-4902-97ea-ca00568c3887.337\" yEnc")]
public void should_not_parse_encypted_posts(string title)
{
Parser.Parser.ParseTitle(title).Should().BeNull();
ExceptionVerification.IgnoreWarns();
}
[TestCase("76El6LcgLzqb426WoVFg1vVVVGx4uCYopQkfjmLe")]
[TestCase("Vrq6e1Aba3U amCjuEgV5R2QvdsLEGYF3YQAQkw8")]
[TestCase("TDAsqTea7k4o6iofVx3MQGuDK116FSjPobMuh8oB")]
[TestCase("yp4nFodAAzoeoRc467HRh1mzuT17qeekmuJ3zFnL")]
[TestCase("oxXo8S2272KE1 lfppvxo3iwEJBrBmhlQVK1gqGc")]
[TestCase("dPBAtu681Ycy3A4NpJDH6kNVQooLxqtnsW1Umfiv")]
[TestCase("password - \"bdc435cb-93c4-4902-97ea-ca00568c3887.337\" yEnc")]
public void should_not_parse_crap(string title)
{
Parser.Parser.ParseTitle(title).Should().BeNull();

View File

@ -18,50 +18,54 @@ namespace NzbDrone.Core.Parser
private static readonly Regex[] ReportTitleRegex = new[]
{
//Episodes with airdate
new Regex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>[0-1][0-9])\W+(?<airday>[0-3][0-9])\W?(?!\\)",
new Regex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>[0-1][0-9])\W+(?<airday>[0-3][0-9])(\W+|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Multi-Part episodes without a title (S01E05.S01E06)
new Regex(@"^(?:\W*S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]){1,2}(?<episode>\d{1,2}(?!\d+)))+){2,}\W?(?!\\)",
new Regex(@"^(?:\W*S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]){1,2}(?<episode>\d{1,2}(?!\d+)))+){2,}(\W+|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Multi-episode Repeated (S01E05 - S01E06, 1x05 - 1x06, etc)
new Regex(@"^(?<title>.+?)(?:\W+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]){1,2}(?<episode>\d{1,2}(?!\d+)))+){2,}\W?(?!\\)",
new Regex(@"^(?<title>.+?)(?:(\W|_)+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]){1,2}(?<episode>\d{1,2}(?!\d+)))+){2,}(\W+|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes without a title, Single (S01E05, 1x05) AND Multi (S01E04E05, 1x04x05, etc)
new Regex(@"^(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)\W?(?!\\)",
new Regex(@"^(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(\W+|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc)
new Regex(@"^(?<title>.+?)(?:\W+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)\W?(?!\\)",
new Regex(@"^(?<title>.+?)(?:(\W|_)+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)\W?(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes over 99 (3-digits or more) (S01E105, S01E105E106, etc)
new Regex(@"^(?<title>.*?)(?:\W?S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]){1,2}(?<episode>\d+))+)+\W?(?!\\)",
new Regex(@"^(?<title>.*?)(?:\W?S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]){1,2}(?<episode>(?<!\d+)\d{3}))+)+(\W|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
new Regex(@"^(?:S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:\-|[ex]|\W[ex])(?<episode>\d{2}(?!\d+)))+\W*)+\W?(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc)
new Regex(@"^(?<title>.+?)(?:\W+S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)\W?(?!\\)",
new Regex(@"^(?<title>.+?)(?:\W+S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(\W+|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes with single digit episode number (S01E1, S01E5E6, etc)
new Regex(@"^(?<title>.*?)(?:\W?S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]){1,2}(?<episode>\d{1}))+)+(\W+|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Supports 103/113 naming
new Regex(@"^(?<title>.+?)?(?:\W?(?<season>(?<!\d+)\d{1})(?<episode>\d{2}(?!p|i|\d+)))+\W?(?!\\)",
new Regex(@"^(?<title>.+?)?(?:\W?(?<season>(?<!\d+)\d{1})(?<episode>\d{2}(?!p|i|\d+)))+(\W+|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Mini-Series, treated as season 1, episodes are labelled as Part01, Part 01, Part.1
new Regex(@"^(?<title>.+?)(?:\W+(?:(?:Part\W?|(?<!\d+\W+)e)(?<episode>\d{1,2}(?!\d+)))+)\W?(?!\\)",
new Regex(@"^(?<title>.+?)(?:\W+(?:(?:Part\W?|(?<!\d+\W+)e)(?<episode>\d{1,2}(?!\d+)))+)(\W+|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Supports 1103/1113 naming
new Regex(@"^(?<title>.+?)?(?:\W?(?<season>(?<!\d+|\(|\[)\d{2})(?<episode>\d{2}(?!p|i|\d+|\)|\])))+\W?(?!\\)",
new Regex(@"^(?<title>.+?)?(?:\W?(?<season>(?<!\d+|\(|\[)\d{2})(?<episode>\d{2}(?!p|i|\d+|\)|\]|\W\d+)))+(\W+|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Supports Season only releases
new Regex(@"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{1,2}(?!\d+))\W?(?<extras>EXTRAS|SUBPACK)?(?!\\)",
new Regex(@"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{1,2}(?!\d+))(\W+|_|$)(?<extras>EXTRAS|SUBPACK)?(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled)
};
@ -71,8 +75,6 @@ namespace NzbDrone.Core.Parser
private static readonly Regex SimpleTitleRegex = new Regex(@"480[i|p]|720[i|p]|1080[i|p]|[x|h|x\s|h\s]264|DD\W?5\W1|\<|\>|\?|\*|\:|\||""",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex MultiPartCleanupRegex = new Regex(@"\(\d+\)$", RegexOptions.Compiled);
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>ita|italian)|(?<german>german\b)|(?<flemish>flemish)|(?<greek>greek)(?:\W|_)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
@ -101,6 +103,8 @@ namespace NzbDrone.Core.Parser
{
try
{
if (!ValidateBeforeParsing(title)) return null;
Logger.Trace("Parsing string '{0}'", title);
var simpleTitle = SimpleTitleRegex.Replace(title, String.Empty);
@ -110,11 +114,13 @@ namespace NzbDrone.Core.Parser
if (match.Count != 0)
{
Logger.Trace("Matching Regex: '{0}'", regex.ToString());
var result = ParseMatchCollection(match);
if (result != null)
{
//Check if episode is in the future (most likley a parse error)
if (result.AirDate > DateTime.Now.AddDays(1).Date)
if (result.AirDate > DateTime.Now.AddDays(1).Date || result.AirDate < new DateTime(1970, 1, 1))
break;
result.Language = ParseLanguage(title);
@ -454,6 +460,22 @@ namespace NzbDrone.Core.Parser
return Language.English;
}
private static bool ValidateBeforeParsing(string title)
{
if (title.ToLower().Contains("password") && title.ToLower().Contains("yenc"))
{
Logger.Trace("");
return false;
}
if (!title.Any(Char.IsLetterOrDigit) || (!title.Any(Char.IsPunctuation) && !title.Any(Char.IsWhiteSpace)))
{
return false;
}
return true;
}
public static string NormalizeTitle(string title)
{
long number = 0;
@ -470,6 +492,5 @@ namespace NzbDrone.Core.Parser
//this will remove (1),(2) from the end of multi part episodes.
return MultiPartCleanupRegex.Replace(title, string.Empty).Trim();
}
}
}