Don't clean common words from the start of the title

Fixed: Do not treat similar titles as the exact same
This commit is contained in:
Mark McDowall 2014-03-10 12:20:28 -07:00
parent 70266c921b
commit db5baf70ea
3 changed files with 32 additions and 40 deletions

View File

@ -15,8 +15,6 @@ namespace NzbDrone.Core.Test.ParserTests
public class NormalizeTitleFixture : CoreTest
{
[TestCase("Conan", "conan")]
[TestCase("The Tonight Show With Jay Leno", "tonightshowwithjayleno")]
[TestCase("The.Daily.Show", "dailyshow")]
[TestCase("Castle (2009)", "castle2009")]
[TestCase("Parenthood.2010", "parenthood2010")]
[TestCase("Law_and_Order_SVU", "lawordersvu")]
@ -51,9 +49,6 @@ namespace NzbDrone.Core.Test.ParserTests
"word.{0}.word",
"word {0} word",
"word-{0}-word",
"{0}.word.word",
"{0}-word-word",
"{0} word word",
"word.word.{0}",
"word-word-{0}",
"word-word {0}",
@ -64,7 +59,6 @@ namespace NzbDrone.Core.Test.ParserTests
var dirty = String.Format(s, word);
dirty.CleanSeriesTitle().Should().Be("wordword");
}
}
[TestCase("the")]
@ -92,5 +86,36 @@ namespace NzbDrone.Core.Test.ParserTests
}
}
// A common word ("The") that opens the title must be kept by CleanSeriesTitle:
// each case pairs a raw title with the normalized form it is expected to produce.
[TestCase("The Office", "theoffice")]
[TestCase("The Tonight Show With Jay Leno", "thetonightshowwithjayleno")]
[TestCase("The.Daily.Show", "thedailyshow")]
public void should_not_remove_from_the_beginning_of_the_title(string parsedSeriesName, string seriesName)
{
    parsedSeriesName.CleanSeriesTitle().Should().Be(seriesName);
}
// Each listed common word is placed at the very start of a title, using '.', '-'
// and ' ' as separators, and must still be present after CleanSeriesTitle runs.
[TestCase("the")]
[TestCase("and")]
[TestCase("or")]
[TestCase("a")]
[TestCase("an")]
[TestCase("of")]
public void should_not_clean_word_from_beginning_of_string(string word)
{
    // The leading word is kept; only the separators are removed.
    var expected = word + "wordword";

    var templates = new[]
    {
        "{0}.word.word",
        "{0}-word-word",
        "{0} word word"
    };

    for (var i = 0; i < templates.Length; i++)
    {
        var title = String.Format(templates[i], word);
        title.CleanSeriesTitle().Should().Be(expected);
    }
}
}
}

View File

@ -23,39 +23,6 @@ namespace NzbDrone.Core.Test.ParserTests
* Superman.-.The.Man.of.Steel.1994-05.33.hybrid.DreamGirl-Novus-HD
*/
// Parses release names that carry an absolute episode number.
// Case arguments: release name, expected series title (before cleaning),
// absolute episode number, season number, episode number.
[TestCase("[SubDESU]_High_School_DxD_07_(1280x720_x264-AAC)_[6B7FD717]", "High School DxD", 7, 0, 0)]
[TestCase("[Chihiro]_Working!!_-_06_[848x480_H.264_AAC][859EEAFA]", "Working!!", 6, 0, 0)]
[TestCase("[Commie]_Senki_Zesshou_Symphogear_-_11_[65F220B4]", "Senki_Zesshou_Symphogear", 11, 0, 0)]
[TestCase("[Underwater]_Rinne_no_Lagrange_-_12_(720p)_[5C7BC4F9]", "Rinne_no_Lagrange", 12, 0, 0)]
[TestCase("[Commie]_Rinne_no_Lagrange_-_15_[E76552EA]", "Rinne_no_Lagrange", 15, 0, 0)]
[TestCase("[HorribleSubs]_Hunter_X_Hunter_-_33_[720p]", "Hunter_X_Hunter", 33, 0, 0)]
[TestCase("[HorribleSubs]_Fairy_Tail_-_145_[720p]", "Fairy_Tail", 145, 0, 0)]
[TestCase("[HorribleSubs] Tonari no Kaibutsu-kun - 13 [1080p].mkv", "Tonari no Kaibutsu-kun", 13, 0, 0)]
[TestCase("[Doremi].Yes.Pretty.Cure.5.Go.Go!.31.[1280x720].[C65D4B1F].mkv", "Yes.Pretty.Cure.5.Go.Go!", 31, 0, 0)]
[TestCase("[K-F] One Piece 214", "One Piece", 214, 0, 0)]
[TestCase("[K-F] One Piece S10E14 214", "One Piece", 214, 10, 14)]
[TestCase("[K-F] One Piece 10x14 214", "One Piece", 214, 10, 14)]
[TestCase("[K-F] One Piece 214 10x14", "One Piece", 214, 10, 14)]
// Disabled cases: the same names without the leading "[K-F]" group tag.
// NOTE(review): presumably these do not parse yet - confirm before re-enabling.
// [TestCase("One Piece S10E14 214", "One Piece", 214, 10, 14)]
// [TestCase("One Piece 10x14 214", "One Piece", 214, 10, 14)]
// [TestCase("One Piece 214 10x14", "One Piece", 214, 10, 14)]
// [TestCase("214 One Piece 10x14", "One Piece", 214, 10, 14)]
[TestCase("Bleach - 031 - The Resolution to Kill [Lunar].avi", "Bleach", 31, 0, 0)]
[TestCase("Bleach - 031 - The Resolution to Kill [Lunar]", "Bleach", 31, 0, 0)]
[TestCase("[ACX]Hack Sign 01 Role Play [Kosaka] [9C57891E].mkv", "Hack Sign", 1, 0, 0)]
[TestCase("[SFW-sage] Bakuman S3 - 12 [720p][D07C91FC]", "Bakuman S3", 12, 0, 0)]
[TestCase("ducktales_e66_time_is_money_part_one_marking_time", "DuckTales", 66, 0, 0)]
public void parse_absolute_numbers(string postTitle, string title, int absoluteEpisodeNumber, int seasonNumber, int episodeNumber)
{
    var parsed = Parser.Parser.ParseTitle(postTitle);

    // Check for a result first so a failed parse is reported as such.
    parsed.Should().NotBeNull();

    parsed.AbsoluteEpisodeNumbers.First().Should().Be(absoluteEpisodeNumber);
    parsed.SeasonNumber.Should().Be(seasonNumber);

    // FirstOrDefault: cases without a season/episode pair expect 0 here.
    parsed.EpisodeNumbers.FirstOrDefault().Should().Be(episodeNumber);

    // The expected title is run through the same cleaning before comparison.
    var expectedTitle = title.CleanSeriesTitle();
    parsed.SeriesTitle.Should().Be(expectedTitle);

    parsed.FullSeason.Should().BeFalse();
}
[TestCase("Chuck - 4x05 - Title", "Chuck")]
[TestCase("Law & Order - 4x05 - Title", "laworder")]
[TestCase("Bad Format", "badformat")]

View File

@ -100,7 +100,7 @@ namespace NzbDrone.Core.Parser
RegexOptions.IgnoreCase | RegexOptions.Compiled)
};
// NOTE(review): two declarations of NormalizeRegex appear back to back in this view.
// Given the hunk header (7 lines before, 7 lines after), the first is presumably the
// line this commit removes and the second its replacement - confirm against the file.
//
// Old pattern: matches a, an, the, and, or, of when delimited by a word boundary or
// '_' on each side, and also any single non-word character or '_'.
private static readonly Regex NormalizeRegex = new Regex(@"((?:\b|_)(a|an|the|and|or|of)(?:\b|_))|\W|_",
// New pattern: same as above plus the negative lookbehind (?<!^), which rejects a
// common word that begins at index 0 of the input (no Multiline option is set, so ^
// is the start of the string). A word that follows a leading '_' still matches,
// because the '_' is consumed before the lookbehind is evaluated.
private static readonly Regex NormalizeRegex = new Regex(@"((?:\b|_)(?<!^)(a|an|the|and|or|of)(?:\b|_))|\W|_",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex SimpleTitleRegex = new Regex(@"480[i|p]|720[i|p]|1080[i|p]|[x|h|x\s|h\s]264|DD\W?5\W1|\<|\>|\?|\*|\:|\|",