Improved detection of hashed releases.

- Added specific rejection code for 32 random characters to ignore all md5 and mixed-case hashes.
- Added NZBGeek hash format.
- Removed -RP from Release Group.
- Added test and fix for reversed title. Currently matching it based on a couple of patterns.
This commit is contained in:
Taloth Saldono 2014-04-18 01:16:40 +02:00
parent 9c858445a3
commit 5428d9d53f
5 changed files with 131 additions and 9 deletions

View File

@ -48,6 +48,14 @@ namespace NzbDrone.Common
return stringBuilder.ToString().Normalize(NormalizationForm.FormC);
}
public static string TrimEnd(this string text, string postfix)
{
if (text.EndsWith(postfix))
text = text.Substring(0, text.Length - postfix.Length);
return text;
}
public static string CleanSpaces(this string text)
{
return CollapseSpace.Replace(text, " ").Trim();

View File

@ -7,6 +7,7 @@ using NzbDrone.Core.Parser;
using NzbDrone.Core.Test.Framework;
using NzbDrone.Core.Tv;
using NzbDrone.Test.Common;
using System.Text;
namespace NzbDrone.Core.Test.ParserTests
{
@ -29,10 +30,60 @@ namespace NzbDrone.Core.Test.ParserTests
[TestCase("THIS SHOULD NEVER PARSE")]
[TestCase("Vh1FvU3bJXw6zs8EEUX4bMo5vbbMdHghxHirc.mkv")]
[TestCase("0e895c37245186812cb08aab1529cf8ee389dd05.mkv")]
[TestCase("08bbc153931ce3ca5fcafe1b92d3297285feb061.mkv")]
[TestCase("185d86a343e39f3341e35c4dad3ff159")]
public void should_not_parse_crap(string title)
{
Parser.Parser.ParseTitle(title).Should().BeNull();
ExceptionVerification.IgnoreWarns();
}
[Test]
public void should_not_parse_md5()
{
string hash = "CRAPPY TEST SEED";
var hashAlgo = System.Security.Cryptography.MD5.Create();
var repetitions = 100;
var success = 0;
for (int i = 0; i < repetitions; i++)
{
var hashData = hashAlgo.ComputeHash(System.Text.Encoding.Default.GetBytes(hash));
hash = BitConverter.ToString(hashData).Replace("-", "");
if (Parser.Parser.ParseTitle(hash) == null)
success++;
}
success.Should().Be(repetitions);
}
[TestCase(32)]
[TestCase(40)]
public void should_not_parse_random(int length)
{
string charset = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
var hashAlgo = new Random();
var repetitions = 500;
var success = 0;
for (int i = 0; i < repetitions; i++)
{
StringBuilder hash = new StringBuilder(length);
for (int x = 0; x < length; x++)
{
hash.Append(charset[hashAlgo.Next() % charset.Length]);
}
if (Parser.Parser.ParseTitle(hash.ToString()) == null)
success++;
}
success.Should().Be(repetitions);
}
}
}

View File

@ -12,17 +12,31 @@ namespace NzbDrone.Core.Test.ParserTests
{
new object[]
{
@"C:\Test\Some.Hashed.Release.S01E01.720p.WEB-DL.AAC2.0.H.264-Mercury\0e895c3724.mkv".AsOsAgnostic(),
@"C:\Test\Some.Hashed.Release.S01E01.720p.WEB-DL.AAC2.0.H.264-Mercury\0e895c37245186812cb08aab1529cf8ee389dd05.mkv".AsOsAgnostic(),
"somehashedrelease",
"WEBDL-720p",
"Mercury"
},
new object[]
{
@"C:\Test\0e895c3724\Some.Hashed.Release.S01E01.720p.WEB-DL.AAC2.0.H.264-Mercury.mkv".AsOsAgnostic(),
@"C:\Test\0e895c37245186812cb08aab1529cf8ee389dd05\Some.Hashed.Release.S01E01.720p.WEB-DL.AAC2.0.H.264-Mercury.mkv".AsOsAgnostic(),
"somehashedrelease",
"WEBDL-720p",
"Mercury"
},
new object[]
{
@"C:\Test\Some.Hashed.Release.S01E01.720p.WEB-DL.AAC2.0.H.264-Mercury.mkv\yrucreM-462.H.0.2CAA.LD-BEW.p027.10E10S.esaeleR.dehsaH.emoS.mkv".AsOsAgnostic(),
"somehashedrelease",
"WEBDL-720p",
"Mercury"
},
new object[]
{
@"C:\Test\Weeds.S01E10.DVDRip.XviD-NZBgeek\AHFMZXGHEWD660.mkv".AsOsAgnostic(),
"weeds",
"DVD",
"NZBgeek"
}
};

View File

@ -24,11 +24,17 @@ namespace NzbDrone.Core.Test.ParserTests
}
[Test]
public void should_not_include_extension_in_release_roup()
public void should_not_include_extension_in_release_group()
{
const string path = @"C:\Test\Doctor.Who.2005.s01e01.internal.bdrip.x264-archivist.mkv";
Parser.Parser.ParsePath(path).ReleaseGroup.Should().Be("archivist");
}
[TestCase("The.Longest.Mystery.S02E04.720p.WEB-DL.AAC2.0.H.264-EVL-RP", "EVL")]
public void should_not_include_repost_in_release_group(string title, string expected)
{
Parser.Parser.ParseReleaseGroup(title).Should().Be(expected);
}
}
}

View File

@ -43,7 +43,7 @@ namespace NzbDrone.Core.Parser
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes without a title, Single (S01E05, 1x05) AND Multi (S01E04E05, 1x04x05, etc)
new Regex(@"^(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+)))+(?![\da-z]))",
new Regex(@"^(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+)))+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc)
@ -92,7 +92,7 @@ namespace NzbDrone.Core.Parser
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc)
new Regex(@"^(?<title>.+?)(?:(\W|_)+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+(?![\da-z]))\W?(?!\\)",
new Regex(@"^(?<title>.+?)(?:(\W|_)+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+)\W?(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - Title Absolute Episode Number
@ -100,6 +100,18 @@ namespace NzbDrone.Core.Parser
RegexOptions.IgnoreCase | RegexOptions.Compiled)
};
private static readonly Regex[] RejectHashedReleasesRegex = new Regex[]
{
// Generic match for md5 and mixed-case hashes.
new Regex(@"^[0-9a-zA-Z]{32}", RegexOptions.Compiled),
// Format seen on some NZBGeek releases
new Regex(@"^[A-Z]{11}\d{3}$", RegexOptions.Compiled)
};
//Regex to detect whether the title was reversed.
private static readonly Regex ReversedTitleRegex = new Regex(@"\.p027\.|\.p0801\.|\.\d{2}E\d{2}S\.", RegexOptions.Compiled);
private static readonly Regex NormalizeRegex = new Regex(@"((?:\b|_)(?<!^)(a|an|the|and|or|of)(?:\b|_))|\W|_",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
@ -155,6 +167,17 @@ namespace NzbDrone.Core.Parser
if (!ValidateBeforeParsing(title)) return null;
Logger.Debug("Parsing string '{0}'", title);
if (ReversedTitleRegex.IsMatch(title))
{
var titleWithoutExtension = RemoveFileExtension(title).ToCharArray();
Array.Reverse(titleWithoutExtension);
title = new string(titleWithoutExtension) + title.Substring(titleWithoutExtension.Length);
Logger.Debug("Reversed name detected. Converted to '{0}'", title);
}
var simpleTitle = SimpleTitleRegex.Replace(title, String.Empty);
foreach (var regex in ReportTitleRegex)
@ -245,10 +268,9 @@ namespace NzbDrone.Core.Parser
title = title.Trim();
if (!title.ContainsInvalidPathChars() && MediaFiles.MediaFileExtensions.Extensions.Contains(Path.GetExtension(title).ToLower()))
{
title = Path.GetFileNameWithoutExtension(title).Trim();
}
title = RemoveFileExtension(title);
title = title.TrimEnd("-RP");
var index = title.LastIndexOf('-');
@ -275,6 +297,19 @@ namespace NzbDrone.Core.Parser
return group;
}
public static string RemoveFileExtension(string title)
{
if (!title.ContainsInvalidPathChars())
{
if (MediaFiles.MediaFileExtensions.Extensions.Contains(Path.GetExtension(title).ToLower()))
{
title = Path.Combine(Path.GetDirectoryName(title), Path.GetFileNameWithoutExtension(title));
}
}
return title;
}
private static SeriesTitleInfo GetSeriesTitleInfo(string title)
{
var seriesTitleInfo = new SeriesTitleInfo();
@ -511,6 +546,14 @@ namespace NzbDrone.Core.Parser
return false;
}
var titleWithoutExtension = RemoveFileExtension(title);
if (RejectHashedReleasesRegex.Any(v => v.IsMatch(titleWithoutExtension)))
{
Logger.Debug("Rejected Hashed Release Title: " + title);
return false;
}
return true;
}
}