2018-03-14 20:41:36 +00:00
using System ;
2012-08-03 07:01:34 +00:00
using System.Collections.Generic ;
using System.IO ;
using System.Linq ;
using System.Text.RegularExpressions ;
2019-02-17 23:09:41 +00:00
using NLog ;
2019-12-22 22:08:53 +00:00
using NzbDrone.Common.Extensions ;
2013-08-31 01:42:30 +00:00
using NzbDrone.Common.Instrumentation ;
2019-12-22 22:08:53 +00:00
using NzbDrone.Core.Parser.Model ;
2012-08-03 07:01:34 +00:00
2013-04-15 01:41:39 +00:00
namespace NzbDrone.Core.Parser
2012-08-03 07:01:34 +00:00
{
public static class Parser
{
2014-12-17 07:12:26 +00:00
// Class-level logger obtained from the shared NzbDrone logging helper.
private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(Parser));

// Matches an edition marker (e.g. "Director's Cut", "Extended", "25th Anniversary Edition"),
// optionally wrapped in parentheses, and captures it in the "edition" group.
private static readonly Regex EditionRegex = new Regex(
    @"\(?\b(?<edition>(((Recut.|Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Extended|Despecialized|(Special|Rouge|Final|Assembly|Imperial|Diamond|Signature|Hunter|Rekall)(?=(.(Cut|Edition|Version)))|\d{2,3}(th)?.Anniversary)(?:.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Restored|((2|3|4)in1))))))\b\)?",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);

// Same edition pattern, but it must be preceded by at least one character counted from the
// start of the input. The pattern text comes from EditionRegex.ToString() via concatenation,
// so this field has to stay declared after EditionRegex.
private static readonly Regex ReportEditionRegex = new Regex(
    @"^.+?" + EditionRegex,
    RegexOptions.Compiled | RegexOptions.IgnoreCase);
2020-04-01 20:47:51 +00:00
2017-01-04 21:59:34 +00:00
// Ordered list of title patterns. ParseMovieTitle tries them top to bottom and returns the
// first one that yields a result, so more specific formats must stay ahead of generic ones.
private static readonly Regex[] ReportMovieTitleRegex = new Regex[]
{
    // German / French / TrueFrench tracker formats where the year may be missing.
    // See ParserFixture for examples and tests.
    new Regex(
        @"^(?<title>(?![(\[]).+?)((\W|_))(" + EditionRegex + @".{1,3})?(?:(?<!(19|20)\d{2}.*?)(German|French|TrueFrench))(.+?)(?=((19|20)\d{2}|$))(?<year>(19|20)\d{2}(?!p|i|\d+|\]|\W\d+))?(\W+|_|$)(?!\\)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled),

    // Edition placed before the year, e.g. Mission.Impossible.3.Special.Edition.2011
    new Regex(
        @"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*" + EditionRegex + @".{1,3}(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled),

    // Edition placed after the year, e.g. Mission.Impossible.3.2011.Special.Edition
    // TODO: Disabled because it seems to slow down parsing heavily!
    /* new Regex(@"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*(?<year>(19|20)\d{2}(?!p|i|(19|20)\d{2}|\]|\W(19|20)\d{2})))+(\W+|_|$)(?!\\)\(?(?<edition>(((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Rogue|Special|Despecialized|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1))))))\)?",
                        RegexOptions.IgnoreCase | RegexOptions.Compiled), */

    // Normal movie format, e.g. Mission.Impossible.3.2011
    new Regex(
        @"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*(?<year>(1(8|9)|20)\d{2}(?!p|i|(1(8|9)|20)\d{2}|\]|\W(1(8|9)|20)\d{2})))+(\W+|_|$)(?!\\)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled),

    // PassThePopcorn torrent names, e.g. Star.Wars[PassThePopcorn]
    new Regex(
        @"^(?<title>.+?)?(?:(?:[-_\W](?<![()\[!]))*(?<year>(\[\w *\])))+(\W+|_|$)(?!\\)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled),

    // Fallback for tools that put the year inside square brackets.
    new Regex(
        @"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)!]))*(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\W\d+)))+(\W+|_|$)(?!\\)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled),

    // Last resort for movies that have ( or [ in their title.
    new Regex(
        @"^(?<title>.+?)?(?:(?:[-_\W](?<![)\[!]))*(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)",
        RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
// Extra patterns that ParseMovieTitle appends to the candidate list only when isDir is true.
private static readonly Regex[] ReportMovieTitleFolderRegex = new Regex[]
{
    // Year comes first and the title follows, e.g. "2011 Mission Impossible".
    // Note: constructed without RegexOptions, so it is case-sensitive and not compiled.
    new Regex(@"^(?:(?:[-_\W](?<![)!]))*(?<year>(19|20)\d{2}(?!p|i|\d+|\W\d+)))+(\W+|_|$)(?<title>.+?)?$")
};
2018-08-05 14:28:05 +00:00
2014-04-17 23:16:40 +00:00
// Patterns for release names that are hashes or placeholder junk rather than real titles.
// ValidateBeforeParsing rejects a title when any of these matches the extension-less name.
private static readonly Regex[] RejectHashedReleasesRegex =
{
    // Generic match for md5 and mixed-case hashes (32 leading alphanumerics).
    new Regex(@"^[0-9a-zA-Z]{32}", RegexOptions.Compiled),

    // Generic match for shorter lower-case hashes (exactly 24 characters).
    new Regex(@"^[a-z0-9]{24}$", RegexOptions.Compiled),

    // Format seen on some NZBGeek releases.
    // Kept very strict because it is close to the valid "101" episode numbering.
    new Regex(@"^[A-Z]{11}\d{3}$", RegexOptions.Compiled),
    new Regex(@"^[a-z]{12}\d{3}$", RegexOptions.Compiled),

    // Backup filename (unknown origin).
    new Regex(@"^Backup_\d{5,}S\d{2}-\d{2}$", RegexOptions.Compiled),

    // "123" - started appearing December 2014.
    new Regex(@"^123$", RegexOptions.Compiled),

    // "abc" - started appearing January 2015.
    new Regex(@"^abc$", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // "abc xyz" prefix - started appearing 2020.
    new Regex(@"^abc[-_. ]xyz", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // "b00bs" - started appearing January 2015.
    new Regex(@"^b00bs$", RegexOptions.Compiled | RegexOptions.IgnoreCase)
};
// Detects a reversed title by looking for the reversed resolution tokens "p027" / "p0801".
private static readonly Regex ReversedTitleRegex = new Regex(
    @"(?:^|[-._ ])(p027|p0801)[-._ ]",
    RegexOptions.Compiled);

// Used by CleanMovieTitle: matches common words (a, an, the, and, or, of) plus every
// non-word character and underscore so they can be stripped.
private static readonly Regex NormalizeRegex = new Regex(
    @"((?:\b|_)(?<!^|[^a-zA-Z0-9_']\w[^a-zA-Z0-9_'])(a(?!$|[^a-zA-Z0-9_']\w[^a-zA-Z0-9_'])|an|the|and|or|of)(?:\b|_))|\W|_",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Trailing ".xxx" token of 2-4 alphanumerics; RemoveFileExtension decides whether to drop it.
private static readonly Regex FileExtensionRegex = new Regex(
    @"\.[a-z0-9]{2,4}$",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// "tt" followed by 7 or 8 digits, captured as "imdbid".
private static readonly Regex ReportImdbId = new Regex(
    @"(?<imdbid>tt\d{7,8})",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// "tmdb-<digits>" or "tmdbid-<digits>", digits captured as "tmdbid".
private static readonly Regex ReportTmdbId = new Regex(
    @"tmdb(id)?-(?<tmdbid>\d+)",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Removes resolution / codec / bit-depth tokens and a few special characters before title matching.
private static readonly RegexReplace SimpleTitleRegex = new RegexReplace(
    @"\s*(?:480[ip]|576[ip]|720[ip]|1080[ip]|2160[ip]|[xh][\W_]?26[45]|DD\W?5\W1|[<>?*:|]|848x480|1280x720|1920x1080|(8|10)b(it)?)",
    string.Empty,
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Matches the characters < > ? * : | together with any whitespace directly before them.
private static readonly Regex SimpleReleaseTitleRegex = new Regex(
    @"\s*(?:[<>?*:|])",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);

// Removes a leading "[site.tld]" tag or a leading "www.site.com" style prefix.
private static readonly RegexReplace WebsitePrefixRegex = new RegexReplace(
    @"^\[\s*[-a-z]+(\.[a-z]+)+\s*\][- ]*|^www\.[a-z]+\.(?:com|net|org)[ -]*",
    string.Empty,
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Removes a trailing "[site.tld]" tag.
private static readonly RegexReplace WebsitePostfixRegex = new RegexReplace(
    @"\[\s*[-a-z]+(\.[a-z0-9]+)+\s*\]$",
    string.Empty,
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Removes one or more trailing "-Suffix" tokens from a fixed list (e.g. -Obfuscated, -sample).
private static readonly RegexReplace CleanReleaseGroupRegex = new RegexReplace(
    @"(-(RP|1|NZBGeek|Obfuscated|Obfuscation|Scrambled|sample|Pre|postbot|xpost|Rakuv[a-z0-9]*|WhiteRev|BUYMORE|AsRequested|AlternativeToRequested|GEROV|Z0iDS3N|Chamele0n|4P|4Planet|AlteZachen))+$",
    string.Empty,
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Removes a trailing "[ettv]", "[rartv]", "[rarbg]", "[cttv]" or "[publichd]" tag.
private static readonly RegexReplace CleanTorrentSuffixRegex = new RegexReplace(
    @"\[(?:ettv|rartv|rarbg|cttv|publichd)\]$",
    string.Empty,
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Trailing "[...]" block; ParseMovieTitle removes it only when it parses as a known quality.
private static readonly Regex CleanQualityBracketsRegex = new Regex(
    @"\[[a-z0-9 ._-]+\]$",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Captures "-Group" (optionally "-Group-Part2") or a trailing "[Group]" as "releasegroup",
// while excluding dash-joined tokens such as WEB-DL, Blu-Ray, DTS-HD, dates and tmdb/imdb ids.
private static readonly Regex ReleaseGroupRegex = new Regex(
    @"-(?<releasegroup>[a-z0-9]+(?<part2>-[a-z0-9]+)?(?!.+?(?:480p|720p|1080p|2160p)))(?<!(?:WEB-DL|Blu-Ray|480p|720p|1080p|2160p|DTS-HD|DTS-X|DTS-MA|DTS-ES|[ ._]\d{4}-\d{2}|-\d{2}|tmdb(id)?-(?<tmdbid>\d+)|(?<imdbid>tt\d{7,8}))(?:\k<part2>)?)(?:\b|[-._ ]|$)|[-._ ]\[(?<releasegroup>[a-z0-9]+)\]$",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Leading "[SubGroup]" tag, captured as "subgroup".
private static readonly Regex AnimeReleaseGroupRegex = new Regex(
    @"^(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Title followed by a four-digit number captured as "year".
private static readonly Regex YearInTitleRegex = new Regex(
    @"^(?<title>.+?)(?:\W|_)?(?<year>\d{4})",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Manually maintained list of release groups that do not follow the "-RlsGrp" convention.
// The first alternative only matches when the name is directly followed by ")" or "]";
// the entries after `(?=\]|\))|` match on the name alone.
// BE VERY CAREFUL WITH THIS, HIGH CHANCE OF FALSE POSITIVES.
private static readonly Regex ExceptionReleaseGroupRegex = new Regex(
    @"(?<releasegroup>(Tigole|Joy|YIFY|YTS.MX|YTS.LT|FreetheFish|afm72|Anna|Bandi|Ghost|Kappa|MONOLITH|Qman|RZeroX|SAMPA|Silence|theincognito|t3nzin|Vyndros)(?=\]|\))|KRaLiMaRKo|E\.N\.D|D\-Z0N3)",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Runs of word delimiters: whitespace . , _ - = ' |
private static readonly Regex WordDelimiterRegex = new Regex(@"(\s|\.|,|_|-|=|'|\|)+", RegexOptions.Compiled);

// Runs of & : \ /
private static readonly Regex SpecialCharRegex = new Regex(@"(\&|\:|\\|\/)+", RegexOptions.Compiled);

// Any single character that is neither a word character nor whitespace.
private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled);

// Whole-word a / an / the / and / or / of, plus one optional trailing whitespace character.
private static readonly Regex CommonWordRegex = new Regex(
    @"\b(a|an|the|and|or|of)\b\s?",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Whole-word part / special / edition / christmas, plus one optional trailing whitespace character.
private static readonly Regex SpecialEpisodeWordRegex = new Regex(
    @"\b(part|special|edition|christmas)\b\s?",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Two or more consecutive whitespace characters.
private static readonly Regex DuplicateSpacesRegex = new Regex(@"\s{2,}", RegexOptions.Compiled);

// One or more leading "[...]" blocks.
private static readonly Regex RequestInfoRegex = new Regex(@"^(?:\[.+?\])+", RegexOptions.Compiled);
2018-08-05 14:28:05 +00:00
2016-06-15 18:58:42 +00:00
// English names of the digits 0-9, indexed by digit value.
// NOTE(review): not referenced anywhere in the visible part of this class.
private static readonly string[] Numbers =
{
    "zero", "one", "two", "three", "four",
    "five", "six", "seven", "eight", "nine"
};

// Lower-case German umlaut -> ASCII digraph.
// NOTE(review): not referenced in the visible code; ReplaceGermanUmlauts uses its own literals.
private static Dictionary<string, string> _umlautMappings = new Dictionary<string, string>
{
    ["ö"] = "oe",
    ["ä"] = "ae",
    ["ü"] = "ue",
};
2016-06-15 18:58:42 +00:00
2020-04-08 16:54:05 +00:00
/// <summary>
/// Parses movie information from a file path, trying progressively broader inputs:
/// the file name alone, then "directory name + space + file name", then
/// "directory name + file extension".
/// </summary>
/// <param name="path">Path to the movie file.</param>
/// <returns>
/// The first successful parse result, or <c>null</c> when every attempt fails or the path
/// has no parent directory to fall back on.
/// </returns>
public static ParsedMovieInfo ParseMoviePath(string path)
{
    var fileInfo = new FileInfo(path);

    var result = ParseMovieTitle(fileInfo.Name, true);
    if (result != null)
    {
        return result;
    }

    // FileInfo.Directory is null for a root-level path; without this guard the fallback
    // attempts below would throw a NullReferenceException.
    var directory = fileInfo.Directory;
    if (directory == null)
    {
        Logger.Debug("Unable to parse movie info from path without a parent directory. {0}", path);
        return null;
    }

    var directoryName = directory.Name;

    Logger.Debug("Attempting to parse movie info using directory and file names. {0}", directoryName);
    result = ParseMovieTitle(directoryName + " " + fileInfo.Name);
    if (result != null)
    {
        return result;
    }

    Logger.Debug("Attempting to parse movie info using directory name. {0}", directoryName);
    return ParseMovieTitle(directoryName + fileInfo.Extension);
}
2020-04-08 16:54:05 +00:00
/// <summary>
/// Parses a release title (or file/folder name) into a <c>ParsedMovieInfo</c>.
/// </summary>
/// <param name="title">Release title to parse.</param>
/// <param name="isDir">
/// When true, the patterns in <c>ReportMovieTitleFolderRegex</c> are tried after the regular ones.
/// </param>
/// <returns>
/// The parsed movie info, or <c>null</c> when validation fails, no pattern produces a result,
/// or an exception is raised while parsing.
/// </returns>
public static ParsedMovieInfo ParseMovieTitle(string title, bool isDir = false)
{
    var originalTitle = title;

    try
    {
        if (!ValidateBeforeParsing(title))
        {
            return null;
        }

        Logger.Debug("Parsing string '{0}'", title);

        if (ReversedTitleRegex.IsMatch(title))
        {
            // Reverse everything except the (recognised) file extension, which stays at the end.
            var reversedChars = RemoveFileExtension(title).ToCharArray();
            Array.Reverse(reversedChars);

            title = new string(reversedChars) + title.Substring(reversedChars.Length);

            Logger.Debug("Reversed name detected. Converted to '{0}'", title);
        }

        // Drop the extension, trim leading/trailing dashes and underscores,
        // and normalise full-width brackets to ASCII ones.
        var releaseTitle = RemoveFileExtension(title)
            .Trim('-', '_')
            .Replace("【", "[")
            .Replace("】", "]");

        var simpleTitle = SimpleTitleRegex.Replace(releaseTitle);

        // TODO: Quick fix stripping [url] - prefixes.
        simpleTitle = WebsitePrefixRegex.Replace(simpleTitle);
        simpleTitle = WebsitePostfixRegex.Replace(simpleTitle);
        simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle);

        // Only strip a trailing [..] block when it actually parses as a known quality.
        simpleTitle = CleanQualityBracketsRegex.Replace(
            simpleTitle,
            bracket => QualityParser.ParseQualityName(bracket.Value).Quality != Qualities.Quality.Unknown
                ? string.Empty
                : bracket.Value);

        var candidateRegexes = new List<Regex>(ReportMovieTitleRegex);
        if (isDir)
        {
            candidateRegexes.AddRange(ReportMovieTitleFolderRegex);
        }

        foreach (var candidate in candidateRegexes)
        {
            var matches = candidate.Matches(simpleTitle);
            if (matches.Count == 0)
            {
                continue;
            }

            Logger.Trace(candidate);

            try
            {
                var parsed = ParseMovieMatchCollection(matches);
                if (parsed == null)
                {
                    continue;
                }

                //TODO: Add tests for this!
                var simpleReleaseTitle = SimpleReleaseTitleRegex.Replace(releaseTitle, string.Empty);

                // Replace the movie title with a neutral placeholder so that words in the
                // title are not picked up by the release group / language / edition parsers.
                var titleGroup = matches[0].Groups["title"];
                var titleToMask = titleGroup.Success ? titleGroup.Value : parsed.MovieTitle;
                if (titleToMask.IsNotNullOrWhiteSpace())
                {
                    var placeholder = titleToMask.Contains(".") ? "A.Movie" : "A Movie";
                    simpleReleaseTitle = simpleReleaseTitle.Replace(titleToMask, placeholder);
                }

                parsed.ReleaseGroup = ParseReleaseGroup(simpleReleaseTitle);

                // A sub group captured by the title pattern overrides the parsed release group.
                var subGroup = GetSubGroup(matches);
                if (!subGroup.IsNullOrWhiteSpace())
                {
                    parsed.ReleaseGroup = subGroup;
                }

                Logger.Debug("Release Group parsed: {0}", parsed.ReleaseGroup);

                // Mask the release group as well before looking for languages.
                var languageSource = parsed.ReleaseGroup.IsNotNullOrWhiteSpace()
                    ? simpleReleaseTitle.Replace(parsed.ReleaseGroup, "RlsGrp")
                    : simpleReleaseTitle;
                parsed.Languages = LanguageParser.ParseLanguages(languageSource);
                Logger.Debug("Languages parsed: {0}", string.Join(", ", parsed.Languages));

                parsed.Quality = QualityParser.ParseQuality(title);
                Logger.Debug("Quality parsed: {0}", parsed.Quality);

                if (parsed.Edition.IsNullOrWhiteSpace())
                {
                    parsed.Edition = ParseEdition(simpleReleaseTitle);
                    Logger.Debug("Edition parsed: {0}", parsed.Edition);
                }

                parsed.ReleaseHash = GetReleaseHash(matches);
                if (!parsed.ReleaseHash.IsNullOrWhiteSpace())
                {
                    Logger.Debug("Release Hash parsed: {0}", parsed.ReleaseHash);
                }

                parsed.OriginalTitle = originalTitle;
                parsed.ReleaseTitle = releaseTitle;
                parsed.SimpleReleaseTitle = simpleReleaseTitle;

                parsed.ImdbId = ParseImdbId(simpleReleaseTitle);
                parsed.TmdbId = ParseTmdbId(simpleReleaseTitle);

                return parsed;
            }
            catch (InvalidDateException ex)
            {
                // An invalid date stops the search entirely instead of trying further patterns.
                Logger.Debug(ex, ex.Message);
                break;
            }
        }
    }
    catch (Exception e)
    {
        // Errors are not reported for titles that mention "password" or "yenc".
        var loweredTitle = title.ToLower();
        if (!(loweredTitle.Contains("password") || loweredTitle.Contains("yenc")))
        {
            Logger.Error(e, "An error has occurred while trying to parse {0}", title);
        }
    }

    Logger.Debug("Unable to parse {0}", title);
    return null;
}
2017-01-09 16:52:55 +00:00
/// <summary>
/// Extracts the first IMDb id ("tt" followed by 7 or 8 digits) found in a title.
/// </summary>
/// <param name="title">Text to search.</param>
/// <returns>The matched id as it appears in the title, or an empty string when none is found.</returns>
public static string ParseImdbId(string title)
{
    var imdbMatch = ReportImdbId.Match(title);
    if (!imdbMatch.Success)
    {
        return "";
    }

    var idGroup = imdbMatch.Groups["imdbid"];

    // "tt" + 7 digits = 9 characters, "tt" + 8 digits = 10 characters.
    var hasExpectedLength = idGroup.Length == 9 || idGroup.Length == 10;

    return idGroup.Value != null && hasExpectedLength ? idGroup.Value : "";
}
2020-11-17 20:55:55 +00:00
/// <summary>
/// Extracts a TMDb id written as "tmdb-123" or "tmdbid-123" from a title.
/// </summary>
/// <param name="title">Text to search.</param>
/// <returns>The numeric id, or 0 when no id is present or it does not fit in an <c>int</c>.</returns>
public static int ParseTmdbId(string title)
{
    var tmdbMatch = ReportTmdbId.Match(title);
    if (!tmdbMatch.Success)
    {
        return 0;
    }

    var idText = tmdbMatch.Groups["tmdbid"].Value;
    if (idText == null)
    {
        return 0;
    }

    return int.TryParse(idText, out var tmdbId) ? tmdbId : 0;
}
2017-06-18 21:12:14 +00:00
/// <summary>
/// Extracts the edition (e.g. "Director's Cut") from a title using <c>ReportEditionRegex</c>.
/// </summary>
/// <param name="languageTitle">Title to search.</param>
/// <returns>The edition with dots replaced by spaces, or an empty string when none is found.</returns>
public static string ParseEdition(string languageTitle)
{
    var editionMatch = ReportEditionRegex.Match(languageTitle);
    if (!editionMatch.Success)
    {
        return "";
    }

    var edition = editionMatch.Groups["edition"].Value;
    if (edition == null || !edition.IsNotNullOrWhiteSpace())
    {
        return "";
    }

    return edition.Replace(".", " ");
}
2017-03-06 01:13:13 +00:00
/// <summary>
/// Replaces German umlauts and the sharp s with their ASCII transliterations
/// (ä→ae, ö→oe, ü→ue, Ä→Ae, Ö→Oe, Ü→Ue, ß→ss).
/// </summary>
/// <param name="s">Text to transliterate.</param>
/// <returns>The text with all listed characters replaced.</returns>
public static string ReplaceGermanUmlauts(string s)
{
    return s
        .Replace("ä", "ae")
        .Replace("ö", "oe")
        .Replace("ü", "ue")
        .Replace("Ä", "Ae")
        .Replace("Ö", "Oe")
        .Replace("Ü", "Ue")
        .Replace("ß", "ss");
}
2017-03-07 23:29:02 +00:00
// Accepts 1-10 digits, optionally prefixed with a lower-case "tt". Built once instead of per call.
private static readonly Regex NormalizeImdbIdRegex = new Regex(@"^(\d{1,10}|(tt)\d{1,10})$", RegexOptions.Compiled);

/// <summary>
/// Normalizes an IMDb id to the canonical "tt" + at-least-7-digits form,
/// e.g. "183790" and "tt0183790" both become "tt0183790".
/// </summary>
/// <param name="imdbId">Id with or without the "tt" prefix.</param>
/// <returns>
/// The normalized id, or <c>null</c> when the input is null, is not a valid id,
/// or is two characters or shorter.
/// </returns>
public static string NormalizeImdbId(string imdbId)
{
    // Regex.IsMatch throws on null input; treat null like any other invalid id.
    if (imdbId == null || !NormalizeImdbIdRegex.IsMatch(imdbId))
    {
        return null;
    }

    // Ids of one or two characters are rejected (existing behaviour, kept for compatibility).
    if (imdbId.Length <= 2)
    {
        return null;
    }

    var digits = imdbId.Replace("tt", "").PadLeft(7, '0');
    return $"tt{digits}";
}
2017-03-09 00:00:00 +00:00
/// <summary>
/// Converts a string into a URL slug: lower-cased, accents removed, whitespace turned into
/// dashes, and characters outside a-z, 0-9, '-' and '_' dropped.
/// </summary>
/// <param name="value">Text to convert.</param>
/// <returns>The slug.</returns>
public static string ToUrlSlug(string value)
{
    // Lower-case first, then strip accents.
    var normalized = value.ToLowerInvariant().RemoveAccent();

    // Every whitespace character becomes a dash.
    var dashed = Regex.Replace(normalized, @"\s", "-", RegexOptions.Compiled);

    // Drop everything that is not valid in a slug.
    var cleaned = Regex.Replace(dashed, @"[^a-z0-9\s-_]", "", RegexOptions.Compiled);

    // Trim dashes and underscores from both ends.
    var trimmed = cleaned.Trim('-', '_');

    // Collapse runs of two or more '-' / '_' into a single character.
    return Regex.Replace(trimmed, @"([-_]){2,}", "$1", RegexOptions.Compiled);
}
2020-07-22 02:08:59 +00:00
/// <summary>
/// Produces a comparison-friendly form of a movie title: common words and non-word characters
/// removed, lower-cased, German umlauts transliterated and accents stripped.
/// </summary>
/// <param name="title">Title to clean.</param>
/// <returns>The cleaned title; titles that are purely numeric are returned unchanged.</returns>
public static string CleanMovieTitle(this string title)
{
    // If the title only contains numbers, return it as is.
    if (long.TryParse(title, out _))
    {
        return title;
    }

    var stripped = NormalizeRegex.Replace(title, string.Empty);
    var lowered = stripped.ToLower();

    return ReplaceGermanUmlauts(lowered).RemoveAccent();
}
2020-01-22 21:47:33 +00:00
/// <summary>
/// Normalizes an episode title: removes the words matched by <c>SpecialEpisodeWordRegex</c>,
/// turns punctuation into spaces, collapses repeated whitespace, trims and lower-cases.
/// </summary>
/// <param name="title">Title to normalize.</param>
/// <returns>The normalized title.</returns>
public static string NormalizeEpisodeTitle(this string title)
{
    var withoutSpecialWords = SpecialEpisodeWordRegex.Replace(title, string.Empty);
    var withoutPunctuation = PunctuationRegex.Replace(withoutSpecialWords, " ");
    var collapsed = DuplicateSpacesRegex.Replace(withoutPunctuation, " ");

    return collapsed.Trim().ToLower();
}
2020-01-22 21:47:33 +00:00
/// <summary>
/// Normalizes a title: word delimiters become spaces, punctuation and common words are
/// removed, repeated whitespace is collapsed, and the result is trimmed and lower-cased.
/// </summary>
/// <param name="title">Title to normalize.</param>
/// <returns>The normalized title.</returns>
public static string NormalizeTitle(this string title)
{
    var spaced = WordDelimiterRegex.Replace(title, " ");
    var withoutPunctuation = PunctuationRegex.Replace(spaced, string.Empty);
    var withoutCommonWords = CommonWordRegex.Replace(withoutPunctuation, string.Empty);
    var collapsed = DuplicateSpacesRegex.Replace(withoutCommonWords, " ");
    var withoutSpecialChars = SpecialCharRegex.Replace(collapsed, string.Empty);

    return withoutSpecialChars.Trim().ToLower();
}
2020-01-22 21:47:33 +00:00
/// <summary>
/// Removes the characters &lt; &gt; ? * : | (and any whitespace directly before them) from a release title.
/// </summary>
/// <param name="title">Release title to simplify.</param>
/// <returns>The simplified title.</returns>
public static string SimplifyReleaseTitle(this string title)
    => SimpleReleaseTitleRegex.Replace(title, string.Empty);
2013-12-09 06:25:11 +00:00
/// <summary>
/// Determines the release group of a release title.
/// </summary>
/// <param name="title">Release title to inspect.</param>
/// <returns>
/// The leading "[SubGroup]" tag if present; otherwise the last match of the manual exception
/// list; otherwise the last "-Group" style match. Returns <c>null</c> when nothing matches or
/// the matched group is purely numeric.
/// </returns>
public static string ParseReleaseGroup(string title)
{
    var cleaned = RemoveFileExtension(title.Trim());
    cleaned = WebsitePrefixRegex.Replace(cleaned);
    cleaned = CleanTorrentSuffixRegex.Replace(cleaned);

    // A leading [SubGroup] tag wins over everything else.
    var animeMatch = AnimeReleaseGroupRegex.Match(cleaned);
    if (animeMatch.Success)
    {
        return animeMatch.Groups["subgroup"].Value;
    }

    cleaned = CleanReleaseGroupRegex.Replace(cleaned);

    // Manually listed groups that do not follow the -RlsGrp convention; the last hit is used.
    var exceptionMatches = ExceptionReleaseGroupRegex.Matches(cleaned);
    if (exceptionMatches.Count != 0)
    {
        return exceptionMatches[exceptionMatches.Count - 1].Groups["releasegroup"].Value;
    }

    var groupMatches = ReleaseGroupRegex.Matches(cleaned);
    if (groupMatches.Count == 0)
    {
        return null;
    }

    var candidate = groupMatches[groupMatches.Count - 1].Groups["releasegroup"].Value;

    // A purely numeric "group" is not treated as a release group.
    return int.TryParse(candidate, out _) ? null : candidate;
}
2014-04-17 23:16:40 +00:00
/// <summary>
/// Removes a trailing file extension from a title, but only when it is a known media
/// extension or one of ".par2" / ".nzb"; any other trailing ".xxx" token is kept.
/// </summary>
/// <param name="title">Title or file name.</param>
/// <returns>The title without its recognised extension.</returns>
public static string RemoveFileExtension(string title)
{
    return FileExtensionRegex.Replace(title, extensionMatch =>
    {
        var lowered = extensionMatch.Value.ToLower();

        var isKnownExtension = MediaFiles.MediaFileExtensions.Extensions.Contains(lowered)
                               || lowered == ".par2"
                               || lowered == ".nzb";

        return isKnownExtension ? string.Empty : extensionMatch.Value;
    });
}
2018-08-05 14:28:05 +00:00
2017-01-04 21:59:34 +00:00
/// <summary>
/// Builds a <c>ParsedMovieInfo</c> (title, year, edition) from the first match of a title pattern.
/// </summary>
/// <param name="matchCollection">Non-empty match collection produced by one of the title regexes.</param>
/// <returns>
/// The parsed info, or <c>null</c> when the "title" group did not participate in the match
/// or consists of a lone "(".
/// </returns>
private static ParsedMovieInfo ParseMovieMatchCollection(MatchCollection matchCollection)
{
    var firstMatch = matchCollection[0];
    var titleGroup = firstMatch.Groups["title"];

    if (!titleGroup.Success || titleGroup.Value == "(")
    {
        return null;
    }

    // Underscores count as spaces; leading [..] request-info blocks are dropped.
    var rawTitle = titleGroup.Value.Replace('_', ' ');
    rawTitle = RequestInfoRegex.Replace(rawTitle, "").Trim(' ');

    // Dots normally separate words, but inside acronyms (e.g. "S.H.I.E.L.D.") and after
    // "Dr" they must be preserved. Walk the dot-separated parts and decide per part.
    var parts = rawTitle.Split('.');
    var pieces = new List<string>();
    var previousAcronym = false;

    for (var i = 0; i < parts.Length; i++)
    {
        var current = parts[i];
        var lowered = current.ToLower();
        var following = i + 1 < parts.Length ? parts[i + 1] : "";

        var isSingleNonDigit = current.Length == 1 && lowered != "a" && !int.TryParse(current, out _);
        var hasSuccessor = i < parts.Length - 1;
        var nextIsSingleDigit = following.Length == 1 && int.TryParse(following, out _);

        var keepsDot =

            // A single character (not "a", not a digit) continues or starts an acronym,
            // unless it is the last part or is directly followed by a single digit.
            (isSingleNonDigit && (previousAcronym || (hasSuccessor && !nextIsSingleDigit))) ||

            // "a" only counts as an acronym letter next to other single-character parts.
            (lowered == "a" && (previousAcronym || following.Length == 1)) ||

            // "Dr" always keeps its dot.
            lowered == "dr";

        if (keepsDot)
        {
            pieces.Add(current + ".");
            previousAcronym = true;
            continue;
        }

        if (previousAcronym)
        {
            pieces.Add(" ");
            previousAcronym = false;
        }

        pieces.Add(current + " ");
    }

    var movieName = string.Concat(pieces).Trim(' ');

    // A missing or non-numeric year leaves airYear at 0.
    int.TryParse(firstMatch.Groups["year"].Value, out var airYear);

    var result = new ParsedMovieInfo { Year = airYear };

    var editionGroup = firstMatch.Groups["edition"];
    if (editionGroup.Success)
    {
        result.Edition = editionGroup.Value.Replace(".", " ");
    }

    result.MovieTitle = movieName;

    Logger.Debug("Movie Parsed. {0}", result);

    return result;
}
2013-04-29 03:11:43 +00:00
/// <summary>
/// Cheap pre-checks that decide whether a title is worth running through the title regexes.
/// </summary>
/// <param name="title">Release title to validate.</param>
/// <returns>
/// <c>false</c> when the title is null, contains both "password" and "yenc", has no letter or
/// digit at all, or (without its extension) matches one of <c>RejectHashedReleasesRegex</c>;
/// otherwise <c>true</c>.
/// </returns>
private static bool ValidateBeforeParsing(string title)
{
    // A null title cannot be parsed; reject it here instead of throwing a NullReferenceException.
    if (title == null)
    {
        return false;
    }

    var loweredTitle = title.ToLower();
    if (loweredTitle.Contains("password") && loweredTitle.Contains("yenc"))
    {
        Logger.Debug("Rejected release title containing both 'password' and 'yenc': {0}", title);
        return false;
    }

    if (!title.Any(char.IsLetterOrDigit))
    {
        return false;
    }

    var titleWithoutExtension = RemoveFileExtension(title);
    if (RejectHashedReleasesRegex.Any(v => v.IsMatch(titleWithoutExtension)))
    {
        Logger.Debug("Rejected Hashed Release Title: " + title);
        return false;
    }

    return true;
}
2014-05-19 19:14:41 +00:00
/// <summary>
/// Returns the value of the "subgroup" capture of the first match.
/// </summary>
/// <param name="matchCollection">Non-empty match collection.</param>
/// <returns>The captured sub group, or an empty string when the group did not participate.</returns>
private static string GetSubGroup(MatchCollection matchCollection)
{
    var subGroup = matchCollection[0].Groups["subgroup"];

    return subGroup.Success ? subGroup.Value : string.Empty;
}
/// <summary>
/// Returns the value of the "hash" capture of the first match with surrounding square brackets removed.
/// </summary>
/// <param name="matchCollection">Non-empty match collection.</param>
/// <returns>
/// The hash, or an empty string when the group did not participate or the captured value is
/// the resolution "1280x720" rather than a hash.
/// </returns>
private static string GetReleaseHash(MatchCollection matchCollection)
{
    var hashGroup = matchCollection[0].Groups["hash"];
    if (!hashGroup.Success)
    {
        return string.Empty;
    }

    var hashValue = hashGroup.Value.Trim('[', ']');

    // "[1280x720]" has the same shape as a bracketed hash but is a resolution tag.
    return hashValue.Equals("1280x720") ? string.Empty : hashValue;
}
2012-08-03 07:01:34 +00:00
}
2015-01-21 22:48:35 +00:00
}