using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using NLog;
using NzbDrone.Common.Extensions;
using NzbDrone.Common.Instrumentation;
using NzbDrone.Core.Parser.Model;
using NzbDrone.Core.Tv;
namespace NzbDrone.Core.Parser
{
public static class Parser
{
private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(Parser));
private static readonly Regex[] ReportMovieTitleRegex = new[]
{
//Special, Despecialized, etc. Edition Movies, e.g: Mission.Impossible.3.Special.Edition.2011
new Regex(@"^(?
(?![(\[]).+?)?(?:(?:[-_\W](?(\.?((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final|Extended|Rogue|Special|Despecialized).(Cut|Edition|Version)|Extended|Uncensored|Remastered|Unrated|Uncut|IMAX)))\.(?(19|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Special, Despecialized, etc. Edition Movies, e.g: Mission.Impossible.3.2011.Special.Edition //TODO: Seems to slow down parsing heavily!
new Regex(@"^(?(?![(\[]).+?)?(?:(?:[-_\W](?(19|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)(?((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final|Extended|Rogue|Special|Despecialized).(Cut|Edition|Version)|Extended|Uncensored|Remastered|Unrated|Uncut|IMAX))",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Normal movie format, e.g: Mission.Impossible.3.2011
new Regex(@"^(?(?![(\[]).+?)?(?:(?:[-_\W](?(19|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled),
//PassThePopcorn Torrent names: Star.Wars[PassThePopcorn]
new Regex(@"^(?.+?)?(?:(?:[-_\W](?(\[\w *\])))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled),
//That did not work? Maybe some tool uses [] for years. Who would do that?
new Regex(@"^(?(?![(\[]).+?)?(?:(?:[-_\W](?(19|20)\d{2}(?!p|i|\d+|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled),
//As a last resort for movies that have ( or [ in their title.
new Regex(@"^(?.+?)?(?:(?:[-_\W](?(19|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
private static readonly Regex[] ReportMovieTitleFolderRegex = new[]
{
//When year comes first.
new Regex(@"^(?:(?:[-_\W](?(19|20)\d{2}(?!p|i|\d+|\W\d+)))+(\W+|_|$)(?.+?)?$")
};
private static readonly Regex[] ReportTitleRegex = new[]
{
//Anime - Absolute Episode Number + Title + Season+Episode
//Todo: This currently breaks series that start with numbers
// new Regex(@"^(?:(?\d{2,3})(?:_|-|\s|\.)+)+(?.+?)(?:\W|_)+(?:S?(?(?\d{2}(?!\d+)))+)",
// RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Multi-Part episodes without a title (S01E05.S01E06)
new Regex(@"^(?:\W*S?(?(?\d{1,3}(?!\d+)))+){2,}",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Matches Movie name with AirYear
new Regex(@"^(?.+?)?(?:(?:[-_\W](?(?(?\d{2,3}(?!\d+)))+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - [SubGroup] Title Episode Absolute Episode Number ([SubGroup] Series Title Episode 01)
new Regex(@"^(?:\[(?.+?)\][-_. ]?)(?.+?)[-_. ](?:Episode)(?:[-_. ]+(?(?\[.{8}\])?(?:$|\.)?",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - [SubGroup] Title Absolute Episode Number + Season+Episode
new Regex(@"^(?:\[(?.+?)\](?:_|-|\s|\.)?)(?.+?)(?:(?:[-_\W](?\d{2,3}))+(?:_|-|\s|\.)+(?:S?(?(?\d{2}(?!\d+)))+).*?(?[(\[]\w{8}[)\]])?(?:$|\.)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - [SubGroup] Title Season+Episode + Absolute Episode Number
new Regex(@"^(?:\[(?.+?)\](?:_|-|\s|\.)?)(?.+?)(?:[-_\W](?(?\d{2}(?!\d+)))+)(?:(?:_|-|\s|\.)+(?(?\[\w{8}\])?(?:$|\.)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - [SubGroup] Title Season+Episode
new Regex(@"^(?:\[(?.+?)\](?:_|-|\s|\.)?)(?.+?)(?:[-_\W](?(?\d{2}(?!\d+)))+)(?:\s|\.).*?(?\[\w{8}\])?(?:$|\.)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - [SubGroup] Title with trailing number Absolute Episode Number
new Regex(@"^\[(?.+?)\][-_. ]?(?[^-]+?\d+?)[-_. ]+(?:[-_. ]?(?\d{3}(?!\d+)))+(?:[-_. ]+(?special|ova|ovd))?.*?(?\[\w{8}\])?(?:$|\.mkv)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - [SubGroup] Title - Absolute Episode Number
new Regex(@"^\[(?.+?)\][-_. ]?(?.+?)(?:[. ]-[. ](?\d{2,3}(?!\d+|[-])))+(?:[-_. ]+(?special|ova|ovd))?.*?(?\[\w{8}\])?(?:$|\.mkv)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - [SubGroup] Title Absolute Episode Number
new Regex(@"^\[(?.+?)\][-_. ]?(?.+?)[-_. ]+\(?(?:[-_. ]?(?\d{2,3}(?!\d+)))+\)?(?:[-_. ]+(?special|ova|ovd))?.*?(?\[\w{8}\])?(?:$|\.mkv)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - Title Season EpisodeNumber + Absolute Episode Number [SubGroup]
new Regex(@"^(?.+?)(?:[-_\W](?(?(?(?.+?)\](?:$|\.mkv)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - Title Absolute Episode Number [SubGroup]
new Regex(@"^(?.+?)(?:(?:_|-|\s|\.)+(?\d{3}(?!\d+)))+(?:.+?)\[(?.+?)\].*?(?\[\w{8}\])?(?:$|\.)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - Title Absolute Episode Number [Hash]
new Regex(@"^(?.+?)(?:(?:_|-|\s|\.)+(?\d{2,3}(?!\d+)))+(?:[-_. ]+(?special|ova|ovd))?[-_. ]+.*?(?\[\w{8}\])(?:$|\.)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes with airdate AND season/episode number, capture season/epsiode only
new Regex(@"^(?.+?)?\W*(?\d{4}\W+[0-1][0-9]\W+[0-3][0-9])(?!\W+[0-3][0-9])[-_. ](?:s?(?(?(?.+?)?\W*(?\d{4})\W+(?[0-1][0-9])\W+(?[0-3][0-9])(?!\W+[0-3][0-9]).+?(?:s?(?(?(?.+?)(?:(?:[-_\W](?(?\d{1,3}(?!\d+)))+){2,}",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc)
new Regex(@"^(?.+?)(?:(?:[-_\W](?(?\d{2,3}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?\d{2,3}(?!\d+)))*)\W?(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes with a title, 4 digit season number, Single episodes (S2016E05, etc) & Multi-episode (S2016E05E06, S2016E05-06, S2016E05 E06, etc)
new Regex(@"^(?.+?)(?:(?:[-_\W](?(?\d{2,3}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?\d{2,3}(?!\d+)))*)\W?(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Mini-Series with year in title, treated as season 1, episodes are labelled as Part01, Part 01, Part.1
new Regex(@"^(?.+?\d{4})(?:\W+(?:(?:Part\W?|e)(?\d{1,2}(?!\d+)))+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Mini-Series, treated as season 1, episodes are labelled as Part01, Part 01, Part.1
new Regex(@"^(?.+?)(?:\W+(?:(?:Part\W?|(?\d{1,2}(?!\d+)))+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Mini-Series, treated as season 1, episodes are labelled as Part One/Two/Three/...Nine, Part.One, Part_One
new Regex(@"^(?.+?)(?:\W+(?:Part[-._ ](?One|Two|Three|Four|Five|Six|Seven|Eight|Nine)(?>[-._ ])))",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Mini-Series, treated as season 1, episodes are labelled as XofY
new Regex(@"^(?.+?)(?:\W+(?:(?(?.*?)(?:[-_\W](?(?(?.*?)(?:\W?|_)S(?(?(?.+?)[-_. ]S(?(?(?.*?)(?:\W?|_)S(?(?(?.*?)(?:\W?|_)S(?(?(?.+?)(?:_|-|\s|\.)+)(?:S?(?(?(?.+?)(?:_|-|\s|\.)+)(?:S?(?(?(?.+?)(?:_|-|\s|\.)+S(?\d{2}(?!\d+))(\W-\W)E(?(?.+?)\W(?:S|Season)\W?(?\d{1,2}(?!\d+))(\W+|_|$)(?EXTRAS|SUBPACK)?(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//4 digit season only releases
new Regex(@"^(?.+?)\W(?:S|Season)\W?(?\d{4}(?!\d+))(\W+|_|$)(?EXTRAS|SUBPACK)?(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes with a title and season/episode in square brackets
new Regex(@"^(?.+?)(?:(?:[-_\W](?(?(?.+?)?(?:(?:[-_\W](?(?[1-9][0-9]|[0][1-9])(?![a-z]|\d+))+",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes with airdate
new Regex(@"^(?.+?)?\W*(?\d{4})\W+(?[0-1][0-9])\W+(?[0-3][0-9])(?!\W+[0-3][0-9])",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Supports 1103/1113 naming
new Regex(@"^(?.+?)?(?:(?:[-_\W](?(?(?(?\d{4}(?!\d+|i|p)))+)(\W+|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//4 digit episode number
//Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc)
new Regex(@"^(?.+?)(?:(?:[-_\W](?(?\d{4}(?!\d+|i|p)))+)\W?(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes with single digit episode number (S01E1, S01E5E6, etc)
new Regex(@"^(?.*?)(?:(?:[-_\W](?(?\d{1}))+)+(\W+|_|$)(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//iTunes Season 1\05 Title (Quality).ext
new Regex(@"^(?:Season(?:_|-|\s|\.)(?(?(?.+?)\][-_. ]?)?(?.+?)(?:(?:_|-|\s|\.)+(?:e|ep)(?\d{2,3}))+.*?(?\[\w{8}\])?(?:$|\.)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - Title Episode Absolute Episode Number (Series Title Episode 01)
new Regex(@"^(?.+?)[-_. ](?:Episode)(?:[-_. ]+(?(?\[.{8}\])?(?:$|\.)?",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - Title Absolute Episode Number
new Regex(@"^(?:\[(?.+?)\][-_. ]?)?(?.+?)(?:[-_. ]+(?(?\[.{8}\])?(?:$|\.)?",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - Title {Absolute Episode Number}
new Regex(@"^(?:\[(?.+?)\][-_. ]?)?(?.+?)(?:(?:[-_\W](?(?\[.{8}\])?(?:$|\.)?",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Extant, terrible multi-episode naming (extant.10708.hdtv-lol.mp4)
new Regex(@"^(?.+?)[-_. ](?[0]?\d?)(?:(?\d{2}){2}(?!\d+))[-_. ]",
RegexOptions.IgnoreCase | RegexOptions.Compiled)
};
private static readonly Regex[] RejectHashedReleasesRegex = new Regex[]
{
// Generic match for md5 and mixed-case hashes.
new Regex(@"^[0-9a-zA-Z]{32}", RegexOptions.Compiled),
// Generic match for shorter lower-case hashes.
new Regex(@"^[a-z0-9]{24}$", RegexOptions.Compiled),
// Format seen on some NZBGeek releases
// Be very strict with these coz they are very close to the valid 101 ep numbering.
new Regex(@"^[A-Z]{11}\d{3}$", RegexOptions.Compiled),
new Regex(@"^[a-z]{12}\d{3}$", RegexOptions.Compiled),
//Backup filename (Unknown origins)
new Regex(@"^Backup_\d{5,}S\d{2}-\d{2}$", RegexOptions.Compiled),
//123 - Started appearing December 2014
new Regex(@"^123$", RegexOptions.Compiled),
//abc - Started appearing January 2015
new Regex(@"^abc$", RegexOptions.Compiled | RegexOptions.IgnoreCase),
//b00bs - Started appearing January 2015
new Regex(@"^b00bs$", RegexOptions.Compiled | RegexOptions.IgnoreCase)
};
//Regex to detect whether the title was reversed.
private static readonly Regex ReversedTitleRegex = new Regex(@"[-._ ](p027|p0801|\d{2}E\d{2}S)[-._ ]", RegexOptions.Compiled);
private static readonly Regex NormalizeRegex = new Regex(@"((?:\b|_)(?tt\d{7})", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex SimpleTitleRegex = new Regex(@"(?:480[ip]|576[ip]|720[ip]|1080[ip]|2160[ip]|[xh][\W_]?26[45]|DD\W?5\W1|[<>?*:|]|848x480|1280x720|1920x1080|(8|10)b(it)?)\s*",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex WebsitePrefixRegex = new Regex(@"^\[\s*[a-z]+(\.[a-z]+)+\s*\][- ]*",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex AirDateRegex = new Regex(@"^(.*?)(?\d{4})[_.-](?[0-1][0-9])[_.-](?[0-3][0-9])|(?[0-1][0-9])[_.-](?[0-3][0-9])[_.-](?\d{4}))(?!\d)",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex SixDigitAirDateRegex = new Regex(@"(?<=[_.-])(?(?[1-9]\d{1})(?[0-1][0-9])(?[0-3][0-9]))(?=[_.-])",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex CleanReleaseGroupRegex = new Regex(@"^(.*?[-._ ](S\d+E\d+)[-._ ])|(-(RP|1|NZBGeek|Obfuscated|sample))+$",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex CleanTorrentSuffixRegex = new Regex(@"\[(?:ettv|rartv|rarbg|cttv)\]$",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex ReleaseGroupRegex = new Regex(@"-(?[a-z0-9]+)(?(?!\s).+?(?.+?)(?:\W|_)?(?\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex WordDelimiterRegex = new Regex(@"(\s|\.|,|_|-|=|\|)+", RegexOptions.Compiled);
private static readonly Regex SpecialCharRegex = new Regex(@"(\&|\:|\\|\/)+", RegexOptions.Compiled);
private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled);
private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex SpecialEpisodeWordRegex = new Regex(@"\b(part|special|edition|christmas)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex DuplicateSpacesRegex = new Regex(@"\s{2,}", RegexOptions.Compiled);
private static readonly Regex RequestInfoRegex = new Regex(@"\[.+?\]", RegexOptions.Compiled);
private static readonly string[] Numbers = new[] { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" };
public static ParsedEpisodeInfo ParsePath(string path)
{
var fileInfo = new FileInfo(path);
var result = ParseTitle(fileInfo.Name);
if (result == null)
{
Logger.Debug("Attempting to parse episode info using directory and file names. {0}", fileInfo.Directory.Name);
result = ParseTitle(fileInfo.Directory.Name + " " + fileInfo.Name);
}
if (result == null)
{
Logger.Debug("Attempting to parse episode info using directory name. {0}", fileInfo.Directory.Name);
result = ParseTitle(fileInfo.Directory.Name + fileInfo.Extension);
}
return result;
}
public static ParsedMovieInfo ParseMoviePath(string path)
{
var fileInfo = new FileInfo(path);
var result = ParseMovieTitle(fileInfo.Name, true);
if (result == null)
{
Logger.Debug("Attempting to parse episode info using directory and file names. {0}", fileInfo.Directory.Name);
result = ParseMovieTitle(fileInfo.Directory.Name + " " + fileInfo.Name);
}
if (result == null)
{
Logger.Debug("Attempting to parse episode info using directory name. {0}", fileInfo.Directory.Name);
result = ParseMovieTitle(fileInfo.Directory.Name + fileInfo.Extension);
}
return result;
}
public static ParsedMovieInfo ParseMovieTitle(string title, bool isDir = false)
{
ParsedMovieInfo realResult = null;
try
{
if (!ValidateBeforeParsing(title)) return null;
//title = title.Replace(" ", "."); //TODO: Determine if this breaks something. However, it shouldn't.
Logger.Debug("Parsing string '{0}'", title);
if (ReversedTitleRegex.IsMatch(title))
{
var titleWithoutExtension = RemoveFileExtension(title).ToCharArray();
Array.Reverse(titleWithoutExtension);
title = new string(titleWithoutExtension) + title.Substring(titleWithoutExtension.Length);
Logger.Debug("Reversed name detected. Converted to '{0}'", title);
}
var simpleTitle = SimpleTitleRegex.Replace(title, string.Empty);
simpleTitle = RemoveFileExtension(simpleTitle);
// TODO: Quick fix stripping [url] - prefixes.
simpleTitle = WebsitePrefixRegex.Replace(simpleTitle, string.Empty);
simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle, string.Empty);
var allRegexes = ReportMovieTitleRegex.ToList();
if (isDir)
{
allRegexes.AddRange(ReportMovieTitleFolderRegex);
}
foreach (var regex in allRegexes)
{
var match = regex.Matches(simpleTitle);
if (match.Count != 0)
{
Logger.Trace(regex);
try
{
var result = ParseMovieMatchCollection(match);
if (result != null)
{
result.Language = LanguageParser.ParseLanguage(simpleTitle.Replace(result.MovieTitle, "A Movie"));
Logger.Debug("Language parsed: {0}", result.Language);
result.Quality = QualityParser.ParseQuality(title);
Logger.Debug("Quality parsed: {0}", result.Quality);
result.ReleaseGroup = ParseReleaseGroup(title);
result.ImdbId = ParseImdbId(title);
var subGroup = GetSubGroup(match);
if (!subGroup.IsNullOrWhiteSpace())
{
result.ReleaseGroup = subGroup;
}
Logger.Debug("Release Group parsed: {0}", result.ReleaseGroup);
result.ReleaseHash = GetReleaseHash(match);
if (!result.ReleaseHash.IsNullOrWhiteSpace())
{
Logger.Debug("Release Hash parsed: {0}", result.ReleaseHash);
}
realResult = result;
return result;
}
}
catch (InvalidDateException ex)
{
Logger.Debug(ex, ex.Message);
break;
}
}
}
}
catch (Exception e)
{
if (!title.ToLower().Contains("password") && !title.ToLower().Contains("yenc"))
Logger.Error(e, "An error has occurred while trying to parse " + title);
}
Logger.Debug("Unable to parse {0}", title);
return realResult;
}
public static string ParseImdbId(string title)
{
var match = ReportImdbId.Match(title);
if (match.Success)
{
if (match.Groups["imdbid"].Value != null)
{
if (match.Groups["imdbid"].Length == 9)
{
return match.Groups["imdbid"].Value;
}
}
}
return "";
}
public static ParsedEpisodeInfo ParseTitle(string title)
{
ParsedEpisodeInfo realResult = null;
try
{
if (!ValidateBeforeParsing(title)) return null;
Logger.Debug("Parsing string '{0}'", title);
if (ReversedTitleRegex.IsMatch(title))
{
var titleWithoutExtension = RemoveFileExtension(title).ToCharArray();
Array.Reverse(titleWithoutExtension);
title = new string(titleWithoutExtension) + title.Substring(titleWithoutExtension.Length);
Logger.Debug("Reversed name detected. Converted to '{0}'", title);
}
var simpleTitle = SimpleTitleRegex.Replace(title, string.Empty);
simpleTitle = RemoveFileExtension(simpleTitle);
// TODO: Quick fix stripping [url] - prefixes.
simpleTitle = WebsitePrefixRegex.Replace(simpleTitle, string.Empty);
simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle, string.Empty);
var airDateMatch = AirDateRegex.Match(simpleTitle);
if (airDateMatch.Success)
{
simpleTitle = airDateMatch.Groups[1].Value + airDateMatch.Groups["airyear"].Value + "." + airDateMatch.Groups["airmonth"].Value + "." + airDateMatch.Groups["airday"].Value;
}
var sixDigitAirDateMatch = SixDigitAirDateRegex.Match(simpleTitle);
if (sixDigitAirDateMatch.Success)
{
var airYear = sixDigitAirDateMatch.Groups["airyear"].Value;
var airMonth = sixDigitAirDateMatch.Groups["airmonth"].Value;
var airDay = sixDigitAirDateMatch.Groups["airday"].Value;
if (airMonth != "00" || airDay != "00")
{
var fixedDate = string.Format("20{0}.{1}.{2}", airYear, airMonth, airDay);
simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"].Value, fixedDate);
}
}
foreach (var regex in ReportTitleRegex)
{
var match = regex.Matches(simpleTitle);
if (match.Count != 0)
{
Logger.Trace(regex);
try
{
var result = ParseMatchCollection(match);
if (result != null)
{
if (result.FullSeason && title.ContainsIgnoreCase("Special"))
{
result.FullSeason = false;
result.Special = true;
}
result.Language = LanguageParser.ParseLanguage(title);
Logger.Debug("Language parsed: {0}", result.Language);
result.Quality = QualityParser.ParseQuality(title);
Logger.Debug("Quality parsed: {0}", result.Quality);
result.ReleaseGroup = ParseReleaseGroup(title);
var subGroup = GetSubGroup(match);
if (!subGroup.IsNullOrWhiteSpace())
{
result.ReleaseGroup = subGroup;
}
Logger.Debug("Release Group parsed: {0}", result.ReleaseGroup);
result.ReleaseHash = GetReleaseHash(match);
if (!result.ReleaseHash.IsNullOrWhiteSpace())
{
Logger.Debug("Release Hash parsed: {0}", result.ReleaseHash);
}
realResult = result;
return result;
}
}
catch (InvalidDateException ex)
{
Logger.Debug(ex, ex.Message);
break;
}
}
}
}
catch (Exception e)
{
if (!title.ToLower().Contains("password") && !title.ToLower().Contains("yenc"))
Logger.Error(e, "An error has occurred while trying to parse " + title);
}
Logger.Debug("Unable to parse {0}", title);
return realResult;
}
public static string ReplaceGermanUmlauts(string s)
{
var t = s;
t = t.Replace("ä", "ae");
t = t.Replace("ö", "oe");
t = t.Replace("ü", "ue");
t = t.Replace("Ä", "Ae");
t = t.Replace("Ö", "Oe");
t = t.Replace("Ü", "Ue");
t = t.Replace("ß", "ss");
return t;
}
public static string NormalizeImdbId(string imdbId)
{
if (imdbId.Length > 2)
{
return (imdbId.Substring(2) != "tt" ? $"tt{imdbId}" : imdbId);
}
return null;
}
public static string ToUrlSlug(string value)
{
//First to lower case
value = value.ToLowerInvariant();
//Remove all accents
var bytes = Encoding.GetEncoding("ISO-8859-8").GetBytes(value);
value = Encoding.ASCII.GetString(bytes);
//Replace spaces
value = Regex.Replace(value, @"\s", "-", RegexOptions.Compiled);
//Remove invalid chars
value = Regex.Replace(value, @"[^a-z0-9\s-_]", "", RegexOptions.Compiled);
//Trim dashes from end
value = value.Trim('-', '_');
//Replace double occurences of - or _
value = Regex.Replace(value, @"([-_]){2,}", "$1", RegexOptions.Compiled);
return value;
}
public static string ParseSeriesName(string title)
{
Logger.Debug("Parsing string '{0}'", title);
var parseResult = ParseTitle(title);
if (parseResult == null)
{
return CleanSeriesTitle(title);
}
return parseResult.SeriesTitle;
}
public static string CleanSeriesTitle(this string title)
{
long number = 0;
//If Title only contains numbers return it as is.
if (long.TryParse(title, out number))
return title;
return NormalizeRegex.Replace(title, string.Empty).ToLower().RemoveAccent();
}
public static string NormalizeEpisodeTitle(string title)
{
title = SpecialEpisodeWordRegex.Replace(title, string.Empty);
title = PunctuationRegex.Replace(title, " ");
title = DuplicateSpacesRegex.Replace(title, " ");
return title.Trim()
.ToLower();
}
public static string NormalizeTitle(string title)
{
title = WordDelimiterRegex.Replace(title, " ");
title = PunctuationRegex.Replace(title, string.Empty);
title = CommonWordRegex.Replace(title, string.Empty);
title = DuplicateSpacesRegex.Replace(title, " ");
title = SpecialCharRegex.Replace(title, string.Empty);
return title.Trim().ToLower();
}
public static string ParseReleaseGroup(string title)
{
title = title.Trim();
title = RemoveFileExtension(title);
title = WebsitePrefixRegex.Replace(title, "");
var animeMatch = AnimeReleaseGroupRegex.Match(title);
if (animeMatch.Success)
{
return animeMatch.Groups["subgroup"].Value;
}
title = CleanReleaseGroupRegex.Replace(title, "");
var matches = ReleaseGroupRegex.Matches(title);
if (matches.Count != 0)
{
var group = matches.OfType().Last().Groups["releasegroup"].Value;
int groupIsNumeric;
if (int.TryParse(group, out groupIsNumeric))
{
return null;
}
return group;
}
return null;
}
public static string RemoveFileExtension(string title)
{
title = FileExtensionRegex.Replace(title, m =>
{
var extension = m.Value.ToLower();
if (MediaFiles.MediaFileExtensions.Extensions.Contains(extension) || new[] { ".par2", ".nzb" }.Contains(extension))
{
return string.Empty;
}
return m.Value;
});
return title;
}
private static SeriesTitleInfo GetSeriesTitleInfo(string title)
{
var seriesTitleInfo = new SeriesTitleInfo();
seriesTitleInfo.Title = title;
var match = YearInTitleRegex.Match(title);
if (!match.Success)
{
seriesTitleInfo.TitleWithoutYear = title;
}
else
{
seriesTitleInfo.TitleWithoutYear = match.Groups["title"].Value;
seriesTitleInfo.Year = Convert.ToInt32(match.Groups["year"].Value);
}
return seriesTitleInfo;
}
private static ParsedMovieInfo ParseMovieMatchCollection(MatchCollection matchCollection)
{
if (!matchCollection[0].Groups["title"].Success)
{
return null;
}
var seriesName = matchCollection[0].Groups["title"].Value./*Replace('.', ' ').*/Replace('_', ' ');
seriesName = RequestInfoRegex.Replace(seriesName, "").Trim(' ');
var parts = seriesName.Split('.');
seriesName = "";
int n = 0;
bool previousAcronym = false;
string nextPart = "";
foreach (var part in parts)
{
if (parts.Length >= n+2)
{
nextPart = parts[n+1];
}
if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out n))
{
seriesName += part + ".";
previousAcronym = true;
}
else if (part.ToLower() == "a" && (previousAcronym == true || nextPart.Length == 1))
{
seriesName += part + ".";
previousAcronym = true;
}
else
{
if (previousAcronym)
{
seriesName += " ";
previousAcronym = false;
}
seriesName += part + " ";
}
n++;
}
seriesName = seriesName.Trim(' ');
int airYear;
int.TryParse(matchCollection[0].Groups["year"].Value, out airYear);
ParsedMovieInfo result;
result = new ParsedMovieInfo { Year = airYear };
if (matchCollection[0].Groups["edition"].Success)
{
result.Edition = matchCollection[0].Groups["edition"].Value.Replace(".", " ");
}
result.MovieTitle = seriesName;
result.MovieTitleInfo = GetSeriesTitleInfo(result.MovieTitle);
Logger.Debug("Movie Parsed. {0}", result);
return result;
}
private static ParsedEpisodeInfo ParseMatchCollection(MatchCollection matchCollection)
{
var seriesName = matchCollection[0].Groups["title"].Value.Replace('.', ' ').Replace('_', ' ');
seriesName = RequestInfoRegex.Replace(seriesName, "").Trim(' ');
int airYear;
int.TryParse(matchCollection[0].Groups["airyear"].Value, out airYear);
//int.TryParse(matchCollection[0].Groups["year"].Value, out airYear);
ParsedEpisodeInfo result;
if (airYear < 1900)
{
var seasons = new List();
foreach (Capture seasonCapture in matchCollection[0].Groups["season"].Captures)
{
int parsedSeason;
if (int.TryParse(seasonCapture.Value, out parsedSeason))
seasons.Add(parsedSeason);
}
//If no season was found it should be treated as a mini series and season 1
if (seasons.Count == 0) seasons.Add(1);
//If more than 1 season was parsed go to the next REGEX (A multi-season release is unlikely)
if (seasons.Distinct().Count() > 1) return null;
result = new ParsedEpisodeInfo
{
SeasonNumber = seasons.First(),
EpisodeNumbers = new int[0],
AbsoluteEpisodeNumbers = new int[0]
};
foreach (Match matchGroup in matchCollection)
{
var episodeCaptures = matchGroup.Groups["episode"].Captures.Cast().ToList();
var absoluteEpisodeCaptures = matchGroup.Groups["absoluteepisode"].Captures.Cast().ToList();
//Allows use to return a list of 0 episodes (We can handle that as a full season release)
if (episodeCaptures.Any())
{
var first = ParseNumber(episodeCaptures.First().Value);
var last = ParseNumber(episodeCaptures.Last().Value);
if (first > last)
{
return null;
}
var count = last - first + 1;
result.EpisodeNumbers = Enumerable.Range(first, count).ToArray();
}
if (absoluteEpisodeCaptures.Any())
{
var first = Convert.ToInt32(absoluteEpisodeCaptures.First().Value);
var last = Convert.ToInt32(absoluteEpisodeCaptures.Last().Value);
if (first > last)
{
return null;
}
var count = last - first + 1;
result.AbsoluteEpisodeNumbers = Enumerable.Range(first, count).ToArray();
if (matchGroup.Groups["special"].Success)
{
result.Special = true;
}
}
if (!episodeCaptures.Any() && !absoluteEpisodeCaptures.Any())
{
//Check to see if this is an "Extras" or "SUBPACK" release, if it is, return NULL
//Todo: Set a "Extras" flag in EpisodeParseResult if we want to download them ever
if (!matchCollection[0].Groups["extras"].Value.IsNullOrWhiteSpace()) return null;
result.FullSeason = true;
}
}
if (result.AbsoluteEpisodeNumbers.Any() && !result.EpisodeNumbers.Any())
{
result.SeasonNumber = 0;
}
}
else
{
//Try to Parse as a daily show
var airmonth = Convert.ToInt32(matchCollection[0].Groups["airmonth"].Value);
var airday = Convert.ToInt32(matchCollection[0].Groups["airday"].Value);
//Swap day and month if month is bigger than 12 (scene fail)
if (airmonth > 12)
{
var tempDay = airday;
airday = airmonth;
airmonth = tempDay;
}
DateTime airDate;
try
{
airDate = new DateTime(airYear, airmonth, airday);
}
catch (Exception)
{
throw new InvalidDateException("Invalid date found: {0}-{1}-{2}", airYear, airmonth, airday);
}
//Check if episode is in the future (most likely a parse error)
if (airDate > DateTime.Now.AddDays(1).Date || airDate < new DateTime(1970, 1, 1))
{
throw new InvalidDateException("Invalid date found: {0}", airDate);
}
result = new ParsedEpisodeInfo
{
AirDate = airDate.ToString(Episode.AIR_DATE_FORMAT),
};
}
result.SeriesTitle = seriesName;
result.SeriesTitleInfo = GetSeriesTitleInfo(result.SeriesTitle);
Logger.Debug("Episode Parsed. {0}", result);
return result;
}
private static bool ValidateBeforeParsing(string title)
{
if (title.ToLower().Contains("password") && title.ToLower().Contains("yenc"))
{
Logger.Debug("");
return false;
}
if (!title.Any(char.IsLetterOrDigit))
{
return false;
}
var titleWithoutExtension = RemoveFileExtension(title);
if (RejectHashedReleasesRegex.Any(v => v.IsMatch(titleWithoutExtension)))
{
Logger.Debug("Rejected Hashed Release Title: " + title);
return false;
}
return true;
}
private static string GetSubGroup(MatchCollection matchCollection)
{
var subGroup = matchCollection[0].Groups["subgroup"];
if (subGroup.Success)
{
return subGroup.Value;
}
return string.Empty;
}
private static string GetReleaseHash(MatchCollection matchCollection)
{
var hash = matchCollection[0].Groups["hash"];
if (hash.Success)
{
var hashValue = hash.Value.Trim('[', ']');
if (hashValue.Equals("1280x720"))
{
return string.Empty;
}
return hashValue;
}
return string.Empty;
}
private static int ParseNumber(string value)
{
int number;
if (int.TryParse(value, out number))
{
return number;
}
number = Array.IndexOf(Numbers, value.ToLower());
if (number != -1)
{
return number;
}
throw new FormatException(string.Format("{0} isn't a number", value));
}
}
}