2013-04-07 19:01:24 +00:00
using System ;
2012-08-03 07:01:34 +00:00
using System.Collections.Generic ;
using System.IO ;
using System.Linq ;
using System.Text.RegularExpressions ;
using NLog ;
2014-12-02 06:26:25 +00:00
using NzbDrone.Common.Extensions ;
2013-08-31 01:42:30 +00:00
using NzbDrone.Common.Instrumentation ;
2013-04-15 01:41:39 +00:00
using NzbDrone.Core.Parser.Model ;
2013-10-23 05:17:02 +00:00
using NzbDrone.Core.Tv ;
2012-08-03 07:01:34 +00:00
2013-04-15 01:41:39 +00:00
namespace NzbDrone.Core.Parser
2012-08-03 07:01:34 +00:00
{
public static class Parser
{
2014-12-17 07:12:26 +00:00
// Logger shared by every static method of this class; obtained from NzbDroneLogger for the Parser type.
private static readonly Logger Logger = NzbDroneLogger . GetLogger ( typeof ( Parser ) ) ;
2012-08-03 07:01:34 +00:00
2017-01-04 21:59:34 +00:00
// Movie release-name patterns exposing the named groups "title", "year" and (for the first two) "edition".
// ParseMovieTitle walks this array in declaration order and returns as soon as one pattern's
// match collection produces a non-null result, so the order of the entries is significant:
// the specific edition formats come first, the permissive fallbacks last.
private static readonly Regex [ ] ReportMovieTitleRegex = new [ ]
{
//Special, Despecialized, etc. Edition Movies, e.g: Mission.Impossible.3.Special.Edition.2011
2017-01-24 09:02:20 +00:00
new Regex ( @"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*(?<edition>(\.?((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final|Extended|Rogue|Special|Despecialized).(Cut|Edition|Version)|Extended|Uncensored|Remastered|Unrated|Uncut|IMAX)))\.(?<year>(19|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)" ,
2017-01-04 21:59:34 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2017-01-04 22:20:48 +00:00
//Special, Despecialized, etc. Edition Movies, e.g: Mission.Impossible.3.2011.Special.Edition //TODO: Seems to slow down parsing heavily!
2017-01-24 09:02:20 +00:00
new Regex ( @"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*(?<year>(19|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)(?<edition>((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final|Extended|Rogue|Special|Despecialized).(Cut|Edition|Version)|Extended|Uncensored|Remastered|Unrated|Uncut|IMAX))" ,
2017-01-04 21:59:34 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2017-01-05 22:12:22 +00:00
2017-01-04 21:59:34 +00:00
//Normal movie format, e.g: Mission.Impossible.3.2011
2017-01-24 09:02:20 +00:00
new Regex ( @"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*(?<year>(19|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)" ,
2017-01-04 21:59:34 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2017-01-05 22:12:22 +00:00
//PassThePopcorn Torrent names: Star.Wars[PassThePopcorn]
//Note: the bracketed tag is captured in the group named "year" here, not a numeric year.
new Regex ( @"^(?<title>.+?)?(?:(?:[-_\W](?<![()\[!]))*(?<year>(\[\w *\])))+(\W+|_|$)(?!\\)" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2017-01-11 20:49:59 +00:00
//That did not work? Maybe some tool uses [] for years. Who would do that?
2017-01-24 09:02:20 +00:00
new Regex ( @"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)!]))*(?<year>(19|20)\d{2}(?!p|i|\d+|\W\d+)))+(\W+|_|$)(?!\\)" ,
2017-01-11 20:49:59 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2017-02-10 18:00:16 +00:00
//As a last resort for movies that have ( or [ in their title.
new Regex ( @"^(?<title>.+?)?(?:(?:[-_\W](?<![)\[!]))*(?<year>(19|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2017-01-11 20:49:59 +00:00
} ;
// Extra movie patterns that ParseMovieTitle appends after ReportMovieTitleRegex when it is
// called with isDir = true. They cover names where the year precedes the title.
// Built with the same RegexOptions as every other pattern in this class so the regex is
// compiled once instead of being interpreted on every parse.
private static readonly Regex[] ReportMovieTitleFolderRegex = new[]
{
    //When year comes first.
    new Regex(@"^(?:(?:[-_\W](?<![)!]))*(?<year>(19|20)\d{2}(?!p|i|\d+|\W\d+)))+(\W+|_|$)(?<title>.+?)?$",
        RegexOptions.IgnoreCase | RegexOptions.Compiled)
};
2012-08-03 07:01:34 +00:00
// Episode release-name patterns. ParseTitle runs them in declaration order against the
// simplified title and returns on the first pattern whose match collection produces a
// non-null result, so the position of each entry in this array is significant.
// Named groups used across the patterns: title, season, episode, absoluteepisode,
// airyear/airmonth/airday, airdate, subgroup, hash, special, extras and year.
private static readonly Regex [ ] ReportTitleRegex = new [ ]
2013-04-15 01:41:39 +00:00
{
2013-10-27 22:50:15 +00:00
//Anime - Absolute Episode Number + Title + Season+Episode
2013-11-29 17:05:52 +00:00
//Todo: This currently breaks series that start with numbers
// new Regex(@"^(?:(?<absoluteepisode>\d{2,3})(?:_|-|\s|\.)+)+(?<title>.+?)(?:\W|_)+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)",
// RegexOptions.IgnoreCase | RegexOptions.Compiled),
2013-10-27 22:50:15 +00:00
2014-12-24 06:24:59 +00:00
//Multi-Part episodes without a title (S01E05.S01E06)
new Regex ( @"^(?:\W*S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[ex]){1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2016-12-28 16:13:18 +00:00
//Matches Movie name with AirYear
new Regex ( @"^(?<title>.+?)?(?:(?:[-_\W](?<![()\[!]))*(?<year>(?<!e|x)\d{4}(?!p|i|\d+|\)|\]|\W\d+)))+(\W+|_|$)(?!\\)" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2014-12-24 06:24:59 +00:00
//Episodes without a title, Single (S01E05, 1x05) AND Multi (S01E04E05, 1x04x05, etc)
new Regex ( @"^(?:S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+)))+)" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2016-12-22 02:07:49 +00:00
//Anime - [SubGroup] Title Episode Absolute Episode Number ([SubGroup] Series Title Episode 01)
new Regex ( @"^(?:\[(?<subgroup>.+?)\][-_. ]?)(?<title>.+?)[-_. ](?:Episode)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2013-10-27 22:50:15 +00:00
//Anime - [SubGroup] Title Absolute Episode Number + Season+Episode
2015-07-06 19:27:23 +00:00
new Regex ( @"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+(?<absoluteepisode>\d{2,3}))+(?:_|-|\s|\.)+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+).*?(?<hash>[(\[]\w{8}[)\]])?(?:$|\.)" ,
2014-05-04 18:44:18 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2013-10-27 22:50:15 +00:00
//Anime - [SubGroup] Title Season+Episode + Absolute Episode Number
2015-07-06 19:27:23 +00:00
new Regex ( @"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:[-_\W](?<![()\[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(?:(?:_|-|\s|\.)+(?<absoluteepisode>(?<!\d+)\d{2,3}(?!\d+)))+.*?(?<hash>\[\w{8}\])?(?:$|\.)" ,
2014-05-04 18:44:18 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2013-10-27 22:50:15 +00:00
2014-10-21 22:48:23 +00:00
//Anime - [SubGroup] Title Season+Episode
2015-07-06 19:27:23 +00:00
new Regex ( @"^(?:\[(?<subgroup>.+?)\](?:_|-|\s|\.)?)(?<title>.+?)(?:[-_\W](?<![()\[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]|\W[ex]){1,2}(?<episode>\d{2}(?!\d+)))+)(?:\s|\.).*?(?<hash>\[\w{8}\])?(?:$|\.)" ,
2014-10-21 22:48:23 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-06-03 06:14:07 +00:00
//Anime - [SubGroup] Title with trailing number Absolute Episode Number
new Regex ( @"^\[(?<subgroup>.+?)\][-_. ]?(?<title>[^-]+?\d+?)[-_. ]+(?:[-_. ]?(?<absoluteepisode>\d{3}(?!\d+)))+(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2016-10-18 20:52:15 +00:00
//Anime - [SubGroup] Title - Absolute Episode Number
new Regex ( @"^\[(?<subgroup>.+?)\][-_. ]?(?<title>.+?)(?:[. ]-[. ](?<absoluteepisode>\d{2,3}(?!\d+|[-])))+(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2013-10-27 22:50:15 +00:00
//Anime - [SubGroup] Title Absolute Episode Number
2016-10-18 00:51:55 +00:00
new Regex ( @"^\[(?<subgroup>.+?)\][-_. ]?(?<title>.+?)[-_. ]+\(?(?:[-_. ]?(?<absoluteepisode>\d{2,3}(?!\d+)))+\)?(?:[-_. ]+(?<special>special|ova|ovd))?.*?(?<hash>\[\w{8}\])?(?:$|\.mkv)" ,
2014-05-04 18:44:18 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2013-10-27 22:50:15 +00:00
2016-08-26 04:46:22 +00:00
//Anime - Title Season EpisodeNumber + Absolute Episode Number [SubGroup]
2016-10-25 17:16:05 +00:00
new Regex ( @"^(?<title>.+?)(?:[-_\W](?<![()\[!]))+(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:[ex]|\W[ex]){1,2}(?<episode>(?<!\d+)\d{2}(?!\d+)))+).+?(?:[-_. ]?(?<absoluteepisode>(?<!\d+)\d{3}(?!\d+)))+.+?\[(?<subgroup>.+?)\](?:$|\.mkv)" ,
2016-08-26 04:46:22 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2014-12-24 06:24:59 +00:00
//Anime - Title Absolute Episode Number [SubGroup]
2015-01-05 00:29:07 +00:00
new Regex ( @"^(?<title>.+?)(?:(?:_|-|\s|\.)+(?<absoluteepisode>\d{3}(?!\d+)))+(?:.+?)\[(?<subgroup>.+?)\].*?(?<hash>\[\w{8}\])?(?:$|\.)" ,
2014-12-02 00:48:00 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-01-16 07:25:22 +00:00
//Anime - Title Absolute Episode Number [Hash]
2015-10-30 06:58:56 +00:00
new Regex ( @"^(?<title>.+?)(?:(?:_|-|\s|\.)+(?<absoluteepisode>\d{2,3}(?!\d+)))+(?:[-_. ]+(?<special>special|ova|ovd))?[-_. ]+.*?(?<hash>\[\w{8}\])(?:$|\.)" ,
2014-12-02 00:48:00 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2016-03-26 02:13:24 +00:00
//Episodes with airdate AND season/episode number, capture season/episode only
new Regex ( @"^(?<title>.+?)?\W*(?<airdate>\d{4}\W+[0-1][0-9]\W+[0-3][0-9])(?!\W+[0-3][0-9])[-_. ](?:s?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+)))(?:[ex](?<episode>(?<!\d+)(?:\d{1,3})(?!\d+)))" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-02-11 16:04:01 +00:00
//Episodes with airdate AND season/episode number
2016-02-12 02:46:32 +00:00
new Regex ( @"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>[0-1][0-9])\W+(?<airday>[0-3][0-9])(?!\W+[0-3][0-9]).+?(?:s?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+)))(?:[ex](?<episode>(?<!\d+)(?:\d{1,3})(?!\d+)))" ,
2015-02-11 16:04:01 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2013-04-15 01:41:39 +00:00
//Multi-episode Repeated (S01E05 - S01E06, 1x05 - 1x06, etc)
2016-01-21 21:43:48 +00:00
new Regex ( @"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S?(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:(?:[ex]|[-_. ]e){1,2}(?<episode>\d{1,3}(?!\d+)))+){2,}" ,
2013-04-15 01:41:39 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2012-08-03 07:01:34 +00:00
2016-12-12 18:43:37 +00:00
//Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc)
new Regex ( @"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S?(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))(?:[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+)))*)\W?(?!\\)" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
//Episodes with a title, 4 digit season number, Single episodes (S2016E05, etc) & Multi-episode (S2016E05E06, S2016E05-06, S2016E05 E06, etc)
new Regex ( @"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S(?<season>(?<!\d+)(?:\d{4})(?!\d+))(?:[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+)))*)\W?(?!\\)" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
//Mini-Series with year in title, treated as season 1, episodes are labelled as Part01, Part 01, Part.1
new Regex ( @"^(?<title>.+?\d{4})(?:\W+(?:(?:Part\W?|e)(?<episode>\d{1,2}(?!\d+)))+)" ,
2013-04-29 03:11:43 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2013-04-15 01:41:39 +00:00
//Mini-Series, treated as season 1, episodes are labelled as Part01, Part 01, Part.1
2013-11-15 08:53:12 +00:00
new Regex ( @"^(?<title>.+?)(?:\W+(?:(?:Part\W?|(?<!\d+\W+)e)(?<episode>\d{1,2}(?!\d+)))+)" ,
2013-04-15 01:41:39 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2012-08-03 07:01:34 +00:00
2016-06-15 18:58:42 +00:00
//Mini-Series, treated as season 1, episodes are labelled as Part One/Two/Three/...Nine, Part.One, Part_One
new Regex ( @"^(?<title>.+?)(?:\W+(?:Part[-._ ](?<episode>One|Two|Three|Four|Five|Six|Seven|Eight|Nine)(?>[-._ ])))" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2016-01-30 19:16:59 +00:00
//Mini-Series, treated as season 1, episodes are labelled as XofY
new Regex ( @"^(?<title>.+?)(?:\W+(?:(?<episode>(?<!\d+)\d{1,2}(?!\d+))of\d+)+)" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2013-06-19 15:44:41 +00:00
//Supports Season 01 Episode 03
2015-07-06 19:27:23 +00:00
new Regex ( @"(?:.*(?:\""|^))(?<title>.*?)(?:[-_\W](?<![()\[]))+(?:\W?Season\W?)(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:\W|_)+(?:Episode\W)(?:[-_. ]?(?<episode>(?<!\d+)\d{1,2}(?!\d+)))+" ,
2013-06-19 15:44:41 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-12-08 23:26:52 +00:00
//Multi-episode release with no space between series title and season (S01E11E12)
new Regex ( @"(?:.*(?:^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{2}(?!\d+))(?:E(?<episode>(?<!\d+)\d{2}(?!\d+)))+" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-12-20 08:11:41 +00:00
//Multi-episode with single episode numbers (S6.E1-E2, S6.E1E2, S6E1E2, etc)
new Regex ( @"^(?<title>.+?)[-_. ]S(?<season>(?<!\d+)(?:\d{1,2}|\d{4})(?!\d+))(?:[-_. ]?[ex]?(?<episode>(?<!\d+)\d{1,2}(?!\d+)))+" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2014-11-05 06:11:44 +00:00
//Single episode season or episode S1E1 or S1-E1
new Regex ( @"(?:.*(?:\""|^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:\W|_)?E(?<episode>(?<!\d+)\d{1,2}(?!\d+))" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
//3 digit season S010E05
new Regex ( @"(?:.*(?:\""|^))(?<title>.*?)(?:\W?|_)S(?<season>(?<!\d+)\d{3}(?!\d+))(?:\W|_)?E(?<episode>(?<!\d+)\d{1,2}(?!\d+))" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-03-10 00:15:56 +00:00
//5 digit episode number with a title
new Regex ( @"^(?:(?<title>.+?)(?:_|-|\s|\.)+)(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>(?<!\d+)\d{5}(?!\d+)))" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-08-02 06:12:15 +00:00
//5 digit multi-episode with a title
new Regex ( @"^(?:(?<title>.+?)(?:_|-|\s|\.)+)(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:(?:[-_. ]{1,3}ep){1,2}(?<episode>(?<!\d+)\d{5}(?!\d+)))+" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2016-06-03 02:07:15 +00:00
// Separated season and episode numbers S01 - E01
2016-06-06 17:15:21 +00:00
new Regex ( @"^(?<title>.+?)(?:_|-|\s|\.)+S(?<season>\d{2}(?!\d+))(\W-\W)E(?<episode>(?<!\d+)\d{2}(?!\d+))(?!\\)" ,
2016-06-03 02:07:15 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-09-20 18:21:04 +00:00
//Season only releases
2013-04-29 03:11:43 +00:00
new Regex ( @"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{1,2}(?!\d+))(\W+|_|$)(?<extras>EXTRAS|SUBPACK)?(?!\\)" ,
2013-11-11 06:41:21 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-09-20 18:21:04 +00:00
//4 digit season only releases
new Regex ( @"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{4}(?!\d+))(\W+|_|$)(?<extras>EXTRAS|SUBPACK)?(?!\\)" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-12-06 19:03:11 +00:00
//Episodes with a title and season/episode in square brackets
new Regex ( @"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+\[S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>(?<!\d+)\d{2}(?!\d+|i|p)))+\])\W?(?!\\)" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-10-22 17:29:59 +00:00
//Supports 103/113 naming
new Regex ( @"^(?<title>.+?)?(?:(?:[-_\W](?<![()\[!]))+(?<season>(?<!\d+)[1-9])(?<episode>[1-9][0-9]|[0][1-9])(?![a-z]|\d+))+" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2014-04-15 23:53:08 +00:00
//Episodes with airdate
2014-11-17 05:31:53 +00:00
new Regex ( @"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>[0-1][0-9])\W+(?<airday>[0-3][0-9])(?!\W+[0-3][0-9])" ,
2014-04-15 23:53:08 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2013-12-23 01:32:50 +00:00
//Supports 1103/1113 naming
2015-07-06 19:27:23 +00:00
new Regex ( @"^(?<title>.+?)?(?:(?:[-_\W](?<![()\[!]))*(?<season>(?<!\d+|\(|\[|e|x)\d{2})(?<episode>(?<!e|x)\d{2}(?!p|i|\d+|\)|\]|\W\d+)))+(\W+|_|$)(?!\\)" ,
2013-12-23 01:32:50 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-03-10 00:15:56 +00:00
//4 digit episode number
2013-11-11 06:56:15 +00:00
//Episodes without a title, Single (S01E05, 1x05) AND Multi (S01E04E05, 1x04x05, etc)
new Regex ( @"^(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+)(\W+|_|$)(?!\\)" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-05-25 18:25:03 +00:00
//4 digit episode number
2013-11-11 06:56:15 +00:00
//Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc)
2015-07-06 19:27:23 +00:00
new Regex ( @"^(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+)\W?(?!\\)" ,
2013-11-11 06:56:15 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2014-05-19 19:14:41 +00:00
//Episodes with single digit episode number (S01E1, S01E5E6, etc)
2015-07-06 19:27:23 +00:00
new Regex ( @"^(?<title>.*?)(?:(?:[-_\W](?<![()\[!]))+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]){1,2}(?<episode>\d{1}))+)+(\W+|_|$)(?!\\)" ,
2014-05-19 19:14:41 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2015-02-09 05:49:44 +00:00
//iTunes Season 1\05 Title (Quality).ext
new Regex ( @"^(?:Season(?:_|-|\s|\.)(?<season>(?<!\d+)\d{1,2}(?!\d+)))(?:_|-|\s|\.)(?<episode>(?<!\d+)\d{1,2}(?!\d+))" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2014-05-19 19:14:41 +00:00
//Anime - Title Absolute Episode Number (e66)
2015-01-05 00:29:07 +00:00
new Regex ( @"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:(?:_|-|\s|\.)+(?:e|ep)(?<absoluteepisode>\d{2,3}))+.*?(?<hash>\[\w{8}\])?(?:$|\.)" ,
2014-05-19 19:14:41 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2016-08-29 18:59:50 +00:00
2016-12-22 02:07:49 +00:00
//Anime - Title Episode Absolute Episode Number (Series Title Episode 01)
new Regex ( @"^(?<title>.+?)[-_. ](?:Episode)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?" ,
2016-08-29 18:59:50 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
2014-05-19 19:14:41 +00:00
2015-04-30 14:08:55 +00:00
//Anime - Title Absolute Episode Number
new Regex ( @"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:[-_. ]+(?<absoluteepisode>(?<!\d+)\d{2,3}(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
//Anime - Title {Absolute Episode Number}
2015-07-06 19:27:23 +00:00
new Regex ( @"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)(?:(?:[-_\W](?<![()\[!]))+(?<absoluteepisode>(?<!\d+)\d{2,3}(?!\d+)))+(?:_|-|\s|\.)*?(?<hash>\[.{8}\])?(?:$|\.)?" ,
2014-08-29 14:16:06 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ,
//Extant, terrible multi-episode naming (extant.10708.hdtv-lol.mp4)
new Regex ( @"^(?<title>.+?)[-_. ](?<season>[0]?\d?)(?:(?<episode>\d{2}){2}(?!\d+))[-_. ]" ,
2014-05-04 18:44:18 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled )
2013-04-15 01:41:39 +00:00
} ;
2012-08-03 07:01:34 +00:00
2014-04-17 23:16:40 +00:00
// Patterns describing names that are hashes or other known junk rather than real release titles.
// NOTE(review): nothing in the visible part of this file consumes the array; presumably
// ValidateBeforeParsing rejects a title when any of these match - confirm against that method.
// Only the last two entries are case-insensitive; the others match case-sensitively on purpose
// of their character classes ([A-Z] vs [a-z]).
private static readonly Regex [ ] RejectHashedReleasesRegex = new Regex [ ]
{
// Generic match for md5 and mixed-case hashes.
// Anchored at the start only: any name beginning with 32 alphanumeric characters matches.
new Regex ( @"^[0-9a-zA-Z]{32}" , RegexOptions . Compiled ) ,
2014-06-25 18:44:57 +00:00
// Generic match for shorter lower-case hashes.
new Regex ( @"^[a-z0-9]{24}$" , RegexOptions . Compiled ) ,
2014-04-17 23:16:40 +00:00
// Format seen on some NZBGeek releases
2015-03-15 12:11:17 +00:00
// Be very strict with these because they are very close to the valid 101 episode numbering.
2014-12-02 02:27:53 +00:00
new Regex ( @"^[A-Z]{11}\d{3}$" , RegexOptions . Compiled ) ,
2015-03-15 12:11:17 +00:00
new Regex ( @"^[a-z]{12}\d{3}$" , RegexOptions . Compiled ) ,
2014-12-02 02:27:53 +00:00
//Backup filename (Unknown origins)
2014-12-15 18:04:55 +00:00
new Regex ( @"^Backup_\d{5,}S\d{2}-\d{2}$" , RegexOptions . Compiled ) ,
//123 - Started appearing December 2014
2015-01-09 06:45:29 +00:00
new Regex ( @"^123$" , RegexOptions . Compiled ) ,
//abc - Started appearing January 2015
2015-01-10 05:59:06 +00:00
new Regex ( @"^abc$" , RegexOptions . Compiled | RegexOptions . IgnoreCase ) ,
//b00bs - Started appearing January 2015
2015-01-21 22:48:35 +00:00
new Regex ( @"^b00bs$" , RegexOptions . Compiled | RegexOptions . IgnoreCase )
2014-04-17 23:16:40 +00:00
} ;
//Regex to detect whether the title was reversed.
//Looks for the mirrored tokens "p027" (720p), "p0801" (1080p) or a mirrored SxxExx marker between separators.
//Case-sensitive: no IgnoreCase option is set.
2014-08-13 19:28:47 +00:00
private static readonly Regex ReversedTitleRegex = new Regex ( @"[-._ ](p027|p0801|\d{2}E\d{2}S)[-._ ]" , RegexOptions . Compiled ) ;
2014-04-17 23:16:40 +00:00
2015-01-11 18:23:47 +00:00
//Matches the words a/an/the/and/or/of when delimited by word boundaries or underscores (never at the very
//start of the input, and "a" never at the very end), plus every non-word character and underscore.
private static readonly Regex NormalizeRegex = new Regex ( @"((?:\b|_)(?<!^)(a(?!$)|an|the|and|or|of)(?:\b|_))|\W|_" ,
2014-05-04 18:44:18 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2012-08-03 07:01:34 +00:00
2014-10-21 21:51:38 +00:00
//Matches a trailing dot followed by two to four letters/digits, i.e. something shaped like a file extension.
private static readonly Regex FileExtensionRegex = new Regex ( @"\.[a-z0-9]{2,4}$" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2017-01-21 19:37:08 +00:00
//Matches "tt" followed by exactly seven digits anywhere in the input; consumed by ParseImdbId.
private static readonly Regex ReportImdbId = new Regex ( @"(?<imdbid>tt\d{7})" , RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2017-01-09 16:52:55 +00:00
2017-01-20 23:39:44 +00:00
//Resolution, codec, audio and bit-depth tokens plus the characters < > ? * : | (with trailing whitespace).
//ParseTitle and ParseMovieTitle replace every match with an empty string before running the title patterns.
private static readonly Regex SimpleTitleRegex = new Regex ( @"(?:480[ip]|576[ip]|720[ip]|1080[ip]|2160[ip]|[xh][\W_]?26[45]|DD\W?5\W1|[<>?*:|]|848x480|1280x720|1920x1080|(8|10)b(it)?)\s*" ,
2014-05-04 18:44:18 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2012-08-03 07:01:34 +00:00
2014-05-13 17:57:46 +00:00
//Leading "[ host.tld ]" style prefix and any dashes/spaces after it; stripped by ParseTitle and ParseMovieTitle.
private static readonly Regex WebsitePrefixRegex = new Regex ( @"^\[\s*[a-z]+(\.[a-z]+)+\s*\][- ]*" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2014-05-03 18:26:47 +00:00
//Air date written as yyyy-MM-dd or MM-dd-yyyy with '_', '.' or '-' separators. Group 1 holds the text before the date.
//ParseTitle rebuilds the title as group 1 + "yyyy.MM.dd"; text following the date is not carried over.
private static readonly Regex AirDateRegex = new Regex ( @"^(.*?)(?<!\d)((?<airyear>\d{4})[_.-](?<airmonth>[0-1][0-9])[_.-](?<airday>[0-3][0-9])|(?<airmonth>[0-1][0-9])[_.-](?<airday>[0-3][0-9])[_.-](?<airyear>\d{4}))(?!\d)" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2014-12-02 00:48:00 +00:00
//Six digit yyMMdd block enclosed by '_', '.' or '-'; ParseTitle expands it to "20yy.MM.dd".
private static readonly Regex SixDigitAirDateRegex = new Regex ( @"(?<=[_.-])(?<airdate>(?<!\d)(?<airyear>[1-9]\d{1})(?<airmonth>[0-1][0-9])(?<airday>[0-3][0-9]))(?=[_.-])" ,
2014-07-25 15:33:51 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2016-02-06 09:43:56 +00:00
//Matches either everything up to and including an SxxExx marker, or trailing "-RP", "-1", "-NZBGeek",
//"-Obfuscated" / "-sample" suffixes. Not used in the visible part of this file.
private static readonly Regex CleanReleaseGroupRegex = new Regex ( @"^(.*?[-._ ](S\d+E\d+)[-._ ])|(-(RP|1|NZBGeek|Obfuscated|sample))+$" ,
2015-01-10 08:43:35 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2015-10-20 18:12:35 +00:00
//Trailing "[ettv]", "[rartv]", "[rarbg]" or "[cttv]" tag; stripped by ParseTitle and ParseMovieTitle.
private static readonly Regex CleanTorrentSuffixRegex = new Regex ( @"\[(?:ettv|rartv|rarbg|cttv)\]$" ,
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2016-03-26 20:42:33 +00:00
//Captures an alphanumeric token after a dash as "releasegroup", unless the text ends in WEB-DL or a resolution tag.
private static readonly Regex ReleaseGroupRegex = new Regex ( @"-(?<releasegroup>[a-z0-9]+)(?<!WEB-DL|480p|720p|1080p|2160p)(?:\b|[-._ ])" ,
2014-05-04 18:44:18 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2015-01-16 21:48:22 +00:00
//Captures a leading "[subgroup]" tag (no leading/trailing whitespace inside the brackets) as "subgroup".
private static readonly Regex AnimeReleaseGroupRegex = new Regex ( @"^(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)" ,
2015-01-10 08:43:35 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2013-10-31 23:50:39 +00:00
//Splits "<title><optional separator><four digits>" into the groups "title" and "year".
private static readonly Regex YearInTitleRegex = new Regex ( @"^(?<title>.+?)(?:\W|_)?(?<year>\d{4})" ,
2014-05-04 18:44:18 +00:00
RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2013-10-31 23:50:39 +00:00
2014-01-08 05:54:23 +00:00
//Runs of whitespace, '.', ',', '_', '-', '=' or '|'.
private static readonly Regex WordDelimiterRegex = new Regex ( @"(\s|\.|,|_|-|=|\|)+" , RegexOptions . Compiled ) ;
2017-02-06 21:34:11 +00:00
//Runs of '&', ':', '\' or '/'.
private static readonly Regex SpecialCharRegex = new Regex ( @"(\&|\:|\\|\/)+" , RegexOptions . Compiled ) ;
2014-01-08 05:54:23 +00:00
//Any single character that is neither a word character nor whitespace.
private static readonly Regex PunctuationRegex = new Regex ( @"[^\w\s]" , RegexOptions . Compiled ) ;
2014-12-12 23:49:32 +00:00
//The whole words a/an/the/and/or/of, including one optional trailing whitespace character.
private static readonly Regex CommonWordRegex = new Regex ( @"\b(a|an|the|and|or|of)\b\s?" , RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2014-12-15 18:52:16 +00:00
//The whole words part/special/edition/christmas, including one optional trailing whitespace character.
private static readonly Regex SpecialEpisodeWordRegex = new Regex ( @"\b(part|special|edition|christmas)\b\s?" , RegexOptions . IgnoreCase | RegexOptions . Compiled ) ;
2014-12-12 23:49:32 +00:00
//Two or more consecutive whitespace characters.
private static readonly Regex DuplicateSpacesRegex = new Regex ( @"\s{2,}" , RegexOptions . Compiled ) ;
2014-01-07 08:24:50 +00:00
2014-08-12 05:11:06 +00:00
//Shortest possible "[...]" bracketed span.
private static readonly Regex RequestInfoRegex = new Regex ( @"\[.+?\]" , RegexOptions . Compiled ) ;
2016-06-15 18:58:42 +00:00
//English number words where the array index equals the numeric value (Numbers[3] == "three").
//NOTE(review): presumably used to translate the "Part One" style episode captures - the consumer is not visible here.
private static readonly string [ ] Numbers = new [ ] { "zero" , "one" , "two" , "three" , "four" , "five" , "six" , "seven" , "eight" , "nine" } ;
2013-04-15 01:41:39 +00:00
/// <summary>
/// Parses episode information from a file path, trying progressively broader inputs:
/// the file name, then "directory name + space + file name", then "directory name + file extension".
/// </summary>
/// <param name="path">Path of the file to parse; passed to the <see cref="FileInfo"/> constructor.</param>
/// <returns>
/// The first non-null result returned by ParseTitle, or null when every attempt fails or the
/// path has no parent directory to fall back on.
/// </returns>
public static ParsedEpisodeInfo ParsePath(string path)
{
    var fileInfo = new FileInfo(path);

    var result = ParseTitle(fileInfo.Name);
    if (result != null)
    {
        return result;
    }

    // FileInfo.Directory is null when the path has no parent (e.g. a drive root);
    // the directory based fallbacks cannot run in that case.
    var directory = fileInfo.Directory;
    if (directory == null)
    {
        return null;
    }

    Logger.Debug("Attempting to parse episode info using directory and file names. {0}", directory.Name);
    result = ParseTitle(directory.Name + " " + fileInfo.Name);
    if (result != null)
    {
        return result;
    }

    // Last attempt: the directory name alone, with the file's extension appended.
    Logger.Debug("Attempting to parse episode info using directory name. {0}", directory.Name);
    return ParseTitle(directory.Name + fileInfo.Extension);
}
2017-01-09 00:48:04 +00:00
/// <summary>
/// Parses movie information from a path, trying progressively broader inputs:
/// the last path segment, then "directory name + space + name", then "directory name + extension".
/// </summary>
/// <param name="path">Path to parse; passed to the <see cref="FileInfo"/> constructor.</param>
/// <returns>
/// The first non-null result returned by ParseMovieTitle, or null when every attempt fails or
/// the path has no parent directory to fall back on.
/// </returns>
public static ParsedMovieInfo ParseMoviePath(string path)
{
    var fileInfo = new FileInfo(path);

    // Only this first attempt passes isDir = true, which makes ParseMovieTitle also try
    // the ReportMovieTitleFolderRegex patterns.
    var result = ParseMovieTitle(fileInfo.Name, true);
    if (result != null)
    {
        return result;
    }

    // FileInfo.Directory is null when the path has no parent (e.g. a drive root);
    // the directory based fallbacks cannot run in that case.
    var directory = fileInfo.Directory;
    if (directory == null)
    {
        return null;
    }

    Logger.Debug("Attempting to parse movie info using directory and file names. {0}", directory.Name);
    result = ParseMovieTitle(directory.Name + " " + fileInfo.Name);
    if (result != null)
    {
        return result;
    }

    // Last attempt: the directory name alone, with the original extension appended.
    Logger.Debug("Attempting to parse movie info using directory name. {0}", directory.Name);
    return ParseMovieTitle(directory.Name + fileInfo.Extension);
}
2017-01-11 20:49:59 +00:00
/// <summary>
/// Parses a movie release, file or folder name into a <see cref="ParsedMovieInfo"/>.
/// </summary>
/// <param name="title">The name to parse.</param>
/// <param name="isDir">
/// When true, the ReportMovieTitleFolderRegex patterns are tried after the ReportMovieTitleRegex ones.
/// </param>
/// <returns>
/// The result of the first pattern that produces one, completed with language, quality,
/// release group, IMDb id and release hash. Null when ValidateBeforeParsing rejects the title,
/// no pattern produces a result, an InvalidDateException stops the search, or an unexpected
/// exception occurs (which is logged, not rethrown).
/// </returns>
public static ParsedMovieInfo ParseMovieTitle(string title, bool isDir = false)
{
    try
    {
        if (!ValidateBeforeParsing(title))
        {
            return null;
        }

        //title = title.Replace(" ", "."); //TODO: Determine if this breaks something. However, it shouldn't.

        Logger.Debug("Parsing string '{0}'", title);

        if (ReversedTitleRegex.IsMatch(title))
        {
            // Reverse the part returned by RemoveFileExtension and re-append, un-reversed,
            // whatever tail of the original string it left out.
            var titleWithoutExtension = RemoveFileExtension(title).ToCharArray();
            Array.Reverse(titleWithoutExtension);

            title = new string(titleWithoutExtension) + title.Substring(titleWithoutExtension.Length);

            Logger.Debug("Reversed name detected. Converted to '{0}'", title);
        }

        // The patterns run against a simplified copy; quality, release group and IMDb id
        // are still read from the full title further down.
        var simpleTitle = SimpleTitleRegex.Replace(title, string.Empty);
        simpleTitle = RemoveFileExtension(simpleTitle);

        // TODO: Quick fix stripping [url] - prefixes.
        simpleTitle = WebsitePrefixRegex.Replace(simpleTitle, string.Empty);
        simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle, string.Empty);

        var allRegexes = ReportMovieTitleRegex.ToList();
        if (isDir)
        {
            allRegexes.AddRange(ReportMovieTitleFolderRegex);
        }

        foreach (var regex in allRegexes)
        {
            var match = regex.Matches(simpleTitle);
            if (match.Count == 0)
            {
                continue;
            }

            Logger.Trace(regex);

            try
            {
                var result = ParseMovieMatchCollection(match);
                if (result == null)
                {
                    continue;
                }

                // The movie title is swapped for a neutral placeholder before language detection.
                // string.Replace throws when the search value is null or empty, which previously
                // sent an otherwise successful parse into the outer catch; skip the masking then.
                var languageSource = string.IsNullOrEmpty(result.MovieTitle)
                    ? simpleTitle
                    : simpleTitle.Replace(result.MovieTitle, "A Movie");

                result.Language = LanguageParser.ParseLanguage(languageSource);
                Logger.Debug("Language parsed: {0}", result.Language);

                result.Quality = QualityParser.ParseQuality(title);
                Logger.Debug("Quality parsed: {0}", result.Quality);

                result.ReleaseGroup = ParseReleaseGroup(title);
                result.ImdbId = ParseImdbId(title);

                // A sub group found in the match overrides the generically parsed release group.
                var subGroup = GetSubGroup(match);
                if (!subGroup.IsNullOrWhiteSpace())
                {
                    result.ReleaseGroup = subGroup;
                }

                Logger.Debug("Release Group parsed: {0}", result.ReleaseGroup);

                result.ReleaseHash = GetReleaseHash(match);
                if (!result.ReleaseHash.IsNullOrWhiteSpace())
                {
                    Logger.Debug("Release Hash parsed: {0}", result.ReleaseHash);
                }

                return result;
            }
            catch (InvalidDateException ex)
            {
                // An invalid date ends the search: no further patterns are tried.
                Logger.Debug(ex, ex.Message);
                break;
            }
        }
    }
    catch (Exception e)
    {
        // Errors for titles mentioning "password" or "yenc" are not logged.
        // The check is null-safe so a failure on a null title cannot throw again from here.
        var suppressError = title != null &&
                            (title.IndexOf("password", StringComparison.OrdinalIgnoreCase) >= 0 ||
                             title.IndexOf("yenc", StringComparison.OrdinalIgnoreCase) >= 0);

        if (!suppressError)
        {
            Logger.Error(e, "An error has occurred while trying to parse " + title);
        }
    }

    Logger.Debug("Unable to parse {0}", title);
    return null;
}
2017-01-09 16:52:55 +00:00
/// <summary>
/// Extracts the first IMDb identifier ("tt" followed by seven digits, as defined by ReportImdbId) from a title.
/// </summary>
/// <param name="title">The text to search.</param>
/// <returns>The nine character identifier as it appears in the title, or an empty string when none is found.</returns>
public static string ParseImdbId(string title)
{
    var imdbGroup = ReportImdbId.Match(title).Groups["imdbid"];

    // An unsuccessful group has an empty, non-null Value and Length 0.
    if (!imdbGroup.Success || imdbGroup.Value == null || imdbGroup.Length != 9)
    {
        return "";
    }

    return imdbGroup.Value;
}
2013-04-15 01:41:39 +00:00
/// <summary>
/// Parses an episode release or file name into a <see cref="ParsedEpisodeInfo"/>.
/// </summary>
/// <param name="title">The name to parse.</param>
/// <returns>
/// The result of the first ReportTitleRegex pattern that produces one, completed with language,
/// quality, release group and release hash. Null when ValidateBeforeParsing rejects the title,
/// no pattern produces a result, an InvalidDateException stops the search, or an unexpected
/// exception occurs (which is logged, not rethrown).
/// </returns>
public static ParsedEpisodeInfo ParseTitle(string title)
{
    try
    {
        if (!ValidateBeforeParsing(title))
        {
            return null;
        }

        Logger.Debug("Parsing string '{0}'", title);

        if (ReversedTitleRegex.IsMatch(title))
        {
            // Reverse the part returned by RemoveFileExtension and re-append, un-reversed,
            // whatever tail of the original string it left out.
            var titleWithoutExtension = RemoveFileExtension(title).ToCharArray();
            Array.Reverse(titleWithoutExtension);

            title = new string(titleWithoutExtension) + title.Substring(titleWithoutExtension.Length);

            Logger.Debug("Reversed name detected. Converted to '{0}'", title);
        }

        // The patterns run against a simplified copy; language, quality and release group
        // are still read from the full title further down.
        var simpleTitle = SimpleTitleRegex.Replace(title, string.Empty);
        simpleTitle = RemoveFileExtension(simpleTitle);

        // TODO: Quick fix stripping [url] - prefixes.
        simpleTitle = WebsitePrefixRegex.Replace(simpleTitle, string.Empty);
        simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle, string.Empty);

        var airDateMatch = AirDateRegex.Match(simpleTitle);
        if (airDateMatch.Success)
        {
            // Rewrite the date as yyyy.MM.dd; text following the date is not carried over.
            simpleTitle = airDateMatch.Groups[1].Value +
                          airDateMatch.Groups["airyear"].Value + "." +
                          airDateMatch.Groups["airmonth"].Value + "." +
                          airDateMatch.Groups["airday"].Value;
        }

        var sixDigitAirDateMatch = SixDigitAirDateRegex.Match(simpleTitle);
        if (sixDigitAirDateMatch.Success)
        {
            var airYear = sixDigitAirDateMatch.Groups["airyear"].Value;
            var airMonth = sixDigitAirDateMatch.Groups["airmonth"].Value;
            var airDay = sixDigitAirDateMatch.Groups["airday"].Value;

            // Month "00" or day "00" can never be a date, so such a number is left untouched.
            // The previous "||" only skipped the all-zero case and still rewrote values such as
            // "160012" into the impossible date 2016.00.12.
            if (airMonth != "00" && airDay != "00")
            {
                var fixedDate = string.Format("20{0}.{1}.{2}", airYear, airMonth, airDay);

                simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"].Value, fixedDate);
            }
        }

        foreach (var regex in ReportTitleRegex)
        {
            var match = regex.Matches(simpleTitle);
            if (match.Count == 0)
            {
                continue;
            }

            Logger.Trace(regex);

            try
            {
                var result = ParseMatchCollection(match);
                if (result == null)
                {
                    continue;
                }

                // A full-season result whose original title mentions "Special" is reported as a special instead.
                if (result.FullSeason && title.ContainsIgnoreCase("Special"))
                {
                    result.FullSeason = false;
                    result.Special = true;
                }

                result.Language = LanguageParser.ParseLanguage(title);
                Logger.Debug("Language parsed: {0}", result.Language);

                result.Quality = QualityParser.ParseQuality(title);
                Logger.Debug("Quality parsed: {0}", result.Quality);

                result.ReleaseGroup = ParseReleaseGroup(title);

                // A sub group found in the match overrides the generically parsed release group.
                var subGroup = GetSubGroup(match);
                if (!subGroup.IsNullOrWhiteSpace())
                {
                    result.ReleaseGroup = subGroup;
                }

                Logger.Debug("Release Group parsed: {0}", result.ReleaseGroup);

                result.ReleaseHash = GetReleaseHash(match);
                if (!result.ReleaseHash.IsNullOrWhiteSpace())
                {
                    Logger.Debug("Release Hash parsed: {0}", result.ReleaseHash);
                }

                return result;
            }
            catch (InvalidDateException ex)
            {
                // An invalid date ends the search: no further patterns are tried.
                Logger.Debug(ex, ex.Message);
                break;
            }
        }
    }
    catch (Exception e)
    {
        // Errors for titles mentioning "password" or "yenc" are not logged.
        // The check is null-safe so a failure on a null title cannot throw again from here.
        var suppressError = title != null &&
                            (title.IndexOf("password", StringComparison.OrdinalIgnoreCase) >= 0 ||
                             title.IndexOf("yenc", StringComparison.OrdinalIgnoreCase) >= 0);

        if (!suppressError)
        {
            Logger.Error(e, "An error has occurred while trying to parse " + title);
        }
    }

    Logger.Debug("Unable to parse {0}", title);
    return null;
}
2017-03-06 01:13:13 +00:00
/// <summary>
/// Transliterates German umlauts and the sharp s into ASCII digraphs:
/// ä→ae, ö→oe, ü→ue, Ä→Ae, Ö→Oe, Ü→Ue and ß→ss.
/// </summary>
/// <param name="s">Text to transliterate. Must not be null.</param>
/// <returns>A copy of <paramref name="s"/> with every listed character replaced.</returns>
public static string ReplaceGermanUmlauts(string s)
{
    // Each replacement is independent of the others, so they can simply be chained.
    return s.Replace("ä", "ae")
            .Replace("ö", "oe")
            .Replace("ü", "ue")
            .Replace("Ä", "Ae")
            .Replace("Ö", "Oe")
            .Replace("Ü", "Ue")
            .Replace("ß", "ss");
}
2013-10-31 23:50:39 +00:00
/// <summary>
/// Returns the series title found in a release title.
/// </summary>
/// <param name="title">Raw release title.</param>
/// <returns>
/// The <c>SeriesTitle</c> of the result of <c>ParseTitle</c>, or the output of
/// <c>CleanSeriesTitle</c> on the raw title when <c>ParseTitle</c> returns null.
/// </returns>
public static string ParseSeriesName(string title)
{
    Logger.Debug("Parsing string '{0}'", title);

    var parsedInfo = ParseTitle(title);

    // Fall back to the cleaned raw title when nothing could be parsed.
    return parsedInfo != null
        ? parsedInfo.SeriesTitle
        : CleanSeriesTitle(title);
}
/// <summary>
/// Produces a simplified form of a series title.
/// </summary>
/// <param name="title">Title to clean.</param>
/// <returns>
/// <paramref name="title"/> unchanged when it parses as a 64-bit integer; otherwise the
/// title with every <c>NormalizeRegex</c> match removed, lower-cased and passed through
/// <c>RemoveAccent</c>.
/// </returns>
public static string CleanSeriesTitle(this string title)
{
    long numericTitle;
    var isNumeric = long.TryParse(title, out numericTitle);

    // Purely numeric titles are returned as they are.
    if (isNumeric)
    {
        return title;
    }

    var stripped = NormalizeRegex.Replace(title, string.Empty);
    return stripped.ToLower().RemoveAccent();
}
2014-11-19 06:01:16 +00:00
/// <summary>
/// Normalizes an episode title: removes <c>SpecialEpisodeWordRegex</c> matches, replaces
/// <c>PunctuationRegex</c> matches with a space, collapses <c>DuplicateSpacesRegex</c>
/// matches to one space, then trims and lower-cases the result.
/// </summary>
/// <param name="title">Episode title to normalize.</param>
/// <returns>The normalized, trimmed, lower-case title.</returns>
public static string NormalizeEpisodeTitle(string title)
{
    var withoutSpecialWords = SpecialEpisodeWordRegex.Replace(title, string.Empty);
    var withoutPunctuation = PunctuationRegex.Replace(withoutSpecialWords, " ");
    var singleSpaced = DuplicateSpacesRegex.Replace(withoutPunctuation, " ");

    return singleSpaced.Trim().ToLower();
}
2014-11-05 16:45:50 +00:00
/// <summary>
/// Normalizes a title by applying, in order: <c>WordDelimiterRegex</c> (replaced by a
/// space), <c>PunctuationRegex</c> and <c>CommonWordRegex</c> (removed),
/// <c>DuplicateSpacesRegex</c> (collapsed to one space) and <c>SpecialCharRegex</c>
/// (removed). The result is trimmed and lower-cased.
/// </summary>
/// <param name="title">Title to normalize.</param>
/// <returns>The normalized, trimmed, lower-case title.</returns>
public static string NormalizeTitle(string title)
{
    // The order of the passes matters: each regex sees the output of the previous one.
    var delimited = WordDelimiterRegex.Replace(title, " ");
    var withoutPunctuation = PunctuationRegex.Replace(delimited, string.Empty);
    var withoutCommonWords = CommonWordRegex.Replace(withoutPunctuation, string.Empty);
    var singleSpaced = DuplicateSpacesRegex.Replace(withoutCommonWords, " ");
    var withoutSpecialChars = SpecialCharRegex.Replace(singleSpaced, string.Empty);

    return withoutSpecialChars.Trim().ToLower();
}
2013-12-09 06:25:11 +00:00
/// <summary>
/// Extracts the release group from a release title.
/// </summary>
/// <param name="title">Raw release title. Must not be null.</param>
/// <returns>
/// The "subgroup" capture when <c>AnimeReleaseGroupRegex</c> matches; otherwise the
/// "releasegroup" capture of the last <c>ReleaseGroupRegex</c> match. Returns null when
/// there is no match or the captured group parses as an integer.
/// </returns>
public static string ParseReleaseGroup(string title)
{
    // Trim, drop the file extension, then strip the website prefix.
    title = WebsitePrefixRegex.Replace(RemoveFileExtension(title.Trim()), "");

    var animeMatch = AnimeReleaseGroupRegex.Match(title);
    if (animeMatch.Success)
    {
        return animeMatch.Groups["subgroup"].Value;
    }

    title = CleanReleaseGroupRegex.Replace(title, "");

    var candidates = ReleaseGroupRegex.Matches(title);
    if (candidates.Count == 0)
    {
        return null;
    }

    // Only the last candidate in the title is considered.
    var group = candidates.OfType<Match>().Last().Groups["releasegroup"].Value;

    // A purely numeric value is not accepted as a release group.
    int numericGroup;
    return int.TryParse(group, out numericGroup) ? null : group;
}
2014-04-17 23:16:40 +00:00
/// <summary>
/// Removes a recognised file extension from a title.
/// </summary>
/// <param name="title">Title or file name to process.</param>
/// <returns>
/// <paramref name="title"/> with every <c>FileExtensionRegex</c> match removed when the
/// matched text is a known media file extension, ".par2" or ".nzb". Any other match is
/// left in place.
/// </returns>
public static string RemoveFileExtension(string title)
{
    return FileExtensionRegex.Replace(title, extensionMatch =>
    {
        // Lower-case with the invariant culture: the value is compared with fixed ASCII
        // extensions, and a culture-sensitive conversion (e.g. Turkish "I" -> dotless "ı")
        // would stop upper-case extensions such as ".AVI" from being recognised.
        var extension = extensionMatch.Value.ToLowerInvariant();

        var isKnownExtension = MediaFiles.MediaFileExtensions.Extensions.Contains(extension)
                               || extension == ".par2"
                               || extension == ".nzb";

        return isKnownExtension ? string.Empty : extensionMatch.Value;
    });
}
2015-12-25 09:22:00 +00:00
2013-10-31 23:50:39 +00:00
/// <summary>
/// Builds a <c>SeriesTitleInfo</c> for a title, splitting off the year when
/// <c>YearInTitleRegex</c> matches.
/// </summary>
/// <param name="title">Title to inspect.</param>
/// <returns>
/// An info object whose <c>Title</c> is the input. On a match, <c>TitleWithoutYear</c>
/// and <c>Year</c> come from the "title" and "year" groups; otherwise
/// <c>TitleWithoutYear</c> equals the input and <c>Year</c> is not assigned.
/// </returns>
private static SeriesTitleInfo GetSeriesTitleInfo(string title)
{
    var info = new SeriesTitleInfo { Title = title };

    var yearMatch = YearInTitleRegex.Match(title);
    if (yearMatch.Success)
    {
        info.TitleWithoutYear = yearMatch.Groups["title"].Value;
        info.Year = Convert.ToInt32(yearMatch.Groups["year"].Value);
    }
    else
    {
        info.TitleWithoutYear = title;
    }

    return info;
}
2017-01-04 21:59:34 +00:00
/// <summary>
/// Builds a <c>ParsedMovieInfo</c> from the first match of a movie title regex.
/// </summary>
/// <param name="matchCollection">Matches of a movie regex; only the first match is read.</param>
/// <returns>
/// The parsed movie info, or null when the "title" group did not participate in the match.
/// </returns>
private static ParsedMovieInfo ParseMovieMatchCollection(MatchCollection matchCollection)
{
    var firstMatch = matchCollection[0];
    var titleGroup = firstMatch.Groups["title"];

    if (!titleGroup.Success)
    {
        return null;
    }

    var rawTitle = titleGroup.Value.Replace('_', ' ');
    rawTitle = RequestInfoRegex.Replace(rawTitle, "").Trim(' ');

    // Rebuild the title from its dot-separated parts. Single-character parts (other than
    // "a"/"A" and digits) are treated as acronym letters and keep their dot; every other
    // part is followed by a space.
    var seriesName = "";
    var previousAcronym = false;
    int ignoredNumber;

    foreach (var part in rawTitle.Split('.'))
    {
        var isAcronymLetter = part.Length == 1
                              && part.ToLower() != "a"
                              && !int.TryParse(part, out ignoredNumber);

        if (isAcronymLetter)
        {
            seriesName += part + ".";
            previousAcronym = true;
            continue;
        }

        if (previousAcronym)
        {
            // Separate the finished acronym from the word that follows it.
            seriesName += " ";
            previousAcronym = false;
        }

        seriesName += part + " ";
    }

    seriesName = seriesName.Trim(' ');

    // A missing or non-numeric "year" group leaves the year at 0.
    int airYear;
    int.TryParse(firstMatch.Groups["year"].Value, out airYear);

    var result = new ParsedMovieInfo { Year = airYear };

    var editionGroup = firstMatch.Groups["edition"];
    if (editionGroup.Success)
    {
        result.Edition = editionGroup.Value.Replace(".", " ");
    }

    result.MovieTitle = seriesName;
    result.MovieTitleInfo = GetSeriesTitleInfo(result.MovieTitle);

    Logger.Debug("Movie Parsed. {0}", result);

    return result;
}
2013-04-15 01:41:39 +00:00
/// <summary>
/// Builds a <c>ParsedEpisodeInfo</c> from the matches of an episode title regex.
/// </summary>
/// <remarks>
/// When the "airyear" group of the first match is missing, non-numeric or below 1900, the
/// release is parsed by season and episode numbers; otherwise it is parsed as a daily show
/// identified by its air date.
/// </remarks>
/// <param name="matchCollection">Matches produced by one of the report title regexes.</param>
/// <returns>
/// The parsed episode info, or null when the matches should be rejected (more than one
/// distinct season, a descending episode range, or an "extras" release without episodes).
/// </returns>
/// <exception cref="InvalidDateException">
/// The daily air date is not a valid calendar date, is more than a day in the future, or is
/// before 1970-01-01.
/// </exception>
private static ParsedEpisodeInfo ParseMatchCollection(MatchCollection matchCollection)
{
    var firstMatch = matchCollection[0];

    var seriesName = firstMatch.Groups["title"].Value.Replace('.', ' ').Replace('_', ' ');
    seriesName = RequestInfoRegex.Replace(seriesName, "").Trim(' ');

    // A missing or non-numeric "airyear" group leaves airYear at 0, which selects the
    // season/episode branch below.
    int airYear;
    int.TryParse(firstMatch.Groups["airyear"].Value, out airYear);

    var result = airYear < 1900
        ? ParseSeasonAndEpisodeNumbers(matchCollection)
        : ParseDailyAirDate(firstMatch, airYear);

    if (result == null)
    {
        return null;
    }

    result.SeriesTitle = seriesName;
    result.SeriesTitleInfo = GetSeriesTitleInfo(result.SeriesTitle);

    Logger.Debug("Episode Parsed. {0}", result);

    return result;
}

// Parses season, episode and absolute episode numbers from the matches; returns null when
// the matches must be rejected.
private static ParsedEpisodeInfo ParseSeasonAndEpisodeNumbers(MatchCollection matchCollection)
{
    var seasons = new List<int>();

    foreach (Capture seasonCapture in matchCollection[0].Groups["season"].Captures)
    {
        int parsedSeason;
        if (int.TryParse(seasonCapture.Value, out parsedSeason))
        {
            seasons.Add(parsedSeason);
        }
    }

    // If no season was found it should be treated as a mini series and season 1.
    if (seasons.Count == 0)
    {
        seasons.Add(1);
    }

    // If more than one season was parsed, give up so the caller can try the next regex
    // (a multi-season release is unlikely).
    if (seasons.Distinct().Count() > 1)
    {
        return null;
    }

    var result = new ParsedEpisodeInfo
    {
        SeasonNumber = seasons.First(),
        EpisodeNumbers = new int[0],
        AbsoluteEpisodeNumbers = new int[0]
    };

    foreach (Match matchGroup in matchCollection)
    {
        var episodeCaptures = matchGroup.Groups["episode"].Captures.Cast<Capture>().ToList();
        var absoluteEpisodeCaptures = matchGroup.Groups["absoluteepisode"].Captures.Cast<Capture>().ToList();

        if (episodeCaptures.Any())
        {
            var first = ParseNumber(episodeCaptures.First().Value);
            var last = ParseNumber(episodeCaptures.Last().Value);

            // A descending range is rejected.
            if (first > last)
            {
                return null;
            }

            result.EpisodeNumbers = Enumerable.Range(first, last - first + 1).ToArray();
        }

        if (absoluteEpisodeCaptures.Any())
        {
            var first = Convert.ToInt32(absoluteEpisodeCaptures.First().Value);
            var last = Convert.ToInt32(absoluteEpisodeCaptures.Last().Value);

            if (first > last)
            {
                return null;
            }

            result.AbsoluteEpisodeNumbers = Enumerable.Range(first, last - first + 1).ToArray();

            if (matchGroup.Groups["special"].Success)
            {
                result.Special = true;
            }
        }

        // No episodes at all: an empty episode list is handled as a full season release.
        if (!episodeCaptures.Any() && !absoluteEpisodeCaptures.Any())
        {
            // Check to see if this is an "Extras" or "SUBPACK" release; if it is, return null.
            // TODO: Set an "Extras" flag in the parse result if we ever want to download them.
            if (!matchCollection[0].Groups["extras"].Value.IsNullOrWhiteSpace())
            {
                return null;
            }

            result.FullSeason = true;
        }
    }

    // Only absolute episode numbers were found, so the parsed season is reset to 0.
    if (result.AbsoluteEpisodeNumbers.Any() && !result.EpisodeNumbers.Any())
    {
        result.SeasonNumber = 0;
    }

    return result;
}

// Parses the air date of a daily show from a single match; throws InvalidDateException when
// the date is not valid or falls outside the accepted range.
private static ParsedEpisodeInfo ParseDailyAirDate(Match match, int airYear)
{
    var airMonth = Convert.ToInt32(match.Groups["airmonth"].Value);
    var airDay = Convert.ToInt32(match.Groups["airday"].Value);

    // Swap day and month if month is bigger than 12 (scene fail).
    if (airMonth > 12)
    {
        var swappedDay = airDay;
        airDay = airMonth;
        airMonth = swappedDay;
    }

    DateTime airDate;

    try
    {
        airDate = new DateTime(airYear, airMonth, airDay);
    }
    catch (ArgumentOutOfRangeException)
    {
        // The DateTime constructor reports an impossible calendar date this way.
        throw new InvalidDateException("Invalid date found: {0}-{1}-{2}", airYear, airMonth, airDay);
    }

    // Check if the episode is in the future (most likely a parse error) or implausibly old.
    if (airDate > DateTime.Now.AddDays(1).Date || airDate < new DateTime(1970, 1, 1))
    {
        throw new InvalidDateException("Invalid date found: {0}", airDate);
    }

    return new ParsedEpisodeInfo
    {
        AirDate = airDate.ToString(Episode.AIR_DATE_FORMAT),
    };
}
2013-04-29 03:11:43 +00:00
/// <summary>
/// Cheap pre-checks that decide whether a title is worth parsing at all.
/// </summary>
/// <param name="title">Raw release title. Must not be null.</param>
/// <returns>
/// False when the title contains both "password" and "yenc" (case-insensitively), contains
/// no letter or digit, or matches one of <c>RejectHashedReleasesRegex</c> once its file
/// extension has been removed; true otherwise.
/// </returns>
private static bool ValidateBeforeParsing(string title)
{
    var mentionsPassword = title.IndexOf("password", StringComparison.OrdinalIgnoreCase) >= 0;
    var mentionsYenc = title.IndexOf("yenc", StringComparison.OrdinalIgnoreCase) >= 0;

    if (mentionsPassword && mentionsYenc)
    {
        // Say why the title was dropped; an empty message made this rejection invisible in the log.
        Logger.Debug("Rejected release title containing both 'password' and 'yenc': {0}", title);
        return false;
    }

    if (!title.Any(char.IsLetterOrDigit))
    {
        return false;
    }

    var titleWithoutExtension = RemoveFileExtension(title);

    if (RejectHashedReleasesRegex.Any(v => v.IsMatch(titleWithoutExtension)))
    {
        Logger.Debug("Rejected Hashed Release Title: {0}", title);
        return false;
    }

    return true;
}
2014-05-19 19:14:41 +00:00
/// <summary>
/// Reads the "subgroup" capture of the first match.
/// </summary>
/// <param name="matchCollection">Matches to inspect; must contain at least one match.</param>
/// <returns>The captured text, or an empty string when the group did not participate.</returns>
private static string GetSubGroup(MatchCollection matchCollection)
{
    var subGroupCapture = matchCollection[0].Groups["subgroup"];

    return subGroupCapture.Success ? subGroupCapture.Value : string.Empty;
}
/// <summary>
/// Reads the "hash" capture of the first match with surrounding square brackets trimmed.
/// </summary>
/// <param name="matchCollection">Matches to inspect; must contain at least one match.</param>
/// <returns>
/// The trimmed hash, or an empty string when the group did not participate or the trimmed
/// value is exactly "1280x720".
/// </returns>
private static string GetReleaseHash(MatchCollection matchCollection)
{
    var hashCapture = matchCollection[0].Groups["hash"];

    if (!hashCapture.Success)
    {
        return string.Empty;
    }

    var trimmedHash = hashCapture.Value.Trim('[', ']');

    // "1280x720" is explicitly excluded and reported as no hash.
    return trimmedHash.Equals("1280x720") ? string.Empty : trimmedHash;
}
2016-06-15 18:58:42 +00:00
/// <summary>
/// Converts a captured number to an integer.
/// </summary>
/// <param name="value">Digits, or text to look up in the <c>Numbers</c> table. Must not be null.</param>
/// <returns>
/// The parsed integer, or the index of the lower-cased value in <c>Numbers</c> when it is
/// not numeric.
/// </returns>
/// <exception cref="FormatException">
/// <paramref name="value"/> is neither numeric nor present in <c>Numbers</c>.
/// </exception>
private static int ParseNumber(string value)
{
    int number;

    if (int.TryParse(value, out number))
    {
        return number;
    }

    // Lower-case with the invariant culture so the lookup does not depend on the current
    // culture (e.g. Turkish casing maps "I" to dotless "ı", which would miss entries
    // containing "i").
    number = Array.IndexOf(Numbers, value.ToLowerInvariant());

    if (number != -1)
    {
        return number;
    }

    throw new FormatException(string.Format("{0} isn't a number", value));
}
2012-08-03 07:01:34 +00:00
}
2015-01-21 22:48:35 +00:00
}