Radarr/NzbDrone.Core/Parser.cs

300 lines
12 KiB
C#
Raw Normal View History

using System;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using NLog;
using NzbDrone.Core.Model;
using NzbDrone.Core.Repository.Quality;
namespace NzbDrone.Core
{
public static class Parser
{
// Logger for this class, obtained from NLog.
private static readonly Logger Logger = LogManager.GetCurrentClassLogger();

// Options shared by every title pattern declared below.
private const RegexOptions TitlePatternOptions = RegexOptions.IgnoreCase | RegexOptions.Compiled;

// Patterns tried, in this order, against episode report titles.
private static readonly Regex[] ReportTitleRegex = new Regex[]
    {
        // Title, optional 4-digit year, then air year / air month / air day.
        new Regex(@"(?<title>.+?)?\W?(?<year>\d{4}?)?\W+(?<airyear>\d{4})\W+(?<airmonth>\d{2})\W+(?<airday>\d{2})\W?(?!\\)",
                  TitlePatternOptions),

        // Title, optional year, a 1-2 digit season (optionally prefixed with 'S') followed by
        // one or more episode numbers separated by '-', '.', 'e', 'x', whitespace or "to".
        new Regex(@"(?<title>.+?)?\W?(?<year>\d{4}?)?(?:\WS?(?<season>\d{1,2})(?:(?:\-|\.|[ex]|\s|to)+(?<episode>\d+))+)+\W?(?!\\)",
                  TitlePatternOptions),

        //Supports 103/113 naming
        new Regex(@"(?<title>.+?)?\W?(?<year>\d{4}?)?(?:\W(?<season>\d+)(?<episode>\d{2}))+\W?(?!\\)",
                  TitlePatternOptions)
    };

// Patterns for titles that carry a season number: title, optional year,
// optional 'S' / 'Season' marker and the season digits.
private static readonly Regex[] SeasonReportTitleRegex = new Regex[]
    {
        new Regex(@"(?<title>.+?)?\W?(?<year>\d{4}?)?\W(?:S|Season)?\W?(?<season>\d+)(?!\\)",
                  TitlePatternOptions)
    };

// Matches the stand-alone words 'the' and 'and' as well as every character outside a-z
// (case-insensitively); used by NormalizeTitle to strip them.
private static readonly Regex NormalizeRegex = new Regex(@"((\s|^)the(\s|$))|((\s|^)and(\s|$))|[^a-z]",
                                                         TitlePatternOptions);
// Removes resolution / codec tokens (480p, 720i, x264, h264, ...) before the title patterns run,
// presumably so their digits are not picked up as season or episode numbers.
private static readonly Regex ResolutionTokenRegex = new Regex(@"480[ip]|720[ip]|1080[ip]|[xh]264",
                                                               RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
///   Parses a post title into the episode information it contains
/// </summary>
/// <param name = "title">Title of the report</param>
/// <returns>
///   The parse result of the first pattern in <c>ReportTitleRegex</c> that matches: either an air date
///   (daily pattern) or a season number with a list of episode numbers. Returns null when no pattern
///   yields a usable result.
/// </returns>
internal static EpisodeParseResult ParseEpisodeInfo(string title)
{
    Logger.Trace("Parsing string '{0}'", title);

    // The stripped title is the same for every pattern, so build it once instead of per iteration.
    var simpleTitle = ResolutionTokenRegex.Replace(title, String.Empty);

    foreach (var regex in ReportTitleRegex)
    {
        var matches = regex.Matches(simpleTitle);
        if (matches.Count == 0)
        {
            continue;
        }

        var firstMatch = matches[0];

        int year;
        Int32.TryParse(firstMatch.Groups["year"].Value, out year);
        if (year < 1900 || year > DateTime.Now.Year + 1)
        {
            // Implausible series year, treat it as "not specified".
            year = 0;
        }

        var parsedEpisode = new EpisodeParseResult
            {
                Proper = ParseProper(title),
                SeriesTitle = NormalizeTitle(firstMatch.Groups["title"].Value),
                Year = year
            };

        if (firstMatch.Groups["airyear"].Success)
        {
            // Daily show. The pattern only guarantees digit counts, so the date itself can still be
            // impossible (month 13, day 32, year 0000); in that case let the remaining patterns try.
            DateTime airDate;
            if (!TryGetAirDate(firstMatch, out airDate))
            {
                continue;
            }

            parsedEpisode.AirDate = airDate;
        }
        else
        {
            int season;
            Int32.TryParse(firstMatch.Groups["season"].Value, out season);
            parsedEpisode.SeasonNumber = season;
            parsedEpisode.Episodes = new List<int>();

            foreach (Match episodeMatch in matches)
            {
                // Every match contributes the inclusive range between its first and last episode capture.
                var captures = episodeMatch.Groups["episode"].Captures;
                int first, last;
                if (captures.Count == 0 ||
                    !Int32.TryParse(captures[0].Value, out first) ||
                    !Int32.TryParse(captures[captures.Count - 1].Value, out last))
                {
                    continue;
                }

                for (var episode = first; episode <= last; episode++)
                {
                    parsedEpisode.Episodes.Add(episode);
                }
            }
        }

        parsedEpisode.Quality = ParseQuality(title);

        Logger.Trace("Episode Parsed. {0}", parsedEpisode);
        return parsedEpisode;
    }

    Logger.Warn("Unable to parse text into episode info. {0}", title);
    return null;
}

// Builds the air date from the airyear / airmonth / airday groups; returns false when they do not form a valid date.
private static bool TryGetAirDate(Match match, out DateTime airDate)
{
    airDate = default(DateTime);

    int airYear, airMonth, airDay;
    if (!Int32.TryParse(match.Groups["airyear"].Value, out airYear) ||
        !Int32.TryParse(match.Groups["airmonth"].Value, out airMonth) ||
        !Int32.TryParse(match.Groups["airday"].Value, out airDay))
    {
        return false;
    }

    if (airYear < DateTime.MinValue.Year || airYear > DateTime.MaxValue.Year ||
        airMonth < 1 || airMonth > 12 ||
        airDay < 1 || airDay > DateTime.DaysInMonth(airYear, airMonth))
    {
        return false;
    }

    airDate = new DateTime(airYear, airMonth, airDay);
    return true;
}
/// <summary>
///   Parses a post title into season it contains
/// </summary>
/// <param name = "title">Title of the report</param>
/// <returns>Season information contained in the post, or null when no season pattern matches</returns>
internal static SeasonParseResult ParseSeasonInfo(string title)
{
    Logger.Trace("Parsing string '{0}'", title);

    // Season titles have their own pattern list; the episode patterns require an episode number
    // (or an air date) and therefore never match a season-only title.
    foreach (var regex in SeasonReportTitleRegex)
    {
        var match = regex.Match(title);
        if (!match.Success)
        {
            continue;
        }

        int seasonNumber;
        if (!Int32.TryParse(match.Groups["season"].Value, out seasonNumber))
        {
            // Digits that do not fit an Int32: nothing usable in this match.
            continue;
        }

        int year;
        Int32.TryParse(match.Groups["year"].Value, out year);
        if (year < 1900 || year > DateTime.Now.Year + 1)
        {
            // Implausible series year, treat it as "not specified".
            year = 0;
        }

        var result = new SeasonParseResult
            {
                SeriesTitle = NormalizeTitle(match.Groups["title"].Value),
                SeasonNumber = seasonNumber,
                Year = year,
                Quality = ParseQuality(title)
            };

        Logger.Trace("Season Parsed. {0}", result);
        return result;
    }

    return null;
}
/// <summary>
///   Extracts the series name from a post title
/// </summary>
/// <param name = "title">Title of the report</param>
/// <returns>
///   The normalized 'title' group of the first report pattern that matches,
///   or an empty string when none of them does
/// </returns>
internal static string ParseSeriesName(string title)
{
    Logger.Trace("Parsing string '{0}'", title);

    for (var index = 0; index < ReportTitleRegex.Length; index++)
    {
        var firstMatch = ReportTitleRegex[index].Match(title);
        if (!firstMatch.Success)
        {
            continue;
        }

        var normalizedName = NormalizeTitle(firstMatch.Groups["title"].Value);
        Logger.Trace("Series Parsed. {0}", normalizedName);
        return normalizedName;
    }

    return String.Empty;
}
/// <summary>
///   Parses proper status out of a report title
/// </summary>
/// <param name = "title">Title of the report</param>
/// <returns>True when the title contains the word "proper" in any letter casing</returns>
internal static bool ParseProper(string title)
{
    // Ordinal, case-insensitive search: no lower-cased copy of the title and no dependency on the current culture.
    return title.IndexOf("proper", StringComparison.OrdinalIgnoreCase) >= 0;
}
/// <summary>
///   Determines the quality of a release from keywords in its name, falling back to the file extension
/// </summary>
/// <param name = "name">Report title or file name</param>
/// <returns>The detected quality, or QualityTypes.Unknown when nothing matches</returns>
internal static QualityTypes ParseQuality(string name)
{
    Logger.Trace("Trying to parse quality for {0}", name);

    var result = DetectQuality(name.ToLowerInvariant());

    Logger.Trace("Quality Parsed:{0} Title:{1}", result, name);
    return result;
}

// Keyword / extension lookup on an already lower-cased name. The order of the checks is significant.
private static QualityTypes DetectQuality(string name)
{
    if (name.Contains("dvd"))
    {
        return QualityTypes.DVD;
    }

    if (name.Contains("bdrip") || name.Contains("brrip"))
    {
        return QualityTypes.BDRip;
    }

    var isBluray = name.Contains("bluray");

    if (name.Contains("xvid") || name.Contains("divx"))
    {
        return isBluray ? QualityTypes.BDRip : QualityTypes.TV;
    }

    if (isBluray)
    {
        // 720p wins when both resolutions appear; a bluray without a resolution is treated as 720.
        if (name.Contains("720p"))
        {
            return QualityTypes.Bluray720;
        }

        return name.Contains("1080p") ? QualityTypes.Bluray1080 : QualityTypes.Bluray720;
    }

    if (name.Contains("web-dl"))
    {
        return QualityTypes.WEBDL;
    }

    if (name.Contains("x264") || name.Contains("h264") || name.Contains("720p"))
    {
        return QualityTypes.HDTV;
    }

    //Based on extension
    switch (GetExtensionOrEmpty(name))
    {
        case ".avi":
        case ".xvid":
        case ".wmv":
            return QualityTypes.TV;
        case ".mkv":
            return QualityTypes.HDTV;
        default:
            return QualityTypes.Unknown;
    }
}

// Returns the extension (including the dot) of a name, or an empty string when there is none.
// Report titles are not guaranteed to be valid paths, so an ArgumentException from the path API
// is treated as "no extension" instead of failing the whole parse.
private static string GetExtensionOrEmpty(string name)
{
    try
    {
        return Path.GetExtension(name) ?? String.Empty;
    }
    catch (ArgumentException)
    {
        return String.Empty;
    }
}
/// <summary>
///   Normalizes the title by removing everything matched by NormalizeRegex (the stand-alone words
///   'the' and 'and', and every character that is not a letter a-z) and lower-casing the rest
/// </summary>
/// <param name = "title">title</param>
/// <returns>The normalized, lower-case title</returns>
public static string NormalizeTitle(string title)
{
    // Invariant casing keeps the result independent of the machine's culture
    // (e.g. 'I' lower-cases to a dotless 'ı' under Turkish culture rules).
    return NormalizeRegex.Replace(title, String.Empty).ToLowerInvariant();
}
2011-04-19 06:37:34 +00:00
/// <summary>
///   Returns the full path for the given path with trailing separators and spaces removed
/// </summary>
/// <param name = "path">Path to normalize</param>
/// <returns>The full path without trailing '/', '\' or space characters</returns>
/// <exception cref = "ArgumentException">The path is null, empty or consists only of white space</exception>
public static string NormalizePath(string path)
{
    if (String.IsNullOrWhiteSpace(path))
    {
        throw new ArgumentException("Path can not be null or empty");
    }

    // Only the end is trimmed: leading separators are part of the root (UNC "\\server", rooted "/dir")
    // and must survive. For drive-letter paths this is identical to trimming both ends.
    return new FileInfo(path).FullName.TrimEnd('/', '\\', ' ');
}
}
}