2015-12-25 09:22:00 +00:00
|
|
|
|
using System;
|
|
|
|
|
using System.IO;
|
|
|
|
|
using System.Linq;
|
|
|
|
|
using System.Text.RegularExpressions;
|
|
|
|
|
using NLog;
|
|
|
|
|
using NzbDrone.Common.Instrumentation;
|
2015-07-12 16:44:33 +00:00
|
|
|
|
using NzbDrone.Core.Languages;
|
2015-12-25 09:22:00 +00:00
|
|
|
|
|
|
|
|
|
namespace NzbDrone.Core.Parser
|
|
|
|
|
{
|
|
|
|
|
public static class LanguageParser
|
|
|
|
|
{
|
|
|
|
|
// Logger used by the parsing helpers in this class.
private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(LanguageParser));

// Title clean-up rules that ParseLanguage tries in order, stopping at the first that applies.
// The single rule keeps capture group 1: the part of the title beginning at the
// "Sxx" / "SxxEyy" token that follows a dot.
// NOTE(review): assumes RegexReplace takes (pattern, replacement, options) - confirm.
private static readonly RegexReplace[] CleanSeriesTitleRegex =
{
    new RegexReplace(@".*?\.(S\d{2}(?:E\d{2,4})*\..*)", "$1", RegexOptions.IgnoreCase | RegexOptions.Compiled)
};

// Case-insensitive language markers. Every alternative is wrapped in a named group
// (italian, german, flemish, greek, french, russian, dutch, hungarian, hebrew, chinese)
// so the caller can tell which language produced the match.
private static readonly Regex LanguageRegex = new Regex(
    @"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<chinese>\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕)",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);

// Markers that only count when written in upper case: "LT" (lithuanian) and "CZ" (czech).
// Deliberately built without RegexOptions.IgnoreCase.
private static readonly Regex CaseSensitiveLanguageRegex = new Regex(
    @"(?<lithuanian>\bLT\b)|(?<czech>\bCZ\b)",
    RegexOptions.Compiled);

// Captures a trailing two or three letter code, preceded by '-', '_', '.' or a space,
// into the "iso_code" group.
private static readonly Regex SubtitleLanguageRegex = new Regex(
    ".+?[-_. ](?<iso_code>[a-z]{2,3})$",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
|
|
2020-08-02 19:42:38 +00:00
|
|
|
|
/// <summary>
/// Determines the language of a release from its title.
/// </summary>
/// <remarks>
/// Detection runs in three stages, and the first hit wins:
/// 1. plain language names contained in the lower-cased title, checked in the order written below;
/// 2. the abbreviations and markers handled by <c>RegexLanguage</c>;
/// 3. the fallback selected by <paramref name="defaultToEnglish"/>.
/// </remarks>
/// <param name="title">Release title to inspect.</param>
/// <param name="defaultToEnglish">
/// When true (the default), <c>Language.English</c> is returned if nothing matched;
/// when false, <c>Language.Unknown</c> is returned instead.
/// </param>
/// <returns>The detected language, or the fallback described above.</returns>
public static Language ParseLanguage(string title, bool defaultToEnglish = true)
{
    // Apply the first clean-up rule that matches; TryReplace rewrites the title in place.
    foreach (var regex in CleanSeriesTitleRegex)
    {
        if (regex.TryReplace(ref title))
        {
            break;
        }
    }

    // Lower-case with the invariant culture. The culture-sensitive ToLower() maps 'I' to the
    // dotless 'ı' under Turkish/Azeri cultures, so an upper-case title such as "ENGLISH" or
    // "SPANISH" would no longer contain the ASCII keywords checked below.
    var lowerTitle = title.ToLowerInvariant();

    if (lowerTitle.Contains("english"))
    {
        return Language.English;
    }

    if (lowerTitle.Contains("french"))
    {
        return Language.French;
    }

    if (lowerTitle.Contains("spanish"))
    {
        return Language.Spanish;
    }

    if (lowerTitle.Contains("danish"))
    {
        return Language.Danish;
    }

    if (lowerTitle.Contains("dutch"))
    {
        return Language.Dutch;
    }

    if (lowerTitle.Contains("japanese"))
    {
        return Language.Japanese;
    }

    if (lowerTitle.Contains("icelandic"))
    {
        return Language.Icelandic;
    }

    if (lowerTitle.Contains("mandarin") || lowerTitle.Contains("cantonese") || lowerTitle.Contains("chinese"))
    {
        return Language.Chinese;
    }

    if (lowerTitle.Contains("korean"))
    {
        return Language.Korean;
    }

    if (lowerTitle.Contains("russian"))
    {
        return Language.Russian;
    }

    if (lowerTitle.Contains("polish"))
    {
        return Language.Polish;
    }

    if (lowerTitle.Contains("vietnamese"))
    {
        return Language.Vietnamese;
    }

    if (lowerTitle.Contains("swedish"))
    {
        return Language.Swedish;
    }

    if (lowerTitle.Contains("norwegian"))
    {
        return Language.Norwegian;
    }

    // "nordic" releases are reported as Norwegian.
    if (lowerTitle.Contains("nordic"))
    {
        return Language.Norwegian;
    }

    if (lowerTitle.Contains("finnish"))
    {
        return Language.Finnish;
    }

    if (lowerTitle.Contains("turkish"))
    {
        return Language.Turkish;
    }

    if (lowerTitle.Contains("portuguese"))
    {
        return Language.Portuguese;
    }

    if (lowerTitle.Contains("hungarian"))
    {
        return Language.Hungarian;
    }

    if (lowerTitle.Contains("hebrew"))
    {
        return Language.Hebrew;
    }

    // The regex stage receives the original-case title because some of its markers
    // (e.g. "LT", "CZ") are case sensitive.
    var regexLanguage = RegexLanguage(title);

    if (regexLanguage != Language.Unknown)
    {
        return regexLanguage;
    }

    return defaultToEnglish ? Language.English : Language.Unknown;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Determines the language of a subtitle file from its file name.
/// </summary>
/// <remarks>
/// The extension is stripped first. If the remaining name ends in a separator followed by a
/// two or three letter code, that code is looked up through <c>IsoLanguages.Find</c>; a code
/// with no result yields <c>Language.Unknown</c>. Otherwise the name is compared against the
/// end of each <c>Language</c> value's text form, ignoring case. Any exception is logged at
/// debug level and reported as <c>Language.Unknown</c>.
/// </remarks>
/// <param name="fileName">Subtitle file name or path.</param>
/// <returns>The detected language, or <c>Language.Unknown</c> when none could be determined.</returns>
public static Language ParseSubtitleLanguage(string fileName)
{
    try
    {
        Logger.Debug("Parsing language from subtitle file: {0}", fileName);

        var nameWithoutExtension = Path.GetFileNameWithoutExtension(fileName);
        var isoMatch = SubtitleLanguageRegex.Match(nameWithoutExtension);

        if (isoMatch.Success)
        {
            // A trailing short code was found: resolve it through the ISO table.
            var resolved = IsoLanguages.Find(isoMatch.Groups["iso_code"].Value);

            return resolved?.Language ?? Language.Unknown;
        }
        else
        {
            // No short code: look for a full language name at the end of the file name.
            foreach (Language candidate in Enum.GetValues(typeof(Language)))
            {
                var candidateName = candidate.ToString();

                if (nameWithoutExtension.EndsWith(candidateName, StringComparison.OrdinalIgnoreCase))
                {
                    return candidate;
                }
            }

            Logger.Debug("Unable to parse language from subtitle file: {0}", fileName);
        }
    }
    catch (Exception ex)
    {
        // Best effort: a failure here must not propagate to the caller.
        Logger.Debug(ex, "Failed parsing language from subtitle file: {0}", fileName);
    }

    return Language.Unknown;
}
|
2017-08-19 05:24:59 +00:00
|
|
|
|
|
|
|
|
|
/// <summary>
/// Detects a language from abbreviations and markers in a title, using
/// <c>CaseSensitiveLanguageRegex</c> first and <c>LanguageRegex</c> second.
/// </summary>
/// <param name="title">Title to inspect, in its original casing.</param>
/// <returns>The matched language, or <c>Language.Unknown</c> when neither expression matches.</returns>
private static Language RegexLanguage(string title)
{
    // Case sensitive
    var upperCaseGroups = CaseSensitiveLanguageRegex.Match(title).Groups;

    if (upperCaseGroups["lithuanian"].Success)
    {
        return Language.Lithuanian;
    }

    if (upperCaseGroups["czech"].Success)
    {
        return Language.Czech;
    }

    // Case insensitive
    var markerMatch = LanguageRegex.Match(title);

    if (!markerMatch.Success)
    {
        // No alternative matched, so none of the named groups below can have captured.
        return Language.Unknown;
    }

    var markerGroups = markerMatch.Groups;

    if (markerGroups["italian"].Success)
    {
        return Language.Italian;
    }

    if (markerGroups["german"].Success)
    {
        return Language.German;
    }

    if (markerGroups["flemish"].Success)
    {
        return Language.Flemish;
    }

    if (markerGroups["greek"].Success)
    {
        return Language.Greek;
    }

    if (markerGroups["french"].Success)
    {
        return Language.French;
    }

    if (markerGroups["russian"].Success)
    {
        return Language.Russian;
    }

    if (markerGroups["dutch"].Success)
    {
        return Language.Dutch;
    }

    if (markerGroups["hungarian"].Success)
    {
        return Language.Hungarian;
    }

    if (markerGroups["hebrew"].Success)
    {
        return Language.Hebrew;
    }

    if (markerGroups["chinese"].Success)
    {
        return Language.Chinese;
    }

    return Language.Unknown;
}
|
2015-12-25 09:22:00 +00:00
|
|
|
|
}
|
|
|
|
|
}
|