using System;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using NLog;
using NzbDrone.Common.Instrumentation;
using NzbDrone.Core.Languages;

namespace NzbDrone.Core.Parser
{
    /// <summary>
    /// Detects the language of a release title or of a subtitle file name.
    /// </summary>
    public static class LanguageParser
    {
        private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(LanguageParser));

        // Keeps only the part of the title that starts at the season/episode token (S01, S01E02, ...).
        // Presumably this stops words in the series name itself from being read as a language - confirm.
        private static readonly RegexReplace[] CleanSeriesTitleRegex = new[]
        {
            new RegexReplace(@".*?[_. ](S\d{2}(?:E\d{2,4})*[_. ].*)", "$1", RegexOptions.Compiled | RegexOptions.IgnoreCase)
        };

        // Case-insensitive language tokens. Each alternative is a named group that RegexLanguage looks up.
        // Note: the leading (?:\W|_) binds to the first (italian) alternative only.
        private static readonly Regex LanguageRegex = new Regex(
            @"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR)(?:\W|_))|(?<russian>\brus\b)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<polish>\b(?:PL\W?DUB|DUB\W?PL|LEK\W?PL|PL\W?LEK)\b)|(?<chinese>\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        // Two-letter codes that are only matched in upper case (no IgnoreCase option).
        private static readonly Regex CaseSensitiveLanguageRegex = new Regex(
            @"(?<lithuanian>\bLT\b)|(?<czech>\bCZ\b)|(?<polish>\bPL\b)",
            RegexOptions.Compiled);

        // A 2-3 letter code at the very end of the name, preceded by one of '-', '_', '.' or ' '.
        private static readonly Regex SubtitleLanguageRegex = new Regex(
            ".+?[-_. ](?<iso_code>[a-z]{2,3})$",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        /// <summary>
        /// Determines the language of a release from its title.
        /// </summary>
        /// <param name="title">The release title to inspect.</param>
        /// <param name="defaultToEnglish">
        /// When true (the default), <see cref="Language.English"/> is returned if nothing matches;
        /// otherwise <see cref="Language.Unknown"/> is returned.
        /// </param>
        /// <returns>The first language recognised, or the fallback described above.</returns>
        public static Language ParseLanguage(string title, bool defaultToEnglish = true)
        {
            foreach (var regex in CleanSeriesTitleRegex)
            {
                if (regex.TryReplace(ref title))
                {
                    break;
                }
            }

            // Invariant casing: culture-sensitive ToLower() maps 'I' to a dotless 'ı' under
            // Turkish/Azeri cultures, which would stop "ENGLISH", "DANISH", ... from matching.
            var lowerTitle = title.ToLowerInvariant();

            // Full language names take precedence over the abbreviations handled by RegexLanguage.
            if (lowerTitle.Contains("english"))
            {
                return Language.English;
            }

            if (lowerTitle.Contains("french"))
            {
                return Language.French;
            }

            if (lowerTitle.Contains("spanish"))
            {
                return Language.Spanish;
            }

            if (lowerTitle.Contains("danish"))
            {
                return Language.Danish;
            }

            if (lowerTitle.Contains("dutch"))
            {
                return Language.Dutch;
            }

            if (lowerTitle.Contains("japanese"))
            {
                return Language.Japanese;
            }

            if (lowerTitle.Contains("icelandic"))
            {
                return Language.Icelandic;
            }

            if (lowerTitle.Contains("mandarin") || lowerTitle.Contains("cantonese") || lowerTitle.Contains("chinese"))
            {
                return Language.Chinese;
            }

            if (lowerTitle.Contains("korean"))
            {
                return Language.Korean;
            }

            if (lowerTitle.Contains("russian"))
            {
                return Language.Russian;
            }

            if (lowerTitle.Contains("polish"))
            {
                return Language.Polish;
            }

            if (lowerTitle.Contains("vietnamese"))
            {
                return Language.Vietnamese;
            }

            if (lowerTitle.Contains("swedish"))
            {
                return Language.Swedish;
            }

            if (lowerTitle.Contains("norwegian"))
            {
                return Language.Norwegian;
            }

            if (lowerTitle.Contains("finnish"))
            {
                return Language.Finnish;
            }

            if (lowerTitle.Contains("turkish"))
            {
                return Language.Turkish;
            }

            if (lowerTitle.Contains("portuguese"))
            {
                return Language.Portuguese;
            }

            if (lowerTitle.Contains("hungarian"))
            {
                return Language.Hungarian;
            }

            if (lowerTitle.Contains("hebrew"))
            {
                return Language.Hebrew;
            }

            // The regexes get the original-case title: CaseSensitiveLanguageRegex depends on it.
            var regexLanguage = RegexLanguage(title);

            if (regexLanguage != Language.Unknown)
            {
                return regexLanguage;
            }

            return defaultToEnglish ? Language.English : Language.Unknown;
        }

        /// <summary>
        /// Determines the language of a subtitle file from the end of its file name
        /// (extension removed), e.g. a trailing ISO code or a trailing language name.
        /// </summary>
        /// <param name="fileName">The subtitle file name or path.</param>
        /// <returns>
        /// The detected language, or <see cref="Language.Unknown"/> when nothing matches
        /// or when parsing throws (the exception is logged at debug level, not rethrown).
        /// </returns>
        public static Language ParseSubtitleLanguage(string fileName)
        {
            try
            {
                Logger.Debug("Parsing language from subtitle file: {0}", fileName);

                var simpleFilename = Path.GetFileNameWithoutExtension(fileName);
                var languageMatch = SubtitleLanguageRegex.Match(simpleFilename);

                if (languageMatch.Success)
                {
                    // A trailing 2-3 letter token is treated as an ISO code; if the lookup yields
                    // nothing the result is Unknown and the name-suffix check below is not tried.
                    var isoCode = languageMatch.Groups["iso_code"].Value;
                    var isoLanguage = IsoLanguages.Find(isoCode);

                    return isoLanguage?.Language ?? Language.Unknown;
                }

                // NOTE(review): Enum.GetValues requires Language to be an enum type; if it is not,
                // this throws ArgumentException, which the catch below swallows - confirm.
                foreach (Language language in Enum.GetValues(typeof(Language)))
                {
                    if (simpleFilename.EndsWith(language.ToString(), StringComparison.OrdinalIgnoreCase))
                    {
                        return language;
                    }
                }

                Logger.Debug("Unable to parse language from subtitle file: {0}", fileName);
            }
            catch (Exception ex)
            {
                // Deliberately best-effort: a failure to parse must not break the caller.
                Logger.Debug(ex, "Failed parsing language from subtitle file: {0}", fileName);
            }

            return Language.Unknown;
        }

        /// <summary>
        /// Looks for abbreviated language tokens in <paramref name="title"/>, trying the
        /// case-sensitive codes first and the case-insensitive tokens second.
        /// </summary>
        /// <param name="title">The title to scan, in its original casing.</param>
        /// <returns>The matched language, or <see cref="Language.Unknown"/> if none matched.</returns>
        private static Language RegexLanguage(string title)
        {
            // Case sensitive
            var caseSensitiveMatch = CaseSensitiveLanguageRegex.Match(title);

            if (caseSensitiveMatch.Groups["lithuanian"].Success)
            {
                return Language.Lithuanian;
            }

            if (caseSensitiveMatch.Groups["czech"].Success)
            {
                return Language.Czech;
            }

            if (caseSensitiveMatch.Groups["polish"].Success)
            {
                return Language.Polish;
            }

            // Case insensitive
            var match = LanguageRegex.Match(title);

            if (match.Groups["italian"].Success)
            {
                return Language.Italian;
            }

            if (match.Groups["german"].Success)
            {
                return Language.German;
            }

            if (match.Groups["flemish"].Success)
            {
                return Language.Flemish;
            }

            if (match.Groups["greek"].Success)
            {
                return Language.Greek;
            }

            if (match.Groups["french"].Success)
            {
                return Language.French;
            }

            if (match.Groups["russian"].Success)
            {
                return Language.Russian;
            }

            // LanguageRegex defines no "dutch" group, so this lookup never succeeds today;
            // titles containing "dutch" are handled by the keyword check in ParseLanguage.
            if (match.Groups["dutch"].Success)
            {
                return Language.Dutch;
            }

            if (match.Groups["hungarian"].Success)
            {
                return Language.Hungarian;
            }

            if (match.Groups["hebrew"].Success)
            {
                return Language.Hebrew;
            }

            if (match.Groups["polish"].Success)
            {
                return Language.Polish;
            }

            if (match.Groups["chinese"].Success)
            {
                return Language.Chinese;
            }

            return Language.Unknown;
        }
    }
}