diff --git a/README.md b/README.md
index 71497bd49..65d10d171 100644
--- a/README.md
+++ b/README.md
@@ -233,6 +233,7 @@ Developer note: The software implements the [Torznab](https://github.com/Sonarr/
  * DigitalCore
  * DigitalHive
  * DivTeam
+ * DivxTotal
  * DocumentaryTorrents (DT)
  * Downloadville
  * Dragonworld Reloaded
diff --git a/src/Jackett.Common/Indexers/DivxTotal.cs b/src/Jackett.Common/Indexers/DivxTotal.cs
new file mode 100644
index 000000000..e1ff417a2
--- /dev/null
+++ b/src/Jackett.Common/Indexers/DivxTotal.cs
@@ -0,0 +1,380 @@
+using System;
+using System.Collections.Generic;
+using System.Collections.Specialized;
+using System.Globalization;
+using System.Linq;
+using System.Net;
+using System.Text;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks;
+using AngleSharp.Dom;
+using AngleSharp.Html.Parser;
+
+using Jackett.Common.Models;
+using Jackett.Common.Models.IndexerConfig;
+using Jackett.Common.Services.Interfaces;
+using Jackett.Common.Utils;
+using Newtonsoft.Json.Linq;
+using NLog;
+using WebClient = Jackett.Common.Utils.Clients.WebClient;
+using static Jackett.Common.Models.IndexerConfig.ConfigurationData;
+
+namespace Jackett.Common.Indexers
+{
+    /// <summary>
+    /// Indexer for DivxTotal, a public Spanish site. Releases are scraped from the HTML
+    /// search pages; each series result is expanded into one release per episode row.
+    /// </summary>
+    public class DivxTotal : BaseWebIndexer
+    {
+        // A search page with fewer data rows than this is treated as the last one.
+        private const int MaxResultsPerPage = 15;
+        // Maximum number of search pages requested for a single query.
+        private const int MaxSearchPageLimit = 3;
+        // Size used when the site gives none (episodes) or it cannot be parsed.
+        private const long DefaultFileSize = 524288000; // 500 MB
+
+        // Matches season/episode markers such as "1x12" (the "x" is case-insensitive).
+        private static readonly Regex SeasonEpisodeRegex =
+            new Regex("(([0-9]+)x([0-9]+))", RegexOptions.IgnoreCase);
+
+        public DivxTotal(IIndexerConfigurationService configService, WebClient w, Logger l, IProtectionService ps)
+            : base(name: "DivxTotal",
+                description: "DivxTotal is a SPANISH site for Movies, TV series and Software",
+                link: "https://www.divxtotal.la/",
+                caps: new TorznabCapabilities(),
+                configService: configService,
+                client: w,
+                logger: l,
+                p: ps,
+                configData: new ConfigurationData())
+        {
+            Encoding = Encoding.UTF8;
+            Language = "es-es";
+            Type = "public";
+
+            // User option: only return results whose title contains every query word.
+            var matchWords = new BoolItem() { Name = "Match words in title", Value = true };
+            configData.AddDynamic("MatchWords", matchWords);
+
+            // Keys are the category path segment taken from result links.
+            AddCategoryMapping("peliculas", TorznabCatType.MoviesSD);
+            AddCategoryMapping("peliculas-hd", TorznabCatType.MoviesSD);
+            AddCategoryMapping("peliculas-3-d", TorznabCatType.MoviesHD);
+            AddCategoryMapping("peliculas-dvdr", TorznabCatType.MoviesDVD);
+            AddCategoryMapping("series", TorznabCatType.TVSD);
+            AddCategoryMapping("programas", TorznabCatType.PC);
+            AddCategoryMapping("otros", TorznabCatType.OtherMisc);
+        }
+
+        /// <summary>
+        /// Loads the configuration and validates it by running an empty query, which must
+        /// return at least one release.
+        /// </summary>
+        public override async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
+        {
+            configData.LoadValuesFromJson(configJson);
+            var releases = await PerformQuery(new TorznabQuery());
+
+            await ConfigureIfOK(string.Empty, releases.Any(), () =>
+            {
+                throw new Exception("Could not find releases from this URL");
+            });
+
+            return IndexerConfigurationStatus.Completed;
+        }
+
+        /// <summary>
+        /// Searches the site, requesting up to MaxSearchPageLimit pages, and returns the
+        /// releases parsed from the result tables.
+        /// </summary>
+        protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
+        {
+            var releases = new List<ReleaseInfo>();
+
+            var queryStr = query.GetQueryString().Trim();
+            // word matching is pointless for an empty search term
+            var matchWords = queryStr != "" && ((BoolItem)configData.GetDynamic("MatchWords")).Value;
+
+            // TODO: remove year (2019) and episode (S01E02) to make it work with Sonarr
+            var qc = new NameValueCollection { { "s", queryStr } };
+
+            var page = 1;
+            var isLastPage = false;
+            do
+            {
+                var url = SiteLink + "page/" + page + "/?" + qc.GetQueryString();
+                var result = await RequestStringWithCookies(url);
+
+                if (result.Status != HttpStatusCode.OK)
+                    throw new ExceptionWithConfigData(result.Content, configData);
+
+                try
+                {
+                    var searchResultParser = new HtmlParser();
+                    var doc = searchResultParser.ParseDocument(result.Content);
+
+                    // a page without a results table has nothing to parse; stop paging
+                    var table = doc.QuerySelector("table.table");
+                    if (table == null)
+                        break;
+
+                    var rows = table.QuerySelectorAll("tr");
+                    isLastPage = rows.Length - 1 < MaxResultsPerPage; // rows includes the header
+                    foreach (var row in rows.Skip(1))
+                    {
+                        await ParseRelease(releases, row, queryStr, query.Categories, matchWords);
+                    }
+                }
+                catch (Exception ex)
+                {
+                    OnParseError(result.Content, ex);
+                }
+
+                page++; // update page number
+            } while (!isLastPage && page <= MaxSearchPageLimit);
+
+            return releases;
+        }
+
+        /// <summary>
+        /// Downloads the torrent. Links ending in ".torrent" are fetched directly; any other
+        /// link is treated as a details page from which the real link is extracted first.
+        /// </summary>
+        public override async Task<byte[]> Download(Uri link)
+        {
+            // for tv series we already have the link
+            var downloadUrl = link.ToString();
+            // for other categories we have to do another step
+            if (!downloadUrl.EndsWith(".torrent", StringComparison.Ordinal))
+            {
+                var result = await RequestStringWithCookies(downloadUrl);
+
+                if (result.Status != HttpStatusCode.OK)
+                    throw new ExceptionWithConfigData(result.Content, configData);
+
+                var searchResultParser = new HtmlParser();
+                var doc = searchResultParser.ParseDocument(result.Content);
+
+                var downloadAnchor = doc.QuerySelector("a[onclick*=\"/download/torrent.php\"]");
+                if (downloadAnchor == null)
+                    throw new ExceptionWithConfigData("Download link not found in " + downloadUrl, configData);
+
+                downloadUrl = OnclickToDownloadLink(downloadAnchor.GetAttribute("onclick"));
+            }
+
+            return await base.Download(new Uri(downloadUrl));
+        }
+
+        /// <summary>
+        /// Parses one row of the search results table and appends the resulting release(s),
+        /// unless the row is filtered out by category or by title word matching.
+        /// </summary>
+        private async Task ParseRelease(List<ReleaseInfo> releases, IElement row, string queryStr, int[] queryCats,
+            bool matchWords)
+        {
+            var anchor = row.QuerySelector("a");
+            if (anchor == null)
+                return; // a row without a link is not a release
+
+            var commentsLink = anchor.GetAttribute("href");
+            var title = anchor.TextContent.Trim();
+            // assumes an absolute link (https://host/category/...), so index 3 is the category
+            var cat = commentsLink.Split('/')[3];
+            var categories = MapTrackerCatToNewznab(cat);
+            var cells = row.QuerySelectorAll("td");
+            var publishDate = TryToParseDate(cells[2].TextContent.Trim(), DateTime.Now);
+            var size = TryToParseSize(cells[3].TextContent.Trim(), DefaultFileSize);
+
+            // return results only for requested categories (unmapped categories never match)
+            if (queryCats.Any() && !categories.Any(c => queryCats.Contains(c)))
+                return;
+
+            // match the words in the query with the titles
+            if (matchWords && !CheckTitleMatchWords(queryStr, title))
+                return;
+
+            // parsing is different for each category
+            if (cat == "series")
+            {
+                await ParseSeriesRelease(releases, title, commentsLink, cat, publishDate);
+                return;
+            }
+
+            switch (cat)
+            {
+                case "peliculas":
+                    title += " [DVDRip]";
+                    break;
+                case "peliculas-hd":
+                    title += " [HDRip]";
+                    break;
+                case "programas":
+                    title += " [Windows]";
+                    break;
+            }
+            GenerateRelease(releases, title, commentsLink, commentsLink, cat, publishDate, size);
+        }
+
+        /// <summary>
+        /// Requests the series page and appends one release per episode row found in its tables.
+        /// </summary>
+        private async Task ParseSeriesRelease(List<ReleaseInfo> releases, string title, string commentsLink,
+            string cat, DateTime publishDate)
+        {
+            var result = await RequestStringWithCookies(commentsLink);
+
+            if (result.Status != HttpStatusCode.OK)
+                throw new ExceptionWithConfigData(result.Content, configData);
+
+            var searchResultParser = new HtmlParser();
+            var doc = searchResultParser.ParseDocument(result.Content);
+
+            foreach (var table in doc.QuerySelectorAll("table.table"))
+            {
+                // the first row of every table is the header
+                foreach (var row in table.QuerySelectorAll("tr").Skip(1))
+                {
+                    var anchor = row.QuerySelector("a");
+                    var onclick = anchor?.GetAttribute("onclick");
+                    if (string.IsNullOrEmpty(onclick))
+                        continue; // no download link in this row
+
+                    var downloadLink = OnclickToDownloadLink(onclick);
+                    var episodePublishStr = row.QuerySelectorAll("td")[3].TextContent.Trim();
+                    var episodePublish = TryToParseDate(episodePublishStr, publishDate);
+
+                    // clean up the title
+                    var episodeTitle = TryToCleanSeriesTitle(title, anchor.TextContent.Trim()) + " [HDTV]";
+
+                    GenerateRelease(releases, episodeTitle, commentsLink, downloadLink, cat, episodePublish,
+                        DefaultFileSize);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Builds a release from the parsed values and appends it to <paramref name="releases"/>.
+        /// </summary>
+        private void GenerateRelease(List<ReleaseInfo> releases, string title, string commentsLink, string downloadLink,
+            string cat, DateTime publishDate, long size)
+        {
+            var link = new Uri(downloadLink);
+            releases.Add(new ReleaseInfo
+            {
+                Title = title + " [Spanish]",
+                Comments = new Uri(commentsLink),
+                Link = link,
+                Guid = link,
+                Category = MapTrackerCatToNewznab(cat),
+                PublishDate = publishDate,
+                Size = size,
+                // fixed values are reported for every release
+                Seeders = 1,
+                Peers = 2,
+                MinimumRatio = 0,
+                MinimumSeedTime = 0,
+                DownloadVolumeFactor = 0,
+                UploadVolumeFactor = 1
+            });
+        }
+
+        /// <summary>
+        /// Extracts the download link from an onclick attribute, where it is the base64-encoded
+        /// second quoted argument.
+        /// </summary>
+        private string OnclickToDownloadLink(string onclick)
+        {
+            // onclick="post('/download/torrent.php', {u: 'aHR0cHM6Ly93d3cuZGl2eHRvdGFlbnQ='});"
+            var parts = onclick.Split('\'');
+            if (parts.Length < 4)
+                throw new FormatException("Unexpected onclick format: " + onclick);
+
+            var base64EncodedBytes = Convert.FromBase64String(parts[3]);
+            return Encoding.UTF8.GetString(base64EncodedBytes);
+        }
+
+        /// <summary>
+        /// Returns true when every significant word of the query also appears in the title.
+        /// </summary>
+        private bool CheckTitleMatchWords(string queryStr, string title)
+        {
+            var titleWords = new HashSet<string>(ExtractWords(title));
+            return ExtractWords(queryStr).All(titleWords.Contains);
+        }
+
+        // Splits the text into words, drops words with 2 letters or less, lowercases them and
+        // removes accents.
+        private static IEnumerable<string> ExtractWords(string text)
+        {
+            return Regex.Matches(text, @"\b[\w']*\b")
+                .Cast<Match>()
+                .Where(m => m.Value.Length > 2)
+                .Select(m => RemoveAccents(m.Value.ToLowerInvariant()));
+        }
+
+        // Removes accents by decomposing the text and dropping the combining marks. Unlike a
+        // round trip through a legacy code page, this needs no extra encoding provider.
+        private static string RemoveAccents(string text)
+        {
+            var builder = new StringBuilder(text.Length);
+            foreach (var c in text.Normalize(NormalizationForm.FormD))
+            {
+                if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
+                    builder.Append(c);
+            }
+            return builder.ToString().Normalize(NormalizationForm.FormC);
+        }
+
+        /// <summary>
+        /// Turns an episode title such as "Superman1x12" into "Superman S01E12".
+        /// </summary>
+        private string TryToCleanSeriesTitle(string title, string episodeTitle)
+        {
+            // title = Superman
+            // episodeTitle = Superman1x12
+            var newTitle = episodeTitle;
+            // string.Replace throws when the text to search for is empty
+            if (!string.IsNullOrEmpty(title))
+                newTitle = newTitle.Replace(title, title + " ");
+
+            var m = SeasonEpisodeRegex.Match(newTitle);
+            if (m.Success)
+            {
+                var season = "S" + m.Groups[2].Value.PadLeft(2, '0');
+                var episode = "E" + m.Groups[3].Value.PadLeft(2, '0');
+                newTitle = newTitle.Replace(m.Groups[1].Value, season + episode);
+            }
+
+            // return Superman S01E12
+            return newTitle.Replace(" COMPLETA", "").Replace(" FINAL TEMPORADA", "");
+        }
+
+        /// <summary>
+        /// Parses a "dd-MM-yyyy" date, returning <paramref name="dateDefault"/> on failure.
+        /// </summary>
+        private DateTime TryToParseDate(string dateToParse, DateTime dateDefault)
+        {
+            DateTime date;
+            var parsed = DateTime.TryParseExact(dateToParse, "dd-MM-yyyy", CultureInfo.InvariantCulture,
+                DateTimeStyles.None, out date);
+            return parsed ? date : dateDefault;
+        }
+
+        /// <summary>
+        /// Parses a size string, returning <paramref name="sizeDefault"/> on failure.
+        /// </summary>
+        private long TryToParseSize(string sizeToParse, long sizeDefault)
+        {
+            try
+            {
+                return ReleaseInfo.GetBytes(sizeToParse);
+            }
+            catch (Exception)
+            {
+                // deliberate best effort: fall back to the default size
+                return sizeDefault;
+            }
+        }
+    }
+}