2020-02-09 02:35:16 +00:00
using System ;
2017-10-17 10:19:03 +00:00
using System.Collections.Generic ;
using System.Collections.Specialized ;
2020-05-03 23:35:52 +00:00
using System.Diagnostics.CodeAnalysis ;
2017-10-17 10:19:03 +00:00
using System.Globalization ;
using System.Linq ;
using System.Text ;
using System.Text.RegularExpressions ;
using System.Threading.Tasks ;
using AngleSharp.Dom ;
2019-01-20 00:09:27 +00:00
using AngleSharp.Html.Parser ;
2018-03-10 08:05:56 +00:00
using Jackett.Common.Models ;
using Jackett.Common.Models.IndexerConfig ;
using Jackett.Common.Services.Interfaces ;
using Jackett.Common.Utils ;
using Jackett.Common.Utils.Clients ;
2017-10-17 10:19:03 +00:00
using Newtonsoft.Json.Linq ;
using NLog ;
2018-03-10 08:05:56 +00:00
namespace Jackett.Common.Indexers
2017-10-17 10:19:03 +00:00
{
2020-05-03 23:35:52 +00:00
[ExcludeFromCodeCoverage]
2020-02-10 22:16:19 +00:00
internal class LostFilm : BaseWebIndexer
2017-10-17 10:19:03 +00:00
{
2020-10-19 21:19:10 +00:00
/// <summary>
/// Site addresses registered as legacy links for this indexer.
/// </summary>
public override string[] LegacySiteLinks { get; protected set; } = new[]
{
    "https://lostfilm.site",
    "https://lostfilm.tw/"
};
2021-02-23 05:38:00 +00:00
/// <summary>
/// Mirror addresses registered as alternative links for this indexer.
/// </summary>
public override string[] AlternativeSiteLinks { get; protected set; } = new[]
{
    "https://www.lostfilm.run/",
    "https://www.lostfilmtv.site/",
    "https://www.lostfilm.tv/",
    "https://www.lostfilm.win/",
    "https://www.lostfilm.tw/",
    "https://www.lostfilmtv2.site/",
    "https://www.lostfilm.uno/"
};
2020-02-10 22:16:19 +00:00
// Matches the PlayEpisode('<id><season><episode>') call found in "onclick" handlers.
// The argument is 1-3 digits of series id followed by a 3-digit season and a 3-digit episode.
private static readonly Regex parsePlayEpisodeRegex = new Regex(
    "PlayEpisode\\('(?<id>\\d{1,3})(?<season>\\d{3})(?<episode>\\d{3})'\\)",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Extracts the "quality" and "size" parts from a release description line
// shaped like "Видео: <quality>. Размер: <size>. Перевод ...".
private static readonly Regex parseReleaseDetailsRegex = new Regex(
    "Видео:\\ (?<quality>.+).\\ Размер:\\ (?<size>.+).\\ Перевод",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);
2020-02-25 16:08:03 +00:00
// Login page, e.g. http://www.lostfilm.tv/login
private string LoginUrl => $"{SiteLink}login";

// AJAX endpoint used by this indexer for login, logout and series search requests.
private string ApiUrl => $"{SiteLink}ajaxik.php";

// Latest releases page, e.g. http://www.lostfilm.tv/new
private string DiscoveryUrl => $"{SiteLink}new";

// Search page, e.g. http://www.lostfilm.tv/search?q=breaking+bad
private string SearchUrl => $"{SiteLink}search";

// Release lookup page. The PlayEpisode function produces urls like this:
// https://www.lostfilm.tv/v_search.php?c=119&s=5&e=16
private string ReleaseUrl => $"{SiteLink}v_search.php";
2017-10-17 10:19:03 +00:00
/// <summary>
/// Holds the series id, season and episode numbers that identify a release
/// on the tracker lookup page.
/// </summary>
internal class TrackerUrlDetails
{
    internal string seriesId { get; private set; }
    internal string season { get; private set; }
    internal string episode { get; private set; }

    /// <summary>Creates the details from already known values.</summary>
    internal TrackerUrlDetails(string seriesId, string season, string episode)
    {
        this.seriesId = seriesId;
        this.season = season;
        this.episode = episode;
    }

    /// <summary>
    /// Creates the details from a button whose "onclick" attribute holds a PlayEpisode('...') call.
    /// Leading zeros are stripped from every extracted number.
    /// </summary>
    internal TrackerUrlDetails(IElement button)
    {
        var onClick = button.GetAttribute("onclick");
        var groups = parsePlayEpisodeRegex.Match(onClick).Groups;

        seriesId = groups["id"].Value.TrimStart('0');
        season = groups["season"].Value.TrimStart('0');
        episode = groups["episode"].Value.TrimStart('0');
    }

    // TODO: see if query.GetEpisodeString() is sufficient
    /// <summary>
    /// Builds an "S{season}" or "S{season}E{episode}" label.
    /// Returns an empty string when the season is empty, "0" or "999";
    /// the episode part is omitted under the same conditions.
    /// </summary>
    internal string GetEpisodeString()
    {
        if (!IsConcreteNumber(season))
        {
            return string.Empty;
        }

        var label = "S" + season;
        if (IsConcreteNumber(episode))
        {
            label += "E" + episode;
        }

        return label;
    }

    // A number is usable in the label unless it is missing, "0" or the "999" placeholder.
    private static bool IsConcreteNumber(string number) =>
        !string.IsNullOrEmpty(number) && number != "0" && number != "999";
}
2020-02-10 22:16:19 +00:00
// Typed view over the base class configuration, hiding the base member of the same name.
private new ConfigurationDataCaptchaLogin configData
{
    get
    {
        return (ConfigurationDataCaptchaLogin)base.configData;
    }
    set
    {
        base.configData = value;
    }
}
2020-12-11 22:14:21 +00:00
/// <summary>
/// Creates the indexer: declares its identity and search capabilities to the base class,
/// then sets encoding, language and type, registers two trusted certificate
/// thumbprints for the current site host and maps the single TV category.
/// </summary>
public LostFilm(
    IIndexerConfigurationService configService,
    WebClient wc,
    Logger l,
    IProtectionService ps,
    ICacheService cs)
    : base(
        id: "lostfilm",
        name: "LostFilm.tv",
        description: "Unique portal about foreign series",
        link: "https://www.lostfilm.run/",
        caps: new TorznabCapabilities
        {
            TvSearchParams = new List<TvSearchParam>
            {
                TvSearchParam.Q,
                TvSearchParam.Season,
                TvSearchParam.Ep
            },
            MovieSearchParams = new List<MovieSearchParam>
            {
                MovieSearchParam.Q
            }
        },
        configService: configService,
        client: wc,
        logger: l,
        p: ps,
        cacheService: cs,
        configData: new ConfigurationDataCaptchaLogin())
{
    Encoding = Encoding.UTF8;
    Language = "ru-RU";
    Type = "semi-private";

    // Both thumbprints are registered against the host of the currently configured site link.
    var siteHost = new Uri(SiteLink).Host;
    webclient.AddTrustedCertificate(siteHost, "98D43B6E740B42C02A9BD1A9D1A813E4350BE332"); // for *.win expired 26/Mar/22
    webclient.AddTrustedCertificate(siteHost, "34287FB53A58EC6AE590E7DD7E03C70C0263CADC"); // for *.tw expired 01/Apr/21

    // TODO: review if there is only this category (movie search is enabled)
    AddCategoryMapping(1, TorznabCatType.TV);
}
2018-03-26 11:23:31 +00:00
/// <summary>
/// Loads the login page and, when it shows a captcha image, downloads that image
/// into the configuration so it can be presented during setup.
/// The login page cookies are stored in the configuration and in the cookie header.
/// </summary>
/// <returns>The configuration data, with the captcha image set (empty when none is shown).</returns>
public override async Task<ConfigurationData> GetConfigurationForSetup()
{
    // looks like after some failed login attempts there's a captcha
    var loginPage = await RequestWithCookiesAsync(LoginUrl, string.Empty);
    var parser = new HtmlParser();
    var document = parser.ParseDocument(loginPage.ContentString);
    var captchaSrc = document.QuerySelector("img#captcha_pictcha")?.GetAttribute("src");

    if (!string.IsNullOrEmpty(captchaSrc))
    {
        // SiteLink is joined with relative links elsewhere in this class by trimming
        // the leading slash; do the same here to avoid a double slash in the url.
        var captchaUrl = SiteLink + captchaSrc.TrimStart('/');
        var captchaImage = await RequestWithCookiesAsync(captchaUrl, loginPage.Cookies);
        configData.CaptchaImage.Value = captchaImage.ContentBytes;
    }
    else
    {
        // No captcha image (or an image without a source): nothing to show.
        configData.CaptchaImage.Value = Array.Empty<byte>();
    }

    configData.CaptchaCookie.Value = loginPage.Cookies;
    UpdateCookieHeader(loginPage.Cookies);
    return configData;
}
2017-10-17 10:19:03 +00:00
/// <summary>
/// Loads the submitted configuration, logs out of any previous session and logs in
/// through the AJAX endpoint with the configured e-mail, password and optional captcha text.
/// </summary>
/// <param name="configJson">Configuration values passed to LoadValuesFromJson.</param>
/// <returns><see cref="IndexerConfigurationStatus.RequiresTesting"/> after the login attempt was handed to ConfigureIfOK.</returns>
/// <exception cref="ExceptionWithConfigData">
/// The username is not an e-mail address, or the login response does not report success.
/// </exception>
public override async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
{
    logger.Debug("Applying configuration");
    LoadValuesFromJson(configJson);

    var username = configData.Username.Value;
    if (string.IsNullOrEmpty(username) || !username.Contains("@"))
    {
        throw new ExceptionWithConfigData("Username must be an e-mail address", configData);
    }

    // Performing Logout is required to invalidate previous session otherwise the `{"error":1,"result":"ok"}` will be returned.
    await Logout();

    var data = new Dictionary<string, string>
    {
        { "act", "users" },
        { "type", "login" },
        { "mail", username },
        { "pass", configData.Password.Value },
        { "rem", "1" }
    };

    if (!string.IsNullOrWhiteSpace(configData.CaptchaText.Value))
    {
        data.Add("need_captcha", "1");
        data.Add("captcha", configData.CaptchaText.Value);
    }

    var result = await RequestLoginAndFollowRedirect(ApiUrl, data, CookieHeader, true, SiteLink, ApiUrl, true);
    var content = result.ContentString;
    var loggedIn = content != null && content.Contains("\"success\":true");

    await ConfigureIfOK(result.Cookies, loggedIn, () =>
    {
        // The response body may be missing entirely; never dereference it blindly here.
        string errorMessage;
        if (string.IsNullOrWhiteSpace(content))
        {
            errorMessage = "Empty response from " + ApiUrl;
        }
        else if (content.Contains("\"error\":2"))
        {
            errorMessage = "Captcha is incorrect";
        }
        else if (content.Contains("\"error\":3"))
        {
            errorMessage = "E-mail or password is incorrect";
        }
        else
        {
            errorMessage = content;
        }

        throw new ExceptionWithConfigData(errorMessage, configData);
    });

    return IndexerConfigurationStatus.RequiresTesting;
}
2020-02-10 22:16:19 +00:00
/// <summary>
/// Posts a logout request to the AJAX endpoint.
/// </summary>
/// <returns><c>true</c> when the response status is HTTP 200 OK; otherwise <c>false</c>.</returns>
private async Task<bool> Logout()
{
    logger.Info("Performing logout");

    var payload = new Dictionary<string, string>
    {
        ["act"] = "users",
        ["type"] = "logout"
    };
    var response = await RequestWithCookiesAsync(ApiUrl, method: RequestType.POST, data: payload);
    logger.Debug("Logout result: " + response.ContentString);

    if (response.Status == System.Net.HttpStatusCode.OK)
    {
        return true;
    }

    logger.Error("Logout failed with response: " + response.ContentString);
    return false;
}
#region Query
2017-10-17 10:19:03 +00:00
/// <summary>
/// Entry point for queries: test queries and queries without a search term return
/// the latest releases, everything else goes through the series search.
/// </summary>
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
{
    logger.Debug("PerformQuery: " + query.GetQueryString());

    // If the search string is empty use the latest releases
    var wantsLatest = query.IsTest || string.IsNullOrWhiteSpace(query.SearchTerm);
    if (wantsLatest)
    {
        return await FetchNewReleases();
    }

    return await PerformSearch(query);
}
2020-06-10 21:22:29 +00:00
/// <summary>
/// Requests <paramref name="url"/> and, when the page contains a link to the login page,
/// re-applies the configuration (logging in again) and repeats the request once.
/// </summary>
/// <param name="url">Absolute url to request.</param>
/// <returns>The response of the first request, or of the repeated request after a re-login.</returns>
/// <exception cref="ExceptionWithConfigData">The response body contains "503 Service".</exception>
private async Task<WebResult> RequestStringAndRelogin(string url)
{
    var results = await RequestWithCookiesAsync(url);

    // The body can be missing; treat that as "no markers found" instead of failing with a null reference.
    var content = results.ContentString ?? string.Empty;

    if (content.Contains("503 Service"))
    {
        throw new ExceptionWithConfigData(content, configData);
    }

    if (!content.Contains("href=\"/login\""))
    {
        return results;
    }

    // Re-login
    await ApplyConfiguration(null);
    return await RequestWithCookiesAsync(url);
}
/// <summary>
/// Searches series through the AJAX endpoint, progressively shortening the search term,
/// and then fetches either a single episode page or the (optionally filtered) season pages
/// of every matching series.
/// </summary>
/// <param name="query">Query providing the sanitized search term, season and episode.</param>
/// <returns>All releases collected for the matching series; empty when nothing was found.</returns>
private async Task<List<ReleaseInfo>> PerformSearch(TorznabQuery query)
{
    logger.Debug("PerformSearch: " + query.SanitizedSearchTerm + " [" + query.QueryType + "]");
    var releases = new List<ReleaseInfo>();

    /*
    A Torznab query for some series may contain a sanitized title, e.g. "Star Wars: The Clone Wars" becomes "Star Wars The Clone Wars".
    The search API on LostFilm.tv returns nothing for such a query, so the query has to be "morphed" even for "tvsearch" queries.
    Also, queries for Specials are a union of the Series and Episode titles, e.g. "Breaking Bad - El Camino: A Breaking Bad Movie".

    The algorithm works in the following way:

        1. Search with the full SearchTerm. As an example, let's search for an episode by its name:
            - {Star Wars The Clone Wars To Catch a Jedi}
        2. [loop] If nothing was found, repeat the search with the SearchTerm reduced by 1 word from the end.
           Fail the search if no words are left and no results were obtained:
            - {Star Wars The Clone Wars To Catch a} Jedi
            - {Star Wars The Clone Wars To Catch} a Jedi
            - ...
            - {Star Wars} The Clone Wars To Catch a Jedi
        3. When a few results were returned, try to filter them with the words excluded before:
            - [Star Wars: The Clone Wars, Star Wars Rebels, Star Wars: Forces of Destiny]
                .filterBy(The Clone Wars To Catch a Jedi)
        4. [loop] Reduce the filter term by 1 word from the end. Stop filtering if no words are left:
                .filterBy(The Clone Wars To Catch a) / Jedi
                .filterBy(The Clone Wars To Catch) / a Jedi
                ...
                .filterBy(The Clone Wars) / To Catch a Jedi
        5. [loop] Now we know that the series we're looking for is called "Star Wars The Clone Wars".
           Fetch its page and apply the remaining words as an episode filter, reducing the filter by 1 word
           each time no results are returned:
            - .episodes().filteredBy(To Catch a Jedi)
            - .episodes().filteredBy(To Catch a) / Jedi
            - ...
            - .episodes() / To Catch a Jedi

    Test queries:
        - "Star Wars The Clone Wars To Catch a Jedi"    -> S05E19
        - "Breaking Bad El Camino A Breaking Bad Movie" -> Special
        - "The Magicians (2015)"                        -> Year should be ignored
    */

    // Reads the original title of a series entry; an absent title is treated as empty.
    Func<JToken, string> originalTitle = s => s["title_orig"]?.Value<string>() ?? string.Empty;

    // Search query words. Consists of Series keywords that will be used for series search request, and Episode keywords that will be used for episode filtering.
    var keywords = query.SanitizedSearchTerm.Split(' ').ToList();
    // Keywords count related to Series Search.
    var searchKeywords = keywords.Count;

    do
    {
        // Keywords count related to Series Filter. Reset on every attempt so a value
        // computed for an earlier (failed) attempt cannot leak into this one.
        // Overall (keywords.count - searchKeywords - serieFilterKeywords) are related to episode filter
        var serieFilterKeywords = 0;

        var searchString = string.Join(" ", keywords.Take(searchKeywords));
        var data = new Dictionary<string, string>
        {
            { "act", "common" },
            { "type", "search" },
            { "val", searchString }
        };

        logger.Debug("> Searching: " + searchString);
        var response = await RequestWithCookiesAsync(ApiUrl, method: RequestType.POST, data: data);
        if (response.ContentString == null)
        {
            logger.Debug("> Empty series response for query: " + searchString);
            continue; // Search loop
        }

        try
        {
            // Only a JSON object can carry the "data" member; anything else is an invalid response.
            var json = JToken.Parse(response.ContentString) as JObject;
            if (json == null)
            {
                logger.Debug("> Invalid response for query: " + searchString);
                continue; // Search loop
            }

            // Protect from {"data":false,"result":"ok"} and from a missing "data" member
            var jsonData = json["data"];
            if (jsonData == null || jsonData.Type != JTokenType.Object)
            {
                continue; // Search loop
            }

            var jsonSeries = jsonData["series"];
            if (jsonSeries == null || !jsonSeries.HasValues)
            {
                continue; // Search loop
            }

            var series = jsonSeries.ToList();
            logger.Debug("> Found " + series.Count.ToString() + " series: [" + string.Join(", ", series.Select(originalTitle)) + "]");

            // Filter found series
            if (series.Count > 1)
            {
                serieFilterKeywords = keywords.Count - searchKeywords;
                do
                {
                    var serieFilter = string.Join(" ", keywords.GetRange(searchKeywords, serieFilterKeywords));
                    logger.Debug("> Filtering: " + serieFilter);
                    // Case-insensitive, like the episode title filter used when parsing season pages.
                    var filteredSeries = series
                        .Where(s => originalTitle(s).IndexOf(serieFilter, StringComparison.OrdinalIgnoreCase) >= 0)
                        .ToList();

                    if (filteredSeries.Count > 0)
                    {
                        logger.Debug("> Series filtered: [" + string.Join(", ", filteredSeries.Select(originalTitle)) + "]");
                        series = filteredSeries;
                        break; // Serie Filter loop
                    }
                }
                while (--serieFilterKeywords > 0);
            }

            foreach (var serie in series)
            {
                var link = serie["link"].ToString();
                var season = query.Season == 0 ? "/seasons" : "/season_" + query.Season.ToString();
                var url = SiteLink + link.TrimStart('/') + season;

                if (!string.IsNullOrEmpty(query.Episode)) // Fetch single episode releases
                {
                    // TODO: Add a togglable Quick Path via v_search.php in Indexer Settings
                    url += "/episode_" + query.Episode;
                    var taskReleases = await FetchEpisodeReleases(url);
                    releases.AddRange(taskReleases);
                }
                else // Fetch the whole series OR episode with filter applied
                {
                    var episodeKeywords = keywords.Skip(searchKeywords + serieFilterKeywords).ToList();
                    var episodeFilterKeywords = episodeKeywords.Count;

                    // Search for episodes dropping 1 filter word each time no results were found.
                    // Last search will be performed with empty filter
                    do
                    {
                        var filter = string.Join(" ", episodeKeywords.Take(episodeFilterKeywords));
                        logger.Debug("> Searching episodes with filter [" + filter + "]");
                        var taskReleases = await FetchSeriesReleases(url, query, filter);

                        if (taskReleases.Count > 0)
                        {
                            logger.Debug("> Found " + taskReleases.Count.ToString() + " episodes");
                            releases.AddRange(taskReleases);
                            break; // Episodes Filter loop
                        }
                    }
                    while (--episodeFilterKeywords >= 0);
                }
            }

            break; // Search loop
        }
        catch (Exception ex)
        {
            OnParseError(response.ContentString, ex);
        }
    }
    while (--searchKeywords > 0);

    return releases;
}
2017-11-07 00:16:05 +00:00
#endregion
2017-10-17 10:19:03 +00:00
#region Page parsing
/// <summary>
/// Loads the latest releases page and fetches the releases of every episode listed on it.
/// </summary>
/// <returns>Releases of all listed episodes; rows without an episode link are skipped.</returns>
private async Task<List<ReleaseInfo>> FetchNewReleases()
{
    var url = DiscoveryUrl;
    logger.Debug("FetchNewReleases: " + url);
    var results = await RequestWithCookiesAndRetryAsync(
        url, referer: SiteLink);
    var releases = new List<ReleaseInfo>();

    try
    {
        var parser = new HtmlParser();
        var document = parser.ParseDocument(results.ContentString);
        var rows = document.QuerySelectorAll("div.row");

        foreach (var row in rows)
        {
            // A row without an anchor (or with an empty href) carries no episode to follow;
            // skip it instead of letting it abort the whole listing.
            var link = row.QuerySelector("a")?.GetAttribute("href");
            if (string.IsNullOrEmpty(link))
            {
                logger.Debug("> Skipping row without episode link");
                continue;
            }

            var episodeUrl = SiteLink + link.TrimStart('/');
            var episodeReleases = await FetchEpisodeReleases(episodeUrl);
            releases.AddRange(episodeReleases);
        }
    }
    catch (Exception ex)
    {
        OnParseError(results.ContentString, ex);
    }

    return releases;
}
/// <summary>
/// Loads a single episode page and, when its play button is present and not marked
/// "inactive", fetches the tracker releases for that episode. Every release gets the
/// episode page as its details link and the date parsed from the page as publish date.
/// </summary>
/// <param name="url">Absolute url of the episode page.</param>
/// <returns>Releases of the episode; empty when the play button is missing or inactive.</returns>
private async Task<List<ReleaseInfo>> FetchEpisodeReleases(string url)
{
    logger.Debug("FetchEpisodeReleases: " + url);
    var response = await RequestStringAndRelogin(url);
    var collected = new List<ReleaseInfo>();

    try
    {
        var page = new HtmlParser().ParseDocument(response.ContentString);
        var playButton = page.QuerySelector("div.external-btn");

        if (playButton == null || playButton.ClassList.Contains("inactive"))
        {
            return collected;
        }

        var pageUri = new Uri(url);

        var rawDate = page.QuerySelector("div.title-block > div.details-pane > div.left-box").TextContent;
        var marker = rawDate.Contains("TBA") ? "ru: " : "eng: ";
        // '... Дата выхода eng: 09 марта 2012 г . ...' -> '09 марта 2012'
        // NOTE(review): the end marker literal is kept exactly as found; confirm its spacing against the live page.
        var dateText = TrimString(rawDate, marker, " г .");

        // dateText might be just a year, e.g. https://www.lostfilm.tv/series/Ghosted/season_1/episode_14/
        var published = dateText.Length == 4
            ? DateTime.ParseExact(dateText, "yyyy", CultureInfo.InvariantCulture).ToLocalTime()
            : DateTime.Parse(dateText, new CultureInfo(Language)); // dd mmmm yyyy

        var trackerReleases = await FetchTrackerReleases(new TrackerUrlDetails(playButton));
        foreach (var item in trackerReleases)
        {
            item.Details = pageUri;
            item.PublishDate = published;
        }

        collected.AddRange(trackerReleases);
    }
    catch (Exception ex)
    {
        OnParseError(response.ContentString, ex);
    }

    return collected;
}
/// <summary>
/// Loads a series season page (one season or all seasons) and collects releases from it:
/// season packs when neither an episode nor a title filter is requested and the pack is
/// available, otherwise the individual episode rows, optionally filtered by title and episode number.
/// </summary>
/// <param name="url">Absolute url of the season(s) page.</param>
/// <param name="query">Query providing the requested season and episode.</param>
/// <param name="filter">Case-insensitive text an episode title must contain; empty for no filtering.</param>
/// <returns>The collected releases; empty when nothing matched.</returns>
private async Task<List<ReleaseInfo>> FetchSeriesReleases(string url, TorznabQuery query, string filter)
{
    logger.Debug("FetchSeriesReleases: " + url + " S: " + query.Season.ToString() + " E: " + query.Episode + " Filter: " + filter);

    var releases = new List<ReleaseInfo>();
    var results = await RequestWithCookiesAsync(url);

    try
    {
        var parser = new HtmlParser();
        var document = parser.ParseDocument(results.ContentString);
        var seasons = document.QuerySelectorAll("div.serie-block");
        var rowSelector = "table.movie-parts-list > tbody > tr";

        foreach (var season in seasons)
        {
            // Could be null if serie-block is for Extras
            var seasonButton = season.QuerySelector("div.movie-details-block > div.external-btn");
            var seasonPackAvailable = seasonButton != null && !seasonButton.ClassList.Contains("inactive");

            // Process only season we're searching for
            if (seasonButton != null && query.Season > 0)
            {
                // If seasonButton is "inactive" it will not contain "onClick" handler. Better to parse element which always exists.
                var watchedButton = season.QuerySelector("div.movie-details-block > div.haveseen-btn");
                var buttonCode = watchedButton?.GetAttribute("data-code");
                if (string.IsNullOrEmpty(buttonCode))
                {
                    continue; // Can't determine the season of this block
                }

                var currentSeason = buttonCode.Substring(buttonCode.IndexOf('-') + 1);
                if (currentSeason != query.Season.ToString())
                {
                    continue; // Season does not match the searched one
                }

                // Stop parsing season episodes if season pack was required but it's not available yet.
                if (!seasonPackAvailable)
                {
                    logger.Debug("> No season pack is found for S" + query.Season.ToString());
                    break;
                }
            }

            // Fetch season pack releases if no episode filtering is required.
            // If seasonButton implements "inactive" class there are no season pack available and each episode should be fetched separately.
            if (string.IsNullOrEmpty(query.Episode) && string.IsNullOrEmpty(filter) && seasonPackAvailable)
            {
                // The first row of the season table provides the date used for the season pack.
                var lastEpisode = season.QuerySelector(rowSelector);
                var dateColumn = lastEpisode.QuerySelector("td.delta");
                var date = DateFromEpisodeColumn(dateColumn);

                var details = new Uri(url); // Current season(-s) page url

                var urlDetails = new TrackerUrlDetails(seasonButton);
                var seasonReleases = await FetchTrackerReleases(urlDetails);

                foreach (var release in seasonReleases)
                {
                    release.Details = details;
                    release.PublishDate = date;
                }

                releases.AddRange(seasonReleases);

                if (query.Season > 0)
                {
                    break; // Searched season was processed
                }

                // Skip parsing separate episodes if season pack was added
                if (seasonReleases.Count > 0)
                {
                    continue;
                }
            }

            // No season filtering was applied OR season pack is not available
            var rows = season.QuerySelectorAll(rowSelector).Where(s => !s.ClassList.Contains("not-available"));
            foreach (var row in rows)
            {
                var couldBreak = false; // Set to `true` if searched episode was found

                try
                {
                    if (!string.IsNullOrEmpty(filter))
                    {
                        var titles = row.QuerySelector("td.gamma > div");
                        if (titles.TextContent.IndexOf(filter, StringComparison.OrdinalIgnoreCase) == -1)
                        {
                            continue;
                        }
                    }

                    var playButton = row.QuerySelector("td.zeta > div.external-btn");
                    if (playButton == null) // #9725
                    {
                        continue;
                    }

                    if (!string.IsNullOrEmpty(query.Episode))
                    {
                        var match = parsePlayEpisodeRegex.Match(playButton.GetAttribute("onclick") ?? string.Empty);
                        // The regex captures a zero-padded 3-digit episode ("016"); strip the padding
                        // on both sides so it can actually equal the requested episode ("16").
                        if (!match.Success ||
                            match.Groups["episode"].Value.TrimStart('0') != query.Episode.TrimStart('0'))
                        {
                            continue;
                        }

                        couldBreak = true;
                    }

                    var dateColumn = row.QuerySelector("td.delta"); // Contains both Date and EpisodeURL
                    var date = DateFromEpisodeColumn(dateColumn);

                    var link = dateColumn.GetAttribute("onclick"); // goTo('/series/Prison_Break/season_5/episode_9/',false)
                    link = TrimString(link, '\'', '\'');
                    var episodeUrl = SiteLink + link.TrimStart('/');
                    var details = new Uri(episodeUrl);

                    var urlDetails = new TrackerUrlDetails(playButton);
                    var episodeReleases = await FetchTrackerReleases(urlDetails);

                    foreach (var release in episodeReleases)
                    {
                        release.Details = details;
                        release.PublishDate = date;
                    }

                    releases.AddRange(episodeReleases);
                }
                catch (Exception ex)
                {
                    // A broken row is logged and skipped so the remaining rows are still processed.
                    logger.Error(string.Format("{0}: Error while parsing row '{1}':\n\n{2}", Id, row.OuterHtml, ex));
                }

                if (couldBreak)
                {
                    break;
                }
            }
        }
    }
    catch (Exception ex)
    {
        OnParseError(results.ContentString, ex);
    }

    return releases;
}
#endregion
#region Tracker parsing
/// <summary>
/// Requests the release lookup page for the given series/season/episode, reads the
/// redirection target from the first "meta" element and follows it to collect releases.
/// </summary>
/// <param name="details">Series id, season and episode; an empty episode is sent as "999".</param>
/// <returns>Releases found behind the redirection; an empty list on the failure path.</returns>
/// <exception cref="ExceptionWithConfigData">
/// The response has no body, or the body is exactly "log in first".
/// </exception>
private async Task<List<ReleaseInfo>> FetchTrackerReleases(TrackerUrlDetails details)
{
    // 999 is a synonym for the whole serie
    var episodeParameter = string.IsNullOrEmpty(details.episode) ? "999" : details.episode;

    var parameters = new NameValueCollection();
    parameters.Add("c", details.seriesId);
    parameters.Add("s", details.season);
    parameters.Add("e", episodeParameter);

    var url = ReleaseUrl + "?" + parameters.GetQueryString();
    logger.Debug("FetchTrackerReleases: " + url);

    // Get redirection page with generated link on it. This link can't be constructed manually as it contains Hash field and hashing algo is unknown.
    var results = await RequestWithCookiesAsync(url);
    var body = results.ContentString;

    if (body == null)
    {
        throw new ExceptionWithConfigData("Empty response from " + url, configData);
    }

    if (body == "log in first")
    {
        throw new ExceptionWithConfigData(body, configData);
    }

    try
    {
        var metaContent = new HtmlParser()
            .ParseDocument(body)
            .QuerySelector("meta")
            .GetAttribute("content");

        // Follow redirection defined by async url.replace
        var httpStart = metaContent.IndexOf("http");
        return await FollowTrackerRedirection(metaContent.Substring(httpStart), details);
    }
    catch (Exception ex)
    {
        OnParseError(body, ex);
    }

    // Failure path
    return new List<ReleaseInfo>();
}
/// <summary>
/// Loads the tracker page behind a redirection link and turns every listed item
/// into a release with a constructed English title, download link and size.
/// </summary>
/// <param name="url">Redirection target taken from the release lookup page.</param>
/// <param name="details">Series/season/episode details used for the episode part of the title.</param>
/// <returns>The parsed releases; rows that fail to parse are logged and skipped.</returns>
private async Task<List<ReleaseInfo>> FollowTrackerRedirection(string url, TrackerUrlDetails details)
{
    logger.Debug("FollowTrackerRedirection: " + url);
    var results = await RequestWithCookiesAsync(url);
    var releases = new List<ReleaseInfo>();

    try
    {
        var parser = new HtmlParser();
        var document = parser.ParseDocument(results.ContentString);
        var rows = document.QuerySelectorAll("div.inner-box--item");

        logger.Debug("> Parsing " + rows.Count().ToString() + " releases");

        // Drop everything after the last comma of the subtitle; keep it whole when there is no comma.
        var serieTitle = document.QuerySelector("div.inner-box--subtitle").TextContent;
        var lastComma = serieTitle.LastIndexOf(',');
        if (lastComma >= 0)
        {
            serieTitle = serieTitle.Substring(0, lastComma);
        }

        var episodeInfo = document.QuerySelector("div.inner-box--text").TextContent;
        var episodeName = TrimString(episodeInfo, '(', ')');

        foreach (var row in rows)
        {
            try
            {
                var detailsInfo = row.QuerySelector("div.inner-box--desc").TextContent;
                var releaseDetails = parseReleaseDetailsRegex.Match(detailsInfo);

                // Regex.Match never returns null; a failed match is reported through Success.
                if (!releaseDetails.Success)
                {
                    throw new FormatException("Failed to map release details string: " + detailsInfo);
                }

                /*
                 * For supported qualities see:
                 *  - TvCategoryParser.cs
                 *  - https://github.com/SickRage/SickRage/wiki/Quality-Settings#quality-names-to-recognize-the-quality-of-a-file
                 */
                var quality = releaseDetails.Groups["quality"].Value.Trim();
                // Adapt the site's quality format to the names common parsers recognize
                quality = Regex.Replace(quality, "-Rip", "Rip", RegexOptions.IgnoreCase);
                quality = Regex.Replace(quality, "WEB-DLRip", "WEBDL", RegexOptions.IgnoreCase);
                quality = Regex.Replace(quality, "WEB-DL", "WEBDL", RegexOptions.IgnoreCase);
                quality = Regex.Replace(quality, "HDTVRip", "HDTV", RegexOptions.IgnoreCase);
                // Fix forgotten p-Progressive suffix in resolution index
                quality = Regex.Replace(quality, "1080 ", "1080p ", RegexOptions.IgnoreCase);
                quality = Regex.Replace(quality, "720 ", "720p ", RegexOptions.IgnoreCase);

                var techComponents = new[]
                {
                    "rus",
                    quality,
                    "(LostFilm)"
                };
                var techInfo = string.Join(" ", techComponents.Where(s => !string.IsNullOrEmpty(s)));

                // Ru title: downloadLink.TextContent.Replace("\n", "");
                // En title should be manually constructed.
                var titleComponents = new[]
                {
                    serieTitle, details.GetEpisodeString(), episodeName, techInfo
                };
                var downloadLink = row.QuerySelector("div.inner-box--link > a");

                // Upper-case independently of the current culture, then map Cyrillic units to Latin ones.
                var sizeString = releaseDetails.Groups["size"].Value.ToUpperInvariant();
                sizeString = sizeString.Replace("ТБ", "TB"); // untested
                sizeString = sizeString.Replace("ГБ", "GB");
                sizeString = sizeString.Replace("МБ", "MB");
                sizeString = sizeString.Replace("КБ", "KB"); // untested
                var link = new Uri(downloadLink.GetAttribute("href"));

                // TODO this feels sparse compared to other trackers. Expand later
                var release = new ReleaseInfo
                {
                    Category = new[] { TorznabCatType.TV.ID },
                    Title = string.Join(" - ", titleComponents.Where(s => !string.IsNullOrEmpty(s))),
                    Link = link,
                    Guid = link,
                    Size = ReleaseInfo.GetBytes(sizeString),
                    // add missing torznab fields not available from results
                    Seeders = 1,
                    Peers = 2,
                    DownloadVolumeFactor = 0,
                    UploadVolumeFactor = 1,
                    MinimumRatio = 1,
                    MinimumSeedTime = 172800 // 48 hours
                };

                // TODO Other trackers don't have this log line. Remove or add to other trackers?
                logger.Debug("> Add: " + release.Title);
                releases.Add(release);
            }
            catch (Exception ex)
            {
                // A broken row is logged and skipped so the remaining rows are still processed.
                logger.Error(string.Format("{0}: Error while parsing row '{1}':\n\n{2}", Id, row.OuterHtml, ex));
            }
        }
    }
    catch (Exception ex)
    {
        OnParseError(results.ContentString, ex);
    }

    return releases;
}
#endregion
#region Helpers
/// <summary>
/// Returns the text between the first <paramref name="startChar"/> and the last
/// <paramref name="endChar"/> of <paramref name="s"/>, both markers excluded.
/// </summary>
/// <param name="s">Text to cut; may be null.</param>
/// <param name="startChar">Marker preceding the wanted text.</param>
/// <param name="endChar">Marker following the wanted text.</param>
/// <returns>
/// The enclosed text, or <c>null</c> when the input is null or empty, a marker is missing,
/// or the end marker does not come after the start marker.
/// </returns>
private string TrimString(string s, char startChar, char endChar)
{
    if (string.IsNullOrEmpty(s))
    {
        return null;
    }

    var start = s.IndexOf(startChar);
    var end = s.LastIndexOf(endChar);

    // "end <= start" also covers a missing end marker (-1) and the case where both
    // markers are the same single character, which would give a negative length.
    if (start == -1 || end <= start)
    {
        return null;
    }

    return s.Substring(start + 1, end - start - 1);
}
/// <summary>
/// Returns the text between the first <paramref name="startString"/> and the last
/// <paramref name="endString"/> of <paramref name="s"/>, both markers excluded.
/// Markers are compared ordinally.
/// </summary>
/// <param name="s">Text to cut; may be null.</param>
/// <param name="startString">Marker preceding the wanted text.</param>
/// <param name="endString">Marker following the wanted text.</param>
/// <returns>
/// The enclosed text (possibly empty), or <c>null</c> when the input is null or empty,
/// a marker is missing, or the end marker starts before the start marker ends.
/// </returns>
private string TrimString(string s, string startString, string endString)
{
    if (string.IsNullOrEmpty(s))
    {
        return null;
    }

    var start = s.IndexOf(startString, StringComparison.Ordinal);
    if (start == -1)
    {
        return null;
    }

    var contentStart = start + startString.Length;
    var end = s.LastIndexOf(endString, StringComparison.Ordinal);

    // Covers a missing end marker (-1) as well as an end marker located before
    // the content begins, which would give a negative length.
    if (end < contentStart)
    {
        return null;
    }

    return s.Substring(contentStart, end - contentStart);
}
/// <summary>
/// Reads the episode date from a date column. The text of "span.small-text" is preferred
/// (with any "label:" prefix removed); otherwise the text of the first "span" is used.
/// The date is parsed with the culture named by <c>Language</c>.
/// </summary>
/// <param name="dateColumn">Column element containing the date spans.</param>
/// <returns>The parsed date.</returns>
/// <exception cref="FormatException">No date text is present, or it cannot be parsed.</exception>
private DateTime DateFromEpisodeColumn(IElement dateColumn)
{
    // 'Eng: 23.05.2017' -> '23.05.2017' OR '23.05.2017' -> '23.05.2017'
    var dateString = dateColumn.QuerySelector("span.small-text")?.TextContent;
    if (string.IsNullOrEmpty(dateString))
    {
        dateString = dateColumn.QuerySelector("span")?.TextContent;
    }
    else
    {
        // Strip the label only when there actually is one; without a colon the
        // text is already the date and must not lose its first character.
        var colon = dateString.IndexOf(':');
        if (colon >= 0)
        {
            dateString = dateString.Substring(colon + 1);
        }
    }

    if (string.IsNullOrWhiteSpace(dateString))
    {
        throw new FormatException("No date text found in episode column");
    }

    return DateTime.Parse(dateString.Trim(), new CultureInfo(Language)); // dd.mm.yyyy
}
#endregion
}
}