mirror of
https://github.com/Jackett/Jackett
synced 2025-02-24 15:21:06 +00:00
girotorrent: Update regex for better title parsing... (#3529)
Italian release naming is inconsistent: you can find S01E01, 01x01, or even S01 E01. So the S and E prefixes are removed from the search keywords (this brings in extra results), and the results are then parsed and reconstructed into the standard format. Hopefully this is enough for now to give a better search, at least on Italian trackers; for external trackers (which also carry Italian releases), only God can tell... Sonarr will parse the results during an RSS sync, but during a search I think no results will be returned... There are two possible solutions: 1. Find and eliminate all releases that don't respect the standard. 2. Make Jackett / Sonarr / Radarr a little smarter and include a kind of regional substitution during the search phase.
This commit is contained in:
parent
7319078a5d
commit
7fde427731
1 changed file with 27 additions and 58 deletions
|
@ -101,80 +101,49 @@
|
|||
search:
|
||||
paths:
|
||||
- path: /index.php
|
||||
keywordsfilters:
|
||||
- name: re_replace
|
||||
args: ["S[0-9]{2}([^E]|$)", ""] # remove season tag without episode (search doesn't support it)
|
||||
- name: diacritics
|
||||
args: replace
|
||||
# most ITA TV torrents are in XXxYY format, so we search without S/E prefixes and filter later
|
||||
- name: re_replace
|
||||
args: ["S0?(\\d{1,2})", " $1 "]
|
||||
- name: re_replace
|
||||
args: ["E(\\d{2,3})", " $1 "]
|
||||
inputs:
|
||||
search: "{{ .Keywords }}"
|
||||
category: "{{range .Categories}}{{.}};{{end}}"
|
||||
page: "torrents"
|
||||
active: 0
|
||||
keywordsfilters:
|
||||
- name: diacritics
|
||||
args: replace
|
||||
- name: re_replace # S01 to 1
|
||||
args: ["\\b[sS]0*(\\d+)\\b", "$1"]
|
||||
- name: re_replace # S01E01 to 1 1
|
||||
args: ["\\b[sS]0*(\\d+)[eE]0*(\\d+)\\b", "$1 $2"]
|
||||
rows:
|
||||
selector: div.b-content > table > tbody > tr > td > table.lista > tbody > tr:has(a[href^="index.php?page=torrent-details&id="])
|
||||
#http://girotorrent.org/index.php?page=torrent-details&id=73d93dccf84ea3a8b614a3113acfd9eea186d730
|
||||
fields:
|
||||
download:
|
||||
selector: a[href^="index.php?page=downloadcheck&id="]
|
||||
attribute: href
|
||||
title: # shortened title?
|
||||
selector: a[onmouseover][href^="index.php?page=torrent-details&id="]
|
||||
# normalize to SXXEYY format
|
||||
filters:
|
||||
- name: re_replace # replace special characters with " " (space)
|
||||
args: ["[^a-zA-Z0-9]|\\.", " "]
|
||||
args: ["[^a-zA-Z0-9\\s]|\\.", " "]
|
||||
- name: re_replace # replace multiple spaces
|
||||
args: ["[ ]{2,}", " "]
|
||||
# normalize to SXXEYY format
|
||||
- name: re_replace
|
||||
args: ["(\\d{2})x(\\d{2})", "S$1E$2"]
|
||||
- name: re_replace
|
||||
args: ["(\\d{1})x(\\d{2})", "S0$1E$2"]
|
||||
- name: re_replace #Stagione X --> S0X
|
||||
args: ["Stagione (\\d{0,1}\\s)", "S0$1"]
|
||||
- name: re_replace #Stagione XX --> SXX
|
||||
args: ["Stagione (\\d{2}\\s)", "S$1"]
|
||||
- name: re_replace #/ Episodio [YY-YY --> EYY-YY
|
||||
args: ["(\\s\\/\\sEpisodio|\\s\\/\\sEpisodi|\\sEpisodio|\\s\\|\\sEpisodio|\\sEpisodi)\\s\\[", "E"]
|
||||
- name: re_replace #/ Completa [episodi YY-YY --> EYY-YY
|
||||
args: ["(\\s\\/\\sCompleta\\s\\[episodi\\s)", "E"]
|
||||
- name: re_replace #remove di YY] | remove /YY]
|
||||
args: ["(\\sdi\\s\\d{1,2}|\\/\\d{1,2})\\]", " "]
|
||||
- name: re_replace #remove various
|
||||
args: ["(Serie completa|Completa|\\[in pausa\\])", ""]
|
||||
# fine prova
|
||||
title: # long titles?
|
||||
optional: true
|
||||
selector: a[title][href^="index.php?page=torrent-details"]
|
||||
attribute: title
|
||||
filters:
|
||||
- name: replace
|
||||
args: ["Vedi Dettagli: ", ""]
|
||||
# inizio prova
|
||||
- name: re_replace # replace special characters with " " (space)
|
||||
args: ["[^a-zA-Z0-9]|\\.", " "]
|
||||
# normalize to SXXEYY format
|
||||
- name: re_replace
|
||||
args: ["(\\d{2})x(\\d{2})", "S$1E$2"]
|
||||
- name: re_replace
|
||||
args: ["(\\d{1})x(\\d{2})", "S0$1E$2"]
|
||||
- name: re_replace #Stagione X --> S0X
|
||||
args: ["Stagione (\\d{0,1}\\s)", "S0$1"]
|
||||
- name: re_replace #Stagione XX --> SXX
|
||||
args: ["Stagione (\\d{2}\\s)", "S$1"]
|
||||
- name: re_replace #/ Episodio [YY-YY --> EYY-YY
|
||||
args: ["(\\s\\/\\sEpisodio|\\s\\/\\sEpisodi|\\sEpisodio|\\s\\|\\sEpisodio|\\sEpisodi)\\s\\[", "E"]
|
||||
- name: re_replace #/ Completa [episodi YY-YY --> EYY-YY
|
||||
args: ["(\\s\\/\\sCompleta\\s\\[episodi\\s)", "E"]
|
||||
- name: re_replace #remove di YY] | remove /YY]
|
||||
args: ["(\\sdi\\s\\d{1,2}|\\/\\d{1,2})\\]", " "]
|
||||
- name: re_replace #remove various
|
||||
args: ["(Serie completa|Completa|\\[in pausa\\])", ""]
|
||||
# fine prova
|
||||
- name: re_replace # S01 E01 to S01E01
|
||||
args: ["\\b[sS](\\d+)\\s[eE](\\d+)\\b", "S$1E$2"]
|
||||
- name: re_replace # 01x01 to S01E01
|
||||
args: ["(\\d{2})x(\\d+)", "S$1E$2"]
|
||||
- name: re_replace # 1x01 to S01E01
|
||||
args: ["\\b(\\d{1})x(\\d+)", "S0$1E$2"]
|
||||
- name: re_replace # Stagione X --> S0X
|
||||
args: ["\\b[sS]tagion[eiEI]\\s?(\\d{1})\\b|\\b[sS]eason\\s?(\\d{1})\\b", "S0$1"]
|
||||
- name: re_replace # Stagione XX --> SXX
|
||||
args: ["\\b[sS]tagion[eiEI]\\s?(\\d{2,})\\b|\\b[sS]easons?\\s?(\\d{2,})\\b", "S$1"]
|
||||
- name: re_replace # Episodio 4 to E4
|
||||
args: ["\\b([\\/\\|]?[eE]pisodio\\s?(\\d+)|[Pp]untata\\s?(\\d+))", "E$2$3"]
|
||||
- name: re_replace # Episodi 4 5 to E04-05
|
||||
args: ["\\b(?:[Pp]untat[eE]\\s*)(\\d+)\\s?(\\d+)", "E0$1-0$2"]
|
||||
- name: re_replace # rimozioni varie
|
||||
args: ["([Ss]erie completa|[cC]ompleta[tT]?[aA]?|COMPLETA[TA]?|in pausa)", ""]
|
||||
- name: andmatch
|
||||
category:
|
||||
selector: a[href^="index.php?page=torrents&category="]
|
||||
attribute: href
|
||||
|
|
Loading…
Reference in a new issue