core: update subliminal_patch to 2.6.4.2917-dev; fix addic7ed, subscene, titlovi; fix SSAStyle parsing in SRT

This commit is contained in:
panni 2019-01-15 13:39:34 +01:00
parent d896599417
commit 8879f5a82e
12 changed files with 3900 additions and 58 deletions

3801
libs/inflect.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -56,7 +56,7 @@ class SSAStyle(object):
self.encoding = 1 #: Charset
for k, v in fields.items():
if k in self.FIELDS:
if k in self.FIELDS and v is not None:
setattr(self, k, v)
else:
raise ValueError("SSAStyle has no field named %r" % k)

View File

@ -150,7 +150,14 @@ class SubstationFormat(FormatBase):
if format_ == "ass":
return ass_rgba_to_color(v)
else:
return ssa_rgb_to_color(v)
try:
return ssa_rgb_to_color(v)
except ValueError:
try:
return ass_rgba_to_color(v)
except:
return Color(255, 255, 255, 0)
elif f in {"bold", "underline", "italic", "strikeout"}:
return v == "-1"
elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:

View File

@ -493,7 +493,7 @@ def scan_video(path, dont_use_actual_file=False, hints=None, providers=None, ski
raise ValueError('%r is not a valid video extension' % os.path.splitext(path)[1])
dirpath, filename = os.path.split(path)
logger.info('Scanning video %r in %r', filename, dirpath)
logger.info('Determining basic video properties for %r in %r', filename, dirpath)
# hint guessit the filename itself and its 2 parent directories if we're an episode (most likely
# Series name/Season/filename), else only one

View File

@ -84,32 +84,35 @@ class Addic7edProvider(_Addic7edProvider):
# login
if self.username and self.password:
ccks = region.get("addic7ed_cookies", expiration_time=86400)
do_login = False
if ccks != NO_VALUE:
self.session.cookies.update(ccks)
r = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10)
if r.status_code == 302:
logger.info('Addic7ed: Login expired')
do_login = True
else:
logger.info('Addic7ed: Reusing old login')
self.logged_in = True
try:
self.session.cookies._cookies.update(ccks)
r = self.session.get(self.server_url + 'panel.php', allow_redirects=False, timeout=10)
if r.status_code == 302:
logger.info('Addic7ed: Login expired')
region.delete("addic7ed_cookies")
else:
logger.info('Addic7ed: Reusing old login')
self.logged_in = True
return
except:
pass
if do_login:
logger.info('Addic7ed: Logging in')
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10)
logger.info('Addic7ed: Logging in')
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10,
headers={"Referer": self.server_url + "login.php"})
if "relax, slow down" in r.content:
raise TooManyRequests(self.username)
if "relax, slow down" in r.content:
raise TooManyRequests(self.username)
if r.status_code != 302:
raise AuthenticationError(self.username)
if r.status_code != 302:
raise AuthenticationError(self.username)
region.set("addic7ed_cookies", r.cookies)
region.set("addic7ed_cookies", self.session.cookies._cookies)
logger.debug('Addic7ed: Logged in')
self.logged_in = True
logger.debug('Addic7ed: Logged in')
self.logged_in = True
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)

View File

@ -18,7 +18,6 @@ except ImportError:
import xml.etree.cElementTree as etree
except ImportError:
import xml.etree.ElementTree as etree
from babelfish import language_converters
from subliminal import Episode
from subliminal import Movie

View File

@ -4,6 +4,7 @@ import io
import logging
import os
import time
import inflect
from random import randint
from zipfile import ZipFile
@ -20,6 +21,8 @@ from subliminal_patch.converters.subscene import language_ids, supported_languag
from subscene_api.subscene import search, Subtitle as APISubtitle
from subzero.language import Language
p = inflect.engine()
language_converters.register('subscene = subliminal_patch.converters.subscene:SubsceneConverter')
logger = logging.getLogger(__name__)
@ -192,21 +195,27 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
def query(self, video):
vfn = get_video_filename(video)
subtitles = []
logger.debug(u"Searching for: %s", vfn)
film = search(vfn, session=self.session)
subtitles = []
if film and film.subtitles:
logger.debug('Release results found: %s', len(film.subtitles))
subtitles = self.parse_results(video, film)
else:
logger.debug('No release results found')
# re-search for episodes without explicit release name
if isinstance(video, Episode):
term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
#term = u"%s S%02iE%02i" % (video.series, video.season, video.episode)
term = u"%s - %s Season" % (video.series, p.number_to_words("%sth" % video.season).capitalize())
time.sleep(self.search_throttle)
logger.debug('Searching for alternative results: %s', term)
film = search(term, session=self.session)
film = search(term, session=self.session, release=False)
if film and film.subtitles:
logger.debug('Alternative results found: %s', len(film.subtitles))
subtitles += self.parse_results(video, film)
else:
logger.debug('No alternative results found')
# packs
if video.season_fully_aired:
@ -215,9 +224,17 @@ class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
time.sleep(self.search_throttle)
film = search(term, session=self.session)
if film and film.subtitles:
logger.debug('Pack results found: %s', len(film.subtitles))
subtitles += self.parse_results(video, film)
else:
logger.debug('No pack results found')
else:
logger.debug("Not searching for packs, because the season hasn't fully aired")
else:
logger.debug('Searching for movie results: %s', video.title)
film = search(video.title, year=video.year, session=self.session, limit_to=None, release=False)
if film and film.subtitles:
subtitles += self.parse_results(video, film)
logger.info("%s subtitles found" % len(subtitles))
return subtitles

View File

@ -134,8 +134,8 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' \
'(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.3)' \
'Gecko/20100401 Firefox/3.6.3 ( .NET CLR 3.5.30729)'
logger.debug('User-Agent set to %s', self.session.headers['User-Agent'])
self.session.headers['Referer'] = self.server_url
logger.debug('Referer set to %s', self.session.headers['Referer'])
@ -202,7 +202,7 @@ class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
current_page = int(params['pg'])
try:
sublist = soup.select('section.titlovi > ul.titlovi > li')
sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
for sub in sublist:
# subtitle id
sid = sub.find(attrs={'data-id': True}).attrs['data-id']

View File

@ -25,6 +25,7 @@ this script that does the job by parsing the website"s pages.
# imports
import re
import enum
import sys
@ -36,7 +37,7 @@ else:
from contextlib import suppress
from urllib2.request import Request, urlopen
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, NavigableString
# constants
HEADERS = {
@ -207,7 +208,7 @@ def section_exists(soup, section):
return False
def get_first_film(soup, section, session=None):
def get_first_film(soup, section, year=None, session=None):
tag_part = SectionsParts[section]
tag = None
@ -220,12 +221,26 @@ def get_first_film(soup, section, session=None):
if not tag:
return
url = SITE_DOMAIN + tag.findNext("ul").find("li").div.a.get("href")
url = None
if not year:
url = SITE_DOMAIN + tag.findNext("ul").find("li").div.a.get("href")
else:
for t in tag.findNext("ul").findAll("li"):
if isinstance(t, NavigableString) or not t.div:
continue
if str(year) in t.div.a.string:
url = SITE_DOMAIN + t.div.a.get("href")
break
if not url:
return
return Film.from_url(url, session=session)
def search(term, session=None, limit_to=SearchTypes.Exact):
soup = soup_for("%s/subtitles/title?q=%s" % (SITE_DOMAIN, term), session=session)
def search(term, release=True, session=None, year=None, limit_to=SearchTypes.Exact):
soup = soup_for("%s/subtitles/%s?q=%s" % (SITE_DOMAIN, "release" if release else "title", term), session=session)
if "Subtitle search by" in str(soup):
rows = soup.find("table").tbody.find_all("tr")
@ -234,7 +249,7 @@ def search(term, session=None, limit_to=SearchTypes.Exact):
for junk, search_type in SearchTypes.__members__.items():
if section_exists(soup, search_type):
return get_first_film(soup, search_type)
return get_first_film(soup, search_type, year=year, session=session)
if limit_to == search_type:
return

View File

@ -4,7 +4,6 @@ import types
from babelfish.exceptions import LanguageError
from babelfish import Language as Language_, basestr
repl_map = {
"dk": "da",
"nld": "nl",

View File

@ -28,13 +28,16 @@ class CommonFixes(SubtitleTextModification):
NReProcessor(re.compile(r'(?u)(\w|\b|\s|^)(-\s?-{1,2})'), ur"\1", name="CM_multidash"),
# line = _/-/\s
NReProcessor(re.compile(r'(?u)(^\W*[-_.:]+\W*$)'), "", name="CM_non_word_only"),
NReProcessor(re.compile(r'(?u)(^\W*[-_.:>~]+\W*$)'), "", name="CM_non_word_only"),
# remove >>
NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"),
# line = : text
NReProcessor(re.compile(r'(?u)(^\W*:\s*(?=\w+))'), "", name="CM_empty_colon_start"),
# fix music symbols
NReProcessor(re.compile(ur'(?u)(^[-\s]*[*#¶]+\s*)|(\s*[*#¶]+\s*$)'),
NReProcessor(re.compile(ur'(?u)(^[-\s>~]*[*#¶]+\s*)|(\s*[*#¶]+\s*$)'),
lambda x: u"" if x.group(1) else u"",
name="CM_music_symbols"),
@ -85,9 +88,6 @@ class CommonFixes(SubtitleTextModification):
# space before ending doublequote?
# remove >>
NReProcessor(re.compile(r'(?u)^\s?>>\s*'), "", name="CM_leading_crocodiles"),
# replace uppercase I with lowercase L in words
NReProcessor(re.compile(ur'(?u)([a-zà-ž]+)(I+)'),
lambda match: ur'%s%s' % (match.group(1), "l" * len(match.group(2))),

View File

@ -29,6 +29,22 @@ class HearingImpaired(SubtitleTextModification):
FullBracketEntryProcessor(re.compile(ur'(?sux)^-?%(t)s[([].+(?=[^)\]]{3,}).+[)\]]%(t)s$' % {"t": TAG}),
"", name="HI_brackets_full"),
# uppercase text before colon (at least 3 uppercase chars); at start or after a sentence,
# possibly with a dash in front; ignore anything ending with a quote
NReProcessor(re.compile(ur'(?u)(?:(?<=^)|(?<=[.\-!?\"\']))([\s\->~]*(?=[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+])'
ur'[A-zÀ-ž-_0-9\s\"\'&+()\[\],:]+:(?![\"\'’ʼ❜‘‛”“‟„])(?:\s+|$))(?![0-9])'), "",
name="HI_before_colon_caps"),
# any text before colon (at least 3 chars); at start or after a sentence,
# possibly with a dash in front; try not breaking actual sentences with a colon at the end by not matching if
# a space is inside the text; ignore anything ending with a quote
NReProcessor(re.compile(ur'(?u)(?:(?<=^)|(?<=[.\-!?\"]))([\s\->~]*((?=[A-zÀ-ž&+]\s*[A-zÀ-ž&+]\s*[A-zÀ-ž&+])'
ur'[A-zÀ-ž-_0-9\s\"\'&+()\[\]]+:)(?![\"’ʼ❜‘‛”“‟„])\s*)(?![0-9])'),
lambda match:
match.group(1) if (match.group(2).count(" ") > 0 or match.group(1).count("-") > 0)
else "" if not match.group(1).startswith(" ") else " ",
name="HI_before_colon_noncaps"),
# brackets (only remove if at least 3 chars in brackets)
NReProcessor(re.compile(ur'(?sux)-?%(t)s[([][^([)\]]+?(?=[A-zÀ-ž"\'.]{3,})[^([)\]]+[)\]][\s:]*%(t)s' %
{"t": TAG}), "", name="HI_brackets"),
@ -46,21 +62,6 @@ class HearingImpaired(SubtitleTextModification):
#NReProcessor(re.compile(ur'(?u)(\b|^)([\s-]*(?=[A-zÀ-ž-_0-9"\']{3,})[A-zÀ-ž-_0-9"\']+:\s*)'), "",
# name="HI_before_colon"),
# uppercase text before colon (at least 3 uppercase chars); at start or after a sentence,
# possibly with a dash in front; ignore anything ending with a quote
NReProcessor(re.compile(ur'(?u)(?:(?<=^)|(?<=[.\-!?\"\']))([\s-]*(?=[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+]\s*[A-ZÀ-Ž&+])'
ur'[A-ZÀ-Ž-_0-9\s\"\'&+]+:(?![\"\'’ʼ❜‘‛”“‟„])(?:\s+|$))(?![0-9])'), "",
name="HI_before_colon_caps"),
# any text before colon (at least 3 chars); at start or after a sentence,
# possibly with a dash in front; try not breaking actual sentences with a colon at the end by not matching if
# a space is inside the text; ignore anything ending with a quote
NReProcessor(re.compile(ur'(?u)(?:(?<=^)|(?<=[.\-!?\"]))([\s-]*((?=[A-zÀ-ž&+]\s*[A-zÀ-ž&+]\s*[A-zÀ-ž&+])'
ur'[A-zÀ-ž-_0-9\s\"\'&+]+:)(?![\"’ʼ❜‘‛”“‟„])\s*)(?![0-9])'),
lambda match:
match.group(1) if (match.group(2).count(" ") > 0 or match.group(1).count("-") > 0)
else "" if not match.group(1).startswith(" ") else " ",
name="HI_before_colon_noncaps"),
# text in brackets at start, after optional dash, before colon or at end of line
# fixme: may be too aggressive