Merge branch 'development' into hermes

# Conflicts:
#	.gitignore
#	bazarr/main.py
This commit is contained in:
Louis Vézina 2020-05-06 08:12:24 -04:00
commit 6fd6ee234c
13 changed files with 378 additions and 203 deletions

2
.gitignore vendored
View File

@ -3,6 +3,7 @@ cachefile.dbm
*.log *.log
*.log.* *.log.*
*.db *.db
*.pyc
.idea/* .idea/*
bazarr.pid bazarr.pid
/venv /venv
@ -10,6 +11,7 @@ bazarr.pid
static/scss/.sass-cache/* static/scss/.sass-cache/*
static/scss/.sass-cache static/scss/.sass-cache
*.scssc *.scssc
/.vscode
# Allow # Allow
!*.dll !*.dll

View File

@ -68,6 +68,7 @@ If you need something that is not already part of Bazarr, feel free to create a
* TVSubtitles * TVSubtitles
* Wizdom * Wizdom
* XSubs * XSubs
* Yavka.net
* Zimuku * Zimuku
## Screenshot ## Screenshot

View File

@ -80,6 +80,9 @@ if __name__ == '__main__':
while True: while True:
check_status() check_status()
try: try:
time.sleep(5) if sys.platform.startswith('win'):
time.sleep(5)
else:
os.wait()
except (KeyboardInterrupt, SystemExit): except (KeyboardInterrupt, SystemExit):
pass pass

View File

@ -35,6 +35,11 @@ PROVIDER_THROTTLE_MAP = {
}, },
"titulky": { "titulky": {
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours") DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours")
},
"legendasdivx": {
TooManyRequests: (datetime.timedelta(hours=2), "2 hours"),
DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"),
ParseResponseError: (datetime.timedelta(hours=1), "1 hours"),
} }
} }

View File

@ -387,7 +387,7 @@ def guess_external_subtitles(dest_folder, subtitles):
logging.debug('BAZARR detected encoding %r', guess) logging.debug('BAZARR detected encoding %r', guess)
if guess["confidence"] < 0.6: if guess["confidence"] < 0.6:
raise UnicodeError raise UnicodeError
if guess["confidence"] < 0.7 or guess["encoding"] == "ascii": if guess["encoding"] == "ascii":
guess["encoding"] = "utf-8" guess["encoding"] = "utf-8"
text = text.decode(guess["encoding"]) text = text.decode(guess["encoding"])
detected_language = guess_language(text) detected_language = guess_language(text)

View File

@ -45,6 +45,8 @@ from get_movies import *
from scheduler import Scheduler from scheduler import Scheduler
from check_update import check_and_apply_update from check_update import check_and_apply_update
from subliminal_patch.extensions import provider_registry as provider_manager from subliminal_patch.extensions import provider_registry as provider_manager
from subliminal_patch.core import SUBTITLE_EXTENSIONS
from subliminal.cache import region
from functools import wraps from functools import wraps
from app import create_app, socketio from app import create_app, socketio

View File

@ -2,20 +2,22 @@
from __future__ import absolute_import from __future__ import absolute_import
import logging import logging
import io import io
import re
import os import os
import rarfile import rarfile
import zipfile import zipfile
from requests import Session from requests import Session
from guessit import guessit from guessit import guessit
from subliminal_patch.exceptions import ParseResponseError from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded
from subliminal_patch.providers import Provider from subliminal_patch.providers import Provider
from subliminal.providers import ParserBeautifulSoup from subliminal.providers import ParserBeautifulSoup
from subliminal_patch.subtitle import Subtitle from subliminal_patch.subtitle import Subtitle
from subliminal.video import Episode, Movie from subliminal.video import Episode, Movie
from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending,guess_matches from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending, guess_matches
from subzero.language import Language from subzero.language import Language
from subliminal_patch.score import get_scores from subliminal_patch.score import get_scores
from subliminal.utils import sanitize, sanitize_release_group
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -27,11 +29,12 @@ class LegendasdivxSubtitle(Subtitle):
super(LegendasdivxSubtitle, self).__init__(language) super(LegendasdivxSubtitle, self).__init__(language)
self.language = language self.language = language
self.page_link = data['link'] self.page_link = data['link']
self.hits=data['hits'] self.hits = data['hits']
self.exact_match=data['exact_match'] self.exact_match = data['exact_match']
self.description=data['description'].lower() self.description = data['description']
self.video = video self.video = video
self.videoname =data['videoname'] self.video_filename = data['video_filename']
self.uploader = data['uploader']
@property @property
def id(self): def id(self):
@ -44,40 +47,37 @@ class LegendasdivxSubtitle(Subtitle):
def get_matches(self, video): def get_matches(self, video):
matches = set() matches = set()
if self.videoname.lower() in self.description: description = sanitize(self.description)
if sanitize(self.video_filename) in description:
matches.update(['title']) matches.update(['title'])
matches.update(['season']) matches.update(['season'])
matches.update(['episode']) matches.update(['episode'])
# episode # episode
if video.title and video.title.lower() in self.description: if video.title and sanitize(video.title) in description:
matches.update(['title']) matches.update(['title'])
if video.year and '{:04d}'.format(video.year) in self.description: if video.year and '{:04d}'.format(video.year) in description:
matches.update(['year']) matches.update(['year'])
if isinstance(video, Episode): if isinstance(video, Episode):
# already matched in search query # already matched in search query
if video.season and 's{:02d}'.format(video.season) in self.description: if video.season and 's{:02d}'.format(video.season) in description:
matches.update(['season']) matches.update(['season'])
if video.episode and 'e{:02d}'.format(video.episode) in self.description: if video.episode and 'e{:02d}'.format(video.episode) in description:
matches.update(['episode']) matches.update(['episode'])
if video.episode and video.season and video.series: if video.episode and video.season and video.series:
if '{}.s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description: if '{} s{:02d}e{:02d}'.format(sanitize(video.series), video.season, video.episode) in description:
matches.update(['series'])
matches.update(['season'])
matches.update(['episode'])
if '{} s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description:
matches.update(['series']) matches.update(['series'])
matches.update(['season']) matches.update(['season'])
matches.update(['episode']) matches.update(['episode'])
# release_group # release_group
if video.release_group and video.release_group.lower() in self.description: if video.release_group and sanitize_release_group(video.release_group) in sanitize_release_group(description):
matches.update(['release_group']) matches.update(['release_group'])
# resolution # resolution
if video.resolution and video.resolution.lower() in description:
if video.resolution and video.resolution.lower() in self.description:
matches.update(['resolution']) matches.update(['resolution'])
# format # format
@ -87,9 +87,9 @@ class LegendasdivxSubtitle(Subtitle):
if formats[0] == "web-dl": if formats[0] == "web-dl":
formats.append("webdl") formats.append("webdl")
formats.append("webrip") formats.append("webrip")
formats.append("web ") formats.append("web")
for frmt in formats: for frmt in formats:
if frmt.lower() in self.description: if frmt in description:
matches.update(['format']) matches.update(['format'])
break break
@ -97,11 +97,11 @@ class LegendasdivxSubtitle(Subtitle):
if video.video_codec: if video.video_codec:
video_codecs = [video.video_codec.lower()] video_codecs = [video.video_codec.lower()]
if video_codecs[0] == "h264": if video_codecs[0] == "h264":
formats.append("x264") video_codecs.append("x264")
elif video_codecs[0] == "h265": elif video_codecs[0] == "h265":
formats.append("x265") video_codecs.append("x265")
for vc in formats: for vc in video_codecs:
if vc.lower() in self.description: if vc in description:
matches.update(['video_codec']) matches.update(['video_codec'])
break break
@ -109,9 +109,6 @@ class LegendasdivxSubtitle(Subtitle):
# matches |= guess_matches(video, guessit(self.description)) # matches |= guess_matches(video, guessit(self.description))
return matches return matches
class LegendasdivxProvider(Provider): class LegendasdivxProvider(Provider):
"""Legendasdivx Provider.""" """Legendasdivx Provider."""
languages = {Language('por', 'BR')} | {Language('por')} languages = {Language('por', 'BR')} | {Language('por')}
@ -126,15 +123,21 @@ class LegendasdivxProvider(Provider):
'Cache-Control': 'no-cache' 'Cache-Control': 'no-cache'
} }
loginpage = site + '/forum/ucp.php?mode=login' loginpage = site + '/forum/ucp.php?mode=login'
logoutpage = site + '/sair.php'
searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}' searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}'
language_list = list(languages) download_link = site + '/modules.php{link}'
def __init__(self, username, password): def __init__(self, username, password):
# make sure login credentials are configured.
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
self.username = username self.username = username
self.password = password self.password = password
self.logged_in = False
def initialize(self): def initialize(self):
self.session = Session() self.session = Session()
self.session.headers.update(self.headers)
self.login() self.login()
def terminate(self): def terminate(self):
@ -143,100 +146,103 @@ class LegendasdivxProvider(Provider):
def login(self): def login(self):
logger.info('Logging in') logger.info('Logging in')
self.headers['Referer'] = self.site + '/index.php'
self.session.headers.update(self.headers.items())
res = self.session.get(self.loginpage) res = self.session.get(self.loginpage)
bsoup = ParserBeautifulSoup(res.content, ['lxml']) bsoup = ParserBeautifulSoup(res.content, ['lxml'])
_allinputs = bsoup.findAll('input') _allinputs = bsoup.findAll('input')
fields = {} data = {}
# necessary to set 'sid' for POST request
for field in _allinputs: for field in _allinputs:
fields[field.get('name')] = field.get('value') data[field.get('name')] = field.get('value')
data['username'] = self.username
data['password'] = self.password
fields['username'] = self.username res = self.session.post(self.loginpage, data)
fields['password'] = self.password res.raise_for_status()
fields['autologin'] = 'on'
fields['viewonline'] = 'on'
self.headers['Referer'] = self.loginpage
self.session.headers.update(self.headers.items())
res = self.session.post(self.loginpage, fields)
try: try:
logger.debug('Got session id %s' % logger.debug('Logged in successfully: PHPSESSID: %s' %
self.session.cookies.get_dict()['PHPSESSID']) self.session.cookies.get_dict()['PHPSESSID'])
except KeyError as e: self.logged_in = True
logger.error(repr(e)) except KeyError:
logger.error("Didn't get session id, check your credentials") logger.error("Couldn't retrieve session ID, check your credentials")
return False raise AuthenticationError("Please check your credentials.")
except Exception as e: except Exception as e:
logger.error(repr(e)) if 'bloqueado' in res.text.lower(): # blocked IP address
logger.error('uncached error #legendasdivx #AA') logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
return False raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)
logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e))
return True raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e))
def logout(self): def logout(self):
# need to figure this out if self.logged_in:
return True logger.info('Legendasdivx:: Logging out')
r = self.session.get(self.logoutpage, timeout=10)
r.raise_for_status()
logger.debug('Legendasdivx :: Logged out')
self.logged_in = False
def _process_page(self, video, bsoup, video_filename):
def _process_page(self, video, bsoup, querytext, videoname):
subtitles = [] subtitles = []
_allsubs = bsoup.findAll("div", {"class": "sub_box"}) _allsubs = bsoup.findAll("div", {"class": "sub_box"})
lang = Language.fromopensubtitles("pob")
for _subbox in _allsubs: for _subbox in _allsubs:
hits=0 hits = 0
for th in _subbox.findAll("th", {"class": "color2"}): for th in _subbox.findAll("th", {"class": "color2"}):
if th.string == 'Hits:': if th.string == 'Hits:':
hits = int(th.parent.find("td").string) hits = int(th.parent.find("td").string)
if th.string == 'Idioma:': if th.string == 'Idioma:':
lang = th.parent.find("td").find ("img").get ('src') lang = th.parent.find("td").find("img").get('src')
if 'brazil' in lang: if 'brazil' in lang.lower():
lang = Language.fromopensubtitles('pob') lang = Language.fromopensubtitles('pob')
else: elif 'portugal' in lang.lower():
lang = Language.fromopensubtitles('por') lang = Language.fromopensubtitles('por')
else:
description = _subbox.find("td", {"class": "td_desc brd_up"}) continue
# get description for matches
description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text()
#get subtitle link
download = _subbox.find("a", {"class": "sub_download"}) download = _subbox.find("a", {"class": "sub_download"})
try:
# sometimes BSoup just doesn't get the link # sometimes BSoup can't find 'a' tag and returns None.
logger.debug(download.get('href')) i = 0
except Exception as e: while not (download): # must get it... trying again...
logger.warning('skipping subbox on %s' % self.searchurl.format(query=querytext)) download = _subbox.find("a", {"class": "sub_download"})
continue i=+1
logger.debug("Try number {0} try!".format(str(i)))
dl = download.get('href')
logger.debug("Found subtitle on: %s" % self.download_link.format(link=dl))
# get subtitle uploader
sub_header = _subbox.find("div", {"class" :"sub_header"})
uploader = sub_header.find("a").text if sub_header else 'anonymous'
exact_match = False exact_match = False
if video.name.lower() in description.get_text().lower(): if video.name.lower() in description.lower():
exact_match = True exact_match = True
data = {'link': self.site + '/modules.php' + download.get('href'), data = {'link': self.site + '/modules.php' + download.get('href'),
'exact_match': exact_match, 'exact_match': exact_match,
'hits': hits, 'hits': hits,
'videoname': videoname, 'uploader': uploader,
'description': description.get_text() } 'video_filename': video_filename,
'description': description
}
subtitles.append( subtitles.append(
LegendasdivxSubtitle(lang, video, data) LegendasdivxSubtitle(lang, video, data)
) )
return subtitles return subtitles
def query(self, video, language): def query(self, video, languages):
try:
logger.debug('Got session id %s' %
self.session.cookies.get_dict()['PHPSESSID'])
except Exception as e:
self.login()
language_ids = '0' video_filename = video.name
if isinstance(language, (tuple, list, set)): video_filename = os.path.basename(video_filename)
if len(language) == 1: video_filename, _ = os.path.splitext(video_filename)
language_ids = ','.join(sorted(l.opensubtitles for l in language)) video_filename = sanitize_release_group(video_filename)
if language_ids == 'por':
language_ids = '&form_cat=28'
else:
language_ids = '&form_cat=29'
videoname = video.name
videoname = os.path.basename(videoname)
videoname, _ = os.path.splitext(videoname)
# querytext = videoname.lower()
_searchurl = self.searchurl _searchurl = self.searchurl
if video.imdb_id is None: if video.imdb_id is None:
if isinstance(video, Episode): if isinstance(video, Episode):
@ -246,22 +252,47 @@ class LegendasdivxProvider(Provider):
else: else:
querytext = video.imdb_id querytext = video.imdb_id
# language query filter
if isinstance(languages, (tuple, list, set)):
language_ids = ','.join(sorted(l.opensubtitles for l in languages))
if 'por' in language_ids: # prioritize portuguese subtitles
lang_filter = '&form_cat=28' # pt
elif 'pob' in language_ids:
lang_filter = '&form_cat=29' # br
else:
lang_filter = ''
querytext = querytext + lang_filter if lang_filter else querytext
# querytext = querytext.replace(
# ".", "+").replace("[", "").replace("]", "")
if language_ids != '0':
querytext = querytext + language_ids
self.headers['Referer'] = self.site + '/index.php' self.headers['Referer'] = self.site + '/index.php'
self.session.headers.update(self.headers.items()) self.session.headers.update(self.headers.items())
res = self.session.get(_searchurl.format(query=querytext)) res = self.session.get(_searchurl.format(query=querytext))
# form_cat=28 = br
# form_cat=29 = pt
if "A legenda não foi encontrada" in res.text: if "A legenda não foi encontrada" in res.text:
logger.warning('%s not found', querytext) logger.warning('%s not found', querytext)
return [] return []
bsoup = ParserBeautifulSoup(res.content, ['html.parser']) bsoup = ParserBeautifulSoup(res.content, ['html.parser'])
subtitles = self._process_page(video, bsoup, querytext, videoname) subtitles = self._process_page(video, bsoup, video_filename)
# search for more than 10 results (legendasdivx uses pagination)
# don't throttle - maximum results = 6 * 10
MAX_PAGES = 6
#get number of pages bases on results found
page_header = bsoup.find("div", {"class": "pager_bar"})
results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1)
num_pages = (int(results_found) // 10) + 1
num_pages = min(MAX_PAGES, num_pages)
if num_pages > 1:
for num_page in range(2, num_pages+2):
_search_next = self.searchurl.format(query=querytext) + "&page={0}".format(str(num_page))
logger.debug("Moving to next page: %s" % _search_next)
res = self.session.get(_search_next)
next_page = ParserBeautifulSoup(res.content, ['html.parser'])
subs = self._process_page(video, next_page, video_filename)
subtitles.extend(subs)
return subtitles return subtitles
@ -270,9 +301,14 @@ class LegendasdivxProvider(Provider):
def download_subtitle(self, subtitle): def download_subtitle(self, subtitle):
res = self.session.get(subtitle.page_link) res = self.session.get(subtitle.page_link)
res.raise_for_status()
if res: if res:
if res.text == '500': if res.status_code in ['500', '503']:
raise ValueError('Error 500 on server') raise ServiceUnavailable("Legendasdivx.pt :: 503 - Service Unavailable")
elif 'limite' in res.text.lower(): # daily downloads limit reached
raise DownloadLimitReached("Legendasdivx.pt :: Download limit reached")
elif 'bloqueado' in res.text.lower(): # blocked IP address
raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)
archive = self._get_archive(res.content) archive = self._get_archive(res.content)
# extract the subtitle # extract the subtitle
@ -281,7 +317,9 @@ class LegendasdivxProvider(Provider):
subtitle.normalize() subtitle.normalize()
return subtitle return subtitle
raise ValueError('Problems conecting to the server')
logger.error("Legendasdivx.pt :: there was a problem retrieving subtitle (status %s)" % res.status_code)
return
def _get_archive(self, content): def _get_archive(self, content):
# open the archive # open the archive
@ -294,7 +332,6 @@ class LegendasdivxProvider(Provider):
logger.debug('Identified zip archive') logger.debug('Identified zip archive')
archive = zipfile.ZipFile(archive_stream) archive = zipfile.ZipFile(archive_stream)
else: else:
# raise ParseResponseError('Unsupported compressed format')
raise Exception('Unsupported compressed format') raise Exception('Unsupported compressed format')
return archive return archive
@ -305,7 +342,7 @@ class LegendasdivxProvider(Provider):
_tmp.remove('.txt') _tmp.remove('.txt')
_subtitle_extensions = tuple(_tmp) _subtitle_extensions = tuple(_tmp)
_max_score = 0 _max_score = 0
_scores = get_scores (subtitle.video) _scores = get_scores(subtitle.video)
for name in archive.namelist(): for name in archive.namelist():
# discard hidden files # discard hidden files
@ -338,4 +375,4 @@ class LegendasdivxProvider(Provider):
logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score)) logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score))
return archive.read(_max_name) return archive.read(_max_name)
raise ParseResponseError('Can not find the subtitle in the compressed file') raise ValueError("No subtitle found on compressed file. Max score was 0")

View File

@ -44,6 +44,12 @@ class OpenSubtitlesSubtitle(_OpenSubtitlesSubtitle):
self.wrong_fps = False self.wrong_fps = False
self.skip_wrong_fps = skip_wrong_fps self.skip_wrong_fps = skip_wrong_fps
def get_fps(self):
try:
return float(self.fps)
except:
return None
def get_matches(self, video, hearing_impaired=False): def get_matches(self, video, hearing_impaired=False):
matches = super(OpenSubtitlesSubtitle, self).get_matches(video) matches = super(OpenSubtitlesSubtitle, self).get_matches(video)
@ -138,11 +144,9 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
return ServerProxy(url, SubZeroRequestsTransport(use_https=self.use_ssl, timeout=timeout or self.timeout, return ServerProxy(url, SubZeroRequestsTransport(use_https=self.use_ssl, timeout=timeout or self.timeout,
user_agent=os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"))) user_agent=os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")))
def log_in(self, server_url=None): def log_in_url(self, server_url):
if server_url: self.token = None
self.terminate() self.server = self.get_server_proxy(server_url)
self.server = self.get_server_proxy(server_url)
response = self.retry( response = self.retry(
lambda: checked( lambda: checked(
@ -155,6 +159,25 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
logger.debug('Logged in with token %r', self.token[:10]+"X"*(len(self.token)-10)) logger.debug('Logged in with token %r', self.token[:10]+"X"*(len(self.token)-10))
region.set("os_token", bytearray(self.token, encoding='utf-8')) region.set("os_token", bytearray(self.token, encoding='utf-8'))
region.set("os_server_url", bytearray(server_url, encoding='utf-8'))
def log_in(self):
logger.info('Logging in')
try:
self.log_in_url(self.vip_url if self.is_vip else self.default_url)
except Unauthorized:
if self.is_vip:
logger.info("VIP server login failed, falling back")
try:
self.log_in_url(self.default_url)
except Unauthorized:
pass
if not self.token:
logger.error("Login failed, please check your credentials")
raise Unauthorized
def use_token_or_login(self, func): def use_token_or_login(self, func):
if not self.token: if not self.token:
@ -167,45 +190,18 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
return func() return func()
def initialize(self): def initialize(self):
if self.is_vip: token_cache = region.get("os_token")
self.server = self.get_server_proxy(self.vip_url) url_cache = region.get("os_server_url")
logger.info("Using VIP server")
if token_cache is not NO_VALUE and url_cache is not NO_VALUE:
self.token = token_cache.decode("utf-8")
self.server = self.get_server_proxy(url_cache.decode("utf-8"))
logger.debug("Using previous login token: %r", self.token[:10] + "X" * (len(self.token) - 10))
else: else:
self.server = self.get_server_proxy(self.default_url) self.server = None
self.token = None
logger.info('Logging in')
token = str(region.get("os_token"))
if token is not NO_VALUE:
try:
logger.debug('Trying previous token: %r', token[:10]+"X"*(len(token)-10))
checked(lambda: self.server.NoOperation(token))
self.token = token
logger.debug("Using previous login token: %r", token[:10]+"X"*(len(token)-10))
return
except (NoSession, Unauthorized):
logger.debug('Token not valid.')
pass
try:
self.log_in()
except Unauthorized:
if self.is_vip:
logger.info("VIP server login failed, falling back")
self.log_in(self.default_url)
if self.token:
return
logger.error("Login failed, please check your credentials")
def terminate(self): def terminate(self):
if self.token:
try:
checked(lambda: self.server.LogOut(self.token))
except:
logger.error("Logout failed: %s", traceback.format_exc())
self.server = None self.server = None
self.token = None self.token = None

View File

@ -13,7 +13,6 @@ from guessit import guessit
from subliminal_patch.providers import Provider from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle from subliminal_patch.subtitle import Subtitle
from subliminal_patch.utils import sanitize, fix_inconsistent_naming from subliminal_patch.utils import sanitize, fix_inconsistent_naming
from subliminal.exceptions import ProviderError
from subliminal.utils import sanitize_release_group from subliminal.utils import sanitize_release_group
from subliminal.subtitle import guess_matches from subliminal.subtitle import guess_matches
from subliminal.video import Episode, Movie from subliminal.video import Episode, Movie
@ -43,18 +42,23 @@ class SubsSabBzSubtitle(Subtitle):
"""SubsSabBz Subtitle.""" """SubsSabBz Subtitle."""
provider_name = 'subssabbz' provider_name = 'subssabbz'
def __init__(self, langauge, filename, type, video, link): def __init__(self, langauge, filename, type, video, link, fps, num_cds):
super(SubsSabBzSubtitle, self).__init__(langauge) super(SubsSabBzSubtitle, self).__init__(langauge)
self.langauge = langauge self.langauge = langauge
self.filename = filename self.filename = filename
self.page_link = link self.page_link = link
self.type = type self.type = type
self.video = video self.video = video
self.fps = fps
self.num_cds = num_cds
self.release_info = os.path.splitext(filename)[0] self.release_info = os.path.splitext(filename)[0]
@property @property
def id(self): def id(self):
return self.filename return self.page_link + self.filename
def get_fps(self):
return self.fps
def make_picklable(self): def make_picklable(self):
self.content = None self.content = None
@ -76,13 +80,21 @@ class SubsSabBzSubtitle(Subtitle):
if video_filename == subtitle_filename: if video_filename == subtitle_filename:
matches.add('hash') matches.add('hash')
matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) if video.year and self.year == video.year:
matches.add('year')
if isinstance(video, Movie):
if video.imdb_id and self.imdb_id == video.imdb_id:
matches.add('imdb_id')
matches |= guess_matches(video, guessit(self.title, {'type': self.type, 'allowed_countries': [None]}))
matches |= guess_matches(video, guessit(self.filename, {'type': self.type, 'allowed_countries': [None]}))
return matches return matches
class SubsSabBzProvider(Provider): class SubsSabBzProvider(Provider):
"""SubsSabBz Provider.""" """SubsSabBz Provider."""
languages = {Language('por', 'BR')} | {Language(l) for l in [ languages = {Language(l) for l in [
'bul', 'eng' 'bul', 'eng'
]} ]}
@ -136,19 +148,51 @@ class SubsSabBzProvider(Provider):
soup = BeautifulSoup(response.content, 'lxml') soup = BeautifulSoup(response.content, 'lxml')
rows = soup.findAll('tr', {'class': 'subs-row'}) rows = soup.findAll('tr', {'class': 'subs-row'})
# Search on first 20 rows only # Search on first 25 rows only
for row in rows[:20]: for row in rows[:25]:
a_element_wrapper = row.find('td', { 'class': 'c2field' }) a_element_wrapper = row.find('td', { 'class': 'c2field' })
if a_element_wrapper: if a_element_wrapper:
element = a_element_wrapper.find('a') element = a_element_wrapper.find('a')
if element: if element:
link = element.get('href') link = element.get('href')
element = row.find('a', href = re.compile(r'.*showuser=.*')) notes = element.get('onmouseover')
uploader = element.get_text() if element else None title = element.get_text()
try:
year = int(str(element.next_sibling).strip(' ()'))
except:
year = None
td = row.findAll('td')
try:
num_cds = int(td[6].get_text())
except:
num_cds = None
try:
fps = float(td[7].get_text())
except:
fps = None
try:
uploader = td[8].get_text()
except:
uploader = None
try:
imdb_id = re.findall(r'imdb.com/title/(tt\d+)/?$', td[9].find('a').get('href'))[0]
except:
imdb_id = None
logger.info('Found subtitle link %r', link) logger.info('Found subtitle link %r', link)
sub = self.download_archive_and_add_subtitle_files(link, language, video) sub = self.download_archive_and_add_subtitle_files(link, language, video, fps, num_cds)
for s in sub: for s in sub:
s.title = title
s.notes = notes
s.year = year
s.uploader = uploader s.uploader = uploader
s.imdb_id = imdb_id
subtitles = subtitles + sub subtitles = subtitles + sub
return subtitles return subtitles
@ -160,23 +204,24 @@ class SubsSabBzProvider(Provider):
pass pass
else: else:
seeking_subtitle_file = subtitle.filename seeking_subtitle_file = subtitle.filename
arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video) arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video,
subtitle.fps, subtitle.num_cds)
for s in arch: for s in arch:
if s.filename == seeking_subtitle_file: if s.filename == seeking_subtitle_file:
subtitle.content = s.content subtitle.content = s.content
def process_archive_subtitle_files(self, archiveStream, language, video, link): def process_archive_subtitle_files(self, archiveStream, language, video, link, fps, num_cds):
subtitles = [] subtitles = []
type = 'episode' if isinstance(video, Episode) else 'movie' type = 'episode' if isinstance(video, Episode) else 'movie'
for file_name in archiveStream.namelist(): for file_name in sorted(archiveStream.namelist()):
if file_name.lower().endswith(('.srt', '.sub')): if file_name.lower().endswith(('.srt', '.sub')):
logger.info('Found subtitle file %r', file_name) logger.info('Found subtitle file %r', file_name)
subtitle = SubsSabBzSubtitle(language, file_name, type, video, link) subtitle = SubsSabBzSubtitle(language, file_name, type, video, link, fps, num_cds)
subtitle.content = archiveStream.read(file_name) subtitle.content = fix_line_ending(archiveStream.read(file_name))
subtitles.append(subtitle) subtitles.append(subtitle)
return subtitles return subtitles
def download_archive_and_add_subtitle_files(self, link, language, video ): def download_archive_and_add_subtitle_files(self, link, language, video, fps, num_cds):
logger.info('Downloading subtitle %r', link) logger.info('Downloading subtitle %r', link)
request = self.session.get(link, headers={ request = self.session.get(link, headers={
'Referer': 'http://subs.sab.bz/index.php?' 'Referer': 'http://subs.sab.bz/index.php?'
@ -185,9 +230,9 @@ class SubsSabBzProvider(Provider):
archive_stream = io.BytesIO(request.content) archive_stream = io.BytesIO(request.content)
if is_rarfile(archive_stream): if is_rarfile(archive_stream):
return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link ) return self.process_archive_subtitle_files(RarFile(archive_stream), language, video, link, fps, num_cds)
elif is_zipfile(archive_stream): elif is_zipfile(archive_stream):
return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link ) return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps, num_cds)
else: else:
logger.error('Ignore unsupported archive %r', request.headers) logger.error('Ignore unsupported archive %r', request.headers)
return [] return []

View File

@ -13,7 +13,6 @@ from guessit import guessit
from subliminal_patch.providers import Provider from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle from subliminal_patch.subtitle import Subtitle
from subliminal_patch.utils import sanitize, fix_inconsistent_naming from subliminal_patch.utils import sanitize, fix_inconsistent_naming
from subliminal.exceptions import ProviderError
from subliminal.utils import sanitize_release_group from subliminal.utils import sanitize_release_group
from subliminal.subtitle import guess_matches from subliminal.subtitle import guess_matches
from subliminal.video import Episode, Movie from subliminal.video import Episode, Movie
@ -42,18 +41,23 @@ class SubsUnacsSubtitle(Subtitle):
"""SubsUnacs Subtitle.""" """SubsUnacs Subtitle."""
provider_name = 'subsunacs' provider_name = 'subsunacs'
def __init__(self, langauge, filename, type, video, link): def __init__(self, langauge, filename, type, video, link, fps, num_cds):
super(SubsUnacsSubtitle, self).__init__(langauge) super(SubsUnacsSubtitle, self).__init__(langauge)
self.langauge = langauge self.langauge = langauge
self.filename = filename self.filename = filename
self.page_link = link self.page_link = link
self.type = type self.type = type
self.video = video self.video = video
self.fps = fps
self.num_cds = num_cds
self.release_info = os.path.splitext(filename)[0] self.release_info = os.path.splitext(filename)[0]
@property @property
def id(self): def id(self):
return self.filename return self.page_link + self.filename
def get_fps(self):
return self.fps
def make_picklable(self): def make_picklable(self):
self.content = None self.content = None
@ -75,13 +79,17 @@ class SubsUnacsSubtitle(Subtitle):
if video_filename == subtitle_filename: if video_filename == subtitle_filename:
matches.add('hash') matches.add('hash')
matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) if video.year and self.year == video.year:
matches.add('year')
matches |= guess_matches(video, guessit(self.title, {'type': self.type, 'allowed_countries': [None]}))
matches |= guess_matches(video, guessit(self.filename, {'type': self.type, 'allowed_countries': [None]}))
return matches return matches
class SubsUnacsProvider(Provider): class SubsUnacsProvider(Provider):
"""SubsUnacs Provider.""" """SubsUnacs Provider."""
languages = {Language('por', 'BR')} | {Language(l) for l in [ languages = {Language(l) for l in [
'bul', 'eng' 'bul', 'eng'
]} ]}
@ -146,11 +154,43 @@ class SubsUnacsProvider(Provider):
element = a_element_wrapper.find('a', {'class': 'tooltip'}) element = a_element_wrapper.find('a', {'class': 'tooltip'})
if element: if element:
link = element.get('href') link = element.get('href')
element = row.find('a', href = re.compile(r'.*/search\.php\?t=1\&(memid|u)=.*')) notes = element.get('title')
uploader = element.get_text() if element else None title = element.get_text()
try:
year = int(element.find_next_sibling('span', {'class' : 'smGray'}).text.strip('\xa0()'))
except:
year = None
td = row.findAll('td')
try:
num_cds = int(td[1].get_text())
except:
num_cds = None
try:
fps = float(td[2].get_text())
except:
fps = None
try:
rating = float(td[3].find('img').get('title'))
except:
rating = None
try:
uploader = td[5].get_text()
except:
uploader = None
logger.info('Found subtitle link %r', link) logger.info('Found subtitle link %r', link)
sub = self.download_archive_and_add_subtitle_files('https://subsunacs.net' + link, language, video) sub = self.download_archive_and_add_subtitle_files('https://subsunacs.net' + link, language, video, fps, num_cds)
for s in sub: for s in sub:
s.title = title
s.notes = notes
s.year = year
s.rating = rating
s.uploader = uploader s.uploader = uploader
subtitles = subtitles + sub subtitles = subtitles + sub
return subtitles return subtitles
@ -163,28 +203,29 @@ class SubsUnacsProvider(Provider):
pass pass
else: else:
seeking_subtitle_file = subtitle.filename seeking_subtitle_file = subtitle.filename
arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video) arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video,
subtitle.fps, subtitle.num_cds)
for s in arch: for s in arch:
if s.filename == seeking_subtitle_file: if s.filename == seeking_subtitle_file:
subtitle.content = s.content subtitle.content = s.content
def process_archive_subtitle_files(self, archiveStream, language, video, link, fps, num_cds):
    """Create a subtitle object for each usable subtitle file in an archive.

    Members are visited in sorted name order. Only ``.srt``, ``.sub`` and
    ``.txt`` members are considered; ``.txt`` members whose name looks like
    a readme are skipped, and the remaining ``.txt`` members are kept only
    when ``subtitle.is_valid()`` accepts their content.

    :param archiveStream: opened archive exposing ``namelist()`` and
        ``read(name)`` (callers pass a ``RarFile`` or ``ZipFile``)
    :param language: language handed to every created subtitle
    :param video: video the subtitles are searched for; decides whether the
        subtitle type is ``'episode'`` or ``'movie'``
    :param link: download link stored on every created subtitle
    :param fps: frame rate forwarded to the subtitle constructor
    :param num_cds: number of CDs forwarded to the subtitle constructor
    :return: list of subtitles with their ``content`` already populated
    :rtype: list
    """
    subtitles = []
    # Named video_type rather than ``type`` to avoid shadowing the builtin.
    video_type = 'episode' if isinstance(video, Episode) else 'movie'
    for file_name in sorted(archiveStream.namelist()):
        lowered_name = file_name.lower()
        if not lowered_name.endswith(('.srt', '.sub', '.txt')):
            continue

        file_is_txt = lowered_name.endswith('.txt')
        if file_is_txt and re.search(r'subsunacs\.net|танете част|прочети|^read ?me|procheti', file_name, re.I):
            logger.info('Ignore readme txt file %r', file_name)
            continue

        logger.info('Found subtitle file %r', file_name)
        subtitle = SubsUnacsSubtitle(language, file_name, video_type, video, link, fps, num_cds)
        subtitle.content = fix_line_ending(archiveStream.read(file_name))
        # A .txt member may be plain text rather than a subtitle, so it must
        # pass validation; .srt/.sub members are accepted as they are.
        if not file_is_txt or subtitle.is_valid():
            subtitles.append(subtitle)
    return subtitles
def download_archive_and_add_subtitle_files(self, link, language, video ): def download_archive_and_add_subtitle_files(self, link, language, video, fps, num_cds):
logger.info('Downloading subtitle %r', link) logger.info('Downloading subtitle %r', link)
request = self.session.get(link, headers={ request = self.session.get(link, headers={
'Referer': 'https://subsunacs.net/search.php' 'Referer': 'https://subsunacs.net/search.php'
@ -193,9 +234,9 @@ class SubsUnacsProvider(Provider):
archive_stream = io.BytesIO(request.content) archive_stream = io.BytesIO(request.content)
if is_rarfile(archive_stream): if is_rarfile(archive_stream):
return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link ) return self.process_archive_subtitle_files(RarFile(archive_stream), language, video, link, fps, num_cds)
elif is_zipfile(archive_stream): elif is_zipfile(archive_stream):
return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link ) return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps, num_cds)
else: else:
logger.error('Ignore unsupported archive %r', request.headers) logger.error('Ignore unsupported archive %r', request.headers)
return [] return []

View File

@ -19,6 +19,8 @@ from subliminal.video import Episode
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
article_re = re.compile(r'^([A-Za-z]{1,3}) (.*)$') article_re = re.compile(r'^([A-Za-z]{1,3}) (.*)$')
episode_re = re.compile(r'^(\d+)(-(\d+))*$') episode_re = re.compile(r'^(\d+)(-(\d+))*$')
episode_name_re = re.compile(r'^(.*?)( [\[(].{2,4}[\])])*$')
series_sanitize_re = re.compile(r'^(.*?)( \[\D+\])*$')
class XSubsSubtitle(Subtitle): class XSubsSubtitle(Subtitle):
@ -143,7 +145,11 @@ class XSubsProvider(Provider):
for show_category in soup.findAll('seriesl'): for show_category in soup.findAll('seriesl'):
if show_category.attrs['category'] == u'Σειρές': if show_category.attrs['category'] == u'Σειρές':
for show in show_category.findAll('series'): for show in show_category.findAll('series'):
show_ids[sanitize(show.text)] = int(show['srsid']) series = show.text
series_match = series_sanitize_re.match(series)
if series_match:
series = series_match.group(1)
show_ids[sanitize(series)] = int(show['srsid'])
break break
logger.debug('Found %d show ids', len(show_ids)) logger.debug('Found %d show ids', len(show_ids))
@ -195,6 +201,9 @@ class XSubsProvider(Provider):
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser']) soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
series = soup.find('name').text series = soup.find('name').text
series_match = episode_name_re.match(series)
if series_match:
series = series_match.group(1)
# loop over season rows # loop over season rows
seasons = soup.findAll('series_group') seasons = soup.findAll('series_group')

View File

@ -1,7 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import from __future__ import absolute_import
import logging import logging
import re
import io import io
import os import os
from random import randint from random import randint
@ -13,7 +12,6 @@ from guessit import guessit
from subliminal_patch.providers import Provider from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle from subliminal_patch.subtitle import Subtitle
from subliminal_patch.utils import sanitize from subliminal_patch.utils import sanitize
from subliminal.exceptions import ProviderError
from subliminal.utils import sanitize_release_group from subliminal.utils import sanitize_release_group
from subliminal.subtitle import guess_matches from subliminal.subtitle import guess_matches
from subliminal.video import Episode, Movie from subliminal.video import Episode, Movie
@ -27,18 +25,22 @@ class YavkaNetSubtitle(Subtitle):
"""YavkaNet Subtitle.""" """YavkaNet Subtitle."""
provider_name = 'yavkanet' provider_name = 'yavkanet'
def __init__(self, langauge, filename, type, video, link): def __init__(self, langauge, filename, type, video, link, fps):
super(YavkaNetSubtitle, self).__init__(langauge) super(YavkaNetSubtitle, self).__init__(langauge)
self.langauge = langauge self.langauge = langauge
self.filename = filename self.filename = filename
self.page_link = link self.page_link = link
self.type = type self.type = type
self.video = video self.video = video
self.fps = fps
self.release_info = os.path.splitext(filename)[0] self.release_info = os.path.splitext(filename)[0]
@property @property
def id(self): def id(self):
return self.filename return self.page_link + self.filename
def get_fps(self):
return self.fps
def make_picklable(self): def make_picklable(self):
self.content = None self.content = None
@ -60,7 +62,11 @@ class YavkaNetSubtitle(Subtitle):
if video_filename == subtitle_filename: if video_filename == subtitle_filename:
matches.add('hash') matches.add('hash')
matches |= guess_matches(video, guessit(self.filename, {'type': self.type})) if video.year and self.year == video.year:
matches.add('year')
matches |= guess_matches(video, guessit(self.title, {'type': self.type, 'allowed_countries': [None]}))
matches |= guess_matches(video, guessit(self.filename, {'type': self.type, 'allowed_countries': [None]}))
return matches return matches
@ -122,18 +128,34 @@ class YavkaNetProvider(Provider):
return subtitles return subtitles
soup = BeautifulSoup(response.content, 'lxml') soup = BeautifulSoup(response.content, 'lxml')
rows = soup.findAll('tr', {'class': 'info'}) rows = soup.findAll('tr')
# Search on first 20 rows only # Search on first 25 rows only
for row in rows[:20]: for row in rows[:25]:
element = row.find('a', {'class': 'selector'}) element = row.find('a', {'class': 'selector'})
if element: if element:
link = element.get('href') link = element.get('href')
notes = element.get('content')
title = element.get_text()
try:
year = int(element.find_next_sibling('span').text.strip('()'))
except:
year = None
try:
fps = float(row.find('span', {'title': 'Кадри в секунда'}).text.strip())
except:
fps = None
element = row.find('a', {'class': 'click'}) element = row.find('a', {'class': 'click'})
uploader = element.get_text() if element else None uploader = element.get_text() if element else None
logger.info('Found subtitle link %r', link) logger.info('Found subtitle link %r', link)
sub = self.download_archive_and_add_subtitle_files('http://yavka.net/' + link, language, video) sub = self.download_archive_and_add_subtitle_files('http://yavka.net/' + link, language, video, fps)
for s in sub: for s in sub:
s.title = title
s.notes = notes
s.year = year
s.uploader = uploader s.uploader = uploader
subtitles = subtitles + sub subtitles = subtitles + sub
return subtitles return subtitles
@ -146,23 +168,24 @@ class YavkaNetProvider(Provider):
pass pass
else: else:
seeking_subtitle_file = subtitle.filename seeking_subtitle_file = subtitle.filename
arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video) arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video,
subtitle.fps)
for s in arch: for s in arch:
if s.filename == seeking_subtitle_file: if s.filename == seeking_subtitle_file:
subtitle.content = s.content subtitle.content = s.content
def process_archive_subtitle_files(self, archiveStream, language, video, link, fps):
    """Create a subtitle object for each ``.srt``/``.sub`` file in an archive.

    :param archiveStream: opened archive exposing ``namelist()`` and
        ``read(name)`` (callers pass a ``RarFile`` or ``ZipFile``)
    :param language: language handed to every created subtitle
    :param video: video the subtitles are searched for; decides whether the
        subtitle type is ``'episode'`` or ``'movie'``
    :param link: download link stored on every created subtitle
    :param fps: frame rate forwarded to the subtitle constructor
    :return: list of subtitles with their ``content`` already populated
    :rtype: list
    """
    subtitles = []
    # Named video_type rather than ``type`` to avoid shadowing the builtin.
    video_type = 'episode' if isinstance(video, Episode) else 'movie'
    # Sorted so the result order does not depend on how the archive was
    # packed, in line with the SubsUnacs provider.
    for file_name in sorted(archiveStream.namelist()):
        if not file_name.lower().endswith(('.srt', '.sub')):
            continue
        logger.info('Found subtitle file %r', file_name)
        subtitle = YavkaNetSubtitle(language, file_name, video_type, video, link, fps)
        subtitle.content = fix_line_ending(archiveStream.read(file_name))
        subtitles.append(subtitle)
    return subtitles
def download_archive_and_add_subtitle_files(self, link, language, video ): def download_archive_and_add_subtitle_files(self, link, language, video, fps):
logger.info('Downloading subtitle %r', link) logger.info('Downloading subtitle %r', link)
request = self.session.get(link, headers={ request = self.session.get(link, headers={
'Referer': 'http://yavka.net/subtitles.php' 'Referer': 'http://yavka.net/subtitles.php'
@ -171,9 +194,9 @@ class YavkaNetProvider(Provider):
archive_stream = io.BytesIO(request.content) archive_stream = io.BytesIO(request.content)
if is_rarfile(archive_stream): if is_rarfile(archive_stream):
return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link ) return self.process_archive_subtitle_files(RarFile(archive_stream), language, video, link, fps)
elif is_zipfile(archive_stream): elif is_zipfile(archive_stream):
return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link ) return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps)
else: else:
logger.error('Ignore unsupported archive %r', request.headers) logger.error('Ignore unsupported archive %r', request.headers)
return [] return []

View File

@ -89,6 +89,13 @@ class Subtitle(Subtitle_):
def numeric_id(self):
    """Numeric identifier of the subtitle; not provided by this base class.

    :raises NotImplementedError: always. ``NotImplemented`` is a comparison
        sentinel, not an exception, so raising it produced a ``TypeError``.
    """
    # NOTE(review): presumably subclasses that have a numeric id override
    # this - confirm against the providers.
    raise NotImplementedError
def get_fps(self):
    """
    Frame rate of the subtitle as reported by the provider.

    This base implementation reports no frame rate. When parsing hits an
    unknown-FPS error, a value that is not a float of at least 10.0 makes
    the caller fall back to the media file's own frame rate.

    :return: frames per second or None if not supported
    :rtype: float or None
    """
    return None
def make_picklable(self): def make_picklable(self):
""" """
some subtitle instances might have unpicklable objects stored; clean them up here some subtitle instances might have unpicklable objects stored; clean them up here
@ -264,10 +271,14 @@ class Subtitle(Subtitle_):
else: else:
logger.info("Got format: %s", subs.format) logger.info("Got format: %s", subs.format)
except pysubs2.UnknownFPSError: except pysubs2.UnknownFPSError:
# if parsing failed, suggest our media file's fps # if parsing failed, use frame rate from provider
logger.info("No FPS info in subtitle. Using our own media FPS for the MicroDVD subtitle: %s", sub_fps = self.get_fps()
self.plex_media_fps) if not isinstance(sub_fps, float) or sub_fps < 10.0:
subs = pysubs2.SSAFile.from_string(text, fps=self.plex_media_fps) # or use our media file's fps as a fallback
sub_fps = self.plex_media_fps
logger.info("No FPS info in subtitle. Using our own media FPS for the MicroDVD subtitle: %s",
self.plex_media_fps)
subs = pysubs2.SSAFile.from_string(text, fps=sub_fps)
unicontent = self.pysubs2_to_unicode(subs) unicontent = self.pysubs2_to_unicode(subs)
self.content = unicontent.encode(self._guessed_encoding) self.content = unicontent.encode(self._guessed_encoding)