Merge branch 'development' into hermes

# Conflicts:
#	.gitignore
#	bazarr/main.py
This commit is contained in:
Louis Vézina 2020-05-06 08:12:24 -04:00
commit 6fd6ee234c
13 changed files with 378 additions and 203 deletions

2
.gitignore vendored
View File

@ -3,6 +3,7 @@ cachefile.dbm
*.log
*.log.*
*.db
*.pyc
.idea/*
bazarr.pid
/venv
@ -10,6 +11,7 @@ bazarr.pid
static/scss/.sass-cache/*
static/scss/.sass-cache
*.scssc
/.vscode
# Allow
!*.dll

View File

@ -68,6 +68,7 @@ If you need something that is not already part of Bazarr, feel free to create a
* TVSubtitles
* Wizdom
* XSubs
* Yavka.net
* Zimuku
## Screenshot

View File

@ -80,6 +80,9 @@ if __name__ == '__main__':
while True:
check_status()
try:
time.sleep(5)
if sys.platform.startswith('win'):
time.sleep(5)
else:
os.wait()
except (KeyboardInterrupt, SystemExit):
pass

View File

@ -35,6 +35,11 @@ PROVIDER_THROTTLE_MAP = {
},
"titulky": {
DownloadLimitExceeded: (datetime.timedelta(hours=3), "3 hours")
},
"legendasdivx": {
TooManyRequests: (datetime.timedelta(hours=2), "2 hours"),
DownloadLimitExceeded: (datetime.timedelta(hours=6), "6 hours"),
ParseResponseError: (datetime.timedelta(hours=1), "1 hours"),
}
}

View File

@ -387,7 +387,7 @@ def guess_external_subtitles(dest_folder, subtitles):
logging.debug('BAZARR detected encoding %r', guess)
if guess["confidence"] < 0.6:
raise UnicodeError
if guess["confidence"] < 0.7 or guess["encoding"] == "ascii":
if guess["encoding"] == "ascii":
guess["encoding"] = "utf-8"
text = text.decode(guess["encoding"])
detected_language = guess_language(text)

View File

@ -45,6 +45,8 @@ from get_movies import *
from scheduler import Scheduler
from check_update import check_and_apply_update
from subliminal_patch.extensions import provider_registry as provider_manager
from subliminal_patch.core import SUBTITLE_EXTENSIONS
from subliminal.cache import region
from functools import wraps
from app import create_app, socketio

View File

@ -2,20 +2,22 @@
from __future__ import absolute_import
import logging
import io
import re
import os
import rarfile
import zipfile
from requests import Session
from guessit import guessit
from subliminal_patch.exceptions import ParseResponseError
from subliminal.exceptions import ConfigurationError, AuthenticationError, ServiceUnavailable, DownloadLimitExceeded
from subliminal_patch.providers import Provider
from subliminal.providers import ParserBeautifulSoup
from subliminal_patch.subtitle import Subtitle
from subliminal.video import Episode, Movie
from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending,guess_matches
from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending, guess_matches
from subzero.language import Language
from subliminal_patch.score import get_scores
from subliminal.utils import sanitize, sanitize_release_group
logger = logging.getLogger(__name__)
@ -27,11 +29,12 @@ class LegendasdivxSubtitle(Subtitle):
super(LegendasdivxSubtitle, self).__init__(language)
self.language = language
self.page_link = data['link']
self.hits=data['hits']
self.exact_match=data['exact_match']
self.description=data['description'].lower()
self.hits = data['hits']
self.exact_match = data['exact_match']
self.description = data['description']
self.video = video
self.videoname =data['videoname']
self.video_filename = data['video_filename']
self.uploader = data['uploader']
@property
def id(self):
@ -44,40 +47,37 @@ class LegendasdivxSubtitle(Subtitle):
def get_matches(self, video):
matches = set()
if self.videoname.lower() in self.description:
description = sanitize(self.description)
if sanitize(self.video_filename) in description:
matches.update(['title'])
matches.update(['season'])
matches.update(['episode'])
# episode
if video.title and video.title.lower() in self.description:
if video.title and sanitize(video.title) in description:
matches.update(['title'])
if video.year and '{:04d}'.format(video.year) in self.description:
if video.year and '{:04d}'.format(video.year) in description:
matches.update(['year'])
if isinstance(video, Episode):
# already matched in search query
if video.season and 's{:02d}'.format(video.season) in self.description:
if video.season and 's{:02d}'.format(video.season) in description:
matches.update(['season'])
if video.episode and 'e{:02d}'.format(video.episode) in self.description:
if video.episode and 'e{:02d}'.format(video.episode) in description:
matches.update(['episode'])
if video.episode and video.season and video.series:
if '{}.s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description:
matches.update(['series'])
matches.update(['season'])
matches.update(['episode'])
if '{} s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description:
if '{} s{:02d}e{:02d}'.format(sanitize(video.series), video.season, video.episode) in description:
matches.update(['series'])
matches.update(['season'])
matches.update(['episode'])
# release_group
if video.release_group and video.release_group.lower() in self.description:
if video.release_group and sanitize_release_group(video.release_group) in sanitize_release_group(description):
matches.update(['release_group'])
# resolution
if video.resolution and video.resolution.lower() in self.description:
if video.resolution and video.resolution.lower() in description:
matches.update(['resolution'])
# format
@ -87,9 +87,9 @@ class LegendasdivxSubtitle(Subtitle):
if formats[0] == "web-dl":
formats.append("webdl")
formats.append("webrip")
formats.append("web ")
formats.append("web")
for frmt in formats:
if frmt.lower() in self.description:
if frmt in description:
matches.update(['format'])
break
@ -97,11 +97,11 @@ class LegendasdivxSubtitle(Subtitle):
if video.video_codec:
video_codecs = [video.video_codec.lower()]
if video_codecs[0] == "h264":
formats.append("x264")
video_codecs.append("x264")
elif video_codecs[0] == "h265":
formats.append("x265")
for vc in formats:
if vc.lower() in self.description:
video_codecs.append("x265")
for vc in video_codecs:
if vc in description:
matches.update(['video_codec'])
break
@ -109,9 +109,6 @@ class LegendasdivxSubtitle(Subtitle):
# matches |= guess_matches(video, guessit(self.description))
return matches
class LegendasdivxProvider(Provider):
"""Legendasdivx Provider."""
languages = {Language('por', 'BR')} | {Language('por')}
@ -126,15 +123,21 @@ class LegendasdivxProvider(Provider):
'Cache-Control': 'no-cache'
}
loginpage = site + '/forum/ucp.php?mode=login'
logoutpage = site + '/sair.php'
searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}'
language_list = list(languages)
download_link = site + '/modules.php{link}'
def __init__(self, username, password):
    """Store the account credentials and start in the logged-out state.

    :param username: account user name (may be empty/None).
    :param password: account password (may be empty/None).
    :raises ConfigurationError: when exactly one of the two is supplied.
    """
    # make sure login credentials are configured: supplying only one of the
    # two values is a configuration mistake, both or neither is accepted.
    has_username = bool(username)
    has_password = bool(password)
    if has_username != has_password:
        raise ConfigurationError('Username and password must be specified')

    self.username, self.password = username, password
    self.logged_in = False
def initialize(self):
    """Create the HTTP session, apply the provider headers and log in."""
    http_session = Session()
    # Install the provider's default headers before any request is sent.
    http_session.headers.update(self.headers)
    self.session = http_session
    self.login()
def terminate(self):
@ -143,100 +146,103 @@ class LegendasdivxProvider(Provider):
def login(self):
logger.info('Logging in')
self.headers['Referer'] = self.site + '/index.php'
self.session.headers.update(self.headers.items())
res = self.session.get(self.loginpage)
bsoup = ParserBeautifulSoup(res.content, ['lxml'])
_allinputs = bsoup.findAll('input')
fields = {}
data = {}
# necessary to set 'sid' for POST request
for field in _allinputs:
fields[field.get('name')] = field.get('value')
data[field.get('name')] = field.get('value')
data['username'] = self.username
data['password'] = self.password
fields['username'] = self.username
fields['password'] = self.password
fields['autologin'] = 'on'
fields['viewonline'] = 'on'
self.headers['Referer'] = self.loginpage
self.session.headers.update(self.headers.items())
res = self.session.post(self.loginpage, fields)
res = self.session.post(self.loginpage, data)
res.raise_for_status()
try:
logger.debug('Got session id %s' %
logger.debug('Logged in successfully: PHPSESSID: %s' %
self.session.cookies.get_dict()['PHPSESSID'])
except KeyError as e:
logger.error(repr(e))
logger.error("Didn't get session id, check your credentials")
return False
self.logged_in = True
except KeyError:
logger.error("Couldn't retrieve session ID, check your credentials")
raise AuthenticationError("Please check your credentials.")
except Exception as e:
logger.error(repr(e))
logger.error('uncached error #legendasdivx #AA')
return False
return True
if 'bloqueado' in res.text.lower(): # blocked IP address
logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)
logger.error("LegendasDivx.pt :: Uncaught error: %r" % repr(e))
raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % repr(e))
def logout(self):
# need to figure this out
return True
if self.logged_in:
logger.info('Legendasdivx:: Logging out')
r = self.session.get(self.logoutpage, timeout=10)
r.raise_for_status()
logger.debug('Legendasdivx :: Logged out')
self.logged_in = False
def _process_page(self, video, bsoup, video_filename):
def _process_page(self, video, bsoup, querytext, videoname):
subtitles = []
_allsubs = bsoup.findAll("div", {"class": "sub_box"})
lang = Language.fromopensubtitles("pob")
for _subbox in _allsubs:
hits=0
hits = 0
for th in _subbox.findAll("th", {"class": "color2"}):
if th.string == 'Hits:':
hits = int(th.parent.find("td").string)
if th.string == 'Idioma:':
lang = th.parent.find("td").find ("img").get ('src')
if 'brazil' in lang:
lang = th.parent.find("td").find("img").get('src')
if 'brazil' in lang.lower():
lang = Language.fromopensubtitles('pob')
else:
elif 'portugal' in lang.lower():
lang = Language.fromopensubtitles('por')
description = _subbox.find("td", {"class": "td_desc brd_up"})
else:
continue
# get description for matches
description = _subbox.find("td", {"class": "td_desc brd_up"}).get_text()
#get subtitle link
download = _subbox.find("a", {"class": "sub_download"})
try:
# sometimes BSoup just doesn't get the link
logger.debug(download.get('href'))
except Exception as e:
logger.warning('skipping subbox on %s' % self.searchurl.format(query=querytext))
continue
# sometimes BSoup can't find 'a' tag and returns None.
i = 0
while not (download): # must get it... trying again...
download = _subbox.find("a", {"class": "sub_download"})
i=+1
logger.debug("Try number {0} try!".format(str(i)))
dl = download.get('href')
logger.debug("Found subtitle on: %s" % self.download_link.format(link=dl))
# get subtitle uploader
sub_header = _subbox.find("div", {"class" :"sub_header"})
uploader = sub_header.find("a").text if sub_header else 'anonymous'
exact_match = False
if video.name.lower() in description.get_text().lower():
if video.name.lower() in description.lower():
exact_match = True
data = {'link': self.site + '/modules.php' + download.get('href'),
'exact_match': exact_match,
'hits': hits,
'videoname': videoname,
'description': description.get_text() }
'uploader': uploader,
'video_filename': video_filename,
'description': description
}
subtitles.append(
LegendasdivxSubtitle(lang, video, data)
)
return subtitles
def query(self, video, language):
try:
logger.debug('Got session id %s' %
self.session.cookies.get_dict()['PHPSESSID'])
except Exception as e:
self.login()
def query(self, video, languages):
language_ids = '0'
if isinstance(language, (tuple, list, set)):
if len(language) == 1:
language_ids = ','.join(sorted(l.opensubtitles for l in language))
if language_ids == 'por':
language_ids = '&form_cat=28'
else:
language_ids = '&form_cat=29'
video_filename = video.name
video_filename = os.path.basename(video_filename)
video_filename, _ = os.path.splitext(video_filename)
video_filename = sanitize_release_group(video_filename)
videoname = video.name
videoname = os.path.basename(videoname)
videoname, _ = os.path.splitext(videoname)
# querytext = videoname.lower()
_searchurl = self.searchurl
if video.imdb_id is None:
if isinstance(video, Episode):
@ -246,22 +252,47 @@ class LegendasdivxProvider(Provider):
else:
querytext = video.imdb_id
# language query filter
if isinstance(languages, (tuple, list, set)):
language_ids = ','.join(sorted(l.opensubtitles for l in languages))
if 'por' in language_ids: # prioritize portuguese subtitles
lang_filter = '&form_cat=28' # pt
elif 'pob' in language_ids:
lang_filter = '&form_cat=29' # br
else:
lang_filter = ''
querytext = querytext + lang_filter if lang_filter else querytext
# querytext = querytext.replace(
# ".", "+").replace("[", "").replace("]", "")
if language_ids != '0':
querytext = querytext + language_ids
self.headers['Referer'] = self.site + '/index.php'
self.session.headers.update(self.headers.items())
res = self.session.get(_searchurl.format(query=querytext))
# form_cat=28 = br
# form_cat=29 = pt
if "A legenda não foi encontrada" in res.text:
logger.warning('%s not found', querytext)
return []
bsoup = ParserBeautifulSoup(res.content, ['html.parser'])
subtitles = self._process_page(video, bsoup, querytext, videoname)
subtitles = self._process_page(video, bsoup, video_filename)
# search for more than 10 results (legendasdivx uses pagination)
# don't throttle - maximum results = 6 * 10
MAX_PAGES = 6
# get number of pages based on results found
page_header = bsoup.find("div", {"class": "pager_bar"})
results_found = re.search(r'\((.*?) encontradas\)', page_header.text).group(1)
num_pages = (int(results_found) // 10) + 1
num_pages = min(MAX_PAGES, num_pages)
if num_pages > 1:
for num_page in range(2, num_pages+2):
_search_next = self.searchurl.format(query=querytext) + "&page={0}".format(str(num_page))
logger.debug("Moving to next page: %s" % _search_next)
res = self.session.get(_search_next)
next_page = ParserBeautifulSoup(res.content, ['html.parser'])
subs = self._process_page(video, next_page, video_filename)
subtitles.extend(subs)
return subtitles
@ -270,9 +301,14 @@ class LegendasdivxProvider(Provider):
def download_subtitle(self, subtitle):
res = self.session.get(subtitle.page_link)
res.raise_for_status()
if res:
if res.text == '500':
raise ValueError('Error 500 on server')
if res.status_code in ['500', '503']:
raise ServiceUnavailable("Legendasdivx.pt :: 503 - Service Unavailable")
elif 'limite' in res.text.lower(): # daily downloads limit reached
raise DownloadLimitReached("Legendasdivx.pt :: Download limit reached")
elif 'bloqueado' in res.text.lower(): # blocked IP address
raise ParseResponseError("Legendasdivx.pt :: %r" % res.text)
archive = self._get_archive(res.content)
# extract the subtitle
@ -281,7 +317,9 @@ class LegendasdivxProvider(Provider):
subtitle.normalize()
return subtitle
raise ValueError('Problems conecting to the server')
logger.error("Legendasdivx.pt :: there was a problem retrieving subtitle (status %s)" % res.status_code)
return
def _get_archive(self, content):
# open the archive
@ -294,7 +332,6 @@ class LegendasdivxProvider(Provider):
logger.debug('Identified zip archive')
archive = zipfile.ZipFile(archive_stream)
else:
# raise ParseResponseError('Unsupported compressed format')
raise Exception('Unsupported compressed format')
return archive
@ -305,7 +342,7 @@ class LegendasdivxProvider(Provider):
_tmp.remove('.txt')
_subtitle_extensions = tuple(_tmp)
_max_score = 0
_scores = get_scores (subtitle.video)
_scores = get_scores(subtitle.video)
for name in archive.namelist():
# discard hidden files
@ -338,4 +375,4 @@ class LegendasdivxProvider(Provider):
logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score))
return archive.read(_max_name)
raise ParseResponseError('Can not find the subtitle in the compressed file')
raise ValueError("No subtitle found on compressed file. Max score was 0")

View File

@ -44,6 +44,12 @@ class OpenSubtitlesSubtitle(_OpenSubtitlesSubtitle):
self.wrong_fps = False
self.skip_wrong_fps = skip_wrong_fps
def get_fps(self):
    """Return the subtitle frame rate as a float.

    :return: ``float(self.fps)``, or ``None`` when ``fps`` is missing or
        cannot be converted (e.g. ``None``, an empty or non-numeric string).
    """
    try:
        return float(self.fps)
    except (AttributeError, TypeError, ValueError):
        # Only swallow lookup/conversion failures; a bare ``except`` would
        # also hide KeyboardInterrupt and SystemExit.
        return None
def get_matches(self, video, hearing_impaired=False):
matches = super(OpenSubtitlesSubtitle, self).get_matches(video)
@ -138,11 +144,9 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
return ServerProxy(url, SubZeroRequestsTransport(use_https=self.use_ssl, timeout=timeout or self.timeout,
user_agent=os.environ.get("SZ_USER_AGENT", "Sub-Zero/2")))
def log_in(self, server_url=None):
if server_url:
self.terminate()
self.server = self.get_server_proxy(server_url)
def log_in_url(self, server_url):
self.token = None
self.server = self.get_server_proxy(server_url)
response = self.retry(
lambda: checked(
@ -155,6 +159,25 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
logger.debug('Logged in with token %r', self.token[:10]+"X"*(len(self.token)-10))
region.set("os_token", bytearray(self.token, encoding='utf-8'))
region.set("os_server_url", bytearray(server_url, encoding='utf-8'))
def log_in(self):
    """Log in, preferring the VIP server for VIP accounts.

    The VIP URL is tried first when ``self.is_vip`` is set; if that attempt
    raises ``Unauthorized`` the default URL is tried as a fallback.
    Non-VIP accounts only try the default URL.

    :raises Unauthorized: when no token is set after all attempts.
    """
    logger.info('Logging in')

    vip_account = self.is_vip
    if vip_account:
        candidate_urls = (self.vip_url, self.default_url)
    else:
        candidate_urls = (self.default_url,)

    for attempt, server_url in enumerate(candidate_urls):
        try:
            self.log_in_url(server_url)
        except Unauthorized:
            # Only the first (VIP) rejection is announced; a rejection of
            # the last candidate is reported by the token check below.
            if vip_account and attempt == 0:
                logger.info("VIP server login failed, falling back")
            continue
        break

    # presumably log_in_url stores the session token on success — the
    # visible part only shows it resetting the token before the request.
    if not self.token:
        logger.error("Login failed, please check your credentials")
        raise Unauthorized
def use_token_or_login(self, func):
if not self.token:
@ -167,45 +190,18 @@ class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
return func()
def initialize(self):
if self.is_vip:
self.server = self.get_server_proxy(self.vip_url)
logger.info("Using VIP server")
token_cache = region.get("os_token")
url_cache = region.get("os_server_url")
if token_cache is not NO_VALUE and url_cache is not NO_VALUE:
self.token = token_cache.decode("utf-8")
self.server = self.get_server_proxy(url_cache.decode("utf-8"))
logger.debug("Using previous login token: %r", self.token[:10] + "X" * (len(self.token) - 10))
else:
self.server = self.get_server_proxy(self.default_url)
self.server = None
self.token = None
logger.info('Logging in')
token = str(region.get("os_token"))
if token is not NO_VALUE:
try:
logger.debug('Trying previous token: %r', token[:10]+"X"*(len(token)-10))
checked(lambda: self.server.NoOperation(token))
self.token = token
logger.debug("Using previous login token: %r", token[:10]+"X"*(len(token)-10))
return
except (NoSession, Unauthorized):
logger.debug('Token not valid.')
pass
try:
self.log_in()
except Unauthorized:
if self.is_vip:
logger.info("VIP server login failed, falling back")
self.log_in(self.default_url)
if self.token:
return
logger.error("Login failed, please check your credentials")
def terminate(self):
if self.token:
try:
checked(lambda: self.server.LogOut(self.token))
except:
logger.error("Logout failed: %s", traceback.format_exc())
self.server = None
self.token = None

View File

@ -13,7 +13,6 @@ from guessit import guessit
from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle
from subliminal_patch.utils import sanitize, fix_inconsistent_naming
from subliminal.exceptions import ProviderError
from subliminal.utils import sanitize_release_group
from subliminal.subtitle import guess_matches
from subliminal.video import Episode, Movie
@ -43,18 +42,23 @@ class SubsSabBzSubtitle(Subtitle):
"""SubsSabBz Subtitle."""
provider_name = 'subssabbz'
def __init__(self, langauge, filename, type, video, link):
def __init__(self, langauge, filename, type, video, link, fps, num_cds):
super(SubsSabBzSubtitle, self).__init__(langauge)
self.langauge = langauge
self.filename = filename
self.page_link = link
self.type = type
self.video = video
self.fps = fps
self.num_cds = num_cds
self.release_info = os.path.splitext(filename)[0]
@property
def id(self):
return self.filename
return self.page_link + self.filename
def get_fps(self):
    """Return the frame rate stored on this subtitle (may be ``None``)."""
    frame_rate = self.fps
    return frame_rate
def make_picklable(self):
self.content = None
@ -76,13 +80,21 @@ class SubsSabBzSubtitle(Subtitle):
if video_filename == subtitle_filename:
matches.add('hash')
matches |= guess_matches(video, guessit(self.filename, {'type': self.type}))
if video.year and self.year == video.year:
matches.add('year')
if isinstance(video, Movie):
if video.imdb_id and self.imdb_id == video.imdb_id:
matches.add('imdb_id')
matches |= guess_matches(video, guessit(self.title, {'type': self.type, 'allowed_countries': [None]}))
matches |= guess_matches(video, guessit(self.filename, {'type': self.type, 'allowed_countries': [None]}))
return matches
class SubsSabBzProvider(Provider):
"""SubsSabBz Provider."""
languages = {Language('por', 'BR')} | {Language(l) for l in [
languages = {Language(l) for l in [
'bul', 'eng'
]}
@ -136,19 +148,51 @@ class SubsSabBzProvider(Provider):
soup = BeautifulSoup(response.content, 'lxml')
rows = soup.findAll('tr', {'class': 'subs-row'})
# Search on first 20 rows only
for row in rows[:20]:
# Search on first 25 rows only
for row in rows[:25]:
a_element_wrapper = row.find('td', { 'class': 'c2field' })
if a_element_wrapper:
element = a_element_wrapper.find('a')
if element:
link = element.get('href')
element = row.find('a', href = re.compile(r'.*showuser=.*'))
uploader = element.get_text() if element else None
notes = element.get('onmouseover')
title = element.get_text()
try:
year = int(str(element.next_sibling).strip(' ()'))
except:
year = None
td = row.findAll('td')
try:
num_cds = int(td[6].get_text())
except:
num_cds = None
try:
fps = float(td[7].get_text())
except:
fps = None
try:
uploader = td[8].get_text()
except:
uploader = None
try:
imdb_id = re.findall(r'imdb.com/title/(tt\d+)/?$', td[9].find('a').get('href'))[0]
except:
imdb_id = None
logger.info('Found subtitle link %r', link)
sub = self.download_archive_and_add_subtitle_files(link, language, video)
for s in sub:
sub = self.download_archive_and_add_subtitle_files(link, language, video, fps, num_cds)
for s in sub:
s.title = title
s.notes = notes
s.year = year
s.uploader = uploader
s.imdb_id = imdb_id
subtitles = subtitles + sub
return subtitles
@ -160,23 +204,24 @@ class SubsSabBzProvider(Provider):
pass
else:
seeking_subtitle_file = subtitle.filename
arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video)
arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video,
subtitle.fps, subtitle.num_cds)
for s in arch:
if s.filename == seeking_subtitle_file:
subtitle.content = s.content
def process_archive_subtitle_files(self, archiveStream, language, video, link):
def process_archive_subtitle_files(self, archiveStream, language, video, link, fps, num_cds):
subtitles = []
type = 'episode' if isinstance(video, Episode) else 'movie'
for file_name in archiveStream.namelist():
for file_name in sorted(archiveStream.namelist()):
if file_name.lower().endswith(('.srt', '.sub')):
logger.info('Found subtitle file %r', file_name)
subtitle = SubsSabBzSubtitle(language, file_name, type, video, link)
subtitle.content = archiveStream.read(file_name)
subtitle = SubsSabBzSubtitle(language, file_name, type, video, link, fps, num_cds)
subtitle.content = fix_line_ending(archiveStream.read(file_name))
subtitles.append(subtitle)
return subtitles
def download_archive_and_add_subtitle_files(self, link, language, video ):
def download_archive_and_add_subtitle_files(self, link, language, video, fps, num_cds):
logger.info('Downloading subtitle %r', link)
request = self.session.get(link, headers={
'Referer': 'http://subs.sab.bz/index.php?'
@ -185,9 +230,9 @@ class SubsSabBzProvider(Provider):
archive_stream = io.BytesIO(request.content)
if is_rarfile(archive_stream):
return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link )
return self.process_archive_subtitle_files(RarFile(archive_stream), language, video, link, fps, num_cds)
elif is_zipfile(archive_stream):
return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link )
return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps, num_cds)
else:
logger.error('Ignore unsupported archive %r', request.headers)
return []

View File

@ -13,7 +13,6 @@ from guessit import guessit
from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle
from subliminal_patch.utils import sanitize, fix_inconsistent_naming
from subliminal.exceptions import ProviderError
from subliminal.utils import sanitize_release_group
from subliminal.subtitle import guess_matches
from subliminal.video import Episode, Movie
@ -42,18 +41,23 @@ class SubsUnacsSubtitle(Subtitle):
"""SubsUnacs Subtitle."""
provider_name = 'subsunacs'
def __init__(self, langauge, filename, type, video, link):
def __init__(self, langauge, filename, type, video, link, fps, num_cds):
super(SubsUnacsSubtitle, self).__init__(langauge)
self.langauge = langauge
self.filename = filename
self.page_link = link
self.type = type
self.video = video
self.fps = fps
self.num_cds = num_cds
self.release_info = os.path.splitext(filename)[0]
@property
def id(self):
return self.filename
return self.page_link + self.filename
def get_fps(self):
    """Return the frame rate stored on this subtitle (may be ``None``)."""
    frame_rate = self.fps
    return frame_rate
def make_picklable(self):
self.content = None
@ -75,13 +79,17 @@ class SubsUnacsSubtitle(Subtitle):
if video_filename == subtitle_filename:
matches.add('hash')
matches |= guess_matches(video, guessit(self.filename, {'type': self.type}))
if video.year and self.year == video.year:
matches.add('year')
matches |= guess_matches(video, guessit(self.title, {'type': self.type, 'allowed_countries': [None]}))
matches |= guess_matches(video, guessit(self.filename, {'type': self.type, 'allowed_countries': [None]}))
return matches
class SubsUnacsProvider(Provider):
"""SubsUnacs Provider."""
languages = {Language('por', 'BR')} | {Language(l) for l in [
languages = {Language(l) for l in [
'bul', 'eng'
]}
@ -146,11 +154,43 @@ class SubsUnacsProvider(Provider):
element = a_element_wrapper.find('a', {'class': 'tooltip'})
if element:
link = element.get('href')
element = row.find('a', href = re.compile(r'.*/search\.php\?t=1\&(memid|u)=.*'))
uploader = element.get_text() if element else None
notes = element.get('title')
title = element.get_text()
try:
year = int(element.find_next_sibling('span', {'class' : 'smGray'}).text.strip('\xa0()'))
except:
year = None
td = row.findAll('td')
try:
num_cds = int(td[1].get_text())
except:
num_cds = None
try:
fps = float(td[2].get_text())
except:
fps = None
try:
rating = float(td[3].find('img').get('title'))
except:
rating = None
try:
uploader = td[5].get_text()
except:
uploader = None
logger.info('Found subtitle link %r', link)
sub = self.download_archive_and_add_subtitle_files('https://subsunacs.net' + link, language, video)
for s in sub:
sub = self.download_archive_and_add_subtitle_files('https://subsunacs.net' + link, language, video, fps, num_cds)
for s in sub:
s.title = title
s.notes = notes
s.year = year
s.rating = rating
s.uploader = uploader
subtitles = subtitles + sub
return subtitles
@ -163,28 +203,29 @@ class SubsUnacsProvider(Provider):
pass
else:
seeking_subtitle_file = subtitle.filename
arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video)
arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video,
subtitle.fps, subtitle.num_cds)
for s in arch:
if s.filename == seeking_subtitle_file:
subtitle.content = s.content
def process_archive_subtitle_files(self, archiveStream, language, video, link):
def process_archive_subtitle_files(self, archiveStream, language, video, link, fps, num_cds):
subtitles = []
type = 'episode' if isinstance(video, Episode) else 'movie'
for file_name in archiveStream.namelist():
for file_name in sorted(archiveStream.namelist()):
if file_name.lower().endswith(('.srt', '.sub', '.txt')):
file_is_txt = True if file_name.lower().endswith('.txt') else False
if file_is_txt and re.search(r'subsunacs\.net|танете част|прочети|^read ?me|procheti', file_name, re.I):
logger.info('Ignore readme txt file %r', file_name)
continue
logger.info('Found subtitle file %r', file_name)
subtitle = SubsUnacsSubtitle(language, file_name, type, video, link)
subtitle.content = archiveStream.read(file_name)
subtitle = SubsUnacsSubtitle(language, file_name, type, video, link, fps, num_cds)
subtitle.content = fix_line_ending(archiveStream.read(file_name))
if file_is_txt == False or subtitle.is_valid():
subtitles.append(subtitle)
return subtitles
def download_archive_and_add_subtitle_files(self, link, language, video ):
def download_archive_and_add_subtitle_files(self, link, language, video, fps, num_cds):
logger.info('Downloading subtitle %r', link)
request = self.session.get(link, headers={
'Referer': 'https://subsunacs.net/search.php'
@ -193,9 +234,9 @@ class SubsUnacsProvider(Provider):
archive_stream = io.BytesIO(request.content)
if is_rarfile(archive_stream):
return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link )
return self.process_archive_subtitle_files(RarFile(archive_stream), language, video, link, fps, num_cds)
elif is_zipfile(archive_stream):
return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link )
return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps, num_cds)
else:
logger.error('Ignore unsupported archive %r', request.headers)
return []

View File

@ -19,6 +19,8 @@ from subliminal.video import Episode
logger = logging.getLogger(__name__)
article_re = re.compile(r'^([A-Za-z]{1,3}) (.*)$')
episode_re = re.compile(r'^(\d+)(-(\d+))*$')
episode_name_re = re.compile(r'^(.*?)( [\[(].{2,4}[\])])*$')
series_sanitize_re = re.compile(r'^(.*?)( \[\D+\])*$')
class XSubsSubtitle(Subtitle):
@ -143,7 +145,11 @@ class XSubsProvider(Provider):
for show_category in soup.findAll('seriesl'):
if show_category.attrs['category'] == u'Σειρές':
for show in show_category.findAll('series'):
show_ids[sanitize(show.text)] = int(show['srsid'])
series = show.text
series_match = series_sanitize_re.match(series)
if series_match:
series = series_match.group(1)
show_ids[sanitize(series)] = int(show['srsid'])
break
logger.debug('Found %d show ids', len(show_ids))
@ -195,6 +201,9 @@ class XSubsProvider(Provider):
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
series = soup.find('name').text
series_match = episode_name_re.match(series)
if series_match:
series = series_match.group(1)
# loop over season rows
seasons = soup.findAll('series_group')

View File

@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
import re
import io
import os
from random import randint
@ -13,7 +12,6 @@ from guessit import guessit
from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle
from subliminal_patch.utils import sanitize
from subliminal.exceptions import ProviderError
from subliminal.utils import sanitize_release_group
from subliminal.subtitle import guess_matches
from subliminal.video import Episode, Movie
@ -27,18 +25,22 @@ class YavkaNetSubtitle(Subtitle):
"""YavkaNet Subtitle."""
provider_name = 'yavkanet'
def __init__(self, langauge, filename, type, video, link):
def __init__(self, langauge, filename, type, video, link, fps):
super(YavkaNetSubtitle, self).__init__(langauge)
self.langauge = langauge
self.filename = filename
self.page_link = link
self.type = type
self.video = video
self.fps = fps
self.release_info = os.path.splitext(filename)[0]
@property
def id(self):
return self.filename
return self.page_link + self.filename
def get_fps(self):
    """Return the frame rate stored on this subtitle (may be ``None``)."""
    frame_rate = self.fps
    return frame_rate
def make_picklable(self):
self.content = None
@ -60,7 +62,11 @@ class YavkaNetSubtitle(Subtitle):
if video_filename == subtitle_filename:
matches.add('hash')
matches |= guess_matches(video, guessit(self.filename, {'type': self.type}))
if video.year and self.year == video.year:
matches.add('year')
matches |= guess_matches(video, guessit(self.title, {'type': self.type, 'allowed_countries': [None]}))
matches |= guess_matches(video, guessit(self.filename, {'type': self.type, 'allowed_countries': [None]}))
return matches
@ -122,18 +128,34 @@ class YavkaNetProvider(Provider):
return subtitles
soup = BeautifulSoup(response.content, 'lxml')
rows = soup.findAll('tr', {'class': 'info'})
rows = soup.findAll('tr')
# Search on first 20 rows only
for row in rows[:20]:
# Search on first 25 rows only
for row in rows[:25]:
element = row.find('a', {'class': 'selector'})
if element:
link = element.get('href')
notes = element.get('content')
title = element.get_text()
try:
year = int(element.find_next_sibling('span').text.strip('()'))
except:
year = None
try:
fps = float(row.find('span', {'title': 'Кадри в секунда'}).text.strip())
except:
fps = None
element = row.find('a', {'class': 'click'})
uploader = element.get_text() if element else None
logger.info('Found subtitle link %r', link)
sub = self.download_archive_and_add_subtitle_files('http://yavka.net/' + link, language, video)
for s in sub:
sub = self.download_archive_and_add_subtitle_files('http://yavka.net/' + link, language, video, fps)
for s in sub:
s.title = title
s.notes = notes
s.year = year
s.uploader = uploader
subtitles = subtitles + sub
return subtitles
@ -146,23 +168,24 @@ class YavkaNetProvider(Provider):
pass
else:
seeking_subtitle_file = subtitle.filename
arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video)
arch = self.download_archive_and_add_subtitle_files(subtitle.page_link, subtitle.language, subtitle.video,
subtitle.fps)
for s in arch:
if s.filename == seeking_subtitle_file:
subtitle.content = s.content
def process_archive_subtitle_files(self, archiveStream, language, video, link, fps=None):
    """Build a YavkaNetSubtitle for every .srt/.sub member of an opened archive.

    :param archiveStream: opened archive exposing ``namelist()`` and
        ``read(name)`` (callers pass a ``RarFile`` or a ``ZipFile``)
    :param language: language handed to each created subtitle
    :param video: video being searched for; an ``Episode`` selects the
        'episode' type, anything else 'movie'
    :param link: archive URL stored on each created subtitle
    :param fps: frame rate reported by the provider, or None when unknown;
        optional so that four-argument callers keep working
    :return: list of subtitles whose ``content`` is already populated
    """
    video_type = 'episode' if isinstance(video, Episode) else 'movie'
    subtitles = []
    for file_name in archiveStream.namelist():
        # Only plain subtitle files are of interest; skip everything else.
        if not file_name.lower().endswith(('.srt', '.sub')):
            continue
        logger.info('Found subtitle file %r', file_name)
        subtitle = YavkaNetSubtitle(language, file_name, video_type, video, link, fps)
        # Line endings are passed through fix_line_ending before the content is stored.
        subtitle.content = fix_line_ending(archiveStream.read(file_name))
        subtitles.append(subtitle)
    return subtitles
def download_archive_and_add_subtitle_files(self, link, language, video ):
def download_archive_and_add_subtitle_files(self, link, language, video, fps):
logger.info('Downloading subtitle %r', link)
request = self.session.get(link, headers={
'Referer': 'http://yavka.net/subtitles.php'
@ -171,9 +194,9 @@ class YavkaNetProvider(Provider):
archive_stream = io.BytesIO(request.content)
if is_rarfile(archive_stream):
return self.process_archive_subtitle_files( RarFile(archive_stream), language, video, link )
return self.process_archive_subtitle_files(RarFile(archive_stream), language, video, link, fps)
elif is_zipfile(archive_stream):
return self.process_archive_subtitle_files( ZipFile(archive_stream), language, video, link )
return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video, link, fps)
else:
logger.error('Ignore unsupported archive %r', request.headers)
return []

View File

@ -89,6 +89,13 @@ class Subtitle(Subtitle_):
def numeric_id(self):
    """Numeric identifier of the subtitle; this base implementation has none.

    :raises NotImplementedError: always
    """
    # ``NotImplemented`` is the comparison sentinel, not an exception class;
    # raising it fails with a TypeError instead of the intended signal.
    raise NotImplementedError
def get_fps(self):
    """
    Frame rate of the subtitle as reported by its provider.

    This base implementation reports no frame rate.

    :return: frames per second or None if not supported
    :rtype: float
    """
    return None
def make_picklable(self):
"""
some subtitle instances might have unpicklable objects stored; clean them up here
@ -264,10 +271,14 @@ class Subtitle(Subtitle_):
else:
logger.info("Got format: %s", subs.format)
except pysubs2.UnknownFPSError:
# if parsing failed, suggest our media file's fps
logger.info("No FPS info in subtitle. Using our own media FPS for the MicroDVD subtitle: %s",
self.plex_media_fps)
subs = pysubs2.SSAFile.from_string(text, fps=self.plex_media_fps)
# if parsing failed, use frame rate from provider
sub_fps = self.get_fps()
if not isinstance(sub_fps, float) or sub_fps < 10.0:
# or use our media file's fps as a fallback
sub_fps = self.plex_media_fps
logger.info("No FPS info in subtitle. Using our own media FPS for the MicroDVD subtitle: %s",
self.plex_media_fps)
subs = pysubs2.SSAFile.from_string(text, fps=sub_fps)
unicontent = self.pysubs2_to_unicode(subs)
self.content = unicontent.encode(self._guessed_encoding)