bazarr/libs/subliminal_patch/providers/legendasdivx.py

346 lines
13 KiB
Python
Raw Normal View History

2020-01-16 20:38:41 +00:00
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import logging
import io
import os
import rarfile
import zipfile
from requests import Session
from guessit import guessit
from subliminal_patch.exceptions import ParseResponseError
2020-01-16 20:38:41 +00:00
from subliminal_patch.providers import Provider
from subliminal.providers import ParserBeautifulSoup
from subliminal_patch.subtitle import Subtitle
from subliminal.video import Episode, Movie
from subliminal.subtitle import SUBTITLE_EXTENSIONS, fix_line_ending, guess_matches
2020-01-16 20:38:41 +00:00
from subzero.language import Language
from subliminal_patch.score import get_scores
2020-01-16 20:38:41 +00:00
logger = logging.getLogger(__name__)
class LegendasdivxSubtitle(Subtitle):
"""Legendasdivx Subtitle."""
provider_name = 'legendasdivx'
def __init__(self, language, video, data):
super(LegendasdivxSubtitle, self).__init__(language)
self.language = language
self.page_link = data['link']
self.hits = data['hits']
self.exact_match = data['exact_match']
self.description = data['description'].lower()
2020-01-16 20:38:41 +00:00
self.video = video
self.videoname = data['videoname']
self.uploader = data['uploader']
2020-01-16 20:38:41 +00:00
@property
def id(self):
return self.page_link
@property
def release_info(self):
return self.description
def get_matches(self, video):
matches = set()
if self.videoname.lower() in self.description:
matches.update(['title'])
matches.update(['season'])
matches.update(['episode'])
# episode
if video.title and video.title.lower() in self.description:
matches.update(['title'])
if video.year and '{:04d}'.format(video.year) in self.description:
matches.update(['year'])
if isinstance(video, Episode):
# already matched in search query
if video.season and 's{:02d}'.format(video.season) in self.description:
matches.update(['season'])
if video.episode and 'e{:02d}'.format(video.episode) in self.description:
matches.update(['episode'])
if video.episode and video.season and video.series:
if '{}.s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description:
matches.update(['series'])
matches.update(['season'])
matches.update(['episode'])
if '{} s{:02d}e{:02d}'.format(video.series.lower(),video.season,video.episode) in self.description:
matches.update(['series'])
matches.update(['season'])
matches.update(['episode'])
# release_group
if video.release_group and video.release_group.lower() in self.description:
matches.update(['release_group'])
# resolution
if video.resolution and video.resolution.lower() in self.description:
matches.update(['resolution'])
# format
formats = []
if video.format:
formats = [video.format.lower()]
if formats[0] == "web-dl":
formats.append("webdl")
formats.append("webrip")
formats.append("web ")
for frmt in formats:
if frmt.lower() in self.description:
matches.update(['format'])
break
# video_codec
if video.video_codec:
video_codecs = [video.video_codec.lower()]
if video_codecs[0] == "h264":
formats.append("x264")
elif video_codecs[0] == "h265":
formats.append("x265")
for vc in formats:
if vc.lower() in self.description:
matches.update(['video_codec'])
break
# running guessit on a huge description may break guessit
# matches |= guess_matches(video, guessit(self.description))
2020-01-16 20:38:41 +00:00
return matches
class LegendasdivxProvider(Provider):
"""Legendasdivx Provider."""
languages = {Language('por', 'BR')} | {Language('por')}
SEARCH_THROTTLE = 8
site = 'https://www.legendasdivx.pt'
headers = {
2020-03-31 01:08:11 +00:00
'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"),
2020-01-16 20:38:41 +00:00
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Origin': 'https://www.legendasdivx.pt',
'Referer': 'https://www.legendasdivx.pt',
'Pragma': 'no-cache',
'Cache-Control': 'no-cache'
}
loginpage = site + '/forum/ucp.php?mode=login'
searchurl = site + '/modules.php?name=Downloads&file=jz&d_op=search&op=_jz00&query={query}'
language_list = list(languages)
def __init__(self, username, password):
self.username = username
self.password = password
def initialize(self):
self.session = Session()
self.login()
def terminate(self):
self.logout()
self.session.close()
def login(self):
logger.info('Logging in')
2020-01-16 20:38:41 +00:00
self.headers['Referer'] = self.site + '/index.php'
self.session.headers.update(self.headers.items())
res = self.session.get(self.loginpage)
bsoup = ParserBeautifulSoup(res.content, ['lxml'])
_allinputs = bsoup.findAll('input')
fields = {}
for field in _allinputs:
fields[field.get('name')] = field.get('value')
fields['username'] = self.username
fields['password'] = self.password
fields['autologin'] = 'on'
fields['viewonline'] = 'on'
self.headers['Referer'] = self.loginpage
self.session.headers.update(self.headers.items())
res = self.session.post(self.loginpage, fields)
try:
logger.debug('Got session id %s' %
self.session.cookies.get_dict()['PHPSESSID'])
except KeyError as e:
logger.error(repr(e))
logger.error("Didn't get session id, check your credentials")
return False
except Exception as e:
logger.error(repr(e))
logger.error('uncached error #legendasdivx #AA')
return False
return True
2020-01-16 20:38:41 +00:00
def logout(self):
# need to figure this out
return True
def _process_page(self, video, bsoup, querytext, videoname):
2020-01-16 20:38:41 +00:00
subtitles = []
_allsubs = bsoup.findAll("div", {"class": "sub_box"})
2020-01-16 20:38:41 +00:00
lang = Language.fromopensubtitles("pob")
for _subbox in _allsubs:
hits=0
for th in _subbox.findAll("th", {"class": "color2"}):
if th.string == 'Hits:':
hits = int(th.parent.find("td").string)
if th.string == 'Idioma:':
lang = th.parent.find("td").find ("img").get ('src')
if 'brazil' in lang:
lang = Language.fromopensubtitles('pob')
else:
lang = Language.fromopensubtitles('por')
description = _subbox.find("td", {"class": "td_desc brd_up"})
download = _subbox.find("a", {"class": "sub_download"})
try:
# sometimes BSoup just doesn't get the link
2020-01-17 04:13:44 +00:00
logger.debug(download.get('href'))
2020-01-16 20:38:41 +00:00
except Exception as e:
logger.warning('skipping subbox on %s' % self.searchurl.format(query=querytext))
continue
# get subtitle uploader
sub_header = _subbox.find("div", {"class" :"sub_header"})
uploader = sub_header.find("a").text if sub_header else '<n/a>'
2020-01-16 20:38:41 +00:00
exact_match = False
if video.name.lower() in description.get_text().lower():
exact_match = True
data = {'link': self.site + '/modules.php' + download.get('href'),
'exact_match': exact_match,
'hits': hits,
'uploader': uploader,
2020-01-16 20:38:41 +00:00
'videoname': videoname,
'description': description.get_text()
}
2020-01-16 20:38:41 +00:00
subtitles.append(
LegendasdivxSubtitle(lang, video, data)
)
return subtitles
def query(self, video, language):
try:
logger.debug('Got session id %s' %
self.session.cookies.get_dict()['PHPSESSID'])
except Exception as e:
self.login()
language_ids = '0'
if isinstance(language, (tuple, list, set)):
if len(language) == 1:
language_ids = ','.join(sorted(l.opensubtitles for l in language))
if language_ids == 'por':
language_ids = '&form_cat=28'
else:
language_ids = '&form_cat=29'
videoname = video.name
videoname = os.path.basename(videoname)
videoname, _ = os.path.splitext(videoname)
# querytext = videoname.lower()
_searchurl = self.searchurl
if video.imdb_id is None:
if isinstance(video, Episode):
querytext = "{} S{:02d}E{:02d}".format(video.series, video.season, video.episode)
elif isinstance(video, Movie):
querytext = video.title
else:
querytext = video.imdb_id
# querytext = querytext.replace(
# ".", "+").replace("[", "").replace("]", "")
if language_ids != '0':
querytext = querytext + language_ids
self.headers['Referer'] = self.site + '/index.php'
self.session.headers.update(self.headers.items())
res = self.session.get(_searchurl.format(query=querytext))
# form_cat=28 = br
# form_cat=29 = pt
if "A legenda não foi encontrada" in res.text:
logger.warning('%s not found', querytext)
return []
bsoup = ParserBeautifulSoup(res.content, ['html.parser'])
subtitles = self._process_page(video, bsoup, querytext, videoname)
2020-01-16 20:38:41 +00:00
return subtitles
def list_subtitles(self, video, languages):
return self.query(video, languages)
def download_subtitle(self, subtitle):
res = self.session.get(subtitle.page_link)
if res:
if res.text == '500':
raise ValueError('Error 500 on server')
archive = self._get_archive(res.content)
# extract the subtitle
subtitle_content = self._get_subtitle_from_archive(archive, subtitle)
2020-01-16 20:38:41 +00:00
subtitle.content = fix_line_ending(subtitle_content)
subtitle.normalize()
return subtitle
raise ValueError('Problems conecting to the server')
def _get_archive(self, content):
# open the archive
# stole^H^H^H^H^H inspired from subvix provider
archive_stream = io.BytesIO(content)
if rarfile.is_rarfile(archive_stream):
logger.debug('Identified rar archive')
archive = rarfile.RarFile(archive_stream)
elif zipfile.is_zipfile(archive_stream):
logger.debug('Identified zip archive')
archive = zipfile.ZipFile(archive_stream)
else:
# raise ParseResponseError('Unsupported compressed format')
raise Exception('Unsupported compressed format')
return archive
def _get_subtitle_from_archive(self, archive, subtitle):
# some files have a non subtitle with .txt extension
_tmp = list(SUBTITLE_EXTENSIONS)
_tmp.remove('.txt')
_subtitle_extensions = tuple(_tmp)
_max_score = 0
_scores = get_scores (subtitle.video)
2020-01-16 20:38:41 +00:00
for name in archive.namelist():
# discard hidden files
if os.path.split(name)[-1].startswith('.'):
continue
# discard non-subtitle files
if not name.lower().endswith(_subtitle_extensions):
2020-01-16 20:38:41 +00:00
continue
_guess = guessit (name)
if isinstance(subtitle.video, Episode):
logger.debug ("guessing %s" % name)
logger.debug("subtitle S{}E{} video S{}E{}".format(_guess['season'],_guess['episode'],subtitle.video.season,subtitle.video.episode))
if subtitle.video.episode != _guess['episode'] or subtitle.video.season != _guess['season']:
logger.debug('subtitle does not match video, skipping')
continue
matches = set()
matches |= guess_matches (subtitle.video, _guess)
logger.debug('srt matches: %s' % matches)
_score = sum ((_scores.get (match, 0) for match in matches))
if _score > _max_score:
_max_name = name
_max_score = _score
logger.debug("new max: {} {}".format(name, _score))
if _max_score > 0:
logger.debug("returning from archive: {} scored {}".format(_max_name, _max_score))
return archive.read(_max_name)
2020-01-16 20:38:41 +00:00
raise ParseResponseError('Can not find the subtitle in the compressed file')