This commit is contained in:
Louis Vézina 2019-09-28 00:22:17 -04:00
parent ad3f37f8ac
commit 59c6a269ab
166 changed files with 21701 additions and 13048 deletions

View File

@@ -759,7 +759,7 @@ def wanted_download_subtitles(path, l, count_episodes):
attempt = []
attempt.append([language, time.time()])
else:
att = zip(*attempt)[0]
att = list(zip(*attempt))[0]
if language not in att:
attempt.append([language, time.time()])
@@ -828,7 +828,7 @@ def wanted_download_subtitles_movie(path, l, count_movies):
attempt = []
attempt.append([language, time.time()])
else:
att = zip(*attempt)[0]
att = list(zip(*attempt))[0]
if language not in att:
attempt.append([language, time.time()])
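A note on the two hunks above: on Python 2, zip() returned a list, so zip(*attempt)[0] could be indexed directly; on Python 3 it returns a lazy iterator, hence the list() wrapper. A minimal sketch of the difference (the values are illustrative):

# attempt holds [language, timestamp] pairs, as in the code above
attempt = [['en', 1569643337.0], ['fr', 1569643401.0]]

# Python 3: zip(*attempt)[0] raises TypeError ('zip' object is not subscriptable)
att = list(zip(*attempt))[0]  # ('en', 'fr') -- languages already attempted
print('de' not in att)        # True, so a new attempt would be recorded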

View File

@@ -2,10 +2,16 @@
import os
import sys
from six import PY3
def set_libs():
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../libs/'))
if PY3:
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../libs3/'))
else:
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../libs2/'))
set_libs()
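For context (a note about six, not part of this diff): six.PY3 is just a major-version check, so set_libs() ends up putting the version-specific vendored directory ahead of the shared libs/ directory on sys.path:

import sys

# six defines PY3 roughly like this:
PY3 = sys.version_info[0] == 3

# Because libs3/ (or libs2/) is inserted at index 0 *after* libs/,
# it takes precedence when both directories ship the same package.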

View File

@@ -6,11 +6,12 @@ import logging
import re
import types
import platform
import warnings
import six
from logging.handlers import TimedRotatingFileHandler
from get_args import args
from config import settings
import six
logger = logging.getLogger()
@@ -41,6 +42,8 @@ class NoExceptionFormatter(logging.Formatter):
def configure_logging(debug=False):
warnings.simplefilter('ignore', category=ResourceWarning)
if not debug:
log_level = "INFO"
else:
@@ -63,7 +66,7 @@ def configure_logging(debug=False):
# File Logging
global fh
fh = TimedRotatingFileHandler(os.path.join(args.config_dir, 'log/bazarr.log'), when="midnight", interval=1,
backupCount=7)
backupCount=7, delay=True)
f = OneLineExceptionFormatter('%(asctime)s|%(levelname)-8s|%(name)-32s|%(message)s|',
'%d/%m/%Y %H:%M:%S')
fh.setFormatter(f)
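delay=True, added above, is a standard logging.FileHandler option inherited by TimedRotatingFileHandler: the log file is only opened when the first record is emitted, not when the handler is constructed. A minimal sketch with a hypothetical path:

import logging
from logging.handlers import TimedRotatingFileHandler

# With delay=True, /tmp/example.log is not created until the first emit().
fh = TimedRotatingFileHandler('/tmp/example.log', when='midnight',
                              interval=1, backupCount=7, delay=True)
logging.getLogger().addHandler(fh)
logging.getLogger().warning('first record opens the file')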

View File

@@ -37,7 +37,7 @@ class Notify:
:rtype: str
"""
if self.queue:
if self.queue and (len(self.queue) > 0):
return self.queue.popleft()
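For reference, an empty collections.deque is falsy, so the truthiness check alone already prevents popleft() from raising IndexError; the added len() check makes the guard explicit. In isolation:

from collections import deque

queue = deque()
# popleft() on an empty deque raises IndexError, so guard first.
if queue and len(queue) > 0:
    item = queue.popleft()
else:
    item = None  # matches the implicit None return above
print(item)  # None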

View File

@@ -26,14 +26,7 @@
import os
import re
import six
try:
import yaml
except ImportError:
if six.PY3:
import yaml3 as yaml
else:
import yaml2 as yaml
import yaml
from .. import plugins
from ..AppriseAsset import AppriseAsset

View File

@@ -1,43 +0,0 @@
Behold, mortal, the origins of Beautiful Soup...
================================================
Leonard Richardson is the primary programmer.
Aaron DeVore is awesome.
Mark Pilgrim provided the encoding detection code that forms the base
of UnicodeDammit.
Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful
Soup 4 working under Python 3.
Simon Willison wrote soupselect, which was used to make Beautiful Soup
support CSS selectors.
Sam Ruby helped with a lot of edge cases.
Jonathan Ellis was awarded the prestigious Beau Potage D'Or for his
work in solving the nestable tags conundrum.
An incomplete list of people have contributed patches to Beautiful
Soup:
Istvan Albert, Andrew Lin, Anthony Baxter, Andrew Boyko, Tony Chang,
Zephyr Fang, Fuzzy, Roman Gaufman, Yoni Gilad, Richie Hindle, Peteris
Krumins, Kent Johnson, Ben Last, Robert Leftwich, Staffan Malmgren,
Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon", Ed
Oskiewicz, Greg Phillips, Giles Radford, Arthur Rudolph, Marko
Samastur, Jouni Seppänen, Alexander Schmolck, Andy Theyers, Glyn
Webster, Paul Wright, Danny Yoo
An incomplete list of people who made suggestions or found bugs or
found ways to break Beautiful Soup:
Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel,
Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes,
Matt Patterson, Scott Roberts, Steve Strassmann, Mike Williams,
warchild at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison,
Joren Mc, Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed
Summers, Dennis Sutch, Chris Smith, Aaron Sweep^W Swartz, Stuart
Turner, Greg Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de
Sousa Rocha, Yichun Wei, Per Vognsen

View File

@@ -1,27 +0,0 @@
Beautiful Soup is made available under the MIT license:
Copyright (c) 2004-2015 Leonard Richardson
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Beautiful Soup incorporates code from the html5lib library, which is
also made available under the MIT license. Copyright (c) 2006-2013
James Graham and other contributors

File diff suppressed because it is too large

View File

@@ -1,63 +0,0 @@
= Introduction =
>>> from bs4 import BeautifulSoup
>>> soup = BeautifulSoup("<p>Some<b>bad<i>HTML")
>>> print soup.prettify()
<html>
<body>
<p>
Some
<b>
bad
<i>
HTML
</i>
</b>
</p>
</body>
</html>
>>> soup.find(text="bad")
u'bad'
>>> soup.i
<i>HTML</i>
>>> soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml")
>>> print soup.prettify()
<?xml version="1.0" encoding="utf-8"?>
<tag1>
Some
<tag2 />
bad
<tag3>
XML
</tag3>
</tag1>
= Full documentation =
The bs4/doc/ directory contains full documentation in Sphinx
format. Run "make html" in that directory to create HTML
documentation.
= Running the unit tests =
Beautiful Soup supports unit test discovery from the project root directory:
$ nosetests
$ python -m unittest discover -s bs4 # Python 2.7 and up
If you checked out the source tree, you should see a script in the
home directory called test-all-versions. This script will run the unit
tests under Python 2.7, then create a temporary Python 3 conversion of
the source and run the unit tests again under Python 3.
= Links =
Homepage: http://www.crummy.com/software/BeautifulSoup/bs4/
Documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
http://readthedocs.org/docs/beautiful-soup-4/
Discussion group: http://groups.google.com/group/beautifulsoup/
Development: https://code.launchpad.net/beautifulsoup/
Bug tracker: https://bugs.launchpad.net/beautifulsoup/

View File

@@ -1,31 +0,0 @@
Additions
---------
More of the jQuery API: nextUntil?
Optimizations
-------------
The html5lib tree builder doesn't use the standard tree-building API,
which worries me and has resulted in a number of bugs.
markup_attr_map can be optimized since it's always a map now.
Upon encountering UTF-16LE data or some other uncommon serialization
of Unicode, UnicodeDammit will convert the data to Unicode, then
encode it as UTF-8. This is wasteful because it will just get decoded
back to Unicode.
CDATA
-----
The elementtree XMLParser has a strip_cdata argument that, when set to
False, should allow Beautiful Soup to preserve CDATA sections instead
of treating them as text. Except it doesn't. (This argument is also
present for HTMLParser, and also does nothing there.)
Currently, html5lib converts CDATA sections into comments. An
as-yet-unreleased version of html5lib changes the parser's handling of
CDATA sections to allow CDATA sections in tags like <svg> and
<math>. The HTML5TreeBuilder will need to be updated to create CData
objects instead of Comment objects in this situation.

View File

@@ -21,10 +21,8 @@ class User_Agent():
def loadUserAgent(self, *args, **kwargs):
browser = kwargs.pop('browser', 'chrome')
user_agents = json.load(
open(os.path.join(os.path.dirname(__file__), 'browsers.json'), 'r'),
object_pairs_hook=OrderedDict
)
with open(os.path.join(os.path.dirname(__file__), 'browsers.json'), 'r') as file:
user_agents = json.load(file, object_pairs_hook=OrderedDict)
if not user_agents.get(browser):
logging.error('Sorry "{}" browser User-Agent was not found.'.format(browser))
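The rewrite above replaces a bare open() with a context manager, so browsers.json is closed deterministically even if json.load() raises (leaked handles are also what triggers the ResourceWarning silenced in the logger hunk earlier). The pattern in isolation, with a stand-in file name:

import json
from collections import OrderedDict

# The with-block guarantees the handle is closed, success or failure.
with open('browsers.json', 'r') as file:
    user_agents = json.load(file, object_pairs_hook=OrderedDict)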

View File

@@ -1,3 +0,0 @@
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)

View File

@@ -1,23 +0,0 @@
# Copyright 2009 Brian Quinlan. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Execute computations asynchronously using threads or processes."""
__author__ = 'Brian Quinlan (brian@sweetapp.com)'
from concurrent.futures._base import (FIRST_COMPLETED,
FIRST_EXCEPTION,
ALL_COMPLETED,
CancelledError,
TimeoutError,
Future,
Executor,
wait,
as_completed)
from concurrent.futures.thread import ThreadPoolExecutor
try:
from concurrent.futures.process import ProcessPoolExecutor
except ImportError:
# some platforms don't have multiprocessing
pass
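This backport (together with the _base, process and thread modules deleted below) is only needed on Python 2; Python 3 ships concurrent.futures in the standard library, which is presumably why the vendored copy can be dropped. Equivalent usage against the stdlib:

from concurrent.futures import ThreadPoolExecutor, as_completed

# Same API as the removed backport, provided by the Python 3 stdlib.
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(pow, 2, n) for n in range(8)]
    for future in as_completed(futures):
        print(future.result())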

View File

@@ -1,607 +0,0 @@
# Copyright 2009 Brian Quinlan. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
import collections
import logging
import threading
import itertools
import time
__author__ = 'Brian Quinlan (brian@sweetapp.com)'
FIRST_COMPLETED = 'FIRST_COMPLETED'
FIRST_EXCEPTION = 'FIRST_EXCEPTION'
ALL_COMPLETED = 'ALL_COMPLETED'
_AS_COMPLETED = '_AS_COMPLETED'
# Possible future states (for internal use by the futures package).
PENDING = 'PENDING'
RUNNING = 'RUNNING'
# The future was cancelled by the user...
CANCELLED = 'CANCELLED'
# ...and _Waiter.add_cancelled() was called by a worker.
CANCELLED_AND_NOTIFIED = 'CANCELLED_AND_NOTIFIED'
FINISHED = 'FINISHED'
_FUTURE_STATES = [
PENDING,
RUNNING,
CANCELLED,
CANCELLED_AND_NOTIFIED,
FINISHED
]
_STATE_TO_DESCRIPTION_MAP = {
PENDING: "pending",
RUNNING: "running",
CANCELLED: "cancelled",
CANCELLED_AND_NOTIFIED: "cancelled",
FINISHED: "finished"
}
# Logger for internal use by the futures package.
LOGGER = logging.getLogger("concurrent.futures")
class Error(Exception):
"""Base class for all future-related exceptions."""
pass
class CancelledError(Error):
"""The Future was cancelled."""
pass
class TimeoutError(Error):
"""The operation exceeded the given deadline."""
pass
class _Waiter(object):
"""Provides the event that wait() and as_completed() block on."""
def __init__(self):
self.event = threading.Event()
self.finished_futures = []
def add_result(self, future):
self.finished_futures.append(future)
def add_exception(self, future):
self.finished_futures.append(future)
def add_cancelled(self, future):
self.finished_futures.append(future)
class _AsCompletedWaiter(_Waiter):
"""Used by as_completed()."""
def __init__(self):
super(_AsCompletedWaiter, self).__init__()
self.lock = threading.Lock()
def add_result(self, future):
with self.lock:
super(_AsCompletedWaiter, self).add_result(future)
self.event.set()
def add_exception(self, future):
with self.lock:
super(_AsCompletedWaiter, self).add_exception(future)
self.event.set()
def add_cancelled(self, future):
with self.lock:
super(_AsCompletedWaiter, self).add_cancelled(future)
self.event.set()
class _FirstCompletedWaiter(_Waiter):
"""Used by wait(return_when=FIRST_COMPLETED)."""
def add_result(self, future):
super(_FirstCompletedWaiter, self).add_result(future)
self.event.set()
def add_exception(self, future):
super(_FirstCompletedWaiter, self).add_exception(future)
self.event.set()
def add_cancelled(self, future):
super(_FirstCompletedWaiter, self).add_cancelled(future)
self.event.set()
class _AllCompletedWaiter(_Waiter):
"""Used by wait(return_when=FIRST_EXCEPTION and ALL_COMPLETED)."""
def __init__(self, num_pending_calls, stop_on_exception):
self.num_pending_calls = num_pending_calls
self.stop_on_exception = stop_on_exception
self.lock = threading.Lock()
super(_AllCompletedWaiter, self).__init__()
def _decrement_pending_calls(self):
with self.lock:
self.num_pending_calls -= 1
if not self.num_pending_calls:
self.event.set()
def add_result(self, future):
super(_AllCompletedWaiter, self).add_result(future)
self._decrement_pending_calls()
def add_exception(self, future):
super(_AllCompletedWaiter, self).add_exception(future)
if self.stop_on_exception:
self.event.set()
else:
self._decrement_pending_calls()
def add_cancelled(self, future):
super(_AllCompletedWaiter, self).add_cancelled(future)
self._decrement_pending_calls()
class _AcquireFutures(object):
"""A context manager that does an ordered acquire of Future conditions."""
def __init__(self, futures):
self.futures = sorted(futures, key=id)
def __enter__(self):
for future in self.futures:
future._condition.acquire()
def __exit__(self, *args):
for future in self.futures:
future._condition.release()
def _create_and_install_waiters(fs, return_when):
if return_when == _AS_COMPLETED:
waiter = _AsCompletedWaiter()
elif return_when == FIRST_COMPLETED:
waiter = _FirstCompletedWaiter()
else:
pending_count = sum(
f._state not in [CANCELLED_AND_NOTIFIED, FINISHED] for f in fs)
if return_when == FIRST_EXCEPTION:
waiter = _AllCompletedWaiter(pending_count, stop_on_exception=True)
elif return_when == ALL_COMPLETED:
waiter = _AllCompletedWaiter(pending_count, stop_on_exception=False)
else:
raise ValueError("Invalid return condition: %r" % return_when)
for f in fs:
f._waiters.append(waiter)
return waiter
def as_completed(fs, timeout=None):
"""An iterator over the given futures that yields each as it completes.
Args:
fs: The sequence of Futures (possibly created by different Executors) to
iterate over.
timeout: The maximum number of seconds to wait. If None, then there
is no limit on the wait time.
Returns:
An iterator that yields the given Futures as they complete (finished or
cancelled). If any given Futures are duplicated, they will be returned
once.
Raises:
TimeoutError: If the entire result iterator could not be generated
before the given timeout.
"""
if timeout is not None:
end_time = timeout + time.time()
fs = set(fs)
with _AcquireFutures(fs):
finished = set(
f for f in fs
if f._state in [CANCELLED_AND_NOTIFIED, FINISHED])
pending = fs - finished
waiter = _create_and_install_waiters(fs, _AS_COMPLETED)
try:
for future in finished:
yield future
while pending:
if timeout is None:
wait_timeout = None
else:
wait_timeout = end_time - time.time()
if wait_timeout < 0:
raise TimeoutError(
'%d (of %d) futures unfinished' % (
len(pending), len(fs)))
waiter.event.wait(wait_timeout)
with waiter.lock:
finished = waiter.finished_futures
waiter.finished_futures = []
waiter.event.clear()
for future in finished:
yield future
pending.remove(future)
finally:
for f in fs:
with f._condition:
f._waiters.remove(waiter)
DoneAndNotDoneFutures = collections.namedtuple(
'DoneAndNotDoneFutures', 'done not_done')
def wait(fs, timeout=None, return_when=ALL_COMPLETED):
"""Wait for the futures in the given sequence to complete.
Args:
fs: The sequence of Futures (possibly created by different Executors) to
wait upon.
timeout: The maximum number of seconds to wait. If None, then there
is no limit on the wait time.
return_when: Indicates when this function should return. The options
are:
FIRST_COMPLETED - Return when any future finishes or is
cancelled.
FIRST_EXCEPTION - Return when any future finishes by raising an
exception. If no future raises an exception
then it is equivalent to ALL_COMPLETED.
ALL_COMPLETED - Return when all futures finish or are cancelled.
Returns:
A named 2-tuple of sets. The first set, named 'done', contains the
futures that completed (finished or cancelled) before the wait
completed. The second set, named 'not_done', contains uncompleted
futures.
"""
with _AcquireFutures(fs):
done = set(f for f in fs
if f._state in [CANCELLED_AND_NOTIFIED, FINISHED])
not_done = set(fs) - done
if (return_when == FIRST_COMPLETED) and done:
return DoneAndNotDoneFutures(done, not_done)
elif (return_when == FIRST_EXCEPTION) and done:
if any(f for f in done
if not f.cancelled() and f.exception() is not None):
return DoneAndNotDoneFutures(done, not_done)
if len(done) == len(fs):
return DoneAndNotDoneFutures(done, not_done)
waiter = _create_and_install_waiters(fs, return_when)
waiter.event.wait(timeout)
for f in fs:
with f._condition:
f._waiters.remove(waiter)
done.update(waiter.finished_futures)
return DoneAndNotDoneFutures(done, set(fs) - done)
class Future(object):
"""Represents the result of an asynchronous computation."""
def __init__(self):
"""Initializes the future. Should not be called by clients."""
self._condition = threading.Condition()
self._state = PENDING
self._result = None
self._exception = None
self._traceback = None
self._waiters = []
self._done_callbacks = []
def _invoke_callbacks(self):
for callback in self._done_callbacks:
try:
callback(self)
except Exception:
LOGGER.exception('exception calling callback for %r', self)
def __repr__(self):
with self._condition:
if self._state == FINISHED:
if self._exception:
return '<Future at %s state=%s raised %s>' % (
hex(id(self)),
_STATE_TO_DESCRIPTION_MAP[self._state],
self._exception.__class__.__name__)
else:
return '<Future at %s state=%s returned %s>' % (
hex(id(self)),
_STATE_TO_DESCRIPTION_MAP[self._state],
self._result.__class__.__name__)
return '<Future at %s state=%s>' % (
hex(id(self)),
_STATE_TO_DESCRIPTION_MAP[self._state])
def cancel(self):
"""Cancel the future if possible.
Returns True if the future was cancelled, False otherwise. A future
cannot be cancelled if it is running or has already completed.
"""
with self._condition:
if self._state in [RUNNING, FINISHED]:
return False
if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
return True
self._state = CANCELLED
self._condition.notify_all()
self._invoke_callbacks()
return True
def cancelled(self):
"""Return True if the future has cancelled."""
with self._condition:
return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]
def running(self):
"""Return True if the future is currently executing."""
with self._condition:
return self._state == RUNNING
def done(self):
"""Return True of the future was cancelled or finished executing."""
with self._condition:
return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED]
def __get_result(self):
if self._exception:
raise type(self._exception), self._exception, self._traceback
else:
return self._result
def add_done_callback(self, fn):
"""Attaches a callable that will be called when the future finishes.
Args:
fn: A callable that will be called with this future as its only
argument when the future completes or is cancelled. The callable
will always be called by a thread in the same process in which
it was added. If the future has already completed or been
cancelled then the callable will be called immediately. These
callables are called in the order that they were added.
"""
with self._condition:
if self._state not in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED]:
self._done_callbacks.append(fn)
return
fn(self)
def result(self, timeout=None):
"""Return the result of the call that the future represents.
Args:
timeout: The number of seconds to wait for the result if the future
isn't done. If None, then there is no limit on the wait time.
Returns:
The result of the call that the future represents.
Raises:
CancelledError: If the future was cancelled.
TimeoutError: If the future didn't finish executing before the given
timeout.
Exception: If the call raised then that exception will be raised.
"""
with self._condition:
if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
raise CancelledError()
elif self._state == FINISHED:
return self.__get_result()
self._condition.wait(timeout)
if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
raise CancelledError()
elif self._state == FINISHED:
return self.__get_result()
else:
raise TimeoutError()
def exception_info(self, timeout=None):
"""Return a tuple of (exception, traceback) raised by the call that the
future represents.
Args:
timeout: The number of seconds to wait for the exception if the
future isn't done. If None, then there is no limit on the wait
time.
Returns:
The exception raised by the call that the future represents or None
if the call completed without raising.
Raises:
CancelledError: If the future was cancelled.
TimeoutError: If the future didn't finish executing before the given
timeout.
"""
with self._condition:
if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
raise CancelledError()
elif self._state == FINISHED:
return self._exception, self._traceback
self._condition.wait(timeout)
if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
raise CancelledError()
elif self._state == FINISHED:
return self._exception, self._traceback
else:
raise TimeoutError()
def exception(self, timeout=None):
"""Return the exception raised by the call that the future represents.
Args:
timeout: The number of seconds to wait for the exception if the
future isn't done. If None, then there is no limit on the wait
time.
Returns:
The exception raised by the call that the future represents or None
if the call completed without raising.
Raises:
CancelledError: If the future was cancelled.
TimeoutError: If the future didn't finish executing before the given
timeout.
"""
return self.exception_info(timeout)[0]
# The following methods should only be used by Executors and in tests.
def set_running_or_notify_cancel(self):
"""Mark the future as running or process any cancel notifications.
Should only be used by Executor implementations and unit tests.
If the future has been cancelled (cancel() was called and returned
True) then any threads waiting on the future completing (through calls
to as_completed() or wait()) are notified and False is returned.
If the future was not cancelled then it is put in the running state
(future calls to running() will return True) and True is returned.
This method should be called by Executor implementations before
executing the work associated with this future. If this method returns
False then the work should not be executed.
Returns:
False if the Future was cancelled, True otherwise.
Raises:
RuntimeError: if this method was already called or if set_result()
or set_exception() was called.
"""
with self._condition:
if self._state == CANCELLED:
self._state = CANCELLED_AND_NOTIFIED
for waiter in self._waiters:
waiter.add_cancelled(self)
# self._condition.notify_all() is not necessary because
# self.cancel() triggers a notification.
return False
elif self._state == PENDING:
self._state = RUNNING
return True
else:
LOGGER.critical('Future %s in unexpected state: %s',
id(self),
self._state)
raise RuntimeError('Future in unexpected state')
def set_result(self, result):
"""Sets the return value of work associated with the future.
Should only be used by Executor implementations and unit tests.
"""
with self._condition:
self._result = result
self._state = FINISHED
for waiter in self._waiters:
waiter.add_result(self)
self._condition.notify_all()
self._invoke_callbacks()
def set_exception_info(self, exception, traceback):
"""Sets the result of the future as being the given exception
and traceback.
Should only be used by Executor implementations and unit tests.
"""
with self._condition:
self._exception = exception
self._traceback = traceback
self._state = FINISHED
for waiter in self._waiters:
waiter.add_exception(self)
self._condition.notify_all()
self._invoke_callbacks()
def set_exception(self, exception):
"""Sets the result of the future as being the given exception.
Should only be used by Executor implementations and unit tests.
"""
self.set_exception_info(exception, None)
class Executor(object):
"""This is an abstract base class for concrete asynchronous executors."""
def submit(self, fn, *args, **kwargs):
"""Submits a callable to be executed with the given arguments.
Schedules the callable to be executed as fn(*args, **kwargs) and returns
a Future instance representing the execution of the callable.
Returns:
A Future representing the given call.
"""
raise NotImplementedError()
def map(self, fn, *iterables, **kwargs):
"""Returns a iterator equivalent to map(fn, iter).
Args:
fn: A callable that will take as many arguments as there are
passed iterables.
timeout: The maximum number of seconds to wait. If None, then there
is no limit on the wait time.
Returns:
An iterator equivalent to: map(func, *iterables) but the calls may
be evaluated out-of-order.
Raises:
TimeoutError: If the entire result iterator could not be generated
before the given timeout.
Exception: If fn(*args) raises for any values.
"""
timeout = kwargs.get('timeout')
if timeout is not None:
end_time = timeout + time.time()
fs = [self.submit(fn, *args) for args in itertools.izip(*iterables)]
# Yield must be hidden in closure so that the futures are submitted
# before the first iterator value is required.
def result_iterator():
try:
for future in fs:
if timeout is None:
yield future.result()
else:
yield future.result(end_time - time.time())
finally:
for future in fs:
future.cancel()
return result_iterator()
def shutdown(self, wait=True):
"""Clean-up the resources associated with the Executor.
It is safe to call this method several times. Otherwise, no other
methods can be called after this one.
Args:
wait: If True then shutdown will not return until all running
futures have finished executing and the resources used by the
executor have been reclaimed.
"""
pass
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.shutdown(wait=True)
return False

View File

@@ -1,359 +0,0 @@
# Copyright 2009 Brian Quinlan. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Implements ProcessPoolExecutor.
The following diagram and text describe the data-flow through the system:
|======================= In-process =====================|== Out-of-process ==|
+----------+ +----------+ +--------+ +-----------+ +---------+
| | => | Work Ids | => | | => | Call Q | => | |
| | +----------+ | | +-----------+ | |
| | | ... | | | | ... | | |
| | | 6 | | | | 5, call() | | |
| | | 7 | | | | ... | | |
| Process | | ... | | Local | +-----------+ | Process |
| Pool | +----------+ | Worker | | #1..n |
| Executor | | Thread | | |
| | +----------- + | | +-----------+ | |
| | <=> | Work Items | <=> | | <= | Result Q | <= | |
| | +------------+ | | +-----------+ | |
| | | 6: call() | | | | ... | | |
| | | future | | | | 4, result | | |
| | | ... | | | | 3, except | | |
+----------+ +------------+ +--------+ +-----------+ +---------+
Executor.submit() called:
- creates a uniquely numbered _WorkItem and adds it to the "Work Items" dict
- adds the id of the _WorkItem to the "Work Ids" queue
Local worker thread:
- reads work ids from the "Work Ids" queue and looks up the corresponding
WorkItem from the "Work Items" dict: if the work item has been cancelled then
it is simply removed from the dict, otherwise it is repackaged as a
_CallItem and put in the "Call Q". New _CallItems are put in the "Call Q"
until "Call Q" is full. NOTE: the size of the "Call Q" is kept small because
calls placed in the "Call Q" can no longer be cancelled with Future.cancel().
- reads _ResultItems from "Result Q", updates the future stored in the
"Work Items" dict and deletes the dict entry
Process #1..n:
- reads _CallItems from "Call Q", executes the calls, and puts the resulting
_ResultItems in "Result Q"
"""
import atexit
from concurrent.futures import _base
import Queue as queue
import multiprocessing
import threading
import weakref
import sys
__author__ = 'Brian Quinlan (brian@sweetapp.com)'
# Workers are created as daemon threads and processes. This is done to allow the
# interpreter to exit when there are still idle processes in a
# ProcessPoolExecutor's process pool (i.e. shutdown() was not called). However,
# allowing workers to die with the interpreter has two undesirable properties:
# - The workers would still be running during interpreter shutdown,
# meaning that they would fail in unpredictable ways.
# - The workers could be killed while evaluating a work item, which could
# be bad if the callable being evaluated has external side-effects e.g.
# writing to a file.
#
# To work around this problem, an exit handler is installed which tells the
# workers to exit when their work queues are empty and then waits until the
# threads/processes finish.
_threads_queues = weakref.WeakKeyDictionary()
_shutdown = False
def _python_exit():
global _shutdown
_shutdown = True
items = list(_threads_queues.items()) if _threads_queues else ()
for t, q in items:
q.put(None)
for t, q in items:
t.join(sys.maxint)
# Controls how many more calls than processes will be queued in the call queue.
# A smaller number will mean that processes spend more time idle waiting for
# work while a larger number will make Future.cancel() succeed less frequently
# (Futures in the call queue cannot be cancelled).
EXTRA_QUEUED_CALLS = 1
class _WorkItem(object):
def __init__(self, future, fn, args, kwargs):
self.future = future
self.fn = fn
self.args = args
self.kwargs = kwargs
class _ResultItem(object):
def __init__(self, work_id, exception=None, result=None):
self.work_id = work_id
self.exception = exception
self.result = result
class _CallItem(object):
def __init__(self, work_id, fn, args, kwargs):
self.work_id = work_id
self.fn = fn
self.args = args
self.kwargs = kwargs
def _process_worker(call_queue, result_queue):
"""Evaluates calls from call_queue and places the results in result_queue.
This worker is run in a separate process.
Args:
call_queue: A multiprocessing.Queue of _CallItems that will be read and
evaluated by the worker.
result_queue: A multiprocessing.Queue of _ResultItems that will be written
to by the worker.
shutdown: A multiprocessing.Event that will be set as a signal to the
worker that it should exit when call_queue is empty.
"""
while True:
call_item = call_queue.get(block=True)
if call_item is None:
# Wake up queue management thread
result_queue.put(None)
return
try:
r = call_item.fn(*call_item.args, **call_item.kwargs)
except BaseException:
e = sys.exc_info()[1]
result_queue.put(_ResultItem(call_item.work_id,
exception=e))
else:
result_queue.put(_ResultItem(call_item.work_id,
result=r))
def _add_call_item_to_queue(pending_work_items,
work_ids,
call_queue):
"""Fills call_queue with _WorkItems from pending_work_items.
This function never blocks.
Args:
pending_work_items: A dict mapping work ids to _WorkItems e.g.
{5: <_WorkItem...>, 6: <_WorkItem...>, ...}
work_ids: A queue.Queue of work ids e.g. Queue([5, 6, ...]). Work ids
are consumed and the corresponding _WorkItems from
pending_work_items are transformed into _CallItems and put in
call_queue.
call_queue: A multiprocessing.Queue that will be filled with _CallItems
derived from _WorkItems.
"""
while True:
if call_queue.full():
return
try:
work_id = work_ids.get(block=False)
except queue.Empty:
return
else:
work_item = pending_work_items[work_id]
if work_item.future.set_running_or_notify_cancel():
call_queue.put(_CallItem(work_id,
work_item.fn,
work_item.args,
work_item.kwargs),
block=True)
else:
del pending_work_items[work_id]
continue
def _queue_management_worker(executor_reference,
processes,
pending_work_items,
work_ids_queue,
call_queue,
result_queue):
"""Manages the communication between this process and the worker processes.
This function is run in a local thread.
Args:
executor_reference: A weakref.ref to the ProcessPoolExecutor that owns
this thread. Used to determine if the ProcessPoolExecutor has been
garbage collected and that this function can exit.
processes: A list of the multiprocessing.Process instances used as
workers.
pending_work_items: A dict mapping work ids to _WorkItems e.g.
{5: <_WorkItem...>, 6: <_WorkItem...>, ...}
work_ids_queue: A queue.Queue of work ids e.g. Queue([5, 6, ...]).
call_queue: A multiprocessing.Queue that will be filled with _CallItems
derived from _WorkItems for processing by the process workers.
result_queue: A multiprocessing.Queue of _ResultItems generated by the
process workers.
"""
nb_shutdown_processes = [0]
def shutdown_one_process():
"""Tell a worker to terminate, which will in turn wake us again"""
call_queue.put(None)
nb_shutdown_processes[0] += 1
while True:
_add_call_item_to_queue(pending_work_items,
work_ids_queue,
call_queue)
result_item = result_queue.get(block=True)
if result_item is not None:
work_item = pending_work_items[result_item.work_id]
del pending_work_items[result_item.work_id]
if result_item.exception:
work_item.future.set_exception(result_item.exception)
else:
work_item.future.set_result(result_item.result)
# Delete references to object. See issue16284
del work_item
# Check whether we should start shutting down.
executor = executor_reference()
# No more work items can be added if:
# - The interpreter is shutting down OR
# - The executor that owns this worker has been collected OR
# - The executor that owns this worker has been shutdown.
if _shutdown or executor is None or executor._shutdown_thread:
# Since no new work items can be added, it is safe to shutdown
# this thread if there are no pending work items.
if not pending_work_items:
while nb_shutdown_processes[0] < len(processes):
shutdown_one_process()
# If .join() is not called on the created processes then
# some multiprocessing.Queue methods may deadlock on Mac OS
# X.
for p in processes:
p.join()
call_queue.close()
return
del executor
_system_limits_checked = False
_system_limited = None
def _check_system_limits():
global _system_limits_checked, _system_limited
if _system_limits_checked:
if _system_limited:
raise NotImplementedError(_system_limited)
_system_limits_checked = True
try:
import os
nsems_max = os.sysconf("SC_SEM_NSEMS_MAX")
except (AttributeError, ValueError):
# sysconf not available or setting not available
return
if nsems_max == -1:
# indeterminate limit; assume the limit is determined
# by available memory only
return
if nsems_max >= 256:
# minimum number of semaphores available
# according to POSIX
return
_system_limited = "system provides too few semaphores (%d available, 256 necessary)" % nsems_max
raise NotImplementedError(_system_limited)
class ProcessPoolExecutor(_base.Executor):
def __init__(self, max_workers=None):
"""Initializes a new ProcessPoolExecutor instance.
Args:
max_workers: The maximum number of processes that can be used to
execute the given calls. If None or not given then as many
worker processes will be created as the machine has processors.
"""
_check_system_limits()
if max_workers is None:
self._max_workers = multiprocessing.cpu_count()
else:
self._max_workers = max_workers
# Make the call queue slightly larger than the number of processes to
# prevent the worker processes from idling. But don't make it too big
# because futures in the call queue cannot be cancelled.
self._call_queue = multiprocessing.Queue(self._max_workers +
EXTRA_QUEUED_CALLS)
self._result_queue = multiprocessing.Queue()
self._work_ids = queue.Queue()
self._queue_management_thread = None
self._processes = set()
# Shutdown is a two-step process.
self._shutdown_thread = False
self._shutdown_lock = threading.Lock()
self._queue_count = 0
self._pending_work_items = {}
def _start_queue_management_thread(self):
# When the executor gets lost, the weakref callback will wake up
# the queue management thread.
def weakref_cb(_, q=self._result_queue):
q.put(None)
if self._queue_management_thread is None:
self._queue_management_thread = threading.Thread(
target=_queue_management_worker,
args=(weakref.ref(self, weakref_cb),
self._processes,
self._pending_work_items,
self._work_ids,
self._call_queue,
self._result_queue))
self._queue_management_thread.daemon = True
self._queue_management_thread.start()
_threads_queues[self._queue_management_thread] = self._result_queue
def _adjust_process_count(self):
for _ in range(len(self._processes), self._max_workers):
p = multiprocessing.Process(
target=_process_worker,
args=(self._call_queue,
self._result_queue))
p.start()
self._processes.add(p)
def submit(self, fn, *args, **kwargs):
with self._shutdown_lock:
if self._shutdown_thread:
raise RuntimeError('cannot schedule new futures after shutdown')
f = _base.Future()
w = _WorkItem(f, fn, args, kwargs)
self._pending_work_items[self._queue_count] = w
self._work_ids.put(self._queue_count)
self._queue_count += 1
# Wake up queue management thread
self._result_queue.put(None)
self._start_queue_management_thread()
self._adjust_process_count()
return f
submit.__doc__ = _base.Executor.submit.__doc__
def shutdown(self, wait=True):
with self._shutdown_lock:
self._shutdown_thread = True
if self._queue_management_thread:
# Wake up queue management thread
self._result_queue.put(None)
if wait:
self._queue_management_thread.join(sys.maxint)
# To reduce the risk of opening too many files, remove references to
# objects that use file descriptors.
self._queue_management_thread = None
self._call_queue = None
self._result_queue = None
self._processes = None
shutdown.__doc__ = _base.Executor.shutdown.__doc__
atexit.register(_python_exit)

View File

@@ -1,134 +0,0 @@
# Copyright 2009 Brian Quinlan. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Implements ThreadPoolExecutor."""
import atexit
from concurrent.futures import _base
import Queue as queue
import threading
import weakref
import sys
__author__ = 'Brian Quinlan (brian@sweetapp.com)'
# Workers are created as daemon threads. This is done to allow the interpreter
# to exit when there are still idle threads in a ThreadPoolExecutor's thread
# pool (i.e. shutdown() was not called). However, allowing workers to die with
# the interpreter has two undesirable properties:
# - The workers would still be running during interpreter shutdown,
# meaning that they would fail in unpredictable ways.
# - The workers could be killed while evaluating a work item, which could
# be bad if the callable being evaluated has external side-effects e.g.
# writing to a file.
#
# To work around this problem, an exit handler is installed which tells the
# workers to exit when their work queues are empty and then waits until the
# threads finish.
_threads_queues = weakref.WeakKeyDictionary()
_shutdown = False
def _python_exit():
global _shutdown
_shutdown = True
items = list(_threads_queues.items()) if _threads_queues else ()
for t, q in items:
q.put(None)
for t, q in items:
t.join(sys.maxint)
atexit.register(_python_exit)
class _WorkItem(object):
def __init__(self, future, fn, args, kwargs):
self.future = future
self.fn = fn
self.args = args
self.kwargs = kwargs
def run(self):
if not self.future.set_running_or_notify_cancel():
return
try:
result = self.fn(*self.args, **self.kwargs)
except BaseException:
e, tb = sys.exc_info()[1:]
self.future.set_exception_info(e, tb)
else:
self.future.set_result(result)
def _worker(executor_reference, work_queue):
try:
while True:
work_item = work_queue.get(block=True)
if work_item is not None:
work_item.run()
# Delete references to object. See issue16284
del work_item
continue
executor = executor_reference()
# Exit if:
# - The interpreter is shutting down OR
# - The executor that owns the worker has been collected OR
# - The executor that owns the worker has been shutdown.
if _shutdown or executor is None or executor._shutdown:
# Notice other workers
work_queue.put(None)
return
del executor
except BaseException:
_base.LOGGER.critical('Exception in worker', exc_info=True)
class ThreadPoolExecutor(_base.Executor):
def __init__(self, max_workers):
"""Initializes a new ThreadPoolExecutor instance.
Args:
max_workers: The maximum number of threads that can be used to
execute the given calls.
"""
self._max_workers = max_workers
self._work_queue = queue.Queue()
self._threads = set()
self._shutdown = False
self._shutdown_lock = threading.Lock()
def submit(self, fn, *args, **kwargs):
with self._shutdown_lock:
if self._shutdown:
raise RuntimeError('cannot schedule new futures after shutdown')
f = _base.Future()
w = _WorkItem(f, fn, args, kwargs)
self._work_queue.put(w)
self._adjust_thread_count()
return f
submit.__doc__ = _base.Executor.submit.__doc__
def _adjust_thread_count(self):
# When the executor gets lost, the weakref callback will wake up
# the worker threads.
def weakref_cb(_, q=self._work_queue):
q.put(None)
# TODO(bquinlan): Should avoid creating new threads if there are more
# idle threads than items in the work queue.
if len(self._threads) < self._max_workers:
t = threading.Thread(target=_worker,
args=(weakref.ref(self, weakref_cb),
self._work_queue))
t.daemon = True
t.start()
self._threads.add(t)
_threads_queues[t] = self._work_queue
def shutdown(self, wait=True):
with self._shutdown_lock:
self._shutdown = True
self._work_queue.put(None)
if wait:
for t in self._threads:
t.join(sys.maxint)
shutdown.__doc__ = _base.Executor.shutdown.__doc__

View File

@@ -1,403 +0,0 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import base64
import binascii
import errno
import imghdr
import random
import os
import select
import socket
import sys
import threading
import time
try:
from json import read as json_decode, write as json_encode
except ImportError:
try:
from json import loads as json_decode, dumps as json_encode
except ImportError:
from simplejson import loads as json_decode, dumps as json_encode
try:
from urllib2 import build_opener, HTTPRedirectHandler, Request, HTTPError
from urllib import urlencode, urlopen
except ImportError:
from urllib.request import build_opener, HTTPRedirectHandler, Request, urlopen
from urllib.error import HTTPError
from urllib.parse import urlencode
# API version and unique software ID
API_VERSION = 'DBC/Python v4.0.11'
SOFTWARE_VENDOR_ID = 0
# Default CAPTCHA timeout and decode() polling interval
DEFAULT_TIMEOUT = 60
POLLS_INTERVAL = 5
# Base HTTP API url
HTTP_BASE_URL = 'http://api.deathbycaptcha.com/api'
# Preferred HTTP API server's response content type, do not change
HTTP_RESPONSE_TYPE = 'application/json'
# Socket API server's host & ports range
SOCKET_HOST = 'api.deathbycaptcha.com'
SOCKET_PORTS = range(8123, 8131)
class AccessDeniedException(Exception):
pass
class Client(object):
"""Death by Captcha API Client"""
def __init__(self, username, password):
self.is_verbose = False
self.userpwd = {'username': username,
'password': password}
def _load_file(self, captcha):
if hasattr(captcha, 'read'):
raw_captcha = captcha.read()
elif isinstance(captcha, bytearray):
raw_captcha = captcha
elif os.path.isfile(captcha):
raw_captcha = ''
try:
f = open(captcha, 'rb')
except Exception as e:
raise e
else:
raw_captcha = f.read()
f.close()
else:
f_stream = urlopen(captcha)
raw_captcha = f_stream.read()
if not len(raw_captcha):
raise ValueError('CAPTCHA image is empty')
elif imghdr.what(None, raw_captcha) is None:
raise TypeError('Unknown CAPTCHA image type')
else:
return raw_captcha
def _log(self, cmd, msg=''):
if self.is_verbose:
print('%d %s %s' % (time.time(), cmd, msg.rstrip()))
return self
def close(self):
pass
def connect(self):
pass
def get_user(self):
"""Fetch the user's details dict -- balance, rate and banned status."""
raise NotImplementedError()
def get_balance(self):
"""Fetch the user's balance (in US cents)."""
return self.get_user().get('balance')
def get_captcha(self, cid):
"""Fetch a CAPTCHA details dict -- its ID, text and correctness."""
raise NotImplementedError()
def get_text(self, cid):
"""Fetch a CAPTCHA text."""
return self.get_captcha(cid).get('text') or None
def report(self, cid):
"""Report a CAPTCHA as incorrectly solved."""
raise NotImplementedError()
def remove(self, cid):
"""Remove an unsolved CAPTCHA."""
raise NotImplementedError()
def upload(self, captcha):
"""Upload a CAPTCHA.
Accepts file names and file-like objects. Returns CAPTCHA details
dict on success.
"""
raise NotImplementedError()
def decode(self, captcha, timeout=DEFAULT_TIMEOUT):
"""Try to solve a CAPTCHA.
See Client.upload() for arguments details.
Uploads a CAPTCHA, polls for its status periodically with arbitrary
timeout (in seconds), returns CAPTCHA details if (correctly) solved.
"""
deadline = time.time() + (max(0, timeout) or DEFAULT_TIMEOUT)
c = self.upload(captcha)
if c:
while deadline > time.time() and not c.get('text'):
time.sleep(POLLS_INTERVAL)
c = self.get_captcha(c['captcha'])
if c.get('text') and c.get('is_correct'):
return c
class HttpClient(Client):
"""Death by Captcha HTTP API client."""
def __init__(self, *args):
Client.__init__(self, *args)
self.opener = build_opener(HTTPRedirectHandler())
def _call(self, cmd, payload=None, headers=None):
if headers is None:
headers = {}
headers['Accept'] = HTTP_RESPONSE_TYPE
headers['User-Agent'] = API_VERSION
if hasattr(payload, 'items'):
payload = urlencode(payload)
self._log('SEND', '%s %d %s' % (cmd, len(payload), payload))
if payload is not None:
headers['Content-Length'] = len(payload)
try:
response = self.opener.open(Request(
HTTP_BASE_URL + '/' + cmd.strip('/'),
data=payload,
headers=headers
)).read()
except HTTPError as e:
if 403 == e.code:
raise AccessDeniedException(
'Access denied, please check your credentials and/or balance')
elif 400 == e.code or 413 == e.code:
raise ValueError("CAPTCHA was rejected by the service, check if it's a valid image")
else:
self._log('RECV', '%d %s' % (len(response), response))
try:
return json_decode(response)
except Exception:
raise RuntimeError('Invalid API response')
return {}
def get_user(self):
return self._call('user', self.userpwd.copy()) or {'user': 0}
def get_captcha(self, cid):
return self._call('captcha/%d' % cid) or {'captcha': 0}
def report(self, cid):
return not self._call('captcha/%d/report' % cid,
self.userpwd.copy()).get('is_correct')
def remove(self, cid):
return not self._call('captcha/%d/remove' % cid,
self.userpwd.copy()).get('captcha')
def upload(self, captcha):
boundary = binascii.hexlify(os.urandom(16))
data = self.userpwd.copy()
data['swid'] = SOFTWARE_VENDOR_ID
body = '\r\n'.join(('\r\n'.join(('--%s' % boundary,
'Content-Disposition: form-data; name="%s"' % k,
'Content-Type: text/plain',
'Content-Length: %d' % len(str(v)),
'',
str(v))))
for k, v in data.items())
captcha = self._load_file(captcha)
body += '\r\n'.join(('',
'--%s' % boundary,
'Content-Disposition: form-data; name="captchafile"; filename="captcha"',
'Content-Type: application/octet-stream',
'Content-Length: %d' % len(captcha),
'',
captcha,
'--%s--' % boundary,
''))
response = self._call('captcha', body, {
'Content-Type': 'multipart/form-data; boundary="%s"' % boundary
}) or {}
if response.get('captcha'):
return response
class SocketClient(Client):
"""Death by Captcha socket API client."""
TERMINATOR = '\r\n'
def __init__(self, *args):
Client.__init__(self, *args)
self.socket_lock = threading.Lock()
self.socket = None
def close(self):
if self.socket:
self._log('CLOSE')
try:
self.socket.shutdown(socket.SHUT_RDWR)
except socket.error:
pass
finally:
self.socket.close()
self.socket = None
def connect(self):
if not self.socket:
self._log('CONN')
host = (socket.gethostbyname(SOCKET_HOST),
random.choice(SOCKET_PORTS))
self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.socket.settimeout(0)
try:
self.socket.connect(host)
except socket.error as e:
if errno.EINPROGRESS == e[0]:
pass
else:
self.close()
raise e
return self.socket
def __del__(self):
self.close()
def _sendrecv(self, sock, buf):
self._log('SEND', buf)
fds = [sock]
buf += self.TERMINATOR
response = ''
while True:
rd, wr, ex = select.select((not buf and fds) or [],
(buf and fds) or [],
fds,
POLLS_INTERVAL)
if ex:
raise IOError('select() failed')
try:
if wr:
while buf:
buf = buf[wr[0].send(buf):]
elif rd:
while True:
s = rd[0].recv(256)
if not s:
raise IOError('recv(): connection lost')
else:
response += s
except socket.error as e:
if e[0] not in (errno.EAGAIN, errno.EINPROGRESS):
raise e
if response.endswith(self.TERMINATOR):
self._log('RECV', response)
return response.rstrip(self.TERMINATOR)
raise IOError('send/recv timed out')
def _call(self, cmd, data=None):
if data is None:
data = {}
data['cmd'] = cmd
data['version'] = API_VERSION
request = json_encode(data)
response = None
for i in range(2):
self.socket_lock.acquire()
try:
sock = self.connect()
response = self._sendrecv(sock, request)
except IOError as e:
sys.stderr.write(str(e) + "\n")
self.close()
except socket.error as e:
sys.stderr.write(str(e) + "\n")
self.close()
raise IOError('Connection refused')
else:
break
finally:
self.socket_lock.release()
try:
if response is None:
raise IOError('Connection lost or timed out during API request')
try:
response = json_decode(response)
except Exception:
raise RuntimeError('Invalid API response')
if 'error' in response:
error = response['error']
if 'not-logged-in' == error:
raise AccessDeniedException('Access denied, check your credentials')
elif 'banned' == error:
raise AccessDeniedException('Access denied, account is suspended')
elif 'insufficient-funds' == error:
raise AccessDeniedException('CAPTCHA was rejected due to low balance')
elif 'invalid-captcha' == error:
raise ValueError('CAPTCHA is not a valid image')
elif 'service-overload' == error:
raise ValueError(
'CAPTCHA was rejected due to service overload, try again later')
else:
raise RuntimeError('API server error occurred: %s' % error)
except Exception as e:
self.socket_lock.acquire()
self.close()
self.socket_lock.release()
raise e
else:
return response
def get_user(self):
return self._call('user', self.userpwd.copy()) or {'user': 0}
def get_captcha(self, cid):
return self._call('captcha', {'captcha': cid}) or {'captcha': 0}
def upload(self, captcha):
data = self.userpwd.copy()
data['captcha'] = base64.b64encode(self._load_file(captcha))
response = self._call('upload', data)
if response.get('captcha'):
return dict((k, response.get(k)) for k in ('captcha', 'text', 'is_correct'))
def report(self, cid):
data = self.userpwd.copy()
data['captcha'] = cid
return not self._call('report', data).get('is_correct')
def remove(self, cid):
data = self.userpwd.copy()
data['captcha'] = cid
return not self._call('remove', data).get('captcha')
if '__main__' == __name__:
import sys
# Put your DBC username & password here:
#client = HttpClient(sys.argv[1], sys.argv[2])
client = SocketClient(sys.argv[1], sys.argv[2])
client.is_verbose = True
print('Your balance is %s US cents' % client.get_balance())
for fn in sys.argv[3:]:
try:
# Put your CAPTCHA image file name or file-like object, and optional
# solving timeout (in seconds) here:
captcha = client.decode(fn, DEFAULT_TIMEOUT)
except Exception as e:
sys.stderr.write('Failed uploading CAPTCHA: %s\n' % (e, ))
captcha = None
if captcha:
print('CAPTCHA %d solved: %s' % (captcha['captcha'], captcha['text']))
# Report as incorrectly solved if needed. Make sure the CAPTCHA was
# in fact incorrectly solved!
try:
client.report(captcha['captcha'])
except Exception as e:
sys.stderr.write('Failed reporting CAPTCHA: %s\n' % (e, ))

View File

@@ -1,4 +0,0 @@
__version__ = '0.6.5'
from .lock import Lock # noqa
from .lock import NeedRegenerationException # noqa

View File

@@ -1,4 +0,0 @@
from .region import CacheRegion, register_backend, make_region # noqa
# backwards compat
from .. import __version__ # noqa

View File

@@ -1,215 +0,0 @@
import operator
from ..util.compat import py3k
class NoValue(object):
"""Describe a missing cache value.
The :attr:`.NO_VALUE` module global
should be used.
"""
@property
def payload(self):
return self
def __repr__(self):
"""Ensure __repr__ is a consistent value in case NoValue is used to
fill another cache key.
"""
return '<dogpile.cache.api.NoValue object>'
if py3k:
def __bool__(self): # pragma NO COVERAGE
return False
else:
def __nonzero__(self): # pragma NO COVERAGE
return False
NO_VALUE = NoValue()
"""Value returned from ``get()`` that describes
a key not present."""
class CachedValue(tuple):
"""Represent a value stored in the cache.
:class:`.CachedValue` is a two-tuple of
``(payload, metadata)``, where ``metadata``
is dogpile.cache's tracking information (
currently the creation time). The metadata
and tuple structure is pickleable, if
the backend requires serialization.
"""
payload = property(operator.itemgetter(0))
"""Named accessor for the payload."""
metadata = property(operator.itemgetter(1))
"""Named accessor for the dogpile.cache metadata dictionary."""
def __new__(cls, payload, metadata):
return tuple.__new__(cls, (payload, metadata))
def __reduce__(self):
return CachedValue, (self.payload, self.metadata)
class CacheBackend(object):
"""Base class for backend implementations."""
key_mangler = None
"""Key mangling function.
May be None, or otherwise declared
as an ordinary instance method.
"""
def __init__(self, arguments): # pragma NO COVERAGE
"""Construct a new :class:`.CacheBackend`.
Subclasses should override this to
handle the given arguments.
:param arguments: The ``arguments`` parameter
passed to :func:`.make_region`.
"""
raise NotImplementedError()
@classmethod
def from_config_dict(cls, config_dict, prefix):
prefix_len = len(prefix)
return cls(
dict(
(key[prefix_len:], config_dict[key])
for key in config_dict
if key.startswith(prefix)
)
)
def has_lock_timeout(self):
return False
def get_mutex(self, key):
"""Return an optional mutexing object for the given key.
This object need only provide an ``acquire()``
and ``release()`` method.
May return ``None``, in which case the dogpile
lock will use a regular ``threading.Lock``
object to mutex concurrent threads for
value creation. The default implementation
returns ``None``.
Different backends may want to provide various
kinds of "mutex" objects, such as those which
link to lock files, distributed mutexes,
memcached semaphores, etc. Whatever
kind of system is best suited for the scope
and behavior of the caching backend.
A mutex that takes the key into account will
allow multiple regenerate operations across
keys to proceed simultaneously, while a mutex
that does not will serialize regenerate operations
to just one at a time across all keys in the region.
The latter approach, or a variant that involves
a modulus of the given key's hash value,
can be used as a means of throttling the total
number of value recreation operations that may
proceed at one time.
"""
return None
def get(self, key): # pragma NO COVERAGE
"""Retrieve a value from the cache.
The returned value should be an instance of
:class:`.CachedValue`, or ``NO_VALUE`` if
not present.
"""
raise NotImplementedError()
def get_multi(self, keys): # pragma NO COVERAGE
"""Retrieve multiple values from the cache.
The returned value should be a list, corresponding
to the list of keys given.
.. versionadded:: 0.5.0
"""
raise NotImplementedError()
def set(self, key, value): # pragma NO COVERAGE
"""Set a value in the cache.
The key will be whatever was passed
to the registry, processed by the
"key mangling" function, if any.
The value will always be an instance
of :class:`.CachedValue`.
"""
raise NotImplementedError()
def set_multi(self, mapping): # pragma NO COVERAGE
"""Set multiple values in the cache.
``mapping`` is a dict in which
the key will be whatever was passed
to the registry, processed by the
"key mangling" function, if any.
The value will always be an instance
of :class:`.CachedValue`.
When implementing a new :class:`.CacheBackend` or customizing via
:class:`.ProxyBackend`, be aware that when this method is invoked by
:meth:`.Region.get_or_create_multi`, the ``mapping`` values are the
same ones returned to the upstream caller. If the subclass alters the
values in any way, it must not do so 'in-place' on the ``mapping`` dict
-- that will have the undesirable effect of modifying the returned
values as well.
.. versionadded:: 0.5.0
"""
raise NotImplementedError()
def delete(self, key): # pragma NO COVERAGE
"""Delete a value from the cache.
The key will be whatever was passed
to the registry, processed by the
"key mangling" function, if any.
The behavior here should be idempotent,
that is, can be called any number of times
regardless of whether or not the
key exists.
"""
raise NotImplementedError()
def delete_multi(self, keys): # pragma NO COVERAGE
"""Delete multiple values from the cache.
The key will be whatever was passed
to the registry, processed by the
"key mangling" function, if any.
The behavior here should be idempotent,
that is, can be called any number of times
regardless of whether or not the
key exists.
.. versionadded:: 0.5.0
"""
raise NotImplementedError()

View File

@@ -1,22 +0,0 @@
from dogpile.cache.region import register_backend
register_backend(
"dogpile.cache.null", "dogpile.cache.backends.null", "NullBackend")
register_backend(
"dogpile.cache.dbm", "dogpile.cache.backends.file", "DBMBackend")
register_backend(
"dogpile.cache.pylibmc", "dogpile.cache.backends.memcached",
"PylibmcBackend")
register_backend(
"dogpile.cache.bmemcached", "dogpile.cache.backends.memcached",
"BMemcachedBackend")
register_backend(
"dogpile.cache.memcached", "dogpile.cache.backends.memcached",
"MemcachedBackend")
register_backend(
"dogpile.cache.memory", "dogpile.cache.backends.memory", "MemoryBackend")
register_backend(
"dogpile.cache.memory_pickle", "dogpile.cache.backends.memory",
"MemoryPickleBackend")
register_backend(
"dogpile.cache.redis", "dogpile.cache.backends.redis", "RedisBackend")

View File

@@ -1,447 +0,0 @@
"""
File Backends
------------------
Provides backends that deal with local filesystem access.
"""
from __future__ import with_statement
from ..api import CacheBackend, NO_VALUE
from contextlib import contextmanager
from ...util import compat
from ... import util
import os
__all__ = 'DBMBackend', 'FileLock', 'AbstractFileLock'
class DBMBackend(CacheBackend):
"""A file-backend using a dbm file to store keys.
Basic usage::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.dbm',
expiration_time = 3600,
arguments = {
"filename":"/path/to/cachefile.dbm"
}
)
DBM access is provided using the Python ``anydbm`` module,
which selects a platform-specific dbm module to use.
This may be made to be more configurable in a future
release.
Note that different dbm modules have different behaviors.
Some dbm implementations handle their own locking, while
others don't. The :class:`.DBMBackend` uses a read/write
lockfile by default, which is compatible even with those
DBM implementations for which this is unnecessary,
though the behavior can be disabled.
The DBM backend by default makes use of two lockfiles.
One is in order to protect the DBM file itself from
concurrent writes, the other is to coordinate
value creation (i.e. the dogpile lock). By default,
these lockfiles use the ``flock()`` system call
for locking; this is **only available on Unix
platforms**. An alternative lock implementation, such as one
which is based on threads or uses a third-party system
such as `portalocker <https://pypi.python.org/pypi/portalocker>`_,
can be dropped in using the ``lock_factory`` argument
in conjunction with the :class:`.AbstractFileLock` base class.
Currently, the dogpile lock is against the entire
DBM file, not per key. This means there can
only be one "creator" job running at a time
per dbm file.
A future improvement might be to have the dogpile lock
using a filename that's based on a modulus of the key.
Locking on a filename that uniquely corresponds to the
key is problematic, since it's not generally safe to
delete lockfiles as the application runs, implying an
unlimited number of key-based files would need to be
created and never deleted.
Parameters to the ``arguments`` dictionary are
below.
:param filename: path of the filename in which to
create the DBM file. Note that some dbm backends
will change this name to have additional suffixes.
:param rw_lockfile: the name of the file to use for
read/write locking. If omitted, a default name
is used by appending the suffix ".rw.lock" to the
DBM filename. If False, then no lock is used.
:param dogpile_lockfile: the name of the file to use
for value creation, i.e. the dogpile lock. If
omitted, a default name is used by appending the
suffix ".dogpile.lock" to the DBM filename. If
False, then dogpile.cache uses the default dogpile
lock, a plain thread-based mutex.
:param lock_factory: a function or class which provides
for a read/write lock. Defaults to :class:`.FileLock`.
Custom implementations need to implement context-manager
based ``read()`` and ``write()`` functions - the
:class:`.AbstractFileLock` class is provided as a base class
which provides these methods based on individual read/write lock
functions. E.g. to replace the lock with the dogpile.core
:class:`.ReadWriteMutex`::
from dogpile.core.readwrite_lock import ReadWriteMutex
from dogpile.cache.backends.file import AbstractFileLock
class MutexLock(AbstractFileLock):
def __init__(self, filename):
self.mutex = ReadWriteMutex()
def acquire_read_lock(self, wait):
ret = self.mutex.acquire_read_lock(wait)
return wait or ret
def acquire_write_lock(self, wait):
ret = self.mutex.acquire_write_lock(wait)
return wait or ret
def release_read_lock(self):
return self.mutex.release_read_lock()
def release_write_lock(self):
return self.mutex.release_write_lock()
from dogpile.cache import make_region
region = make_region().configure(
"dogpile.cache.dbm",
expiration_time=300,
arguments={
"filename": "file.dbm",
"lock_factory": MutexLock
}
)
While the included :class:`.FileLock` uses ``os.flock()``, a
windows-compatible implementation can be built using a library
such as `portalocker <https://pypi.python.org/pypi/portalocker>`_.
.. versionadded:: 0.5.2
"""
def __init__(self, arguments):
self.filename = os.path.abspath(
os.path.normpath(arguments['filename'])
)
dir_, filename = os.path.split(self.filename)
self.lock_factory = arguments.get("lock_factory", FileLock)
self._rw_lock = self._init_lock(
arguments.get('rw_lockfile'),
".rw.lock", dir_, filename)
self._dogpile_lock = self._init_lock(
arguments.get('dogpile_lockfile'),
".dogpile.lock",
dir_, filename,
util.KeyReentrantMutex.factory)
# TODO: make this configurable
if compat.py3k:
import dbm
else:
import anydbm as dbm
self.dbmmodule = dbm
self._init_dbm_file()
def _init_lock(self, argument, suffix, basedir, basefile, wrapper=None):
if argument is None:
lock = self.lock_factory(os.path.join(basedir, basefile + suffix))
elif argument is not False:
lock = self.lock_factory(
os.path.abspath(
os.path.normpath(argument)
))
else:
return None
if wrapper:
lock = wrapper(lock)
return lock
def _init_dbm_file(self):
exists = os.access(self.filename, os.F_OK)
if not exists:
for ext in ('db', 'dat', 'pag', 'dir'):
if os.access(self.filename + os.extsep + ext, os.F_OK):
exists = True
break
if not exists:
fh = self.dbmmodule.open(self.filename, 'c')
fh.close()
def get_mutex(self, key):
# using one dogpile for the whole file. Other ways
# to do this might be using a set of files keyed to a
# hash/modulus of the key. the issue is it's never
# really safe to delete a lockfile as this can
# break other processes trying to get at the file
# at the same time - so handling unlimited keys
# can't imply unlimited filenames
if self._dogpile_lock:
return self._dogpile_lock(key)
else:
return None
@contextmanager
def _use_rw_lock(self, write):
if self._rw_lock is None:
yield
elif write:
with self._rw_lock.write():
yield
else:
with self._rw_lock.read():
yield
@contextmanager
def _dbm_file(self, write):
with self._use_rw_lock(write):
dbm = self.dbmmodule.open(
self.filename,
"w" if write else "r")
yield dbm
dbm.close()
def get(self, key):
with self._dbm_file(False) as dbm:
if hasattr(dbm, 'get'):
value = dbm.get(key, NO_VALUE)
else:
# gdbm objects lack a .get method
try:
value = dbm[key]
except KeyError:
value = NO_VALUE
if value is not NO_VALUE:
value = compat.pickle.loads(value)
return value
def get_multi(self, keys):
return [self.get(key) for key in keys]
def set(self, key, value):
with self._dbm_file(True) as dbm:
dbm[key] = compat.pickle.dumps(value,
compat.pickle.HIGHEST_PROTOCOL)
def set_multi(self, mapping):
with self._dbm_file(True) as dbm:
for key, value in mapping.items():
dbm[key] = compat.pickle.dumps(value,
compat.pickle.HIGHEST_PROTOCOL)
def delete(self, key):
with self._dbm_file(True) as dbm:
try:
del dbm[key]
except KeyError:
pass
def delete_multi(self, keys):
with self._dbm_file(True) as dbm:
for key in keys:
try:
del dbm[key]
except KeyError:
pass
class AbstractFileLock(object):
"""Coordinate read/write access to a file.
Typically this is a file-based lock, but it doesn't necessarily have to be.
The default implementation here is :class:`.FileLock`.
Implementations should provide the following methods::
* __init__()
* acquire_read_lock()
* acquire_write_lock()
* release_read_lock()
* release_write_lock()
The ``__init__()`` method accepts a single argument "filename", which
may be used as the "lock file", for those implementations that use a lock
file.
Note that multithreaded environments must provide a thread-safe
version of this lock. The recommended approach for file-
descriptor-based locks is to use a Python ``threading.local()`` so
that a unique file descriptor is held per thread. See the source
code of :class:`.FileLock` for an implementation example.
"""
def __init__(self, filename):
"""Constructor, is given the filename of a potential lockfile.
The usage of this filename is optional and no file is
created by default.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
def acquire(self, wait=True):
"""Acquire the "write" lock.
This is a direct call to :meth:`.AbstractFileLock.acquire_write_lock`.
"""
return self.acquire_write_lock(wait)
def release(self):
"""Release the "write" lock.
This is a direct call to :meth:`.AbstractFileLock.release_write_lock`.
"""
self.release_write_lock()
@contextmanager
def read(self):
"""Provide a context manager for the "read" lock.
This method makes use of :meth:`.AbstractFileLock.acquire_read_lock`
and :meth:`.AbstractFileLock.release_read_lock`
"""
self.acquire_read_lock(True)
try:
yield
finally:
self.release_read_lock()
@contextmanager
def write(self):
"""Provide a context manager for the "write" lock.
This method makes use of :meth:`.AbstractFileLock.acquire_write_lock`
and :meth:`.AbstractFileLock.release_write_lock`
"""
self.acquire_write_lock(True)
try:
yield
finally:
self.release_write_lock()
@property
def is_open(self):
"""optional method."""
raise NotImplementedError()
def acquire_read_lock(self, wait):
"""Acquire a 'reader' lock.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
def acquire_write_lock(self, wait):
"""Acquire a 'write' lock.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
def release_read_lock(self):
"""Release a 'reader' lock.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
def release_write_lock(self):
"""Release a 'writer' lock.
Raises ``NotImplementedError`` by default, must be
implemented by subclasses.
"""
raise NotImplementedError()
class FileLock(AbstractFileLock):
"""Use lockfiles to coordinate read/write access to a file.
Only works on Unix systems, using
`fcntl.flock() <http://docs.python.org/library/fcntl.html>`_.
"""
def __init__(self, filename):
self._filedescriptor = compat.threading.local()
self.filename = filename
@util.memoized_property
def _module(self):
import fcntl
return fcntl
@property
def is_open(self):
return hasattr(self._filedescriptor, 'fileno')
def acquire_read_lock(self, wait):
return self._acquire(wait, os.O_RDONLY, self._module.LOCK_SH)
def acquire_write_lock(self, wait):
return self._acquire(wait, os.O_WRONLY, self._module.LOCK_EX)
def release_read_lock(self):
self._release()
def release_write_lock(self):
self._release()
def _acquire(self, wait, wrflag, lockflag):
wrflag |= os.O_CREAT
fileno = os.open(self.filename, wrflag)
try:
if not wait:
lockflag |= self._module.LOCK_NB
self._module.flock(fileno, lockflag)
except IOError:
os.close(fileno)
if not wait:
# this is typically
# "[Errno 35] Resource temporarily unavailable",
# because of LOCK_NB
return False
else:
raise
else:
self._filedescriptor.fileno = fileno
return True
def _release(self):
try:
fileno = self._filedescriptor.fileno
except AttributeError:
return
else:
self._module.flock(fileno, self._module.LOCK_UN)
os.close(fileno)
del self._filedescriptor.fileno
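For orientation (not part of this diff): the read()/write() context managers inherited from AbstractFileLock give FileLock a compact usage pattern. A hedged sketch, Unix-only since it relies on fcntl.flock(), with a hypothetical lockfile path:

from dogpile.cache.backends.file import FileLock

lock = FileLock("/tmp/example.rw.lock")  # hypothetical path

# shared access: any number of readers may hold the lock at once
with lock.read():
    pass  # read the protected resource

# exclusive access: blocks until all readers and writers release
with lock.write():
    pass  # modify the protected resource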

View File

@@ -1,364 +0,0 @@
"""
Memcached Backends
------------------
Provides backends for talking to `memcached <http://memcached.org>`_.
"""
from ..api import CacheBackend, NO_VALUE
from ...util import compat
from ... import util
import random
import time
__all__ = 'GenericMemcachedBackend', 'MemcachedBackend',\
'PylibmcBackend', 'BMemcachedBackend', 'MemcachedLock'
class MemcachedLock(object):
"""Simple distributed lock using memcached.
This is an adaptation of the lock featured at
http://amix.dk/blog/post/19386
"""
def __init__(self, client_fn, key, timeout=0):
self.client_fn = client_fn
self.key = "_lock" + key
self.timeout = timeout
def acquire(self, wait=True):
client = self.client_fn()
i = 0
while True:
if client.add(self.key, 1, self.timeout):
return True
elif not wait:
return False
else:
sleep_time = (((i + 1) * random.random()) + 2 ** i) / 2.5
time.sleep(sleep_time)
if i < 15:
i += 1
def release(self):
client = self.client_fn()
client.delete(self.key)
class GenericMemcachedBackend(CacheBackend):
"""Base class for memcached backends.
This base class accepts a number of parameters
common to all backends.
:param url: the string URL to connect to. Can be a single
string or a list of strings. This is the only argument
that's required.
:param distributed_lock: boolean, when True, will use a
memcached-lock as the dogpile lock (see :class:`.MemcachedLock`).
Use this when multiple
processes will be talking to the same memcached instance.
When left at False, dogpile will coordinate on a regular
threading mutex.
:param lock_timeout: integer, number of seconds after acquiring a lock that
memcached should expire it. This argument is only valid when
``distributed_lock`` is ``True``.
.. versionadded:: 0.5.7
:param memcached_expire_time: integer, when present will
be passed as the ``time`` parameter to ``pylibmc.Client.set``.
This is used to set the memcached expiry time for a value.
.. note::
This parameter is **different** from Dogpile's own
``expiration_time``, which is the number of seconds after
which Dogpile will consider the value to be expired.
When Dogpile considers a value to be expired,
it **continues to use the value** until generation
of a new value is complete, when using
:meth:`.CacheRegion.get_or_create`.
Therefore, if you are setting ``memcached_expire_time``, you'll
want to make sure it is greater than ``expiration_time``
by at least enough seconds for new values to be generated,
else the value won't be available during a regeneration,
forcing all threads to wait for a regeneration each time
a value expires.
The :class:`.GenericMemcachedBackend` uses a ``threading.local()``
object to store individual client objects per thread,
as most modern memcached clients do not appear to be inherently
threadsafe.
In particular, ``threading.local()`` has the advantage over pylibmc's
built-in thread pool in that it automatically discards objects
associated with a particular thread when that thread ends.
"""
set_arguments = {}
"""Additional arguments which will be passed
to the :meth:`set` method."""
def __init__(self, arguments):
self._imports()
# using a plain threading.local here. threading.local
# automatically deletes the __dict__ when a thread ends,
# so the idea is that this is superior to pylibmc's
# own ThreadMappedPool which doesn't handle this
# automatically.
self.url = util.to_list(arguments['url'])
self.distributed_lock = arguments.get('distributed_lock', False)
self.lock_timeout = arguments.get('lock_timeout', 0)
self.memcached_expire_time = arguments.get(
'memcached_expire_time', 0)
def has_lock_timeout(self):
return self.lock_timeout != 0
def _imports(self):
"""client library imports go here."""
raise NotImplementedError()
def _create_client(self):
"""Creation of a Client instance goes here."""
raise NotImplementedError()
@util.memoized_property
def _clients(self):
backend = self
class ClientPool(compat.threading.local):
def __init__(self):
self.memcached = backend._create_client()
return ClientPool()
@property
def client(self):
"""Return the memcached client.
This uses a threading.local by
default as it appears most modern
memcached libs aren't inherently
threadsafe.
"""
return self._clients.memcached
def get_mutex(self, key):
if self.distributed_lock:
return MemcachedLock(lambda: self.client, key,
timeout=self.lock_timeout)
else:
return None
def get(self, key):
value = self.client.get(key)
if value is None:
return NO_VALUE
else:
return value
def get_multi(self, keys):
values = self.client.get_multi(keys)
return [
NO_VALUE if key not in values
else values[key] for key in keys
]
def set(self, key, value):
self.client.set(
key,
value,
**self.set_arguments
)
def set_multi(self, mapping):
self.client.set_multi(
mapping,
**self.set_arguments
)
def delete(self, key):
self.client.delete(key)
def delete_multi(self, keys):
self.client.delete_multi(keys)
class MemcacheArgs(object):
"""Mixin which provides support for the 'time' argument to set(),
'min_compress_len' to other methods.
"""
def __init__(self, arguments):
self.min_compress_len = arguments.get('min_compress_len', 0)
self.set_arguments = {}
if "memcached_expire_time" in arguments:
self.set_arguments["time"] = arguments["memcached_expire_time"]
if "min_compress_len" in arguments:
self.set_arguments["min_compress_len"] = \
arguments["min_compress_len"]
super(MemcacheArgs, self).__init__(arguments)
pylibmc = None
class PylibmcBackend(MemcacheArgs, GenericMemcachedBackend):
"""A backend for the
`pylibmc <http://sendapatch.se/projects/pylibmc/index.html>`_
memcached client.
A configuration illustrating several of the optional
arguments described in the pylibmc documentation::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.pylibmc',
expiration_time = 3600,
arguments = {
'url':["127.0.0.1"],
'binary':True,
'behaviors':{"tcp_nodelay": True,"ketama":True}
}
)
Arguments accepted here include those of
:class:`.GenericMemcachedBackend`, as well as
those below.
:param binary: sets the ``binary`` flag understood by
``pylibmc.Client``.
:param behaviors: a dictionary which will be passed to
``pylibmc.Client`` as the ``behaviors`` parameter.
:param min_compress_len: Integer, will be passed as the
``min_compress_len`` parameter to the ``pylibmc.Client.set``
method.
"""
def __init__(self, arguments):
self.binary = arguments.get('binary', False)
self.behaviors = arguments.get('behaviors', {})
super(PylibmcBackend, self).__init__(arguments)
def _imports(self):
global pylibmc
import pylibmc # noqa
def _create_client(self):
return pylibmc.Client(
self.url,
binary=self.binary,
behaviors=self.behaviors
)
memcache = None
class MemcachedBackend(MemcacheArgs, GenericMemcachedBackend):
"""A backend using the standard
`Python-memcached <http://www.tummy.com/Community/software/\
python-memcached/>`_
library.
Example::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.memcached',
expiration_time = 3600,
arguments = {
'url':"127.0.0.1:11211"
}
)
"""
def _imports(self):
global memcache
import memcache # noqa
def _create_client(self):
return memcache.Client(self.url)
bmemcached = None
class BMemcachedBackend(GenericMemcachedBackend):
"""A backend for the
`python-binary-memcached <https://github.com/jaysonsantos/\
python-binary-memcached>`_
memcached client.
This is a pure Python memcached client which
includes the ability to authenticate with a memcached
server using SASL.
A typical configuration using username/password::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.bmemcached',
expiration_time = 3600,
arguments = {
'url':["127.0.0.1"],
'username':'scott',
'password':'tiger'
}
)
Arguments which can be passed to the ``arguments``
dictionary include:
:param username: optional username, will be used for
SASL authentication.
:param password: optional password, will be used for
SASL authentication.
"""
def __init__(self, arguments):
self.username = arguments.get('username', None)
self.password = arguments.get('password', None)
super(BMemcachedBackend, self).__init__(arguments)
def _imports(self):
global bmemcached
import bmemcached
class RepairBMemcachedAPI(bmemcached.Client):
"""Repairs BMemcached's non-standard method
signatures, which were fixed in BMemcached
ef206ed4473fec3b639e.
"""
def add(self, key, value, timeout=0):
try:
return super(RepairBMemcachedAPI, self).add(
key, value, timeout)
except ValueError:
return False
self.Client = RepairBMemcachedAPI
def _create_client(self):
return self.Client(
self.url,
username=self.username,
password=self.password
)
def delete_multi(self, keys):
"""python-binary-memcached api does not implements delete_multi"""
for key in keys:
self.delete(key)
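To tie the parameters above together, a hedged configuration sketch: memcached_expire_time is set comfortably above expiration_time per the note in GenericMemcachedBackend, and distributed_lock/lock_timeout enable MemcachedLock across processes:

from dogpile.cache import make_region

region = make_region().configure(
    'dogpile.cache.memcached',
    expiration_time=300,               # dogpile's own staleness threshold
    arguments={
        'url': "127.0.0.1:11211",
        'distributed_lock': True,      # MemcachedLock instead of a thread mutex
        'lock_timeout': 30,            # memcached expires the lock after 30s
        'memcached_expire_time': 600,  # server-side TTL, > expiration_time
    }
)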

View File

@@ -1,124 +0,0 @@
"""
Memory Backends
---------------
Provides simple dictionary-based backends.
The two backends are :class:`.MemoryBackend` and :class:`.MemoryPickleBackend`;
the latter applies a serialization step to cached values while the former
places the value as given into the dictionary.
"""
from ..api import CacheBackend, NO_VALUE
from ...util.compat import pickle
class MemoryBackend(CacheBackend):
"""A backend that uses a plain dictionary.
There is no size management, and values which
are placed into the dictionary will remain
until explicitly removed. Note that
Dogpile's expiration of items is based on
timestamps and does not remove them from
the cache.
E.g.::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.memory'
)
To use a Python dictionary of your choosing,
it can be passed in with the ``cache_dict``
argument::
my_dictionary = {}
region = make_region().configure(
'dogpile.cache.memory',
arguments={
"cache_dict":my_dictionary
}
)
"""
pickle_values = False
def __init__(self, arguments):
self._cache = arguments.pop("cache_dict", {})
def get(self, key):
value = self._cache.get(key, NO_VALUE)
if value is not NO_VALUE and self.pickle_values:
value = pickle.loads(value)
return value
def get_multi(self, keys):
ret = [
self._cache.get(key, NO_VALUE)
for key in keys]
if self.pickle_values:
ret = [
pickle.loads(value)
if value is not NO_VALUE else value
for value in ret
]
return ret
def set(self, key, value):
if self.pickle_values:
value = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
self._cache[key] = value
def set_multi(self, mapping):
pickle_values = self.pickle_values
for key, value in mapping.items():
if pickle_values:
value = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
self._cache[key] = value
def delete(self, key):
self._cache.pop(key, None)
def delete_multi(self, keys):
for key in keys:
self._cache.pop(key, None)
class MemoryPickleBackend(MemoryBackend):
"""A backend that uses a plain dictionary, but serializes objects on
:meth:`.MemoryBackend.set` and deserializes on :meth:`.MemoryBackend.get`.
E.g.::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.memory_pickle'
)
The usage of pickle to serialize cached values allows an object
as placed in the cache to be a copy of the original given object, so
that any subsequent changes to the given object aren't reflected
in the cached value, thus making the backend behave the same way
as other backends which make use of serialization.
The serialization is performed via pickle, and incurs the same
performance hit in doing so as that of other backends; in this way
the :class:`.MemoryPickleBackend` performance is somewhere in between
that of the pure :class:`.MemoryBackend` and the remote server oriented
backends such as that of Memcached or Redis.
Pickle behavior here is the same as that of the Redis backend, using
either ``cPickle`` or ``pickle`` and specifying ``HIGHEST_PROTOCOL``
upon serialize.
.. versionadded:: 0.5.3
"""
pickle_values = True
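A hedged sketch of the difference described above: the plain memory backend stores the object as given, so later mutations show through, while the pickle variant stores a serialized copy:

from dogpile.cache import make_region

plain = make_region().configure('dogpile.cache.memory')
pickled = make_region().configure('dogpile.cache.memory_pickle')

data = {'count': 1}
plain.set('k', data)
pickled.set('k', data)
data['count'] = 2

plain.get('k')['count']    # 2 -- the same dict object was cached
pickled.get('k')['count']  # 1 -- a pickled copy was cached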

View File

@@ -1,62 +0,0 @@
"""
Null Backend
-------------
The Null backend does not do any caching at all. It can be
used to test behavior without caching, or as a means of disabling
caching for a region that is otherwise used normally.
.. versionadded:: 0.5.4
"""
from ..api import CacheBackend, NO_VALUE
__all__ = ['NullBackend']
class NullLock(object):
def acquire(self, wait=True):
return True
def release(self):
pass
class NullBackend(CacheBackend):
"""A "null" backend that effectively disables all cache operations.
Basic usage::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.null'
)
"""
def __init__(self, arguments):
pass
def get_mutex(self, key):
return NullLock()
def get(self, key):
return NO_VALUE
def get_multi(self, keys):
return [NO_VALUE for k in keys]
def set(self, key, value):
pass
def set_multi(self, mapping):
pass
def delete(self, key):
pass
def delete_multi(self, keys):
pass

View File

@@ -1,183 +0,0 @@
"""
Redis Backends
------------------
Provides backends for talking to `Redis <http://redis.io>`_.
"""
from __future__ import absolute_import
from ..api import CacheBackend, NO_VALUE
from ...util.compat import pickle, u
redis = None
__all__ = 'RedisBackend',
class RedisBackend(CacheBackend):
"""A `Redis <http://redis.io/>`_ backend, using the
`redis-py <http://pypi.python.org/pypi/redis/>`_ backend.
Example configuration::
from dogpile.cache import make_region
region = make_region().configure(
'dogpile.cache.redis',
arguments = {
'host': 'localhost',
'port': 6379,
'db': 0,
'redis_expiration_time': 60*60*2, # 2 hours
'distributed_lock': True
}
)
Arguments accepted in the arguments dictionary:
:param url: string. If provided, will override separate host/port/db
params. The format is that accepted by ``StrictRedis.from_url()``.
.. versionadded:: 0.4.1
:param host: string, default is ``localhost``.
:param password: string, default is no password.
.. versionadded:: 0.4.1
:param port: integer, default is ``6379``.
:param db: integer, default is ``0``.
:param redis_expiration_time: integer, number of seconds after setting
a value that Redis should expire it. This should be larger than dogpile's
cache expiration. By default no expiration is set.
:param distributed_lock: boolean, when True, will use a
redis-lock as the dogpile lock.
Use this when multiple
processes will be talking to the same redis instance.
When left at False, dogpile will coordinate on a regular
threading mutex.
:param lock_timeout: integer, number of seconds after acquiring a lock that
Redis should expire it. This argument is only valid when
``distributed_lock`` is ``True``.
.. versionadded:: 0.5.0
:param socket_timeout: float, seconds for socket timeout.
Default is None (no timeout).
.. versionadded:: 0.5.4
:param lock_sleep: integer, number of seconds to sleep when failed to
acquire a lock. This argument is only valid when
``distributed_lock`` is ``True``.
.. versionadded:: 0.5.0
:param connection_pool: ``redis.ConnectionPool`` object. If provided,
this object supersedes other connection arguments passed to the
``redis.StrictRedis`` instance, including url and/or host as well as
socket_timeout, and will be passed to ``redis.StrictRedis`` as the
source of connectivity.
.. versionadded:: 0.5.4
"""
def __init__(self, arguments):
arguments = arguments.copy()
self._imports()
self.url = arguments.pop('url', None)
self.host = arguments.pop('host', 'localhost')
self.password = arguments.pop('password', None)
self.port = arguments.pop('port', 6379)
self.db = arguments.pop('db', 0)
self.distributed_lock = arguments.get('distributed_lock', False)
self.socket_timeout = arguments.pop('socket_timeout', None)
self.lock_timeout = arguments.get('lock_timeout', None)
self.lock_sleep = arguments.get('lock_sleep', 0.1)
self.redis_expiration_time = arguments.pop('redis_expiration_time', 0)
self.connection_pool = arguments.get('connection_pool', None)
self.client = self._create_client()
def _imports(self):
# defer imports until backend is used
global redis
import redis # noqa
def _create_client(self):
if self.connection_pool is not None:
# the connection pool already has all other connection
# options present within, so here we disregard socket_timeout
# and others.
return redis.StrictRedis(connection_pool=self.connection_pool)
args = {}
if self.socket_timeout:
args['socket_timeout'] = self.socket_timeout
if self.url is not None:
args.update(url=self.url)
return redis.StrictRedis.from_url(**args)
else:
args.update(
host=self.host, password=self.password,
port=self.port, db=self.db
)
return redis.StrictRedis(**args)
def get_mutex(self, key):
if self.distributed_lock:
return self.client.lock(u('_lock{0}').format(key),
self.lock_timeout, self.lock_sleep)
else:
return None
def get(self, key):
value = self.client.get(key)
if value is None:
return NO_VALUE
return pickle.loads(value)
def get_multi(self, keys):
if not keys:
return []
values = self.client.mget(keys)
return [
pickle.loads(v) if v is not None else NO_VALUE
for v in values]
def set(self, key, value):
if self.redis_expiration_time:
self.client.setex(key, self.redis_expiration_time,
pickle.dumps(value, pickle.HIGHEST_PROTOCOL))
else:
self.client.set(key, pickle.dumps(value, pickle.HIGHEST_PROTOCOL))
def set_multi(self, mapping):
mapping = dict(
(k, pickle.dumps(v, pickle.HIGHEST_PROTOCOL))
for k, v in mapping.items()
)
if not self.redis_expiration_time:
self.client.mset(mapping)
else:
pipe = self.client.pipeline()
for key, value in mapping.items():
pipe.setex(key, self.redis_expiration_time, value)
pipe.execute()
def delete(self, key):
self.client.delete(key)
def delete_multi(self, keys):
self.client.delete(*keys)
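As a hedged variant of the docstring example above, the single url argument can stand in for host/port/db, since it is passed to StrictRedis.from_url():

from dogpile.cache import make_region

region = make_region().configure(
    'dogpile.cache.redis',
    expiration_time=3600,
    arguments={
        'url': 'redis://localhost:6379/0',   # supersedes host/port/db
        'redis_expiration_time': 3600 + 60,  # server TTL > dogpile expiration
        'distributed_lock': True,            # redis-lock as the dogpile lock
    }
)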

View File

@@ -1,25 +0,0 @@
"""Exception classes for dogpile.cache."""
class DogpileCacheException(Exception):
"""Base Exception for dogpile.cache exceptions to inherit from."""
class RegionAlreadyConfigured(DogpileCacheException):
"""CacheRegion instance is already configured."""
class RegionNotConfigured(DogpileCacheException):
"""CacheRegion instance has not been configured."""
class ValidationError(DogpileCacheException):
"""Error validating a value or option."""
class PluginNotFound(DogpileCacheException):
"""The specified plugin could not be found.
.. versionadded:: 0.6.4
"""

View File

@@ -1,90 +0,0 @@
"""
Mako Integration
----------------
dogpile.cache includes a `Mako <http://www.makotemplates.org>`_ plugin
that replaces `Beaker <http://beaker.groovie.org>`_
as the cache backend.
Setup a Mako template lookup using the "dogpile.cache" cache implementation
and a region dictionary::
from dogpile.cache import make_region
from mako.lookup import TemplateLookup
my_regions = {
"local":make_region().configure(
"dogpile.cache.dbm",
expiration_time=360,
arguments={"filename":"file.dbm"}
),
"memcached":make_region().configure(
"dogpile.cache.pylibmc",
expiration_time=3600,
arguments={"url":["127.0.0.1"]}
)
}
mako_lookup = TemplateLookup(
directories=["/myapp/templates"],
cache_impl="dogpile.cache",
cache_args={
'regions':my_regions
}
)
To use the above configuration in a template, use the ``cached=True``
argument on any Mako tag which accepts it, in conjunction with the
name of the desired region as the ``cache_region`` argument::
<%def name="mysection()" cached="True" cache_region="memcached">
some content that's cached
</%def>
"""
from mako.cache import CacheImpl
class MakoPlugin(CacheImpl):
"""A Mako ``CacheImpl`` which talks to dogpile.cache."""
def __init__(self, cache):
super(MakoPlugin, self).__init__(cache)
try:
self.regions = self.cache.template.cache_args['regions']
except KeyError:
raise KeyError(
"'regions' key is required in the cache_args "
"dictionary on the Mako Lookup or Template object "
"for usage with the dogpile.cache plugin.")
def _get_region(self, **kw):
try:
region = kw['region']
except KeyError:
raise KeyError(
"'cache_region' argument must be specified with 'cache=True'"
"within templates for usage with the dogpile.cache plugin.")
try:
return self.regions[region]
except KeyError:
raise KeyError("No such region '%s'" % region)
def get_and_replace(self, key, creation_function, **kw):
expiration_time = kw.pop("timeout", None)
return self._get_region(**kw).get_or_create(
key, creation_function,
expiration_time=expiration_time)
def get_or_create(self, key, creation_function, **kw):
return self.get_and_replace(key, creation_function, **kw)
def put(self, key, value, **kw):
self._get_region(**kw).set(key, value)
def get(self, key, **kw):
expiration_time = kw.pop("timeout", None)
return self._get_region(**kw).get(key, expiration_time=expiration_time)
def invalidate(self, key, **kw):
self._get_region(**kw).delete(key)

View File

@@ -1,95 +0,0 @@
"""
Proxy Backends
------------------
Provides a utility and a decorator class that allow for modifying the behavior
of different backends without altering the class itself or having to extend the
base backend.
.. versionadded:: 0.5.0 Added support for the :class:`.ProxyBackend` class.
"""
from .api import CacheBackend
class ProxyBackend(CacheBackend):
"""A decorator class for altering the functionality of backends.
Basic usage::
from dogpile.cache import make_region
from dogpile.cache.proxy import ProxyBackend
class MyFirstProxy(ProxyBackend):
def get(self, key):
# ... custom code goes here ...
return self.proxied.get(key)
def set(self, key, value):
# ... custom code goes here ...
self.proxied.set(key)
class MySecondProxy(ProxyBackend):
def get(self, key):
# ... custom code goes here ...
return self.proxied.get(key)
region = make_region().configure(
'dogpile.cache.dbm',
expiration_time = 3600,
arguments = {
"filename":"/path/to/cachefile.dbm"
},
wrap = [ MyFirstProxy, MySecondProxy ]
)
Classes that extend :class:`.ProxyBackend` can be stacked
together. The ``.proxied`` property will always
point to either the concrete backend instance or
the next proxy in the chain that a method can be
delegated towards.
.. versionadded:: 0.5.0
"""
def __init__(self, *args, **kwargs):
self.proxied = None
def wrap(self, backend):
''' Take a backend as an argument and set up the self.proxied property.
Return an object that can be used as a backend by a :class:`.CacheRegion`
object.
'''
assert(
isinstance(backend, CacheBackend) or
isinstance(backend, ProxyBackend))
self.proxied = backend
return self
#
# Delegate any functions that are not already overridden to
# the proxied backend
#
def get(self, key):
return self.proxied.get(key)
def set(self, key, value):
self.proxied.set(key, value)
def delete(self, key):
self.proxied.delete(key)
def get_multi(self, keys):
return self.proxied.get_multi(keys)
def set_multi(self, mapping):
self.proxied.set_multi(mapping)
def delete_multi(self, keys):
self.proxied.delete_multi(keys)
def get_mutex(self, key):
return self.proxied.get_mutex(key)
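Building on the pattern in the docstring above, a hedged sketch of a concrete proxy that times backend reads; TimingProxy is a hypothetical name:

import logging
import time

from dogpile.cache.proxy import ProxyBackend

log = logging.getLogger(__name__)

class TimingProxy(ProxyBackend):
    """Log the elapsed time of every backend get()."""
    def get(self, key):
        start = time.time()
        try:
            return self.proxied.get(key)
        finally:
            log.debug("get(%r) took %.6f seconds", key, time.time() - start)

It would be stacked like any other proxy, via the wrap=[TimingProxy] list shown in the docstring above.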

File diff suppressed because it is too large

View File

@@ -1,146 +0,0 @@
from hashlib import sha1
import inspect
from ..util import compat
from ..util import langhelpers
def function_key_generator(namespace, fn, to_str=compat.string_type):
"""Return a function that generates a string
key, based on a given function as well as
arguments to the returned function itself.
This is used by :meth:`.CacheRegion.cache_on_arguments`
to generate a cache key from a decorated function.
An alternate function may be used by specifying
the :paramref:`.CacheRegion.function_key_generator` argument
for :class:`.CacheRegion`.
.. seealso::
:func:`.kwarg_function_key_generator` - similar function that also
takes keyword arguments into account
"""
if namespace is None:
namespace = '%s:%s' % (fn.__module__, fn.__name__)
else:
namespace = '%s:%s|%s' % (fn.__module__, fn.__name__, namespace)
args = inspect.getargspec(fn)
has_self = args[0] and args[0][0] in ('self', 'cls')
def generate_key(*args, **kw):
if kw:
raise ValueError(
"dogpile.cache's default key creation "
"function does not accept keyword arguments.")
if has_self:
args = args[1:]
return namespace + "|" + " ".join(map(to_str, args))
return generate_key
def function_multi_key_generator(namespace, fn, to_str=compat.string_type):
if namespace is None:
namespace = '%s:%s' % (fn.__module__, fn.__name__)
else:
namespace = '%s:%s|%s' % (fn.__module__, fn.__name__, namespace)
args = inspect.getargspec(fn)
has_self = args[0] and args[0][0] in ('self', 'cls')
def generate_keys(*args, **kw):
if kw:
raise ValueError(
"dogpile.cache's default key creation "
"function does not accept keyword arguments.")
if has_self:
args = args[1:]
return [namespace + "|" + key for key in map(to_str, args)]
return generate_keys
def kwarg_function_key_generator(namespace, fn, to_str=compat.string_type):
"""Return a function that generates a string
key, based on a given function as well as
arguments to the returned function itself.
For kwargs passed in, we will build a dict of
all argname (key) argvalue (values) including
default args from the argspec and then
alphabetize the list before generating the
key.
.. versionadded:: 0.6.2
.. seealso::
:func:`.function_key_generator` - default key generation function
"""
if namespace is None:
namespace = '%s:%s' % (fn.__module__, fn.__name__)
else:
namespace = '%s:%s|%s' % (fn.__module__, fn.__name__, namespace)
argspec = inspect.getargspec(fn)
default_list = list(argspec.defaults or [])
# Reverse the list, as we want to compare the argspec by negative index,
# meaning default_list[0] should be args[-1], which works well with
# enumerate()
default_list.reverse()
# use idx*-1 to create the correct right-lookup index.
args_with_defaults = dict((argspec.args[(idx*-1)], default)
for idx, default in enumerate(default_list, 1))
if argspec.args and argspec.args[0] in ('self', 'cls'):
arg_index_start = 1
else:
arg_index_start = 0
def generate_key(*args, **kwargs):
as_kwargs = dict(
[(argspec.args[idx], arg)
for idx, arg in enumerate(args[arg_index_start:],
arg_index_start)])
as_kwargs.update(kwargs)
for arg, val in args_with_defaults.items():
if arg not in as_kwargs:
as_kwargs[arg] = val
argument_values = [as_kwargs[key]
for key in sorted(as_kwargs.keys())]
return namespace + '|' + " ".join(map(to_str, argument_values))
return generate_key
def sha1_mangle_key(key):
"""a SHA1 key mangler."""
return sha1(key).hexdigest()
def length_conditional_mangler(length, mangler):
"""a key mangler that mangles if the length of the key is
past a certain threshold.
"""
def mangle(key):
if len(key) >= length:
return mangler(key)
else:
return key
return mangle
# in the 0.6 release these functions were moved to the dogpile.util namespace.
# They are linked here to maintain compatibility with older versions.
coerce_string_conf = langhelpers.coerce_string_conf
KeyReentrantMutex = langhelpers.KeyReentrantMutex
memoized_property = langhelpers.memoized_property
PluginLoader = langhelpers.PluginLoader
to_list = langhelpers.to_list
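To make the key scheme above concrete, a hedged sketch of what function_key_generator emits; the module name "mymodule" is hypothetical:

def lookup(ident):
    pass

gen = function_key_generator(None, lookup)
gen(5)     # -> 'mymodule:lookup|5'

gen_ns = function_key_generator('tags', lookup)
gen_ns(5)  # -> 'mymodule:lookup|tags|5'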

View File

@@ -1,17 +0,0 @@
"""Compatibility namespace for those using dogpile.core.
As of dogpile.cache 0.6.0, dogpile.core as a separate package
is no longer used by dogpile.cache.
Note that this namespace will not take effect if an actual
dogpile.core installation is present.
"""
from .util import nameregistry # noqa
from .util import readwrite_lock # noqa
from .util.readwrite_lock import ReadWriteMutex # noqa
from .util.nameregistry import NameRegistry # noqa
from .lock import Lock # noqa
from .lock import NeedRegenerationException # noqa
from . import __version__ # noqa

View File

@@ -1,158 +0,0 @@
import time
import logging
log = logging.getLogger(__name__)
class NeedRegenerationException(Exception):
"""An exception that when raised in the 'with' block,
forces the 'has_value' flag to False and incurs a
regeneration of the value.
"""
NOT_REGENERATED = object()
class Lock(object):
"""Dogpile lock class.
Provides an interface around an arbitrary mutex
that allows one thread/process to be elected as
the creator of a new value, while other threads/processes
continue to return the previous version
of that value.
:param mutex: A mutex object that provides ``acquire()``
and ``release()`` methods.
:param creator: Callable which returns a tuple of the form
(new_value, creation_time). "new_value" should be a newly
generated value representing completed state. "creation_time"
should be a floating point time value which is relative
to Python's ``time.time()`` call, representing the time
at which the value was created. This time value should
be associated with the created value.
:param value_and_created_fn: Callable which returns
a tuple of the form (existing_value, creation_time). This
basically should return what the last local call to the ``creator()``
callable has returned, i.e. the value and the creation time,
which would be assumed here to be from a cache. If the
value is not available, the :class:`.NeedRegenerationException`
exception should be thrown.
:param expiretime: Expiration time in seconds. Set to
``None`` for never expires. This timestamp is compared
to the creation_time result and ``time.time()`` to determine if
the value returned by value_and_created_fn is "expired".
:param async_creator: A callable. If specified, this callable will be
passed the mutex as an argument and is responsible for releasing the mutex
after it finishes some asynchronous value creation. The intent is for
this to be used to defer invocation of the creator callable until some
later time.
"""
def __init__(
self,
mutex,
creator,
value_and_created_fn,
expiretime,
async_creator=None,
):
self.mutex = mutex
self.creator = creator
self.value_and_created_fn = value_and_created_fn
self.expiretime = expiretime
self.async_creator = async_creator
def _is_expired(self, createdtime):
"""Return true if the expiration time is reached, or no
value is available."""
return not self._has_value(createdtime) or \
(
self.expiretime is not None and
time.time() - createdtime > self.expiretime
)
def _has_value(self, createdtime):
"""Return true if the creation function has proceeded
at least once."""
return createdtime > 0
def _enter(self):
value_fn = self.value_and_created_fn
try:
value = value_fn()
value, createdtime = value
except NeedRegenerationException:
log.debug("NeedRegenerationException")
value = NOT_REGENERATED
createdtime = -1
generated = self._enter_create(createdtime)
if generated is not NOT_REGENERATED:
generated, createdtime = generated
return generated
elif value is NOT_REGENERATED:
try:
value, createdtime = value_fn()
return value
except NeedRegenerationException:
raise Exception("Generation function should "
"have just been called by a concurrent "
"thread.")
else:
return value
def _enter_create(self, createdtime):
if not self._is_expired(createdtime):
return NOT_REGENERATED
_async = False
if self._has_value(createdtime):
if not self.mutex.acquire(False):
log.debug("creation function in progress "
"elsewhere, returning")
return NOT_REGENERATED
else:
log.debug("no value, waiting for create lock")
self.mutex.acquire()
try:
log.debug("value creation lock %r acquired" % self.mutex)
# see if someone created the value already
try:
value, createdtime = self.value_and_created_fn()
except NeedRegenerationException:
pass
else:
if not self._is_expired(createdtime):
log.debug("value already present")
return value, createdtime
elif self.async_creator:
log.debug("Passing creation lock to async runner")
self.async_creator(self.mutex)
_async = True
return value, createdtime
log.debug("Calling creation function")
created = self.creator()
return created
finally:
if not _async:
self.mutex.release()
log.debug("Released creation lock")
def __enter__(self):
return self._enter()
def __exit__(self, type, value, traceback):
pass
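For orientation (not part of this diff), a hedged sketch of driving Lock by hand, with a plain threading mutex and a module-level dict standing in for the cache; the import path matches this vendored layout and the helper names are hypothetical:

import threading
import time

from dogpile.lock import Lock, NeedRegenerationException

_cache = {}  # maps 'v' -> (value, creation_time)
_mutex = threading.Lock()

def creator():
    value = "expensive result"          # stand-in for real work
    _cache['v'] = (value, time.time())  # persist value plus creation time
    return _cache['v']

def value_and_created_fn():
    try:
        return _cache['v']
    except KeyError:
        # no value yet: tell the Lock a regeneration is needed
        raise NeedRegenerationException()

# expiretime=60: values older than 60 seconds are regenerated
with Lock(_mutex, creator, value_and_created_fn, 60) as value:
    print(value)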

View File

@@ -1,4 +0,0 @@
from .nameregistry import NameRegistry # noqa
from .readwrite_lock import ReadWriteMutex # noqa
from .langhelpers import PluginLoader, memoized_property, \
coerce_string_conf, to_list, KeyReentrantMutex # noqa

View File

@@ -1,65 +0,0 @@
import sys
py2k = sys.version_info < (3, 0)
py3k = sys.version_info >= (3, 0)
py32 = sys.version_info >= (3, 2)
py27 = sys.version_info >= (2, 7)
jython = sys.platform.startswith('java')
win32 = sys.platform.startswith('win')
try:
import threading
except ImportError:
import dummy_threading as threading # noqa
if py3k: # pragma: no cover
string_types = str,
text_type = str
string_type = str
if py32:
callable = callable
else:
def callable(fn):
return hasattr(fn, '__call__')
def u(s):
return s
def ue(s):
return s
import configparser
import io
import _thread as thread
else:
string_types = basestring,
text_type = unicode
string_type = str
def u(s):
return unicode(s, "utf-8")
def ue(s):
return unicode(s, "unicode_escape")
import ConfigParser as configparser # noqa
import StringIO as io # noqa
callable = callable # noqa
import thread # noqa
if py3k or jython:
import pickle
else:
import cPickle as pickle # noqa
def timedelta_total_seconds(td):
if py27:
return td.total_seconds()
else:
return (td.microseconds + (
td.seconds + td.days * 24 * 3600) * 1e6) / 1e6

View File

@@ -1,123 +0,0 @@
import re
import collections
from . import compat
def coerce_string_conf(d):
result = {}
for k, v in d.items():
if not isinstance(v, compat.string_types):
result[k] = v
continue
v = v.strip()
if re.match(r'^[-+]?\d+$', v):
result[k] = int(v)
elif re.match(r'^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?$', v):
result[k] = float(v)
elif v.lower() in ('false', 'true'):
result[k] = v.lower() == 'true'
elif v == 'None':
result[k] = None
else:
result[k] = v
return result
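A hedged example of coerce_string_conf in action, matching the regexes above:

coerce_string_conf({
    'expiration_time': '3600',   # -> int 3600
    'lock_sleep': '0.1',         # -> float 0.1
    'distributed_lock': 'true',  # -> True
    'backend': 'None',           # -> None
    'url': '127.0.0.1:11211',    # matches nothing, left as a string
})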
class PluginLoader(object):
def __init__(self, group):
self.group = group
self.impls = {}
def load(self, name):
if name in self.impls:
return self.impls[name]()
else: # pragma NO COVERAGE
import pkg_resources
for impl in pkg_resources.iter_entry_points(
self.group, name):
self.impls[name] = impl.load
return impl.load()
else:
raise self.NotFound(
"Can't load plugin %s %s" % (self.group, name)
)
def register(self, name, modulepath, objname):
def load():
mod = __import__(modulepath, fromlist=[objname])
return getattr(mod, objname)
self.impls[name] = load
class NotFound(Exception):
"""The specified plugin could not be found."""
class memoized_property(object):
"""A read-only @property that is only evaluated once."""
def __init__(self, fget, doc=None):
self.fget = fget
self.__doc__ = doc or fget.__doc__
self.__name__ = fget.__name__
def __get__(self, obj, cls):
if obj is None:
return self
obj.__dict__[self.__name__] = result = self.fget(obj)
return result
def to_list(x, default=None):
"""Coerce to a list."""
if x is None:
return default
if not isinstance(x, (list, tuple)):
return [x]
else:
return x
class KeyReentrantMutex(object):
def __init__(self, key, mutex, keys):
self.key = key
self.mutex = mutex
self.keys = keys
@classmethod
def factory(cls, mutex):
# this collection holds zero or one
# thread idents as the key; a set of
# keynames held as the value.
keystore = collections.defaultdict(set)
def fac(key):
return KeyReentrantMutex(key, mutex, keystore)
return fac
def acquire(self, wait=True):
current_thread = compat.threading.current_thread().ident
keys = self.keys.get(current_thread)
if keys is not None and \
self.key not in keys:
# current lockholder, new key. add it in
keys.add(self.key)
return True
elif self.mutex.acquire(wait=wait):
# after acquire, create new set and add our key
self.keys[current_thread].add(self.key)
return True
else:
return False
def release(self):
current_thread = compat.threading.current_thread().ident
keys = self.keys.get(current_thread)
assert keys is not None, "this thread didn't do the acquire"
assert self.key in keys, "No acquire held for key '%s'" % self.key
keys.remove(self.key)
if not keys:
# when list of keys empty, remove
# the thread ident and unlock.
del self.keys[current_thread]
self.mutex.release()

View File

@@ -1,84 +0,0 @@
from .compat import threading
import weakref
class NameRegistry(object):
"""Generates and return an object, keeping it as a
singleton for a certain identifier for as long as its
strongly referenced.
e.g.::
class MyFoo(object):
"some important object."
def __init__(self, identifier):
self.identifier = identifier
registry = NameRegistry(MyFoo)
# thread 1:
my_foo = registry.get("foo1")
# thread 2
my_foo = registry.get("foo1")
Above, ``my_foo`` in both thread #1 and #2 will
be *the same object*. The constructor for
``MyFoo`` will be called once, passing the
identifier ``foo1`` as the argument.
When thread 1 and thread 2 both complete or
otherwise delete references to ``my_foo``, the
object is *removed* from the :class:`.NameRegistry` as
a result of Python garbage collection.
:param creator: A function that will create a new
value, given the identifier passed to the :meth:`.NameRegistry.get`
method.
"""
_locks = weakref.WeakValueDictionary()
_mutex = threading.RLock()
def __init__(self, creator):
"""Create a new :class:`.NameRegistry`.
"""
self._values = weakref.WeakValueDictionary()
self._mutex = threading.RLock()
self.creator = creator
def get(self, identifier, *args, **kw):
"""Get and possibly create the value.
:param identifier: Hash key for the value.
If the creation function is called, this identifier
will also be passed to the creation function.
:param \*args, \**kw: Additional arguments which will
also be passed to the creation function if it is
called.
"""
try:
if identifier in self._values:
return self._values[identifier]
else:
return self._sync_get(identifier, *args, **kw)
except KeyError:
return self._sync_get(identifier, *args, **kw)
def _sync_get(self, identifier, *args, **kw):
self._mutex.acquire()
try:
try:
if identifier in self._values:
return self._values[identifier]
else:
self._values[identifier] = value = self.creator(identifier, *args, **kw)
return value
except KeyError:
self._values[identifier] = value = self.creator(identifier, *args, **kw)
return value
finally:
self._mutex.release()

View File

@@ -1,132 +0,0 @@
from .compat import threading
import logging
log = logging.getLogger(__name__)
class LockError(Exception):
pass
class ReadWriteMutex(object):
"""A mutex which allows multiple readers, single writer.
:class:`.ReadWriteMutex` uses a Python ``threading.Condition``
to provide this functionality across threads within a process.
The Beaker package also contained a file-lock based version
of this concept, so that readers/writers could be synchronized
across processes with a common filesystem. A future Dogpile
release may include this additional class at some point.
"""
def __init__(self):
# counts how many asynchronous methods are executing
self._async = 0
# pointer to thread that is the current sync operation
self.current_sync_operation = None
# condition object to lock on
self.condition = threading.Condition(threading.Lock())
def acquire_read_lock(self, wait = True):
"""Acquire the 'read' lock."""
self.condition.acquire()
try:
# see if a synchronous operation is waiting to start
# or is already running, in which case we wait (or just
# give up and return)
if wait:
while self.current_sync_operation is not None:
self.condition.wait()
else:
if self.current_sync_operation is not None:
return False
self._async += 1
log.debug("%s acquired read lock", self)
finally:
self.condition.release()
if not wait:
return True
def release_read_lock(self):
"""Release the 'read' lock."""
self.condition.acquire()
try:
self._async -= 1
# check if we are the last asynchronous reader thread
# out the door.
if self._async == 0:
# yes. so if a sync operation is waiting, notifyAll to wake
# it up
if self.current_sync_operation is not None:
self.condition.notifyAll()
elif self._async < 0:
raise LockError("Synchronizer error - too many "
"release_read_locks called")
log.debug("%s released read lock", self)
finally:
self.condition.release()
def acquire_write_lock(self, wait = True):
"""Acquire the 'write' lock."""
self.condition.acquire()
try:
# here, we are not a synchronous reader, and after returning,
# assuming waiting or immediate availability, we will be.
if wait:
# if another sync is working, wait
while self.current_sync_operation is not None:
self.condition.wait()
else:
# if another sync is working,
# we dont want to wait, so forget it
if self.current_sync_operation is not None:
return False
# establish ourselves as the current sync
# this indicates to other read/write operations
# that they should wait until this is None again
self.current_sync_operation = threading.currentThread()
# now wait again for asyncs to finish
if self._async > 0:
if wait:
# wait
self.condition.wait()
else:
# we dont want to wait, so forget it
self.current_sync_operation = None
return False
log.debug("%s acquired write lock", self)
finally:
self.condition.release()
if not wait:
return True
def release_write_lock(self):
"""Release the 'write' lock."""
self.condition.acquire()
try:
if self.current_sync_operation is not threading.currentThread():
raise LockError("Synchronizer error - current thread doesn't "
"have the write lock")
# reset the current sync operation so
# another can get it
self.current_sync_operation = None
# tell everyone to get ready
self.condition.notifyAll()
log.debug("%s released write lock", self)
finally:
# everyone go !!
self.condition.release()
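A hedged usage sketch of the mutex above: read sections may overlap with each other, while a write section excludes everything else:

rw = ReadWriteMutex()

# readers: any number may hold the lock concurrently
rw.acquire_read_lock()
try:
    pass  # read shared state
finally:
    rw.release_read_lock()

# writer: waits for readers to drain, then runs exclusively
rw.acquire_write_lock()
try:
    pass  # mutate shared state
finally:
    rw.release_write_lock()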

libs/enum2/LICENSE Normal file
View File

@@ -0,0 +1,32 @@
Copyright (c) 2013, Ethan Furman.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
Redistributions of source code must retain the above
copyright notice, this list of conditions and the
following disclaimer.
Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
Neither the name Ethan Furman nor the names of any
contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

libs/enum2/README Normal file
View File

@@ -0,0 +1,3 @@
enum34 is the new Python stdlib enum module available in Python 3.4,
backported for previous versions of Python from 2.4 to 3.3.
Tested on 2.6, 2.7, and 3.3+.
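A hedged quick-start for the backport, assuming it is importable as "enum" (as with the upstream enum34 distribution):

from enum import Enum

class Color(Enum):
    red = 1
    green = 2
    blue = 3

Color.red.name   # 'red'
Color.red.value  # 1
Color(2)         # Color.green -- lookup by value
Color['blue']    # Color.blue  -- lookup by name
list(Color)      # [Color.red, Color.green, Color.blue]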

libs/enum2/__init__.py Normal file
View File

@@ -0,0 +1,837 @@
"""Python Enumerations"""
import sys as _sys
__all__ = ['Enum', 'IntEnum', 'unique']
version = 1, 1, 6
pyver = float('%s.%s' % _sys.version_info[:2])
try:
any
except NameError:
def any(iterable):
for element in iterable:
if element:
return True
return False
try:
from collections import OrderedDict
except ImportError:
OrderedDict = None
try:
basestring
except NameError:
# In Python 2 basestring is the ancestor of both str and unicode
# in Python 3 it's just str, but was missing in 3.1
basestring = str
try:
unicode
except NameError:
# In Python 3 unicode no longer exists (it's just str)
unicode = str
class _RouteClassAttributeToGetattr(object):
"""Route attribute access on a class to __getattr__.
This is a descriptor, used to define attributes that act differently when
accessed through an instance and through a class. Instance access remains
normal, but access to an attribute through a class will be routed to the
class's __getattr__ method; this is done by raising AttributeError.
"""
def __init__(self, fget=None):
self.fget = fget
def __get__(self, instance, ownerclass=None):
if instance is None:
raise AttributeError()
return self.fget(instance)
def __set__(self, instance, value):
raise AttributeError("can't set attribute")
def __delete__(self, instance):
raise AttributeError("can't delete attribute")
def _is_descriptor(obj):
"""Returns True if obj is a descriptor, False otherwise."""
return (
hasattr(obj, '__get__') or
hasattr(obj, '__set__') or
hasattr(obj, '__delete__'))
def _is_dunder(name):
"""Returns True if a __dunder__ name, False otherwise."""
return (name[:2] == name[-2:] == '__' and
name[2:3] != '_' and
name[-3:-2] != '_' and
len(name) > 4)
def _is_sunder(name):
"""Returns True if a _sunder_ name, False otherwise."""
return (name[0] == name[-1] == '_' and
name[1:2] != '_' and
name[-2:-1] != '_' and
len(name) > 2)
def _make_class_unpicklable(cls):
"""Make the given class un-picklable."""
def _break_on_call_reduce(self, protocol=None):
raise TypeError('%r cannot be pickled' % self)
cls.__reduce_ex__ = _break_on_call_reduce
cls.__module__ = '<unknown>'
class _EnumDict(dict):
"""Track enum member order and ensure member names are not reused.
EnumMeta will use the names found in self._member_names as the
enumeration member names.
"""
def __init__(self):
super(_EnumDict, self).__init__()
self._member_names = []
def __setitem__(self, key, value):
"""Changes anything not dundered or not a descriptor.
If a descriptor is added with the same name as an enum member, the name
is removed from _member_names (this may leave a hole in the numerical
sequence of values).
If an enum member name is used twice, an error is raised; duplicate
values are not checked for.
Single underscore (sunder) names are reserved.
Note: in 3.x, __order__ is simply discarded as an
unnecessary leftover from 2.x
"""
if pyver >= 3.0 and key in ('_order_', '__order__'):
return
elif key == '__order__':
key = '_order_'
if _is_sunder(key):
if key != '_order_':
raise ValueError('_names_ are reserved for future Enum use')
elif _is_dunder(key):
pass
elif key in self._member_names:
# descriptor overwriting an enum?
raise TypeError('Attempted to reuse key: %r' % key)
elif not _is_descriptor(value):
if key in self:
# enum overwriting a descriptor?
raise TypeError('Key already defined as: %r' % self[key])
self._member_names.append(key)
super(_EnumDict, self).__setitem__(key, value)
# Dummy value for Enum as EnumMeta explicitly checks for it, but of course until
# EnumMeta finishes running the first time the Enum class doesn't exist. This
# is also why there are checks in EnumMeta like `if Enum is not None`
Enum = None
class EnumMeta(type):
"""Metaclass for Enum"""
@classmethod
def __prepare__(metacls, cls, bases):
return _EnumDict()
def __new__(metacls, cls, bases, classdict):
# an Enum class is final once enumeration items have been defined; it
# cannot be mixed with other types (int, float, etc.) if it has an
# inherited __new__ unless a new __new__ is defined (or the resulting
# class will fail).
if type(classdict) is dict:
original_dict = classdict
classdict = _EnumDict()
for k, v in original_dict.items():
classdict[k] = v
member_type, first_enum = metacls._get_mixins_(bases)
__new__, save_new, use_args = metacls._find_new_(classdict, member_type,
first_enum)
# save enum items into separate mapping so they don't get baked into
# the new class
members = dict((k, classdict[k]) for k in classdict._member_names)
for name in classdict._member_names:
del classdict[name]
# py2 support for definition order
_order_ = classdict.get('_order_')
if _order_ is None:
if pyver < 3.0:
try:
_order_ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])]
except TypeError:
_order_ = [name for name in sorted(members.keys())]
else:
_order_ = classdict._member_names
else:
del classdict['_order_']
if pyver < 3.0:
_order_ = _order_.replace(',', ' ').split()
aliases = [name for name in members if name not in _order_]
_order_ += aliases
# check for illegal enum names (any others?)
invalid_names = set(members) & set(['mro'])
if invalid_names:
raise ValueError('Invalid enum member name(s): %s' % (
', '.join(invalid_names), ))
# save attributes from super classes so we know if we can take
# the shortcut of storing members in the class dict
base_attributes = set([a for b in bases for a in b.__dict__])
# create our new Enum type
enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict)
enum_class._member_names_ = [] # names in random order
if OrderedDict is not None:
enum_class._member_map_ = OrderedDict()
else:
enum_class._member_map_ = {} # name->value map
enum_class._member_type_ = member_type
# Reverse value->name map for hashable values.
enum_class._value2member_map_ = {}
# instantiate them, checking for duplicates as we go
# we instantiate first instead of checking for duplicates first in case
# a custom __new__ is doing something funky with the values -- such as
# auto-numbering ;)
if __new__ is None:
__new__ = enum_class.__new__
for member_name in _order_:
value = members[member_name]
if not isinstance(value, tuple):
args = (value, )
else:
args = value
if member_type is tuple: # special case for tuple enums
args = (args, ) # wrap it one more time
if not use_args or not args:
enum_member = __new__(enum_class)
if not hasattr(enum_member, '_value_'):
enum_member._value_ = value
else:
enum_member = __new__(enum_class, *args)
if not hasattr(enum_member, '_value_'):
enum_member._value_ = member_type(*args)
value = enum_member._value_
enum_member._name_ = member_name
enum_member.__objclass__ = enum_class
enum_member.__init__(*args)
# If another member with the same value was already defined, the
# new member becomes an alias to the existing one.
for name, canonical_member in enum_class._member_map_.items():
if canonical_member.value == enum_member._value_:
enum_member = canonical_member
break
else:
# Aliases don't appear in member names (only in __members__).
enum_class._member_names_.append(member_name)
# performance boost for any member that would not shadow
# a DynamicClassAttribute (aka _RouteClassAttributeToGetattr)
if member_name not in base_attributes:
setattr(enum_class, member_name, enum_member)
# now add to _member_map_
enum_class._member_map_[member_name] = enum_member
try:
# This may fail if value is not hashable. We can't add the value
# to the map, and by-value lookups for this value will be
# linear.
enum_class._value2member_map_[value] = enum_member
except TypeError:
pass
# If a custom type is mixed into the Enum, and it does not know how
# to pickle itself, pickle.dumps will succeed but pickle.loads will
# fail. Rather than have the error show up later and possibly far
# from the source, sabotage the pickle protocol for this class so
# that pickle.dumps also fails.
#
# However, if the new class implements its own __reduce_ex__, do not
# sabotage -- it's on them to make sure it works correctly. We use
# __reduce_ex__ instead of any of the others as it is preferred by
# pickle over __reduce__, and it handles all pickle protocols.
unpicklable = False
if '__reduce_ex__' not in classdict:
if member_type is not object:
methods = ('__getnewargs_ex__', '__getnewargs__',
'__reduce_ex__', '__reduce__')
if not any(m in member_type.__dict__ for m in methods):
_make_class_unpicklable(enum_class)
unpicklable = True
# double check that repr and friends are not the mixin's or various
# things break (such as pickle)
for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'):
class_method = getattr(enum_class, name)
obj_method = getattr(member_type, name, None)
enum_method = getattr(first_enum, name, None)
if name not in classdict and class_method is not enum_method:
if name == '__reduce_ex__' and unpicklable:
continue
setattr(enum_class, name, enum_method)
# method resolution and int's are not playing nice
# Python's less than 2.6 use __cmp__
if pyver < 2.6:
if issubclass(enum_class, int):
setattr(enum_class, '__cmp__', getattr(int, '__cmp__'))
elif pyver < 3.0:
if issubclass(enum_class, int):
for method in (
'__le__',
'__lt__',
'__gt__',
'__ge__',
'__eq__',
'__ne__',
'__hash__',
):
setattr(enum_class, method, getattr(int, method))
# replace any other __new__ with our own (as long as Enum is not None,
# anyway) -- again, this is to support pickle
if Enum is not None:
# if the user defined their own __new__, save it before it gets
# clobbered in case they subclass later
if save_new:
setattr(enum_class, '__member_new__', enum_class.__dict__['__new__'])
setattr(enum_class, '__new__', Enum.__dict__['__new__'])
return enum_class
def __bool__(cls):
"""
classes/types should always be True.
"""
return True
def __call__(cls, value, names=None, module=None, type=None, start=1):
"""Either returns an existing member, or creates a new enum class.
This method is used both when an enum class is given a value to match
to an enumeration member (i.e. Color(3)) and for the functional API
(i.e. Color = Enum('Color', names='red green blue')).
When used for the functional API: `module`, if set, will be stored in
the new class' __module__ attribute; `type`, if set, will be mixed in
as the first base class.
Note: if `module` is not set this routine will attempt to discover the
calling module by walking the frame stack; if this is unsuccessful
the resulting class will not be pickleable.
"""
if names is None: # simple value lookup
return cls.__new__(cls, value)
# otherwise, functional API: we're creating a new Enum type
return cls._create_(value, names, module=module, type=type, start=start)
def __contains__(cls, member):
return isinstance(member, cls) and member.name in cls._member_map_
def __delattr__(cls, attr):
# nicer error message when someone tries to delete an attribute
# (see issue19025).
if attr in cls._member_map_:
raise AttributeError(
"%s: cannot delete Enum member." % cls.__name__)
super(EnumMeta, cls).__delattr__(attr)
def __dir__(self):
return (['__class__', '__doc__', '__members__', '__module__'] +
self._member_names_)
@property
def __members__(cls):
"""Returns a mapping of member name->value.
This mapping lists all enum members, including aliases. Note that this
is a copy of the internal mapping.
"""
return cls._member_map_.copy()
def __getattr__(cls, name):
"""Return the enum member matching `name`
We use __getattr__ instead of descriptors or inserting into the enum
class' __dict__ in order to support `name` and `value` being both
properties for enum members (which live in the class' __dict__) and
enum members themselves.
"""
if _is_dunder(name):
raise AttributeError(name)
try:
return cls._member_map_[name]
except KeyError:
raise AttributeError(name)
def __getitem__(cls, name):
return cls._member_map_[name]
def __iter__(cls):
return (cls._member_map_[name] for name in cls._member_names_)
def __reversed__(cls):
return (cls._member_map_[name] for name in reversed(cls._member_names_))
def __len__(cls):
return len(cls._member_names_)
__nonzero__ = __bool__
def __repr__(cls):
return "<enum %r>" % cls.__name__
def __setattr__(cls, name, value):
"""Block attempts to reassign Enum members.
A simple assignment to the class namespace only changes one of the
several possible ways to get an Enum member from the Enum class,
resulting in an inconsistent Enumeration.
"""
member_map = cls.__dict__.get('_member_map_', {})
if name in member_map:
raise AttributeError('Cannot reassign members.')
super(EnumMeta, cls).__setattr__(name, value)
def _create_(cls, class_name, names=None, module=None, type=None, start=1):
"""Convenience method to create a new Enum class.
`names` can be:
* A string containing member names, separated either with spaces or
commas. Values are auto-numbered from 1.
* An iterable of member names. Values are auto-numbered from 1.
* An iterable of (member name, value) pairs.
* A mapping of member name -> value.
"""
if pyver < 3.0:
# if class_name is unicode, attempt a conversion to ASCII
if isinstance(class_name, unicode):
try:
class_name = class_name.encode('ascii')
except UnicodeEncodeError:
raise TypeError('%r is not representable in ASCII' % class_name)
metacls = cls.__class__
if type is None:
bases = (cls, )
else:
bases = (type, cls)
classdict = metacls.__prepare__(class_name, bases)
_order_ = []
# special processing needed for names?
if isinstance(names, basestring):
names = names.replace(',', ' ').split()
if isinstance(names, (tuple, list)) and isinstance(names[0], basestring):
names = [(e, i+start) for (i, e) in enumerate(names)]
# Here, names is either an iterable of (name, value) or a mapping.
item = None # in case names is empty
for item in names:
if isinstance(item, basestring):
member_name, member_value = item, names[item]
else:
member_name, member_value = item
classdict[member_name] = member_value
_order_.append(member_name)
# only set _order_ in classdict if name/value was not from a mapping
if not isinstance(item, basestring):
classdict['_order_'] = ' '.join(_order_)
enum_class = metacls.__new__(metacls, class_name, bases, classdict)
# TODO: replace the frame hack if a blessed way to know the calling
# module is ever developed
if module is None:
try:
module = _sys._getframe(2).f_globals['__name__']
except (AttributeError, ValueError):
pass
if module is None:
_make_class_unpicklable(enum_class)
else:
enum_class.__module__ = module
return enum_class
@staticmethod
def _get_mixins_(bases):
"""Returns the type for creating enum members, and the first inherited
enum class.
bases: the tuple of bases that was given to __new__
"""
if not bases or Enum is None:
return object, Enum
# double check that we are not subclassing a class with existing
# enumeration members; while we're at it, see if any other data
# type has been mixed in so we can use the correct __new__
member_type = first_enum = None
for base in bases:
if (base is not Enum and
issubclass(base, Enum) and
base._member_names_):
raise TypeError("Cannot extend enumerations")
# base is now the last base in bases
if not issubclass(base, Enum):
raise TypeError("new enumerations must be created as "
"`ClassName([mixin_type,] enum_type)`")
# get correct mix-in type (either mix-in type of Enum subclass, or
# first base if last base is Enum)
if not issubclass(bases[0], Enum):
member_type = bases[0] # first data type
first_enum = bases[-1] # enum type
else:
for base in bases[0].__mro__:
# most common: (IntEnum, int, Enum, object)
# possible: (<Enum 'AutoIntEnum'>, <Enum 'IntEnum'>,
# <class 'int'>, <Enum 'Enum'>,
# <class 'object'>)
if issubclass(base, Enum):
if first_enum is None:
first_enum = base
else:
if member_type is None:
member_type = base
return member_type, first_enum
if pyver < 3.0:
@staticmethod
def _find_new_(classdict, member_type, first_enum):
"""Returns the __new__ to be used for creating the enum members.
classdict: the class dictionary given to __new__
member_type: the data type whose __new__ will be used by default
first_enum: enumeration to check for an overriding __new__
"""
# now find the correct __new__, checking to see if one was defined
# by the user; also check earlier enum classes in case a __new__ was
# saved as __member_new__
__new__ = classdict.get('__new__', None)
if __new__:
return None, True, True # __new__, save_new, use_args
N__new__ = getattr(None, '__new__')
O__new__ = getattr(object, '__new__')
if Enum is None:
E__new__ = N__new__
else:
E__new__ = Enum.__dict__['__new__']
# check all possibles for __member_new__ before falling back to
# __new__
for method in ('__member_new__', '__new__'):
for possible in (member_type, first_enum):
try:
target = possible.__dict__[method]
except (AttributeError, KeyError):
target = getattr(possible, method, None)
if target not in [
None,
N__new__,
O__new__,
E__new__,
]:
if method == '__member_new__':
classdict['__new__'] = target
return None, False, True
if isinstance(target, staticmethod):
target = target.__get__(member_type)
__new__ = target
break
if __new__ is not None:
break
else:
__new__ = object.__new__
# if a non-object.__new__ is used then whatever value/tuple was
# assigned to the enum member name will be passed to __new__ and to the
# new enum member's __init__
if __new__ is object.__new__:
use_args = False
else:
use_args = True
return __new__, False, use_args
else:
@staticmethod
def _find_new_(classdict, member_type, first_enum):
"""Returns the __new__ to be used for creating the enum members.
classdict: the class dictionary given to __new__
member_type: the data type whose __new__ will be used by default
first_enum: enumeration to check for an overriding __new__
"""
# now find the correct __new__, checking to see if one was defined
# by the user; also check earlier enum classes in case a __new__ was
# saved as __member_new__
__new__ = classdict.get('__new__', None)
# should __new__ be saved as __member_new__ later?
save_new = __new__ is not None
if __new__ is None:
# check all possibles for __member_new__ before falling back to
# __new__
for method in ('__member_new__', '__new__'):
for possible in (member_type, first_enum):
target = getattr(possible, method, None)
if target not in (
None,
None.__new__,
object.__new__,
Enum.__new__,
):
__new__ = target
break
if __new__ is not None:
break
else:
__new__ = object.__new__
# if a non-object.__new__ is used then whatever value/tuple was
# assigned to the enum member name will be passed to __new__ and to the
# new enum member's __init__
if __new__ is object.__new__:
use_args = False
else:
use_args = True
return __new__, save_new, use_args
########################################################
# In order to support Python 2 and 3 with a single
# codebase we have to create the Enum methods separately
# and then use the `type(name, bases, dict)` method to
# create the class.
########################################################
temp_enum_dict = {}
temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n"
def __new__(cls, value):
# all enum instances are actually created during class construction
# without calling this method; this method is called by the metaclass'
# __call__ (i.e. Color(3) ), and by pickle
if type(value) is cls:
# For lookups like Color(Color.red)
value = value.value
#return value
# by-value search for a matching enum member
# see if it's in the reverse mapping (for hashable values)
try:
if value in cls._value2member_map_:
return cls._value2member_map_[value]
except TypeError:
# not there, now do long search -- O(n) behavior
for member in cls._member_map_.values():
if member.value == value:
return member
raise ValueError("%s is not a valid %s" % (value, cls.__name__))
temp_enum_dict['__new__'] = __new__
del __new__
def __repr__(self):
return "<%s.%s: %r>" % (
self.__class__.__name__, self._name_, self._value_)
temp_enum_dict['__repr__'] = __repr__
del __repr__
def __str__(self):
return "%s.%s" % (self.__class__.__name__, self._name_)
temp_enum_dict['__str__'] = __str__
del __str__
if pyver >= 3.0:
def __dir__(self):
added_behavior = [
m
for cls in self.__class__.mro()
for m in cls.__dict__
if m[0] != '_' and m not in self._member_map_
]
return (['__class__', '__doc__', '__module__', ] + added_behavior)
temp_enum_dict['__dir__'] = __dir__
del __dir__
def __format__(self, format_spec):
# mixed-in Enums should use the mixed-in type's __format__, otherwise
# we can get strange results with the Enum name showing up instead of
# the value
# pure Enum branch
if self._member_type_ is object:
cls = str
val = str(self)
# mix-in branch
else:
cls = self._member_type_
val = self.value
return cls.__format__(val, format_spec)
temp_enum_dict['__format__'] = __format__
del __format__
####################################
# Python's less than 2.6 use __cmp__
if pyver < 2.6:
def __cmp__(self, other):
if type(other) is self.__class__:
if self is other:
return 0
return -1
return NotImplemented
temp_enum_dict['__cmp__'] = __cmp__
del __cmp__
else:
def __le__(self, other):
raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__))
temp_enum_dict['__le__'] = __le__
del __le__
def __lt__(self, other):
raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__))
temp_enum_dict['__lt__'] = __lt__
del __lt__
def __ge__(self, other):
raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__))
temp_enum_dict['__ge__'] = __ge__
del __ge__
def __gt__(self, other):
raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__))
temp_enum_dict['__gt__'] = __gt__
del __gt__
def __eq__(self, other):
if type(other) is self.__class__:
return self is other
return NotImplemented
temp_enum_dict['__eq__'] = __eq__
del __eq__
def __ne__(self, other):
if type(other) is self.__class__:
return self is not other
return NotImplemented
temp_enum_dict['__ne__'] = __ne__
del __ne__
def __hash__(self):
return hash(self._name_)
temp_enum_dict['__hash__'] = __hash__
del __hash__
def __reduce_ex__(self, proto):
return self.__class__, (self._value_, )
temp_enum_dict['__reduce_ex__'] = __reduce_ex__
del __reduce_ex__
# _RouteClassAttributeToGetattr is used to provide access to the `name`
# and `value` properties of enum members while keeping some measure of
# protection from modification, while still allowing for an enumeration
# to have members named `name` and `value`. This works because enumeration
# members are not set directly on the enum class -- __getattr__ is
# used to look them up.
@_RouteClassAttributeToGetattr
def name(self):
return self._name_
temp_enum_dict['name'] = name
del name
@_RouteClassAttributeToGetattr
def value(self):
return self._value_
temp_enum_dict['value'] = value
del value
@classmethod
def _convert(cls, name, module, filter, source=None):
"""
Create a new Enum subclass that replaces a collection of global constants
"""
# convert all constants from source (or module) that pass filter() to
# a new Enum called name, and export the enum and its members back to
# module;
# also, replace the __reduce_ex__ method so unpickling works in
# previous Python versions
module_globals = vars(_sys.modules[module])
if source:
source = vars(source)
else:
source = module_globals
members = dict((name, value) for name, value in source.items() if filter(name))
cls = cls(name, members, module=module)
cls.__reduce_ex__ = _reduce_ex_by_name
module_globals.update(cls.__members__)
module_globals[name] = cls
return cls
temp_enum_dict['_convert'] = _convert
del _convert
Enum = EnumMeta('Enum', (object, ), temp_enum_dict)
del temp_enum_dict
# Enum has now been created
###########################
class IntEnum(int, Enum):
"""Enum where members are also (and must be) ints"""
def _reduce_ex_by_name(self, proto):
return self.name
def unique(enumeration):
"""Class decorator that ensures only unique members exist in an enumeration."""
duplicates = []
for name, member in enumeration.__members__.items():
if name != member.name:
duplicates.append((name, member.name))
if duplicates:
duplicate_names = ', '.join(
["%s -> %s" % (alias, name) for (alias, name) in duplicates]
)
raise ValueError('duplicate names found in %r: %s' %
(enumeration, duplicate_names)
)
return enumeration

2237
libs/enum2/doc/enum.pdf Normal file

File diff suppressed because it is too large Load Diff

735
libs/enum2/doc/enum.rst Normal file
View File

@ -0,0 +1,735 @@
``enum`` --- support for enumerations
========================================
.. :synopsis: enumerations are sets of symbolic names bound to unique, constant
values.
.. :moduleauthor:: Ethan Furman <ethan@stoneleaf.us>
.. :sectionauthor:: Barry Warsaw <barry@python.org>,
.. :sectionauthor:: Eli Bendersky <eliben@gmail.com>,
.. :sectionauthor:: Ethan Furman <ethan@stoneleaf.us>
----------------
An enumeration is a set of symbolic names (members) bound to unique, constant
values. Within an enumeration, the members can be compared by identity, and
the enumeration itself can be iterated over.
Module Contents
---------------
This module defines two enumeration classes that can be used to define unique
sets of names and values: ``Enum`` and ``IntEnum``. It also defines
one decorator, ``unique``.
``Enum``
Base class for creating enumerated constants. See section `Functional API`_
for an alternate construction syntax.
``IntEnum``
Base class for creating enumerated constants that are also subclasses of ``int``.
``unique``
Enum class decorator that ensures only one name is bound to any one value.
Creating an Enum
----------------
Enumerations are created using the ``class`` syntax, which makes them
easy to read and write. An alternative creation method is described in
`Functional API`_. To define an enumeration, subclass ``Enum`` as
follows::
>>> from enum import Enum
>>> class Color(Enum):
... red = 1
... green = 2
... blue = 3
Note: Nomenclature
- The class ``Color`` is an *enumeration* (or *enum*)
- The attributes ``Color.red``, ``Color.green``, etc., are
*enumeration members* (or *enum members*).
- The enum members have *names* and *values* (the name of
``Color.red`` is ``red``, the value of ``Color.blue`` is
``3``, etc.)
Note:
Even though we use the ``class`` syntax to create Enums, Enums
are not normal Python classes. See `How are Enums different?`_ for
more details.
Enumeration members have human readable string representations::
>>> print(Color.red)
Color.red
...while their ``repr`` has more information::
>>> print(repr(Color.red))
<Color.red: 1>
The *type* of an enumeration member is the enumeration it belongs to::
>>> type(Color.red)
<enum 'Color'>
>>> isinstance(Color.green, Color)
True
>>>
Enum members also have a property that contains just their item name::
>>> print(Color.red.name)
red
Enumerations support iteration. In Python 3.x definition order is used; in
Python 2.x the definition order is not available, but class attribute
``__order__`` is supported; otherwise, value order is used::
>>> class Shake(Enum):
... __order__ = 'vanilla chocolate cookies mint' # only needed in 2.x
... vanilla = 7
... chocolate = 4
... cookies = 9
... mint = 3
...
>>> for shake in Shake:
... print(shake)
...
Shake.vanilla
Shake.chocolate
Shake.cookies
Shake.mint
The ``__order__`` attribute is always removed, and in 3.x it is also ignored
(order is definition order); however, in the stdlib version it will be ignored
but not removed.
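With this backport the attribute really is gone after class creation; a quick
sketch (using the ``Shake`` enum above)::
>>> hasattr(Shake, '__order__')
False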
Enumeration members are hashable, so they can be used in dictionaries and sets::
>>> apples = {}
>>> apples[Color.red] = 'red delicious'
>>> apples[Color.green] = 'granny smith'
>>> apples == {Color.red: 'red delicious', Color.green: 'granny smith'}
True
Programmatic access to enumeration members and their attributes
---------------------------------------------------------------
Sometimes it's useful to access members in enumerations programmatically (i.e.
situations where ``Color.red`` won't do because the exact color is not known
at program-writing time). ``Enum`` allows such access::
>>> Color(1)
<Color.red: 1>
>>> Color(3)
<Color.blue: 3>
If you want to access enum members by *name*, use item access::
>>> Color['red']
<Color.red: 1>
>>> Color['green']
<Color.green: 2>
If you have an enum member and need its ``name`` or ``value``::
>>> member = Color.red
>>> member.name
'red'
>>> member.value
1
Duplicating enum members and values
-----------------------------------
Having two enum members (or any other attribute) with the same name is invalid;
in Python 3.x this would raise an error, but in Python 2.x the second member
simply overwrites the first::
>>> # python 2.x
>>> class Shape(Enum):
... square = 2
... square = 3
...
>>> Shape.square
<Shape.square: 3>
>>> # python 3.x
>>> class Shape(Enum):
... square = 2
... square = 3
Traceback (most recent call last):
...
TypeError: Attempted to reuse key: 'square'
However, two enum members are allowed to have the same value. Given two members
A and B with the same value (and A defined first), B is an alias to A. By-value
lookup of the value of A and B will return A. By-name lookup of B will also
return A::
>>> class Shape(Enum):
... __order__ = 'square diamond circle alias_for_square' # only needed in 2.x
... square = 2
... diamond = 1
... circle = 3
... alias_for_square = 2
...
>>> Shape.square
<Shape.square: 2>
>>> Shape.alias_for_square
<Shape.square: 2>
>>> Shape(2)
<Shape.square: 2>
Allowing aliases is not always desirable. ``unique`` can be used to ensure
that none exist in a particular enumeration::
>>> from enum import unique
>>> @unique
... class Mistake(Enum):
... __order__ = 'one two three four' # only needed in 2.x
... one = 1
... two = 2
... three = 3
... four = 3
Traceback (most recent call last):
...
ValueError: duplicate names found in <enum 'Mistake'>: four -> three
Iterating over the members of an enum does not provide the aliases::
>>> list(Shape)
[<Shape.square: 2>, <Shape.diamond: 1>, <Shape.circle: 3>]
The special attribute ``__members__`` is a dictionary mapping names to members.
It includes all names defined in the enumeration, including the aliases::
>>> for name, member in sorted(Shape.__members__.items()):
... name, member
...
('alias_for_square', <Shape.square: 2>)
('circle', <Shape.circle: 3>)
('diamond', <Shape.diamond: 1>)
('square', <Shape.square: 2>)
The ``__members__`` attribute can be used for detailed programmatic access to
the enumeration members. For example, finding all the aliases::
>>> [name for name, member in Shape.__members__.items() if member.name != name]
['alias_for_square']
Comparisons
-----------
Enumeration members are compared by identity::
>>> Color.red is Color.red
True
>>> Color.red is Color.blue
False
>>> Color.red is not Color.blue
True
Ordered comparisons between enumeration values are *not* supported. Enum
members are not integers (but see `IntEnum`_ below)::
>>> Color.red < Color.blue
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: unorderable types: Color() < Color()
.. warning::
In Python 2 *everything* is ordered, even though the ordering may not
make sense. If you want your enumerations to have a sensible ordering
check out the `OrderedEnum`_ recipe below.
Equality comparisons are defined though::
>>> Color.blue == Color.red
False
>>> Color.blue != Color.red
True
>>> Color.blue == Color.blue
True
Comparisons against non-enumeration values will always compare not equal
(again, ``IntEnum`` was explicitly designed to behave differently, see
below)::
>>> Color.blue == 2
False
Allowed members and attributes of enumerations
----------------------------------------------
The examples above use integers for enumeration values. Using integers is
short and handy (and provided by default by the `Functional API`_), but not
strictly enforced. In the vast majority of use-cases, one doesn't care what
the actual value of an enumeration is. But if the value *is* important,
enumerations can have arbitrary values.
Enumerations are Python classes, and can have methods and special methods as
usual. If we have this enumeration::
>>> class Mood(Enum):
... funky = 1
... happy = 3
...
... def describe(self):
... # self is the member here
... return self.name, self.value
...
... def __str__(self):
... return 'my custom str! {0}'.format(self.value)
...
... @classmethod
... def favorite_mood(cls):
... # cls here is the enumeration
... return cls.happy
Then::
>>> Mood.favorite_mood()
<Mood.happy: 3>
>>> Mood.happy.describe()
('happy', 3)
>>> str(Mood.funky)
'my custom str! 1'
The rules for what is allowed are as follows: _sunder_ names (starting and
ending with a single underscore) are reserved by enum and cannot be used;
all other attributes defined within an enumeration will become members of this
enumeration, with the exception of *__dunder__* names and descriptors (methods
are also descriptors).
Note:
If your enumeration defines ``__new__`` and/or ``__init__`` then
whatever value(s) were given to the enum member will be passed into
those methods. See `Planet`_ for an example.
Restricted subclassing of enumerations
--------------------------------------
Subclassing an enumeration is allowed only if the enumeration does not define
any members. So this is forbidden::
>>> class MoreColor(Color):
... pink = 17
Traceback (most recent call last):
...
TypeError: Cannot extend enumerations
But this is allowed::
>>> class Foo(Enum):
... def some_behavior(self):
... pass
...
>>> class Bar(Foo):
... happy = 1
... sad = 2
...
Allowing subclassing of enums that define members would lead to a violation of
some important invariants of types and instances. On the other hand, it makes
sense to allow sharing some common behavior between a group of enumerations.
(See `OrderedEnum`_ for an example.)
Pickling
--------
Enumerations can be pickled and unpickled::
>>> from enum.test_enum import Fruit
>>> from pickle import dumps, loads
>>> Fruit.tomato is loads(dumps(Fruit.tomato, 2))
True
The usual restrictions for pickling apply: picklable enums must be defined in
the top level of a module, since unpickling requires them to be importable
from that module.
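For example, a member of an enum created inside a function body cannot be
pickled, because the class is not importable from its module (a sketch; the
exact error message varies by Python version)::
>>> import pickle
>>> def make_enum():
...     class Local(Enum):
...         a = 1
...     return Local
...
>>> pickle.dumps(make_enum().a)
Traceback (most recent call last):
...
PicklingError: Can't pickle <enum 'Local'>: ...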
Note:
With pickle protocol version 4 (introduced in Python 3.4) it is possible
to easily pickle enums nested in other classes.
Functional API
--------------
The ``Enum`` class is callable, providing the following functional API::
>>> Animal = Enum('Animal', 'ant bee cat dog')
>>> Animal
<enum 'Animal'>
>>> Animal.ant
<Animal.ant: 1>
>>> Animal.ant.value
1
>>> list(Animal)
[<Animal.ant: 1>, <Animal.bee: 2>, <Animal.cat: 3>, <Animal.dog: 4>]
The semantics of this API resemble ``namedtuple``. The first argument
of the call to ``Enum`` is the name of the enumeration.
The second argument is the *source* of enumeration member names. It can be a
whitespace-separated string of names, a sequence of names, a sequence of
2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to
values. The last two options enable assigning arbitrary values to
enumerations; the others auto-assign increasing integers starting with 1. A
new class derived from ``Enum`` is returned. In other words, the above
assignment to ``Animal`` is equivalent to::
>>> class Animals(Enum):
... ant = 1
... bee = 2
... cat = 3
... dog = 4
Pickling enums created with the functional API can be tricky as frame stack
implementation details are used to try and figure out which module the
enumeration is being created in (e.g. it will fail if you use a utility
function in a separate module, and also may not work on IronPython or Jython).
The solution is to specify the module name explicitly as follows::
>>> Animals = Enum('Animals', 'ant bee cat dog', module=__name__)
Derived Enumerations
--------------------
IntEnum
^^^^^^^
A variation of ``Enum`` is provided which is also a subclass of
``int``. Members of an ``IntEnum`` can be compared to integers;
by extension, integer enumerations of different types can also be compared
to each other::
>>> from enum import IntEnum
>>> class Shape(IntEnum):
... circle = 1
... square = 2
...
>>> class Request(IntEnum):
... post = 1
... get = 2
...
>>> Shape == 1
False
>>> Shape.circle == 1
True
>>> Shape.circle == Request.post
True
However, they still can't be compared to standard ``Enum`` enumerations::
>>> class Shape(IntEnum):
... circle = 1
... square = 2
...
>>> class Color(Enum):
... red = 1
... green = 2
...
>>> Shape.circle == Color.red
False
``IntEnum`` values behave like integers in other ways you'd expect::
>>> int(Shape.circle)
1
>>> ['a', 'b', 'c'][Shape.circle]
'b'
>>> [i for i in range(Shape.square)]
[0, 1]
For the vast majority of code, ``Enum`` is strongly recommended,
since ``IntEnum`` breaks some semantic promises of an enumeration (by
being comparable to integers, and thus by transitivity to other
unrelated enumerations). It should be used only in special cases where
there's no other choice; for example, when integer constants are
replaced with enumerations and backwards compatibility is required with code
that still expects integers.
Others
^^^^^^
While ``IntEnum`` is part of the ``enum`` module, it would be very
simple to implement independently::
class IntEnum(int, Enum):
pass
This demonstrates how similar derived enumerations can be defined; for example
a ``StrEnum`` that mixes in ``str`` instead of ``int``.
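For instance, a minimal ``StrEnum`` sketch (the ``Tag`` enum below is invented
purely for illustration)::
>>> class StrEnum(str, Enum):
...     pass
...
>>> class Tag(StrEnum):
...     title = 'ttl'
...     artist = 'art'
...
>>> Tag.title == 'ttl'
True
>>> Tag.title.upper()
'TTL'
Because ``str`` is mixed in before ``Enum`` (rule 1 below), the members behave
like real strings.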
Some rules:
1. When subclassing ``Enum``, mix-in types must appear before
``Enum`` itself in the sequence of bases, as in the ``IntEnum``
example above.
2. While ``Enum`` can have members of any type, once you mix in an
additional type, all the members must have values of that type, e.g.
``int`` above. This restriction does not apply to mix-ins which only
add methods and don't specify another data type such as ``int`` or
``str``.
3. When another data type is mixed in, the ``value`` attribute is *not the
same* as the enum member itself, although it is equivalent and will compare
equal.
4. %-style formatting: ``%s`` and ``%r`` call ``Enum``'s ``__str__`` and
``__repr__`` respectively; other codes (such as ``%i`` or ``%h`` for
IntEnum) treat the enum member as its mixed-in type.
Note: Prior to Python 3.4 there is a bug in ``str``'s %-formatting: ``int``
subclasses are printed as strings and not numbers when the ``%d``, ``%i``,
or ``%u`` codes are used.
5. ``str.__format__`` (or ``format``) will use the mixed-in
type's ``__format__``. If the ``Enum``'s ``str`` or
``repr`` is desired, use the ``!s`` or ``!r`` ``str`` format codes (see the
sketch after this list).
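A quick sketch of rules 4 and 5, using the ``Shape`` ``IntEnum`` from the
`IntEnum`_ section above (outputs shown for Python 3.4+, given the
%-formatting bug noted in rule 4)::
>>> '%s' % Shape.circle            # rule 4: %s goes through Enum's __str__
'Shape.circle'
>>> '%d' % Shape.circle            # rule 4: %d treats the member as an int
'1'
>>> '{0}'.format(Shape.circle)     # rule 5: format() uses int.__format__
'1'
>>> '{0!s}'.format(Shape.circle)   # rule 5: !s forces Enum's str()
'Shape.circle'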
Decorators
----------
unique
^^^^^^
A ``class`` decorator specifically for enumerations. It searches an
enumeration's ``__members__`` gathering any aliases it finds; if any are
found ``ValueError`` is raised with the details::
>>> @unique
... class NoDupes(Enum):
... first = 'one'
... second = 'two'
... third = 'two'
Traceback (most recent call last):
...
ValueError: duplicate names found in <enum 'NoDupes'>: third -> second
Interesting examples
--------------------
While ``Enum`` and ``IntEnum`` are expected to cover the majority of
use-cases, they cannot cover them all. Here are recipes for some different
types of enumerations that can be used directly, or as examples for creating
one's own.
AutoNumber
^^^^^^^^^^
Avoids having to specify the value for each enumeration member::
>>> class AutoNumber(Enum):
... def __new__(cls):
... value = len(cls.__members__) + 1
... obj = object.__new__(cls)
... obj._value_ = value
... return obj
...
>>> class Color(AutoNumber):
... __order__ = "red green blue" # only needed in 2.x
... red = ()
... green = ()
... blue = ()
...
>>> Color.green.value == 2
True
Note:
The `__new__` method, if defined, is used during creation of the Enum
members; it is then replaced by Enum's `__new__` which is used after
class creation for lookup of existing members. Due to the way Enums are
supposed to behave, there is no way to customize Enum's `__new__`.
UniqueEnum
^^^^^^^^^^
Raises an error if a duplicate member name is found instead of creating an
alias::
>>> class UniqueEnum(Enum):
... def __init__(self, *args):
... cls = self.__class__
... if any(self.value == e.value for e in cls):
... a = self.name
... e = cls(self.value).name
... raise ValueError(
... "aliases not allowed in UniqueEnum: %r --> %r"
... % (a, e))
...
>>> class Color(UniqueEnum):
... red = 1
... green = 2
... blue = 3
... grene = 2
Traceback (most recent call last):
...
ValueError: aliases not allowed in UniqueEnum: 'grene' --> 'green'
OrderedEnum
^^^^^^^^^^^
An ordered enumeration that is not based on ``IntEnum`` and so maintains
the normal ``Enum`` invariants (such as not being comparable to other
enumerations)::
>>> class OrderedEnum(Enum):
... def __ge__(self, other):
... if self.__class__ is other.__class__:
... return self._value_ >= other._value_
... return NotImplemented
... def __gt__(self, other):
... if self.__class__ is other.__class__:
... return self._value_ > other._value_
... return NotImplemented
... def __le__(self, other):
... if self.__class__ is other.__class__:
... return self._value_ <= other._value_
... return NotImplemented
... def __lt__(self, other):
... if self.__class__ is other.__class__:
... return self._value_ < other._value_
... return NotImplemented
...
>>> class Grade(OrderedEnum):
...     __order__ = 'A B C D F'  # only needed in 2.x
... A = 5
... B = 4
... C = 3
... D = 2
... F = 1
...
>>> Grade.C < Grade.A
True
Planet
^^^^^^
If ``__new__`` or ``__init__`` is defined the value of the enum member
will be passed to those methods::
>>> class Planet(Enum):
... MERCURY = (3.303e+23, 2.4397e6)
... VENUS = (4.869e+24, 6.0518e6)
... EARTH = (5.976e+24, 6.37814e6)
... MARS = (6.421e+23, 3.3972e6)
... JUPITER = (1.9e+27, 7.1492e7)
... SATURN = (5.688e+26, 6.0268e7)
... URANUS = (8.686e+25, 2.5559e7)
... NEPTUNE = (1.024e+26, 2.4746e7)
... def __init__(self, mass, radius):
... self.mass = mass # in kilograms
... self.radius = radius # in meters
... @property
... def surface_gravity(self):
... # universal gravitational constant (m3 kg-1 s-2)
... G = 6.67300E-11
... return G * self.mass / (self.radius * self.radius)
...
>>> Planet.EARTH.value
(5.976e+24, 6378140.0)
>>> Planet.EARTH.surface_gravity
9.802652743337129
How are Enums different?
------------------------
Enums have a custom metaclass that affects many aspects of both derived Enum
classes and their instances (members).
Enum Classes
^^^^^^^^^^^^
The ``EnumMeta`` metaclass is responsible for providing the
``__contains__``, ``__dir__``, ``__iter__`` and other methods that
allow one to do things with an ``Enum`` class that fail on a typical
class, such as ``list(Color)`` or ``some_var in Color``. ``EnumMeta`` is
responsible for ensuring that various other methods on the final ``Enum``
class are correct (such as ``__new__``, ``__getnewargs__``,
``__str__`` and ``__repr__``).
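For example (a sketch, assuming the original three-member ``Color`` enum from
the top of this document)::
>>> Color.red in Color
True
>>> list(Color)
[<Color.red: 1>, <Color.green: 2>, <Color.blue: 3>]
>>> class Plain(object):
...     pass
...
>>> list(Plain)
Traceback (most recent call last):
...
TypeError: 'type' object is not iterable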
.. note::
``__dir__`` is not changed in the Python 2 line as it messes up some
of the decorators included in the stdlib.
Enum Members (aka instances)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The most interesting thing about Enum members is that they are singletons.
``EnumMeta`` creates them all while it is creating the ``Enum``
class itself, and then puts a custom ``__new__`` in place to ensure
that no new ones are ever instantiated by returning only the existing
member instances.
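In practice, every lookup hands back the same object (again with ``Color``)::
>>> Color(1) is Color.red is Color['red']
True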
Finer Points
^^^^^^^^^^^^
``Enum`` members are instances of an ``Enum`` class, and even though they
are accessible as `EnumClass.member1.member2`, they should not be
accessed directly from the member as that lookup may fail or, worse,
return something besides the ``Enum`` member you were looking for
(changed in version 1.1.1)::
>>> class FieldTypes(Enum):
... name = 1
... value = 2
... size = 3
...
>>> FieldTypes.value.size
<FieldTypes.size: 3>
>>> FieldTypes.size.value
3
The ``__members__`` attribute is only available on the class.
In Python 3.x ``__members__`` is always an ``OrderedDict``, with the order being
the definition order. In Python 2.7 ``__members__`` is an ``OrderedDict`` if
``__order__`` was specified, and a plain ``dict`` otherwise. In all other Python
2.x versions ``__members__`` is a plain ``dict`` even if ``__order__`` was
specified, since the ``OrderedDict`` type didn't exist yet.
If you give your ``Enum`` subclass extra methods, like the `Planet`_
class above, those methods will show up in a `dir` of the member,
but not of the class::
>>> dir(Planet)
['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS',
'VENUS', '__class__', '__doc__', '__members__', '__module__']
>>> dir(Planet.EARTH)
['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value']
A ``__new__`` method will only be used for the creation of the
``Enum`` members -- after that it is replaced. This means if you wish to
change how ``Enum`` members are looked up you either have to write a
helper function or a ``classmethod``.
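A sketch of such a ``classmethod`` (the name ``lookup`` is invented here for
illustration)::
>>> class Color(Enum):
...     red = 1
...     green = 2
...     @classmethod
...     def lookup(cls, key):
...         # try by-value first, then fall back to by-name
...         try:
...             return cls(key)
...         except ValueError:
...             return cls[key]
...
>>> Color.lookup(1)
<Color.red: 1>
>>> Color.lookup('green')
<Color.green: 2>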

1820
libs/enum2/test.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -4,13 +4,9 @@ import os
import pickle
import shutil
import tempfile
import traceback
import hashlib
import appdirs
from scandir import scandir, scandir_generic as _scandir_generic
try:
from collections.abc import MutableMapping
unicode = str
@ -90,7 +86,7 @@ class FileCache(MutableMapping):
"""
def __init__(self, appname, flag='c', mode=0o666, keyencoding='utf-8',
serialize=True, app_cache_dir=None, key_file_ext=".txt"):
serialize=True, app_cache_dir=None):
"""Initialize a :class:`FileCache` object."""
if not isinstance(flag, str):
raise TypeError("flag must be str not '{}'".format(type(flag)))
@ -131,7 +127,6 @@ class FileCache(MutableMapping):
self._mode = mode
self._keyencoding = keyencoding
self._serialize = serialize
self.key_file_ext = key_file_ext
def _parse_appname(self, appname):
"""Splits an appname into the appname and subcache components."""
@ -185,16 +180,7 @@ class FileCache(MutableMapping):
self._sync = True
for ekey in self._buffer:
filename = self._key_to_filename(ekey)
try:
self._write_to_file(filename, self._buffer[ekey])
except:
logger.error("Couldn't write content from %r to cache file: %r: %s", ekey, filename,
traceback.format_exc())
try:
self.__write_to_file(filename + self.key_file_ext, ekey)
except:
logger.error("Couldn't write content from %r to cache file: %r: %s", ekey, filename,
traceback.format_exc())
self._write_to_file(filename, self._buffer[ekey])
self._buffer.clear()
self._sync = False
@ -203,7 +189,8 @@ class FileCache(MutableMapping):
raise ValueError("invalid operation on closed cache")
def _encode_key(self, key):
"""
"""Encode key using *hex_codec* for constructing a cache filename.
Keys are implicitly converted to :class:`bytes` if passed as
:class:`str`.
@ -212,15 +199,16 @@ class FileCache(MutableMapping):
key = key.encode(self._keyencoding)
elif not isinstance(key, bytes):
raise TypeError("key must be bytes or str")
return key.decode(self._keyencoding)
return codecs.encode(key, 'hex_codec').decode(self._keyencoding)
def _decode_key(self, key):
"""
"""Decode key using hex_codec to retrieve the original key.
Keys are returned as :class:`str` if serialization is enabled.
Keys are returned as :class:`bytes` if serialization is disabled.
"""
bkey = key.encode(self._keyencoding)
bkey = codecs.decode(key.encode(self._keyencoding), 'hex_codec')
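# e.g. codecs.decode(b'7370616d', 'hex_codec') == b'spam'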
return bkey.decode(self._keyencoding) if self._serialize else bkey
def _dumps(self, value):
@ -231,27 +219,19 @@ class FileCache(MutableMapping):
def _key_to_filename(self, key):
"""Convert an encoded key to an absolute cache filename."""
if isinstance(key, unicode):
key = key.encode(self._keyencoding)
return os.path.join(self.cache_dir, hashlib.md5(key).hexdigest())
return os.path.join(self.cache_dir, key)
def _filename_to_key(self, absfilename):
"""Convert an absolute cache filename to a key name."""
hkey_hdr_fn = absfilename + self.key_file_ext
if os.path.isfile(hkey_hdr_fn):
with open(hkey_hdr_fn, 'rb') as f:
key = f.read()
return key.decode(self._keyencoding) if self._serialize else key
return os.path.split(absfilename)[1]
def _all_filenames(self, scandir_generic=True):
def _all_filenames(self):
"""Return a list of absolute cache filenames"""
_scandir = _scandir_generic if scandir_generic else scandir
try:
for entry in _scandir(self.cache_dir):
if entry.is_file(follow_symlinks=False) and not entry.name.endswith(self.key_file_ext):
yield os.path.join(self.cache_dir, entry.name)
return [os.path.join(self.cache_dir, filename) for filename in
os.listdir(self.cache_dir)]
except (FileNotFoundError, OSError):
raise StopIteration
return []
def _all_keys(self):
"""Return a list of all encoded key names."""
@ -261,17 +241,14 @@ class FileCache(MutableMapping):
else:
return set(file_keys + list(self._buffer))
def __write_to_file(self, filename, value):
def _write_to_file(self, filename, bytesvalue):
"""Write bytesvalue to filename."""
fh, tmp = tempfile.mkstemp()
with os.fdopen(fh, self._flag) as f:
f.write(value)
f.write(self._dumps(bytesvalue))
rename(tmp, filename)
os.chmod(filename, self._mode)
def _write_to_file(self, filename, bytesvalue):
self.__write_to_file(filename, self._dumps(bytesvalue))
def _read_from_file(self, filename):
"""Read data from filename."""
try:
@ -288,7 +265,6 @@ class FileCache(MutableMapping):
else:
filename = self._key_to_filename(ekey)
self._write_to_file(filename, value)
self.__write_to_file(filename + self.key_file_ext, ekey)
def __getitem__(self, key):
ekey = self._encode_key(key)
@ -298,9 +274,8 @@ class FileCache(MutableMapping):
except KeyError:
pass
filename = self._key_to_filename(ekey)
if not os.path.isfile(filename):
if filename not in self._all_filenames():
raise KeyError(key)
return self._read_from_file(filename)
def __delitem__(self, key):
@ -317,11 +292,6 @@ class FileCache(MutableMapping):
except (IOError, OSError):
pass
try:
os.remove(filename + self.key_file_ext)
except (IOError, OSError):
pass
def __iter__(self):
for key in self._all_keys():
yield self._decode_key(key)
@ -331,10 +301,4 @@ class FileCache(MutableMapping):
def __contains__(self, key):
ekey = self._encode_key(key)
if not self._sync:
try:
return ekey in self._buffer
except KeyError:
pass
filename = self._key_to_filename(ekey)
return os.path.isfile(filename)
return ekey in self._all_keys()

88
libs/pysubs2/tmp.py Normal file
View File

@ -0,0 +1,88 @@
from __future__ import print_function, unicode_literals
import re
from .formatbase import FormatBase
from .ssaevent import SSAEvent
from .ssastyle import SSAStyle
from .substation import parse_tags
from .time import ms_to_times, make_time, tmptimestamp_to_ms
#: Pattern that matches TMP timestamp
TMPTIMESTAMP = re.compile(r"(\d{1,2}):(\d{2}):(\d{2})")
#: Pattern that matches TMP line
TMP_LINE = re.compile(r"(\d{1,2}:\d{2}:\d{2}):(.+)")
#: Largest timestamp allowed in TMP format, i.e. 99:59:59.
MAX_REPRESENTABLE_TIME = make_time(h=100) - 1
def ms_to_timestamp(ms):
"""Convert ms to 'HH:MM:SS'"""
# XXX throw on overflow/underflow?
if ms < 0: ms = 0
if ms > MAX_REPRESENTABLE_TIME: ms = MAX_REPRESENTABLE_TIME
h, m, s, ms = ms_to_times(ms)
return "%02d:%02d:%02d" % (h, m, s)
class TmpFormat(FormatBase):
@classmethod
def guess_format(cls, text):
if "[Script Info]" in text or "[V4+ Styles]" in text:
# disambiguation vs. SSA/ASS
return None
for line in text.splitlines():
if TMP_LINE.match(line) and len(TMP_LINE.findall(line)) == 1:
return "tmp"
@classmethod
def from_file(cls, subs, fp, format_, **kwargs):
timestamps = []  # list of (start, end) pairs in ms
lines = []  # subtitle text for each timestamp
for line in fp:
match = TMP_LINE.match(line)
if not match:
continue
start, text = match.groups()
start = tmptimestamp_to_ms(TMPTIMESTAMP.match(start).groups())
# calculate end time as start time + 500 milliseconds, plus 67 milliseconds
# per character of text (roughly 15 characters per second reading speed)
end = start + 500 + (len(text) * 67)
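# e.g. 20 characters of text -> 500 + 20 * 67 = 1840 ms on screen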
timestamps.append((start, end))
lines.append(text)
def prepare_text(lines):
lines = lines.replace("|", r"\N") # convert newlines
lines = re.sub(r"< *u *>", "{\\\\u1}", lines) # not r" for Python 2.7 compat, triggers unicodeescape
lines = re.sub(r"< */? *[a-zA-Z][^>]*>", "", lines) # strip other HTML tags
return lines
subs.events = [SSAEvent(start=start, end=end, text=prepare_text(lines))
for (start, end), lines in zip(timestamps, lines)]
@classmethod
def to_file(cls, subs, fp, format_, **kwargs):
def prepare_text(text, style):
body = []
for fragment, sty in parse_tags(text, style, subs.styles):
fragment = fragment.replace(r"\h", " ")
fragment = fragment.replace(r"\n", "\n")
fragment = fragment.replace(r"\N", "\n")
if sty.italic: fragment = "<i>%s</i>" % fragment
if sty.underline: fragment = "<u>%s</u>" % fragment
if sty.strikeout: fragment = "<s>%s</s>" % fragment
body.append(fragment)
return re.sub("\n+", "\n", "".join(body).strip())
visible_lines = (line for line in subs if not line.is_comment)
for i, line in enumerate(visible_lines, 1):
start = ms_to_timestamp(line.start)
#end = ms_to_timestamp(line.end)
text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))
#print("%d" % i, file=fp) # Python 2.7 compat
print(start + ":" + text, end="\n", file=fp)
#print(text, end="\n\n", file=fp)

View File

@ -1,21 +0,0 @@
# -*- coding: utf-8 -*-
__title__ = 'subliminal'
__version__ = '2.1.0.dev'
__short_version__ = '.'.join(__version__.split('.')[:2])
__author__ = 'Antoine Bertin'
__license__ = 'MIT'
__copyright__ = 'Copyright 2016, Antoine Bertin'
import logging
from .core import (AsyncProviderPool, ProviderPool, check_video, download_best_subtitles, download_subtitles,
list_subtitles, refine, save_subtitles, scan_video, scan_videos)
from .cache import region
from .exceptions import Error, ProviderError
from .extensions import provider_manager, refiner_manager
from .providers import Provider
from .score import compute_score, get_scores
from .subtitle import SUBTITLE_EXTENSIONS, Subtitle
from .video import VIDEO_EXTENSIONS, Episode, Movie, Video
logging.getLogger(__name__).addHandler(logging.NullHandler())

View File

@ -1,16 +0,0 @@
# -*- coding: utf-8 -*-
import datetime
from dogpile.cache import make_region
#: Expiration time for show caching
SHOW_EXPIRATION_TIME = datetime.timedelta(weeks=3).total_seconds()
#: Expiration time for episode caching
EPISODE_EXPIRATION_TIME = datetime.timedelta(days=3).total_seconds()
#: Expiration time for scraper searches
REFINER_EXPIRATION_TIME = datetime.timedelta(weeks=1).total_seconds()
region = make_region()

View File

@ -1,458 +0,0 @@
# -*- coding: utf-8 -*-
"""
Subliminal uses `click <http://click.pocoo.org>`_ to provide a powerful :abbr:`CLI (command-line interface)`.
"""
from __future__ import division
from collections import defaultdict
from datetime import timedelta
import glob
import json
import logging
import os
import re
from appdirs import AppDirs
from babelfish import Error as BabelfishError, Language
import click
from dogpile.cache.backends.file import AbstractFileLock
from dogpile.util.readwrite_lock import ReadWriteMutex
from six.moves import configparser
from subliminal import (AsyncProviderPool, Episode, Movie, Video, __version__, check_video, compute_score, get_scores,
provider_manager, refine, refiner_manager, region, save_subtitles, scan_video, scan_videos)
from subliminal.core import ARCHIVE_EXTENSIONS, search_external_subtitles
logger = logging.getLogger(__name__)
class MutexLock(AbstractFileLock):
""":class:`MutexLock` is a thread-based rw lock based on :class:`dogpile.core.ReadWriteMutex`."""
def __init__(self, filename):
self.mutex = ReadWriteMutex()
def acquire_read_lock(self, wait):
ret = self.mutex.acquire_read_lock(wait)
return wait or ret
def acquire_write_lock(self, wait):
ret = self.mutex.acquire_write_lock(wait)
return wait or ret
def release_read_lock(self):
return self.mutex.release_read_lock()
def release_write_lock(self):
return self.mutex.release_write_lock()
class Config(object):
"""A :class:`~configparser.ConfigParser` wrapper to store configuration.
Interaction with the configuration is done with the properties.
:param str path: path to the configuration file.
"""
def __init__(self, path):
#: Path to the configuration file
self.path = path
#: The underlying configuration object
self.config = configparser.SafeConfigParser()
self.config.add_section('general')
self.config.set('general', 'languages', json.dumps(['en']))
self.config.set('general', 'providers', json.dumps(sorted([p.name for p in provider_manager])))
self.config.set('general', 'refiners', json.dumps(sorted([r.name for r in refiner_manager])))
self.config.set('general', 'single', str(0))
self.config.set('general', 'embedded_subtitles', str(1))
self.config.set('general', 'age', str(int(timedelta(weeks=2).total_seconds())))
self.config.set('general', 'hearing_impaired', str(1))
self.config.set('general', 'min_score', str(0))
def read(self):
"""Read the configuration from :attr:`path`"""
self.config.read(self.path)
def write(self):
"""Write the configuration to :attr:`path`"""
with open(self.path, 'w') as f:
self.config.write(f)
@property
def languages(self):
return {Language.fromietf(l) for l in json.loads(self.config.get('general', 'languages'))}
@languages.setter
def languages(self, value):
self.config.set('general', 'languages', json.dumps(sorted([str(l) for l in value])))
@property
def providers(self):
return json.loads(self.config.get('general', 'providers'))
@providers.setter
def providers(self, value):
self.config.set('general', 'providers', json.dumps(sorted([p.lower() for p in value])))
@property
def refiners(self):
return json.loads(self.config.get('general', 'refiners'))
@refiners.setter
def refiners(self, value):
self.config.set('general', 'refiners', json.dumps([r.lower() for r in value]))
@property
def single(self):
return self.config.getboolean('general', 'single')
@single.setter
def single(self, value):
self.config.set('general', 'single', str(int(value)))
@property
def embedded_subtitles(self):
return self.config.getboolean('general', 'embedded_subtitles')
@embedded_subtitles.setter
def embedded_subtitles(self, value):
self.config.set('general', 'embedded_subtitles', str(int(value)))
@property
def age(self):
return timedelta(seconds=self.config.getint('general', 'age'))
@age.setter
def age(self, value):
self.config.set('general', 'age', str(int(value.total_seconds())))
@property
def hearing_impaired(self):
return self.config.getboolean('general', 'hearing_impaired')
@hearing_impaired.setter
def hearing_impaired(self, value):
self.config.set('general', 'hearing_impaired', str(int(value)))
@property
def min_score(self):
return self.config.getfloat('general', 'min_score')
@min_score.setter
def min_score(self, value):
self.config.set('general', 'min_score', str(value))
@property
def provider_configs(self):
rv = {}
for provider in provider_manager:
if self.config.has_section(provider.name):
rv[provider.name] = {k: v for k, v in self.config.items(provider.name)}
return rv
@provider_configs.setter
def provider_configs(self, value):
# loop over provider configurations
for provider, config in value.items():
# create the corresponding section if necessary
if not self.config.has_section(provider):
self.config.add_section(provider)
# add config options
for k, v in config.items():
self.config.set(provider, k, v)
class LanguageParamType(click.ParamType):
""":class:`~click.ParamType` for languages that returns a :class:`~babelfish.language.Language`"""
name = 'language'
def convert(self, value, param, ctx):
try:
return Language.fromietf(value)
except BabelfishError:
self.fail('%s is not a valid language' % value)
LANGUAGE = LanguageParamType()
class AgeParamType(click.ParamType):
""":class:`~click.ParamType` for age strings that returns a :class:`~datetime.timedelta`
An age string is in the form `number + identifier` with possible identifiers:
* ``w`` for weeks
* ``d`` for days
* ``h`` for hours
The form can be specified multiple times but only with that identifier ordering. For example:
* ``1w2d4h`` for 1 week, 2 days and 4 hours
* ``2w`` for 2 weeks
* ``3w6h`` for 3 weeks and 6 hours
"""
name = 'age'
def convert(self, value, param, ctx):
match = re.match(r'^(?:(?P<weeks>\d+?)w)?(?:(?P<days>\d+?)d)?(?:(?P<hours>\d+?)h)?$', value)
if not match:
self.fail('%s is not a valid age' % value)
return timedelta(**{k: int(v) for k, v in match.groupdict(0).items()})
AGE = AgeParamType()
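# A doctest-style sketch (not from the original file): the regex above turns an
# age string into a timedelta, with missing identifiers defaulting to 0.
# >>> AGE.convert('1w2d4h', None, None) == timedelta(weeks=1, days=2, hours=4)
# True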
PROVIDER = click.Choice(sorted(provider_manager.names()))
REFINER = click.Choice(sorted(refiner_manager.names()))
dirs = AppDirs('subliminal')
cache_file = 'subliminal.dbm'
config_file = 'config.ini'
@click.group(context_settings={'max_content_width': 100}, epilog='Suggestions and bug reports are greatly appreciated: '
'https://github.com/Diaoul/subliminal/')
@click.option('--addic7ed', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD', help='Addic7ed configuration.')
@click.option('--legendastv', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD', help='LegendasTV configuration.')
@click.option('--opensubtitles', type=click.STRING, nargs=2, metavar='USERNAME PASSWORD',
help='OpenSubtitles configuration.')
@click.option('--cache-dir', type=click.Path(writable=True, file_okay=False), default=dirs.user_cache_dir,
show_default=True, expose_value=True, help='Path to the cache directory.')
@click.option('--debug', is_flag=True, help='Print useful information for debugging subliminal and for reporting bugs.')
@click.version_option(__version__)
@click.pass_context
def subliminal(ctx, addic7ed, legendastv, opensubtitles, cache_dir, debug):
"""Subtitles, faster than your thoughts."""
# create cache directory
try:
os.makedirs(cache_dir)
except OSError:
if not os.path.isdir(cache_dir):
raise
# configure cache
region.configure('dogpile.cache.dbm', expiration_time=timedelta(days=30),
arguments={'filename': os.path.join(cache_dir, cache_file), 'lock_factory': MutexLock})
# configure logging
if debug:
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT))
logging.getLogger('subliminal').addHandler(handler)
logging.getLogger('subliminal').setLevel(logging.DEBUG)
# provider configs
ctx.obj = {'provider_configs': {}}
if addic7ed:
ctx.obj['provider_configs']['addic7ed'] = {'username': addic7ed[0], 'password': addic7ed[1]}
if legendastv:
ctx.obj['provider_configs']['legendastv'] = {'username': legendastv[0], 'password': legendastv[1]}
if opensubtitles:
ctx.obj['provider_configs']['opensubtitles'] = {'username': opensubtitles[0], 'password': opensubtitles[1]}
@subliminal.command()
@click.option('--clear-subliminal', is_flag=True, help='Clear subliminal\'s cache. Use this ONLY if your cache is '
'corrupted or if you experience issues.')
@click.pass_context
def cache(ctx, clear_subliminal):
"""Cache management."""
if clear_subliminal:
for file in glob.glob(os.path.join(ctx.parent.params['cache_dir'], cache_file) + '*'):
os.remove(file)
click.echo('Subliminal\'s cache cleared.')
else:
click.echo('Nothing done.')
@subliminal.command()
@click.option('-l', '--language', type=LANGUAGE, required=True, multiple=True, help='Language as IETF code, '
'e.g. en, pt-BR (can be used multiple times).')
@click.option('-p', '--provider', type=PROVIDER, multiple=True, help='Provider to use (can be used multiple times).')
@click.option('-r', '--refiner', type=REFINER, multiple=True, help='Refiner to use (can be used multiple times).')
@click.option('-a', '--age', type=AGE, help='Filter videos newer than AGE, e.g. 12h, 1w2d.')
@click.option('-d', '--directory', type=click.STRING, metavar='DIR', help='Directory where to save subtitles, '
'default is next to the video file.')
@click.option('-e', '--encoding', type=click.STRING, metavar='ENC', help='Subtitle file encoding, default is to '
'preserve original encoding.')
@click.option('-s', '--single', is_flag=True, default=False, help='Save subtitle without language code in the file '
'name, i.e. use .srt extension. Do not use this unless your media player requires it.')
@click.option('-f', '--force', is_flag=True, default=False, help='Force download even if a subtitle already exists.')
@click.option('-hi', '--hearing-impaired', is_flag=True, default=False, help='Prefer hearing impaired subtitles.')
@click.option('-m', '--min-score', type=click.IntRange(0, 100), default=0, help='Minimum score for a subtitle '
'to be downloaded (0 to 100).')
@click.option('-w', '--max-workers', type=click.IntRange(1, 50), default=None, help='Maximum number of threads to use.')
@click.option('-z/-Z', '--archives/--no-archives', default=True, show_default=True, help='Scan archives for videos '
'(supported extensions: %s).' % ', '.join(ARCHIVE_EXTENSIONS))
@click.option('-v', '--verbose', count=True, help='Increase verbosity.')
@click.argument('path', type=click.Path(), required=True, nargs=-1)
@click.pass_obj
def download(obj, provider, refiner, language, age, directory, encoding, single, force, hearing_impaired, min_score,
max_workers, archives, verbose, path):
"""Download best subtitles.
PATH can be a directory containing videos, a video file path or a video file name. It can be used multiple times.
If an existing subtitle is detected (external or embedded) in the correct language, the download is skipped for
the associated video.
"""
# process parameters
language = set(language)
# scan videos
videos = []
ignored_videos = []
errored_paths = []
with click.progressbar(path, label='Collecting videos', item_show_func=lambda p: p or '') as bar:
for p in bar:
logger.debug('Collecting path %s', p)
# non-existing
if not os.path.exists(p):
try:
video = Video.fromname(p)
except:
logger.exception('Unexpected error while collecting non-existing path %s', p)
errored_paths.append(p)
continue
if not force:
video.subtitle_languages |= set(search_external_subtitles(video.name, directory=directory).values())
refine(video, episode_refiners=refiner, movie_refiners=refiner, embedded_subtitles=not force)
videos.append(video)
continue
# directories
if os.path.isdir(p):
try:
scanned_videos = scan_videos(p, age=age, archives=archives)
except:
logger.exception('Unexpected error while collecting directory path %s', p)
errored_paths.append(p)
continue
for video in scanned_videos:
if not force:
video.subtitle_languages |= set(search_external_subtitles(video.name,
directory=directory).values())
if check_video(video, languages=language, age=age, undefined=single):
refine(video, episode_refiners=refiner, movie_refiners=refiner, embedded_subtitles=not force)
videos.append(video)
else:
ignored_videos.append(video)
continue
# other inputs
try:
video = scan_video(p)
except:
logger.exception('Unexpected error while collecting path %s', p)
errored_paths.append(p)
continue
if not force:
video.subtitle_languages |= set(search_external_subtitles(video.name, directory=directory).values())
if check_video(video, languages=language, age=age, undefined=single):
refine(video, episode_refiners=refiner, movie_refiners=refiner, embedded_subtitles=not force)
videos.append(video)
else:
ignored_videos.append(video)
# output errored paths
if verbose > 0:
for p in errored_paths:
click.secho('%s errored' % p, fg='red')
# output ignored videos
if verbose > 1:
for video in ignored_videos:
click.secho('%s ignored - subtitles: %s / age: %d day%s' % (
os.path.split(video.name)[1],
', '.join(str(s) for s in video.subtitle_languages) or 'none',
video.age.days,
's' if video.age.days > 1 else ''
), fg='yellow')
# report collected videos
click.echo('%s video%s collected / %s video%s ignored / %s error%s' % (
click.style(str(len(videos)), bold=True, fg='green' if videos else None),
's' if len(videos) > 1 else '',
click.style(str(len(ignored_videos)), bold=True, fg='yellow' if ignored_videos else None),
's' if len(ignored_videos) > 1 else '',
click.style(str(len(errored_paths)), bold=True, fg='red' if errored_paths else None),
's' if len(errored_paths) > 1 else '',
))
# exit if no video collected
if not videos:
return
# download best subtitles
downloaded_subtitles = defaultdict(list)
with AsyncProviderPool(max_workers=max_workers, providers=provider, provider_configs=obj['provider_configs']) as p:
with click.progressbar(videos, label='Downloading subtitles',
item_show_func=lambda v: os.path.split(v.name)[1] if v is not None else '') as bar:
for v in bar:
scores = get_scores(v)
subtitles = p.download_best_subtitles(p.list_subtitles(v, language - v.subtitle_languages),
v, language, min_score=scores['hash'] * min_score / 100,
hearing_impaired=hearing_impaired, only_one=single)
downloaded_subtitles[v] = subtitles
if p.discarded_providers:
click.secho('Some providers have been discarded due to unexpected errors: %s' %
', '.join(p.discarded_providers), fg='yellow')
# save subtitles
total_subtitles = 0
for v, subtitles in downloaded_subtitles.items():
saved_subtitles = save_subtitles(v, subtitles, single=single, directory=directory, encoding=encoding)
total_subtitles += len(saved_subtitles)
if verbose > 0:
click.echo('%s subtitle%s downloaded for %s' % (click.style(str(len(saved_subtitles)), bold=True),
's' if len(saved_subtitles) > 1 else '',
os.path.split(v.name)[1]))
if verbose > 1:
for s in saved_subtitles:
matches = s.get_matches(v)
score = compute_score(s, v)
# score color
score_color = None
scores = get_scores(v)
if isinstance(v, Movie):
if score < scores['title']:
score_color = 'red'
elif score < scores['title'] + scores['year'] + scores['release_group']:
score_color = 'yellow'
else:
score_color = 'green'
elif isinstance(v, Episode):
if score < scores['series'] + scores['season'] + scores['episode']:
score_color = 'red'
elif score < scores['series'] + scores['season'] + scores['episode'] + scores['release_group']:
score_color = 'yellow'
else:
score_color = 'green'
# scale score from 0 to 100 taking out preferences
scaled_score = score
if s.hearing_impaired == hearing_impaired:
scaled_score -= scores['hearing_impaired']
scaled_score *= 100 / scores['hash']
# echo some nice colored output
click.echo(' - [{score}] {language} subtitle from {provider_name} (match on {matches})'.format(
score=click.style('{:5.1f}'.format(scaled_score), fg=score_color, bold=score >= scores['hash']),
language=s.language.name if s.language.country is None else '%s (%s)' % (s.language.name,
s.language.country.name),
provider_name=s.provider_name,
matches=', '.join(sorted(matches, key=scores.get, reverse=True))
))
if verbose == 0:
click.echo('Downloaded %s subtitle%s' % (click.style(str(total_subtitles), bold=True),
's' if total_subtitles > 1 else ''))
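# Illustrative CLI invocations (paths and credentials are placeholders, not
# taken from this repository):
#   subliminal download -l en /videos
#   subliminal --opensubtitles USER PASS download -l en -l pt-BR -m 80 -v /videos/Movie.2017.1080p.mkv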

View File

@ -1,32 +0,0 @@
# -*- coding: utf-8 -*-
from babelfish import LanguageReverseConverter, language_converters
class Addic7edConverter(LanguageReverseConverter):
def __init__(self):
self.name_converter = language_converters['name']
self.from_addic7ed = {u'Català': ('cat',), 'Chinese (Simplified)': ('zho',), 'Chinese (Traditional)': ('zho',),
'Euskera': ('eus',), 'Galego': ('glg',), 'Greek': ('ell',), 'Malay': ('msa',),
'Portuguese (Brazilian)': ('por', 'BR'), 'Serbian (Cyrillic)': ('srp', None, 'Cyrl'),
'Serbian (Latin)': ('srp',), 'Spanish (Latin America)': ('spa',),
'Spanish (Spain)': ('spa',)}
self.to_addic7ed = {('cat',): 'Català', ('zho',): 'Chinese (Simplified)', ('eus',): 'Euskera',
('glg',): 'Galego', ('ell',): 'Greek', ('msa',): 'Malay',
('por', 'BR'): 'Portuguese (Brazilian)', ('srp', None, 'Cyrl'): 'Serbian (Cyrillic)'}
self.codes = self.name_converter.codes | set(self.from_addic7ed.keys())
def convert(self, alpha3, country=None, script=None):
if (alpha3, country, script) in self.to_addic7ed:
return self.to_addic7ed[(alpha3, country, script)]
if (alpha3, country) in self.to_addic7ed:
return self.to_addic7ed[(alpha3, country)]
if (alpha3,) in self.to_addic7ed:
return self.to_addic7ed[(alpha3,)]
return self.name_converter.convert(alpha3, country, script)
def reverse(self, addic7ed):
if addic7ed in self.from_addic7ed:
return self.from_addic7ed[addic7ed]
return self.name_converter.reverse(addic7ed)
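# Hedged usage sketch: once the converter is registered with babelfish (the
# addic7ed provider module does this), Language grows a fromaddic7ed constructor.
# >>> language_converters.register('addic7ed = subliminal.converters.addic7ed:Addic7edConverter')
# >>> Language.fromaddic7ed('Portuguese (Brazilian)')
# <Language [pt-BR]>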

View File

@ -1,27 +0,0 @@
# -*- coding: utf-8 -*-
from babelfish import LanguageReverseConverter
from ..exceptions import ConfigurationError
class LegendasTVConverter(LanguageReverseConverter):
def __init__(self):
self.from_legendastv = {1: ('por', 'BR'), 2: ('eng',), 3: ('spa',), 4: ('fra',), 5: ('deu',), 6: ('jpn',),
7: ('dan',), 8: ('nor',), 9: ('swe',), 10: ('por',), 11: ('ara',), 12: ('ces',),
13: ('zho',), 14: ('kor',), 15: ('bul',), 16: ('ita',), 17: ('pol',)}
self.to_legendastv = {v: k for k, v in self.from_legendastv.items()}
self.codes = set(self.from_legendastv.keys())
def convert(self, alpha3, country=None, script=None):
if (alpha3, country) in self.to_legendastv:
return self.to_legendastv[(alpha3, country)]
if (alpha3,) in self.to_legendastv:
return self.to_legendastv[(alpha3,)]
raise ConfigurationError('Unsupported language code for legendastv: %s, %s, %s' % (alpha3, country, script))
def reverse(self, legendastv):
if legendastv in self.from_legendastv:
return self.from_legendastv[legendastv]
raise ConfigurationError('Unsupported language number for legendastv: %s' % legendastv)

View File

@ -1,23 +0,0 @@
# -*- coding: utf-8 -*-
from babelfish import LanguageReverseConverter
from ..exceptions import ConfigurationError
class ShooterConverter(LanguageReverseConverter):
def __init__(self):
self.from_shooter = {'chn': ('zho',), 'eng': ('eng',)}
self.to_shooter = {v: k for k, v in self.from_shooter.items()}
self.codes = set(self.from_shooter.keys())
def convert(self, alpha3, country=None, script=None):
if (alpha3,) in self.to_shooter:
return self.to_shooter[(alpha3,)]
raise ConfigurationError('Unsupported language for shooter: %s, %s, %s' % (alpha3, country, script))
def reverse(self, shooter):
if shooter in self.from_shooter:
return self.from_shooter[shooter]
raise ConfigurationError('Unsupported language code for shooter: %s' % shooter)

View File

@ -1,26 +0,0 @@
# -*- coding: utf-8 -*-
from babelfish import LanguageReverseConverter
from ..exceptions import ConfigurationError
class TheSubDBConverter(LanguageReverseConverter):
def __init__(self):
self.from_thesubdb = {'en': ('eng',), 'es': ('spa',), 'fr': ('fra',), 'it': ('ita',), 'nl': ('nld',),
'pl': ('pol',), 'pt': ('por', 'BR'), 'ro': ('ron',), 'sv': ('swe',), 'tr': ('tur',)}
self.to_thesubdb = {v: k for k, v in self.from_thesubdb.items()}
self.codes = set(self.from_thesubdb.keys())
def convert(self, alpha3, country=None, script=None):
if (alpha3, country) in self.to_thesubdb:
return self.to_thesubdb[(alpha3, country)]
if (alpha3,) in self.to_thesubdb:
return self.to_thesubdb[(alpha3,)]
raise ConfigurationError('Unsupported language for thesubdb: %s, %s, %s' % (alpha3, country, script))
def reverse(self, thesubdb):
if thesubdb in self.from_thesubdb:
return self.from_thesubdb[thesubdb]
raise ConfigurationError('Unsupported language code for thesubdb: %s' % thesubdb)

View File

@ -1,25 +0,0 @@
# -*- coding: utf-8 -*-
from babelfish import LanguageReverseConverter, language_converters
class TVsubtitlesConverter(LanguageReverseConverter):
def __init__(self):
self.alpha2_converter = language_converters['alpha2']
self.from_tvsubtitles = {'br': ('por', 'BR'), 'ua': ('ukr',), 'gr': ('ell',), 'cn': ('zho',), 'jp': ('jpn',),
'cz': ('ces',)}
self.to_tvsubtitles = {v: k for k, v in self.from_tvsubtitles.items()}
self.codes = self.alpha2_converter.codes | set(self.from_tvsubtitles.keys())
def convert(self, alpha3, country=None, script=None):
if (alpha3, country) in self.to_tvsubtitles:
return self.to_tvsubtitles[(alpha3, country)]
if (alpha3,) in self.to_tvsubtitles:
return self.to_tvsubtitles[(alpha3,)]
return self.alpha2_converter.convert(alpha3, country, script)
def reverse(self, tvsubtitles):
if tvsubtitles in self.from_tvsubtitles:
return self.from_tvsubtitles[tvsubtitles]
return self.alpha2_converter.reverse(tvsubtitles)

View File

@ -1,777 +0,0 @@
# -*- coding: utf-8 -*-
from collections import defaultdict
import platform
is_windows_special_path = False
if platform.system() == "Windows":
try:
__file__.decode("ascii")
except UnicodeDecodeError:
is_windows_special_path = True
if not is_windows_special_path:
from concurrent.futures import ThreadPoolExecutor
else:
ThreadPoolExecutor = object
from datetime import datetime
import io
import itertools
import logging
import operator
import os
import socket
from babelfish import Language, LanguageReverseError
from guessit import guessit
from six.moves.xmlrpc_client import ProtocolError
from rarfile import BadRarFile, NotRarFile, RarCannotExec, RarFile
from zipfile import BadZipfile
from ssl import SSLError
import requests
from .exceptions import ServiceUnavailable
from .extensions import provider_manager, refiner_manager
from .score import compute_score as default_compute_score
from .subtitle import SUBTITLE_EXTENSIONS, get_subtitle_path
from .utils import hash_napiprojekt, hash_opensubtitles, hash_shooter, hash_thesubdb
from .video import VIDEO_EXTENSIONS, Episode, Movie, Video
#: Supported archive extensions
ARCHIVE_EXTENSIONS = ('.rar',)
logger = logging.getLogger(__name__)
class ProviderPool(object):
"""A pool of providers with the same API as a single :class:`~subliminal.providers.Provider`.
It has a few extra features:
* Lazy loads providers when needed and supports the `with` statement to :meth:`terminate`
the providers on exit.
* Automatically discard providers on failure.
:param list providers: name of providers to use, if not all.
:param dict provider_configs: provider configuration as keyword arguments per provider name to pass when
instantiating the :class:`~subliminal.providers.Provider`.
"""
def __init__(self, providers=None, provider_configs=None):
#: Name of providers to use
self.providers = providers or provider_manager.names()
#: Provider configuration
self.provider_configs = provider_configs or {}
#: Initialized providers
self.initialized_providers = {}
#: Discarded providers
self.discarded_providers = set()
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
self.terminate()
def __getitem__(self, name):
if name not in self.providers:
raise KeyError
if name not in self.initialized_providers:
logger.info('Initializing provider %s', name)
provider = provider_manager[name].plugin(**self.provider_configs.get(name, {}))
provider.initialize()
self.initialized_providers[name] = provider
return self.initialized_providers[name]
def __delitem__(self, name):
if name not in self.initialized_providers:
raise KeyError(name)
try:
logger.info('Terminating provider %s', name)
self.initialized_providers[name].terminate()
except (requests.Timeout, socket.timeout):
logger.error('Provider %r timed out, improperly terminated', name)
except (ServiceUnavailable, ProtocolError): # OpenSubtitles raises xmlrpclib.ProtocolError when unavailable
logger.error('Provider %r unavailable, improperly terminated', name)
except requests.exceptions.HTTPError as e:
if e.response.status_code in range(500, 600):
logger.error('Provider %r unavailable, improperly terminated', name)
else:
logger.exception('Provider %r http error %r, improperly terminated', name, e.response.status_code)
except SSLError as e:
if e.args[0] == 'The read operation timed out':
logger.error('Provider %r unavailable, improperly terminated', name)
else:
logger.exception('Provider %r SSL error %r, improperly terminated', name, e.args[0])
except:
logger.exception('Provider %r terminated unexpectedly', name)
del self.initialized_providers[name]
def __iter__(self):
return iter(self.initialized_providers)
def list_subtitles_provider(self, provider, video, languages):
"""List subtitles with a single provider.
The video and languages are checked against the provider.
:param str provider: name of the provider.
:param video: video to list subtitles for.
:type video: :class:`~subliminal.video.Video`
:param languages: languages to search for.
:type languages: set of :class:`~babelfish.language.Language`
:return: found subtitles.
:rtype: list of :class:`~subliminal.subtitle.Subtitle` or None
"""
# check video validity
if not provider_manager[provider].plugin.check(video):
logger.info('Skipping provider %r: not a valid video', provider)
return []
# check supported languages
provider_languages = provider_manager[provider].plugin.languages & languages
if not provider_languages:
logger.info('Skipping provider %r: no language to search for', provider)
return []
# list subtitles
logger.info('Listing subtitles with provider %r and languages %r', provider, provider_languages)
try:
return self[provider].list_subtitles(video, provider_languages)
except (requests.Timeout, socket.timeout):
logger.error('Provider %r timed out', provider)
except (ServiceUnavailable, ProtocolError): # OpenSubtitles raises xmlrpclib.ProtocolError when unavailable
logger.error('Provider %r unavailable', provider)
except requests.exceptions.HTTPError as e:
if e.response.status_code in range(500, 600):
logger.error('Provider %r unavailable', provider)
else:
logger.exception('Provider %r http error %r', provider, e.response.status_code)
except SSLError as e:
if e.args[0] == 'The read operation timed out':
logger.error('Provider %r unavailable', provider)
else:
logger.exception('Provider %r SSL error %r', provider, e.args[0])
except:
logger.exception('Unexpected error in provider %r', provider)
def list_subtitles(self, video, languages):
"""List subtitles.
:param video: video to list subtitles for.
:type video: :class:`~subliminal.video.Video`
:param languages: languages to search for.
:type languages: set of :class:`~babelfish.language.Language`
:return: found subtitles.
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
"""
subtitles = []
for name in self.providers:
# check discarded providers
if name in self.discarded_providers:
logger.debug('Skipping discarded provider %r', name)
continue
# list subtitles
provider_subtitles = self.list_subtitles_provider(name, video, languages)
if provider_subtitles is None:
logger.info('Discarding provider %s', name)
self.discarded_providers.add(name)
continue
# add the subtitles
subtitles.extend(provider_subtitles)
return subtitles
def download_subtitle(self, subtitle):
"""Download `subtitle`'s :attr:`~subliminal.subtitle.Subtitle.content`.
:param subtitle: subtitle to download.
:type subtitle: :class:`~subliminal.subtitle.Subtitle`
:return: `True` if the subtitle has been successfully downloaded, `False` otherwise.
:rtype: bool
"""
# check discarded providers
if subtitle.provider_name in self.discarded_providers:
logger.warning('Provider %r is discarded', subtitle.provider_name)
return False
logger.info('Downloading subtitle %r', subtitle)
try:
self[subtitle.provider_name].download_subtitle(subtitle)
except (requests.Timeout, socket.timeout):
logger.error('Provider %r timed out, discarding it', subtitle.provider_name)
self.discarded_providers.add(subtitle.provider_name)
return False
except (ServiceUnavailable, ProtocolError): # OpenSubtitles raises xmlrpclib.ProtocolError when unavailable
logger.error('Provider %r unavailable, discarding it', subtitle.provider_name)
self.discarded_providers.add(subtitle.provider_name)
return False
except requests.exceptions.HTTPError as e:
if e.response.status_code in range(500, 600):
logger.error('Provider %r unavailable, discarding it', subtitle.provider_name)
else:
logger.exception('Provider %r http error %r, discarding it', subtitle.provider_name,
e.response.status_code)
self.discarded_providers.add(subtitle.provider_name)
return False
except SSLError as e:
if e.args[0] == 'The read operation timed out':
logger.error('Provider %r unavailable, discarding it', subtitle.provider_name)
else:
logger.exception('Provider %r SSL error %r, discarding it', subtitle.provider_name, e.args[0])
self.discarded_providers.add(subtitle.provider_name)
return False
except (BadRarFile, BadZipfile):
logger.error('Bad archive for %r', subtitle)
return False
except:
logger.exception('Unexpected error in provider %r, discarding it', subtitle.provider_name)
self.discarded_providers.add(subtitle.provider_name)
return False
# check subtitle validity
if not subtitle.is_valid():
logger.error('Invalid subtitle')
return False
return True
def download_best_subtitles(self, subtitles, video, languages, min_score=0, hearing_impaired=False, only_one=False,
compute_score=None):
"""Download the best matching subtitles.
:param subtitles: the subtitles to use.
:type subtitles: list of :class:`~subliminal.subtitle.Subtitle`
:param video: video to download subtitles for.
:type video: :class:`~subliminal.video.Video`
:param languages: languages to download.
:type languages: set of :class:`~babelfish.language.Language`
:param int min_score: minimum score for a subtitle to be downloaded.
:param bool hearing_impaired: hearing impaired preference.
:param bool only_one: download only one subtitle, not one per language.
:param compute_score: function that takes `subtitle` and `video` as positional arguments,
`hearing_impaired` as keyword argument and returns the score.
:return: downloaded subtitles.
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
"""
compute_score = compute_score or default_compute_score
# sort subtitles by score
scored_subtitles = sorted([(s, compute_score(s, video, hearing_impaired=hearing_impaired))
for s in subtitles], key=operator.itemgetter(1), reverse=True)
# download best subtitles, falling back on the next on error
downloaded_subtitles = []
for subtitle, score in scored_subtitles:
# check score
if score < min_score:
logger.info('Score %d is below min_score (%d)', score, min_score)
break
# check downloaded languages
if subtitle.language in set(s.language for s in downloaded_subtitles):
logger.debug('Skipping subtitle: %r already downloaded', subtitle.language)
continue
# download
if self.download_subtitle(subtitle):
downloaded_subtitles.append(subtitle)
# stop when all languages are downloaded
if set(s.language for s in downloaded_subtitles) == languages:
logger.debug('All languages downloaded')
break
# stop if only one subtitle is requested
if only_one:
logger.debug('Only one subtitle downloaded')
break
return downloaded_subtitles
def terminate(self):
"""Terminate all the :attr:`initialized_providers`."""
logger.debug('Terminating initialized providers')
for name in list(self.initialized_providers):
del self[name]
class AsyncProviderPool(ProviderPool):
"""Subclass of :class:`ProviderPool` with asynchronous support for :meth:`~ProviderPool.list_subtitles`.
:param int max_workers: maximum number of threads to use. If `None`, :attr:`max_workers` will be set
to the number of :attr:`~ProviderPool.providers`.
"""
def __init__(self, max_workers=None, *args, **kwargs):
super(AsyncProviderPool, self).__init__(*args, **kwargs)
#: Maximum number of threads to use
self.max_workers = max_workers or len(self.providers)
def list_subtitles_provider(self, provider, video, languages):
return provider, super(AsyncProviderPool, self).list_subtitles_provider(provider, video, languages)
def list_subtitles(self, video, languages):
subtitles = []
with ThreadPoolExecutor(self.max_workers) as executor:
for provider, provider_subtitles in executor.map(self.list_subtitles_provider, self.providers,
itertools.repeat(video, len(self.providers)),
itertools.repeat(languages, len(self.providers))):
# discard provider that failed
if provider_subtitles is None:
logger.info('Discarding provider %s', provider)
self.discarded_providers.add(provider)
continue
# add subtitles
subtitles.extend(provider_subtitles)
return subtitles
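# Hedged usage sketch (`video` is a placeholder): AsyncProviderPool is a drop-in
# replacement for ProviderPool that fans list_subtitles out over threads.
# with AsyncProviderPool(max_workers=4, providers=['podnapisi']) as pool:
#     found = pool.list_subtitles(video, {Language('eng')})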
def check_video(video, languages=None, age=None, undefined=False):
"""Perform some checks on the `video`.
All the checks are optional. Return `False` if any of these checks fails:
* `languages` already exist in `video`'s :attr:`~subliminal.video.Video.subtitle_languages`.
* `video` is older than `age`.
* `video` has an `undefined` language in :attr:`~subliminal.video.Video.subtitle_languages`.
:param video: video to check.
:type video: :class:`~subliminal.video.Video`
:param languages: desired languages.
:type languages: set of :class:`~babelfish.language.Language`
:param datetime.timedelta age: maximum age of the video.
:param bool undefined: fail on existing undefined language.
:return: `True` if the video passes the checks, `False` otherwise.
:rtype: bool
"""
# language test
if languages and not (languages - video.subtitle_languages):
logger.debug('All languages %r exist', languages)
return False
# age test
if age and video.age > age:
logger.debug('Video is older than %r', age)
return False
# undefined test
if undefined and Language('und') in video.subtitle_languages:
logger.debug('Undefined language found')
return False
return True
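# Doctest-style sketch (the filename is a placeholder): a freshly guessed video
# with no known subtitle languages passes the language check.
# >>> v = Video.fromname('Series.S01E01.720p.HDTV.x264-GRP.mkv')
# >>> check_video(v, languages={Language('eng')})
# True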
def search_external_subtitles(path, directory=None):
"""Search for external subtitles from a video `path` and their associated language.
Unless `directory` is provided, the search is made in the same directory as the video file.
:param str path: path to the video.
:param str directory: directory to search for subtitles.
:return: found subtitles with their languages.
:rtype: dict
"""
# split path
dirpath, filename = os.path.split(path)
dirpath = dirpath or '.'
fileroot, fileext = os.path.splitext(filename)
# search for subtitles
subtitles = {}
for p in os.listdir(directory or dirpath):
# keep only valid subtitle filenames
if not p.startswith(fileroot) or not p.endswith(SUBTITLE_EXTENSIONS):
continue
# extract the potential language code
language = Language('und')
language_code = p[len(fileroot):-len(os.path.splitext(p)[1])].replace(fileext, '').replace('_', '-')[1:]
if language_code:
try:
language = Language.fromietf(language_code)
except (ValueError, LanguageReverseError):
logger.error('Cannot parse language code %r', language_code)
subtitles[p] = language
logger.debug('Found subtitles %r', subtitles)
return subtitles
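# Hedged sketch (placeholder layout): with '/videos/movie.mkv' sitting next to
# 'movie.en.srt' and 'movie.srt', this would return something like
# {'movie.en.srt': <Language [en]>, 'movie.srt': <Language [und]>}.
# subs = search_external_subtitles('/videos/movie.mkv')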
def scan_video(path):
"""Scan a video from a `path`.
:param str path: existing path to the video.
:return: the scanned video.
:rtype: :class:`~subliminal.video.Video`
"""
# check for non-existing path
if not os.path.exists(path):
raise ValueError('Path does not exist')
# check video extension
if not path.endswith(VIDEO_EXTENSIONS):
raise ValueError('%r is not a valid video extension' % os.path.splitext(path)[1])
dirpath, filename = os.path.split(path)
logger.info('Scanning video %r in %r', filename, dirpath)
# guess
video = Video.fromguess(path, guessit(path))
# size and hashes
video.size = os.path.getsize(path)
if video.size > 10485760:
logger.debug('Size is %d', video.size)
video.hashes['opensubtitles'] = hash_opensubtitles(path)
video.hashes['shooter'] = hash_shooter(path)
video.hashes['thesubdb'] = hash_thesubdb(path)
video.hashes['napiprojekt'] = hash_napiprojekt(path)
logger.debug('Computed hashes %r', video.hashes)
else:
logger.warning('Size is lower than 10MB: hashes not computed')
return video
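# Hedged sketch (assumes the placeholder path exists on disk, which also enables
# the size and hash computation above):
# video = scan_video('/videos/Movie.2017.1080p.BluRay.x264-GRP.mkv')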
def scan_archive(path):
"""Scan an archive from a `path`.
:param str path: existing path to the archive.
:return: the scanned video.
:rtype: :class:`~subliminal.video.Video`
"""
# check for non-existing path
if not os.path.exists(path):
raise ValueError('Path does not exist')
# check video extension
if not path.endswith(ARCHIVE_EXTENSIONS):
raise ValueError('%r is not a valid archive extension' % os.path.splitext(path)[1])
dirpath, filename = os.path.split(path)
logger.info('Scanning archive %r in %r', filename, dirpath)
# rar extension
if filename.endswith('.rar'):
rar = RarFile(path)
# filter on video extensions
rar_filenames = [f for f in rar.namelist() if f.endswith(VIDEO_EXTENSIONS)]
# no video found
if not rar_filenames:
raise ValueError('No video in archive')
# more than one video found
if len(rar_filenames) > 1:
raise ValueError('More than one video in archive')
# guess
rar_filename = rar_filenames[0]
rar_filepath = os.path.join(dirpath, rar_filename)
video = Video.fromguess(rar_filepath, guessit(rar_filepath))
# size
video.size = rar.getinfo(rar_filename).file_size
else:
raise ValueError('Unsupported extension %r' % os.path.splitext(path)[1])
return video
def scan_videos(path, age=None, archives=True):
"""Scan `path` for videos and their subtitles.
See :func:`refine` to find additional information for the video.
:param str path: existing directory path to scan.
:param datetime.timedelta age: maximum age of the video or archive.
:param bool archives: scan videos in archives.
:return: the scanned videos.
:rtype: list of :class:`~subliminal.video.Video`
"""
# check for non-existing path
if not os.path.exists(path):
raise ValueError('Path does not exist')
# check for non-directory path
if not os.path.isdir(path):
raise ValueError('Path is not a directory')
# walk the path
videos = []
for dirpath, dirnames, filenames in os.walk(path):
logger.debug('Walking directory %r', dirpath)
# remove badly encoded and hidden dirnames
for dirname in list(dirnames):
if dirname.startswith('.'):
logger.debug('Skipping hidden dirname %r in %r', dirname, dirpath)
dirnames.remove(dirname)
# scan for videos
for filename in filenames:
# filter on videos and archives
if not (filename.endswith(VIDEO_EXTENSIONS) or archives and filename.endswith(ARCHIVE_EXTENSIONS)):
continue
# skip hidden files
if filename.startswith('.'):
logger.debug('Skipping hidden filename %r in %r', filename, dirpath)
continue
# reconstruct the file path
filepath = os.path.join(dirpath, filename)
# skip links
if os.path.islink(filepath):
logger.debug('Skipping link %r in %r', filename, dirpath)
continue
# skip old files
try:
file_age = datetime.utcfromtimestamp(os.path.getmtime(filepath))
except ValueError:
logger.warning('Could not get age of file %r in %r', filename, dirpath)
continue
else:
if age and datetime.utcnow() - file_age > age:
logger.debug('Skipping old file %r in %r', filename, dirpath)
continue
# scan
if filename.endswith(VIDEO_EXTENSIONS): # video
try:
video = scan_video(filepath)
except ValueError: # pragma: no cover
logger.exception('Error scanning video')
continue
elif archives and filename.endswith(ARCHIVE_EXTENSIONS): # archive
try:
video = scan_archive(filepath)
except (NotRarFile, RarCannotExec, ValueError): # pragma: no cover
logger.exception('Error scanning archive')
continue
else: # pragma: no cover
raise ValueError('Unsupported file %r' % filename)
videos.append(video)
return videos
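# Hedged sketch (placeholder directory): recursively collect videos modified in
# the last two weeks, skipping archives.
# videos = scan_videos('/videos', age=timedelta(weeks=2), archives=False)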
def refine(video, episode_refiners=None, movie_refiners=None, **kwargs):
"""Refine a video using :ref:`refiners`.
.. note::
Exceptions raised in refiners are caught and logged rather than propagated.
:param video: the video to refine.
:type video: :class:`~subliminal.video.Video`
:param tuple episode_refiners: refiners to use for episodes.
:param tuple movie_refiners: refiners to use for movies.
:param \*\*kwargs: additional parameters for the :func:`~subliminal.refiners.refine` functions.
"""
refiners = ()
if isinstance(video, Episode):
refiners = episode_refiners or ('metadata', 'tvdb', 'omdb')
elif isinstance(video, Movie):
refiners = movie_refiners or ('metadata', 'omdb')
for refiner in refiners:
logger.info('Refining video with %s', refiner)
try:
refiner_manager[refiner].plugin(video, **kwargs)
except:
logger.error('Failed to refine video %r', video.name)
logger.debug('Refiner exception:', exc_info=True)
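# Hedged sketch (`video` is a placeholder; refiner availability depends on the
# installed extras): run only the metadata refiner on an episode.
# refine(video, episode_refiners=('metadata',))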
def list_subtitles(videos, languages, pool_class=ProviderPool, **kwargs):
"""List subtitles.
The `videos` must pass the `languages` check of :func:`check_video`.
:param videos: videos to list subtitles for.
:type videos: set of :class:`~subliminal.video.Video`
:param languages: languages to search for.
:type languages: set of :class:`~babelfish.language.Language`
:param pool_class: class to use as provider pool.
:type pool_class: :class:`ProviderPool`, :class:`AsyncProviderPool` or similar
:param \*\*kwargs: additional parameters for the provided `pool_class` constructor.
:return: found subtitles per video.
:rtype: dict of :class:`~subliminal.video.Video` to list of :class:`~subliminal.subtitle.Subtitle`
"""
listed_subtitles = defaultdict(list)
# check videos
checked_videos = []
for video in videos:
if not check_video(video, languages=languages):
logger.info('Skipping video %r', video)
continue
checked_videos.append(video)
# return immediately if no video passed the checks
if not checked_videos:
return listed_subtitles
# list subtitles
with pool_class(**kwargs) as pool:
for video in checked_videos:
logger.info('Listing subtitles for %r', video)
subtitles = pool.list_subtitles(video, languages - video.subtitle_languages)
listed_subtitles[video].extend(subtitles)
logger.info('Found %d subtitle(s)', len(subtitles))
return listed_subtitles
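# Hedged sketch (placeholders throughout): list with the asynchronous pool and a
# restricted provider set.
# found = list_subtitles({video}, {Language('eng')}, pool_class=AsyncProviderPool,
#                        providers=['podnapisi'])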
def download_subtitles(subtitles, pool_class=ProviderPool, **kwargs):
"""Download :attr:`~subliminal.subtitle.Subtitle.content` of `subtitles`.
:param subtitles: subtitles to download.
:type subtitles: list of :class:`~subliminal.subtitle.Subtitle`
:param pool_class: class to use as provider pool.
:type pool_class: :class:`ProviderPool`, :class:`AsyncProviderPool` or similar
:param \*\*kwargs: additional parameters for the provided `pool_class` constructor.
"""
with pool_class(**kwargs) as pool:
for subtitle in subtitles:
logger.info('Downloading subtitle %r', subtitle)
pool.download_subtitle(subtitle)
def download_best_subtitles(videos, languages, min_score=0, hearing_impaired=False, only_one=False, compute_score=None,
pool_class=ProviderPool, **kwargs):
"""List and download the best matching subtitles.
The `videos` must pass the `languages` and `undefined` (`only_one`) checks of :func:`check_video`.
:param videos: videos to download subtitles for.
:type videos: set of :class:`~subliminal.video.Video`
:param languages: languages to download.
:type languages: set of :class:`~babelfish.language.Language`
:param int min_score: minimum score for a subtitle to be downloaded.
:param bool hearing_impaired: hearing impaired preference.
:param bool only_one: download only one subtitle, not one per language.
:param compute_score: function that takes `subtitle` and `video` as positional arguments,
`hearing_impaired` as keyword argument and returns the score.
:param pool_class: class to use as provider pool.
:type pool_class: :class:`ProviderPool`, :class:`AsyncProviderPool` or similar
:param \*\*kwargs: additional parameters for the provided `pool_class` constructor.
:return: downloaded subtitles per video.
:rtype: dict of :class:`~subliminal.video.Video` to list of :class:`~subliminal.subtitle.Subtitle`
"""
downloaded_subtitles = defaultdict(list)
# check videos
checked_videos = []
for video in videos:
if not check_video(video, languages=languages, undefined=only_one):
logger.info('Skipping video %r', video)
continue
checked_videos.append(video)
# return immediately if no video passed the checks
if not checked_videos:
return downloaded_subtitles
# download best subtitles
with pool_class(**kwargs) as pool:
for video in checked_videos:
logger.info('Downloading best subtitles for %r', video)
subtitles = pool.download_best_subtitles(pool.list_subtitles(video, languages - video.subtitle_languages),
video, languages, min_score=min_score,
hearing_impaired=hearing_impaired, only_one=only_one,
compute_score=compute_score)
logger.info('Downloaded %d subtitle(s)', len(subtitles))
downloaded_subtitles[video].extend(subtitles)
return downloaded_subtitles
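# Hedged sketch (placeholders; note min_score here is a raw score, unlike the
# CLI's 0-100 scale which is converted against the hash score):
# best = download_best_subtitles({video}, {Language('eng')}, min_score=100)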
def save_subtitles(video, subtitles, single=False, directory=None, encoding=None):
"""Save subtitles on filesystem.
Subtitles are saved in the order of the list. If a subtitle with a language has already been saved, other subtitles
with the same language are silently ignored.
The extension used is `.lang.srt` by default or `.srt` if `single` is `True`, with `lang` being the IETF code for
the :attr:`~subliminal.subtitle.Subtitle.language` of the subtitle.
:param video: video of the subtitles.
:type video: :class:`~subliminal.video.Video`
:param subtitles: subtitles to save.
:type subtitles: list of :class:`~subliminal.subtitle.Subtitle`
:param bool single: save a single subtitle, default is to save one subtitle per language.
:param str directory: path to directory where to save the subtitles, default is next to the video.
:param str encoding: encoding in which to save the subtitles, default is to keep original encoding.
:return: the saved subtitles
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
"""
saved_subtitles = []
for subtitle in subtitles:
# check content
if subtitle.content is None:
logger.error('Skipping subtitle %r: no content', subtitle)
continue
# check language
if subtitle.language in set(s.language for s in saved_subtitles):
logger.debug('Skipping subtitle %r: language already saved', subtitle)
continue
# create subtitle path
subtitle_path = get_subtitle_path(video.name, None if single else subtitle.language)
if directory is not None:
subtitle_path = os.path.join(directory, os.path.split(subtitle_path)[1])
# save content as is or in the specified encoding
logger.info('Saving %r to %r', subtitle, subtitle_path)
if encoding is None:
with io.open(subtitle_path, 'wb') as f:
f.write(subtitle.content)
else:
with io.open(subtitle_path, 'w', encoding=encoding) as f:
f.write(subtitle.text)
saved_subtitles.append(subtitle)
# check single
if single:
break
return saved_subtitles
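# Hedged sketch continuing the example above (placeholders): persist the results
# next to the video, re-encoded as UTF-8.
# saved = save_subtitles(video, best[video], encoding='utf-8')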

View File

@ -1,29 +0,0 @@
# -*- coding: utf-8 -*-
class Error(Exception):
"""Base class for exceptions in subliminal."""
pass
class ProviderError(Error):
"""Exception raised by providers."""
pass
class ConfigurationError(ProviderError):
"""Exception raised by providers when badly configured."""
pass
class AuthenticationError(ProviderError):
"""Exception raised by providers when authentication failed."""
pass
class ServiceUnavailable(ProviderError):
"""Exception raised when status is '503 Service Unavailable'."""
pass
class DownloadLimitExceeded(ProviderError):
"""Exception raised by providers when download limit is exceeded."""
pass

View File

@ -1,105 +0,0 @@
# -*- coding: utf-8 -*-
from pkg_resources import EntryPoint
from stevedore import ExtensionManager
class RegistrableExtensionManager(ExtensionManager):
""":class:~stevedore.extensions.ExtensionManager` with support for registration.
It allows internal extensions to be loaded without setup, and additional extensions to be registered and unregistered.
Loading is done in this order:
* Entry point extensions
* Internal extensions
* Registered extensions
:param str namespace: namespace argument for :class:`~stevedore.extensions.ExtensionManager`.
:param list internal_extensions: internal extensions to use with entry point syntax.
:param \*\*kwargs: additional parameters for the :class:`~stevedore.extensions.ExtensionManager` constructor.
"""
def __init__(self, namespace, internal_extensions, **kwargs):
#: Registered extensions with entry point syntax
self.registered_extensions = []
#: Internal extensions with entry point syntax
self.internal_extensions = internal_extensions
super(RegistrableExtensionManager, self).__init__(namespace, **kwargs)
def list_entry_points(self):
# copy of default extensions
eps = list(super(RegistrableExtensionManager, self).list_entry_points())
# internal extensions
for iep in self.internal_extensions:
ep = EntryPoint.parse(iep)
if ep.name not in [e.name for e in eps]:
eps.append(ep)
# registered extensions
for rep in self.registered_extensions:
ep = EntryPoint.parse(rep)
if ep.name not in [e.name for e in eps]:
eps.append(ep)
return eps
def register(self, entry_point):
"""Register an extension
:param str entry_point: extension to register (entry point syntax).
:raise: ValueError if already registered.
"""
if entry_point in self.registered_extensions:
raise ValueError('Extension already registered')
ep = EntryPoint.parse(entry_point)
if ep.name in self.names():
raise ValueError('An extension with the same name already exists')
ext = self._load_one_plugin(ep, False, (), {}, False)
self.extensions.append(ext)
if self._extensions_by_name is not None:
self._extensions_by_name[ext.name] = ext
self.registered_extensions.insert(0, entry_point)
def unregister(self, entry_point):
"""Unregister a provider
:param str entry_point: provider to unregister (entry point syntax).
"""
if entry_point not in self.registered_extensions:
raise ValueError('Extension not registered')
ep = EntryPoint.parse(entry_point)
self.registered_extensions.remove(entry_point)
if self._extensions_by_name is not None:
del self._extensions_by_name[ep.name]
for i, ext in enumerate(self.extensions):
if ext.name == ep.name:
del self.extensions[i]
break
#: Provider manager
provider_manager = RegistrableExtensionManager('subliminal.providers', [
'addic7ed = subliminal.providers.addic7ed:Addic7edProvider',
'legendastv = subliminal.providers.legendastv:LegendasTVProvider',
'opensubtitles = subliminal.providers.opensubtitles:OpenSubtitlesProvider',
'podnapisi = subliminal.providers.podnapisi:PodnapisiProvider',
'shooter = subliminal.providers.shooter:ShooterProvider',
'thesubdb = subliminal.providers.thesubdb:TheSubDBProvider',
'tvsubtitles = subliminal.providers.tvsubtitles:TVsubtitlesProvider'
])
#: Refiner manager
refiner_manager = RegistrableExtensionManager('subliminal.refiners', [
'metadata = subliminal.refiners.metadata:refine',
'omdb = subliminal.refiners.omdb:refine',
'tvdb = subliminal.refiners.tvdb:refine'
])
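# Hedged sketch with a hypothetical entry point ('myapp' is not a real package):
# provider_manager.register('mysubs = myapp.providers.mysubs:MySubsProvider')
# provider_manager.unregister('mysubs = myapp.providers.mysubs:MySubsProvider')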

View File

@ -1,164 +0,0 @@
# -*- coding: utf-8 -*-
import logging
from bs4 import BeautifulSoup, FeatureNotFound
from six.moves.xmlrpc_client import SafeTransport
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
class TimeoutSafeTransport(SafeTransport):
"""Timeout support for ``xmlrpc.client.SafeTransport``."""
def __init__(self, timeout, *args, **kwargs):
SafeTransport.__init__(self, *args, **kwargs)
self.timeout = timeout
def make_connection(self, host):
c = SafeTransport.make_connection(self, host)
c.timeout = self.timeout
return c
class ParserBeautifulSoup(BeautifulSoup):
"""A ``bs4.BeautifulSoup`` that picks the first parser available in `parsers`.
:param markup: markup for the ``bs4.BeautifulSoup``.
:param list parsers: parser names, in order of preference.
"""
def __init__(self, markup, parsers, **kwargs):
# reject features
if set(parsers).intersection({'fast', 'permissive', 'strict', 'xml', 'html', 'html5'}):
raise ValueError('Features not allowed, only parser names')
# reject some kwargs
if 'features' in kwargs:
raise ValueError('Cannot use features kwarg')
if 'builder' in kwargs:
raise ValueError('Cannot use builder kwarg')
# pick the first parser available
for parser in parsers:
try:
super(ParserBeautifulSoup, self).__init__(markup, parser, **kwargs)
return
except FeatureNotFound:
pass
raise FeatureNotFound
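# Doctest-style sketch: prefer lxml but fall back to the stdlib parser when it
# is not installed.
# >>> soup = ParserBeautifulSoup('<p>hi</p>', ['lxml', 'html.parser'])
# >>> soup.p.text
# 'hi'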
class Provider(object):
"""Base class for providers.
Any configuration the provider accepts, such as credentials, must take place during instantiation.
:raise: :class:`~subliminal.exceptions.ConfigurationError` if there is a configuration error
"""
#: Supported set of :class:`~babelfish.language.Language`
languages = set()
#: Supported video types
video_types = (Episode, Movie)
#: Required hash, if any
required_hash = None
#: Subtitle class to use
subtitle_class = None
def __enter__(self):
self.initialize()
return self
def __exit__(self, exc_type, exc_value, traceback):
self.terminate()
def initialize(self):
"""Initialize the provider.
Must be called when starting to work with the provider. This is the place for network initialization
or login operations.
.. note::
This is called automatically when entering the `with` statement
"""
raise NotImplementedError
def terminate(self):
"""Terminate the provider.
Must be called when done with the provider. This is the place for network shutdown or logout operations.
.. note::
This is called automatically when exiting the `with` statement
"""
raise NotImplementedError
@classmethod
def check(cls, video):
"""Check if the `video` can be processed.
The `video` is considered invalid if not an instance of :attr:`video_types` or if the :attr:`required_hash` is
not present in :attr:`~subliminal.video.Video.hashes` attribute of the `video`.
:param video: the video to check.
:type video: :class:`~subliminal.video.Video`
:return: `True` if the `video` is valid, `False` otherwise.
:rtype: bool
"""
if not isinstance(video, cls.video_types):
return False
if cls.required_hash is not None and cls.required_hash not in video.hashes:
return False
return True
def query(self, *args, **kwargs):
"""Query the provider for subtitles.
Arguments should match the provider's actual query parameters as closely as possible.
:return: found subtitles.
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
:raise: :class:`~subliminal.exceptions.ProviderError`
"""
raise NotImplementedError
def list_subtitles(self, video, languages):
"""List subtitles for the `video` with the given `languages`.
This will call the :meth:`query` method internally. The parameters passed to the :meth:`query` method may
vary depending on the amount of information available in the `video`.
:param video: video to list subtitles for.
:type video: :class:`~subliminal.video.Video`
:param languages: languages to search for.
:type languages: set of :class:`~babelfish.language.Language`
:return: found subtitles.
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
:raise: :class:`~subliminal.exceptions.ProviderError`
"""
raise NotImplementedError
def download_subtitle(self, subtitle):
"""Download `subtitle`'s :attr:`~subliminal.subtitle.Subtitle.content`.
:param subtitle: subtitle to download.
:type subtitle: :class:`~subliminal.subtitle.Subtitle`
:raise: :class:`~subliminal.exceptions.ProviderError`
"""
raise NotImplementedError
def __repr__(self):
return '<%s [%r]>' % (self.__class__.__name__, self.video_types)
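# Hedged sketch (SomeProvider stands for any concrete subclass; `video` is a
# placeholder): the with statement calls initialize() and terminate() as the
# docstrings above describe.
# with SomeProvider() as provider:
#     found = provider.list_subtitles(video, provider.languages)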

View File

@ -1,321 +0,0 @@
# -*- coding: utf-8 -*-
import logging
import re
from babelfish import Language, language_converters
from guessit import guessit
from requests import Session
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError, DownloadLimitExceeded
from ..score import get_equivalent_release_groups
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize, sanitize_release_group
from ..video import Episode
logger = logging.getLogger(__name__)
language_converters.register('addic7ed = subliminal.converters.addic7ed:Addic7edConverter')
# Series cell matching regex
show_cells_re = re.compile(b'<td class="version">.*?</td>', re.DOTALL)
#: Series header parsing regex
series_year_re = re.compile(r'^(?P<series>[ \w\'.:(),*&!?-]+?)(?: \((?P<year>\d{4})\))?$')
class Addic7edSubtitle(Subtitle):
"""Addic7ed Subtitle."""
provider_name = 'addic7ed'
def __init__(self, language, hearing_impaired, page_link, series, season, episode, title, year, version,
download_link):
super(Addic7edSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link)
self.series = series
self.season = season
self.episode = episode
self.title = title
self.year = year
self.version = version
self.download_link = download_link
@property
def id(self):
return self.download_link
def get_matches(self, video):
matches = set()
# series name
if video.series and sanitize(self.series) in (
sanitize(name) for name in [video.series] + video.alternative_series):
matches.add('series')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# title of the episode
if video.title and sanitize(self.title) == sanitize(video.title):
matches.add('title')
# year
if (video.original_series and self.year is None) or (video.year and video.year == self.year):
matches.add('year')
# release_group
if (video.release_group and self.version and
any(r in sanitize_release_group(self.version)
for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
matches.add('release_group')
# resolution
if video.resolution and self.version and video.resolution in self.version.lower():
matches.add('resolution')
# format
if video.format and self.version and video.format.lower() in self.version.lower():
matches.add('format')
# other properties
matches |= guess_matches(video, guessit(self.version), partial=True)
return matches
class Addic7edProvider(Provider):
"""Addic7ed Provider."""
languages = {Language('por', 'BR')} | {Language(l) for l in [
'ara', 'aze', 'ben', 'bos', 'bul', 'cat', 'ces', 'dan', 'deu', 'ell', 'eng', 'eus', 'fas', 'fin', 'fra', 'glg',
'heb', 'hrv', 'hun', 'hye', 'ind', 'ita', 'jpn', 'kor', 'mkd', 'msa', 'nld', 'nor', 'pol', 'por', 'ron', 'rus',
'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'tha', 'tur', 'ukr', 'vie', 'zho'
]}
video_types = (Episode,)
server_url = 'http://www.addic7ed.com/'
subtitle_class = Addic7edSubtitle
def __init__(self, username=None, password=None):
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
self.username = username
self.password = password
self.logged_in = False
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
# login
if self.username and self.password:
logger.info('Logging in')
data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
r = self.session.post(self.server_url + 'dologin.php', data, allow_redirects=False, timeout=10)
if r.status_code != 302:
raise AuthenticationError(self.username)
logger.debug('Logged in')
self.logged_in = True
def terminate(self):
# logout
if self.logged_in:
logger.info('Logging out')
r = self.session.get(self.server_url + 'logout.php', timeout=10)
r.raise_for_status()
logger.debug('Logged out')
self.logged_in = False
self.session.close()
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def _get_show_ids(self):
"""Get the ``dict`` of show ids per series by querying the `shows.php` page.
:return: show id per series, lower case and without quotes.
:rtype: dict
"""
# get the show page
logger.info('Getting show ids')
r = self.session.get(self.server_url + 'shows.php', timeout=10)
r.raise_for_status()
# LXML parser seems to fail when parsing Addic7ed.com HTML markup.
# Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
# Assuming the site's markup is bad, and stripping it down to only contain what's needed.
show_cells = re.findall(show_cells_re, r.content)
if show_cells:
soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
else:
# If RegEx fails, fall back to original r.content and use 'html.parser'
soup = ParserBeautifulSoup(r.content, ['html.parser'])
# populate the show ids
show_ids = {}
for show in soup.select('td.version > h3 > a[href^="/show/"]'):
show_ids[sanitize(show.text)] = int(show['href'][6:])
logger.debug('Found %d show ids', len(show_ids))
return show_ids
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def _search_show_id(self, series, year=None):
"""Search the show id from the `series` and `year`.
:param str series: series of the episode.
:param year: year of the series, if any.
:type year: int
:return: the show id, if found.
:rtype: int
"""
# addic7ed doesn't support search with quotes
series = series.replace('\'', ' ')
# build the params
series_year = '%s %d' % (series, year) if year is not None else series
params = {'search': series_year, 'Submit': 'Search'}
# make the search
logger.info('Searching show ids with %r', params)
r = self.session.get(self.server_url + 'search.php', params=params, timeout=10)
r.raise_for_status()
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# get the suggestion
suggestion = soup.select('span.titulo > a[href^="/show/"]')
if not suggestion:
logger.warning('Show id not found: no suggestion')
return None
if not sanitize(suggestion[0].i.text.replace('\'', ' ')) == sanitize(series_year):
logger.warning('Show id not found: suggestion does not match')
return None
show_id = int(suggestion[0]['href'][6:])
logger.debug('Found show id %d', show_id)
return show_id
def get_show_id(self, series, year=None, country_code=None):
"""Get the best matching show id for `series`, `year` and `country_code`.
First search in the result of :meth:`_get_show_ids` and fallback on a search with :meth:`_search_show_id`.
:param str series: series of the episode.
:param year: year of the series, if any.
:type year: int
:param country_code: country code of the series, if any.
:type country_code: str
:return: the show id, if found.
:rtype: int
"""
series_sanitized = sanitize(series).lower()
show_ids = self._get_show_ids()
show_id = None
# attempt with country
if not show_id and country_code:
logger.debug('Getting show id with country')
show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower()))
# attempt with year
if not show_id and year:
logger.debug('Getting show id with year')
show_id = show_ids.get('%s %d' % (series_sanitized, year))
# attempt clean
if not show_id:
logger.debug('Getting show id')
show_id = show_ids.get(series_sanitized)
# search as last resort
if not show_id:
logger.warning('Series %s not found in show ids', series)
show_id = self._search_show_id(series)
return show_id
def query(self, show_id, series, season, year=None, country=None):
# get the page of the season of the show
logger.info('Getting the page of show id %d, season %d', show_id, season)
r = self.session.get(self.server_url + 'show/%d' % show_id, params={'season': season}, timeout=10)
r.raise_for_status()
if not r.content:
# Provider returns a status of 304 Not Modified with empty content
# raise_for_status won't raise an exception for that status code
logger.debug('No data returned from provider')
return []
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# loop over subtitle rows
match = series_year_re.match(soup.select('#header font')[0].text.strip()[:-10])
series = match.group('series')
year = int(match.group('year')) if match.group('year') else None
subtitles = []
for row in soup.select('tr.epeven'):
cells = row('td')
# ignore incomplete subtitles
status = cells[5].text
if status != 'Completed':
logger.debug('Ignoring subtitle with status %s', status)
continue
# read the item
language = Language.fromaddic7ed(cells[3].text)
hearing_impaired = bool(cells[6].text)
page_link = self.server_url + cells[2].a['href'][1:]
season = int(cells[0].text)
episode = int(cells[1].text)
title = cells[2].text
version = cells[4].text
download_link = cells[9].a['href'][1:]
subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, season, episode, title, year,
version, download_link)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
# lookup show_id
titles = [video.series] + video.alternative_series
show_id = None
for title in titles:
show_id = self.get_show_id(title, video.year)
if show_id is not None:
break
# query for subtitles with the show_id
if show_id is not None:
subtitles = [s for s in self.query(show_id, title, video.season, video.year)
if s.language in languages and s.episode == video.episode]
if subtitles:
return subtitles
else:
logger.error('No show id found for %r (%r)', video.series, {'year': video.year})
return []
def download_subtitle(self, subtitle):
# download the subtitle
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(self.server_url + subtitle.download_link, headers={'Referer': subtitle.page_link},
timeout=10)
r.raise_for_status()
if not r.content:
# Provider returns a status of 304 Not Modified with an empty content
# raise_for_status won't raise exception for that status code
logger.debug('Unable to download subtitle. No data returned from provider')
return
# detect download limit exceeded
if r.headers['Content-Type'] == 'text/html':
raise DownloadLimitExceeded
subtitle.content = fix_line_ending(r.content)

View File

@@ -1,522 +0,0 @@
# -*- coding: utf-8 -*-
import io
import json
import logging
import os
import re
from babelfish import Language, language_converters
from datetime import datetime, timedelta
from dogpile.cache.api import NO_VALUE
from guessit import guessit
import pytz
import rarfile
from rarfile import RarFile, is_rarfile
from requests import Session
from zipfile import ZipFile, is_zipfile
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError, ProviderError, ServiceUnavailable
from ..subtitle import SUBTITLE_EXTENSIONS, Subtitle, fix_line_ending, guess_matches, sanitize
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
language_converters.register('legendastv = subliminal.converters.legendastv:LegendasTVConverter')
# Configure :mod:`rarfile` to use the same path separator as :mod:`zipfile`
rarfile.PATH_SEP = '/'
#: Conversion map for types
type_map = {'M': 'movie', 'S': 'episode', 'C': 'episode'}
#: BR title season parsing regex
season_re = re.compile(r' - (?P<season>\d+)(\xaa|a|st|nd|rd|th) (temporada|season)', re.IGNORECASE)
#: Downloads parsing regex
downloads_re = re.compile(r'(?P<downloads>\d+) downloads')
#: Rating parsing regex
rating_re = re.compile(r'nota (?P<rating>\d+)')
#: Timestamp parsing regex
timestamp_re = re.compile(r'(?P<day>\d+)/(?P<month>\d+)/(?P<year>\d+) - (?P<hour>\d+):(?P<minute>\d+)')
#: Title with year/country regex
title_re = re.compile(r'^(?P<series>.*?)(?: \((?:(?P<year>\d{4})|(?P<country>[A-Z]{2}))\))?$')
#: Cache key for releases
releases_key = __name__ + ':releases|{archive_id}|{archive_name}'
class LegendasTVArchive(object):
"""LegendasTV Archive.
:param str id: identifier.
:param str name: name.
:param bool pack: contains subtitles for multiple episodes.
:param bool featured: whether the archive is featured.
:param str link: link.
:param int downloads: download count.
:param int rating: rating (0-10).
:param timestamp: timestamp.
:type timestamp: datetime.datetime
"""
def __init__(self, id, name, pack, featured, link, downloads=0, rating=0, timestamp=None):
#: Identifier
self.id = id
#: Name
self.name = name
#: Pack
self.pack = pack
#: Featured
self.featured = featured
#: Link
self.link = link
#: Download count
self.downloads = downloads
#: Rating (0-10)
self.rating = rating
#: Timestamp
self.timestamp = timestamp
#: Compressed content as :class:`rarfile.RarFile` or :class:`zipfile.ZipFile`
self.content = None
def __repr__(self):
return '<%s [%s] %r>' % (self.__class__.__name__, self.id, self.name)
class LegendasTVSubtitle(Subtitle):
"""LegendasTV Subtitle."""
provider_name = 'legendastv'
def __init__(self, language, type, title, year, imdb_id, season, archive, name):
super(LegendasTVSubtitle, self).__init__(language, page_link=archive.link)
self.type = type
self.title = title
self.year = year
self.imdb_id = imdb_id
self.season = season
self.archive = archive
self.name = name
@property
def id(self):
return '%s-%s' % (self.archive.id, self.name.lower())
def get_matches(self, video, hearing_impaired=False):
matches = set()
# episode
if isinstance(video, Episode) and self.type == 'episode':
# series
if video.series and (sanitize(self.title) in (
sanitize(name) for name in [video.series] + video.alternative_series)):
matches.add('series')
# year
if (video.original_series and self.year is None) or (video.year and video.year == self.year):
matches.add('year')
# imdb_id
if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
matches.add('series_imdb_id')
# movie
elif isinstance(video, Movie) and self.type == 'movie':
# title
if video.title and (sanitize(self.title) in (
sanitize(name) for name in [video.title] + video.alternative_titles)):
matches.add('title')
# year
if video.year and self.year == video.year:
matches.add('year')
# imdb_id
if video.imdb_id and self.imdb_id == video.imdb_id:
matches.add('imdb_id')
# name
matches |= guess_matches(video, guessit(self.name, {'type': self.type}))
return matches
class LegendasTVProvider(Provider):
"""LegendasTV Provider.
:param str username: username.
:param str password: password.
"""
languages = {Language.fromlegendastv(l) for l in language_converters['legendastv'].codes}
server_url = 'http://legendas.tv/'
subtitle_class = LegendasTVSubtitle
def __init__(self, username=None, password=None):
# Provider needs UNRAR installed. If not available raise ConfigurationError
try:
rarfile.custom_check(rarfile.UNRAR_TOOL)
except rarfile.RarExecError:
raise ConfigurationError('UNRAR tool not available')
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
self.username = username
self.password = password
self.logged_in = False
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
# login
if self.username and self.password:
logger.info('Logging in')
data = {'_method': 'POST', 'data[User][username]': self.username, 'data[User][password]': self.password}
r = self.session.post(self.server_url + 'login', data, allow_redirects=False, timeout=10)
raise_for_status(r)
soup = ParserBeautifulSoup(r.content, ['html.parser'])
if soup.find('div', {'class': 'alert-error'}, string=re.compile(u'Usuário ou senha inválidos')):
raise AuthenticationError(self.username)
logger.debug('Logged in')
self.logged_in = True
def terminate(self):
# logout
if self.logged_in:
logger.info('Logging out')
r = self.session.get(self.server_url + 'users/logout', allow_redirects=False, timeout=10)
raise_for_status(r)
logger.debug('Logged out')
self.logged_in = False
self.session.close()
@staticmethod
def is_valid_title(title, title_id, sanitized_title, season, year):
"""Check if is a valid title."""
sanitized_result = sanitize(title['title'])
if sanitized_result != sanitized_title:
logger.debug("Mismatched title, discarding title %d (%s)",
title_id, sanitized_result)
return
# episode type
if season:
# discard mismatches on type
if title['type'] != 'episode':
logger.debug("Mismatched 'episode' type, discarding title %d (%s)", title_id, sanitized_result)
return
# discard mismatches on season
if 'season' not in title or title['season'] != season:
logger.debug('Mismatched season %s, discarding title %d (%s)',
title.get('season'), title_id, sanitized_result)
return
# movie type
else:
# discard mismatches on type
if title['type'] != 'movie':
logger.debug("Mismatched 'movie' type, discarding title %d (%s)", title_id, sanitized_result)
return
# discard mismatches on year
if year is not None and 'year' in title and title['year'] != year:
logger.debug("Mismatched movie year, discarding title %d (%s)", title_id, sanitized_result)
return
return True
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME, should_cache_fn=lambda value: value)
def search_titles(self, title, season, title_year):
"""Search for titles matching the `title`.
For episodes, each season has its own title.
:param str title: the title to search for.
:param int season: season of the title
:param int title_year: year of the title
:return: found titles.
:rtype: dict
"""
titles = {}
sanitized_titles = [sanitize(title)]
ignore_characters = {'\'', '.'}
if any(c in title for c in ignore_characters):
sanitized_titles.append(sanitize(title, ignore_characters=ignore_characters))
for sanitized_title in sanitized_titles:
# make the query
if season:
logger.info('Searching episode title %r for season %r', sanitized_title, season)
else:
logger.info('Searching movie title %r', sanitized_title)
r = self.session.get(self.server_url + 'legenda/sugestao/{}'.format(sanitized_title), timeout=10)
raise_for_status(r)
results = json.loads(r.text)
# loop over results
for result in results:
source = result['_source']
# extract id
title_id = int(source['id_filme'])
# extract type
title = {'type': type_map[source['tipo']]}
# extract title, year and country
name, year, country = title_re.match(source['dsc_nome']).groups()
title['title'] = name
# extract imdb_id
if source['id_imdb'] != '0':
if not source['id_imdb'].startswith('tt'):
title['imdb_id'] = 'tt' + source['id_imdb'].zfill(7)
else:
title['imdb_id'] = source['id_imdb']
# extract season
if title['type'] == 'episode':
if source['temporada'] and source['temporada'].isdigit():
title['season'] = int(source['temporada'])
else:
match = season_re.search(source['dsc_nome_br'])
if match:
title['season'] = int(match.group('season'))
else:
logger.debug('No season detected for title %d (%s)', title_id, name)
# extract year
if year:
title['year'] = int(year)
elif source['dsc_data_lancamento'] and source['dsc_data_lancamento'].isdigit():
# year is based on the season air date, hence the adjustment below
title['year'] = int(source['dsc_data_lancamento']) - title.get('season', 1) + 1
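# e.g. (hypothetical values) a season 3 entry whose dsc_data_lancamento
# is '2016' maps to a series year of 2016 - 3 + 1 = 2014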
# add the title only if it is valid
# Check against title without ignored chars
if self.is_valid_title(title, title_id, sanitized_titles[0], season, title_year):
titles[title_id] = title
logger.debug('Found %d titles', len(titles))
return titles
@region.cache_on_arguments(expiration_time=timedelta(minutes=15).total_seconds())
def get_archives(self, title_id, language_code, title_type, season, episode):
"""Get the archive list from a given `title_id`, `language_code`, `title_type`, `season` and `episode`.
:param int title_id: title id.
:param int language_code: language code.
:param str title_type: episode or movie
:param int season: season
:param int episode: episode
:return: the archives.
:rtype: list of :class:`LegendasTVArchive`
"""
archives = []
page = 0
while True:
# get the archive page
url = self.server_url + 'legenda/busca/-/{language}/-/{page}/{title}'.format(
language=language_code, page=page, title=title_id)
r = self.session.get(url)
raise_for_status(r)
# parse the results
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
for archive_soup in soup.select('div.list_element > article > div > div.f_left'):
# create archive
archive = LegendasTVArchive(archive_soup.a['href'].split('/')[2],
archive_soup.a.text,
'pack' in archive_soup.parent['class'],
'destaque' in archive_soup.parent['class'],
self.server_url + archive_soup.a['href'][1:])
# clean name of path separators and pack flags
clean_name = archive.name.replace('/', '-')
if archive.pack and clean_name.startswith('(p)'):
clean_name = clean_name[3:]
# guess from name
guess = guessit(clean_name, {'type': title_type})
# episode
if season and episode:
# discard mismatches on episode in non-pack archives
# Guessit may return int for single episode or list for multi-episode
# Check if archive name has multiple episodes releases on it
if not archive.pack and 'episode' in guess:
wanted_episode = set(episode) if isinstance(episode, list) else {episode}
archive_episode = guess['episode'] if isinstance(guess['episode'], list) else {guess['episode']}
if not wanted_episode.intersection(archive_episode):
logger.debug('Mismatched episode %s, discarding archive: %s', guess['episode'], clean_name)
continue
# extract text containing downloads, rating and timestamp
data_text = archive_soup.find('p', class_='data').text
# match downloads
archive.downloads = int(downloads_re.search(data_text).group('downloads'))
# match rating
match = rating_re.search(data_text)
if match:
archive.rating = int(match.group('rating'))
# match timestamp and validate it
time_data = {k: int(v) for k, v in timestamp_re.search(data_text).groupdict().items()}
archive.timestamp = pytz.timezone('America/Sao_Paulo').localize(datetime(**time_data))
if archive.timestamp > datetime.utcnow().replace(tzinfo=pytz.utc):
raise ProviderError('Archive timestamp is in the future')
# add archive
logger.info('Found archive for title %d and language %d at page %s: %s',
title_id, language_code, page, archive)
archives.append(archive)
# stop on last page
if soup.find('a', attrs={'class': 'load_more'}, string='carregar mais') is None:
break
# increment page count
page += 1
logger.debug('Found %d archives', len(archives))
return archives
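# e.g. the first archive page for a hypothetical title id 41266 in a language
# with code 1 is fetched from 'legenda/busca/-/1/-/0/41266'; the page counter
# only advances while a 'carregar mais' link is present.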
def download_archive(self, archive):
"""Download an archive's :attr:`~LegendasTVArchive.content`.
:param archive: the archive to download :attr:`~LegendasTVArchive.content` of.
:type archive: :class:`LegendasTVArchive`
"""
logger.info('Downloading archive %s', archive.id)
r = self.session.get(self.server_url + 'downloadarquivo/{}'.format(archive.id))
raise_for_status(r)
# open the archive
archive_stream = io.BytesIO(r.content)
if is_rarfile(archive_stream):
logger.debug('Identified rar archive')
archive.content = RarFile(archive_stream)
elif is_zipfile(archive_stream):
logger.debug('Identified zip archive')
archive.content = ZipFile(archive_stream)
else:
raise ValueError('Not a valid archive')
def query(self, language, title, season=None, episode=None, year=None):
# search for titles
titles = self.search_titles(title, season, year)
subtitles = []
# iterate over titles
for title_id, t in titles.items():
logger.info('Getting archives for title %d and language %d', title_id, language.legendastv)
archives = self.get_archives(title_id, language.legendastv, t['type'], season, episode)
if not archives:
logger.info('No archives found for title %d and language %d', title_id, language.legendastv)
# iterate over title's archives
for a in archives:
# compute an expiration time based on the archive timestamp
expiration_time = (datetime.utcnow().replace(tzinfo=pytz.utc) - a.timestamp).total_seconds()
# attempt to get the releases from the cache
cache_key = releases_key.format(archive_id=a.id, archive_name=a.name)
releases = region.get(cache_key, expiration_time=expiration_time)
# the releases are not in cache or cache is expired
if releases == NO_VALUE:
logger.info('Releases not found in cache')
# download archive
self.download_archive(a)
# extract the releases
releases = []
for name in a.content.namelist():
# discard the legendastv file
if name.startswith('Legendas.tv'):
continue
# discard hidden files
if os.path.split(name)[-1].startswith('.'):
continue
# discard non-subtitle files
if not name.lower().endswith(SUBTITLE_EXTENSIONS):
continue
releases.append(name)
# cache the releases
region.set(cache_key, releases)
# iterate over releases
for r in releases:
subtitle = self.subtitle_class(language, t['type'], t['title'], t.get('year'), t.get('imdb_id'),
t.get('season'), a, r)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
season = episode = None
if isinstance(video, Episode):
titles = [video.series] + video.alternative_series
season = video.season
episode = video.episode
else:
titles = [video.title] + video.alternative_titles
for title in titles:
subtitles = [s for l in languages for s in
self.query(l, title, season=season, episode=episode, year=video.year)]
if subtitles:
return subtitles
return []
def download_subtitle(self, subtitle):
# download archive in case we previously hit the releases cache and didn't download it
if subtitle.archive.content is None:
self.download_archive(subtitle.archive)
# extract subtitle's content
subtitle.content = fix_line_ending(subtitle.archive.content.read(subtitle.name))
def raise_for_status(r):
# When the site is under maintenance it still returns HTTP status code 200.
if 'Em breve estaremos de volta' in r.text:
raise ServiceUnavailable
else:
r.raise_for_status()

View File

@@ -1,108 +0,0 @@
# -*- coding: utf-8 -*-
import logging
from babelfish import Language
from requests import Session
from . import Provider
from .. import __short_version__
from ..subtitle import Subtitle
logger = logging.getLogger(__name__)
def get_subhash(hash):
"""Get a second hash based on napiprojekt's hash.
:param str hash: napiprojekt's hash.
:return: the subhash.
:rtype: str
"""
idx = [0xe, 0x3, 0x6, 0x8, 0x2]
mul = [2, 2, 5, 4, 3]
add = [0, 0xd, 0x10, 0xb, 0x5]
b = []
for i in range(len(idx)):
a = add[i]
m = mul[i]
i = idx[i]
t = a + int(hash[i], 16)
v = int(hash[t:t + 2], 16)
b.append(('%x' % (v * m))[-1])
return ''.join(b)
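# A quick sanity check (hypothetical input hash; the value follows directly
# from the idx/mul/add tables above):
#
#     >>> get_subhash('0a1b2c3d4e5f60718293a4b5c6d7e8f9')
#     '8c207'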
class NapiProjektSubtitle(Subtitle):
"""NapiProjekt Subtitle."""
provider_name = 'napiprojekt'
def __init__(self, language, hash):
super(NapiProjektSubtitle, self).__init__(language)
self.hash = hash
self.content = None
@property
def id(self):
return self.hash
def get_matches(self, video):
matches = set()
# hash
if 'napiprojekt' in video.hashes and video.hashes['napiprojekt'] == self.hash:
matches.add('hash')
return matches
class NapiProjektProvider(Provider):
"""NapiProjekt Provider."""
languages = {Language.fromalpha2(l) for l in ['pl']}
required_hash = 'napiprojekt'
server_url = 'http://napiprojekt.pl/unit_napisy/dl.php'
subtitle_class = NapiProjektSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
def terminate(self):
self.session.close()
def query(self, language, hash):
params = {
'v': 'dreambox',
'kolejka': 'false',
'nick': '',
'pass': '',
'napios': 'Linux',
'l': language.alpha2.upper(),
'f': hash,
't': get_subhash(hash)}
logger.info('Searching subtitle %r', params)
r = self.session.get(self.server_url, params=params, timeout=10)
r.raise_for_status()
# handle subtitles not found and errors
if r.content[:4] == b'NPc0':
logger.debug('No subtitles found')
return None
subtitle = self.subtitle_class(language, hash)
subtitle.content = r.content
logger.debug('Found subtitle %r', subtitle)
return subtitle
def list_subtitles(self, video, languages):
return [s for s in [self.query(l, video.hashes['napiprojekt']) for l in languages] if s is not None]
def download_subtitle(self, subtitle):
# there is no download step, content is already filled from listing subtitles
pass

View File

@@ -1,297 +0,0 @@
# -*- coding: utf-8 -*-
import base64
import logging
import os
import re
import zlib
from babelfish import Language, language_converters
from guessit import guessit
from six.moves.xmlrpc_client import ServerProxy
from . import Provider, TimeoutSafeTransport
from .. import __short_version__
from ..exceptions import (AuthenticationError, ConfigurationError, DownloadLimitExceeded, ProviderError,
ServiceUnavailable)
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
class OpenSubtitlesSubtitle(Subtitle):
"""OpenSubtitles Subtitle."""
provider_name = 'opensubtitles'
series_re = re.compile(r'^"(?P<series_name>.*)" (?P<series_title>.*)$')
def __init__(self, language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind, hash, movie_name,
movie_release_name, movie_year, movie_imdb_id, series_season, series_episode, filename, encoding):
super(OpenSubtitlesSubtitle, self).__init__(language, hearing_impaired=hearing_impaired,
page_link=page_link, encoding=encoding)
self.subtitle_id = subtitle_id
self.matched_by = matched_by
self.movie_kind = movie_kind
self.hash = hash
self.movie_name = movie_name
self.movie_release_name = movie_release_name
self.movie_year = movie_year
self.movie_imdb_id = movie_imdb_id
self.series_season = series_season
self.series_episode = series_episode
self.filename = filename
@property
def id(self):
return str(self.subtitle_id)
@property
def series_name(self):
return self.series_re.match(self.movie_name).group('series_name')
@property
def series_title(self):
return self.series_re.match(self.movie_name).group('series_title')
def get_matches(self, video):
matches = set()
# episode
if isinstance(video, Episode) and self.movie_kind == 'episode':
# tag match, assume series, year, season and episode matches
if self.matched_by == 'tag':
if not video.imdb_id or self.movie_imdb_id == video.imdb_id:
matches |= {'series', 'year', 'season', 'episode'}
# series
if video.series and sanitize(self.series_name) == sanitize(video.series):
matches.add('series')
# year
if (video.original_series and self.movie_year is None) or (video.year and video.year == self.movie_year):
matches.add('year')
# season
if video.season and self.series_season == video.season:
matches.add('season')
# episode
if video.episode and self.series_episode == video.episode:
matches.add('episode')
# title
if video.title and sanitize(self.series_title) == sanitize(video.title):
matches.add('title')
# guess
matches |= guess_matches(video, guessit(self.movie_release_name, {'type': 'episode'}))
matches |= guess_matches(video, guessit(self.filename, {'type': 'episode'}))
# hash
if 'opensubtitles' in video.hashes and self.hash == video.hashes['opensubtitles']:
if 'series' in matches and 'season' in matches and 'episode' in matches:
matches.add('hash')
else:
logger.debug('Match on hash discarded')
# movie
elif isinstance(video, Movie) and self.movie_kind == 'movie':
# tag match, assume title and year matches
if self.matched_by == 'tag':
if not video.imdb_id or self.movie_imdb_id == video.imdb_id:
matches |= {'title', 'year'}
# title
if video.title and sanitize(self.movie_name) == sanitize(video.title):
matches.add('title')
# year
if video.year and self.movie_year == video.year:
matches.add('year')
# guess
matches |= guess_matches(video, guessit(self.movie_release_name, {'type': 'movie'}))
matches |= guess_matches(video, guessit(self.filename, {'type': 'movie'}))
# hash
if 'opensubtitles' in video.hashes and self.hash == video.hashes['opensubtitles']:
if 'title' in matches:
matches.add('hash')
else:
logger.debug('Match on hash discarded')
else:
logger.info('%r is not a valid movie_kind', self.movie_kind)
return matches
# imdb_id
if video.imdb_id and self.movie_imdb_id == video.imdb_id:
matches.add('imdb_id')
return matches
class OpenSubtitlesProvider(Provider):
"""OpenSubtitles Provider.
:param str username: username.
:param str password: password.
"""
languages = {Language.fromopensubtitles(l) for l in language_converters['opensubtitles'].codes}
subtitle_class = OpenSubtitlesSubtitle
def __init__(self, username=None, password=None):
self.server = ServerProxy('https://api.opensubtitles.org/xml-rpc', TimeoutSafeTransport(10))
if any((username, password)) and not all((username, password)):
raise ConfigurationError('Username and password must be specified')
# None values are not allowed for logging in, so replace them with ''
self.username = username or ''
self.password = password or ''
self.token = None
def initialize(self):
logger.info('Logging in')
response = checked(self.server.LogIn(self.username, self.password, 'eng',
'subliminal v%s' % __short_version__))
self.token = response['token']
logger.debug('Logged in with token %r', self.token)
def terminate(self):
logger.info('Logging out')
checked(self.server.LogOut(self.token))
self.server.close()
self.token = None
logger.debug('Logged out')
def no_operation(self):
logger.debug('No operation')
checked(self.server.NoOperation(self.token))
def query(self, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None, tag=None):
# fill the search criteria
criteria = []
if hash and size:
criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
if imdb_id:
if season and episode:
criteria.append({'imdbid': imdb_id[2:], 'season': season, 'episode': episode})
else:
criteria.append({'imdbid': imdb_id[2:]})
if tag:
criteria.append({'tag': tag})
if query and season and episode:
criteria.append({'query': query.replace('\'', ''), 'season': season, 'episode': episode})
elif query:
criteria.append({'query': query.replace('\'', '')})
if not criteria:
raise ValueError('Not enough information')
# add the language
for criterion in criteria:
criterion['sublanguageid'] = ','.join(sorted(l.opensubtitles for l in languages))
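# e.g. (sketch, hypothetical values) an episode lookup by hash+size plus
# imdb id would send:
#     [{'moviehash': '<hash>', 'moviebytesize': '1234567890', 'sublanguageid': 'eng'},
#      {'imdbid': '0903747', 'season': 1, 'episode': 1, 'sublanguageid': 'eng'}]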
# query the server
logger.info('Searching subtitles %r', criteria)
response = checked(self.server.SearchSubtitles(self.token, criteria))
subtitles = []
# exit if no data
if not response['data']:
logger.debug('No subtitles found')
return subtitles
# loop over subtitle items
for subtitle_item in response['data']:
# read the item
language = Language.fromopensubtitles(subtitle_item['SubLanguageID'])
hearing_impaired = bool(int(subtitle_item['SubHearingImpaired']))
page_link = subtitle_item['SubtitlesLink']
subtitle_id = int(subtitle_item['IDSubtitleFile'])
matched_by = subtitle_item['MatchedBy']
movie_kind = subtitle_item['MovieKind']
hash = subtitle_item['MovieHash']
movie_name = subtitle_item['MovieName']
movie_release_name = subtitle_item['MovieReleaseName']
movie_year = int(subtitle_item['MovieYear']) if subtitle_item['MovieYear'] else None
movie_imdb_id = 'tt' + subtitle_item['IDMovieImdb']
series_season = int(subtitle_item['SeriesSeason']) if subtitle_item['SeriesSeason'] else None
series_episode = int(subtitle_item['SeriesEpisode']) if subtitle_item['SeriesEpisode'] else None
filename = subtitle_item['SubFileName']
encoding = subtitle_item.get('SubEncoding') or None
subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by, movie_kind,
hash, movie_name, movie_release_name, movie_year, movie_imdb_id,
series_season, series_episode, filename, encoding)
logger.debug('Found subtitle %r by %s', subtitle, matched_by)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
season = episode = None
if isinstance(video, Episode):
query = video.series
season = video.season
episode = video.episode
else:
query = video.title
return self.query(languages, hash=video.hashes.get('opensubtitles'), size=video.size, imdb_id=video.imdb_id,
query=query, season=season, episode=episode, tag=os.path.basename(video.name))
def download_subtitle(self, subtitle):
logger.info('Downloading subtitle %r', subtitle)
response = checked(self.server.DownloadSubtitles(self.token, [str(subtitle.subtitle_id)]))
subtitle.content = fix_line_ending(zlib.decompress(base64.b64decode(response['data'][0]['data']), 47))
class OpenSubtitlesError(ProviderError):
"""Base class for non-generic :class:`OpenSubtitlesProvider` exceptions."""
pass
class Unauthorized(OpenSubtitlesError, AuthenticationError):
"""Exception raised when status is '401 Unauthorized'."""
pass
class NoSession(OpenSubtitlesError, AuthenticationError):
"""Exception raised when status is '406 No session'."""
pass
class DownloadLimitReached(OpenSubtitlesError, DownloadLimitExceeded):
"""Exception raised when status is '407 Download limit reached'."""
pass
class InvalidImdbid(OpenSubtitlesError):
"""Exception raised when status is '413 Invalid ImdbID'."""
pass
class UnknownUserAgent(OpenSubtitlesError, AuthenticationError):
"""Exception raised when status is '414 Unknown User Agent'."""
pass
class DisabledUserAgent(OpenSubtitlesError, AuthenticationError):
"""Exception raised when status is '415 Disabled user agent'."""
pass
def checked(response):
"""Check a response status before returning it.
:param response: a response from an XML-RPC call to OpenSubtitles.
:return: the response.
:raise: :class:`OpenSubtitlesError`
"""
status_code = int(response['status'][:3])
if status_code == 401:
raise Unauthorized
if status_code == 406:
raise NoSession
if status_code == 407:
raise DownloadLimitReached
if status_code == 413:
raise InvalidImdbid
if status_code == 414:
raise UnknownUserAgent
if status_code == 415:
raise DisabledUserAgent
if status_code == 503:
raise ServiceUnavailable
if status_code != 200:
raise OpenSubtitlesError(response['status'])
return response
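# Usage sketch: every XML-RPC call in this provider goes through checked() so
# that status strings such as '407 Download limit reached' surface as the
# typed exceptions above:
#
#     response = checked(self.server.SearchSubtitles(self.token, criteria))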

View File

@@ -1,197 +0,0 @@
# -*- coding: utf-8 -*-
import io
import logging
import re
from babelfish import Language, language_converters
from guessit import guessit
try:
from lxml import etree
except ImportError:
try:
import xml.etree.cElementTree as etree
except ImportError:
import xml.etree.ElementTree as etree
from requests import Session
from zipfile import ZipFile
from . import Provider
from .. import __short_version__
from ..exceptions import ProviderError
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
class PodnapisiSubtitle(Subtitle):
"""Podnapisi Subtitle."""
provider_name = 'podnapisi'
def __init__(self, language, hearing_impaired, page_link, pid, releases, title, season=None, episode=None,
year=None):
super(PodnapisiSubtitle, self).__init__(language, hearing_impaired=hearing_impaired, page_link=page_link)
self.pid = pid
self.releases = releases
self.title = title
self.season = season
self.episode = episode
self.year = year
@property
def id(self):
return self.pid
def get_matches(self, video):
matches = set()
# episode
if isinstance(video, Episode):
# series
if video.series and (sanitize(self.title) in (
sanitize(name) for name in [video.series] + video.alternative_series)):
matches.add('series')
# year
if (video.original_series and self.year is None) or (video.year and video.year == self.year):
matches.add('year')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# guess
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': 'episode'}))
# movie
elif isinstance(video, Movie):
# title
if video.title and (sanitize(self.title) in (
sanitize(name) for name in [video.title] + video.alternative_titles)):
matches.add('title')
# year
if video.year and self.year == video.year:
matches.add('year')
# guess
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': 'movie'}))
return matches
class PodnapisiProvider(Provider):
"""Podnapisi Provider."""
languages = ({Language('por', 'BR'), Language('srp', script='Latn')} |
{Language.fromalpha2(l) for l in language_converters['alpha2'].codes})
server_url = 'https://www.podnapisi.net/subtitles/'
subtitle_class = PodnapisiSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
def terminate(self):
self.session.close()
def query(self, language, keyword, season=None, episode=None, year=None):
# set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
params = {'sXML': 1, 'sL': str(language), 'sK': keyword}
is_episode = False
if season and episode:
is_episode = True
params['sTS'] = season
params['sTE'] = episode
if year:
params['sY'] = year
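# e.g. (sketch, hypothetical values) an English search for S05E14 of a 2008
# series would send:
#     {'sXML': 1, 'sL': 'en', 'sK': 'Breaking Bad', 'sTS': 5, 'sTE': 14, 'sY': 2008}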
# loop over paginated results
logger.info('Searching subtitles %r', params)
subtitles = []
pids = set()
while True:
# query the server
r = self.session.get(self.server_url + 'search/old', params=params, timeout=10)
r.raise_for_status()
xml = etree.fromstring(r.content)
# exit if no results
if not int(xml.find('pagination/results').text):
logger.debug('No subtitles found')
break
# loop over subtitles
for subtitle_xml in xml.findall('subtitle'):
# read xml elements
pid = subtitle_xml.find('pid').text
# ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
if pid in pids:
continue
language = Language.fromietf(subtitle_xml.find('language').text)
hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '')
page_link = subtitle_xml.find('url').text
releases = []
if subtitle_xml.find('release').text:
for release in subtitle_xml.find('release').text.split():
release = re.sub(r'\.+$', '', release) # remove trailing dots
release = ''.join(filter(lambda x: ord(x) < 128, release)) # remove non-ascii characters
releases.append(release)
title = subtitle_xml.find('title').text
season = int(subtitle_xml.find('tvSeason').text)
episode = int(subtitle_xml.find('tvEpisode').text)
year = int(subtitle_xml.find('year').text)
if is_episode:
subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title,
season=season, episode=episode, year=year)
else:
subtitle = self.subtitle_class(language, hearing_impaired, page_link, pid, releases, title,
year=year)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
pids.add(pid)
# stop on last page
if int(xml.find('pagination/current').text) >= int(xml.find('pagination/count').text):
break
# increment current page
params['page'] = int(xml.find('pagination/current').text) + 1
logger.debug('Getting page %d', params['page'])
return subtitles
def list_subtitles(self, video, languages):
season = episode = None
if isinstance(video, Episode):
titles = [video.series] + video.alternative_series
season = video.season
episode = video.episode
else:
titles = [video.title] + video.alternative_titles
for title in titles:
subtitles = [s for l in languages for s in
self.query(l, title, season=season, episode=episode, year=video.year)]
if subtitles:
return subtitles
return []
def download_subtitle(self, subtitle):
# download as a zip
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(self.server_url + subtitle.pid + '/download', params={'container': 'zip'}, timeout=10)
r.raise_for_status()
# open the zip
with ZipFile(io.BytesIO(r.content)) as zf:
if len(zf.namelist()) > 1:
raise ProviderError('More than one file to unzip')
subtitle.content = fix_line_ending(zf.read(zf.namelist()[0]))

View File

@@ -1,83 +0,0 @@
# -*- coding: utf-8 -*-
import json
import logging
import os
from babelfish import Language, language_converters
from requests import Session
from . import Provider
from .. import __short_version__
from ..subtitle import Subtitle, fix_line_ending
logger = logging.getLogger(__name__)
language_converters.register('shooter = subliminal.converters.shooter:ShooterConverter')
class ShooterSubtitle(Subtitle):
"""Shooter Subtitle."""
provider_name = 'shooter'
def __init__(self, language, hash, download_link):
super(ShooterSubtitle, self).__init__(language)
self.hash = hash
self.download_link = download_link
@property
def id(self):
return self.download_link
def get_matches(self, video):
matches = set()
# hash
if 'shooter' in video.hashes and video.hashes['shooter'] == self.hash:
matches.add('hash')
return matches
class ShooterProvider(Provider):
"""Shooter Provider."""
languages = {Language(l) for l in ['eng', 'zho']}
server_url = 'https://www.shooter.cn/api/subapi.php'
subtitle_class = ShooterSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
def terminate(self):
self.session.close()
def query(self, language, filename, hash=None):
# query the server
params = {'filehash': hash, 'pathinfo': os.path.realpath(filename), 'format': 'json', 'lang': language.shooter}
logger.debug('Searching subtitles %r', params)
r = self.session.post(self.server_url, params=params, timeout=10)
r.raise_for_status()
# handle subtitles not found
if r.content == b'\xff':
logger.debug('No subtitles found')
return []
# parse the subtitles
results = json.loads(r.text)
subtitles = [self.subtitle_class(language, hash, t['Link']) for s in results for t in s['Files']]
return subtitles
def list_subtitles(self, video, languages):
return [s for l in languages for s in self.query(l, video.name, video.hashes.get('shooter'))]
def download_subtitle(self, subtitle):
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(subtitle.download_link, timeout=10)
r.raise_for_status()
subtitle.content = fix_line_ending(r.content)

View File

@@ -1,243 +0,0 @@
# -*- coding: utf-8 -*-
import bisect
from collections import defaultdict
import io
import json
import logging
import zipfile
from babelfish import Language
from guessit import guessit
from requests import Session
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import SHOW_EXPIRATION_TIME, region
from ..exceptions import AuthenticationError, ConfigurationError, ProviderError
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize
from ..video import Episode, Movie
logger = logging.getLogger(__name__)
class SubsCenterSubtitle(Subtitle):
"""SubsCenter Subtitle."""
provider_name = 'subscenter'
def __init__(self, language, hearing_impaired, page_link, series, season, episode, title, subtitle_id, subtitle_key,
subtitle_version, downloaded, releases):
super(SubsCenterSubtitle, self).__init__(language, hearing_impaired, page_link)
self.series = series
self.season = season
self.episode = episode
self.title = title
self.subtitle_id = subtitle_id
self.subtitle_key = subtitle_key
self.subtitle_version = subtitle_version
self.downloaded = downloaded
self.releases = releases
@property
def id(self):
return str(self.subtitle_id)
def get_matches(self, video):
matches = set()
# episode
if isinstance(video, Episode):
# series
if video.series and sanitize(self.series) == sanitize(video.series):
matches.add('series')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# guess
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': 'episode'}))
# movie
elif isinstance(video, Movie):
# guess
for release in self.releases:
matches |= guess_matches(video, guessit(release, {'type': 'movie'}))
# title
if video.title and sanitize(self.title) == sanitize(video.title):
matches.add('title')
return matches
class SubsCenterProvider(Provider):
"""SubsCenter Provider."""
languages = {Language.fromalpha2(l) for l in ['he']}
server_url = 'http://www.subscenter.org/he/'
subtitle_class = SubsCenterSubtitle
def __init__(self, username=None, password=None):
if (username is not None and password is None) or (username is None and password is not None):
raise ConfigurationError('Username and password must be specified')
self.session = None
self.username = username
self.password = password
self.logged_in = False
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/{}'.format(__short_version__)
# login
if self.username is not None and self.password is not None:
logger.debug('Logging in')
url = self.server_url + 'subscenter/accounts/login/'
# retrieve CSRF token
self.session.get(url)
csrf_token = self.session.cookies['csrftoken']
# actual login
data = {'username': self.username, 'password': self.password, 'csrfmiddlewaretoken': csrf_token}
r = self.session.post(url, data, allow_redirects=False, timeout=10)
if r.status_code != 302:
raise AuthenticationError(self.username)
logger.info('Logged in')
self.logged_in = True
def terminate(self):
# logout
if self.logged_in:
logger.info('Logging out')
r = self.session.get(self.server_url + 'subscenter/accounts/logout/', timeout=10)
r.raise_for_status()
logger.info('Logged out')
self.logged_in = False
self.session.close()
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def _search_url_titles(self, title):
"""Search the URL titles by kind for the given `title`.
:param str title: title to search for.
:return: the URL titles by kind.
:rtype: collections.defaultdict
"""
# make the search
logger.info('Searching title name for %r', title)
r = self.session.get(self.server_url + 'subtitle/search/', params={'q': title}, timeout=10)
r.raise_for_status()
# check for redirections
if r.history and all([h.status_code == 302 for h in r.history]):
logger.debug('Redirected to the subtitles page')
links = [r.url]
else:
# get the suggestions (if needed)
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
links = [link.attrs['href'] for link in soup.select('#processes div.generalWindowTop a')]
logger.debug('Found %d suggestions', len(links))
url_titles = defaultdict(list)
for link in links:
parts = link.split('/')
url_titles[parts[-3]].append(parts[-2])
return url_titles
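# e.g. a suggestion link shaped like '/he/subtitle/series/the-wire/'
# (hypothetical) splits so that parts[-3] == 'series' and parts[-2] == 'the-wire',
# giving url_titles['series'] == ['the-wire']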
def query(self, title, season=None, episode=None):
# search for the url title
url_titles = self._search_url_titles(title)
# episode
if season and episode:
if 'series' not in url_titles:
logger.error('No URL title found for series %r', title)
return []
url_title = url_titles['series'][0]
logger.debug('Using series title %r', url_title)
url = self.server_url + 'cst/data/series/sb/{}/{}/{}/'.format(url_title, season, episode)
page_link = self.server_url + 'subtitle/series/{}/{}/{}/'.format(url_title, season, episode)
else:
if 'movie' not in url_titles:
logger.error('No URL title found for movie %r', title)
return []
url_title = url_titles['movie'][0]
logger.debug('Using movie title %r', url_title)
url = self.server_url + 'cst/data/movie/sb/{}/'.format(url_title)
page_link = self.server_url + 'subtitle/movie/{}/'.format(url_title)
# get the list of subtitles
logger.debug('Getting the list of subtitles')
r = self.session.get(url)
r.raise_for_status()
results = json.loads(r.text)
# loop over results
subtitles = {}
for language_code, language_data in results.items():
for quality_data in language_data.values():
for quality, subtitles_data in quality_data.items():
for subtitle_item in subtitles_data.values():
# read the item
language = Language.fromalpha2(language_code)
hearing_impaired = bool(subtitle_item['hearing_impaired'])
subtitle_id = subtitle_item['id']
subtitle_key = subtitle_item['key']
subtitle_version = subtitle_item['h_version']
downloaded = subtitle_item['downloaded']
release = subtitle_item['subtitle_version']
# add the release and increment downloaded count if we already have the subtitle
if subtitle_id in subtitles:
logger.debug('Found additional release %r for subtitle %d', release, subtitle_id)
bisect.insort_left(subtitles[subtitle_id].releases, release) # deterministic order
subtitles[subtitle_id].downloaded += downloaded
continue
# otherwise create it
subtitle = self.subtitle_class(language, hearing_impaired, page_link, title, season, episode,
title, subtitle_id, subtitle_key, subtitle_version, downloaded,
[release])
logger.debug('Found subtitle %r', subtitle)
subtitles[subtitle_id] = subtitle
return subtitles.values()
def list_subtitles(self, video, languages):
season = episode = None
title = video.title
if isinstance(video, Episode):
title = video.series
season = video.season
episode = video.episode
return [s for s in self.query(title, season, episode) if s.language in languages]
def download_subtitle(self, subtitle):
# download
url = self.server_url + 'subtitle/download/{}/{}/'.format(subtitle.language.alpha2, subtitle.subtitle_id)
params = {'v': subtitle.subtitle_version, 'key': subtitle.subtitle_key}
r = self.session.get(url, params=params, headers={'Referer': subtitle.page_link}, timeout=10)
r.raise_for_status()
# open the zip
try:
with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
# remove some filenames from the namelist
namelist = [n for n in zf.namelist() if not n.endswith('.txt')]
if len(namelist) > 1:
raise ProviderError('More than one file to unzip')
subtitle.content = fix_line_ending(zf.read(namelist[0]))
except zipfile.BadZipfile:
# if no zip file was retrieved, the daily download limit has been exceeded
raise ProviderError('Daily limit exceeded')

View File

@@ -1,88 +0,0 @@
# -*- coding: utf-8 -*-
import logging
from babelfish import Language, language_converters
from requests import Session
from . import Provider
from .. import __short_version__
from ..subtitle import Subtitle, fix_line_ending
logger = logging.getLogger(__name__)
language_converters.register('thesubdb = subliminal.converters.thesubdb:TheSubDBConverter')
class TheSubDBSubtitle(Subtitle):
"""TheSubDB Subtitle."""
provider_name = 'thesubdb'
def __init__(self, language, hash):
super(TheSubDBSubtitle, self).__init__(language)
self.hash = hash
@property
def id(self):
return self.hash + '-' + str(self.language)
def get_matches(self, video):
matches = set()
# hash
if 'thesubdb' in video.hashes and video.hashes['thesubdb'] == self.hash:
matches.add('hash')
return matches
class TheSubDBProvider(Provider):
"""TheSubDB Provider."""
languages = {Language.fromthesubdb(l) for l in language_converters['thesubdb'].codes}
required_hash = 'thesubdb'
server_url = 'http://api.thesubdb.com/'
subtitle_class = TheSubDBSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = ('SubDB/1.0 (subliminal/%s; https://github.com/Diaoul/subliminal)' %
__short_version__)
def terminate(self):
self.session.close()
def query(self, hash):
# make the query
params = {'action': 'search', 'hash': hash}
logger.info('Searching subtitles %r', params)
r = self.session.get(self.server_url, params=params, timeout=10)
# handle subtitles not found and errors
if r.status_code == 404:
logger.debug('No subtitles found')
return []
r.raise_for_status()
# loop over languages
subtitles = []
for language_code in r.text.split(','):
language = Language.fromthesubdb(language_code)
subtitle = self.subtitle_class(language, hash)
logger.debug('Found subtitle %r', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
return [s for s in self.query(video.hashes['thesubdb']) if s.language in languages]
def download_subtitle(self, subtitle):
logger.info('Downloading subtitle %r', subtitle)
params = {'action': 'download', 'hash': subtitle.hash, 'language': subtitle.language.alpha2}
r = self.session.get(self.server_url, params=params, timeout=10)
r.raise_for_status()
subtitle.content = fix_line_ending(r.content)

View File

@@ -1,226 +0,0 @@
# -*- coding: utf-8 -*-
import io
import logging
import re
from zipfile import ZipFile
from babelfish import Language, language_converters
from guessit import guessit
from requests import Session
from . import ParserBeautifulSoup, Provider
from .. import __short_version__
from ..cache import EPISODE_EXPIRATION_TIME, SHOW_EXPIRATION_TIME, region
from ..exceptions import ProviderError
from ..score import get_equivalent_release_groups
from ..subtitle import Subtitle, fix_line_ending, guess_matches
from ..utils import sanitize, sanitize_release_group
from ..video import Episode
logger = logging.getLogger(__name__)
language_converters.register('tvsubtitles = subliminal.converters.tvsubtitles:TVsubtitlesConverter')
link_re = re.compile(r'^(?P<series>.+?)(?: \(?\d{4}\)?| \((?:US|UK)\))? \((?P<first_year>\d{4})-\d{4}\)$')
episode_id_re = re.compile(r'^episode-\d+\.html$')
class TVsubtitlesSubtitle(Subtitle):
"""TVsubtitles Subtitle."""
provider_name = 'tvsubtitles'
def __init__(self, language, page_link, subtitle_id, series, season, episode, year, rip, release):
super(TVsubtitlesSubtitle, self).__init__(language, page_link=page_link)
self.subtitle_id = subtitle_id
self.series = series
self.season = season
self.episode = episode
self.year = year
self.rip = rip
self.release = release
@property
def id(self):
return str(self.subtitle_id)
def get_matches(self, video):
matches = set()
# series
if video.series and (sanitize(self.series) in (
sanitize(name) for name in [video.series] + video.alternative_series)):
matches.add('series')
# season
if video.season and self.season == video.season:
matches.add('season')
# episode
if video.episode and self.episode == video.episode:
matches.add('episode')
# year
if (video.original_series and self.year is None) or (video.year and video.year == self.year):
matches.add('year')
# release_group
if (video.release_group and self.release and
any(r in sanitize_release_group(self.release)
for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
matches.add('release_group')
# other properties
if self.release:
matches |= guess_matches(video, guessit(self.release, {'type': 'episode'}), partial=True)
if self.rip:
matches |= guess_matches(video, guessit(self.rip), partial=True)
return matches
class TVsubtitlesProvider(Provider):
"""TVsubtitles Provider."""
languages = {Language('por', 'BR')} | {Language(l) for l in [
'ara', 'bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'fin', 'fra', 'hun', 'ita', 'jpn', 'kor', 'nld', 'pol', 'por',
'ron', 'rus', 'spa', 'swe', 'tur', 'ukr', 'zho'
]}
video_types = (Episode,)
server_url = 'http://www.tvsubtitles.net/'
subtitle_class = TVsubtitlesSubtitle
def __init__(self):
self.session = None
def initialize(self):
self.session = Session()
self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__
def terminate(self):
self.session.close()
@region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
def search_show_id(self, series, year=None):
"""Search the show id from the `series` and `year`.
:param str series: series of the episode.
:param year: year of the series, if any.
:type year: int
:return: the show id, if any.
:rtype: int
"""
# make the search
logger.info('Searching show id for %r', series)
r = self.session.post(self.server_url + 'search.php', data={'q': series}, timeout=10)
r.raise_for_status()
# get the series out of the suggestions
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
show_id = None
for suggestion in soup.select('div.left li div a[href^="/tvshow-"]'):
match = link_re.match(suggestion.text)
if not match:
logger.error('Failed to match %s', suggestion.text)
continue
if match.group('series').lower() == series.lower():
if year is not None and int(match.group('first_year')) != year:
logger.debug('Year does not match')
continue
show_id = int(suggestion['href'][8:-5])
logger.debug('Found show id %d', show_id)
break
return show_id
@region.cache_on_arguments(expiration_time=EPISODE_EXPIRATION_TIME)
def get_episode_ids(self, show_id, season):
"""Get episode ids from the show id and the season.
:param int show_id: show id.
:param int season: season of the episode.
:return: episode ids per episode number.
:rtype: dict
"""
# get the page of the season of the show
logger.info('Getting the page of show id %d, season %d', show_id, season)
r = self.session.get(self.server_url + 'tvshow-%d-%d.html' % (show_id, season), timeout=10)
r.raise_for_status()
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# loop over episode rows
episode_ids = {}
for row in soup.select('table#table5 tr'):
# skip rows that do not have a link to the episode page
if not row('a', href=episode_id_re):
continue
# extract data from the cells
cells = row('td')
episode = int(cells[0].text.split('x')[1])
episode_id = int(cells[1].a['href'][8:-5])
episode_ids[episode] = episode_id
if episode_ids:
logger.debug('Found episode ids %r', episode_ids)
else:
logger.warning('No episode ids found')
return episode_ids
def query(self, show_id, series, season, episode, year=None):
# get the episode ids
episode_ids = self.get_episode_ids(show_id, season)
if episode not in episode_ids:
logger.error('Episode %d not found', episode)
return []
# get the episode page
logger.info('Getting the page for episode %d', episode_ids[episode])
r = self.session.get(self.server_url + 'episode-%d.html' % episode_ids[episode], timeout=10)
soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
# loop over subtitles rows
subtitles = []
for row in soup.select('.subtitlen'):
# read the item
language = Language.fromtvsubtitles(row.h5.img['src'][13:-4])
subtitle_id = int(row.parent['href'][10:-5])
page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
rip = row.find('p', title='rip').text.strip() or None
release = row.find('h5').text.strip() or None
subtitle = self.subtitle_class(language, page_link, subtitle_id, series, season, episode, year, rip,
release)
logger.debug('Found subtitle %s', subtitle)
subtitles.append(subtitle)
return subtitles
def list_subtitles(self, video, languages):
# lookup show_id
titles = [video.series] + video.alternative_series
show_id = None
for title in titles:
show_id = self.search_show_id(title, video.year)
if show_id is not None:
break
# query for subtitles with the show_id
if show_id is not None:
subtitles = [s for s in self.query(show_id, title, video.season, video.episode, video.year)
if s.language in languages and s.episode == video.episode]
if subtitles:
return subtitles
else:
logger.error('No show id found for %r (%r)', video.series, {'year': video.year})
return []
def download_subtitle(self, subtitle):
# download as a zip
logger.info('Downloading subtitle %r', subtitle)
r = self.session.get(self.server_url + 'download-%d.html' % subtitle.subtitle_id, timeout=10)
r.raise_for_status()
# open the zip
with ZipFile(io.BytesIO(r.content)) as zf:
if len(zf.namelist()) > 1:
raise ProviderError('More than one file to unzip')
subtitle.content = fix_line_ending(zf.read(zf.namelist()[0]))

View File

@@ -1,12 +0,0 @@
"""
Refiners enrich a :class:`~subliminal.video.Video` object by adding information to it.
A refiner is a simple function:
.. py:function:: refine(video, **kwargs)
:param video: the video to refine.
:type video: :class:`~subliminal.video.Video`
:param \*\*kwargs: additional parameters for refiners.
"""

View File

@@ -1,99 +0,0 @@
# -*- coding: utf-8 -*-
import logging
import os
from babelfish import Error as BabelfishError, Language
from enzyme import MKV
logger = logging.getLogger(__name__)
def refine(video, embedded_subtitles=True, **kwargs):
"""Refine a video by searching its metadata.
Several :class:`~subliminal.video.Video` attributes can be found:
* :attr:`~subliminal.video.Video.resolution`
* :attr:`~subliminal.video.Video.video_codec`
* :attr:`~subliminal.video.Video.audio_codec`
* :attr:`~subliminal.video.Video.subtitle_languages`
:param bool embedded_subtitles: search for embedded subtitles.
"""
# skip non-existing videos
if not video.exists:
return
# check extensions
extension = os.path.splitext(video.name)[1]
if extension == '.mkv':
with open(video.name, 'rb') as f:
mkv = MKV(f)
# main video track
if mkv.video_tracks:
video_track = mkv.video_tracks[0]
# resolution
if video_track.height in (480, 720, 1080):
if video_track.interlaced:
video.resolution = '%di' % video_track.height
else:
video.resolution = '%dp' % video_track.height
logger.debug('Found resolution %s', video.resolution)
# video codec
if video_track.codec_id == 'V_MPEG4/ISO/AVC':
video.video_codec = 'h264'
logger.debug('Found video_codec %s', video.video_codec)
elif video_track.codec_id == 'V_MPEG4/ISO/SP':
video.video_codec = 'DivX'
logger.debug('Found video_codec %s', video.video_codec)
elif video_track.codec_id == 'V_MPEG4/ISO/ASP':
video.video_codec = 'XviD'
logger.debug('Found video_codec %s', video.video_codec)
else:
logger.warning('MKV has no video track')
# main audio track
if mkv.audio_tracks:
audio_track = mkv.audio_tracks[0]
# audio codec
if audio_track.codec_id == 'A_AC3':
video.audio_codec = 'AC3'
logger.debug('Found audio_codec %s', video.audio_codec)
elif audio_track.codec_id == 'A_DTS':
video.audio_codec = 'DTS'
logger.debug('Found audio_codec %s', video.audio_codec)
elif audio_track.codec_id == 'A_AAC':
video.audio_codec = 'AAC'
logger.debug('Found audio_codec %s', video.audio_codec)
else:
logger.warning('MKV has no audio track')
# subtitle tracks
if mkv.subtitle_tracks:
if embedded_subtitles:
embedded_subtitle_languages = set()
for st in mkv.subtitle_tracks:
if st.language:
try:
embedded_subtitle_languages.add(Language.fromalpha3b(st.language))
except BabelfishError:
logger.error('Embedded subtitle track language %r is not a valid language', st.language)
embedded_subtitle_languages.add(Language('und'))
elif st.name:
try:
embedded_subtitle_languages.add(Language.fromname(st.name))
except BabelfishError:
logger.debug('Embedded subtitle track name %r is not a valid language', st.name)
embedded_subtitle_languages.add(Language('und'))
else:
embedded_subtitle_languages.add(Language('und'))
logger.debug('Found embedded subtitle %r', embedded_subtitle_languages)
video.subtitle_languages |= embedded_subtitle_languages
else:
logger.debug('MKV has no subtitle track')
else:
logger.debug('Unsupported video extension %s', extension)
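A hedged usage sketch for this refiner: assuming subliminal and enzyme are installed and the (hypothetical) file below exists on disk, the refiner is called with a Video instance and mutates it in place:

from subliminal.video import Video
from subliminal.refiners.metadata import refine

# hypothetical path; the file must exist, since refine() checks video.exists
video = Video.fromname('/tmp/Movie.2019.1080p.BluRay.x264-GROUP.mkv')
refine(video, embedded_subtitles=True)
print(video.resolution, video.video_codec, video.audio_codec, video.subtitle_languages)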

View File

@@ -1,187 +0,0 @@
# -*- coding: utf-8 -*-
import logging
import operator
import requests
from .. import __short_version__
from ..cache import REFINER_EXPIRATION_TIME, region
from ..video import Episode, Movie
from ..utils import sanitize
logger = logging.getLogger(__name__)
class OMDBClient(object):
base_url = 'http://www.omdbapi.com'
def __init__(self, version=1, session=None, headers=None, timeout=10):
#: Session for the requests
self.session = session or requests.Session()
self.session.timeout = timeout
self.session.headers.update(headers or {})
self.session.params['r'] = 'json'
self.session.params['v'] = version
def get(self, id=None, title=None, type=None, year=None, plot='short', tomatoes=False):
# build the params
params = {}
if id:
params['i'] = id
if title:
params['t'] = title
if not params:
raise ValueError('At least id or title is required')
params['type'] = type
params['y'] = year
params['plot'] = plot
params['tomatoes'] = tomatoes
# perform the request
r = self.session.get(self.base_url, params=params)
r.raise_for_status()
# get the response as json
j = r.json()
# check response status
if j['Response'] == 'False':
return None
return j
def search(self, title, type=None, year=None, page=1):
# build the params
params = {'s': title, 'type': type, 'y': year, 'page': page}
# perform the request
r = self.session.get(self.base_url, params=params)
r.raise_for_status()
# get the response as json
j = r.json()
# check response status
if j['Response'] == 'False':
return None
return j
omdb_client = OMDBClient(headers={'User-Agent': 'Subliminal/%s' % __short_version__})
@region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
def search(title, type, year):
results = omdb_client.search(title, type, year)
if not results:
return None
# fetch all paginated results
all_results = results['Search']
total_results = int(results['totalResults'])
page = 1
while total_results > page * 10:
page += 1
results = omdb_client.search(title, type, year, page=page)
all_results.extend(results['Search'])
return all_results
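Since OMDb returns at most 10 entries per page, the loop above keeps requesting pages while total_results > page * 10; a quick sketch of that arithmetic:

# with totalResults == 25, pages 1, 2 and 3 are fetched (10 + 10 + 5 results)
total_results, page = 25, 1
while total_results > page * 10:
    page += 1
print(page)  # 3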
def refine(video, **kwargs):
"""Refine a video by searching `OMDb API <http://omdbapi.com/>`_.
Several :class:`~subliminal.video.Episode` attributes can be found:
* :attr:`~subliminal.video.Episode.series`
* :attr:`~subliminal.video.Episode.year`
* :attr:`~subliminal.video.Episode.series_imdb_id`
Similarly, for a :class:`~subliminal.video.Movie`:
* :attr:`~subliminal.video.Movie.title`
* :attr:`~subliminal.video.Movie.year`
* :attr:`~subliminal.video.Video.imdb_id`
"""
if isinstance(video, Episode):
# exit if the information is complete
if video.series_imdb_id:
logger.debug('No need to search')
return
# search the series
results = search(video.series, 'series', video.year)
if not results:
logger.warning('No results for series')
return
logger.debug('Found %d results', len(results))
# filter the results
results = [r for r in results if sanitize(r['Title']) == sanitize(video.series)]
if not results:
logger.warning('No matching series found')
return
# process the results
found = False
for result in sorted(results, key=operator.itemgetter('Year')):
if video.original_series and video.year is None:
logger.debug('Found result for original series without year')
found = True
break
if video.year == int(result['Year'].split(u'\u2013')[0]):
logger.debug('Found result with matching year')
found = True
break
if not found:
logger.warning('No matching series found')
return
# add series information
logger.debug('Found series %r', result)
video.series = result['Title']
video.year = int(result['Year'].split(u'\u2013')[0])
video.series_imdb_id = result['imdbID']
elif isinstance(video, Movie):
# exit if the information is complete
if video.imdb_id:
return
# search the movie
results = search(video.title, 'movie', video.year)
if not results:
logger.warning('No results')
return
logger.debug('Found %d results', len(results))
# filter the results
results = [r for r in results if sanitize(r['Title']) == sanitize(video.title)]
if not results:
logger.warning('No matching movie found')
return
# process the results
found = False
for result in results:
if video.year is None:
logger.debug('Found result for movie without year')
found = True
break
if video.year == int(result['Year']):
logger.debug('Found result with matching year')
found = True
break
if not found:
logger.warning('No matching movie found')
return
# add movie information
logger.debug('Found movie %r', result)
video.title = result['Title']
video.year = int(result['Year'].split(u'\u2013')[0])
video.imdb_id = result['imdbID']

View File

@@ -1,351 +0,0 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
from functools import wraps
import logging
import re
import _strptime
import requests
from .. import __short_version__
from ..cache import REFINER_EXPIRATION_TIME, region
from ..utils import sanitize
from ..video import Episode
logger = logging.getLogger(__name__)
series_re = re.compile(r'^(?P<series>.*?)(?: \((?:(?P<year>\d{4})|(?P<country>[A-Z]{2}))\))?$')
def requires_auth(func):
"""Decorator for :class:`TVDBClient` methods that require authentication"""
@wraps(func)
def wrapper(self, *args, **kwargs):
if self.token is None or self.token_expired:
self.login()
elif self.token_needs_refresh:
self.refresh_token()
return func(self, *args, **kwargs)
return wrapper
class TVDBClient(object):
"""TVDB REST API Client
:param str apikey: API key to use.
:param str username: username to use.
:param str password: password to use.
:param str language: language of the responses.
:param session: session object to use.
:type session: :class:`requests.sessions.Session` or compatible.
:param dict headers: additional headers.
:param int timeout: timeout for the requests.
"""
#: Base URL of the API
base_url = 'https://api.thetvdb.com'
#: Token lifespan
token_lifespan = timedelta(hours=1)
#: Minimum token age before a :meth:`refresh_token` is triggered
refresh_token_every = timedelta(minutes=30)
def __init__(self, apikey=None, username=None, password=None, language='en', session=None, headers=None,
timeout=10):
#: API key
self.apikey = apikey
#: Username
self.username = username
#: Password
self.password = password
#: Last token acquisition date
self.token_date = datetime.utcnow() - self.token_lifespan
#: Session for the requests
self.session = session or requests.Session()
self.session.timeout = timeout
self.session.headers.update(headers or {})
self.session.headers['Content-Type'] = 'application/json'
self.session.headers['Accept-Language'] = language
@property
def language(self):
return self.session.headers['Accept-Language']
@language.setter
def language(self, value):
self.session.headers['Accept-Language'] = value
@property
def token(self):
if 'Authorization' not in self.session.headers:
return None
return self.session.headers['Authorization'][7:]
@property
def token_expired(self):
return datetime.utcnow() - self.token_date > self.token_lifespan
@property
def token_needs_refresh(self):
return datetime.utcnow() - self.token_date > self.refresh_token_every
def login(self):
"""Login"""
# perform the request
data = {'apikey': self.apikey, 'username': self.username, 'password': self.password}
r = self.session.post(self.base_url + '/login', json=data)
r.raise_for_status()
# set the Authorization header
self.session.headers['Authorization'] = 'Bearer ' + r.json()['token']
# update token_date
self.token_date = datetime.utcnow()
def refresh_token(self):
"""Refresh token"""
# perform the request
r = self.session.get(self.base_url + '/refresh_token')
r.raise_for_status()
# set the Authorization header
self.session.headers['Authorization'] = 'Bearer ' + r.json()['token']
# update token_date
self.token_date = datetime.utcnow()
@requires_auth
def search_series(self, name=None, imdb_id=None, zap2it_id=None):
"""Search series"""
# perform the request
params = {'name': name, 'imdbId': imdb_id, 'zap2itId': zap2it_id}
r = self.session.get(self.base_url + '/search/series', params=params)
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()['data']
@requires_auth
def get_series(self, id):
"""Get series"""
# perform the request
r = self.session.get(self.base_url + '/series/{}'.format(id))
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()['data']
@requires_auth
def get_series_actors(self, id):
"""Get series actors"""
# perform the request
r = self.session.get(self.base_url + '/series/{}/actors'.format(id))
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()['data']
@requires_auth
def get_series_episodes(self, id, page=1):
"""Get series episodes"""
# perform the request
params = {'page': page}
r = self.session.get(self.base_url + '/series/{}/episodes'.format(id), params=params)
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()
@requires_auth
def query_series_episodes(self, id, absolute_number=None, aired_season=None, aired_episode=None, dvd_season=None,
dvd_episode=None, imdb_id=None, page=1):
"""Query series episodes"""
# perform the request
params = {'absoluteNumber': absolute_number, 'airedSeason': aired_season, 'airedEpisode': aired_episode,
'dvdSeason': dvd_season, 'dvdEpisode': dvd_episode, 'imdbId': imdb_id, 'page': page}
r = self.session.get(self.base_url + '/series/{}/episodes/query'.format(id), params=params)
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()
@requires_auth
def get_episode(self, id):
"""Get episode"""
# perform the request
r = self.session.get(self.base_url + '/episodes/{}'.format(id))
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()['data']
#: Configured instance of :class:`TVDBClient`
tvdb_client = TVDBClient('5EC930FB90DA1ADA', headers={'User-Agent': 'Subliminal/%s' % __short_version__})
@region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
def search_series(name):
"""Search series.
:param str name: name of the series.
:return: the search results.
:rtype: list
"""
return tvdb_client.search_series(name)
@region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
def get_series(id):
"""Get series.
:param int id: id of the series.
:return: the series data.
:rtype: dict
"""
return tvdb_client.get_series(id)
@region.cache_on_arguments(expiration_time=REFINER_EXPIRATION_TIME)
def get_series_episode(series_id, season, episode):
"""Get an episode of a series.
:param int series_id: id of the series.
:param int season: season number of the episode.
:param int episode: episode number of the episode.
:return: the episode data.
:rtype: dict
"""
result = tvdb_client.query_series_episodes(series_id, aired_season=season, aired_episode=episode)
if result:
return tvdb_client.get_episode(result['data'][0]['id'])
def refine(video, **kwargs):
"""Refine a video by searching `TheTVDB <http://thetvdb.com/>`_.
.. note::
        This refiner only works for instances of :class:`~subliminal.video.Episode`.
Several attributes can be found:
* :attr:`~subliminal.video.Episode.series`
* :attr:`~subliminal.video.Episode.year`
* :attr:`~subliminal.video.Episode.series_imdb_id`
* :attr:`~subliminal.video.Episode.series_tvdb_id`
* :attr:`~subliminal.video.Episode.title`
* :attr:`~subliminal.video.Video.imdb_id`
* :attr:`~subliminal.video.Episode.tvdb_id`
"""
# only deal with Episode videos
if not isinstance(video, Episode):
        logger.error('Can only refine episodes')
return
# exit if the information is complete
if video.series_tvdb_id and video.tvdb_id:
logger.debug('No need to search')
return
# search the series
logger.info('Searching series %r', video.series)
results = search_series(video.series.lower())
if not results:
logger.warning('No results for series')
return
logger.debug('Found %d results', len(results))
# search for exact matches
matching_results = []
for result in results:
matching_result = {}
# use seriesName and aliases
series_names = [result['seriesName']]
series_names.extend(result['aliases'])
# parse the original series as series + year or country
original_match = series_re.match(result['seriesName']).groupdict()
# parse series year
series_year = None
if result['firstAired']:
series_year = datetime.strptime(result['firstAired'], '%Y-%m-%d').year
# discard mismatches on year
if video.year and series_year and video.year != series_year:
logger.debug('Discarding series %r mismatch on year %d', result['seriesName'], series_year)
continue
# iterate over series names
for series_name in series_names:
# parse as series and year
series, year, country = series_re.match(series_name).groups()
if year:
year = int(year)
# discard mismatches on year
if year and (video.original_series or video.year != year):
logger.debug('Discarding series name %r mismatch on year %d', series, year)
continue
# match on sanitized series name
if sanitize(series) == sanitize(video.series):
logger.debug('Found exact match on series %r', series_name)
matching_result['match'] = {'series': original_match['series'], 'year': series_year,
'original_series': original_match['year'] is None}
break
# add the result on match
if matching_result:
matching_result['data'] = result
matching_results.append(matching_result)
# exit if we don't have exactly 1 matching result
if not matching_results:
logger.error('No matching series found')
return
if len(matching_results) > 1:
logger.error('Multiple matches found')
return
# get the series
matching_result = matching_results[0]
series = get_series(matching_result['data']['id'])
# add series information
logger.debug('Found series %r', series)
video.series = matching_result['match']['series']
video.alternative_series.extend(series['aliases'])
video.year = matching_result['match']['year']
video.original_series = matching_result['match']['original_series']
video.series_tvdb_id = series['id']
video.series_imdb_id = series['imdbId'] or None
# get the episode
logger.info('Getting series episode %dx%d', video.season, video.episode)
episode = get_series_episode(video.series_tvdb_id, video.season, video.episode)
if not episode:
logger.warning('No results for episode')
return
# add episode information
logger.debug('Found episode %r', episode)
video.tvdb_id = episode['id']
video.title = episode['episodeName'] or None
video.imdb_id = episode['imdbId'] or None
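A hedged usage sketch for the client above; it assumes network access and a valid TheTVDB API key (the key below is a placeholder), with login handled transparently by the @requires_auth decorator:

client = TVDBClient(apikey='YOUR_API_KEY')  # placeholder key
results = client.search_series(name='Game of Thrones')
if results:
    series = client.get_series(results[0]['id'])
    print(series['seriesName'], series['firstAired'])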

View File

@@ -1,234 +0,0 @@
# -*- coding: utf-8 -*-
"""
This module provides the default implementation of the `compute_score` parameter in
:meth:`~subliminal.core.ProviderPool.download_best_subtitles` and :func:`~subliminal.core.download_best_subtitles`.
.. note::
To avoid unnecessary dependency on `sympy <http://www.sympy.org/>`_ and boost subliminal's import time, the
resulting scores are hardcoded here and manually updated when the set of equations change.
Available matches:
* hash
* title
* year
* series
* season
* episode
* release_group
* format
* audio_codec
* resolution
* hearing_impaired
* video_codec
* series_imdb_id
* imdb_id
* tvdb_id
"""
from __future__ import division, print_function
import logging
from .video import Episode, Movie
logger = logging.getLogger(__name__)
#: Scores for episodes
episode_scores = {'hash': 359, 'series': 180, 'year': 90, 'season': 30, 'episode': 30, 'release_group': 15,
'format': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}
#: Scores for movies
movie_scores = {'hash': 119, 'title': 60, 'year': 30, 'release_group': 15,
'format': 7, 'audio_codec': 3, 'resolution': 2, 'video_codec': 2, 'hearing_impaired': 1}
#: Equivalent release groups
equivalent_release_groups = ({'LOL', 'DIMENSION'}, {'ASAP', 'IMMERSE', 'FLEET'}, {'AVS', 'SVA'})
def get_equivalent_release_groups(release_group):
"""Get all the equivalents of the given release group.
:param str release_group: the release group to get the equivalents of.
:return: the equivalent release groups.
:rtype: set
"""
for equivalent_release_group in equivalent_release_groups:
if release_group in equivalent_release_group:
return equivalent_release_group
return {release_group}
def get_scores(video):
"""Get the scores dict for the given `video`.
This will return either :data:`episode_scores` or :data:`movie_scores` based on the type of the `video`.
:param video: the video to compute the score against.
:type video: :class:`~subliminal.video.Video`
:return: the scores dict.
:rtype: dict
"""
if isinstance(video, Episode):
return episode_scores
elif isinstance(video, Movie):
return movie_scores
raise ValueError('video must be an instance of Episode or Movie')
def compute_score(subtitle, video, hearing_impaired=None):
"""Compute the score of the `subtitle` against the `video` with `hearing_impaired` preference.
:func:`compute_score` uses the :meth:`Subtitle.get_matches <subliminal.subtitle.Subtitle.get_matches>` method and
applies the scores (either from :data:`episode_scores` or :data:`movie_scores`) after some processing.
:param subtitle: the subtitle to compute the score of.
:type subtitle: :class:`~subliminal.subtitle.Subtitle`
:param video: the video to compute the score against.
:type video: :class:`~subliminal.video.Video`
:param bool hearing_impaired: hearing impaired preference.
:return: score of the subtitle.
:rtype: int
"""
logger.info('Computing score of %r for video %r with %r', subtitle, video, dict(hearing_impaired=hearing_impaired))
# get the scores dict
scores = get_scores(video)
logger.debug('Using scores %r', scores)
# get the matches
matches = subtitle.get_matches(video)
logger.debug('Found matches %r', matches)
# on hash match, discard everything else
if 'hash' in matches:
logger.debug('Keeping only hash match')
matches &= {'hash'}
# handle equivalent matches
if isinstance(video, Episode):
if 'title' in matches:
logger.debug('Adding title match equivalent')
matches.add('episode')
if 'series_imdb_id' in matches:
logger.debug('Adding series_imdb_id match equivalent')
matches |= {'series', 'year'}
if 'imdb_id' in matches:
logger.debug('Adding imdb_id match equivalents')
matches |= {'series', 'year', 'season', 'episode'}
if 'tvdb_id' in matches:
logger.debug('Adding tvdb_id match equivalents')
matches |= {'series', 'year', 'season', 'episode'}
if 'series_tvdb_id' in matches:
logger.debug('Adding series_tvdb_id match equivalents')
matches |= {'series', 'year'}
elif isinstance(video, Movie):
if 'imdb_id' in matches:
logger.debug('Adding imdb_id match equivalents')
matches |= {'title', 'year'}
# handle hearing impaired
if hearing_impaired is not None and subtitle.hearing_impaired == hearing_impaired:
logger.debug('Matched hearing_impaired')
matches.add('hearing_impaired')
# compute the score
score = sum((scores.get(match, 0) for match in matches))
logger.info('Computed score %r with final matches %r', score, matches)
# ensure score is within valid bounds
assert 0 <= score <= scores['hash'] + scores['hearing_impaired']
return score
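As a worked example against episode_scores (a sketch only, no video required): matching series, year, season, episode and release_group yields 180 + 90 + 30 + 30 + 15 = 345, while a hash match discards everything else and scores 359, which by construction equals the sum of all other episode matches:

matches = {'series', 'year', 'season', 'episode', 'release_group'}
print(sum(episode_scores.get(m, 0) for m in matches))  # 345
# episode_scores['hash'] == 359 == 180 + 90 + 30 + 30 + 15 + 7 + 3 + 2 + 2,
# so with the +1 hearing_impaired bonus the maximum possible score is 360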
def solve_episode_equations():
from sympy import Eq, solve, symbols
hash, series, year, season, episode, release_group = symbols('hash series year season episode release_group')
format, audio_codec, resolution, video_codec = symbols('format audio_codec resolution video_codec')
hearing_impaired = symbols('hearing_impaired')
equations = [
# hash is best
Eq(hash, series + year + season + episode + release_group + format + audio_codec + resolution + video_codec),
# series counts for the most part in the total score
Eq(series, year + season + episode + release_group + format + audio_codec + resolution + video_codec + 1),
# year is the second most important part
Eq(year, season + episode + release_group + format + audio_codec + resolution + video_codec + 1),
# season is important too
Eq(season, release_group + format + audio_codec + resolution + video_codec + 1),
# episode is equally important to season
Eq(episode, season),
# release group is the next most wanted match
Eq(release_group, format + audio_codec + resolution + video_codec + 1),
# format counts as much as audio_codec, resolution and video_codec
Eq(format, audio_codec + resolution + video_codec),
# audio_codec is more valuable than video_codec
Eq(audio_codec, video_codec + 1),
# resolution counts as much as video_codec
Eq(resolution, video_codec),
        # video_codec is the least valuable match but counts more than the sum of all score-increasing matches
Eq(video_codec, hearing_impaired + 1),
        # hearing impaired is only used to increase the score, so set it to 1
Eq(hearing_impaired, 1),
]
return solve(equations, [hash, series, year, season, episode, release_group, format, audio_codec, resolution,
hearing_impaired, video_codec])
def solve_movie_equations():
from sympy import Eq, solve, symbols
hash, title, year, release_group = symbols('hash title year release_group')
format, audio_codec, resolution, video_codec = symbols('format audio_codec resolution video_codec')
hearing_impaired = symbols('hearing_impaired')
equations = [
# hash is best
Eq(hash, title + year + release_group + format + audio_codec + resolution + video_codec),
# title counts for the most part in the total score
Eq(title, year + release_group + format + audio_codec + resolution + video_codec + 1),
# year is the second most important part
Eq(year, release_group + format + audio_codec + resolution + video_codec + 1),
# release group is the next most wanted match
Eq(release_group, format + audio_codec + resolution + video_codec + 1),
# format counts as much as audio_codec, resolution and video_codec
Eq(format, audio_codec + resolution + video_codec),
# audio_codec is more valuable than video_codec
Eq(audio_codec, video_codec + 1),
# resolution counts as much as video_codec
Eq(resolution, video_codec),
        # video_codec is the least valuable match but counts more than the sum of all score-increasing matches
Eq(video_codec, hearing_impaired + 1),
        # hearing impaired is only used to increase the score, so set it to 1
Eq(hearing_impaired, 1),
]
return solve(equations, [hash, title, year, release_group, format, audio_codec, resolution, hearing_impaired,
video_codec])

View File

@@ -1,261 +0,0 @@
# -*- coding: utf-8 -*-
import codecs
import logging
import os
import chardet
import pysrt
from .score import get_equivalent_release_groups
from .video import Episode, Movie
from .utils import sanitize, sanitize_release_group
logger = logging.getLogger(__name__)
#: Subtitle extensions
SUBTITLE_EXTENSIONS = ('.srt', '.sub', '.smi', '.txt', '.ssa', '.ass', '.mpl')
class Subtitle(object):
"""Base class for subtitle.
:param language: language of the subtitle.
:type language: :class:`~babelfish.language.Language`
:param bool hearing_impaired: whether or not the subtitle is hearing impaired.
:param page_link: URL of the web page from which the subtitle can be downloaded.
:type page_link: str
:param encoding: Text encoding of the subtitle.
:type encoding: str
"""
#: Name of the provider that returns that class of subtitle
provider_name = ''
def __init__(self, language, hearing_impaired=False, page_link=None, encoding=None):
#: Language of the subtitle
self.language = language
#: Whether or not the subtitle is hearing impaired
self.hearing_impaired = hearing_impaired
#: URL of the web page from which the subtitle can be downloaded
self.page_link = page_link
#: Content as bytes
self.content = None
#: Encoding to decode with when accessing :attr:`text`
self.encoding = None
# validate the encoding
if encoding:
try:
self.encoding = codecs.lookup(encoding).name
except (TypeError, LookupError):
logger.debug('Unsupported encoding %s', encoding)
@property
def id(self):
"""Unique identifier of the subtitle"""
raise NotImplementedError
@property
def text(self):
"""Content as string
If :attr:`encoding` is None, the encoding is guessed with :meth:`guess_encoding`
"""
if not self.content:
return
if self.encoding:
return self.content.decode(self.encoding, errors='replace')
return self.content.decode(self.guess_encoding(), errors='replace')
def is_valid(self):
"""Check if a :attr:`text` is a valid SubRip format.
:return: whether or not the subtitle is valid.
:rtype: bool
"""
if not self.text:
return False
try:
pysrt.from_string(self.text, error_handling=pysrt.ERROR_RAISE)
except pysrt.Error as e:
if e.args[0] < 80:
return False
return True
def guess_encoding(self):
"""Guess encoding using the language, falling back on chardet.
:return: the guessed encoding.
:rtype: str
"""
logger.info('Guessing encoding for language %s', self.language)
# always try utf-8 first
encodings = ['utf-8']
# add language-specific encodings
if self.language.alpha3 == 'zho':
encodings.extend(['gb18030', 'big5'])
elif self.language.alpha3 == 'jpn':
encodings.append('shift-jis')
elif self.language.alpha3 == 'ara':
encodings.append('windows-1256')
elif self.language.alpha3 == 'heb':
encodings.append('windows-1255')
elif self.language.alpha3 == 'tur':
encodings.extend(['iso-8859-9', 'windows-1254'])
elif self.language.alpha3 == 'pol':
# Eastern European Group 1
encodings.extend(['windows-1250'])
elif self.language.alpha3 == 'bul':
# Eastern European Group 2
encodings.extend(['windows-1251'])
else:
# Western European (windows-1252)
encodings.append('latin-1')
# try to decode
logger.debug('Trying encodings %r', encodings)
for encoding in encodings:
try:
self.content.decode(encoding)
except UnicodeDecodeError:
pass
else:
logger.info('Guessed encoding %s', encoding)
return encoding
logger.warning('Could not guess encoding from language')
# fallback on chardet
encoding = chardet.detect(self.content)['encoding']
logger.info('Chardet found encoding %s', encoding)
return encoding
def get_matches(self, video):
"""Get the matches against the `video`.
:param video: the video to get the matches with.
:type video: :class:`~subliminal.video.Video`
:return: matches of the subtitle.
:rtype: set
"""
raise NotImplementedError
def __hash__(self):
return hash(self.provider_name + '-' + self.id)
def __repr__(self):
return '<%s %r [%s]>' % (self.__class__.__name__, self.id, self.language)
def get_subtitle_path(video_path, language=None, extension='.srt'):
"""Get the subtitle path using the `video_path` and `language`.
:param str video_path: path to the video.
:param language: language of the subtitle to put in the path.
:type language: :class:`~babelfish.language.Language`
:param str extension: extension of the subtitle.
:return: path of the subtitle.
:rtype: str
"""
subtitle_root = os.path.splitext(video_path)[0]
if language:
subtitle_root += '.' + str(language)
return subtitle_root + extension
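A quick sketch of the resulting paths, assuming babelfish renders Language('eng') as 'en' (the video path is hypothetical):

from babelfish import Language

print(get_subtitle_path('/media/Show.S01E01.mkv', Language('eng')))
# /media/Show.S01E01.en.srt
print(get_subtitle_path('/media/Show.S01E01.mkv'))
# /media/Show.S01E01.srt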
def guess_matches(video, guess, partial=False):
"""Get matches between a `video` and a `guess`.
    If a guess is `partial`, the absence of information won't be counted as a match.
:param video: the video.
:type video: :class:`~subliminal.video.Video`
:param guess: the guess.
:type guess: dict
:param bool partial: whether or not the guess is partial.
:return: matches between the `video` and the `guess`.
:rtype: set
"""
matches = set()
if isinstance(video, Episode):
# series
if video.series and 'title' in guess and sanitize(guess['title']) == sanitize(video.series):
matches.add('series')
# title
if video.title and 'episode_title' in guess and sanitize(guess['episode_title']) == sanitize(video.title):
matches.add('title')
# season
if video.season and 'season' in guess and guess['season'] == video.season:
matches.add('season')
# episode
# Currently we only have single-ep support (guessit returns a multi-ep as a list with int values)
# Most providers only support single-ep, so make sure it contains only 1 episode
# In case of multi-ep, take the lowest episode (subtitles will normally be available on lowest episode number)
if video.episode and 'episode' in guess:
episode_guess = guess['episode']
episode = min(episode_guess) if episode_guess and isinstance(episode_guess, list) else episode_guess
if episode == video.episode:
matches.add('episode')
# year
if video.year and 'year' in guess and guess['year'] == video.year:
matches.add('year')
# count "no year" as an information
if not partial and video.original_series and 'year' not in guess:
matches.add('year')
elif isinstance(video, Movie):
# year
if video.year and 'year' in guess and guess['year'] == video.year:
matches.add('year')
# title
if video.title and 'title' in guess and sanitize(guess['title']) == sanitize(video.title):
matches.add('title')
# release_group
if (video.release_group and 'release_group' in guess and
sanitize_release_group(guess['release_group']) in
get_equivalent_release_groups(sanitize_release_group(video.release_group))):
matches.add('release_group')
# resolution
if video.resolution and 'screen_size' in guess and guess['screen_size'] == video.resolution:
matches.add('resolution')
# format
if video.format and 'format' in guess and guess['format'].lower() == video.format.lower():
matches.add('format')
# video_codec
if video.video_codec and 'video_codec' in guess and guess['video_codec'] == video.video_codec:
matches.add('video_codec')
# audio_codec
if video.audio_codec and 'audio_codec' in guess and guess['audio_codec'] == video.audio_codec:
matches.add('audio_codec')
return matches
def fix_line_ending(content):
"""Fix line ending of `content` by changing it to \n.
:param bytes content: content of the subtitle.
:return: the content with fixed line endings.
:rtype: bytes
"""
return content.replace(b'\r\n', b'\n')
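A one-line illustration of the conversion:

print(fix_line_ending(b'1\r\n00:00:01,000 --> 00:00:02,000\r\nHello\r\n'))
# b'1\n00:00:01,000 --> 00:00:02,000\nHello\n'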

View File

@@ -1,152 +0,0 @@
# -*- coding: utf-8 -*-
from datetime import datetime
import hashlib
import os
import re
import struct
def hash_opensubtitles(video_path):
"""Compute a hash using OpenSubtitles' algorithm.
:param str video_path: path of the video.
:return: the hash.
:rtype: str
"""
bytesize = struct.calcsize(b'<q')
with open(video_path, 'rb') as f:
filesize = os.path.getsize(video_path)
filehash = filesize
if filesize < 65536 * 2:
return
for _ in range(65536 // bytesize):
filebuffer = f.read(bytesize)
(l_value,) = struct.unpack(b'<q', filebuffer)
filehash += l_value
filehash &= 0xFFFFFFFFFFFFFFFF # to remain as 64bit number
f.seek(max(0, filesize - 65536), 0)
for _ in range(65536 // bytesize):
filebuffer = f.read(bytesize)
(l_value,) = struct.unpack(b'<q', filebuffer)
filehash += l_value
filehash &= 0xFFFFFFFFFFFFFFFF
returnedhash = '%016x' % filehash
return returnedhash
def hash_thesubdb(video_path):
"""Compute a hash using TheSubDB's algorithm.
:param str video_path: path of the video.
:return: the hash.
:rtype: str
"""
readsize = 64 * 1024
if os.path.getsize(video_path) < readsize:
return
with open(video_path, 'rb') as f:
data = f.read(readsize)
f.seek(-readsize, os.SEEK_END)
data += f.read(readsize)
return hashlib.md5(data).hexdigest()
def hash_napiprojekt(video_path):
"""Compute a hash using NapiProjekt's algorithm.
:param str video_path: path of the video.
:return: the hash.
:rtype: str
"""
readsize = 1024 * 1024 * 10
with open(video_path, 'rb') as f:
data = f.read(readsize)
return hashlib.md5(data).hexdigest()
def hash_shooter(video_path):
"""Compute a hash using Shooter's algorithm
:param string video_path: path of the video
:return: the hash
:rtype: string
"""
filesize = os.path.getsize(video_path)
readsize = 4096
if os.path.getsize(video_path) < readsize * 2:
return None
offsets = (readsize, filesize // 3 * 2, filesize // 3, filesize - readsize * 2)
filehash = []
with open(video_path, 'rb') as f:
for offset in offsets:
f.seek(offset)
filehash.append(hashlib.md5(f.read(readsize)).hexdigest())
return ';'.join(filehash)
def sanitize(string, ignore_characters=None):
"""Sanitize a string to strip special characters.
:param str string: the string to sanitize.
:param set ignore_characters: characters to ignore.
:return: the sanitized string.
:rtype: str
"""
# only deal with strings
if string is None:
return
ignore_characters = ignore_characters or set()
# replace some characters with one space
characters = {'-', ':', '(', ')', '.'} - ignore_characters
if characters:
string = re.sub(r'[%s]' % re.escape(''.join(characters)), ' ', string)
# remove some characters
characters = {'\''} - ignore_characters
if characters:
string = re.sub(r'[%s]' % re.escape(''.join(characters)), '', string)
# replace multiple spaces with one
string = re.sub(r'\s+', ' ', string)
# strip and lower case
return string.strip().lower()
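Two quick illustrations of these rules (apostrophes dropped; dashes, colons, parentheses and dots turned into single spaces; then collapsed, stripped and lower-cased):

print(sanitize("Marvel's Agents of S.H.I.E.L.D."))  # marvels agents of s h i e l d
print(sanitize('The Office (US)'))                  # the office us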
def sanitize_release_group(string):
"""Sanitize a `release_group` string to remove content in square brackets.
:param str string: the release group to sanitize.
:return: the sanitized release group.
:rtype: str
"""
# only deal with strings
if string is None:
return
# remove content in square brackets
string = re.sub(r'\[\w+\]', '', string)
# strip and upper case
return string.strip().upper()
def timestamp(date):
"""Get the timestamp of the `date`, python2/3 compatible
:param datetime.datetime date: the utc date.
:return: the timestamp of the date.
:rtype: float
"""
return (date - datetime(1970, 1, 1)).total_seconds()
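For example, one day after the epoch:

from datetime import datetime
print(timestamp(datetime(1970, 1, 2)))  # 86400.0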

View File

@@ -1,239 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import division
from datetime import datetime, timedelta
import logging
import os
from guessit import guessit
logger = logging.getLogger(__name__)
#: Video extensions
VIDEO_EXTENSIONS = ('.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx', '.avchd', '.avi', '.bik',
'.bix', '.box', '.cam', '.dat', '.divx', '.dmf', '.dv', '.dvr-ms', '.evo', '.flc', '.fli',
'.flic', '.flv', '.flx', '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e',
'.m4v', '.mjp', '.mjpeg', '.mjpg', '.mkv', '.moov', '.mov', '.movhd', '.movie', '.movx', '.mp4',
'.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2', '.mxf', '.nsv', '.nut', '.ogg', '.ogm', '.ogv', '.omf',
'.ps', '.qt', '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video', '.viv', '.vivo',
'.vob', '.vro', '.webm', '.wm', '.wmv', '.wmx', '.wrap', '.wvx', '.wx', '.x264', '.xvid')
class Video(object):
"""Base class for videos.
Represent a video, existing or not.
:param str name: name or path of the video.
:param str format: format of the video (HDTV, WEB-DL, BluRay, ...).
:param str release_group: release group of the video.
:param str resolution: resolution of the video stream (480p, 720p, 1080p or 1080i).
:param str video_codec: codec of the video stream.
:param str audio_codec: codec of the main audio stream.
:param str imdb_id: IMDb id of the video.
:param dict hashes: hashes of the video file by provider names.
:param int size: size of the video file in bytes.
:param set subtitle_languages: existing subtitle languages.
"""
def __init__(self, name, format=None, release_group=None, resolution=None, video_codec=None, audio_codec=None,
imdb_id=None, hashes=None, size=None, subtitle_languages=None):
#: Name or path of the video
self.name = name
#: Format of the video (HDTV, WEB-DL, BluRay, ...)
self.format = format
#: Release group of the video
self.release_group = release_group
#: Resolution of the video stream (480p, 720p, 1080p or 1080i)
self.resolution = resolution
#: Codec of the video stream
self.video_codec = video_codec
#: Codec of the main audio stream
self.audio_codec = audio_codec
#: IMDb id of the video
self.imdb_id = imdb_id
#: Hashes of the video file by provider names
self.hashes = hashes or {}
#: Size of the video file in bytes
self.size = size
#: Existing subtitle languages
self.subtitle_languages = subtitle_languages or set()
@property
def exists(self):
"""Test whether the video exists"""
return os.path.exists(self.name)
@property
def age(self):
"""Age of the video"""
if self.exists:
return datetime.utcnow() - datetime.utcfromtimestamp(os.path.getmtime(self.name))
return timedelta()
@classmethod
def fromguess(cls, name, guess):
"""Create an :class:`Episode` or a :class:`Movie` with the given `name` based on the `guess`.
:param str name: name of the video.
:param dict guess: guessed data.
:raise: :class:`ValueError` if the `type` of the `guess` is invalid
"""
if guess['type'] == 'episode':
return Episode.fromguess(name, guess)
if guess['type'] == 'movie':
return Movie.fromguess(name, guess)
raise ValueError('The guess must be an episode or a movie guess')
@classmethod
def fromname(cls, name):
"""Shortcut for :meth:`fromguess` with a `guess` guessed from the `name`.
:param str name: name of the video.
"""
return cls.fromguess(name, guessit(name))
def __repr__(self):
return '<%s [%r]>' % (self.__class__.__name__, self.name)
def __hash__(self):
return hash(self.name)
class Episode(Video):
"""Episode :class:`Video`.
:param str series: series of the episode.
:param int season: season number of the episode.
:param int episode: episode number of the episode.
:param str title: title of the episode.
:param int year: year of the series.
:param bool original_series: whether the series is the first with this name.
:param int tvdb_id: TVDB id of the episode.
:param list alternative_series: alternative names of the series
:param \*\*kwargs: additional parameters for the :class:`Video` constructor.
"""
def __init__(self, name, series, season, episode, title=None, year=None, original_series=True, tvdb_id=None,
series_tvdb_id=None, series_imdb_id=None, alternative_series=None, **kwargs):
super(Episode, self).__init__(name, **kwargs)
#: Series of the episode
self.series = series
#: Season number of the episode
self.season = season
#: Episode number of the episode
self.episode = episode
#: Title of the episode
self.title = title
#: Year of series
self.year = year
#: The series is the first with this name
self.original_series = original_series
#: TVDB id of the episode
self.tvdb_id = tvdb_id
#: TVDB id of the series
self.series_tvdb_id = series_tvdb_id
#: IMDb id of the series
self.series_imdb_id = series_imdb_id
#: Alternative names of the series
self.alternative_series = alternative_series or []
@classmethod
def fromguess(cls, name, guess):
if guess['type'] != 'episode':
raise ValueError('The guess must be an episode guess')
if 'title' not in guess or 'episode' not in guess:
raise ValueError('Insufficient data to process the guess')
# Currently we only have single-ep support (guessit returns a multi-ep as a list with int values)
# Most providers only support single-ep, so make sure it contains only 1 episode
# In case of multi-ep, take the lowest episode (subtitles will normally be available on lowest episode number)
episode_guess = guess.get('episode')
episode = min(episode_guess) if episode_guess and isinstance(episode_guess, list) else episode_guess
return cls(name, guess['title'], guess.get('season', 1), episode, title=guess.get('episode_title'),
year=guess.get('year'), format=guess.get('format'), original_series='year' not in guess,
release_group=guess.get('release_group'), resolution=guess.get('screen_size'),
video_codec=guess.get('video_codec'), audio_codec=guess.get('audio_codec'))
@classmethod
def fromname(cls, name):
return cls.fromguess(name, guessit(name, {'type': 'episode'}))
def __repr__(self):
if self.year is None:
return '<%s [%r, %dx%d]>' % (self.__class__.__name__, self.series, self.season, self.episode)
return '<%s [%r, %d, %dx%d]>' % (self.__class__.__name__, self.series, self.year, self.season, self.episode)
class Movie(Video):
"""Movie :class:`Video`.
:param str title: title of the movie.
:param int year: year of the movie.
:param list alternative_titles: alternative titles of the movie
:param \*\*kwargs: additional parameters for the :class:`Video` constructor.
"""
def __init__(self, name, title, year=None, alternative_titles=None, **kwargs):
super(Movie, self).__init__(name, **kwargs)
#: Title of the movie
self.title = title
#: Year of the movie
self.year = year
#: Alternative titles of the movie
self.alternative_titles = alternative_titles or []
@classmethod
def fromguess(cls, name, guess):
if guess['type'] != 'movie':
raise ValueError('The guess must be a movie guess')
if 'title' not in guess:
raise ValueError('Insufficient data to process the guess')
alternative_titles = []
if 'alternative_title' in guess:
alternative_titles.append(u"%s %s" % (guess['title'], guess['alternative_title']))
return cls(name, guess['title'], format=guess.get('format'), release_group=guess.get('release_group'),
resolution=guess.get('screen_size'), video_codec=guess.get('video_codec'),
audio_codec=guess.get('audio_codec'), year=guess.get('year'), alternative_titles=alternative_titles)
@classmethod
def fromname(cls, name):
return cls.fromguess(name, guessit(name, {'type': 'movie'}))
def __repr__(self):
if self.year is None:
return '<%s [%r]>' % (self.__class__.__name__, self.title)
return '<%s [%r, %d]>' % (self.__class__.__name__, self.title, self.year)
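A hedged sketch of the fromname shortcut; the exact fields guessit extracts can vary with its version, but a typical episode name parses like this:

video = Video.fromname('The.Big.Bang.Theory.S11E16.720p.HDTV.x264-AVS.mkv')
print(type(video).__name__)                       # Episode
print(video.series, video.season, video.episode)  # The Big Bang Theory 11 16
print(video.resolution, video.release_group)      # 720p AVS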

View File

@@ -13,6 +13,7 @@ import six.moves.xmlrpc_client
import dns.resolver
import ipaddress
import re
from six import PY3
from requests import exceptions
from urllib3.util import connection
@@ -102,13 +103,17 @@ class CFSession(CloudScraper):
resp = self.sendChallengeResponse(resp, **kwargs)
except ValueError as e:
if e.message == "Captcha":
if PY3:
error = str(e)
else:
error = e.message
if error == "Captcha":
parsed_url = urlparse(url)
domain = parsed_url.netloc
# solve the captcha
site_key = re.search(r'data-sitekey="(.+?)"', resp.content).group(1)
challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', resp.content).group(1)
challenge_ray = re.search(r'data-ray="(.+?)"', resp.content).group(1)
site_key = re.search(r'data-sitekey="(.+?)"', resp.text).group(1)
challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', resp.text).group(1)
challenge_ray = re.search(r'data-ray="(.+?)"', resp.text).group(1)
if not all([site_key, challenge_s, challenge_ray]):
raise Exception("cf: Captcha site-key not found!")

View File

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
An implementation of semantics and validations described in RFC 3986.
See http://rfc3986.readthedocs.io/ for detailed documentation.
:copyright: (c) 2014 Rackspace
:license: Apache v2.0, see LICENSE for details
"""
from .api import iri_reference
from .api import IRIReference
from .api import is_valid_uri
from .api import normalize_uri
from .api import uri_reference
from .api import URIReference
from .api import urlparse
from .parseresult import ParseResult
__title__ = 'rfc3986'
__author__ = 'Ian Stapleton Cordasco'
__author_email__ = 'graffatcolmingov@gmail.com'
__license__ = 'Apache v2.0'
__copyright__ = 'Copyright 2014 Rackspace'
__version__ = '1.3.2'
__all__ = (
'ParseResult',
'URIReference',
'IRIReference',
'is_valid_uri',
'normalize_uri',
'uri_reference',
'iri_reference',
'urlparse',
'__title__',
'__author__',
'__author_email__',
'__license__',
'__copyright__',
'__version__',
)
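A short usage sketch of this public API (values shown assume the library's default parsing and normalization):

from rfc3986 import uri_reference, normalize_uri

ref = uri_reference('https://example.com:8080/path?q=1#frag')
print(ref.scheme, ref.host, ref.port)        # https example.com 8080
print(normalize_uri('HTTP://Example.COM/path'))  # http://example.com/path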

View File

@@ -0,0 +1,353 @@
"""Module containing the implementation of the URIMixin class."""
import warnings
from . import exceptions as exc
from . import misc
from . import normalizers
from . import validators
class URIMixin(object):
"""Mixin with all shared methods for URIs and IRIs."""
__hash__ = tuple.__hash__
def authority_info(self):
"""Return a dictionary with the ``userinfo``, ``host``, and ``port``.
If the authority is not valid, it will raise a
:class:`~rfc3986.exceptions.InvalidAuthority` Exception.
:returns:
``{'userinfo': 'username:password', 'host': 'www.example.com',
'port': '80'}``
:rtype: dict
:raises rfc3986.exceptions.InvalidAuthority:
If the authority is not ``None`` and can not be parsed.
"""
if not self.authority:
return {'userinfo': None, 'host': None, 'port': None}
match = self._match_subauthority()
if match is None:
# In this case, we have an authority that was parsed from the URI
# Reference, but it cannot be further parsed by our
# misc.SUBAUTHORITY_MATCHER. In this case it must not be a valid
# authority.
raise exc.InvalidAuthority(self.authority.encode(self.encoding))
# We had a match, now let's ensure that it is actually a valid host
# address if it is IPv4
matches = match.groupdict()
host = matches.get('host')
if (host and misc.IPv4_MATCHER.match(host) and not
validators.valid_ipv4_host_address(host)):
# If we have a host, it appears to be IPv4 and it does not have
# valid bytes, it is an InvalidAuthority.
raise exc.InvalidAuthority(self.authority.encode(self.encoding))
return matches
def _match_subauthority(self):
return misc.SUBAUTHORITY_MATCHER.match(self.authority)
@property
def host(self):
"""If present, a string representing the host."""
try:
authority = self.authority_info()
except exc.InvalidAuthority:
return None
return authority['host']
@property
def port(self):
"""If present, the port extracted from the authority."""
try:
authority = self.authority_info()
except exc.InvalidAuthority:
return None
return authority['port']
@property
def userinfo(self):
"""If present, the userinfo extracted from the authority."""
try:
authority = self.authority_info()
except exc.InvalidAuthority:
return None
return authority['userinfo']
def is_absolute(self):
"""Determine if this URI Reference is an absolute URI.
See http://tools.ietf.org/html/rfc3986#section-4.3 for explanation.
:returns: ``True`` if it is an absolute URI, ``False`` otherwise.
:rtype: bool
"""
return bool(misc.ABSOLUTE_URI_MATCHER.match(self.unsplit()))
def is_valid(self, **kwargs):
"""Determine if the URI is valid.
.. deprecated:: 1.1.0
Use the :class:`~rfc3986.validators.Validator` object instead.
:param bool require_scheme: Set to ``True`` if you wish to require the
presence of the scheme component.
:param bool require_authority: Set to ``True`` if you wish to require
the presence of the authority component.
:param bool require_path: Set to ``True`` if you wish to require the
presence of the path component.
:param bool require_query: Set to ``True`` if you wish to require the
presence of the query component.
:param bool require_fragment: Set to ``True`` if you wish to require
the presence of the fragment component.
:returns: ``True`` if the URI is valid. ``False`` otherwise.
:rtype: bool
"""
warnings.warn("Please use rfc3986.validators.Validator instead. "
"This method will be eventually removed.",
DeprecationWarning)
validators = [
(self.scheme_is_valid, kwargs.get('require_scheme', False)),
(self.authority_is_valid, kwargs.get('require_authority', False)),
(self.path_is_valid, kwargs.get('require_path', False)),
(self.query_is_valid, kwargs.get('require_query', False)),
(self.fragment_is_valid, kwargs.get('require_fragment', False)),
]
return all(v(r) for v, r in validators)
def authority_is_valid(self, require=False):
"""Determine if the authority component is valid.
.. deprecated:: 1.1.0
Use the :class:`~rfc3986.validators.Validator` object instead.
:param bool require:
Set to ``True`` to require the presence of this component.
:returns:
``True`` if the authority is valid. ``False`` otherwise.
:rtype:
bool
"""
warnings.warn("Please use rfc3986.validators.Validator instead. "
"This method will be eventually removed.",
DeprecationWarning)
try:
self.authority_info()
except exc.InvalidAuthority:
return False
return validators.authority_is_valid(
self.authority,
host=self.host,
require=require,
)
def scheme_is_valid(self, require=False):
"""Determine if the scheme component is valid.
.. deprecated:: 1.1.0
Use the :class:`~rfc3986.validators.Validator` object instead.
:param str require: Set to ``True`` to require the presence of this
component.
:returns: ``True`` if the scheme is valid. ``False`` otherwise.
:rtype: bool
"""
warnings.warn("Please use rfc3986.validators.Validator instead. "
"This method will be eventually removed.",
DeprecationWarning)
return validators.scheme_is_valid(self.scheme, require)
def path_is_valid(self, require=False):
"""Determine if the path component is valid.
.. deprecated:: 1.1.0
Use the :class:`~rfc3986.validators.Validator` object instead.
:param str require: Set to ``True`` to require the presence of this
component.
:returns: ``True`` if the path is valid. ``False`` otherwise.
:rtype: bool
"""
warnings.warn("Please use rfc3986.validators.Validator instead. "
"This method will be eventually removed.",
DeprecationWarning)
return validators.path_is_valid(self.path, require)
def query_is_valid(self, require=False):
"""Determine if the query component is valid.
.. deprecated:: 1.1.0
Use the :class:`~rfc3986.validators.Validator` object instead.
:param str require: Set to ``True`` to require the presence of this
component.
:returns: ``True`` if the query is valid. ``False`` otherwise.
:rtype: bool
"""
warnings.warn("Please use rfc3986.validators.Validator instead. "
"This method will be eventually removed.",
DeprecationWarning)
return validators.query_is_valid(self.query, require)
def fragment_is_valid(self, require=False):
"""Determine if the fragment component is valid.
.. deprecated:: 1.1.0
Use the Validator object instead.
:param str require: Set to ``True`` to require the presence of this
component.
:returns: ``True`` if the fragment is valid. ``False`` otherwise.
:rtype: bool
"""
warnings.warn("Please use rfc3986.validators.Validator instead. "
"This method will be eventually removed.",
DeprecationWarning)
return validators.fragment_is_valid(self.fragment, require)
def normalized_equality(self, other_ref):
"""Compare this URIReference to another URIReference.
:param URIReference other_ref: (required), The reference with which
we're comparing.
:returns: ``True`` if the references are equal, ``False`` otherwise.
:rtype: bool
"""
return tuple(self.normalize()) == tuple(other_ref.normalize())
def resolve_with(self, base_uri, strict=False):
"""Use an absolute URI Reference to resolve this relative reference.
Assuming this is a relative reference that you would like to resolve,
use the provided base URI to resolve it.
See http://tools.ietf.org/html/rfc3986#section-5 for more information.
:param base_uri: Either a string or URIReference. It must be an
absolute URI or it will raise an exception.
:returns: A new URIReference which is the result of resolving this
reference using ``base_uri``.
:rtype: :class:`URIReference`
:raises rfc3986.exceptions.ResolutionError:
If the ``base_uri`` is not an absolute URI.
"""
if not isinstance(base_uri, URIMixin):
base_uri = type(self).from_string(base_uri)
if not base_uri.is_absolute():
raise exc.ResolutionError(base_uri)
# This is optional per
# http://tools.ietf.org/html/rfc3986#section-5.2.1
base_uri = base_uri.normalize()
# The reference we're resolving
resolving = self
if not strict and resolving.scheme == base_uri.scheme:
resolving = resolving.copy_with(scheme=None)
# http://tools.ietf.org/html/rfc3986#page-32
if resolving.scheme is not None:
target = resolving.copy_with(
path=normalizers.normalize_path(resolving.path)
)
else:
if resolving.authority is not None:
target = resolving.copy_with(
scheme=base_uri.scheme,
path=normalizers.normalize_path(resolving.path)
)
else:
if resolving.path is None:
if resolving.query is not None:
query = resolving.query
else:
query = base_uri.query
target = resolving.copy_with(
scheme=base_uri.scheme,
authority=base_uri.authority,
path=base_uri.path,
query=query
)
else:
if resolving.path.startswith('/'):
path = normalizers.normalize_path(resolving.path)
else:
path = normalizers.normalize_path(
misc.merge_paths(base_uri, resolving.path)
)
target = resolving.copy_with(
scheme=base_uri.scheme,
authority=base_uri.authority,
path=path,
query=resolving.query
)
return target
def unsplit(self):
"""Create a URI string from the components.
:returns: The URI Reference reconstituted as a string.
:rtype: str
"""
# See http://tools.ietf.org/html/rfc3986#section-5.3
result_list = []
if self.scheme:
result_list.extend([self.scheme, ':'])
if self.authority:
result_list.extend(['//', self.authority])
if self.path:
result_list.append(self.path)
if self.query is not None:
result_list.extend(['?', self.query])
if self.fragment is not None:
result_list.extend(['#', self.fragment])
return ''.join(result_list)
def copy_with(self, scheme=misc.UseExisting, authority=misc.UseExisting,
path=misc.UseExisting, query=misc.UseExisting,
fragment=misc.UseExisting):
"""Create a copy of this reference with the new components.
:param str scheme:
(optional) The scheme to use for the new reference.
:param str authority:
(optional) The authority to use for the new reference.
:param str path:
(optional) The path to use for the new reference.
:param str query:
(optional) The query to use for the new reference.
:param str fragment:
(optional) The fragment to use for the new reference.
:returns:
New URIReference with provided components.
:rtype:
URIReference
"""
attributes = {
'scheme': scheme,
'authority': authority,
'path': path,
'query': query,
'fragment': fragment,
}
for key, value in list(attributes.items()):
if value is misc.UseExisting:
del attributes[key]
uri = self._replace(**attributes)
uri.encoding = self.encoding
return uri
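A sketch of resolve_with() and unsplit() in action, resolving a relative reference against an absolute base:

from rfc3986 import uri_reference

base = uri_reference('http://example.com/a/b/c')
print(uri_reference('../d').resolve_with(base).unsplit())
# http://example.com/a/d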

View File

@@ -0,0 +1,267 @@
# -*- coding: utf-8 -*-
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for the regular expressions crafted from ABNF."""
import sys
# https://tools.ietf.org/html/rfc3986#page-13
GEN_DELIMS = GENERIC_DELIMITERS = ":/?#[]@"
GENERIC_DELIMITERS_SET = set(GENERIC_DELIMITERS)
# https://tools.ietf.org/html/rfc3986#page-13
SUB_DELIMS = SUB_DELIMITERS = "!$&'()*+,;="
SUB_DELIMITERS_SET = set(SUB_DELIMITERS)
# Escape the '*' for use in regular expressions
SUB_DELIMITERS_RE = r"!$&'()\*+,;="
RESERVED_CHARS_SET = GENERIC_DELIMITERS_SET.union(SUB_DELIMITERS_SET)
ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
DIGIT = '0123456789'
# https://tools.ietf.org/html/rfc3986#section-2.3
UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + r'._!-'
UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS)
NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET)
# We need to escape the '-' in this case:
UNRESERVED_RE = r'A-Za-z0-9._~\-'
# Percent encoded character values
PERCENT_ENCODED = PCT_ENCODED = '%[A-Fa-f0-9]{2}'
PCHAR = '([' + UNRESERVED_RE + SUB_DELIMITERS_RE + ':@]|%s)' % PCT_ENCODED
# NOTE(sigmavirus24): We're going to use more strict regular expressions
# than appear in Appendix B for scheme. This will prevent over-eager
# consuming of items that aren't schemes.
SCHEME_RE = '[a-zA-Z][a-zA-Z0-9+.-]*'
_AUTHORITY_RE = '[^/?#]*'
_PATH_RE = '[^?#]*'
_QUERY_RE = '[^#]*'
_FRAGMENT_RE = '.*'
# Extracted from http://tools.ietf.org/html/rfc3986#appendix-B
COMPONENT_PATTERN_DICT = {
'scheme': SCHEME_RE,
'authority': _AUTHORITY_RE,
'path': _PATH_RE,
'query': _QUERY_RE,
'fragment': _FRAGMENT_RE,
}
# See http://tools.ietf.org/html/rfc3986#appendix-B
# In this case, we name each of the important matches so we can use
# SRE_Match#groupdict to parse the values out if we so choose. This is also
# modified to ignore other matches that are not important to the parsing of
# the reference so we can also simply use SRE_Match#groups.
URL_PARSING_RE = (
r'(?:(?P<scheme>{scheme}):)?(?://(?P<authority>{authority}))?'
r'(?P<path>{path})(?:\?(?P<query>{query}))?'
r'(?:#(?P<fragment>{fragment}))?'
).format(**COMPONENT_PATTERN_DICT)
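Exercising the assembled pattern (a sketch; the named groups mirror Appendix B of the RFC):

import re

m = re.match(URL_PARSING_RE, 'https://example.com/path?q=1#frag')
print(m.groupdict())
# {'scheme': 'https', 'authority': 'example.com', 'path': '/path',
#  'query': 'q=1', 'fragment': 'frag'}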
# #########################
# Authority Matcher Section
# #########################
# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2
# The pattern for a regular name, e.g., www.google.com, api.github.com
REGULAR_NAME_RE = REG_NAME = '((?:{0}|[{1}])*)'.format(
'%[0-9A-Fa-f]{2}', SUB_DELIMITERS_RE + UNRESERVED_RE
)
# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1,
IPv4_RE = r'([0-9]{1,3}\.){3}[0-9]{1,3}'
# Hexadecimal characters used in each piece of an IPv6 address
HEXDIG_RE = '[0-9A-Fa-f]{1,4}'
# Least-significant 32 bits of an IPv6 address
LS32_RE = '({hex}:{hex}|{ipv4})'.format(hex=HEXDIG_RE, ipv4=IPv4_RE)
# Substitutions into the following patterns for IPv6 patterns defined
# http://tools.ietf.org/html/rfc3986#page-20
_subs = {'hex': HEXDIG_RE, 'ls32': LS32_RE}
# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details
# about ABNF (Augmented Backus-Naur Form) use in the comments
variations = [
# 6( h16 ":" ) ls32
'(%(hex)s:){6}%(ls32)s' % _subs,
# "::" 5( h16 ":" ) ls32
'::(%(hex)s:){5}%(ls32)s' % _subs,
# [ h16 ] "::" 4( h16 ":" ) ls32
'(%(hex)s)?::(%(hex)s:){4}%(ls32)s' % _subs,
# [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
'((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s' % _subs,
# [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
'((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s' % _subs,
# [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
'((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s' % _subs,
# [ *4( h16 ":" ) h16 ] "::" ls32
'((%(hex)s:){0,4}%(hex)s)?::%(ls32)s' % _subs,
# [ *5( h16 ":" ) h16 ] "::" h16
'((%(hex)s:){0,5}%(hex)s)?::%(hex)s' % _subs,
# [ *6( h16 ":" ) h16 ] "::"
'((%(hex)s:){0,6}%(hex)s)?::' % _subs,
]
IPv6_RE = '(({0})|({1})|({2})|({3})|({4})|({5})|({6})|({7})|({8}))'.format(
*variations
)
IPv_FUTURE_RE = r'v[0-9A-Fa-f]+\.[%s]+' % (
UNRESERVED_RE + SUB_DELIMITERS_RE + ':'
)
# RFC 6874 Zone ID ABNF
ZONE_ID = '(?:[' + UNRESERVED_RE + ']|' + PCT_ENCODED + ')+'
IPv6_ADDRZ_RFC4007_RE = IPv6_RE + '(?:(?:%25|%)' + ZONE_ID + ')?'
IPv6_ADDRZ_RE = IPv6_RE + '(?:%25' + ZONE_ID + ')?'
IP_LITERAL_RE = r'\[({0}|{1})\]'.format(
IPv6_ADDRZ_RFC4007_RE,
IPv_FUTURE_RE,
)
# Pattern for matching the host piece of the authority
HOST_RE = HOST_PATTERN = '({0}|{1}|{2})'.format(
REG_NAME,
IPv4_RE,
IP_LITERAL_RE,
)
USERINFO_RE = '^([' + UNRESERVED_RE + SUB_DELIMITERS_RE + ':]|%s)+' % (
PCT_ENCODED
)
PORT_RE = '[0-9]{1,5}'
# ####################
# Path Matcher Section
# ####################
# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information
# about the path patterns defined below.
segments = {
'segment': PCHAR + '*',
# Non-zero length segment
'segment-nz': PCHAR + '+',
# Non-zero length segment without ":"
'segment-nz-nc': PCHAR.replace(':', '') + '+'
}
# Path types taken from Section 3.3 (linked above)
PATH_EMPTY = '^$'
PATH_ROOTLESS = '%(segment-nz)s(/%(segment)s)*' % segments
PATH_NOSCHEME = '%(segment-nz-nc)s(/%(segment)s)*' % segments
PATH_ABSOLUTE = '/(%s)?' % PATH_ROOTLESS
PATH_ABEMPTY = '(/%(segment)s)*' % segments
PATH_RE = '^(%s|%s|%s|%s|%s)$' % (
PATH_ABEMPTY, PATH_ABSOLUTE, PATH_NOSCHEME, PATH_ROOTLESS, PATH_EMPTY
)
FRAGMENT_RE = QUERY_RE = (
'^([/?:@' + UNRESERVED_RE + SUB_DELIMITERS_RE + ']|%s)*$' % PCT_ENCODED
)
# ##########################
# Relative reference matcher
# ##########################
# See http://tools.ietf.org/html/rfc3986#section-4.2 for details
RELATIVE_PART_RE = '(//%s%s|%s|%s|%s)' % (
COMPONENT_PATTERN_DICT['authority'],
PATH_ABEMPTY,
PATH_ABSOLUTE,
PATH_NOSCHEME,
PATH_EMPTY,
)
# See http://tools.ietf.org/html/rfc3986#section-3 for definition
HIER_PART_RE = '(//%s%s|%s|%s|%s)' % (
COMPONENT_PATTERN_DICT['authority'],
PATH_ABEMPTY,
PATH_ABSOLUTE,
PATH_ROOTLESS,
PATH_EMPTY,
)
# ###############
# IRIs / RFC 3987
# ###############
# Only wide-unicode gets the high-ranges of UCSCHAR
if sys.maxunicode > 0xFFFF: # pragma: no cover
IPRIVATE = u'\uE000-\uF8FF\U000F0000-\U000FFFFD\U00100000-\U0010FFFD'
UCSCHAR_RE = (
u'\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'
u'\U00010000-\U0001FFFD\U00020000-\U0002FFFD'
u'\U00030000-\U0003FFFD\U00040000-\U0004FFFD'
u'\U00050000-\U0005FFFD\U00060000-\U0006FFFD'
u'\U00070000-\U0007FFFD\U00080000-\U0008FFFD'
u'\U00090000-\U0009FFFD\U000A0000-\U000AFFFD'
u'\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD'
u'\U000D0000-\U000DFFFD\U000E1000-\U000EFFFD'
)
else: # pragma: no cover
IPRIVATE = u'\uE000-\uF8FF'
UCSCHAR_RE = (
u'\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF'
)
IUNRESERVED_RE = u'A-Za-z0-9\\._~\\-' + UCSCHAR_RE
IPCHAR = u'([' + IUNRESERVED_RE + SUB_DELIMITERS_RE + u':@]|%s)' % PCT_ENCODED
isegments = {
'isegment': IPCHAR + u'*',
# Non-zero length segment
'isegment-nz': IPCHAR + u'+',
# Non-zero length segment without ":"
'isegment-nz-nc': IPCHAR.replace(':', '') + u'+'
}
IPATH_ROOTLESS = u'%(isegment-nz)s(/%(isegment)s)*' % isegments
IPATH_NOSCHEME = u'%(isegment-nz-nc)s(/%(isegment)s)*' % isegments
IPATH_ABSOLUTE = u'/(?:%s)?' % IPATH_ROOTLESS
IPATH_ABEMPTY = u'(?:/%(isegment)s)*' % isegments
IPATH_RE = u'^(?:%s|%s|%s|%s|%s)$' % (
IPATH_ABEMPTY, IPATH_ABSOLUTE, IPATH_NOSCHEME, IPATH_ROOTLESS, PATH_EMPTY
)
IREGULAR_NAME_RE = IREG_NAME = u'(?:{0}|[{1}])*'.format(
u'%[0-9A-Fa-f]{2}', SUB_DELIMITERS_RE + IUNRESERVED_RE
)
IHOST_RE = IHOST_PATTERN = u'({0}|{1}|{2})'.format(
IREG_NAME,
IPv4_RE,
IP_LITERAL_RE,
)
IUSERINFO_RE = u'^(?:[' + IUNRESERVED_RE + SUB_DELIMITERS_RE + u':]|%s)+' % (
PCT_ENCODED
)
IFRAGMENT_RE = (u'^(?:[/?:@' + IUNRESERVED_RE + SUB_DELIMITERS_RE
+ u']|%s)*$' % PCT_ENCODED)
IQUERY_RE = (u'^(?:[/?:@' + IUNRESERVED_RE + SUB_DELIMITERS_RE
+ IPRIVATE + u']|%s)*$' % PCT_ENCODED)
IRELATIVE_PART_RE = u'(//%s%s|%s|%s|%s)' % (
COMPONENT_PATTERN_DICT['authority'],
IPATH_ABEMPTY,
IPATH_ABSOLUTE,
IPATH_NOSCHEME,
PATH_EMPTY,
)
IHIER_PART_RE = u'(//%s%s|%s|%s|%s)' % (
COMPONENT_PATTERN_DICT['authority'],
IPATH_ABEMPTY,
IPATH_ABSOLUTE,
IPATH_ROOTLESS,
PATH_EMPTY,
)
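# Illustrative sketch: the patterns above are plain strings that rfc3986.misc
# compiles; URL_PARSING_RE alone splits a reference into its five named
# components (assumes the package imports as `rfc3986`).
import re
from rfc3986 import abnf_regexp

matcher = re.compile(abnf_regexp.URL_PARSING_RE)
parts = matcher.match('https://example.com/a?q=1#frag').groupdict()
# parts == {'scheme': 'https', 'authority': 'example.com',
#           'path': '/a', 'query': 'q=1', 'fragment': 'frag'}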

View File

@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Module containing the simple and functional API for rfc3986.
This module defines functions and provides access to the public attributes
and classes of rfc3986.
"""
from .iri import IRIReference
from .parseresult import ParseResult
from .uri import URIReference
def uri_reference(uri, encoding='utf-8'):
"""Parse a URI string into a URIReference.
This is a convenience function. You could achieve the same end by using
``URIReference.from_string(uri)``.
:param str uri: The URI which needs to be parsed into a reference.
:param str encoding: The encoding of the string provided
:returns: A parsed URI
:rtype: :class:`URIReference`
"""
return URIReference.from_string(uri, encoding)
def iri_reference(iri, encoding='utf-8'):
"""Parse a IRI string into an IRIReference.
This is a convenience function. You could achieve the same end by using
``IRIReference.from_string(iri)``.
:param str iri: The IRI which needs to be parsed into a reference.
:param str encoding: The encoding of the string provided
:returns: A parsed IRI
:rtype: :class:`IRIReference`
"""
return IRIReference.from_string(iri, encoding)
def is_valid_uri(uri, encoding='utf-8', **kwargs):
"""Determine if the URI given is valid.
This is a convenience function. You could use either
``uri_reference(uri).is_valid()`` or
``URIReference.from_string(uri).is_valid()`` to achieve the same result.
:param str uri: The URI to be validated.
:param str encoding: The encoding of the string provided
:param bool require_scheme: Set to ``True`` if you wish to require the
presence of the scheme component.
:param bool require_authority: Set to ``True`` if you wish to require the
presence of the authority component.
:param bool require_path: Set to ``True`` if you wish to require the
presence of the path component.
:param bool require_query: Set to ``True`` if you wish to require the
presence of the query component.
:param bool require_fragment: Set to ``True`` if you wish to require the
presence of the fragment component.
:returns: ``True`` if the URI is valid, ``False`` otherwise.
:rtype: bool
"""
return URIReference.from_string(uri, encoding).is_valid(**kwargs)
def normalize_uri(uri, encoding='utf-8'):
"""Normalize the given URI.
This is a convenience function. You could use either
``uri_reference(uri).normalize().unsplit()`` or
``URIReference.from_string(uri).normalize().unsplit()`` instead.
:param str uri: The URI to be normalized.
:param str encoding: The encoding of the string provided
:returns: The normalized URI.
:rtype: str
"""
normalized_reference = URIReference.from_string(uri, encoding).normalize()
return normalized_reference.unsplit()
def urlparse(uri, encoding='utf-8'):
"""Parse a given URI and return a ParseResult.
This is a partial replacement of the standard library's urlparse function.
:param str uri: The URI to be parsed.
:param str encoding: The encoding of the string provided.
:returns: A parsed URI
:rtype: :class:`~rfc3986.parseresult.ParseResult`
"""
return ParseResult.from_string(uri, encoding, strict=False)
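# Illustrative sketch of the functional API above (assumes the package
# imports as `rfc3986`):
from rfc3986 import api

print(api.is_valid_uri('https://example.com', require_scheme=True))  # True
print(api.normalize_uri('HTTPS://Example.COM/%7ejane'))
# -> https://example.com/%7Ejane (scheme/host lower-cased, percent upper-cased)
print(api.urlparse('https://example.com:8080/a').port)  # 8080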

View File

@ -0,0 +1,298 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2017 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module containing the logic for the URIBuilder object."""
from . import compat
from . import normalizers
from . import uri
class URIBuilder(object):
"""Object to aid in building up a URI Reference from parts.
.. note::
This object should be instantiated by the user, but it's recommended
that it is not provided with arguments. Instead, use the available
method to populate the fields.
"""
def __init__(self, scheme=None, userinfo=None, host=None, port=None,
path=None, query=None, fragment=None):
"""Initialize our URI builder.
:param str scheme:
(optional)
:param str userinfo:
(optional)
:param str host:
(optional)
:param int port:
(optional)
:param str path:
(optional)
:param str query:
(optional)
:param str fragment:
(optional)
"""
self.scheme = scheme
self.userinfo = userinfo
self.host = host
self.port = port
self.path = path
self.query = query
self.fragment = fragment
def __repr__(self):
"""Provide a convenient view of our builder object."""
formatstr = ('URIBuilder(scheme={b.scheme}, userinfo={b.userinfo}, '
'host={b.host}, port={b.port}, path={b.path}, '
'query={b.query}, fragment={b.fragment})')
return formatstr.format(b=self)
def add_scheme(self, scheme):
"""Add a scheme to our builder object.
After normalizing, this will generate a new URIBuilder instance with
the specified scheme and all other attributes the same.
.. code-block:: python
>>> URIBuilder().add_scheme('HTTPS')
URIBuilder(scheme='https', userinfo=None, host=None, port=None,
path=None, query=None, fragment=None)
"""
scheme = normalizers.normalize_scheme(scheme)
return URIBuilder(
scheme=scheme,
userinfo=self.userinfo,
host=self.host,
port=self.port,
path=self.path,
query=self.query,
fragment=self.fragment,
)
def add_credentials(self, username, password):
"""Add credentials as the userinfo portion of the URI.
.. code-block:: python
>>> URIBuilder().add_credentials('root', 's3crete')
URIBuilder(scheme=None, userinfo='root:s3crete', host=None,
port=None, path=None, query=None, fragment=None)
>>> URIBuilder().add_credentials('root', None)
URIBuilder(scheme=None, userinfo='root', host=None,
port=None, path=None, query=None, fragment=None)
"""
if username is None:
raise ValueError('Username cannot be None')
userinfo = normalizers.normalize_username(username)
if password is not None:
userinfo = '{}:{}'.format(
userinfo,
normalizers.normalize_password(password),
)
return URIBuilder(
scheme=self.scheme,
userinfo=userinfo,
host=self.host,
port=self.port,
path=self.path,
query=self.query,
fragment=self.fragment,
)
def add_host(self, host):
"""Add hostname to the URI.
.. code-block:: python
>>> URIBuilder().add_host('google.com')
URIBuilder(scheme=None, userinfo=None, host='google.com',
port=None, path=None, query=None, fragment=None)
"""
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=normalizers.normalize_host(host),
port=self.port,
path=self.path,
query=self.query,
fragment=self.fragment,
)
def add_port(self, port):
"""Add port to the URI.
.. code-block:: python
>>> URIBuilder().add_port(80)
URIBuilder(scheme=None, userinfo=None, host=None, port='80',
path=None, query=None, fragment=None)
>>> URIBuilder().add_port(443)
URIBuilder(scheme=None, userinfo=None, host=None, port='443',
path=None, query=None, fragment=None)
"""
port_int = int(port)
if port_int < 0:
raise ValueError(
'ports are not allowed to be negative. You provided {}'.format(
port_int,
)
)
if port_int > 65535:
raise ValueError(
'ports are not allowed to be larger than 65535. '
'You provided {}'.format(
port_int,
)
)
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=self.host,
port='{}'.format(port_int),
path=self.path,
query=self.query,
fragment=self.fragment,
)
def add_path(self, path):
"""Add a path to the URI.
.. code-block:: python
>>> URIBuilder().add_path('sigmavirus24/rfc3986')
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path='/sigmavirus24/rfc3986', query=None, fragment=None)
>>> URIBuilder().add_path('/checkout.php')
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path='/checkout.php', query=None, fragment=None)
"""
if not path.startswith('/'):
path = '/{}'.format(path)
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=self.host,
port=self.port,
path=normalizers.normalize_path(path),
query=self.query,
fragment=self.fragment,
)
def add_query_from(self, query_items):
"""Generate and add a query a dictionary or list of tuples.
.. code-block:: python
>>> URIBuilder().add_query_from({'a': 'b c'})
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path=None, query='a=b+c', fragment=None)
>>> URIBuilder().add_query_from([('a', 'b c')])
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path=None, query='a=b+c', fragment=None)
"""
query = normalizers.normalize_query(compat.urlencode(query_items))
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=self.host,
port=self.port,
path=self.path,
query=query,
fragment=self.fragment,
)
def add_query(self, query):
"""Add a pre-formated query string to the URI.
.. code-block:: python
>>> URIBuilder().add_query('a=b&c=d')
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path=None, query='a=b&c=d', fragment=None)
"""
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=self.host,
port=self.port,
path=self.path,
query=normalizers.normalize_query(query),
fragment=self.fragment,
)
def add_fragment(self, fragment):
"""Add a fragment to the URI.
.. code-block:: python
>>> URIBuilder().add_fragment('section-2.6.1')
URIBuilder(scheme=None, userinfo=None, host=None, port=None,
path=None, query=None, fragment='section-2.6.1')
"""
return URIBuilder(
scheme=self.scheme,
userinfo=self.userinfo,
host=self.host,
port=self.port,
path=self.path,
query=self.query,
fragment=normalizers.normalize_fragment(fragment),
)
def finalize(self):
"""Create a URIReference from our builder.
.. code-block:: python
>>> URIBuilder().add_scheme('https').add_host('github.com'
... ).add_path('sigmavirus24/rfc3986').finalize().unsplit()
'https://github.com/sigmavirus24/rfc3986'
>>> URIBuilder().add_scheme('https').add_host('github.com'
... ).add_path('sigmavirus24/rfc3986').add_credentials(
... 'sigmavirus24', 'not-re@l').finalize().unsplit()
'https://sigmavirus24:not-re%40l@github.com/sigmavirus24/rfc3986'
"""
return uri.URIReference(
self.scheme,
normalizers.normalize_authority(
(self.userinfo, self.host, self.port)
),
self.path,
self.query,
self.fragment,
)
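# Illustrative end-to-end sketch of the builder (assumes the package imports
# as `rfc3986`); every add_* call returns a fresh URIBuilder, so calls chain:
from rfc3986 import builder

url = builder.URIBuilder().add_scheme('https').add_host(
    'api.example.com').add_path('search').add_query_from(
    {'q': 'uri builder'}).finalize().unsplit()
# url == 'https://api.example.com/search?q=uri+builder'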

View File

@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compatibility module for Python 2 and 3 support."""
import sys
try:
from urllib.parse import quote as urlquote
except ImportError: # Python 2.x
from urllib import quote as urlquote
try:
from urllib.parse import urlencode
except ImportError: # Python 2.x
from urllib import urlencode
__all__ = (
'to_bytes',
'to_str',
'urlquote',
'urlencode',
)
PY3 = (3, 0) <= sys.version_info < (4, 0)
PY2 = (2, 6) <= sys.version_info < (2, 8)
if PY3:
unicode = str # Python 3.x
def to_str(b, encoding='utf-8'):
"""Ensure that b is text in the specified encoding."""
if hasattr(b, 'decode') and not isinstance(b, unicode):
b = b.decode(encoding)
return b
def to_bytes(s, encoding='utf-8'):
"""Ensure that s is converted to bytes from the encoding."""
if hasattr(s, 'encode') and not isinstance(s, bytes):
s = s.encode(encoding)
return s
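# Illustrative sketch: to_bytes/to_str are idempotent coercions used
# throughout the package (assumes it imports as `rfc3986`; Python 3 reprs
# shown in the comments):
from rfc3986 import compat

print(compat.to_bytes(u'caf\xe9'))    # b'caf\xc3\xa9' (UTF-8 encoded)
print(compat.to_str(b'caf\xc3\xa9'))  # café
print(compat.to_bytes(b'raw'))        # b'raw', returned unchanged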

View File

@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-
"""Exceptions module for rfc3986."""
from . import compat
class RFC3986Exception(Exception):
"""Base class for all rfc3986 exception classes."""
pass
class InvalidAuthority(RFC3986Exception):
"""Exception when the authority string is invalid."""
def __init__(self, authority):
"""Initialize the exception with the invalid authority."""
super(InvalidAuthority, self).__init__(
u"The authority ({0}) is not valid.".format(
compat.to_str(authority)))
class InvalidPort(RFC3986Exception):
"""Exception when the port is invalid."""
def __init__(self, port):
"""Initialize the exception with the invalid port."""
super(InvalidPort, self).__init__(
'The port ("{0}") is not valid.'.format(port))
class ResolutionError(RFC3986Exception):
"""Exception to indicate a failure to resolve a URI."""
def __init__(self, uri):
"""Initialize the error with the failed URI."""
super(ResolutionError, self).__init__(
"{0} is not an absolute URI.".format(uri.unsplit()))
class ValidationError(RFC3986Exception):
"""Exception raised during Validation of a URI."""
pass
class MissingComponentError(ValidationError):
"""Exception raised when a required component is missing."""
def __init__(self, uri, *component_names):
"""Initialize the error with the missing component name."""
verb = 'was'
if len(component_names) > 1:
verb = 'were'
self.uri = uri
self.components = sorted(component_names)
components = ', '.join(self.components)
super(MissingComponentError, self).__init__(
"{} {} required but missing".format(components, verb),
uri,
self.components,
)
class UnpermittedComponentError(ValidationError):
"""Exception raised when a component has an unpermitted value."""
def __init__(self, component_name, component_value, allowed_values):
"""Initialize the error with the unpermitted component."""
super(UnpermittedComponentError, self).__init__(
"{} was required to be one of {!r} but was {!r}".format(
component_name, list(sorted(allowed_values)), component_value,
),
component_name,
component_value,
allowed_values,
)
self.component_name = component_name
self.component_value = component_value
self.allowed_values = allowed_values
class PasswordForbidden(ValidationError):
"""Exception raised when a URL has a password in the userinfo section."""
def __init__(self, uri):
"""Initialize the error with the URI that failed validation."""
unsplit = getattr(uri, 'unsplit', lambda: uri)
super(PasswordForbidden, self).__init__(
'"{}" contained a password when validation forbade it'.format(
unsplit()
)
)
self.uri = uri
class InvalidComponentsError(ValidationError):
"""Exception raised when one or more components are invalid."""
def __init__(self, uri, *component_names):
"""Initialize the error with the invalid component name(s)."""
verb = 'was'
if len(component_names) > 1:
verb = 'were'
self.uri = uri
self.components = sorted(component_names)
components = ', '.join(self.components)
super(InvalidComponentsError, self).__init__(
"{} {} found to be invalid".format(components, verb),
uri,
self.components,
)
class MissingDependencyError(RFC3986Exception):
"""Exception raised when an IRI is encoded without the 'idna' module."""

View File

@ -0,0 +1,147 @@
"""Module containing the implementation of the IRIReference class."""
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Copyright (c) 2015 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple
from . import compat
from . import exceptions
from . import misc
from . import normalizers
from . import uri
try:
import idna
except ImportError: # pragma: no cover
idna = None
class IRIReference(namedtuple('IRIReference', misc.URI_COMPONENTS),
uri.URIMixin):
"""Immutable object representing a parsed IRI Reference.
Can be encoded into a URIReference object via the procedure
specified in RFC 3987 Section 3.1
.. note::
The IRI submodule is a new interface and may possibly change in
the future. Check for changes to the interface when upgrading.
"""
slots = ()
def __new__(cls, scheme, authority, path, query, fragment,
encoding='utf-8'):
"""Create a new IRIReference."""
ref = super(IRIReference, cls).__new__(
cls,
scheme or None,
authority or None,
path or None,
query,
fragment)
ref.encoding = encoding
return ref
def __eq__(self, other):
"""Compare this reference to another."""
other_ref = other
if isinstance(other, tuple):
other_ref = self.__class__(*other)
elif not isinstance(other, IRIReference):
try:
other_ref = self.__class__.from_string(other)
except TypeError:
raise TypeError(
'Unable to compare {0}() to {1}()'.format(
type(self).__name__, type(other).__name__))
# See http://tools.ietf.org/html/rfc3986#section-6.2
return tuple(self) == tuple(other_ref)
def _match_subauthority(self):
return misc.ISUBAUTHORITY_MATCHER.match(self.authority)
@classmethod
def from_string(cls, iri_string, encoding='utf-8'):
"""Parse a IRI reference from the given unicode IRI string.
:param str iri_string: Unicode IRI to be parsed into a reference.
:param str encoding: The encoding of the string provided
:returns: :class:`IRIReference` or subclass thereof
"""
iri_string = compat.to_str(iri_string, encoding)
split_iri = misc.IRI_MATCHER.match(iri_string).groupdict()
return cls(
split_iri['scheme'], split_iri['authority'],
normalizers.encode_component(split_iri['path'], encoding),
normalizers.encode_component(split_iri['query'], encoding),
normalizers.encode_component(split_iri['fragment'], encoding),
encoding,
)
def encode(self, idna_encoder=None): # noqa: C901
"""Encode an IRIReference into a URIReference instance.
If the ``idna`` module is installed or the ``rfc3986[idna]``
extra is used then unicode characters in the IRI host
component will be encoded with IDNA2008.
:param idna_encoder:
(optional) Function used to encode each part of the host component.
If not given, the optional ``idna`` module is used; without it,
encoding an IRI that has a host component raises an exception.
:rtype: uri.URIReference
:returns: A URI reference
"""
authority = self.authority
if authority:
if idna_encoder is None:
if idna is None: # pragma: no cover
raise exceptions.MissingDependencyError(
"Could not import the 'idna' module "
"and the IRI hostname requires encoding"
)
def idna_encoder(name):
if any(ord(c) > 128 for c in name):
try:
return idna.encode(name.lower(),
strict=True,
std3_rules=True)
except idna.IDNAError:
raise exceptions.InvalidAuthority(self.authority)
return name
authority = ""
if self.host:
authority = ".".join([compat.to_str(idna_encoder(part))
for part in self.host.split(".")])
if self.userinfo is not None:
authority = (normalizers.encode_component(
self.userinfo, self.encoding) + '@' + authority)
if self.port is not None:
authority += ":" + str(self.port)
return uri.URIReference(self.scheme,
authority,
path=self.path,
query=self.query,
fragment=self.fragment,
encoding=self.encoding)
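# Illustrative sketch (assumes the package imports as `rfc3986` and that the
# optional `idna` module is installed, since the host below is non-ASCII):
from rfc3986 import iri_reference

iri = iri_reference(u'http://\u0440\u0444.example/path')  # Cyrillic host
print(iri.encode().unsplit())  # http://xn--p1ai.example/path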

View File

@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Module containing compiled regular expressions and constants.
This module contains important constants, patterns, and compiled regular
expressions for parsing and validating URIs and their components.
"""
import re
from . import abnf_regexp
# These are enumerated for the named tuple used as a superclass of
# URIReference
URI_COMPONENTS = ['scheme', 'authority', 'path', 'query', 'fragment']
important_characters = {
'generic_delimiters': abnf_regexp.GENERIC_DELIMITERS,
'sub_delimiters': abnf_regexp.SUB_DELIMITERS,
# We need to escape the '*' in this case
're_sub_delimiters': abnf_regexp.SUB_DELIMITERS_RE,
'unreserved_chars': abnf_regexp.UNRESERVED_CHARS,
# We need to escape the '-' in this case:
're_unreserved': abnf_regexp.UNRESERVED_RE,
}
# For details about delimiters and reserved characters, see:
# http://tools.ietf.org/html/rfc3986#section-2.2
GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET
SUB_DELIMITERS = abnf_regexp.SUB_DELIMITERS_SET
RESERVED_CHARS = abnf_regexp.RESERVED_CHARS_SET
# For details about unreserved characters, see:
# http://tools.ietf.org/html/rfc3986#section-2.3
UNRESERVED_CHARS = abnf_regexp.UNRESERVED_CHARS_SET
NON_PCT_ENCODED = abnf_regexp.NON_PCT_ENCODED_SET
URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE)
SUBAUTHORITY_MATCHER = re.compile((
'^(?:(?P<userinfo>{0})@)?' # userinfo
'(?P<host>{1})' # host
':?(?P<port>{2})?$' # port
).format(abnf_regexp.USERINFO_RE,
abnf_regexp.HOST_PATTERN,
abnf_regexp.PORT_RE))
HOST_MATCHER = re.compile('^' + abnf_regexp.HOST_RE + '$')
IPv4_MATCHER = re.compile('^' + abnf_regexp.IPv4_RE + '$')
IPv6_MATCHER = re.compile(r'^\[' + abnf_regexp.IPv6_ADDRZ_RFC4007_RE + r'\]$')
# Used by host validator
IPv6_NO_RFC4007_MATCHER = re.compile(r'^\[%s\]$' % (
abnf_regexp.IPv6_ADDRZ_RE
))
# Matcher used to validate path components
PATH_MATCHER = re.compile(abnf_regexp.PATH_RE)
# ##################################
# Query and Fragment Matcher Section
# ##################################
QUERY_MATCHER = re.compile(abnf_regexp.QUERY_RE)
FRAGMENT_MATCHER = QUERY_MATCHER
# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1
SCHEME_MATCHER = re.compile('^{0}$'.format(abnf_regexp.SCHEME_RE))
RELATIVE_REF_MATCHER = re.compile(r'^%s(\?%s)?(#%s)?$' % (
abnf_regexp.RELATIVE_PART_RE,
abnf_regexp.QUERY_RE,
abnf_regexp.FRAGMENT_RE,
))
# See http://tools.ietf.org/html/rfc3986#section-4.3
ABSOLUTE_URI_MATCHER = re.compile(r'^%s:%s(\?%s)?$' % (
abnf_regexp.COMPONENT_PATTERN_DICT['scheme'],
abnf_regexp.HIER_PART_RE,
abnf_regexp.QUERY_RE[1:-1],
))
# ###############
# IRIs / RFC 3987
# ###############
IRI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE, re.UNICODE)
ISUBAUTHORITY_MATCHER = re.compile((
u'^(?:(?P<userinfo>{0})@)?' # iuserinfo
u'(?P<host>{1})' # ihost
u':?(?P<port>{2})?$' # port
).format(abnf_regexp.IUSERINFO_RE,
abnf_regexp.IHOST_RE,
abnf_regexp.PORT_RE), re.UNICODE)
# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3
def merge_paths(base_uri, relative_path):
"""Merge a base URI's path with a relative URI's path."""
if base_uri.path is None and base_uri.authority is not None:
return '/' + relative_path
else:
path = base_uri.path or ''
index = path.rfind('/')
return path[:index] + '/' + relative_path
UseExisting = object()
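# Illustrative sketch of merge_paths, which implements the RFC 3986
# Section 5.2.3 merge step (assumes the package imports as `rfc3986`):
from rfc3986 import misc, uri

base = uri.URIReference.from_string('http://a/b/c/d;p?q')
print(misc.merge_paths(base, 'y'))  # /b/c/y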

View File

@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module with functions to normalize components."""
import re
from . import compat
from . import misc
def normalize_scheme(scheme):
"""Normalize the scheme component."""
return scheme.lower()
def normalize_authority(authority):
"""Normalize an authority tuple to a string."""
userinfo, host, port = authority
result = ''
if userinfo:
result += normalize_percent_characters(userinfo) + '@'
if host:
result += normalize_host(host)
if port:
result += ':' + port
return result
def normalize_username(username):
"""Normalize a username to make it safe to include in userinfo."""
return compat.urlquote(username)
def normalize_password(password):
"""Normalize a password to make safe for userinfo."""
return compat.urlquote(password)
def normalize_host(host):
"""Normalize a host string."""
if misc.IPv6_MATCHER.match(host):
percent = host.find('%')
if percent != -1:
percent_25 = host.find('%25')
# Replace RFC 4007 IPv6 Zone ID delimiter '%' with '%25'
# from RFC 6874. If the host is '[<IPv6 addr>%25]' then we
# assume RFC 4007 and normalize to '[<IPV6 addr>%2525]'
if percent_25 == -1 or percent < percent_25 or \
(percent == percent_25 and percent_25 == len(host) - 4):
host = host.replace('%', '%25', 1)
# Don't normalize the casing of the Zone ID
return host[:percent].lower() + host[percent:]
return host.lower()
def normalize_path(path):
"""Normalize the path string."""
if not path:
return path
path = normalize_percent_characters(path)
return remove_dot_segments(path)
def normalize_query(query):
"""Normalize the query string."""
if not query:
return query
return normalize_percent_characters(query)
def normalize_fragment(fragment):
"""Normalize the fragment string."""
if not fragment:
return fragment
return normalize_percent_characters(fragment)
PERCENT_MATCHER = re.compile('%[A-Fa-f0-9]{2}')
def normalize_percent_characters(s):
"""All percent characters should be upper-cased.
For example, ``"%3afoo%DF%ab"`` should be turned into ``"%3Afoo%DF%AB"``.
"""
matches = set(PERCENT_MATCHER.findall(s))
for m in matches:
if not m.isupper():
s = s.replace(m, m.upper())
return s
def remove_dot_segments(s):
"""Remove dot segments from the string.
See also Section 5.2.4 of :rfc:`3986`.
"""
# See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
segments = s.split('/') # Turn the path into a list of segments
output = [] # Initialize the variable to use to store output
for segment in segments:
# '.' is the current directory, so ignore it, it is superfluous
if segment == '.':
continue
# Anything other than '..', should be appended to the output
elif segment != '..':
output.append(segment)
# In this case segment == '..', if we can, we should pop the last
# element
elif output:
output.pop()
# If the path starts with '/' and the output is empty or the first string
# is non-empty
if s.startswith('/') and (not output or output[0]):
output.insert(0, '')
# If the path starts with '/.' or '/..' ensure we add one more empty
# string to add a trailing '/'
if s.endswith(('/.', '/..')):
output.append('')
return '/'.join(output)
def encode_component(uri_component, encoding):
"""Encode the specific component in the provided encoding."""
if uri_component is None:
return uri_component
# Try to see if the component we're encoding is already percent-encoded
# so we can skip all '%' characters but still encode all others.
percent_encodings = len(PERCENT_MATCHER.findall(
compat.to_str(uri_component, encoding)))
uri_bytes = compat.to_bytes(uri_component, encoding)
is_percent_encoded = percent_encodings == uri_bytes.count(b'%')
encoded_uri = bytearray()
for i in range(0, len(uri_bytes)):
# Will return a single character bytestring on both Python 2 & 3
byte = uri_bytes[i:i+1]
byte_ord = ord(byte)
if ((is_percent_encoded and byte == b'%')
or (byte_ord < 128 and byte.decode() in misc.NON_PCT_ENCODED)):
encoded_uri.extend(byte)
continue
encoded_uri.extend('%{0:02x}'.format(byte_ord).encode().upper())
return encoded_uri.decode(encoding)
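# Illustrative sketch of the normalizers above (assumes the package imports
# as `rfc3986`):
from rfc3986 import normalizers

print(normalizers.remove_dot_segments('/a/b/c/./../../g'))       # /a/g
print(normalizers.normalize_percent_characters('%3afoo%DF%ab'))  # %3Afoo%DF%AB
print(normalizers.normalize_host('EXAMPLE.com'))                 # example.com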

View File

@ -0,0 +1,385 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module containing the urlparse compatibility logic."""
from collections import namedtuple
from . import compat
from . import exceptions
from . import misc
from . import normalizers
from . import uri
__all__ = ('ParseResult', 'ParseResultBytes')
PARSED_COMPONENTS = ('scheme', 'userinfo', 'host', 'port', 'path', 'query',
'fragment')
class ParseResultMixin(object):
def _generate_authority(self, attributes):
# I swear I did not align the comparisons below. That's just how they
# happened to align based on pep8 and attribute lengths.
userinfo, host, port = (attributes[p]
for p in ('userinfo', 'host', 'port'))
if (self.userinfo != userinfo or
self.host != host or
self.port != port):
if port:
port = '{0}'.format(port)
return normalizers.normalize_authority(
(compat.to_str(userinfo, self.encoding),
compat.to_str(host, self.encoding),
port)
)
return self.authority
def geturl(self):
"""Shim to match the standard library method."""
return self.unsplit()
@property
def hostname(self):
"""Shim to match the standard library."""
return self.host
@property
def netloc(self):
"""Shim to match the standard library."""
return self.authority
@property
def params(self):
"""Shim to match the standard library."""
return self.query
class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS),
ParseResultMixin):
"""Implementation of urlparse compatibility class.
This uses the URIReference logic to handle compatibility with the
urlparse.ParseResult class.
"""
slots = ()
def __new__(cls, scheme, userinfo, host, port, path, query, fragment,
uri_ref, encoding='utf-8'):
"""Create a new ParseResult."""
parse_result = super(ParseResult, cls).__new__(
cls,
scheme or None,
userinfo or None,
host,
port or None,
path or None,
query,
fragment)
parse_result.encoding = encoding
parse_result.reference = uri_ref
return parse_result
@classmethod
def from_parts(cls, scheme=None, userinfo=None, host=None, port=None,
path=None, query=None, fragment=None, encoding='utf-8'):
"""Create a ParseResult instance from its parts."""
authority = ''
if userinfo is not None:
authority += userinfo + '@'
if host is not None:
authority += host
if port is not None:
authority += ':{0}'.format(port)
uri_ref = uri.URIReference(scheme=scheme,
authority=authority,
path=path,
query=query,
fragment=fragment,
encoding=encoding).normalize()
userinfo, host, port = authority_from(uri_ref, strict=True)
return cls(scheme=uri_ref.scheme,
userinfo=userinfo,
host=host,
port=port,
path=uri_ref.path,
query=uri_ref.query,
fragment=uri_ref.fragment,
uri_ref=uri_ref,
encoding=encoding)
@classmethod
def from_string(cls, uri_string, encoding='utf-8', strict=True,
lazy_normalize=True):
"""Parse a URI from the given unicode URI string.
:param str uri_string: Unicode URI to be parsed into a reference.
:param str encoding: The encoding of the string provided
:param bool strict: Parse strictly according to :rfc:`3986` if True.
If False, parse similarly to the standard library's urlparse
function.
:returns: :class:`ParseResult` or subclass thereof
"""
reference = uri.URIReference.from_string(uri_string, encoding)
if not lazy_normalize:
reference = reference.normalize()
userinfo, host, port = authority_from(reference, strict)
return cls(scheme=reference.scheme,
userinfo=userinfo,
host=host,
port=port,
path=reference.path,
query=reference.query,
fragment=reference.fragment,
uri_ref=reference,
encoding=encoding)
@property
def authority(self):
"""Return the normalized authority."""
return self.reference.authority
def copy_with(self, scheme=misc.UseExisting, userinfo=misc.UseExisting,
host=misc.UseExisting, port=misc.UseExisting,
path=misc.UseExisting, query=misc.UseExisting,
fragment=misc.UseExisting):
"""Create a copy of this instance replacing with specified parts."""
attributes = zip(PARSED_COMPONENTS,
(scheme, userinfo, host, port, path, query, fragment))
attrs_dict = {}
for name, value in attributes:
if value is misc.UseExisting:
value = getattr(self, name)
attrs_dict[name] = value
authority = self._generate_authority(attrs_dict)
ref = self.reference.copy_with(scheme=attrs_dict['scheme'],
authority=authority,
path=attrs_dict['path'],
query=attrs_dict['query'],
fragment=attrs_dict['fragment'])
return ParseResult(uri_ref=ref, encoding=self.encoding, **attrs_dict)
def encode(self, encoding=None):
"""Convert to an instance of ParseResultBytes."""
encoding = encoding or self.encoding
attrs = dict(
zip(PARSED_COMPONENTS,
(attr.encode(encoding) if hasattr(attr, 'encode') else attr
for attr in self)))
return ParseResultBytes(
uri_ref=self.reference,
encoding=encoding,
**attrs
)
def unsplit(self, use_idna=False):
"""Create a URI string from the components.
:returns: The parsed URI reconstituted as a string.
:rtype: str
"""
parse_result = self
if use_idna and self.host:
hostbytes = self.host.encode('idna')
host = hostbytes.decode(self.encoding)
parse_result = self.copy_with(host=host)
return parse_result.reference.unsplit()
class ParseResultBytes(namedtuple('ParseResultBytes', PARSED_COMPONENTS),
ParseResultMixin):
"""Compatibility shim for the urlparse.ParseResultBytes object."""
def __new__(cls, scheme, userinfo, host, port, path, query, fragment,
uri_ref, encoding='utf-8', lazy_normalize=True):
"""Create a new ParseResultBytes instance."""
parse_result = super(ParseResultBytes, cls).__new__(
cls,
scheme or None,
userinfo or None,
host,
port or None,
path or None,
query or None,
fragment or None)
parse_result.encoding = encoding
parse_result.reference = uri_ref
parse_result.lazy_normalize = lazy_normalize
return parse_result
@classmethod
def from_parts(cls, scheme=None, userinfo=None, host=None, port=None,
path=None, query=None, fragment=None, encoding='utf-8',
lazy_normalize=True):
"""Create a ParseResult instance from its parts."""
authority = ''
if userinfo is not None:
authority += userinfo + '@'
if host is not None:
authority += host
if port is not None:
authority += ':{0}'.format(int(port))
uri_ref = uri.URIReference(scheme=scheme,
authority=authority,
path=path,
query=query,
fragment=fragment,
encoding=encoding)
if not lazy_normalize:
uri_ref = uri_ref.normalize()
to_bytes = compat.to_bytes
userinfo, host, port = authority_from(uri_ref, strict=True)
return cls(scheme=to_bytes(scheme, encoding),
userinfo=to_bytes(userinfo, encoding),
host=to_bytes(host, encoding),
port=port,
path=to_bytes(path, encoding),
query=to_bytes(query, encoding),
fragment=to_bytes(fragment, encoding),
uri_ref=uri_ref,
encoding=encoding,
lazy_normalize=lazy_normalize)
@classmethod
def from_string(cls, uri_string, encoding='utf-8', strict=True,
lazy_normalize=True):
"""Parse a URI from the given unicode URI string.
:param str uri_string: Unicode URI to be parsed into a reference.
:param str encoding: The encoding of the string provided
:param bool strict: Parse strictly according to :rfc:`3986` if True.
If False, parse similarly to the standard library's urlparse
function.
:returns: :class:`ParseResultBytes` or subclass thereof
"""
reference = uri.URIReference.from_string(uri_string, encoding)
if not lazy_normalize:
reference = reference.normalize()
userinfo, host, port = authority_from(reference, strict)
to_bytes = compat.to_bytes
return cls(scheme=to_bytes(reference.scheme, encoding),
userinfo=to_bytes(userinfo, encoding),
host=to_bytes(host, encoding),
port=port,
path=to_bytes(reference.path, encoding),
query=to_bytes(reference.query, encoding),
fragment=to_bytes(reference.fragment, encoding),
uri_ref=reference,
encoding=encoding,
lazy_normalize=lazy_normalize)
@property
def authority(self):
"""Return the normalized authority."""
return self.reference.authority.encode(self.encoding)
def copy_with(self, scheme=misc.UseExisting, userinfo=misc.UseExisting,
host=misc.UseExisting, port=misc.UseExisting,
path=misc.UseExisting, query=misc.UseExisting,
fragment=misc.UseExisting, lazy_normalize=True):
"""Create a copy of this instance replacing with specified parts."""
attributes = zip(PARSED_COMPONENTS,
(scheme, userinfo, host, port, path, query, fragment))
attrs_dict = {}
for name, value in attributes:
if value is misc.UseExisting:
value = getattr(self, name)
if not isinstance(value, bytes) and hasattr(value, 'encode'):
value = value.encode(self.encoding)
attrs_dict[name] = value
authority = self._generate_authority(attrs_dict)
to_str = compat.to_str
ref = self.reference.copy_with(
scheme=to_str(attrs_dict['scheme'], self.encoding),
authority=to_str(authority, self.encoding),
path=to_str(attrs_dict['path'], self.encoding),
query=to_str(attrs_dict['query'], self.encoding),
fragment=to_str(attrs_dict['fragment'], self.encoding)
)
if not lazy_normalize:
ref = ref.normalize()
return ParseResultBytes(
uri_ref=ref,
encoding=self.encoding,
lazy_normalize=lazy_normalize,
**attrs_dict
)
def unsplit(self, use_idna=False):
"""Create a URI bytes object from the components.
:returns: The parsed URI reconstituted as bytes.
:rtype: bytes
"""
parse_result = self
if use_idna and self.host:
# self.host is bytes, to encode to idna, we need to decode it
# first
host = self.host.decode(self.encoding)
hostbytes = host.encode('idna')
parse_result = self.copy_with(host=hostbytes)
if self.lazy_normalize:
parse_result = parse_result.copy_with(lazy_normalize=False)
uri = parse_result.reference.unsplit()
return uri.encode(self.encoding)
def split_authority(authority):
# Initialize our expected return values
userinfo = host = port = None
# Initialize an extra var we may need to use
extra_host = None
# Set-up rest in case there is no userinfo portion
rest = authority
if '@' in authority:
userinfo, rest = authority.rsplit('@', 1)
# Handle IPv6 host addresses
if rest.startswith('['):
host, rest = rest.split(']', 1)
host += ']'
if ':' in rest:
extra_host, port = rest.split(':', 1)
elif not host and rest:
host = rest
if extra_host and not host:
host = extra_host
return userinfo, host, port
def authority_from(reference, strict):
try:
subauthority = reference.authority_info()
except exceptions.InvalidAuthority:
if strict:
raise
userinfo, host, port = split_authority(reference.authority)
else:
# Thanks to Richard Barrell for this idea:
# https://twitter.com/0x2ba22e11/status/617338811975139328
userinfo, host, port = (subauthority.get(p)
for p in ('userinfo', 'host', 'port'))
if port:
try:
port = int(port)
except ValueError:
raise exceptions.InvalidPort(port)
return userinfo, host, port
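# Illustrative sketch (assumes the package imports as `rfc3986`): ParseResult
# exposes urlparse-style attributes on top of a URIReference.
from rfc3986 import parseresult

result = parseresult.ParseResult.from_string(
    'https://user@example.com:8443/base?q=1#top')
print(result.scheme, result.host, result.port)  # https example.com 8443
print(result.copy_with(host='example.org').unsplit())
# https://user@example.org:8443/base?q=1#top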

View File

@ -0,0 +1,153 @@
"""Module containing the implementation of the URIReference class."""
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Rackspace
# Copyright (c) 2015 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple
from . import compat
from . import misc
from . import normalizers
from ._mixin import URIMixin
class URIReference(namedtuple('URIReference', misc.URI_COMPONENTS), URIMixin):
"""Immutable object representing a parsed URI Reference.
.. note::
This class is not intended to be directly instantiated by the user.
This object exposes attributes for the following components of a
URI:
- scheme
- authority
- path
- query
- fragment
.. attribute:: scheme
The scheme that was parsed for the URI Reference. For example,
``http``, ``https``, ``smtp``, ``imap``, etc.
.. attribute:: authority
Component of the URI that contains the user information, host,
and port sub-components. For example,
``google.com``, ``127.0.0.1:5000``, ``username@[::1]``,
``username:password@example.com:443``, etc.
.. attribute:: path
The path that was parsed for the given URI Reference. For example,
``/``, ``/index.php``, etc.
.. attribute:: query
The query component for a given URI Reference. For example, ``a=b``,
``a=b%20c``, ``a=b+c``, ``a=b,c=d,e=%20f``, etc.
.. attribute:: fragment
The fragment component of a URI. For example, ``section-3.1``.
This class also provides extra attributes for easier access to information
like the subcomponents of the authority component.
.. attribute:: userinfo
The user information parsed from the authority.
.. attribute:: host
The hostname, IPv4, or IPv6 address parsed from the authority.
.. attribute:: port
The port parsed from the authority.
"""
slots = ()
def __new__(cls, scheme, authority, path, query, fragment,
encoding='utf-8'):
"""Create a new URIReference."""
ref = super(URIReference, cls).__new__(
cls,
scheme or None,
authority or None,
path or None,
query,
fragment)
ref.encoding = encoding
return ref
__hash__ = tuple.__hash__
def __eq__(self, other):
"""Compare this reference to another."""
other_ref = other
if isinstance(other, tuple):
other_ref = URIReference(*other)
elif not isinstance(other, URIReference):
try:
other_ref = URIReference.from_string(other)
except TypeError:
raise TypeError(
'Unable to compare URIReference() to {0}()'.format(
type(other).__name__))
# See http://tools.ietf.org/html/rfc3986#section-6.2
naive_equality = tuple(self) == tuple(other_ref)
return naive_equality or self.normalized_equality(other_ref)
def normalize(self):
"""Normalize this reference as described in Section 6.2.2.
This is not an in-place normalization. Instead this creates a new
URIReference.
:returns: A new reference object with normalized components.
:rtype: URIReference
"""
# See http://tools.ietf.org/html/rfc3986#section-6.2.2 for logic in
# this method.
return URIReference(normalizers.normalize_scheme(self.scheme or ''),
normalizers.normalize_authority(
(self.userinfo, self.host, self.port)),
normalizers.normalize_path(self.path or ''),
normalizers.normalize_query(self.query),
normalizers.normalize_fragment(self.fragment),
self.encoding)
@classmethod
def from_string(cls, uri_string, encoding='utf-8'):
"""Parse a URI reference from the given unicode URI string.
:param str uri_string: Unicode URI to be parsed into a reference.
:param str encoding: The encoding of the string provided
:returns: :class:`URIReference` or subclass thereof
"""
uri_string = compat.to_str(uri_string, encoding)
split_uri = misc.URI_MATCHER.match(uri_string).groupdict()
return cls(
split_uri['scheme'], split_uri['authority'],
normalizers.encode_component(split_uri['path'], encoding),
normalizers.encode_component(split_uri['query'], encoding),
normalizers.encode_component(split_uri['fragment'], encoding),
encoding,
)
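# Illustrative sketch (assumes the package imports as `rfc3986`): equality
# falls back to normalized comparison, per RFC 3986 Section 6.2.2.
from rfc3986 import uri

ref = uri.URIReference.from_string('hTTp://Example.COM/a/./b/../c')
print(ref.normalize().unsplit())        # http://example.com/a/c
print(ref == 'http://example.com/a/c')  # True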

View File

@ -0,0 +1,450 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2017 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module containing the validation logic for rfc3986."""
from . import exceptions
from . import misc
from . import normalizers
class Validator(object):
"""Object used to configure validation of all objects in rfc3986.
.. versionadded:: 1.0
Example usage::
>>> from rfc3986 import api, validators
>>> uri = api.uri_reference('https://github.com/')
>>> validator = validators.Validator().require_presence_of(
... 'scheme', 'host', 'path',
... ).allow_schemes(
... 'http', 'https',
... ).allow_hosts(
... '127.0.0.1', 'github.com',
... )
>>> validator.validate(uri)
>>> invalid_uri = api.uri_reference('imap://mail.google.com')
>>> validator.validate(invalid_uri)
Traceback (most recent call last):
...
rfc3986.exceptions.MissingComponentError: ('path was required but
missing', URIReference(scheme=u'imap', authority=u'mail.google.com',
path=None, query=None, fragment=None), ['path'])
"""
COMPONENT_NAMES = frozenset([
'scheme',
'userinfo',
'host',
'port',
'path',
'query',
'fragment',
])
def __init__(self):
"""Initialize our default validations."""
self.allowed_schemes = set()
self.allowed_hosts = set()
self.allowed_ports = set()
self.allow_password = True
self.required_components = {
'scheme': False,
'userinfo': False,
'host': False,
'port': False,
'path': False,
'query': False,
'fragment': False,
}
self.validated_components = self.required_components.copy()
def allow_schemes(self, *schemes):
"""Require the scheme to be one of the provided schemes.
.. versionadded:: 1.0
:param schemes:
Schemes, without ``://``, that are allowed.
:returns:
The validator instance.
:rtype:
Validator
"""
for scheme in schemes:
self.allowed_schemes.add(normalizers.normalize_scheme(scheme))
return self
def allow_hosts(self, *hosts):
"""Require the host to be one of the provided hosts.
.. versionadded:: 1.0
:param hosts:
Hosts that are allowed.
:returns:
The validator instance.
:rtype:
Validator
"""
for host in hosts:
self.allowed_hosts.add(normalizers.normalize_host(host))
return self
def allow_ports(self, *ports):
"""Require the port to be one of the provided ports.
.. versionadded:: 1.0
:param ports:
Ports that are allowed.
:returns:
The validator instance.
:rtype:
Validator
"""
for port in ports:
port_int = int(port, base=10)
if 0 <= port_int <= 65535:
self.allowed_ports.add(port)
return self
def allow_use_of_password(self):
"""Allow passwords to be present in the URI.
.. versionadded:: 1.0
:returns:
The validator instance.
:rtype:
Validator
"""
self.allow_password = True
return self
def forbid_use_of_password(self):
"""Prevent passwords from being included in the URI.
.. versionadded:: 1.0
:returns:
The validator instance.
:rtype:
Validator
"""
self.allow_password = False
return self
def check_validity_of(self, *components):
"""Check the validity of the components provided.
This can be specified repeatedly.
.. versionadded:: 1.1
:param components:
Names of components from :attr:`Validator.COMPONENT_NAMES`.
:returns:
The validator instance.
:rtype:
Validator
"""
components = [c.lower() for c in components]
for component in components:
if component not in self.COMPONENT_NAMES:
raise ValueError(
'"{}" is not a valid component'.format(component)
)
self.validated_components.update({
component: True for component in components
})
return self
def require_presence_of(self, *components):
"""Require the components provided.
This can be specified repeatedly.
.. versionadded:: 1.0
:param components:
Names of components from :attr:`Validator.COMPONENT_NAMES`.
:returns:
The validator instance.
:rtype:
Validator
"""
components = [c.lower() for c in components]
for component in components:
if component not in self.COMPONENT_NAMES:
raise ValueError(
'"{}" is not a valid component'.format(component)
)
self.required_components.update({
component: True for component in components
})
return self
def validate(self, uri):
"""Check a URI for conditions specified on this validator.
.. versionadded:: 1.0
:param uri:
Parsed URI to validate.
:type uri:
rfc3986.uri.URIReference
:raises MissingComponentError:
When a required component is missing.
:raises UnpermittedComponentError:
When a component is not one of those allowed.
:raises PasswordForbidden:
When a password is present in the userinfo component but is
not permitted by configuration.
:raises InvalidComponentsError:
When a component was found to be invalid.
"""
if not self.allow_password:
check_password(uri)
required_components = [
component
for component, required in self.required_components.items()
if required
]
validated_components = [
component
for component, required in self.validated_components.items()
if required
]
if required_components:
ensure_required_components_exist(uri, required_components)
if validated_components:
ensure_components_are_valid(uri, validated_components)
ensure_one_of(self.allowed_schemes, uri, 'scheme')
ensure_one_of(self.allowed_hosts, uri, 'host')
ensure_one_of(self.allowed_ports, uri, 'port')
def check_password(uri):
"""Assert that there is no password present in the uri."""
userinfo = uri.userinfo
if not userinfo:
return
credentials = userinfo.split(':', 1)
if len(credentials) <= 1:
return
raise exceptions.PasswordForbidden(uri)
def ensure_one_of(allowed_values, uri, attribute):
"""Assert that the uri's attribute is one of the allowed values."""
value = getattr(uri, attribute)
if value is not None and allowed_values and value not in allowed_values:
raise exceptions.UnpermittedComponentError(
attribute, value, allowed_values,
)
def ensure_required_components_exist(uri, required_components):
"""Assert that all required components are present in the URI."""
missing_components = sorted([
component
for component in required_components
if getattr(uri, component) is None
])
if missing_components:
raise exceptions.MissingComponentError(uri, *missing_components)
def is_valid(value, matcher, require):
"""Determine if a value is valid based on the provided matcher.
:param str value:
Value to validate.
:param matcher:
Compiled regular expression to use to validate the value.
:param require:
Whether or not the value is required.
"""
if require:
return (value is not None
and matcher.match(value))
# require is False: an absent value is acceptable; a present one must match
return value is None or matcher.match(value)
def authority_is_valid(authority, host=None, require=False):
"""Determine if the authority string is valid.
:param str authority:
The authority to validate.
:param str host:
(optional) The host portion of the authority to validate.
:param bool require:
(optional) Specify if authority must not be None.
:returns:
``True`` if valid, ``False`` otherwise
:rtype:
bool
"""
validated = is_valid(authority, misc.SUBAUTHORITY_MATCHER, require)
if validated and host is not None:
return host_is_valid(host, require)
return validated
def host_is_valid(host, require=False):
"""Determine if the host string is valid.
:param str host:
The host to validate.
:param bool require:
(optional) Specify if host must not be None.
:returns:
``True`` if valid, ``False`` otherwise
:rtype:
bool
"""
validated = is_valid(host, misc.HOST_MATCHER, require)
if validated and host is not None and misc.IPv4_MATCHER.match(host):
return valid_ipv4_host_address(host)
elif validated and host is not None and misc.IPv6_MATCHER.match(host):
return misc.IPv6_NO_RFC4007_MATCHER.match(host) is not None
return validated
def scheme_is_valid(scheme, require=False):
"""Determine if the scheme is valid.
:param str scheme:
The scheme string to validate.
:param bool require:
(optional) Set to ``True`` to require the presence of a scheme.
:returns:
``True`` if the scheme is valid. ``False`` otherwise.
:rtype:
bool
"""
return is_valid(scheme, misc.SCHEME_MATCHER, require)
def path_is_valid(path, require=False):
"""Determine if the path component is valid.
:param str path:
The path string to validate.
:param bool require:
(optional) Set to ``True`` to require the presence of a path.
:returns:
``True`` if the path is valid. ``False`` otherwise.
:rtype:
bool
"""
return is_valid(path, misc.PATH_MATCHER, require)
def query_is_valid(query, require=False):
"""Determine if the query component is valid.
:param str query:
The query string to validate.
:param bool require:
(optional) Set to ``True`` to require the presence of a query.
:returns:
``True`` if the query is valid. ``False`` otherwise.
:rtype:
bool
"""
return is_valid(query, misc.QUERY_MATCHER, require)
def fragment_is_valid(fragment, require=False):
"""Determine if the fragment component is valid.
:param str fragment:
The fragment string to validate.
:param bool require:
(optional) Set to ``True`` to require the presence of a fragment.
:returns:
``True`` if the fragment is valid. ``False`` otherwise.
:rtype:
bool
"""
return is_valid(fragment, misc.FRAGMENT_MATCHER, require)
def valid_ipv4_host_address(host):
"""Determine if the given host is a valid IPv4 address."""
# If the host exists, and it might be IPv4, check each byte in the
# address.
return all([0 <= int(byte, base=10) <= 255 for byte in host.split('.')])
_COMPONENT_VALIDATORS = {
'scheme': scheme_is_valid,
'path': path_is_valid,
'query': query_is_valid,
'fragment': fragment_is_valid,
}
_SUBAUTHORITY_VALIDATORS = set(['userinfo', 'host', 'port'])
def subauthority_component_is_valid(uri, component):
"""Determine if the userinfo, host, and port are valid."""
try:
subauthority_dict = uri.authority_info()
except exceptions.InvalidAuthority:
return False
# If we can parse the authority into sub-components and we're not
# validating the port, we can assume it's valid.
if component == 'host':
return host_is_valid(subauthority_dict['host'])
elif component != 'port':
return True
try:
port = int(subauthority_dict['port'])
except TypeError:
# If the port wasn't provided it'll be None and int(None) raises a
# TypeError
return True
return (0 <= port <= 65535)
def ensure_components_are_valid(uri, validated_components):
"""Assert that all components are valid in the URI."""
invalid_components = set([])
for component in validated_components:
if component in _SUBAUTHORITY_VALIDATORS:
if not subauthority_component_is_valid(uri, component):
invalid_components.add(component)
# Python's peephole optimizer means that while this continue *is*
# actually executed, coverage.py cannot detect that. See also,
# https://bitbucket.org/ned/coveragepy/issues/198/continue-marked-as-not-covered
continue # nocov: Python 2.7, 3.3, 3.4
validator = _COMPONENT_VALIDATORS[component]
if not validator(getattr(uri, component)):
invalid_components.add(component)
if invalid_components:
raise exceptions.InvalidComponentsError(uri, *invalid_components)
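# Illustrative sketch of component validation (assumes the package imports
# as `rfc3986`): the space makes the host unparseable, so validation fails.
from rfc3986 import exceptions, uri_reference, validators

validator = validators.Validator().check_validity_of('scheme', 'host', 'path')
validator.validate(uri_reference('https://example.com/ok'))  # passes
try:
    validator.validate(uri_reference('https://exa mple.com/'))
except exceptions.InvalidComponentsError as exc:
    print(exc.components)  # ['host']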

Some files were not shown because too many files have changed in this diff