mirror of https://github.com/evilhero/mylar
First commit to get the ball rolling for the conversion to PY3. What works: startup, GUI, config saves/loads, pullist recreates/populate, comictagger updated, metatagging works, rechecking files works. Everything else: not fully tested
This commit is contained in:
parent
4f8943b5b5
commit
69acf99781
32
Mylar.py
32
Mylar.py
|
@ -1,4 +1,3 @@
|
|||
#!/usr/bin/env python
|
||||
# This file is part of Mylar.
|
||||
#
|
||||
# Mylar is free software: you can redistribute it and/or modify
|
||||
|
@ -15,6 +14,7 @@
|
|||
# along with Mylar. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import os, sys, locale
|
||||
import argparse
|
||||
import errno
|
||||
import shutil
|
||||
import time
|
||||
|
@ -63,9 +63,9 @@ def main():
|
|||
mylar.SYS_ENCODING = 'UTF-8'
|
||||
|
||||
if not logger.LOG_LANG.startswith('en'):
|
||||
print 'language detected as non-English (%s). Forcing specific logging module - errors WILL NOT be captured in the logs' % logger.LOG_LANG
|
||||
print('language detected as non-English (%s). Forcing specific logging module - errors WILL NOT be captured in the logs' % logger.LOG_LANG)
|
||||
else:
|
||||
print 'log language set to %s' % logger.LOG_LANG
|
||||
print('log language set to %s' % logger.LOG_LANG)
|
||||
|
||||
# Set up and gather command line arguments
|
||||
parser = argparse.ArgumentParser(description='Automated Comic Book Downloader')
|
||||
|
@ -96,25 +96,25 @@ def main():
|
|||
|
||||
if args.maintenance:
|
||||
if all([args.exportjson is None, args.importdatabase is None, args.importjson is None, args.importstatus is False, args.update is False, args.fixslashes is False]):
|
||||
print 'Expecting subcommand with the maintenance positional argumeent'
|
||||
print('Expecting subcommand with the maintenance positional argumeent')
|
||||
sys.exit()
|
||||
mylar.MAINTENANCE = True
|
||||
else:
|
||||
mylar.MAINTENANCE = False
|
||||
|
||||
if args.verbose:
|
||||
print 'Verbose/Debugging mode enabled...'
|
||||
print('Verbose/Debugging mode enabled...')
|
||||
mylar.LOG_LEVEL = 2
|
||||
elif args.quiet:
|
||||
mylar.QUIET = True
|
||||
print 'Quiet logging mode enabled...'
|
||||
print('Quiet logging mode enabled...')
|
||||
mylar.LOG_LEVEL = 0
|
||||
else:
|
||||
mylar.LOG_LEVEL = 1
|
||||
|
||||
if args.daemon:
|
||||
if sys.platform == 'win32':
|
||||
print "Daemonize not supported under Windows, starting normally"
|
||||
print("Daemonize not supported under Windows, starting normally")
|
||||
else:
|
||||
mylar.DAEMON = True
|
||||
|
||||
|
@ -130,7 +130,7 @@ def main():
|
|||
mylar.CREATEPID = True
|
||||
try:
|
||||
file(mylar.PIDFILE, 'w').write("pid\n")
|
||||
except IOError, e:
|
||||
except IOError as e:
|
||||
raise SystemExit("Unable to write PID file: %s [%d]" % (e.strerror, e.errno))
|
||||
else:
|
||||
print("Not running in daemon mode. PID file creation disabled.")
|
||||
|
@ -177,15 +177,15 @@ def main():
|
|||
|
||||
# backup the db and configs before they load.
|
||||
if args.backup:
|
||||
print '[AUTO-BACKUP] Backing up .db and config.ini files for safety.'
|
||||
print('[AUTO-BACKUP] Backing up .db and config.ini files for safety.')
|
||||
backupdir = os.path.join(mylar.DATA_DIR, 'backup')
|
||||
|
||||
try:
|
||||
os.makedirs(backupdir)
|
||||
print '[AUTO-BACKUP] Directory does not exist for backup - creating : ' + backupdir
|
||||
print('[AUTO-BACKUP] Directory does not exist for backup - creating : ' + backupdir)
|
||||
except OSError as exception:
|
||||
if exception.errno != errno.EEXIST:
|
||||
print '[AUTO-BACKUP] Directory already exists.'
|
||||
print('[AUTO-BACKUP] Directory already exists.')
|
||||
raise
|
||||
|
||||
i = 0
|
||||
|
@ -200,14 +200,14 @@ def main():
|
|||
back_1 = os.path.join(backupdir, 'config.ini.1')
|
||||
|
||||
try:
|
||||
print '[AUTO-BACKUP] Now Backing up mylar.db file'
|
||||
print('[AUTO-BACKUP] Now Backing up mylar.db file')
|
||||
if os.path.isfile(back_1):
|
||||
print '[AUTO-BACKUP] ' + back_1 + ' exists. Deleting and keeping new.'
|
||||
print('[AUTO-BACKUP] ' + back_1 + ' exists. Deleting and keeping new.')
|
||||
os.remove(back_1)
|
||||
if os.path.isfile(back):
|
||||
print '[AUTO-BACKUP] Now renaming ' + back + ' to ' + back_1
|
||||
print('[AUTO-BACKUP] Now renaming ' + back + ' to ' + back_1)
|
||||
shutil.move(back, back_1)
|
||||
print '[AUTO-BACKUP] Now copying db file to ' + back
|
||||
print('[AUTO-BACKUP] Now copying db file to ' + back)
|
||||
shutil.copy(ogfile, back)
|
||||
|
||||
except OSError as exception:
|
||||
|
@ -282,7 +282,7 @@ def main():
|
|||
http_port = int(mylar.CONFIG.HTTP_PORT)
|
||||
|
||||
# Check if pyOpenSSL is installed. It is required for certificate generation
|
||||
# and for CherryPy.
|
||||
# and for cherrypy.
|
||||
if mylar.CONFIG.ENABLE_HTTPS:
|
||||
try:
|
||||
import OpenSSL
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
## ![Mylar Logo](https://github.com/evilhero/mylar/blob/master/data/images/mylarlogo.png) Mylar
|
||||
|
||||
## The PY3 branch is a WIP.
|
||||
## Some things will be completely broken, while other parts may work fine or partially.
|
||||
## Do not use this branch unless you're willing and able to fix things.
|
||||
|
||||
Mylar is an automated Comic Book (cbr/cbz) downloader program for use with NZB and torrents written in python. It supports SABnzbd, NZBGET, and many torrent clients in addition to DDL.
|
||||
|
||||
It will allow you to monitor weekly pull-lists for items belonging to user-specific series to download, as well as being able to monitor story-arcs. Support for TPB's and GN's is also now available.
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
#!/usr/bin/env python
|
||||
from lib.comictaggerlib.main import ctmain
|
||||
#from comictaggerlib.main import ctmain
|
||||
#from mylar import logger
|
||||
|
||||
if __name__ == '__main__':
|
||||
ctmain()
|
||||
|
|
|
@ -999,7 +999,7 @@
|
|||
<tr>
|
||||
<%
|
||||
porder = []
|
||||
for k,v in sorted(mylar.CONFIG.PROVIDER_ORDER.iteritems(), key=itemgetter(0), reverse=False):
|
||||
for k,v in sorted(mylar.CONFIG.PROVIDER_ORDER.items(), key=itemgetter(0), reverse=False):
|
||||
porder.append(v)
|
||||
porder = ', '.join(porder)
|
||||
%>
|
||||
|
|
|
@ -1,240 +0,0 @@
|
|||
Metadata-Version: 1.1
|
||||
Name: ConcurrentLogHandler
|
||||
Version: 0.9.1
|
||||
Summary: Concurrent logging handler (drop-in replacement for RotatingFileHandler)
|
||||
Home-page: http://launchpad.net/python-concurrent-log-handler
|
||||
Author: Lowell Alleman
|
||||
Author-email: lowell87@gmail.com
|
||||
License: http://www.apache.org/licenses/LICENSE-2.0
|
||||
Description:
|
||||
Overview
|
||||
========
|
||||
This module provides an additional log handler for Python's standard logging
|
||||
package (PEP 282). This handler will write log events to log file which is
|
||||
rotated when the log file reaches a certain size. Multiple processes can
|
||||
safely write to the same log file concurrently.
|
||||
|
||||
Details
|
||||
=======
|
||||
.. _portalocker: http://code.activestate.com/recipes/65203/
|
||||
|
||||
The ``ConcurrentRotatingFileHandler`` class is a drop-in replacement for
|
||||
Python's standard log handler ``RotatingFileHandler``. This module uses file
|
||||
locking so that multiple processes can concurrently log to a single file without
|
||||
dropping or clobbering log events. This module provides a file rotation scheme
|
||||
like with ``RotatingFileHanler``. Extra care is taken to ensure that logs
|
||||
can be safely rotated before the rotation process is started. (This module works
|
||||
around the file rename issue with ``RotatingFileHandler`` on Windows, where a
|
||||
rotation failure means that all subsequent log events are dropped).
|
||||
|
||||
This module attempts to preserve log records at all cost. This means that log
|
||||
files will grow larger than the specified maximum (rotation) size. So if disk
|
||||
space is tight, you may want to stick with ``RotatingFileHandler``, which will
|
||||
strictly adhere to the maximum file size.
|
||||
|
||||
If you have multiple instances of a script (or multiple scripts) all running at
|
||||
the same time and writing to the same log file, then *all* of the scripts should
|
||||
be using ``ConcurrentRotatingFileHandler``. You should not attempt to mix
|
||||
and match ``RotatingFileHandler`` and ``ConcurrentRotatingFileHandler``.
|
||||
|
||||
This package bundles `portalocker`_ to deal with file locking. Please be aware
|
||||
that portalocker only supports Unix (posix) an NT platforms at this time, and
|
||||
therefore this package only supports those platforms as well.
|
||||
|
||||
Installation
|
||||
============
|
||||
Use the following command to install this package::
|
||||
|
||||
pip install ConcurrentLogHandler
|
||||
|
||||
If you are installing from source, you can use::
|
||||
|
||||
python setup.py install
|
||||
|
||||
|
||||
Examples
|
||||
========
|
||||
|
||||
Simple Example
|
||||
--------------
|
||||
Here is a example demonstrating how to use this module directly (from within
|
||||
Python code)::
|
||||
|
||||
from logging import getLogger, INFO
|
||||
from cloghandler import ConcurrentRotatingFileHandler
|
||||
import os
|
||||
|
||||
log = getLogger()
|
||||
# Use an absolute path to prevent file rotation trouble.
|
||||
logfile = os.path.abspath("mylogfile.log")
|
||||
# Rotate log after reaching 512K, keep 5 old copies.
|
||||
rotateHandler = ConcurrentRotatingFileHandler(logfile, "a", 512*1024, 5)
|
||||
log.addHandler(rotateHandler)
|
||||
log.setLevel(INFO)
|
||||
|
||||
log.info("Here is a very exciting log message, just for you")
|
||||
|
||||
|
||||
Automatic fallback example
|
||||
--------------------------
|
||||
If you are distributing your code and you are unsure if the
|
||||
`ConcurrentLogHandler` package has been installed everywhere your code will run,
|
||||
Python makes it easy to gracefully fallback to the built in
|
||||
`RotatingFileHandler`, here is an example::
|
||||
|
||||
try:
|
||||
from cloghandler import ConcurrentRotatingFileHandler as RFHandler
|
||||
except ImportError:
|
||||
# Next 2 lines are optional: issue a warning to the user
|
||||
from warnings import warn
|
||||
warn("ConcurrentLogHandler package not installed. Using builtin log handler")
|
||||
from logging.handlers import RotatingFileHandler as RFHandler
|
||||
|
||||
log = getLogger()
|
||||
rotateHandler = RFHandler("/path/to/mylogfile.log", "a", 1048576, 15)
|
||||
log.addHandler(rotateHandler)
|
||||
|
||||
|
||||
|
||||
Config file example
|
||||
-------------------
|
||||
This example shows you how to use this log handler with the logging config file
|
||||
parser. This allows you to keep your logging configuration code separate from
|
||||
your application code.
|
||||
|
||||
Example config file: ``logging.ini``::
|
||||
|
||||
[loggers]
|
||||
keys=root
|
||||
|
||||
[handlers]
|
||||
keys=hand01
|
||||
|
||||
[formatters]
|
||||
keys=form01
|
||||
|
||||
[logger_root]
|
||||
level=NOTSET
|
||||
handlers=hand01
|
||||
|
||||
[handler_hand01]
|
||||
class=handlers.ConcurrentRotatingFileHandler
|
||||
level=NOTSET
|
||||
formatter=form01
|
||||
args=("rotating.log", "a", 512*1024, 5)
|
||||
|
||||
[formatter_form01]
|
||||
format=%(asctime)s %(levelname)s %(message)s
|
||||
|
||||
Example Python code: ``app.py``::
|
||||
|
||||
import logging, logging.config
|
||||
import cloghandler
|
||||
|
||||
logging.config.fileConfig("logging.ini")
|
||||
log = logging.getLogger()
|
||||
log.info("Here is a very exciting log message, just for you")
|
||||
|
||||
|
||||
Change Log
|
||||
==========
|
||||
|
||||
.. _Red Hat Bug #858912: https://bugzilla.redhat.com/show_bug.cgi?id=858912
|
||||
.. _Python Bug #15960: http://bugs.python.org/issue15960
|
||||
.. _LP Bug 1199332: https://bugs.launchpad.net/python-concurrent-log-handler/+bug/1199332
|
||||
.. _LP Bug 1199333: https://bugs.launchpad.net/python-concurrent-log-handler/+bug/1199333
|
||||
|
||||
|
||||
- 0.9.1: Bug fixes - `LP Bug 1199332`_ and `LP Bug 1199333`_.
|
||||
* More gracefully handle out of disk space scenarios. Prevent release() from
|
||||
throwing an exception.
|
||||
* Handle logging.shutdown() in Python 2.7+. Close the lock file stream via
|
||||
close().
|
||||
* Big thanks to Dan Callaghan for forwarding these issues and patches.
|
||||
|
||||
- 0.9.0: Now requires Python 2.6+
|
||||
* Revamp file opening/closing and file-locking internals (inspired by
|
||||
feedback from Vinay Sajip.)
|
||||
* Add the 'delay' parameter (delayed log file opening) to better match the
|
||||
core logging functionality in more recent version of Python.
|
||||
* For anyone still using Python 2.3-2.5, please use the latest 0.8.x release
|
||||
|
||||
- 0.8.6: Fixed packaging bug with test script
|
||||
* Fix a small packaging bug from the 0.8.5 release. (Thanks to Björn Häuser
|
||||
for bringing this to my attention.)
|
||||
* Updated stresstest.py to always use the correct python version when
|
||||
launching sub-processes instead of the system's default "python".
|
||||
|
||||
- 0.8.5: Fixed ValueError: I/O operation on closed file
|
||||
* Thanks to Vince Carney, Arif Kasim, Matt Drew, Nick Coghlan, and
|
||||
Dan Callaghan for bug reports. Bugs can now be filled here:
|
||||
https://bugs.launchpad.net/python-concurrent-log-handler. Bugs resolved
|
||||
`Red Hat Bug #858912`_ and `Python Bug #15960`_
|
||||
* Updated ez_setup.py to 0.7.7
|
||||
* Updated portalocker to 0.3 (now maintained by Rick van Hattem)
|
||||
* Initial Python 3 support (needs more testing)
|
||||
* Fixed minor spelling mistakes
|
||||
|
||||
- 0.8.4: Fixed lock-file naming issue
|
||||
* Resolved a minor issue where lock-files would be improperly named if the
|
||||
log file contained ".log" in the middle of the log name. For example, if
|
||||
you log file was "/var/log/mycompany.logging.mysource.log", the lock file
|
||||
would be named "/var/log/mycompany.ging.mysource.lock", which is not correct.
|
||||
Thanks to Dirk Rothe for pointing this out. Since this introduce a slight
|
||||
lock-file behavior difference, make sure all concurrent writers are updated
|
||||
to 0.8.4 at the same time if this issue effects you.
|
||||
* Updated ez_setup.py to 0.6c11
|
||||
|
||||
- 0.8.3: Fixed a log file rotation bug and updated docs
|
||||
* Fixed a bug that happens after log rotation when multiple processes are
|
||||
witting to the same log file. Each process ends up writing to their own
|
||||
log file ("log.1" or "log.2" instead of "log"). The fix is simply to reopen
|
||||
the log file and check the size again. I do not believe this bug results in
|
||||
data loss; however, this certainly was not the desired behavior. (A big
|
||||
thanks goes to Oliver Tonnhofer for finding, documenting, and providing a
|
||||
patch for this bug.)
|
||||
* Cleanup the docs. (aka "the page you are reading right now") I fixed some
|
||||
silly mistakes and typos... who writes this stuff?
|
||||
|
||||
- 0.8.2: Minor bug fix release (again)
|
||||
* Found and resolved another issue with older logging packages that do not
|
||||
support encoding.
|
||||
|
||||
- 0.8.1: Minor bug fix release
|
||||
* Now importing "codecs" directly; I found some slight differences in the
|
||||
logging module in different Python 2.4.x releases that caused the module to
|
||||
fail to load.
|
||||
|
||||
- 0.8.0: Minor feature release
|
||||
* Add better support for using ``logging.config.fileConfig()``. This class
|
||||
is now available using ``class=handlers.ConcurrentRotatingFileHandler``.
|
||||
* Minor changes in how the ``filename`` parameter is handled when given a
|
||||
relative path.
|
||||
|
||||
- 0.7.4: Minor bug fix
|
||||
* Fixed a typo in the package description (incorrect class name)
|
||||
* Added a change log; which you are reading now.
|
||||
* Fixed the ``close()`` method to no longer assume that stream is still
|
||||
open.
|
||||
|
||||
To-do
|
||||
=====
|
||||
* This module has had minimal testing in a multi-threaded process. I see no
|
||||
reason why this should be an issue, but no stress-testing has been done in a
|
||||
threaded situation. If this is important to you, you could always add
|
||||
threading support to the ``stresstest.py`` script and send me the patch.
|
||||
|
||||
|
||||
Keywords: logging,windows,linux,unix,rotate,portalocker
|
||||
Platform: nt
|
||||
Platform: posix
|
||||
Classifier: Development Status :: 4 - Beta
|
||||
Classifier: Topic :: System :: Logging
|
||||
Classifier: Operating System :: POSIX
|
||||
Classifier: Operating System :: Microsoft :: Windows
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 2.6
|
||||
Classifier: Programming Language :: Python :: 2.7
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Classifier: License :: OSI Approved :: Apache Software License
|
|
@ -1,16 +0,0 @@
|
|||
.bzrignore
|
||||
LICENSE
|
||||
README
|
||||
do_release.sh
|
||||
ez_setup.py
|
||||
pre_commit.sh
|
||||
setup.cfg
|
||||
setup.py
|
||||
stresstest.py
|
||||
src/cloghandler.py
|
||||
src/portalocker.py
|
||||
src/ConcurrentLogHandler.egg-info/PKG-INFO
|
||||
src/ConcurrentLogHandler.egg-info/SOURCES.txt
|
||||
src/ConcurrentLogHandler.egg-info/dependency_links.txt
|
||||
src/ConcurrentLogHandler.egg-info/top_level.txt
|
||||
src/ConcurrentLogHandler.egg-info/zip-safe
|
|
@ -1 +0,0 @@
|
|||
|
|
@ -1,13 +0,0 @@
|
|||
../cloghandler.py
|
||||
../portalocker.py
|
||||
../cloghandler.pyc
|
||||
../portalocker.pyc
|
||||
../../../../tests/stresstest.py
|
||||
../../../../docs/README
|
||||
../../../../docs/LICENSE
|
||||
./
|
||||
SOURCES.txt
|
||||
zip-safe
|
||||
PKG-INFO
|
||||
dependency_links.txt
|
||||
top_level.txt
|
|
@ -1,2 +0,0 @@
|
|||
cloghandler
|
||||
portalocker
|
|
@ -1 +0,0 @@
|
|||
|
|
@ -1,349 +0,0 @@
|
|||
# Copyright 2013 Lowell Alleman
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
# use this file except in compliance with the License. You may obtain a copy
|
||||
# of the License at http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
""" cloghandler.py: A smart replacement for the standard RotatingFileHandler
|
||||
|
||||
ConcurrentRotatingFileHandler: This class is a log handler which is a drop-in
|
||||
replacement for the python standard log handler 'RotateFileHandler', the primary
|
||||
difference being that this handler will continue to write to the same file if
|
||||
the file cannot be rotated for some reason, whereas the RotatingFileHandler will
|
||||
strictly adhere to the maximum file size. Unfortunately, if you are using the
|
||||
RotatingFileHandler on Windows, you will find that once an attempted rotation
|
||||
fails, all subsequent log messages are dropped. The other major advantage of
|
||||
this module is that multiple processes can safely write to a single log file.
|
||||
|
||||
To put it another way: This module's top priority is preserving your log
|
||||
records, whereas the standard library attempts to limit disk usage, which can
|
||||
potentially drop log messages. If you are trying to determine which module to
|
||||
use, there are number of considerations: What is most important: strict disk
|
||||
space usage or preservation of log messages? What OSes are you supporting? Can
|
||||
you afford to have processes blocked by file locks?
|
||||
|
||||
Concurrent access is handled by using file locks, which should ensure that log
|
||||
messages are not dropped or clobbered. This means that a file lock is acquired
|
||||
and released for every log message that is written to disk. (On Windows, you may
|
||||
also run into a temporary situation where the log file must be opened and closed
|
||||
for each log message.) This can have potentially performance implications. In my
|
||||
testing, performance was more than adequate, but if you need a high-volume or
|
||||
low-latency solution, I suggest you look elsewhere.
|
||||
|
||||
This module currently only support the 'nt' and 'posix' platforms due to the
|
||||
usage of the portalocker module. I do not have access to any other platforms
|
||||
for testing, patches are welcome.
|
||||
|
||||
See the README file for an example usage of this module.
|
||||
|
||||
This module supports Python 2.6 and later.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
__version__ = '0.9.1'
|
||||
__revision__ = 'lowell87@gmail.com-20130711022321-doutxl7zyzuwss5a 2013-07-10 22:23:21 -0400 [0]'
|
||||
__author__ = "Lowell Alleman"
|
||||
__all__ = [
|
||||
"ConcurrentRotatingHandler",
|
||||
]
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
from random import randint
|
||||
from logging import Handler, LogRecord
|
||||
from logging.handlers import BaseRotatingHandler
|
||||
|
||||
try:
|
||||
import codecs
|
||||
except ImportError:
|
||||
codecs = None
|
||||
|
||||
|
||||
|
||||
# Question/TODO: Should we have a fallback mode if we can't load portalocker /
|
||||
# we should still be better off than with the standard RotattingFileHandler
|
||||
# class, right? We do some rename checking... that should prevent some file
|
||||
# clobbering that the builtin class allows.
|
||||
|
||||
# sibling module than handles all the ugly platform-specific details of file locking
|
||||
from portalocker import lock, unlock, LOCK_EX, LOCK_NB, LockException
|
||||
|
||||
|
||||
# Workaround for handleError() in Python 2.7+ where record is written to stderr
|
||||
class NullLogRecord(LogRecord):
|
||||
def __init__(self):
|
||||
pass
|
||||
def __getattr__(self, attr):
|
||||
return None
|
||||
|
||||
class ConcurrentRotatingFileHandler(BaseRotatingHandler):
|
||||
"""
|
||||
Handler for logging to a set of files, which switches from one file to the
|
||||
next when the current file reaches a certain size. Multiple processes can
|
||||
write to the log file concurrently, but this may mean that the file will
|
||||
exceed the given size.
|
||||
"""
|
||||
def __init__(self, filename, mode='a', maxBytes=0, backupCount=0,
|
||||
encoding=None, debug=True, delay=0):
|
||||
"""
|
||||
Open the specified file and use it as the stream for logging.
|
||||
|
||||
By default, the file grows indefinitely. You can specify particular
|
||||
values of maxBytes and backupCount to allow the file to rollover at
|
||||
a predetermined size.
|
||||
|
||||
Rollover occurs whenever the current log file is nearly maxBytes in
|
||||
length. If backupCount is >= 1, the system will successively create
|
||||
new files with the same pathname as the base file, but with extensions
|
||||
".1", ".2" etc. appended to it. For example, with a backupCount of 5
|
||||
and a base file name of "app.log", you would get "app.log",
|
||||
"app.log.1", "app.log.2", ... through to "app.log.5". The file being
|
||||
written to is always "app.log" - when it gets filled up, it is closed
|
||||
and renamed to "app.log.1", and if files "app.log.1", "app.log.2" etc.
|
||||
exist, then they are renamed to "app.log.2", "app.log.3" etc.
|
||||
respectively.
|
||||
|
||||
If maxBytes is zero, rollover never occurs.
|
||||
|
||||
On Windows, it is not possible to rename a file that is currently opened
|
||||
by another process. This means that it is not possible to rotate the
|
||||
log files if multiple processes is using the same log file. In this
|
||||
case, the current log file will continue to grow until the rotation can
|
||||
be completed successfully. In order for rotation to be possible, all of
|
||||
the other processes need to close the file first. A mechanism, called
|
||||
"degraded" mode, has been created for this scenario. In degraded mode,
|
||||
the log file is closed after each log message is written. So once all
|
||||
processes have entered degraded mode, the net rotation attempt should
|
||||
be successful and then normal logging can be resumed. Using the 'delay'
|
||||
parameter may help reduce contention in some usage patterns.
|
||||
|
||||
This log handler assumes that all concurrent processes logging to a
|
||||
single file will are using only this class, and that the exact same
|
||||
parameters are provided to each instance of this class. If, for
|
||||
example, two different processes are using this class, but with
|
||||
different values for 'maxBytes' or 'backupCount', then odd behavior is
|
||||
expected. The same is true if this class is used by one application, but
|
||||
the RotatingFileHandler is used by another.
|
||||
"""
|
||||
# Absolute file name handling done by FileHandler since Python 2.5
|
||||
BaseRotatingHandler.__init__(self, filename, mode, encoding, delay)
|
||||
self.delay = delay
|
||||
self._rotateFailed = False
|
||||
self.maxBytes = maxBytes
|
||||
self.backupCount = backupCount
|
||||
self._open_lockfile()
|
||||
# For debug mode, swap out the "_degrade()" method with a more a verbose one.
|
||||
if debug:
|
||||
self._degrade = self._degrade_debug
|
||||
|
||||
def _open_lockfile(self):
|
||||
# Use 'file.lock' and not 'file.log.lock' (Only handles the normal "*.log" case.)
|
||||
if self.baseFilename.endswith(".log"):
|
||||
lock_file = self.baseFilename[:-4]
|
||||
else:
|
||||
lock_file = self.baseFilename
|
||||
lock_file += ".lock"
|
||||
self.stream_lock = open(lock_file,"w")
|
||||
|
||||
def _open(self, mode=None):
|
||||
"""
|
||||
Open the current base file with the (original) mode and encoding.
|
||||
Return the resulting stream.
|
||||
|
||||
Note: Copied from stdlib. Added option to override 'mode'
|
||||
"""
|
||||
if mode is None:
|
||||
mode = self.mode
|
||||
if self.encoding is None:
|
||||
stream = open(self.baseFilename, mode)
|
||||
else:
|
||||
stream = codecs.open(self.baseFilename, mode, self.encoding)
|
||||
return stream
|
||||
|
||||
def _close(self):
|
||||
""" Close file stream. Unlike close(), we don't tear anything down, we
|
||||
expect the log to be re-opened after rotation."""
|
||||
if self.stream:
|
||||
try:
|
||||
if not self.stream.closed:
|
||||
# Flushing probably isn't technically necessary, but it feels right
|
||||
self.stream.flush()
|
||||
self.stream.close()
|
||||
finally:
|
||||
self.stream = None
|
||||
|
||||
def acquire(self):
|
||||
""" Acquire thread and file locks. Re-opening log for 'degraded' mode.
|
||||
"""
|
||||
# handle thread lock
|
||||
Handler.acquire(self)
|
||||
# Issue a file lock. (This is inefficient for multiple active threads
|
||||
# within a single process. But if you're worried about high-performance,
|
||||
# you probably aren't using this log handler.)
|
||||
if self.stream_lock:
|
||||
# If stream_lock=None, then assume close() was called or something
|
||||
# else weird and ignore all file-level locks.
|
||||
if self.stream_lock.closed:
|
||||
# Daemonization can close all open file descriptors, see
|
||||
# https://bugzilla.redhat.com/show_bug.cgi?id=952929
|
||||
# Try opening the lock file again. Should we warn() here?!?
|
||||
try:
|
||||
self._open_lockfile()
|
||||
except Exception:
|
||||
self.handleError(NullLogRecord())
|
||||
# Don't try to open the stream lock again
|
||||
self.stream_lock = None
|
||||
return
|
||||
lock(self.stream_lock, LOCK_EX)
|
||||
# Stream will be opened as part by FileHandler.emit()
|
||||
|
||||
def release(self):
|
||||
""" Release file and thread locks. If in 'degraded' mode, close the
|
||||
stream to reduce contention until the log files can be rotated. """
|
||||
try:
|
||||
if self._rotateFailed:
|
||||
self._close()
|
||||
except Exception:
|
||||
self.handleError(NullLogRecord())
|
||||
finally:
|
||||
try:
|
||||
if self.stream_lock and not self.stream_lock.closed:
|
||||
unlock(self.stream_lock)
|
||||
except Exception:
|
||||
self.handleError(NullLogRecord())
|
||||
finally:
|
||||
# release thread lock
|
||||
Handler.release(self)
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Close log stream and stream_lock. """
|
||||
try:
|
||||
self._close()
|
||||
if not self.stream_lock.closed:
|
||||
self.stream_lock.close()
|
||||
finally:
|
||||
self.stream_lock = None
|
||||
Handler.close(self)
|
||||
|
||||
def _degrade(self, degrade, msg, *args):
|
||||
""" Set degrade mode or not. Ignore msg. """
|
||||
self._rotateFailed = degrade
|
||||
del msg, args # avoid pychecker warnings
|
||||
|
||||
def _degrade_debug(self, degrade, msg, *args):
|
||||
""" A more colorful version of _degade(). (This is enabled by passing
|
||||
"debug=True" at initialization).
|
||||
"""
|
||||
if degrade:
|
||||
if not self._rotateFailed:
|
||||
sys.stderr.write("Degrade mode - ENTERING - (pid=%d) %s\n" %
|
||||
(os.getpid(), msg % args))
|
||||
self._rotateFailed = True
|
||||
else:
|
||||
if self._rotateFailed:
|
||||
sys.stderr.write("Degrade mode - EXITING - (pid=%d) %s\n" %
|
||||
(os.getpid(), msg % args))
|
||||
self._rotateFailed = False
|
||||
|
||||
def doRollover(self):
|
||||
"""
|
||||
Do a rollover, as described in __init__().
|
||||
"""
|
||||
self._close()
|
||||
if self.backupCount <= 0:
|
||||
# Don't keep any backups, just overwrite the existing backup file
|
||||
# Locking doesn't much matter here; since we are overwriting it anyway
|
||||
self.stream = self._open("w")
|
||||
return
|
||||
try:
|
||||
# Determine if we can rename the log file or not. Windows refuses to
|
||||
# rename an open file, Unix is inode base so it doesn't care.
|
||||
|
||||
# Attempt to rename logfile to tempname: There is a slight race-condition here, but it seems unavoidable
|
||||
tmpname = None
|
||||
while not tmpname or os.path.exists(tmpname):
|
||||
tmpname = "%s.rotate.%08d" % (self.baseFilename, randint(0,99999999))
|
||||
try:
|
||||
# Do a rename test to determine if we can successfully rename the log file
|
||||
os.rename(self.baseFilename, tmpname)
|
||||
except (IOError, OSError):
|
||||
exc_value = sys.exc_info()[1]
|
||||
self._degrade(True, "rename failed. File in use? "
|
||||
"exception=%s", exc_value)
|
||||
return
|
||||
|
||||
# Q: Is there some way to protect this code from a KeboardInterupt?
|
||||
# This isn't necessarily a data loss issue, but it certainly does
|
||||
# break the rotation process during stress testing.
|
||||
|
||||
# There is currently no mechanism in place to handle the situation
|
||||
# where one of these log files cannot be renamed. (Example, user
|
||||
# opens "logfile.3" in notepad); we could test rename each file, but
|
||||
# nobody's complained about this being an issue; so the additional
|
||||
# code complexity isn't warranted.
|
||||
for i in range(self.backupCount - 1, 0, -1):
|
||||
sfn = "%s.%d" % (self.baseFilename, i)
|
||||
dfn = "%s.%d" % (self.baseFilename, i + 1)
|
||||
if os.path.exists(sfn):
|
||||
#print "%s -> %s" % (sfn, dfn)
|
||||
if os.path.exists(dfn):
|
||||
os.remove(dfn)
|
||||
os.rename(sfn, dfn)
|
||||
dfn = self.baseFilename + ".1"
|
||||
if os.path.exists(dfn):
|
||||
os.remove(dfn)
|
||||
os.rename(tmpname, dfn)
|
||||
#print "%s -> %s" % (self.baseFilename, dfn)
|
||||
self._degrade(False, "Rotation completed")
|
||||
finally:
|
||||
# Re-open the output stream, but if "delay" is enabled then wait
|
||||
# until the next emit() call. This could reduce rename contention in
|
||||
# some usage patterns.
|
||||
if not self.delay:
|
||||
self.stream = self._open()
|
||||
|
||||
def shouldRollover(self, record):
|
||||
"""
|
||||
Determine if rollover should occur.
|
||||
|
||||
For those that are keeping track. This differs from the standard
|
||||
library's RotatingLogHandler class. Because there is no promise to keep
|
||||
the file size under maxBytes we ignore the length of the current record.
|
||||
"""
|
||||
del record # avoid pychecker warnings
|
||||
# Is stream is not yet open, skip rollover check. (Check will occur on
|
||||
# next message, after emit() calls _open())
|
||||
if self.stream is None:
|
||||
return False
|
||||
if self._shouldRollover():
|
||||
# If some other process already did the rollover (which is possible
|
||||
# on Unix) the file our stream may now be named "log.1", thus
|
||||
# triggering another rollover. Avoid this by closing and opening
|
||||
# "log" again.
|
||||
self._close()
|
||||
self.stream = self._open()
|
||||
return self._shouldRollover()
|
||||
return False
|
||||
|
||||
def _shouldRollover(self):
|
||||
if self.maxBytes > 0: # are we rolling over?
|
||||
self.stream.seek(0, 2) #due to non-posix-compliant Windows feature
|
||||
if self.stream.tell() >= self.maxBytes:
|
||||
return True
|
||||
else:
|
||||
self._degrade(False, "Rotation done or not needed at this time")
|
||||
return False
|
||||
|
||||
|
||||
# Publish this class to the "logging.handlers" module so that it can be use
|
||||
# from a logging config file via logging.config.fileConfig().
|
||||
import logging.handlers
|
||||
logging.handlers.ConcurrentRotatingFileHandler = ConcurrentRotatingFileHandler
|
|
@ -1,141 +0,0 @@
|
|||
# portalocker.py - Cross-platform (posix/nt) API for flock-style file locking.
|
||||
# Requires python 1.5.2 or better.
|
||||
"""Cross-platform (posix/nt) API for flock-style file locking.
|
||||
|
||||
Synopsis:
|
||||
|
||||
import portalocker
|
||||
file = open("somefile", "r+")
|
||||
portalocker.lock(file, portalocker.LOCK_EX)
|
||||
file.seek(12)
|
||||
file.write("foo")
|
||||
file.close()
|
||||
|
||||
If you know what you're doing, you may choose to
|
||||
|
||||
portalocker.unlock(file)
|
||||
|
||||
before closing the file, but why?
|
||||
|
||||
Methods:
|
||||
|
||||
lock( file, flags )
|
||||
unlock( file )
|
||||
|
||||
Constants:
|
||||
|
||||
LOCK_EX
|
||||
LOCK_SH
|
||||
LOCK_NB
|
||||
|
||||
Exceptions:
|
||||
|
||||
LockException
|
||||
|
||||
Notes:
|
||||
|
||||
For the 'nt' platform, this module requires the Python Extensions for Windows.
|
||||
Be aware that this may not work as expected on Windows 95/98/ME.
|
||||
|
||||
History:
|
||||
|
||||
I learned the win32 technique for locking files from sample code
|
||||
provided by John Nielsen <nielsenjf@my-deja.com> in the documentation
|
||||
that accompanies the win32 modules.
|
||||
|
||||
Author: Jonathan Feinberg <jdf@pobox.com>,
|
||||
Lowell Alleman <lalleman@mfps.com>,
|
||||
Rick van Hattem <Rick.van.Hattem@Fawo.nl>
|
||||
Version: 0.3
|
||||
URL: https://github.com/WoLpH/portalocker
|
||||
"""
|
||||
|
||||
|
||||
__all__ = [
|
||||
"lock",
|
||||
"unlock",
|
||||
"LOCK_EX",
|
||||
"LOCK_SH",
|
||||
"LOCK_NB",
|
||||
"LockException",
|
||||
]
|
||||
|
||||
import os
|
||||
|
||||
class LockException(Exception):
|
||||
# Error codes:
|
||||
LOCK_FAILED = 1
|
||||
|
||||
if os.name == 'nt':
|
||||
import win32con
|
||||
import win32file
|
||||
import pywintypes
|
||||
LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
|
||||
LOCK_SH = 0 # the default
|
||||
LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
|
||||
# is there any reason not to reuse the following structure?
|
||||
__overlapped = pywintypes.OVERLAPPED()
|
||||
elif os.name == 'posix':
|
||||
import fcntl
|
||||
LOCK_EX = fcntl.LOCK_EX
|
||||
LOCK_SH = fcntl.LOCK_SH
|
||||
LOCK_NB = fcntl.LOCK_NB
|
||||
else:
|
||||
raise RuntimeError("PortaLocker only defined for nt and posix platforms")
|
||||
|
||||
if os.name == 'nt':
|
||||
def lock(file, flags):
|
||||
hfile = win32file._get_osfhandle(file.fileno())
|
||||
try:
|
||||
win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped)
|
||||
except pywintypes.error, exc_value:
|
||||
# error: (33, 'LockFileEx', 'The process cannot access the file because another process has locked a portion of the file.')
|
||||
if exc_value[0] == 33:
|
||||
raise LockException(LockException.LOCK_FAILED, exc_value[2])
|
||||
else:
|
||||
# Q: Are there exceptions/codes we should be dealing with here?
|
||||
raise
|
||||
|
||||
def unlock(file):
|
||||
hfile = win32file._get_osfhandle(file.fileno())
|
||||
try:
|
||||
win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
|
||||
except pywintypes.error, exc_value:
|
||||
if exc_value[0] == 158:
|
||||
# error: (158, 'UnlockFileEx', 'The segment is already unlocked.')
|
||||
# To match the 'posix' implementation, silently ignore this error
|
||||
pass
|
||||
else:
|
||||
# Q: Are there exceptions/codes we should be dealing with here?
|
||||
raise
|
||||
|
||||
elif os.name == 'posix':
|
||||
def lock(file, flags):
|
||||
try:
|
||||
fcntl.flock(file.fileno(), flags)
|
||||
except IOError, exc_value:
|
||||
# The exception code varies on different systems so we'll catch
|
||||
# every IO error
|
||||
raise LockException(*exc_value)
|
||||
|
||||
def unlock(file):
|
||||
fcntl.flock(file.fileno(), fcntl.LOCK_UN)
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from time import time, strftime, localtime
|
||||
import sys
|
||||
import portalocker
|
||||
|
||||
log = open('log.txt', "a+")
|
||||
portalocker.lock(log, portalocker.LOCK_EX)
|
||||
|
||||
timestamp = strftime("%m/%d/%Y %H:%M:%S\n", localtime(time()))
|
||||
log.write( timestamp )
|
||||
|
||||
print "Wrote lines. Hit enter to release lock."
|
||||
dummy = sys.stdin.readline()
|
||||
|
||||
log.close()
|
||||
|
|
@ -1,287 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
""" stresstest.py: A stress-tester for ConcurrentRotatingFileHandler
|
||||
|
||||
This utility spawns a bunch of processes that all try to concurrently write to
|
||||
the same file. This is pretty much the worst-case scenario for my log handler.
|
||||
Once all of the processes have completed writing to the log file, the output is
|
||||
compared to see if any log messages have been lost.
|
||||
|
||||
In the future, I may also add in support for testing with each process having
|
||||
multiple threads.
|
||||
|
||||
|
||||
"""
|
||||
|
||||
__version__ = '$Id$'
|
||||
__author__ = 'Lowell Alleman'
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
from subprocess import call, Popen, STDOUT
|
||||
from time import sleep
|
||||
|
||||
ROTATE_COUNT = 5000
|
||||
|
||||
# local lib; for testing
|
||||
from cloghandler import ConcurrentRotatingFileHandler
|
||||
|
||||
class RotateLogStressTester:
|
||||
def __init__(self, sharedfile, uniquefile, name="LogStressTester", logger_delay=0):
|
||||
self.sharedfile = sharedfile
|
||||
self.uniquefile = uniquefile
|
||||
self.name = name
|
||||
self.writeLoops = 100000
|
||||
self.rotateSize = 128 * 1024
|
||||
self.rotateCount = ROTATE_COUNT
|
||||
self.random_sleep_mode = False
|
||||
self.debug = False
|
||||
self.logger_delay = logger_delay
|
||||
|
||||
def getLogHandler(self, fn):
|
||||
""" Override this method if you want to test a different logging handler
|
||||
class. """
|
||||
return ConcurrentRotatingFileHandler(fn, 'a', self.rotateSize,
|
||||
self.rotateCount, delay=self.logger_delay,
|
||||
debug=self.debug)
|
||||
# To run the test with the standard library's RotatingFileHandler:
|
||||
# from logging.handlers import RotatingFileHandler
|
||||
# return RotatingFileHandler(fn, 'a', self.rotateSize, self.rotateCount)
|
||||
|
||||
def start(self):
|
||||
from logging import getLogger, FileHandler, Formatter, DEBUG
|
||||
self.log = getLogger(self.name)
|
||||
self.log.setLevel(DEBUG)
|
||||
|
||||
formatter = Formatter('%(asctime)s [%(process)d:%(threadName)s] %(levelname)-8s %(name)s: %(message)s')
|
||||
# Unique log handler (single file)
|
||||
handler = FileHandler(self.uniquefile, "w")
|
||||
handler.setLevel(DEBUG)
|
||||
handler.setFormatter(formatter)
|
||||
self.log.addHandler(handler)
|
||||
|
||||
# If you suspect that the diff stuff isn't working, un comment the next
|
||||
# line. You should see this show up once per-process.
|
||||
# self.log.info("Here is a line that should only be in the first output.")
|
||||
|
||||
# Setup output used for testing
|
||||
handler = self.getLogHandler(self.sharedfile)
|
||||
handler.setLevel(DEBUG)
|
||||
handler.setFormatter(formatter)
|
||||
self.log.addHandler(handler)
|
||||
|
||||
# If this ever becomes a real "Thread", then remove this line:
|
||||
self.run()
|
||||
|
||||
def run(self):
|
||||
c = 0
|
||||
from random import choice, randint
|
||||
# Use a bunch of random quotes, numbers, and severity levels to mix it up a bit!
|
||||
msgs = ["I found %d puppies", "There are %d cats in your hatz",
|
||||
"my favorite number is %d", "I am %d years old.", "1 + 1 = %d",
|
||||
"%d/0 = DivideByZero", "blah! %d thingies!", "8 15 16 23 48 %d",
|
||||
"the worlds largest prime number: %d", "%d happy meals!"]
|
||||
logfuncts = [self.log.debug, self.log.info, self.log.warn, self.log.error]
|
||||
|
||||
self.log.info("Starting to write random log message. Loop=%d", self.writeLoops)
|
||||
while c <= self.writeLoops:
|
||||
c += 1
|
||||
msg = choice(msgs)
|
||||
logfunc = choice(logfuncts)
|
||||
logfunc(msg, randint(0,99999999))
|
||||
|
||||
if self.random_sleep_mode and c % 1000 == 0:
|
||||
# Sleep from 0-15 seconds
|
||||
s = randint(1,15)
|
||||
print("PID %d sleeping for %d seconds" % (os.getpid(), s))
|
||||
sleep(s)
|
||||
# break
|
||||
self.log.info("Done witting random log messages.")
|
||||
|
||||
def iter_lognames(logfile, count):
|
||||
""" Generator for log file names based on a rotation scheme """
|
||||
for i in range(count -1, 0, -1):
|
||||
yield "%s.%d" % (logfile, i)
|
||||
yield logfile
|
||||
|
||||
def iter_logs(iterable, missing_ok=False):
|
||||
""" Generator to extract log entries from shared log file. """
|
||||
for fn in iterable:
|
||||
if os.path.exists(fn):
|
||||
for line in open(fn):
|
||||
yield line
|
||||
elif not missing_ok:
|
||||
raise ValueError("Missing log file %s" % fn)
|
||||
|
||||
def combine_logs(combinedlog, iterable, mode="w"):
|
||||
""" write all lines (iterable) into a single log file. """
|
||||
fp = open(combinedlog, mode)
|
||||
for chunk in iterable:
|
||||
fp.write(chunk)
|
||||
fp.close()
|
||||
|
||||
|
||||
|
||||
from optparse import OptionParser
|
||||
parser = OptionParser(usage="usage: %prog",
|
||||
version=__version__,
|
||||
description="Stress test the cloghandler module.")
|
||||
parser.add_option("--log-calls", metavar="NUM",
|
||||
action="store", type="int", default=50000,
|
||||
help="Number of logging entries to write to each log file. "
|
||||
"Default is %default")
|
||||
parser.add_option("--random-sleep-mode",
|
||||
action="store_true", default=False)
|
||||
parser.add_option("--debug",
|
||||
action="store_true", default=False)
|
||||
parser.add_option("--logger-delay",
|
||||
action="store_true", default=False,
|
||||
help="Enable the 'delay' mode in the logger class. "
|
||||
"This means that the log file will be opened on demand.")
|
||||
|
||||
|
||||
def main_client(args):
|
||||
(options, args) = parser.parse_args(args)
|
||||
if len(args) != 2:
|
||||
raise ValueError("Require 2 arguments. We have %d args" % len(args))
|
||||
(shared, client) = args
|
||||
|
||||
if os.path.isfile(client):
|
||||
sys.stderr.write("Already a client using output file %s\n" % client)
|
||||
sys.exit(1)
|
||||
tester = RotateLogStressTester(shared, client, logger_delay=options.logger_delay)
|
||||
tester.random_sleep_mode = options.random_sleep_mode
|
||||
tester.debug = options.debug
|
||||
tester.writeLoops = options.log_calls
|
||||
tester.start()
|
||||
print("We are done pid=%d" % os.getpid())
|
||||
|
||||
|
||||
|
||||
class TestManager:
|
||||
class ChildProc(object):
|
||||
""" Very simple child container class."""
|
||||
__slots__ = [ "popen", "sharedfile", "clientfile" ]
|
||||
def __init__(self, **kwargs):
|
||||
self.update(**kwargs)
|
||||
def update(self, **kwargs):
|
||||
for key, val in kwargs.items():
|
||||
setattr(self, key, val)
|
||||
|
||||
def __init__(self):
|
||||
self.tests = []
|
||||
|
||||
def launchPopen(self, *args, **kwargs):
|
||||
proc = Popen(*args, **kwargs)
|
||||
cp = self.ChildProc(popen=proc)
|
||||
self.tests.append(cp)
|
||||
return cp
|
||||
|
||||
def wait(self, check_interval=3):
|
||||
""" Wait for all child test processes to complete. """
|
||||
print("Waiting while children are out running and playing!")
|
||||
while True:
|
||||
sleep(check_interval)
|
||||
waiting = []
|
||||
for cp in self.tests:
|
||||
if cp.popen.poll() is None:
|
||||
waiting.append(cp.popen.pid)
|
||||
if not waiting:
|
||||
break
|
||||
print("Waiting on %r " % waiting)
|
||||
print("All children have stopped.")
|
||||
|
||||
def checkExitCodes(self):
|
||||
for cp in self.tests:
|
||||
if cp.popen.poll() != 0:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
|
||||
def unified_diff(a,b, out=sys.stdout):
|
||||
import difflib
|
||||
ai = open(a).readlines()
|
||||
bi = open(b).readlines()
|
||||
for line in difflib.unified_diff(ai, bi, a, b):
|
||||
out.write(line)
|
||||
|
||||
|
||||
|
||||
def main_runner(args):
|
||||
parser.add_option("--processes", metavar="NUM",
|
||||
action="store", type="int", default=3,
|
||||
help="Number of processes to spawn. Default: %default")
|
||||
parser.add_option("--delay", metavar="secs",
|
||||
action="store", type="float", default=2.5,
|
||||
help="Wait SECS before spawning next processes. "
|
||||
"Default: %default")
|
||||
parser.add_option("-p", "--path", metavar="DIR",
|
||||
action="store", default="test",
|
||||
help="Path to a temporary directory. Default: '%default'")
|
||||
|
||||
|
||||
this_script = args[0]
|
||||
(options, args) = parser.parse_args(args)
|
||||
options.path = os.path.abspath(options.path)
|
||||
if not os.path.isdir(options.path):
|
||||
os.makedirs(options.path)
|
||||
|
||||
manager = TestManager()
|
||||
shared = os.path.join(options.path, "shared.log")
|
||||
for client_id in range(options.processes):
|
||||
client = os.path.join(options.path, "client.log_client%s.log" % client_id)
|
||||
cmdline = [ sys.executable, this_script, "client", shared, client,
|
||||
"--log-calls=%d" % options.log_calls ]
|
||||
if options.random_sleep_mode:
|
||||
cmdline.append("--random-sleep-mode")
|
||||
if options.debug:
|
||||
cmdline.append("--debug")
|
||||
if options.logger_delay:
|
||||
cmdline.append("--logger-delay")
|
||||
|
||||
child = manager.launchPopen(cmdline)
|
||||
child.update(sharedfile=shared, clientfile=client)
|
||||
sleep(options.delay)
|
||||
|
||||
# Wait for all of the subprocesses to exit
|
||||
manager.wait()
|
||||
# Check children exit codes
|
||||
if not manager.checkExitCodes():
|
||||
sys.stderr.write("One or more of the child process has failed.\n"
|
||||
"Aborting test.\n")
|
||||
sys.exit(2)
|
||||
|
||||
client_combo = os.path.join(options.path, "client.log.combo")
|
||||
shared_combo = os.path.join(options.path, "shared.log.combo")
|
||||
|
||||
# Combine all of the log files...
|
||||
client_files = [ child.clientfile for child in manager.tests ]
|
||||
|
||||
if False:
|
||||
def sort_em(iterable):
|
||||
return iterable
|
||||
else:
|
||||
sort_em = sorted
|
||||
|
||||
print("Writing out combined client logs...")
|
||||
combine_logs(client_combo, sort_em(iter_logs(client_files)))
|
||||
print("done.")
|
||||
|
||||
print("Writing out combined shared logs...")
|
||||
shared_log_files = iter_lognames(shared, ROTATE_COUNT)
|
||||
log_lines = iter_logs(shared_log_files, missing_ok=True)
|
||||
combine_logs(shared_combo, sort_em(log_lines))
|
||||
print("done.")
|
||||
|
||||
print("Running internal diff: (If the next line is 'end of diff', then the stress test passed!)")
|
||||
unified_diff(client_combo, shared_combo)
|
||||
print(" --- end of diff ----")
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) > 1 and sys.argv[1].lower() == "client":
|
||||
main_client(sys.argv[2:])
|
||||
else:
|
||||
main_runner(sys.argv)
|
|
@ -1,88 +0,0 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
####
|
||||
# 06/2010 Nic Wolfe <nic@wolfeden.ca>
|
||||
# 02/2006 Will Holcomb <wholcomb@gmail.com>
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
|
||||
import urllib
|
||||
import urllib2
|
||||
import mimetools, mimetypes
|
||||
import os, sys
|
||||
|
||||
# Controls how sequences are uncoded. If true, elements may be given multiple values by
|
||||
# assigning a sequence.
|
||||
doseq = 1
|
||||
|
||||
class MultipartPostHandler(urllib2.BaseHandler):
|
||||
handler_order = urllib2.HTTPHandler.handler_order - 10 # needs to run first
|
||||
|
||||
def http_request(self, request):
|
||||
data = request.get_data()
|
||||
if data is not None and type(data) != str:
|
||||
v_files = []
|
||||
v_vars = []
|
||||
try:
|
||||
for(key, value) in data.items():
|
||||
if type(value) in (file, list, tuple):
|
||||
v_files.append((key, value))
|
||||
else:
|
||||
v_vars.append((key, value))
|
||||
except TypeError:
|
||||
systype, value, traceback = sys.exc_info()
|
||||
raise TypeError, "not a valid non-string sequence or mapping object", traceback
|
||||
|
||||
if len(v_files) == 0:
|
||||
data = urllib.urlencode(v_vars, doseq)
|
||||
else:
|
||||
boundary, data = MultipartPostHandler.multipart_encode(v_vars, v_files)
|
||||
contenttype = 'multipart/form-data; boundary=%s' % boundary
|
||||
if(request.has_header('Content-Type')
|
||||
and request.get_header('Content-Type').find('multipart/form-data') != 0):
|
||||
print "Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data')
|
||||
request.add_unredirected_header('Content-Type', contenttype)
|
||||
|
||||
request.add_data(data)
|
||||
return request
|
||||
|
||||
@staticmethod
|
||||
def multipart_encode(vars, files, boundary = None, buffer = None):
|
||||
if boundary is None:
|
||||
boundary = mimetools.choose_boundary()
|
||||
if buffer is None:
|
||||
buffer = ''
|
||||
for(key, value) in vars:
|
||||
buffer += '--%s\r\n' % boundary
|
||||
buffer += 'Content-Disposition: form-data; name="%s"' % key
|
||||
buffer += '\r\n\r\n' + value + '\r\n'
|
||||
for(key, fd) in files:
|
||||
|
||||
# allow them to pass in a file or a tuple with name & data
|
||||
if type(fd) == file:
|
||||
name_in = fd.name
|
||||
fd.seek(0)
|
||||
data_in = fd.read()
|
||||
elif type(fd) in (tuple, list):
|
||||
name_in, data_in = fd
|
||||
|
||||
filename = os.path.basename(name_in)
|
||||
contenttype = mimetypes.guess_type(filename)[0] or 'application/octet-stream'
|
||||
buffer += '--%s\r\n' % boundary
|
||||
buffer += 'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (key, filename)
|
||||
buffer += 'Content-Type: %s\r\n' % contenttype
|
||||
# buffer += 'Content-Length: %s\r\n' % file_size
|
||||
buffer += '\r\n' + data_in + '\r\n'
|
||||
buffer += '--%s--\r\n\r\n' % boundary
|
||||
return boundary, buffer
|
||||
|
||||
https_request = http_request
|
|
@ -1,8 +0,0 @@
|
|||
# These will be removed in APScheduler 4.0.
|
||||
#release = __import__('pkg_resources').get_distribution('APScheduler').version.split('-')[0]
|
||||
#version_info = tuple(int(x) if x.isdigit() else x for x in release.split('.'))
|
||||
#version = __version__ = '.'.join(str(x) for x in version_info[:3])
|
||||
|
||||
version_info = (3, 3, 1)
|
||||
release = '3.3.1'
|
||||
version = __version__ = '3.3.1'
|
|
@ -1,94 +0,0 @@
|
|||
__all__ = ('EVENT_SCHEDULER_STARTED', 'EVENT_SCHEDULER_SHUTDOWN', 'EVENT_SCHEDULER_PAUSED',
|
||||
'EVENT_SCHEDULER_RESUMED', 'EVENT_EXECUTOR_ADDED', 'EVENT_EXECUTOR_REMOVED',
|
||||
'EVENT_JOBSTORE_ADDED', 'EVENT_JOBSTORE_REMOVED', 'EVENT_ALL_JOBS_REMOVED',
|
||||
'EVENT_JOB_ADDED', 'EVENT_JOB_REMOVED', 'EVENT_JOB_MODIFIED', 'EVENT_JOB_EXECUTED',
|
||||
'EVENT_JOB_ERROR', 'EVENT_JOB_MISSED', 'EVENT_JOB_SUBMITTED', 'EVENT_JOB_MAX_INSTANCES',
|
||||
'SchedulerEvent', 'JobEvent', 'JobExecutionEvent')
|
||||
|
||||
|
||||
EVENT_SCHEDULER_STARTED = EVENT_SCHEDULER_START = 2 ** 0
|
||||
EVENT_SCHEDULER_SHUTDOWN = 2 ** 1
|
||||
EVENT_SCHEDULER_PAUSED = 2 ** 2
|
||||
EVENT_SCHEDULER_RESUMED = 2 ** 3
|
||||
EVENT_EXECUTOR_ADDED = 2 ** 4
|
||||
EVENT_EXECUTOR_REMOVED = 2 ** 5
|
||||
EVENT_JOBSTORE_ADDED = 2 ** 6
|
||||
EVENT_JOBSTORE_REMOVED = 2 ** 7
|
||||
EVENT_ALL_JOBS_REMOVED = 2 ** 8
|
||||
EVENT_JOB_ADDED = 2 ** 9
|
||||
EVENT_JOB_REMOVED = 2 ** 10
|
||||
EVENT_JOB_MODIFIED = 2 ** 11
|
||||
EVENT_JOB_EXECUTED = 2 ** 12
|
||||
EVENT_JOB_ERROR = 2 ** 13
|
||||
EVENT_JOB_MISSED = 2 ** 14
|
||||
EVENT_JOB_SUBMITTED = 2 ** 15
|
||||
EVENT_JOB_MAX_INSTANCES = 2 ** 16
|
||||
EVENT_ALL = (EVENT_SCHEDULER_STARTED | EVENT_SCHEDULER_SHUTDOWN | EVENT_SCHEDULER_PAUSED |
|
||||
EVENT_SCHEDULER_RESUMED | EVENT_EXECUTOR_ADDED | EVENT_EXECUTOR_REMOVED |
|
||||
EVENT_JOBSTORE_ADDED | EVENT_JOBSTORE_REMOVED | EVENT_ALL_JOBS_REMOVED |
|
||||
EVENT_JOB_ADDED | EVENT_JOB_REMOVED | EVENT_JOB_MODIFIED | EVENT_JOB_EXECUTED |
|
||||
EVENT_JOB_ERROR | EVENT_JOB_MISSED | EVENT_JOB_SUBMITTED | EVENT_JOB_MAX_INSTANCES)
|
||||
|
||||
|
||||
class SchedulerEvent(object):
|
||||
"""
|
||||
An event that concerns the scheduler itself.
|
||||
|
||||
:ivar code: the type code of this event
|
||||
:ivar alias: alias of the job store or executor that was added or removed (if applicable)
|
||||
"""
|
||||
|
||||
def __init__(self, code, alias=None):
|
||||
super(SchedulerEvent, self).__init__()
|
||||
self.code = code
|
||||
self.alias = alias
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s (code=%d)>' % (self.__class__.__name__, self.code)
|
||||
|
||||
|
||||
class JobEvent(SchedulerEvent):
|
||||
"""
|
||||
An event that concerns a job.
|
||||
|
||||
:ivar code: the type code of this event
|
||||
:ivar job_id: identifier of the job in question
|
||||
:ivar jobstore: alias of the job store containing the job in question
|
||||
"""
|
||||
|
||||
def __init__(self, code, job_id, jobstore):
|
||||
super(JobEvent, self).__init__(code)
|
||||
self.code = code
|
||||
self.job_id = job_id
|
||||
self.jobstore = jobstore
|
||||
|
||||
|
||||
class JobSubmissionEvent(JobEvent):
|
||||
"""
|
||||
An event that concerns the submission of a job to its executor.
|
||||
|
||||
:ivar scheduled_run_times: a list of datetimes when the job was intended to run
|
||||
"""
|
||||
|
||||
def __init__(self, code, job_id, jobstore, scheduled_run_times):
|
||||
super(JobSubmissionEvent, self).__init__(code, job_id, jobstore)
|
||||
self.scheduled_run_times = scheduled_run_times
|
||||
|
||||
|
||||
class JobExecutionEvent(JobEvent):
|
||||
"""
|
||||
An event that concerns the running of a job within its executor.
|
||||
|
||||
:ivar scheduled_run_time: the time when the job was scheduled to be run
|
||||
:ivar retval: the return value of the successfully executed job
|
||||
:ivar exception: the exception raised by the job
|
||||
:ivar traceback: a formatted traceback for the exception
|
||||
"""
|
||||
|
||||
def __init__(self, code, job_id, jobstore, scheduled_run_time, retval=None, exception=None,
|
||||
traceback=None):
|
||||
super(JobExecutionEvent, self).__init__(code, job_id, jobstore)
|
||||
self.scheduled_run_time = scheduled_run_time
|
||||
self.retval = retval
|
||||
self.exception = exception
|
||||
self.traceback = traceback
|
|
@ -1,49 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
import sys
|
||||
|
||||
from apscheduler.executors.base import BaseExecutor, run_job
|
||||
|
||||
try:
|
||||
from asyncio import iscoroutinefunction
|
||||
from apscheduler.executors.base_py3 import run_coroutine_job
|
||||
except ImportError:
|
||||
from trollius import iscoroutinefunction
|
||||
run_coroutine_job = None
|
||||
|
||||
|
||||
class AsyncIOExecutor(BaseExecutor):
|
||||
"""
|
||||
Runs jobs in the default executor of the event loop.
|
||||
|
||||
If the job function is a native coroutine function, it is scheduled to be run directly in the
|
||||
event loop as soon as possible. All other functions are run in the event loop's default
|
||||
executor which is usually a thread pool.
|
||||
|
||||
Plugin alias: ``asyncio``
|
||||
"""
|
||||
|
||||
def start(self, scheduler, alias):
|
||||
super(AsyncIOExecutor, self).start(scheduler, alias)
|
||||
self._eventloop = scheduler._eventloop
|
||||
|
||||
def _do_submit_job(self, job, run_times):
|
||||
def callback(f):
|
||||
try:
|
||||
events = f.result()
|
||||
except:
|
||||
self._run_job_error(job.id, *sys.exc_info()[1:])
|
||||
else:
|
||||
self._run_job_success(job.id, events)
|
||||
|
||||
if iscoroutinefunction(job.func):
|
||||
if run_coroutine_job is not None:
|
||||
coro = run_coroutine_job(job, job._jobstore_alias, run_times, self._logger.name)
|
||||
f = self._eventloop.create_task(coro)
|
||||
else:
|
||||
raise Exception('Executing coroutine based jobs is not supported with Trollius')
|
||||
else:
|
||||
f = self._eventloop.run_in_executor(None, run_job, job, job._jobstore_alias, run_times,
|
||||
self._logger.name)
|
||||
|
||||
f.add_done_callback(callback)
|
|
@ -1,137 +0,0 @@
|
|||
from abc import ABCMeta, abstractmethod
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timedelta
|
||||
from traceback import format_tb
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from pytz import utc
|
||||
import six
|
||||
|
||||
from apscheduler.events import (
|
||||
JobExecutionEvent, EVENT_JOB_MISSED, EVENT_JOB_ERROR, EVENT_JOB_EXECUTED)
|
||||
|
||||
|
||||
class MaxInstancesReachedError(Exception):
|
||||
def __init__(self, job):
|
||||
super(MaxInstancesReachedError, self).__init__(
|
||||
'Job "%s" has already reached its maximum number of instances (%d)' %
|
||||
(job.id, job.max_instances))
|
||||
|
||||
|
||||
class BaseExecutor(six.with_metaclass(ABCMeta, object)):
|
||||
"""Abstract base class that defines the interface that every executor must implement."""
|
||||
|
||||
_scheduler = None
|
||||
_lock = None
|
||||
_logger = logging.getLogger('apscheduler.executors')
|
||||
|
||||
def __init__(self):
|
||||
super(BaseExecutor, self).__init__()
|
||||
self._instances = defaultdict(lambda: 0)
|
||||
|
||||
def start(self, scheduler, alias):
|
||||
"""
|
||||
Called by the scheduler when the scheduler is being started or when the executor is being
|
||||
added to an already running scheduler.
|
||||
|
||||
:param apscheduler.schedulers.base.BaseScheduler scheduler: the scheduler that is starting
|
||||
this executor
|
||||
:param str|unicode alias: alias of this executor as it was assigned to the scheduler
|
||||
|
||||
"""
|
||||
self._scheduler = scheduler
|
||||
self._lock = scheduler._create_lock()
|
||||
self._logger = logging.getLogger('apscheduler.executors.%s' % alias)
|
||||
|
||||
def shutdown(self, wait=True):
|
||||
"""
|
||||
Shuts down this executor.
|
||||
|
||||
:param bool wait: ``True`` to wait until all submitted jobs
|
||||
have been executed
|
||||
"""
|
||||
|
||||
def submit_job(self, job, run_times):
|
||||
"""
|
||||
Submits job for execution.
|
||||
|
||||
:param Job job: job to execute
|
||||
:param list[datetime] run_times: list of datetimes specifying
|
||||
when the job should have been run
|
||||
:raises MaxInstancesReachedError: if the maximum number of
|
||||
allowed instances for this job has been reached
|
||||
|
||||
"""
|
||||
assert self._lock is not None, 'This executor has not been started yet'
|
||||
with self._lock:
|
||||
if self._instances[job.id] >= job.max_instances:
|
||||
raise MaxInstancesReachedError(job)
|
||||
|
||||
self._do_submit_job(job, run_times)
|
||||
self._instances[job.id] += 1
|
||||
|
||||
@abstractmethod
|
||||
def _do_submit_job(self, job, run_times):
|
||||
"""Performs the actual task of scheduling `run_job` to be called."""
|
||||
|
||||
def _run_job_success(self, job_id, events):
|
||||
"""
|
||||
Called by the executor with the list of generated events when :func:`run_job` has been
|
||||
successfully called.
|
||||
|
||||
"""
|
||||
with self._lock:
|
||||
self._instances[job_id] -= 1
|
||||
if self._instances[job_id] == 0:
|
||||
del self._instances[job_id]
|
||||
|
||||
for event in events:
|
||||
self._scheduler._dispatch_event(event)
|
||||
|
||||
def _run_job_error(self, job_id, exc, traceback=None):
|
||||
"""Called by the executor with the exception if there is an error calling `run_job`."""
|
||||
with self._lock:
|
||||
self._instances[job_id] -= 1
|
||||
if self._instances[job_id] == 0:
|
||||
del self._instances[job_id]
|
||||
|
||||
exc_info = (exc.__class__, exc, traceback)
|
||||
self._logger.error('Error running job %s', job_id, exc_info=exc_info)
|
||||
|
||||
|
||||
def run_job(job, jobstore_alias, run_times, logger_name):
|
||||
"""
|
||||
Called by executors to run the job. Returns a list of scheduler events to be dispatched by the
|
||||
scheduler.
|
||||
|
||||
"""
|
||||
events = []
|
||||
logger = logging.getLogger(logger_name)
|
||||
for run_time in run_times:
|
||||
# See if the job missed its run time window, and handle
|
||||
# possible misfires accordingly
|
||||
if job.misfire_grace_time is not None:
|
||||
difference = datetime.now(utc) - run_time
|
||||
grace_time = timedelta(seconds=job.misfire_grace_time)
|
||||
if difference > grace_time:
|
||||
events.append(JobExecutionEvent(EVENT_JOB_MISSED, job.id, jobstore_alias,
|
||||
run_time))
|
||||
logger.warning('Run time of job "%s" was missed by %s', job, difference)
|
||||
continue
|
||||
|
||||
logger.info('Running job "%s" (scheduled at %s)', job, run_time)
|
||||
try:
|
||||
retval = job.func(*job.args, **job.kwargs)
|
||||
except:
|
||||
exc, tb = sys.exc_info()[1:]
|
||||
formatted_tb = ''.join(format_tb(tb))
|
||||
events.append(JobExecutionEvent(EVENT_JOB_ERROR, job.id, jobstore_alias, run_time,
|
||||
exception=exc, traceback=formatted_tb))
|
||||
logger.exception('Job "%s" raised an exception', job)
|
||||
else:
|
||||
events.append(JobExecutionEvent(EVENT_JOB_EXECUTED, job.id, jobstore_alias, run_time,
|
||||
retval=retval))
|
||||
logger.info('Job "%s" executed successfully', job)
|
||||
|
||||
return events
|
|
@ -1,41 +0,0 @@
|
|||
import logging
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from traceback import format_tb
|
||||
|
||||
from pytz import utc
|
||||
|
||||
from apscheduler.events import (
|
||||
JobExecutionEvent, EVENT_JOB_MISSED, EVENT_JOB_ERROR, EVENT_JOB_EXECUTED)
|
||||
|
||||
|
||||
async def run_coroutine_job(job, jobstore_alias, run_times, logger_name):
|
||||
"""Coroutine version of run_job()."""
|
||||
events = []
|
||||
logger = logging.getLogger(logger_name)
|
||||
for run_time in run_times:
|
||||
# See if the job missed its run time window, and handle possible misfires accordingly
|
||||
if job.misfire_grace_time is not None:
|
||||
difference = datetime.now(utc) - run_time
|
||||
grace_time = timedelta(seconds=job.misfire_grace_time)
|
||||
if difference > grace_time:
|
||||
events.append(JobExecutionEvent(EVENT_JOB_MISSED, job.id, jobstore_alias,
|
||||
run_time))
|
||||
logger.warning('Run time of job "%s" was missed by %s', job, difference)
|
||||
continue
|
||||
|
||||
logger.info('Running job "%s" (scheduled at %s)', job, run_time)
|
||||
try:
|
||||
retval = await job.func(*job.args, **job.kwargs)
|
||||
except:
|
||||
exc, tb = sys.exc_info()[1:]
|
||||
formatted_tb = ''.join(format_tb(tb))
|
||||
events.append(JobExecutionEvent(EVENT_JOB_ERROR, job.id, jobstore_alias, run_time,
|
||||
exception=exc, traceback=formatted_tb))
|
||||
logger.exception('Job "%s" raised an exception', job)
|
||||
else:
|
||||
events.append(JobExecutionEvent(EVENT_JOB_EXECUTED, job.id, jobstore_alias, run_time,
|
||||
retval=retval))
|
||||
logger.info('Job "%s" executed successfully', job)
|
||||
|
||||
return events
|
|
@ -1,20 +0,0 @@
|
|||
import sys
|
||||
|
||||
from apscheduler.executors.base import BaseExecutor, run_job
|
||||
|
||||
|
||||
class DebugExecutor(BaseExecutor):
|
||||
"""
|
||||
A special executor that executes the target callable directly instead of deferring it to a
|
||||
thread or process.
|
||||
|
||||
Plugin alias: ``debug``
|
||||
"""
|
||||
|
||||
def _do_submit_job(self, job, run_times):
|
||||
try:
|
||||
events = run_job(job, job._jobstore_alias, run_times, self._logger.name)
|
||||
except:
|
||||
self._run_job_error(job.id, *sys.exc_info()[1:])
|
||||
else:
|
||||
self._run_job_success(job.id, events)
|
|
@ -1,30 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
import sys
|
||||
|
||||
from apscheduler.executors.base import BaseExecutor, run_job
|
||||
|
||||
|
||||
try:
|
||||
import gevent
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('GeventExecutor requires gevent installed')
|
||||
|
||||
|
||||
class GeventExecutor(BaseExecutor):
|
||||
"""
|
||||
Runs jobs as greenlets.
|
||||
|
||||
Plugin alias: ``gevent``
|
||||
"""
|
||||
|
||||
def _do_submit_job(self, job, run_times):
|
||||
def callback(greenlet):
|
||||
try:
|
||||
events = greenlet.get()
|
||||
except:
|
||||
self._run_job_error(job.id, *sys.exc_info()[1:])
|
||||
else:
|
||||
self._run_job_success(job.id, events)
|
||||
|
||||
gevent.spawn(run_job, job, job._jobstore_alias, run_times, self._logger.name).\
|
||||
link(callback)
|
|
@ -1,54 +0,0 @@
|
|||
from abc import abstractmethod
|
||||
import concurrent.futures
|
||||
|
||||
from apscheduler.executors.base import BaseExecutor, run_job
|
||||
|
||||
|
||||
class BasePoolExecutor(BaseExecutor):
|
||||
@abstractmethod
|
||||
def __init__(self, pool):
|
||||
super(BasePoolExecutor, self).__init__()
|
||||
self._pool = pool
|
||||
|
||||
def _do_submit_job(self, job, run_times):
|
||||
def callback(f):
|
||||
exc, tb = (f.exception_info() if hasattr(f, 'exception_info') else
|
||||
(f.exception(), getattr(f.exception(), '__traceback__', None)))
|
||||
if exc:
|
||||
self._run_job_error(job.id, exc, tb)
|
||||
else:
|
||||
self._run_job_success(job.id, f.result())
|
||||
|
||||
f = self._pool.submit(run_job, job, job._jobstore_alias, run_times, self._logger.name)
|
||||
f.add_done_callback(callback)
|
||||
|
||||
def shutdown(self, wait=True):
|
||||
self._pool.shutdown(wait)
|
||||
|
||||
|
||||
class ThreadPoolExecutor(BasePoolExecutor):
|
||||
"""
|
||||
An executor that runs jobs in a concurrent.futures thread pool.
|
||||
|
||||
Plugin alias: ``threadpool``
|
||||
|
||||
:param max_workers: the maximum number of spawned threads.
|
||||
"""
|
||||
|
||||
def __init__(self, max_workers=10):
|
||||
pool = concurrent.futures.ThreadPoolExecutor(int(max_workers))
|
||||
super(ThreadPoolExecutor, self).__init__(pool)
|
||||
|
||||
|
||||
class ProcessPoolExecutor(BasePoolExecutor):
|
||||
"""
|
||||
An executor that runs jobs in a concurrent.futures process pool.
|
||||
|
||||
Plugin alias: ``processpool``
|
||||
|
||||
:param max_workers: the maximum number of spawned processes.
|
||||
"""
|
||||
|
||||
def __init__(self, max_workers=10):
|
||||
pool = concurrent.futures.ProcessPoolExecutor(int(max_workers))
|
||||
super(ProcessPoolExecutor, self).__init__(pool)
|
|
@ -1,54 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
import sys
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from tornado.gen import convert_yielded
|
||||
|
||||
from apscheduler.executors.base import BaseExecutor, run_job
|
||||
|
||||
try:
|
||||
from inspect import iscoroutinefunction
|
||||
from apscheduler.executors.base_py3 import run_coroutine_job
|
||||
except ImportError:
|
||||
def iscoroutinefunction(func):
|
||||
return False
|
||||
|
||||
|
||||
class TornadoExecutor(BaseExecutor):
|
||||
"""
|
||||
Runs jobs either in a thread pool or directly on the I/O loop.
|
||||
|
||||
If the job function is a native coroutine function, it is scheduled to be run directly in the
|
||||
I/O loop as soon as possible. All other functions are run in a thread pool.
|
||||
|
||||
Plugin alias: ``tornado``
|
||||
|
||||
:param int max_workers: maximum number of worker threads in the thread pool
|
||||
"""
|
||||
|
||||
def __init__(self, max_workers=10):
|
||||
super(TornadoExecutor, self).__init__()
|
||||
self.executor = ThreadPoolExecutor(max_workers)
|
||||
|
||||
def start(self, scheduler, alias):
|
||||
super(TornadoExecutor, self).start(scheduler, alias)
|
||||
self._ioloop = scheduler._ioloop
|
||||
|
||||
def _do_submit_job(self, job, run_times):
|
||||
def callback(f):
|
||||
try:
|
||||
events = f.result()
|
||||
except:
|
||||
self._run_job_error(job.id, *sys.exc_info()[1:])
|
||||
else:
|
||||
self._run_job_success(job.id, events)
|
||||
|
||||
if iscoroutinefunction(job.func):
|
||||
f = run_coroutine_job(job, job._jobstore_alias, run_times, self._logger.name)
|
||||
else:
|
||||
f = self.executor.submit(run_job, job, job._jobstore_alias, run_times,
|
||||
self._logger.name)
|
||||
|
||||
f = convert_yielded(f)
|
||||
f.add_done_callback(callback)
|
|
@ -1,25 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
from apscheduler.executors.base import BaseExecutor, run_job
|
||||
|
||||
|
||||
class TwistedExecutor(BaseExecutor):
|
||||
"""
|
||||
Runs jobs in the reactor's thread pool.
|
||||
|
||||
Plugin alias: ``twisted``
|
||||
"""
|
||||
|
||||
def start(self, scheduler, alias):
|
||||
super(TwistedExecutor, self).start(scheduler, alias)
|
||||
self._reactor = scheduler._reactor
|
||||
|
||||
def _do_submit_job(self, job, run_times):
|
||||
def callback(success, result):
|
||||
if success:
|
||||
self._run_job_success(job.id, result)
|
||||
else:
|
||||
self._run_job_error(job.id, result.value, result.tb)
|
||||
|
||||
self._reactor.getThreadPool().callInThreadWithCallback(
|
||||
callback, run_job, job, job._jobstore_alias, run_times, self._logger.name)
|
|
@ -1,289 +0,0 @@
|
|||
from collections import Iterable, Mapping
|
||||
from uuid import uuid4
|
||||
|
||||
import six
|
||||
|
||||
from apscheduler.triggers.base import BaseTrigger
|
||||
from apscheduler.util import (
|
||||
ref_to_obj, obj_to_ref, datetime_repr, repr_escape, get_callable_name, check_callable_args,
|
||||
convert_to_datetime)
|
||||
|
||||
|
||||
class Job(object):
|
||||
"""
|
||||
Contains the options given when scheduling callables and its current schedule and other state.
|
||||
This class should never be instantiated by the user.
|
||||
|
||||
:var str id: the unique identifier of this job
|
||||
:var str name: the description of this job
|
||||
:var func: the callable to execute
|
||||
:var tuple|list args: positional arguments to the callable
|
||||
:var dict kwargs: keyword arguments to the callable
|
||||
:var bool coalesce: whether to only run the job once when several run times are due
|
||||
:var trigger: the trigger object that controls the schedule of this job
|
||||
:var str executor: the name of the executor that will run this job
|
||||
:var int misfire_grace_time: the time (in seconds) how much this job's execution is allowed to
|
||||
be late
|
||||
:var int max_instances: the maximum number of concurrently executing instances allowed for this
|
||||
job
|
||||
:var datetime.datetime next_run_time: the next scheduled run time of this job
|
||||
|
||||
.. note::
|
||||
The ``misfire_grace_time`` has some non-obvious effects on job execution. See the
|
||||
:ref:`missed-job-executions` section in the documentation for an in-depth explanation.
|
||||
"""
|
||||
|
||||
__slots__ = ('_scheduler', '_jobstore_alias', 'id', 'trigger', 'executor', 'func', 'func_ref',
|
||||
'args', 'kwargs', 'name', 'misfire_grace_time', 'coalesce', 'max_instances',
|
||||
'next_run_time')
|
||||
|
||||
def __init__(self, scheduler, id=None, **kwargs):
|
||||
super(Job, self).__init__()
|
||||
self._scheduler = scheduler
|
||||
self._jobstore_alias = None
|
||||
self._modify(id=id or uuid4().hex, **kwargs)
|
||||
|
||||
def modify(self, **changes):
|
||||
"""
|
||||
Makes the given changes to this job and saves it in the associated job store.
|
||||
|
||||
Accepted keyword arguments are the same as the variables on this class.
|
||||
|
||||
.. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.modify_job`
|
||||
|
||||
:return Job: this job instance
|
||||
|
||||
"""
|
||||
self._scheduler.modify_job(self.id, self._jobstore_alias, **changes)
|
||||
return self
|
||||
|
||||
def reschedule(self, trigger, **trigger_args):
|
||||
"""
|
||||
Shortcut for switching the trigger on this job.
|
||||
|
||||
.. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.reschedule_job`
|
||||
|
||||
:return Job: this job instance
|
||||
|
||||
"""
|
||||
self._scheduler.reschedule_job(self.id, self._jobstore_alias, trigger, **trigger_args)
|
||||
return self
|
||||
|
||||
def pause(self):
|
||||
"""
|
||||
Temporarily suspend the execution of this job.
|
||||
|
||||
.. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.pause_job`
|
||||
|
||||
:return Job: this job instance
|
||||
|
||||
"""
|
||||
self._scheduler.pause_job(self.id, self._jobstore_alias)
|
||||
return self
|
||||
|
||||
def resume(self):
|
||||
"""
|
||||
Resume the schedule of this job if previously paused.
|
||||
|
||||
.. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.resume_job`
|
||||
|
||||
:return Job: this job instance
|
||||
|
||||
"""
|
||||
self._scheduler.resume_job(self.id, self._jobstore_alias)
|
||||
return self
|
||||
|
||||
def remove(self):
|
||||
"""
|
||||
Unschedules this job and removes it from its associated job store.
|
||||
|
||||
.. seealso:: :meth:`~apscheduler.schedulers.base.BaseScheduler.remove_job`
|
||||
|
||||
"""
|
||||
self._scheduler.remove_job(self.id, self._jobstore_alias)
|
||||
|
||||
@property
|
||||
def pending(self):
|
||||
"""
|
||||
Returns ``True`` if the referenced job is still waiting to be added to its designated job
|
||||
store.
|
||||
|
||||
"""
|
||||
return self._jobstore_alias is None
|
||||
|
||||
#
|
||||
# Private API
|
||||
#
|
||||
|
||||
def _get_run_times(self, now):
|
||||
"""
|
||||
Computes the scheduled run times between ``next_run_time`` and ``now`` (inclusive).
|
||||
|
||||
:type now: datetime.datetime
|
||||
:rtype: list[datetime.datetime]
|
||||
|
||||
"""
|
||||
run_times = []
|
||||
next_run_time = self.next_run_time
|
||||
while next_run_time and next_run_time <= now:
|
||||
run_times.append(next_run_time)
|
||||
next_run_time = self.trigger.get_next_fire_time(next_run_time, now)
|
||||
|
||||
return run_times
|
||||
|
||||
def _modify(self, **changes):
|
||||
"""
|
||||
Validates the changes to the Job and makes the modifications if and only if all of them
|
||||
validate.
|
||||
|
||||
"""
|
||||
approved = {}
|
||||
|
||||
if 'id' in changes:
|
||||
value = changes.pop('id')
|
||||
if not isinstance(value, six.string_types):
|
||||
raise TypeError("id must be a nonempty string")
|
||||
if hasattr(self, 'id'):
|
||||
raise ValueError('The job ID may not be changed')
|
||||
approved['id'] = value
|
||||
|
||||
if 'func' in changes or 'args' in changes or 'kwargs' in changes:
|
||||
func = changes.pop('func') if 'func' in changes else self.func
|
||||
args = changes.pop('args') if 'args' in changes else self.args
|
||||
kwargs = changes.pop('kwargs') if 'kwargs' in changes else self.kwargs
|
||||
|
||||
if isinstance(func, six.string_types):
|
||||
func_ref = func
|
||||
func = ref_to_obj(func)
|
||||
elif callable(func):
|
||||
try:
|
||||
func_ref = obj_to_ref(func)
|
||||
except ValueError:
|
||||
# If this happens, this Job won't be serializable
|
||||
func_ref = None
|
||||
else:
|
||||
raise TypeError('func must be a callable or a textual reference to one')
|
||||
|
||||
if not hasattr(self, 'name') and changes.get('name', None) is None:
|
||||
changes['name'] = get_callable_name(func)
|
||||
|
||||
if isinstance(args, six.string_types) or not isinstance(args, Iterable):
|
||||
raise TypeError('args must be a non-string iterable')
|
||||
if isinstance(kwargs, six.string_types) or not isinstance(kwargs, Mapping):
|
||||
raise TypeError('kwargs must be a dict-like object')
|
||||
|
||||
check_callable_args(func, args, kwargs)
|
||||
|
||||
approved['func'] = func
|
||||
approved['func_ref'] = func_ref
|
||||
approved['args'] = args
|
||||
approved['kwargs'] = kwargs
|
||||
|
||||
if 'name' in changes:
|
||||
value = changes.pop('name')
|
||||
if not value or not isinstance(value, six.string_types):
|
||||
raise TypeError("name must be a nonempty string")
|
||||
approved['name'] = value
|
||||
|
||||
if 'misfire_grace_time' in changes:
|
||||
value = changes.pop('misfire_grace_time')
|
||||
if value is not None and (not isinstance(value, six.integer_types) or value <= 0):
|
||||
raise TypeError('misfire_grace_time must be either None or a positive integer')
|
||||
approved['misfire_grace_time'] = value
|
||||
|
||||
if 'coalesce' in changes:
|
||||
value = bool(changes.pop('coalesce'))
|
||||
approved['coalesce'] = value
|
||||
|
||||
if 'max_instances' in changes:
|
||||
value = changes.pop('max_instances')
|
||||
if not isinstance(value, six.integer_types) or value <= 0:
|
||||
raise TypeError('max_instances must be a positive integer')
|
||||
approved['max_instances'] = value
|
||||
|
||||
if 'trigger' in changes:
|
||||
trigger = changes.pop('trigger')
|
||||
if not isinstance(trigger, BaseTrigger):
|
||||
raise TypeError('Expected a trigger instance, got %s instead' %
|
||||
trigger.__class__.__name__)
|
||||
|
||||
approved['trigger'] = trigger
|
||||
|
||||
if 'executor' in changes:
|
||||
value = changes.pop('executor')
|
||||
if not isinstance(value, six.string_types):
|
||||
raise TypeError('executor must be a string')
|
||||
approved['executor'] = value
|
||||
|
||||
if 'next_run_time' in changes:
|
||||
value = changes.pop('next_run_time')
|
||||
approved['next_run_time'] = convert_to_datetime(value, self._scheduler.timezone,
|
||||
'next_run_time')
|
||||
|
||||
if changes:
|
||||
raise AttributeError('The following are not modifiable attributes of Job: %s' %
|
||||
', '.join(changes))
|
||||
|
||||
for key, value in six.iteritems(approved):
|
||||
setattr(self, key, value)
|
||||
|
||||
def __getstate__(self):
|
||||
# Don't allow this Job to be serialized if the function reference could not be determined
|
||||
if not self.func_ref:
|
||||
raise ValueError(
|
||||
'This Job cannot be serialized since the reference to its callable (%r) could not '
|
||||
'be determined. Consider giving a textual reference (module:function name) '
|
||||
'instead.' % (self.func,))
|
||||
|
||||
return {
|
||||
'version': 1,
|
||||
'id': self.id,
|
||||
'func': self.func_ref,
|
||||
'trigger': self.trigger,
|
||||
'executor': self.executor,
|
||||
'args': self.args,
|
||||
'kwargs': self.kwargs,
|
||||
'name': self.name,
|
||||
'misfire_grace_time': self.misfire_grace_time,
|
||||
'coalesce': self.coalesce,
|
||||
'max_instances': self.max_instances,
|
||||
'next_run_time': self.next_run_time
|
||||
}
|
||||
|
||||
def __setstate__(self, state):
|
||||
if state.get('version', 1) > 1:
|
||||
raise ValueError('Job has version %s, but only version 1 can be handled' %
|
||||
state['version'])
|
||||
|
||||
self.id = state['id']
|
||||
self.func_ref = state['func']
|
||||
self.func = ref_to_obj(self.func_ref)
|
||||
self.trigger = state['trigger']
|
||||
self.executor = state['executor']
|
||||
self.args = state['args']
|
||||
self.kwargs = state['kwargs']
|
||||
self.name = state['name']
|
||||
self.misfire_grace_time = state['misfire_grace_time']
|
||||
self.coalesce = state['coalesce']
|
||||
self.max_instances = state['max_instances']
|
||||
self.next_run_time = state['next_run_time']
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, Job):
|
||||
return self.id == other.id
|
||||
return NotImplemented
|
||||
|
||||
def __repr__(self):
|
||||
return '<Job (id=%s name=%s)>' % (repr_escape(self.id), repr_escape(self.name))
|
||||
|
||||
def __str__(self):
|
||||
return repr_escape(self.__unicode__())
|
||||
|
||||
def __unicode__(self):
|
||||
if hasattr(self, 'next_run_time'):
|
||||
status = ('next run at: ' + datetime_repr(self.next_run_time) if
|
||||
self.next_run_time else 'paused')
|
||||
else:
|
||||
status = 'pending'
|
||||
|
||||
return u'%s (trigger: %s, %s)' % (self.name, self.trigger, status)
|
|
@ -1,143 +0,0 @@
|
|||
from abc import ABCMeta, abstractmethod
|
||||
import logging
|
||||
|
||||
import six
|
||||
|
||||
|
||||
class JobLookupError(KeyError):
|
||||
"""Raised when the job store cannot find a job for update or removal."""
|
||||
|
||||
def __init__(self, job_id):
|
||||
super(JobLookupError, self).__init__(u'No job by the id of %s was found' % job_id)
|
||||
|
||||
|
||||
class ConflictingIdError(KeyError):
|
||||
"""Raised when the uniqueness of job IDs is being violated."""
|
||||
|
||||
def __init__(self, job_id):
|
||||
super(ConflictingIdError, self).__init__(
|
||||
u'Job identifier (%s) conflicts with an existing job' % job_id)
|
||||
|
||||
|
||||
class TransientJobError(ValueError):
|
||||
"""
|
||||
Raised when an attempt to add transient (with no func_ref) job to a persistent job store is
|
||||
detected.
|
||||
"""
|
||||
|
||||
def __init__(self, job_id):
|
||||
super(TransientJobError, self).__init__(
|
||||
u'Job (%s) cannot be added to this job store because a reference to the callable '
|
||||
u'could not be determined.' % job_id)
|
||||
|
||||
|
||||
class BaseJobStore(six.with_metaclass(ABCMeta)):
|
||||
"""Abstract base class that defines the interface that every job store must implement."""
|
||||
|
||||
_scheduler = None
|
||||
_alias = None
|
||||
_logger = logging.getLogger('apscheduler.jobstores')
|
||||
|
||||
def start(self, scheduler, alias):
|
||||
"""
|
||||
Called by the scheduler when the scheduler is being started or when the job store is being
|
||||
added to an already running scheduler.
|
||||
|
||||
:param apscheduler.schedulers.base.BaseScheduler scheduler: the scheduler that is starting
|
||||
this job store
|
||||
:param str|unicode alias: alias of this job store as it was assigned to the scheduler
|
||||
"""
|
||||
|
||||
self._scheduler = scheduler
|
||||
self._alias = alias
|
||||
self._logger = logging.getLogger('apscheduler.jobstores.%s' % alias)
|
||||
|
||||
def shutdown(self):
|
||||
"""Frees any resources still bound to this job store."""
|
||||
|
||||
def _fix_paused_jobs_sorting(self, jobs):
|
||||
for i, job in enumerate(jobs):
|
||||
if job.next_run_time is not None:
|
||||
if i > 0:
|
||||
paused_jobs = jobs[:i]
|
||||
del jobs[:i]
|
||||
jobs.extend(paused_jobs)
|
||||
break
|
||||
|
||||
@abstractmethod
|
||||
def lookup_job(self, job_id):
|
||||
"""
|
||||
Returns a specific job, or ``None`` if it isn't found..
|
||||
|
||||
The job store is responsible for setting the ``scheduler`` and ``jobstore`` attributes of
|
||||
the returned job to point to the scheduler and itself, respectively.
|
||||
|
||||
:param str|unicode job_id: identifier of the job
|
||||
:rtype: Job
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_due_jobs(self, now):
|
||||
"""
|
||||
Returns the list of jobs that have ``next_run_time`` earlier or equal to ``now``.
|
||||
The returned jobs must be sorted by next run time (ascending).
|
||||
|
||||
:param datetime.datetime now: the current (timezone aware) datetime
|
||||
:rtype: list[Job]
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_next_run_time(self):
|
||||
"""
|
||||
Returns the earliest run time of all the jobs stored in this job store, or ``None`` if
|
||||
there are no active jobs.
|
||||
|
||||
:rtype: datetime.datetime
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_all_jobs(self):
|
||||
"""
|
||||
Returns a list of all jobs in this job store.
|
||||
The returned jobs should be sorted by next run time (ascending).
|
||||
Paused jobs (next_run_time == None) should be sorted last.
|
||||
|
||||
The job store is responsible for setting the ``scheduler`` and ``jobstore`` attributes of
|
||||
the returned jobs to point to the scheduler and itself, respectively.
|
||||
|
||||
:rtype: list[Job]
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def add_job(self, job):
|
||||
"""
|
||||
Adds the given job to this store.
|
||||
|
||||
:param Job job: the job to add
|
||||
:raises ConflictingIdError: if there is another job in this store with the same ID
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def update_job(self, job):
|
||||
"""
|
||||
Replaces the job in the store with the given newer version.
|
||||
|
||||
:param Job job: the job to update
|
||||
:raises JobLookupError: if the job does not exist
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def remove_job(self, job_id):
|
||||
"""
|
||||
Removes the given job from this store.
|
||||
|
||||
:param str|unicode job_id: identifier of the job
|
||||
:raises JobLookupError: if the job does not exist
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def remove_all_jobs(self):
|
||||
"""Removes all jobs from this store."""
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s>' % self.__class__.__name__
|
|
@ -1,108 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError
|
||||
from apscheduler.util import datetime_to_utc_timestamp
|
||||
|
||||
|
||||
class MemoryJobStore(BaseJobStore):
|
||||
"""
|
||||
Stores jobs in an array in RAM. Provides no persistence support.
|
||||
|
||||
Plugin alias: ``memory``
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super(MemoryJobStore, self).__init__()
|
||||
# list of (job, timestamp), sorted by next_run_time and job id (ascending)
|
||||
self._jobs = []
|
||||
self._jobs_index = {} # id -> (job, timestamp) lookup table
|
||||
|
||||
def lookup_job(self, job_id):
|
||||
return self._jobs_index.get(job_id, (None, None))[0]
|
||||
|
||||
def get_due_jobs(self, now):
|
||||
now_timestamp = datetime_to_utc_timestamp(now)
|
||||
pending = []
|
||||
for job, timestamp in self._jobs:
|
||||
if timestamp is None or timestamp > now_timestamp:
|
||||
break
|
||||
pending.append(job)
|
||||
|
||||
return pending
|
||||
|
||||
def get_next_run_time(self):
|
||||
return self._jobs[0][0].next_run_time if self._jobs else None
|
||||
|
||||
def get_all_jobs(self):
|
||||
return [j[0] for j in self._jobs]
|
||||
|
||||
def add_job(self, job):
|
||||
if job.id in self._jobs_index:
|
||||
raise ConflictingIdError(job.id)
|
||||
|
||||
timestamp = datetime_to_utc_timestamp(job.next_run_time)
|
||||
index = self._get_job_index(timestamp, job.id)
|
||||
self._jobs.insert(index, (job, timestamp))
|
||||
self._jobs_index[job.id] = (job, timestamp)
|
||||
|
||||
def update_job(self, job):
|
||||
old_job, old_timestamp = self._jobs_index.get(job.id, (None, None))
|
||||
if old_job is None:
|
||||
raise JobLookupError(job.id)
|
||||
|
||||
# If the next run time has not changed, simply replace the job in its present index.
|
||||
# Otherwise, reinsert the job to the list to preserve the ordering.
|
||||
old_index = self._get_job_index(old_timestamp, old_job.id)
|
||||
new_timestamp = datetime_to_utc_timestamp(job.next_run_time)
|
||||
if old_timestamp == new_timestamp:
|
||||
self._jobs[old_index] = (job, new_timestamp)
|
||||
else:
|
||||
del self._jobs[old_index]
|
||||
new_index = self._get_job_index(new_timestamp, job.id)
|
||||
self._jobs.insert(new_index, (job, new_timestamp))
|
||||
|
||||
self._jobs_index[old_job.id] = (job, new_timestamp)
|
||||
|
||||
def remove_job(self, job_id):
|
||||
job, timestamp = self._jobs_index.get(job_id, (None, None))
|
||||
if job is None:
|
||||
raise JobLookupError(job_id)
|
||||
|
||||
index = self._get_job_index(timestamp, job_id)
|
||||
del self._jobs[index]
|
||||
del self._jobs_index[job.id]
|
||||
|
||||
def remove_all_jobs(self):
|
||||
self._jobs = []
|
||||
self._jobs_index = {}
|
||||
|
||||
def shutdown(self):
|
||||
self.remove_all_jobs()
|
||||
|
||||
def _get_job_index(self, timestamp, job_id):
|
||||
"""
|
||||
Returns the index of the given job, or if it's not found, the index where the job should be
|
||||
inserted based on the given timestamp.
|
||||
|
||||
:type timestamp: int
|
||||
:type job_id: str
|
||||
|
||||
"""
|
||||
lo, hi = 0, len(self._jobs)
|
||||
timestamp = float('inf') if timestamp is None else timestamp
|
||||
while lo < hi:
|
||||
mid = (lo + hi) // 2
|
||||
mid_job, mid_timestamp = self._jobs[mid]
|
||||
mid_timestamp = float('inf') if mid_timestamp is None else mid_timestamp
|
||||
if mid_timestamp > timestamp:
|
||||
hi = mid
|
||||
elif mid_timestamp < timestamp:
|
||||
lo = mid + 1
|
||||
elif mid_job.id > job_id:
|
||||
hi = mid
|
||||
elif mid_job.id < job_id:
|
||||
lo = mid + 1
|
||||
else:
|
||||
return mid
|
||||
|
||||
return lo
|
|
@ -1,141 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
import warnings
|
||||
|
||||
from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError
|
||||
from apscheduler.util import maybe_ref, datetime_to_utc_timestamp, utc_timestamp_to_datetime
|
||||
from apscheduler.job import Job
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError: # pragma: nocover
|
||||
import pickle
|
||||
|
||||
try:
|
||||
from bson.binary import Binary
|
||||
from pymongo.errors import DuplicateKeyError
|
||||
from pymongo import MongoClient, ASCENDING
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('MongoDBJobStore requires PyMongo installed')
|
||||
|
||||
|
||||
class MongoDBJobStore(BaseJobStore):
|
||||
"""
|
||||
Stores jobs in a MongoDB database. Any leftover keyword arguments are directly passed to
|
||||
pymongo's `MongoClient
|
||||
<http://api.mongodb.org/python/current/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient>`_.
|
||||
|
||||
Plugin alias: ``mongodb``
|
||||
|
||||
:param str database: database to store jobs in
|
||||
:param str collection: collection to store jobs in
|
||||
:param client: a :class:`~pymongo.mongo_client.MongoClient` instance to use instead of
|
||||
providing connection arguments
|
||||
:param int pickle_protocol: pickle protocol level to use (for serialization), defaults to the
|
||||
highest available
|
||||
"""
|
||||
|
||||
def __init__(self, database='apscheduler', collection='jobs', client=None,
|
||||
pickle_protocol=pickle.HIGHEST_PROTOCOL, **connect_args):
|
||||
super(MongoDBJobStore, self).__init__()
|
||||
self.pickle_protocol = pickle_protocol
|
||||
|
||||
if not database:
|
||||
raise ValueError('The "database" parameter must not be empty')
|
||||
if not collection:
|
||||
raise ValueError('The "collection" parameter must not be empty')
|
||||
|
||||
if client:
|
||||
self.client = maybe_ref(client)
|
||||
else:
|
||||
connect_args.setdefault('w', 1)
|
||||
self.client = MongoClient(**connect_args)
|
||||
|
||||
self.collection = self.client[database][collection]
|
||||
|
||||
def start(self, scheduler, alias):
|
||||
super(MongoDBJobStore, self).start(scheduler, alias)
|
||||
self.collection.ensure_index('next_run_time', sparse=True)
|
||||
|
||||
@property
|
||||
def connection(self):
|
||||
warnings.warn('The "connection" member is deprecated -- use "client" instead',
|
||||
DeprecationWarning)
|
||||
return self.client
|
||||
|
||||
def lookup_job(self, job_id):
|
||||
document = self.collection.find_one(job_id, ['job_state'])
|
||||
return self._reconstitute_job(document['job_state']) if document else None
|
||||
|
||||
def get_due_jobs(self, now):
|
||||
timestamp = datetime_to_utc_timestamp(now)
|
||||
return self._get_jobs({'next_run_time': {'$lte': timestamp}})
|
||||
|
||||
def get_next_run_time(self):
|
||||
document = self.collection.find_one({'next_run_time': {'$ne': None}},
|
||||
projection=['next_run_time'],
|
||||
sort=[('next_run_time', ASCENDING)])
|
||||
return utc_timestamp_to_datetime(document['next_run_time']) if document else None
|
||||
|
||||
def get_all_jobs(self):
|
||||
jobs = self._get_jobs({})
|
||||
self._fix_paused_jobs_sorting(jobs)
|
||||
return jobs
|
||||
|
||||
def add_job(self, job):
|
||||
try:
|
||||
self.collection.insert({
|
||||
'_id': job.id,
|
||||
'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
|
||||
'job_state': Binary(pickle.dumps(job.__getstate__(), self.pickle_protocol))
|
||||
})
|
||||
except DuplicateKeyError:
|
||||
raise ConflictingIdError(job.id)
|
||||
|
||||
def update_job(self, job):
|
||||
changes = {
|
||||
'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
|
||||
'job_state': Binary(pickle.dumps(job.__getstate__(), self.pickle_protocol))
|
||||
}
|
||||
result = self.collection.update({'_id': job.id}, {'$set': changes})
|
||||
if result and result['n'] == 0:
|
||||
raise JobLookupError(job.id)
|
||||
|
||||
def remove_job(self, job_id):
|
||||
result = self.collection.remove(job_id)
|
||||
if result and result['n'] == 0:
|
||||
raise JobLookupError(job_id)
|
||||
|
||||
def remove_all_jobs(self):
|
||||
self.collection.remove()
|
||||
|
||||
def shutdown(self):
|
||||
self.client.close()
|
||||
|
||||
def _reconstitute_job(self, job_state):
|
||||
job_state = pickle.loads(job_state)
|
||||
job = Job.__new__(Job)
|
||||
job.__setstate__(job_state)
|
||||
job._scheduler = self._scheduler
|
||||
job._jobstore_alias = self._alias
|
||||
return job
|
||||
|
||||
def _get_jobs(self, conditions):
|
||||
jobs = []
|
||||
failed_job_ids = []
|
||||
for document in self.collection.find(conditions, ['_id', 'job_state'],
|
||||
sort=[('next_run_time', ASCENDING)]):
|
||||
try:
|
||||
jobs.append(self._reconstitute_job(document['job_state']))
|
||||
except:
|
||||
self._logger.exception('Unable to restore job "%s" -- removing it',
|
||||
document['_id'])
|
||||
failed_job_ids.append(document['_id'])
|
||||
|
||||
# Remove all the jobs we failed to restore
|
||||
if failed_job_ids:
|
||||
self.collection.remove({'_id': {'$in': failed_job_ids}})
|
||||
|
||||
return jobs
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s (client=%s)>' % (self.__class__.__name__, self.client)
|
|
@ -1,146 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
from datetime import datetime
|
||||
|
||||
from pytz import utc
|
||||
import six
|
||||
|
||||
from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError
|
||||
from apscheduler.util import datetime_to_utc_timestamp, utc_timestamp_to_datetime
|
||||
from apscheduler.job import Job
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError: # pragma: nocover
|
||||
import pickle
|
||||
|
||||
try:
|
||||
from redis import StrictRedis
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('RedisJobStore requires redis installed')
|
||||
|
||||
|
||||
class RedisJobStore(BaseJobStore):
|
||||
"""
|
||||
Stores jobs in a Redis database. Any leftover keyword arguments are directly passed to redis's
|
||||
:class:`~redis.StrictRedis`.
|
||||
|
||||
Plugin alias: ``redis``
|
||||
|
||||
:param int db: the database number to store jobs in
|
||||
:param str jobs_key: key to store jobs in
|
||||
:param str run_times_key: key to store the jobs' run times in
|
||||
:param int pickle_protocol: pickle protocol level to use (for serialization), defaults to the
|
||||
highest available
|
||||
"""
|
||||
|
||||
def __init__(self, db=0, jobs_key='apscheduler.jobs', run_times_key='apscheduler.run_times',
|
||||
pickle_protocol=pickle.HIGHEST_PROTOCOL, **connect_args):
|
||||
super(RedisJobStore, self).__init__()
|
||||
|
||||
if db is None:
|
||||
raise ValueError('The "db" parameter must not be empty')
|
||||
if not jobs_key:
|
||||
raise ValueError('The "jobs_key" parameter must not be empty')
|
||||
if not run_times_key:
|
||||
raise ValueError('The "run_times_key" parameter must not be empty')
|
||||
|
||||
self.pickle_protocol = pickle_protocol
|
||||
self.jobs_key = jobs_key
|
||||
self.run_times_key = run_times_key
|
||||
self.redis = StrictRedis(db=int(db), **connect_args)
|
||||
|
||||
def lookup_job(self, job_id):
|
||||
job_state = self.redis.hget(self.jobs_key, job_id)
|
||||
return self._reconstitute_job(job_state) if job_state else None
|
||||
|
||||
def get_due_jobs(self, now):
|
||||
timestamp = datetime_to_utc_timestamp(now)
|
||||
job_ids = self.redis.zrangebyscore(self.run_times_key, 0, timestamp)
|
||||
if job_ids:
|
||||
job_states = self.redis.hmget(self.jobs_key, *job_ids)
|
||||
return self._reconstitute_jobs(six.moves.zip(job_ids, job_states))
|
||||
return []
|
||||
|
||||
def get_next_run_time(self):
|
||||
next_run_time = self.redis.zrange(self.run_times_key, 0, 0, withscores=True)
|
||||
if next_run_time:
|
||||
return utc_timestamp_to_datetime(next_run_time[0][1])
|
||||
|
||||
def get_all_jobs(self):
|
||||
job_states = self.redis.hgetall(self.jobs_key)
|
||||
jobs = self._reconstitute_jobs(six.iteritems(job_states))
|
||||
paused_sort_key = datetime(9999, 12, 31, tzinfo=utc)
|
||||
return sorted(jobs, key=lambda job: job.next_run_time or paused_sort_key)
|
||||
|
||||
def add_job(self, job):
|
||||
if self.redis.hexists(self.jobs_key, job.id):
|
||||
raise ConflictingIdError(job.id)
|
||||
|
||||
with self.redis.pipeline() as pipe:
|
||||
pipe.multi()
|
||||
pipe.hset(self.jobs_key, job.id, pickle.dumps(job.__getstate__(),
|
||||
self.pickle_protocol))
|
||||
if job.next_run_time:
|
||||
pipe.zadd(self.run_times_key, datetime_to_utc_timestamp(job.next_run_time), job.id)
|
||||
pipe.execute()
|
||||
|
||||
def update_job(self, job):
|
||||
if not self.redis.hexists(self.jobs_key, job.id):
|
||||
raise JobLookupError(job.id)
|
||||
|
||||
with self.redis.pipeline() as pipe:
|
||||
pipe.hset(self.jobs_key, job.id, pickle.dumps(job.__getstate__(),
|
||||
self.pickle_protocol))
|
||||
if job.next_run_time:
|
||||
pipe.zadd(self.run_times_key, datetime_to_utc_timestamp(job.next_run_time), job.id)
|
||||
else:
|
||||
pipe.zrem(self.run_times_key, job.id)
|
||||
pipe.execute()
|
||||
|
||||
def remove_job(self, job_id):
|
||||
if not self.redis.hexists(self.jobs_key, job_id):
|
||||
raise JobLookupError(job_id)
|
||||
|
||||
with self.redis.pipeline() as pipe:
|
||||
pipe.hdel(self.jobs_key, job_id)
|
||||
pipe.zrem(self.run_times_key, job_id)
|
||||
pipe.execute()
|
||||
|
||||
def remove_all_jobs(self):
|
||||
with self.redis.pipeline() as pipe:
|
||||
pipe.delete(self.jobs_key)
|
||||
pipe.delete(self.run_times_key)
|
||||
pipe.execute()
|
||||
|
||||
def shutdown(self):
|
||||
self.redis.connection_pool.disconnect()
|
||||
|
||||
def _reconstitute_job(self, job_state):
|
||||
job_state = pickle.loads(job_state)
|
||||
job = Job.__new__(Job)
|
||||
job.__setstate__(job_state)
|
||||
job._scheduler = self._scheduler
|
||||
job._jobstore_alias = self._alias
|
||||
return job
|
||||
|
||||
def _reconstitute_jobs(self, job_states):
|
||||
jobs = []
|
||||
failed_job_ids = []
|
||||
for job_id, job_state in job_states:
|
||||
try:
|
||||
jobs.append(self._reconstitute_job(job_state))
|
||||
except:
|
||||
self._logger.exception('Unable to restore job "%s" -- removing it', job_id)
|
||||
failed_job_ids.append(job_id)
|
||||
|
||||
# Remove all the jobs we failed to restore
|
||||
if failed_job_ids:
|
||||
with self.redis.pipeline() as pipe:
|
||||
pipe.hdel(self.jobs_key, *failed_job_ids)
|
||||
pipe.zrem(self.run_times_key, *failed_job_ids)
|
||||
pipe.execute()
|
||||
|
||||
return jobs
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s>' % self.__class__.__name__
|
|
@ -1,153 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError
|
||||
from apscheduler.util import maybe_ref, datetime_to_utc_timestamp, utc_timestamp_to_datetime
|
||||
from apscheduler.job import Job
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError: # pragma: nocover
|
||||
import pickle
|
||||
|
||||
try:
|
||||
import rethinkdb as r
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('RethinkDBJobStore requires rethinkdb installed')
|
||||
|
||||
|
||||
class RethinkDBJobStore(BaseJobStore):
|
||||
"""
|
||||
Stores jobs in a RethinkDB database. Any leftover keyword arguments are directly passed to
|
||||
rethinkdb's `RethinkdbClient <http://www.rethinkdb.com/api/#connect>`_.
|
||||
|
||||
Plugin alias: ``rethinkdb``
|
||||
|
||||
:param str database: database to store jobs in
|
||||
:param str collection: collection to store jobs in
|
||||
:param client: a :class:`rethinkdb.net.Connection` instance to use instead of providing
|
||||
connection arguments
|
||||
:param int pickle_protocol: pickle protocol level to use (for serialization), defaults to the
|
||||
highest available
|
||||
"""
|
||||
|
||||
def __init__(self, database='apscheduler', table='jobs', client=None,
|
||||
pickle_protocol=pickle.HIGHEST_PROTOCOL, **connect_args):
|
||||
super(RethinkDBJobStore, self).__init__()
|
||||
|
||||
if not database:
|
||||
raise ValueError('The "database" parameter must not be empty')
|
||||
if not table:
|
||||
raise ValueError('The "table" parameter must not be empty')
|
||||
|
||||
self.database = database
|
||||
self.table = table
|
||||
self.client = client
|
||||
self.pickle_protocol = pickle_protocol
|
||||
self.connect_args = connect_args
|
||||
self.conn = None
|
||||
|
||||
def start(self, scheduler, alias):
|
||||
super(RethinkDBJobStore, self).start(scheduler, alias)
|
||||
|
||||
if self.client:
|
||||
self.conn = maybe_ref(self.client)
|
||||
else:
|
||||
self.conn = r.connect(db=self.database, **self.connect_args)
|
||||
|
||||
if self.database not in r.db_list().run(self.conn):
|
||||
r.db_create(self.database).run(self.conn)
|
||||
|
||||
if self.table not in r.table_list().run(self.conn):
|
||||
r.table_create(self.table).run(self.conn)
|
||||
|
||||
if 'next_run_time' not in r.table(self.table).index_list().run(self.conn):
|
||||
r.table(self.table).index_create('next_run_time').run(self.conn)
|
||||
|
||||
self.table = r.db(self.database).table(self.table)
|
||||
|
||||
def lookup_job(self, job_id):
|
||||
results = list(self.table.get_all(job_id).pluck('job_state').run(self.conn))
|
||||
return self._reconstitute_job(results[0]['job_state']) if results else None
|
||||
|
||||
def get_due_jobs(self, now):
|
||||
return self._get_jobs(r.row['next_run_time'] <= datetime_to_utc_timestamp(now))
|
||||
|
||||
def get_next_run_time(self):
|
||||
results = list(
|
||||
self.table
|
||||
.filter(r.row['next_run_time'] != None) # flake8: noqa
|
||||
.order_by(r.asc('next_run_time'))
|
||||
.map(lambda x: x['next_run_time'])
|
||||
.limit(1)
|
||||
.run(self.conn)
|
||||
)
|
||||
return utc_timestamp_to_datetime(results[0]) if results else None
|
||||
|
||||
def get_all_jobs(self):
|
||||
jobs = self._get_jobs()
|
||||
self._fix_paused_jobs_sorting(jobs)
|
||||
return jobs
|
||||
|
||||
def add_job(self, job):
|
||||
job_dict = {
|
||||
'id': job.id,
|
||||
'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
|
||||
'job_state': r.binary(pickle.dumps(job.__getstate__(), self.pickle_protocol))
|
||||
}
|
||||
results = self.table.insert(job_dict).run(self.conn)
|
||||
if results['errors'] > 0:
|
||||
raise ConflictingIdError(job.id)
|
||||
|
||||
def update_job(self, job):
|
||||
changes = {
|
||||
'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
|
||||
'job_state': r.binary(pickle.dumps(job.__getstate__(), self.pickle_protocol))
|
||||
}
|
||||
results = self.table.get_all(job.id).update(changes).run(self.conn)
|
||||
skipped = False in map(lambda x: results[x] == 0, results.keys())
|
||||
if results['skipped'] > 0 or results['errors'] > 0 or not skipped:
|
||||
raise JobLookupError(job.id)
|
||||
|
||||
def remove_job(self, job_id):
|
||||
results = self.table.get_all(job_id).delete().run(self.conn)
|
||||
if results['deleted'] + results['skipped'] != 1:
|
||||
raise JobLookupError(job_id)
|
||||
|
||||
def remove_all_jobs(self):
|
||||
self.table.delete().run(self.conn)
|
||||
|
||||
def shutdown(self):
|
||||
self.conn.close()
|
||||
|
||||
def _reconstitute_job(self, job_state):
|
||||
job_state = pickle.loads(job_state)
|
||||
job = Job.__new__(Job)
|
||||
job.__setstate__(job_state)
|
||||
job._scheduler = self._scheduler
|
||||
job._jobstore_alias = self._alias
|
||||
return job
|
||||
|
||||
def _get_jobs(self, predicate=None):
|
||||
jobs = []
|
||||
failed_job_ids = []
|
||||
query = (self.table.filter(r.row['next_run_time'] != None).filter(predicate) if
|
||||
predicate else self.table)
|
||||
query = query.order_by('next_run_time', 'id').pluck('id', 'job_state')
|
||||
|
||||
for document in query.run(self.conn):
|
||||
try:
|
||||
jobs.append(self._reconstitute_job(document['job_state']))
|
||||
except:
|
||||
self._logger.exception('Unable to restore job "%s" -- removing it', document['id'])
|
||||
failed_job_ids.append(document['id'])
|
||||
|
||||
# Remove all the jobs we failed to restore
|
||||
if failed_job_ids:
|
||||
r.expr(failed_job_ids).for_each(
|
||||
lambda job_id: self.table.get_all(job_id).delete()).run(self.conn)
|
||||
|
||||
return jobs
|
||||
|
||||
def __repr__(self):
|
||||
connection = self.conn
|
||||
return '<%s (connection=%s)>' % (self.__class__.__name__, connection)
|
|
@ -1,148 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError
|
||||
from apscheduler.util import maybe_ref, datetime_to_utc_timestamp, utc_timestamp_to_datetime
|
||||
from apscheduler.job import Job
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError: # pragma: nocover
|
||||
import pickle
|
||||
|
||||
try:
|
||||
from sqlalchemy import (
|
||||
create_engine, Table, Column, MetaData, Unicode, Float, LargeBinary, select)
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.sql.expression import null
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('SQLAlchemyJobStore requires SQLAlchemy installed')
|
||||
|
||||
|
||||
class SQLAlchemyJobStore(BaseJobStore):
|
||||
"""
|
||||
Stores jobs in a database table using SQLAlchemy.
|
||||
The table will be created if it doesn't exist in the database.
|
||||
|
||||
Plugin alias: ``sqlalchemy``
|
||||
|
||||
:param str url: connection string (see `SQLAlchemy documentation
|
||||
<http://docs.sqlalchemy.org/en/latest/core/engines.html?highlight=create_engine#database-urls>`_
|
||||
on this)
|
||||
:param engine: an SQLAlchemy Engine to use instead of creating a new one based on ``url``
|
||||
:param str tablename: name of the table to store jobs in
|
||||
:param metadata: a :class:`~sqlalchemy.MetaData` instance to use instead of creating a new one
|
||||
:param int pickle_protocol: pickle protocol level to use (for serialization), defaults to the
|
||||
highest available
|
||||
"""
|
||||
|
||||
def __init__(self, url=None, engine=None, tablename='apscheduler_jobs', metadata=None,
|
||||
pickle_protocol=pickle.HIGHEST_PROTOCOL):
|
||||
super(SQLAlchemyJobStore, self).__init__()
|
||||
self.pickle_protocol = pickle_protocol
|
||||
metadata = maybe_ref(metadata) or MetaData()
|
||||
|
||||
if engine:
|
||||
self.engine = maybe_ref(engine)
|
||||
elif url:
|
||||
self.engine = create_engine(url)
|
||||
else:
|
||||
raise ValueError('Need either "engine" or "url" defined')
|
||||
|
||||
# 191 = max key length in MySQL for InnoDB/utf8mb4 tables,
|
||||
# 25 = precision that translates to an 8-byte float
|
||||
self.jobs_t = Table(
|
||||
tablename, metadata,
|
||||
Column('id', Unicode(191, _warn_on_bytestring=False), primary_key=True),
|
||||
Column('next_run_time', Float(25), index=True),
|
||||
Column('job_state', LargeBinary, nullable=False)
|
||||
)
|
||||
|
||||
def start(self, scheduler, alias):
|
||||
super(SQLAlchemyJobStore, self).start(scheduler, alias)
|
||||
self.jobs_t.create(self.engine, True)
|
||||
|
||||
def lookup_job(self, job_id):
|
||||
selectable = select([self.jobs_t.c.job_state]).where(self.jobs_t.c.id == job_id)
|
||||
job_state = self.engine.execute(selectable).scalar()
|
||||
return self._reconstitute_job(job_state) if job_state else None
|
||||
|
||||
def get_due_jobs(self, now):
|
||||
timestamp = datetime_to_utc_timestamp(now)
|
||||
return self._get_jobs(self.jobs_t.c.next_run_time <= timestamp)
|
||||
|
||||
def get_next_run_time(self):
|
||||
selectable = select([self.jobs_t.c.next_run_time]).\
|
||||
where(self.jobs_t.c.next_run_time != null()).\
|
||||
order_by(self.jobs_t.c.next_run_time).limit(1)
|
||||
next_run_time = self.engine.execute(selectable).scalar()
|
||||
return utc_timestamp_to_datetime(next_run_time)
|
||||
|
||||
def get_all_jobs(self):
|
||||
jobs = self._get_jobs()
|
||||
self._fix_paused_jobs_sorting(jobs)
|
||||
return jobs
|
||||
|
||||
def add_job(self, job):
|
||||
insert = self.jobs_t.insert().values(**{
|
||||
'id': job.id,
|
||||
'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
|
||||
'job_state': pickle.dumps(job.__getstate__(), self.pickle_protocol)
|
||||
})
|
||||
try:
|
||||
self.engine.execute(insert)
|
||||
except IntegrityError:
|
||||
raise ConflictingIdError(job.id)
|
||||
|
||||
def update_job(self, job):
|
||||
update = self.jobs_t.update().values(**{
|
||||
'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
|
||||
'job_state': pickle.dumps(job.__getstate__(), self.pickle_protocol)
|
||||
}).where(self.jobs_t.c.id == job.id)
|
||||
result = self.engine.execute(update)
|
||||
if result.rowcount == 0:
|
||||
raise JobLookupError(id)
|
||||
|
||||
def remove_job(self, job_id):
|
||||
delete = self.jobs_t.delete().where(self.jobs_t.c.id == job_id)
|
||||
result = self.engine.execute(delete)
|
||||
if result.rowcount == 0:
|
||||
raise JobLookupError(job_id)
|
||||
|
||||
def remove_all_jobs(self):
|
||||
delete = self.jobs_t.delete()
|
||||
self.engine.execute(delete)
|
||||
|
||||
def shutdown(self):
|
||||
self.engine.dispose()
|
||||
|
||||
def _reconstitute_job(self, job_state):
|
||||
job_state = pickle.loads(job_state)
|
||||
job_state['jobstore'] = self
|
||||
job = Job.__new__(Job)
|
||||
job.__setstate__(job_state)
|
||||
job._scheduler = self._scheduler
|
||||
job._jobstore_alias = self._alias
|
||||
return job
|
||||
|
||||
def _get_jobs(self, *conditions):
|
||||
jobs = []
|
||||
selectable = select([self.jobs_t.c.id, self.jobs_t.c.job_state]).\
|
||||
order_by(self.jobs_t.c.next_run_time)
|
||||
selectable = selectable.where(*conditions) if conditions else selectable
|
||||
failed_job_ids = set()
|
||||
for row in self.engine.execute(selectable):
|
||||
try:
|
||||
jobs.append(self._reconstitute_job(row.job_state))
|
||||
except:
|
||||
self._logger.exception('Unable to restore job "%s" -- removing it', row.id)
|
||||
failed_job_ids.add(row.id)
|
||||
|
||||
# Remove all the jobs we failed to restore
|
||||
if failed_job_ids:
|
||||
delete = self.jobs_t.delete().where(self.jobs_t.c.id.in_(failed_job_ids))
|
||||
self.engine.execute(delete)
|
||||
|
||||
return jobs
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s (url=%s)>' % (self.__class__.__name__, self.engine.url)
|
|
@ -1,179 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
from pytz import utc
|
||||
from kazoo.exceptions import NoNodeError, NodeExistsError
|
||||
|
||||
from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError
|
||||
from apscheduler.util import maybe_ref, datetime_to_utc_timestamp, utc_timestamp_to_datetime
|
||||
from apscheduler.job import Job
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError: # pragma: nocover
|
||||
import pickle
|
||||
|
||||
try:
|
||||
from kazoo.client import KazooClient
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('ZooKeeperJobStore requires Kazoo installed')
|
||||
|
||||
|
||||
class ZooKeeperJobStore(BaseJobStore):
|
||||
"""
|
||||
Stores jobs in a ZooKeeper tree. Any leftover keyword arguments are directly passed to
|
||||
kazoo's `KazooClient
|
||||
<http://kazoo.readthedocs.io/en/latest/api/client.html>`_.
|
||||
|
||||
Plugin alias: ``zookeeper``
|
||||
|
||||
:param str path: path to store jobs in
|
||||
:param client: a :class:`~kazoo.client.KazooClient` instance to use instead of
|
||||
providing connection arguments
|
||||
:param int pickle_protocol: pickle protocol level to use (for serialization), defaults to the
|
||||
highest available
|
||||
"""
|
||||
|
||||
def __init__(self, path='/apscheduler', client=None, close_connection_on_exit=False,
|
||||
pickle_protocol=pickle.HIGHEST_PROTOCOL, **connect_args):
|
||||
super(ZooKeeperJobStore, self).__init__()
|
||||
self.pickle_protocol = pickle_protocol
|
||||
self.close_connection_on_exit = close_connection_on_exit
|
||||
|
||||
if not path:
|
||||
raise ValueError('The "path" parameter must not be empty')
|
||||
|
||||
self.path = path
|
||||
|
||||
if client:
|
||||
self.client = maybe_ref(client)
|
||||
else:
|
||||
self.client = KazooClient(**connect_args)
|
||||
self._ensured_path = False
|
||||
|
||||
def _ensure_paths(self):
|
||||
if not self._ensured_path:
|
||||
self.client.ensure_path(self.path)
|
||||
self._ensured_path = True
|
||||
|
||||
def start(self, scheduler, alias):
|
||||
super(ZooKeeperJobStore, self).start(scheduler, alias)
|
||||
if not self.client.connected:
|
||||
self.client.start()
|
||||
|
||||
def lookup_job(self, job_id):
|
||||
self._ensure_paths()
|
||||
node_path = os.path.join(self.path, job_id)
|
||||
try:
|
||||
content, _ = self.client.get(node_path)
|
||||
doc = pickle.loads(content)
|
||||
job = self._reconstitute_job(doc['job_state'])
|
||||
return job
|
||||
except:
|
||||
return None
|
||||
|
||||
def get_due_jobs(self, now):
|
||||
timestamp = datetime_to_utc_timestamp(now)
|
||||
jobs = [job_def['job'] for job_def in self._get_jobs()
|
||||
if job_def['next_run_time'] is not None and job_def['next_run_time'] <= timestamp]
|
||||
return jobs
|
||||
|
||||
def get_next_run_time(self):
|
||||
next_runs = [job_def['next_run_time'] for job_def in self._get_jobs()
|
||||
if job_def['next_run_time'] is not None]
|
||||
return utc_timestamp_to_datetime(min(next_runs)) if len(next_runs) > 0 else None
|
||||
|
||||
def get_all_jobs(self):
|
||||
jobs = [job_def['job'] for job_def in self._get_jobs()]
|
||||
self._fix_paused_jobs_sorting(jobs)
|
||||
return jobs
|
||||
|
||||
def add_job(self, job):
|
||||
self._ensure_paths()
|
||||
node_path = os.path.join(self.path, str(job.id))
|
||||
value = {
|
||||
'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
|
||||
'job_state': job.__getstate__()
|
||||
}
|
||||
data = pickle.dumps(value, self.pickle_protocol)
|
||||
try:
|
||||
self.client.create(node_path, value=data)
|
||||
except NodeExistsError:
|
||||
raise ConflictingIdError(job.id)
|
||||
|
||||
def update_job(self, job):
|
||||
self._ensure_paths()
|
||||
node_path = os.path.join(self.path, str(job.id))
|
||||
changes = {
|
||||
'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
|
||||
'job_state': job.__getstate__()
|
||||
}
|
||||
data = pickle.dumps(changes, self.pickle_protocol)
|
||||
try:
|
||||
self.client.set(node_path, value=data)
|
||||
except NoNodeError:
|
||||
raise JobLookupError(job.id)
|
||||
|
||||
def remove_job(self, job_id):
|
||||
self._ensure_paths()
|
||||
node_path = os.path.join(self.path, str(job_id))
|
||||
try:
|
||||
self.client.delete(node_path)
|
||||
except NoNodeError:
|
||||
raise JobLookupError(job_id)
|
||||
|
||||
def remove_all_jobs(self):
|
||||
try:
|
||||
self.client.delete(self.path, recursive=True)
|
||||
except NoNodeError:
|
||||
pass
|
||||
self._ensured_path = False
|
||||
|
||||
def shutdown(self):
|
||||
if self.close_connection_on_exit:
|
||||
self.client.stop()
|
||||
self.client.close()
|
||||
|
||||
def _reconstitute_job(self, job_state):
|
||||
job_state = job_state
|
||||
job = Job.__new__(Job)
|
||||
job.__setstate__(job_state)
|
||||
job._scheduler = self._scheduler
|
||||
job._jobstore_alias = self._alias
|
||||
return job
|
||||
|
||||
def _get_jobs(self):
|
||||
self._ensure_paths()
|
||||
jobs = []
|
||||
failed_job_ids = []
|
||||
all_ids = self.client.get_children(self.path)
|
||||
for node_name in all_ids:
|
||||
try:
|
||||
node_path = os.path.join(self.path, node_name)
|
||||
content, _ = self.client.get(node_path)
|
||||
doc = pickle.loads(content)
|
||||
job_def = {
|
||||
'job_id': node_name,
|
||||
'next_run_time': doc['next_run_time'] if doc['next_run_time'] else None,
|
||||
'job_state': doc['job_state'],
|
||||
'job': self._reconstitute_job(doc['job_state']),
|
||||
'creation_time': _.ctime
|
||||
}
|
||||
jobs.append(job_def)
|
||||
except:
|
||||
self._logger.exception('Unable to restore job "%s" -- removing it' % node_name)
|
||||
failed_job_ids.append(node_name)
|
||||
|
||||
# Remove all the jobs we failed to restore
|
||||
if failed_job_ids:
|
||||
for failed_id in failed_job_ids:
|
||||
self.remove_job(failed_id)
|
||||
paused_sort_key = datetime(9999, 12, 31, tzinfo=utc)
|
||||
return sorted(jobs, key=lambda job_def: (job_def['job'].next_run_time or paused_sort_key,
|
||||
job_def['creation_time']))
|
||||
|
||||
def __repr__(self):
|
||||
self._logger.exception('<%s (client=%s)>' % (self.__class__.__name__, self.client))
|
||||
return '<%s (client=%s)>' % (self.__class__.__name__, self.client)
|
|
@ -1,12 +0,0 @@
|
|||
class SchedulerAlreadyRunningError(Exception):
|
||||
"""Raised when attempting to start or configure the scheduler when it's already running."""
|
||||
|
||||
def __str__(self):
|
||||
return 'Scheduler is already running'
|
||||
|
||||
|
||||
class SchedulerNotRunningError(Exception):
|
||||
"""Raised when attempting to shutdown the scheduler when it's not running."""
|
||||
|
||||
def __str__(self):
|
||||
return 'Scheduler is not running'
|
|
@ -1,67 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
from functools import wraps
|
||||
|
||||
from apscheduler.schedulers.base import BaseScheduler
|
||||
from apscheduler.util import maybe_ref
|
||||
|
||||
try:
|
||||
import asyncio
|
||||
except ImportError: # pragma: nocover
|
||||
try:
|
||||
import trollius as asyncio
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
'AsyncIOScheduler requires either Python 3.4 or the asyncio package installed')
|
||||
|
||||
|
||||
def run_in_event_loop(func):
|
||||
@wraps(func)
|
||||
def wrapper(self, *args):
|
||||
self._eventloop.call_soon_threadsafe(func, self, *args)
|
||||
return wrapper
|
||||
|
||||
|
||||
class AsyncIOScheduler(BaseScheduler):
|
||||
"""
|
||||
A scheduler that runs on an asyncio (:pep:`3156`) event loop.
|
||||
|
||||
The default executor can run jobs based on native coroutines (``async def``).
|
||||
|
||||
Extra options:
|
||||
|
||||
============== =============================================================
|
||||
``event_loop`` AsyncIO event loop to use (defaults to the global event loop)
|
||||
============== =============================================================
|
||||
"""
|
||||
|
||||
_eventloop = None
|
||||
_timeout = None
|
||||
|
||||
@run_in_event_loop
|
||||
def shutdown(self, wait=True):
|
||||
super(AsyncIOScheduler, self).shutdown(wait)
|
||||
self._stop_timer()
|
||||
|
||||
def _configure(self, config):
|
||||
self._eventloop = maybe_ref(config.pop('event_loop', None)) or asyncio.get_event_loop()
|
||||
super(AsyncIOScheduler, self)._configure(config)
|
||||
|
||||
def _start_timer(self, wait_seconds):
|
||||
self._stop_timer()
|
||||
if wait_seconds is not None:
|
||||
self._timeout = self._eventloop.call_later(wait_seconds, self.wakeup)
|
||||
|
||||
def _stop_timer(self):
|
||||
if self._timeout:
|
||||
self._timeout.cancel()
|
||||
del self._timeout
|
||||
|
||||
@run_in_event_loop
|
||||
def wakeup(self):
|
||||
self._stop_timer()
|
||||
wait_seconds = self._process_jobs()
|
||||
self._start_timer(wait_seconds)
|
||||
|
||||
def _create_default_executor(self):
|
||||
from apscheduler.executors.asyncio import AsyncIOExecutor
|
||||
return AsyncIOExecutor()
|
|
@ -1,41 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
from threading import Thread, Event
|
||||
|
||||
from apscheduler.schedulers.base import BaseScheduler
|
||||
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||
from apscheduler.util import asbool
|
||||
|
||||
|
||||
class BackgroundScheduler(BlockingScheduler):
|
||||
"""
|
||||
A scheduler that runs in the background using a separate thread
|
||||
(:meth:`~apscheduler.schedulers.base.BaseScheduler.start` will return immediately).
|
||||
|
||||
Extra options:
|
||||
|
||||
========== =============================================================================
|
||||
``daemon`` Set the ``daemon`` option in the background thread (defaults to ``True``, see
|
||||
`the documentation
|
||||
<https://docs.python.org/3.4/library/threading.html#thread-objects>`_
|
||||
for further details)
|
||||
========== =============================================================================
|
||||
"""
|
||||
|
||||
_thread = None
|
||||
|
||||
def _configure(self, config):
|
||||
self._daemon = asbool(config.pop('daemon', True))
|
||||
super(BackgroundScheduler, self)._configure(config)
|
||||
|
||||
def start(self, *args, **kwargs):
|
||||
self._event = Event()
|
||||
BaseScheduler.start(self, *args, **kwargs)
|
||||
self._thread = Thread(target=self._main_loop, name='APScheduler')
|
||||
self._thread.daemon = self._daemon
|
||||
self._thread.start()
|
||||
|
||||
def shutdown(self, *args, **kwargs):
|
||||
super(BackgroundScheduler, self).shutdown(*args, **kwargs)
|
||||
self._thread.join()
|
||||
del self._thread
|
File diff suppressed because it is too large
Load Diff
|
@ -1,33 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
from threading import Event
|
||||
|
||||
from apscheduler.schedulers.base import BaseScheduler, STATE_STOPPED
|
||||
from apscheduler.util import TIMEOUT_MAX
|
||||
|
||||
|
||||
class BlockingScheduler(BaseScheduler):
|
||||
"""
|
||||
A scheduler that runs in the foreground
|
||||
(:meth:`~apscheduler.schedulers.base.BaseScheduler.start` will block).
|
||||
"""
|
||||
_event = None
|
||||
|
||||
def start(self, *args, **kwargs):
|
||||
self._event = Event()
|
||||
super(BlockingScheduler, self).start(*args, **kwargs)
|
||||
self._main_loop()
|
||||
|
||||
def shutdown(self, wait=True):
|
||||
super(BlockingScheduler, self).shutdown(wait)
|
||||
self._event.set()
|
||||
|
||||
def _main_loop(self):
|
||||
wait_seconds = TIMEOUT_MAX
|
||||
while self.state != STATE_STOPPED:
|
||||
self._event.wait(wait_seconds)
|
||||
self._event.clear()
|
||||
wait_seconds = self._process_jobs()
|
||||
|
||||
def wakeup(self):
|
||||
self._event.set()
|
|
@ -1,35 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||
from apscheduler.schedulers.base import BaseScheduler
|
||||
|
||||
try:
|
||||
from gevent.event import Event
|
||||
from gevent.lock import RLock
|
||||
import gevent
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('GeventScheduler requires gevent installed')
|
||||
|
||||
|
||||
class GeventScheduler(BlockingScheduler):
|
||||
"""A scheduler that runs as a Gevent greenlet."""
|
||||
|
||||
_greenlet = None
|
||||
|
||||
def start(self, *args, **kwargs):
|
||||
self._event = Event()
|
||||
BaseScheduler.start(self, *args, **kwargs)
|
||||
self._greenlet = gevent.spawn(self._main_loop)
|
||||
return self._greenlet
|
||||
|
||||
def shutdown(self, *args, **kwargs):
|
||||
super(GeventScheduler, self).shutdown(*args, **kwargs)
|
||||
self._greenlet.join()
|
||||
del self._greenlet
|
||||
|
||||
def _create_lock(self):
|
||||
return RLock()
|
||||
|
||||
def _create_default_executor(self):
|
||||
from apscheduler.executors.gevent import GeventExecutor
|
||||
return GeventExecutor()
|
|
@ -1,42 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
from apscheduler.schedulers.base import BaseScheduler
|
||||
|
||||
try:
|
||||
from PyQt5.QtCore import QObject, QTimer
|
||||
except ImportError: # pragma: nocover
|
||||
try:
|
||||
from PyQt4.QtCore import QObject, QTimer
|
||||
except ImportError:
|
||||
try:
|
||||
from PySide.QtCore import QObject, QTimer # flake8: noqa
|
||||
except ImportError:
|
||||
raise ImportError('QtScheduler requires either PyQt5, PyQt4 or PySide installed')
|
||||
|
||||
|
||||
class QtScheduler(BaseScheduler):
|
||||
"""A scheduler that runs in a Qt event loop."""
|
||||
|
||||
_timer = None
|
||||
|
||||
def shutdown(self, *args, **kwargs):
|
||||
super(QtScheduler, self).shutdown(*args, **kwargs)
|
||||
self._stop_timer()
|
||||
|
||||
def _start_timer(self, wait_seconds):
|
||||
self._stop_timer()
|
||||
if wait_seconds is not None:
|
||||
self._timer = QTimer.singleShot(wait_seconds * 1000, self._process_jobs)
|
||||
|
||||
def _stop_timer(self):
|
||||
if self._timer:
|
||||
if self._timer.isActive():
|
||||
self._timer.stop()
|
||||
del self._timer
|
||||
|
||||
def wakeup(self):
|
||||
self._start_timer(0)
|
||||
|
||||
def _process_jobs(self):
|
||||
wait_seconds = super(QtScheduler, self)._process_jobs()
|
||||
self._start_timer(wait_seconds)
|
|
@ -1,63 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
from datetime import timedelta
|
||||
from functools import wraps
|
||||
|
||||
from apscheduler.schedulers.base import BaseScheduler
|
||||
from apscheduler.util import maybe_ref
|
||||
|
||||
try:
|
||||
from tornado.ioloop import IOLoop
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('TornadoScheduler requires tornado installed')
|
||||
|
||||
|
||||
def run_in_ioloop(func):
|
||||
@wraps(func)
|
||||
def wrapper(self, *args, **kwargs):
|
||||
self._ioloop.add_callback(func, self, *args, **kwargs)
|
||||
return wrapper
|
||||
|
||||
|
||||
class TornadoScheduler(BaseScheduler):
|
||||
"""
|
||||
A scheduler that runs on a Tornado IOLoop.
|
||||
|
||||
The default executor can run jobs based on native coroutines (``async def``).
|
||||
|
||||
=========== ===============================================================
|
||||
``io_loop`` Tornado IOLoop instance to use (defaults to the global IO loop)
|
||||
=========== ===============================================================
|
||||
"""
|
||||
|
||||
_ioloop = None
|
||||
_timeout = None
|
||||
|
||||
@run_in_ioloop
|
||||
def shutdown(self, wait=True):
|
||||
super(TornadoScheduler, self).shutdown(wait)
|
||||
self._stop_timer()
|
||||
|
||||
def _configure(self, config):
|
||||
self._ioloop = maybe_ref(config.pop('io_loop', None)) or IOLoop.current()
|
||||
super(TornadoScheduler, self)._configure(config)
|
||||
|
||||
def _start_timer(self, wait_seconds):
|
||||
self._stop_timer()
|
||||
if wait_seconds is not None:
|
||||
self._timeout = self._ioloop.add_timeout(timedelta(seconds=wait_seconds), self.wakeup)
|
||||
|
||||
def _stop_timer(self):
|
||||
if self._timeout:
|
||||
self._ioloop.remove_timeout(self._timeout)
|
||||
del self._timeout
|
||||
|
||||
def _create_default_executor(self):
|
||||
from apscheduler.executors.tornado import TornadoExecutor
|
||||
return TornadoExecutor()
|
||||
|
||||
@run_in_ioloop
|
||||
def wakeup(self):
|
||||
self._stop_timer()
|
||||
wait_seconds = self._process_jobs()
|
||||
self._start_timer(wait_seconds)
|
|
@ -1,62 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
from functools import wraps
|
||||
|
||||
from apscheduler.schedulers.base import BaseScheduler
|
||||
from apscheduler.util import maybe_ref
|
||||
|
||||
try:
|
||||
from twisted.internet import reactor as default_reactor
|
||||
except ImportError: # pragma: nocover
|
||||
raise ImportError('TwistedScheduler requires Twisted installed')
|
||||
|
||||
|
||||
def run_in_reactor(func):
|
||||
@wraps(func)
|
||||
def wrapper(self, *args, **kwargs):
|
||||
self._reactor.callFromThread(func, self, *args, **kwargs)
|
||||
return wrapper
|
||||
|
||||
|
||||
class TwistedScheduler(BaseScheduler):
|
||||
"""
|
||||
A scheduler that runs on a Twisted reactor.
|
||||
|
||||
Extra options:
|
||||
|
||||
=========== ========================================================
|
||||
``reactor`` Reactor instance to use (defaults to the global reactor)
|
||||
=========== ========================================================
|
||||
"""
|
||||
|
||||
_reactor = None
|
||||
_delayedcall = None
|
||||
|
||||
def _configure(self, config):
|
||||
self._reactor = maybe_ref(config.pop('reactor', default_reactor))
|
||||
super(TwistedScheduler, self)._configure(config)
|
||||
|
||||
@run_in_reactor
|
||||
def shutdown(self, wait=True):
|
||||
super(TwistedScheduler, self).shutdown(wait)
|
||||
self._stop_timer()
|
||||
|
||||
def _start_timer(self, wait_seconds):
|
||||
self._stop_timer()
|
||||
if wait_seconds is not None:
|
||||
self._delayedcall = self._reactor.callLater(wait_seconds, self.wakeup)
|
||||
|
||||
def _stop_timer(self):
|
||||
if self._delayedcall and self._delayedcall.active():
|
||||
self._delayedcall.cancel()
|
||||
del self._delayedcall
|
||||
|
||||
@run_in_reactor
|
||||
def wakeup(self):
|
||||
self._stop_timer()
|
||||
wait_seconds = self._process_jobs()
|
||||
self._start_timer(wait_seconds)
|
||||
|
||||
def _create_default_executor(self):
|
||||
from apscheduler.executors.twisted import TwistedExecutor
|
||||
return TwistedExecutor()
|
|
@ -1,19 +0,0 @@
|
|||
from abc import ABCMeta, abstractmethod
|
||||
|
||||
import six
|
||||
|
||||
|
||||
class BaseTrigger(six.with_metaclass(ABCMeta)):
|
||||
"""Abstract base class that defines the interface that every trigger must implement."""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
@abstractmethod
|
||||
def get_next_fire_time(self, previous_fire_time, now):
|
||||
"""
|
||||
Returns the next datetime to fire on, If no such datetime can be calculated, returns
|
||||
``None``.
|
||||
|
||||
:param datetime.datetime previous_fire_time: the previous time the trigger was fired
|
||||
:param datetime.datetime now: current datetime
|
||||
"""
|
|
@ -1,206 +0,0 @@
|
|||
from datetime import datetime, timedelta
|
||||
|
||||
from tzlocal import get_localzone
|
||||
import six
|
||||
|
||||
from apscheduler.triggers.base import BaseTrigger
|
||||
from apscheduler.triggers.cron.fields import (
|
||||
BaseField, WeekField, DayOfMonthField, DayOfWeekField, DEFAULT_VALUES)
|
||||
from apscheduler.util import datetime_ceil, convert_to_datetime, datetime_repr, astimezone
|
||||
|
||||
|
||||
class CronTrigger(BaseTrigger):
|
||||
"""
|
||||
Triggers when current time matches all specified time constraints,
|
||||
similarly to how the UNIX cron scheduler works.
|
||||
|
||||
:param int|str year: 4-digit year
|
||||
:param int|str month: month (1-12)
|
||||
:param int|str day: day of the (1-31)
|
||||
:param int|str week: ISO week (1-53)
|
||||
:param int|str day_of_week: number or name of weekday (0-6 or mon,tue,wed,thu,fri,sat,sun)
|
||||
:param int|str hour: hour (0-23)
|
||||
:param int|str minute: minute (0-59)
|
||||
:param int|str second: second (0-59)
|
||||
:param datetime|str start_date: earliest possible date/time to trigger on (inclusive)
|
||||
:param datetime|str end_date: latest possible date/time to trigger on (inclusive)
|
||||
:param datetime.tzinfo|str timezone: time zone to use for the date/time calculations (defaults
|
||||
to scheduler timezone)
|
||||
|
||||
.. note:: The first weekday is always **monday**.
|
||||
"""
|
||||
|
||||
FIELD_NAMES = ('year', 'month', 'day', 'week', 'day_of_week', 'hour', 'minute', 'second')
|
||||
FIELDS_MAP = {
|
||||
'year': BaseField,
|
||||
'month': BaseField,
|
||||
'week': WeekField,
|
||||
'day': DayOfMonthField,
|
||||
'day_of_week': DayOfWeekField,
|
||||
'hour': BaseField,
|
||||
'minute': BaseField,
|
||||
'second': BaseField
|
||||
}
|
||||
|
||||
__slots__ = 'timezone', 'start_date', 'end_date', 'fields'
|
||||
|
||||
def __init__(self, year=None, month=None, day=None, week=None, day_of_week=None, hour=None,
|
||||
minute=None, second=None, start_date=None, end_date=None, timezone=None):
|
||||
if timezone:
|
||||
self.timezone = astimezone(timezone)
|
||||
elif isinstance(start_date, datetime) and start_date.tzinfo:
|
||||
self.timezone = start_date.tzinfo
|
||||
elif isinstance(end_date, datetime) and end_date.tzinfo:
|
||||
self.timezone = end_date.tzinfo
|
||||
else:
|
||||
self.timezone = get_localzone()
|
||||
|
||||
self.start_date = convert_to_datetime(start_date, self.timezone, 'start_date')
|
||||
self.end_date = convert_to_datetime(end_date, self.timezone, 'end_date')
|
||||
|
||||
values = dict((key, value) for (key, value) in six.iteritems(locals())
|
||||
if key in self.FIELD_NAMES and value is not None)
|
||||
self.fields = []
|
||||
assign_defaults = False
|
||||
for field_name in self.FIELD_NAMES:
|
||||
if field_name in values:
|
||||
exprs = values.pop(field_name)
|
||||
is_default = False
|
||||
assign_defaults = not values
|
||||
elif assign_defaults:
|
||||
exprs = DEFAULT_VALUES[field_name]
|
||||
is_default = True
|
||||
else:
|
||||
exprs = '*'
|
||||
is_default = True
|
||||
|
||||
field_class = self.FIELDS_MAP[field_name]
|
||||
field = field_class(field_name, exprs, is_default)
|
||||
self.fields.append(field)
|
||||
|
||||
def _increment_field_value(self, dateval, fieldnum):
|
||||
"""
|
||||
Increments the designated field and resets all less significant fields to their minimum
|
||||
values.
|
||||
|
||||
:type dateval: datetime
|
||||
:type fieldnum: int
|
||||
:return: a tuple containing the new date, and the number of the field that was actually
|
||||
incremented
|
||||
:rtype: tuple
|
||||
"""
|
||||
|
||||
values = {}
|
||||
i = 0
|
||||
while i < len(self.fields):
|
||||
field = self.fields[i]
|
||||
if not field.REAL:
|
||||
if i == fieldnum:
|
||||
fieldnum -= 1
|
||||
i -= 1
|
||||
else:
|
||||
i += 1
|
||||
continue
|
||||
|
||||
if i < fieldnum:
|
||||
values[field.name] = field.get_value(dateval)
|
||||
i += 1
|
||||
elif i > fieldnum:
|
||||
values[field.name] = field.get_min(dateval)
|
||||
i += 1
|
||||
else:
|
||||
value = field.get_value(dateval)
|
||||
maxval = field.get_max(dateval)
|
||||
if value == maxval:
|
||||
fieldnum -= 1
|
||||
i -= 1
|
||||
else:
|
||||
values[field.name] = value + 1
|
||||
i += 1
|
||||
|
||||
difference = datetime(**values) - dateval.replace(tzinfo=None)
|
||||
return self.timezone.normalize(dateval + difference), fieldnum
|
||||
|
||||
def _set_field_value(self, dateval, fieldnum, new_value):
|
||||
values = {}
|
||||
for i, field in enumerate(self.fields):
|
||||
if field.REAL:
|
||||
if i < fieldnum:
|
||||
values[field.name] = field.get_value(dateval)
|
||||
elif i > fieldnum:
|
||||
values[field.name] = field.get_min(dateval)
|
||||
else:
|
||||
values[field.name] = new_value
|
||||
|
||||
return self.timezone.localize(datetime(**values))
|
||||
|
||||
def get_next_fire_time(self, previous_fire_time, now):
|
||||
if previous_fire_time:
|
||||
start_date = min(now, previous_fire_time + timedelta(microseconds=1))
|
||||
if start_date == previous_fire_time:
|
||||
start_date += timedelta(microseconds=1)
|
||||
else:
|
||||
start_date = max(now, self.start_date) if self.start_date else now
|
||||
|
||||
fieldnum = 0
|
||||
next_date = datetime_ceil(start_date).astimezone(self.timezone)
|
||||
while 0 <= fieldnum < len(self.fields):
|
||||
field = self.fields[fieldnum]
|
||||
curr_value = field.get_value(next_date)
|
||||
next_value = field.get_next_value(next_date)
|
||||
|
||||
if next_value is None:
|
||||
# No valid value was found
|
||||
next_date, fieldnum = self._increment_field_value(next_date, fieldnum - 1)
|
||||
elif next_value > curr_value:
|
||||
# A valid, but higher than the starting value, was found
|
||||
if field.REAL:
|
||||
next_date = self._set_field_value(next_date, fieldnum, next_value)
|
||||
fieldnum += 1
|
||||
else:
|
||||
next_date, fieldnum = self._increment_field_value(next_date, fieldnum)
|
||||
else:
|
||||
# A valid value was found, no changes necessary
|
||||
fieldnum += 1
|
||||
|
||||
# Return if the date has rolled past the end date
|
||||
if self.end_date and next_date > self.end_date:
|
||||
return None
|
||||
|
||||
if fieldnum >= 0:
|
||||
return next_date
|
||||
|
||||
def __getstate__(self):
|
||||
return {
|
||||
'version': 1,
|
||||
'timezone': self.timezone,
|
||||
'start_date': self.start_date,
|
||||
'end_date': self.end_date,
|
||||
'fields': self.fields
|
||||
}
|
||||
|
||||
def __setstate__(self, state):
|
||||
# This is for compatibility with APScheduler 3.0.x
|
||||
if isinstance(state, tuple):
|
||||
state = state[1]
|
||||
|
||||
if state.get('version', 1) > 1:
|
||||
raise ValueError(
|
||||
'Got serialized data for version %s of %s, but only version 1 can be handled' %
|
||||
(state['version'], self.__class__.__name__))
|
||||
|
||||
self.timezone = state['timezone']
|
||||
self.start_date = state['start_date']
|
||||
self.end_date = state['end_date']
|
||||
self.fields = state['fields']
|
||||
|
||||
def __str__(self):
|
||||
options = ["%s='%s'" % (f.name, f) for f in self.fields if not f.is_default]
|
||||
return 'cron[%s]' % (', '.join(options))
|
||||
|
||||
def __repr__(self):
|
||||
options = ["%s='%s'" % (f.name, f) for f in self.fields if not f.is_default]
|
||||
if self.start_date:
|
||||
options.append("start_date='%s'" % datetime_repr(self.start_date))
|
||||
return "<%s (%s, timezone='%s')>" % (
|
||||
self.__class__.__name__, ', '.join(options), self.timezone)
|
|
@ -1,195 +0,0 @@
|
|||
"""This module contains the expressions applicable for CronTrigger's fields."""
|
||||
|
||||
from calendar import monthrange
|
||||
import re
|
||||
|
||||
from apscheduler.util import asint
|
||||
|
||||
__all__ = ('AllExpression', 'RangeExpression', 'WeekdayRangeExpression',
|
||||
'WeekdayPositionExpression', 'LastDayOfMonthExpression')
|
||||
|
||||
|
||||
WEEKDAYS = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
|
||||
|
||||
|
||||
class AllExpression(object):
|
||||
value_re = re.compile(r'\*(?:/(?P<step>\d+))?$')
|
||||
|
||||
def __init__(self, step=None):
|
||||
self.step = asint(step)
|
||||
if self.step == 0:
|
||||
raise ValueError('Increment must be higher than 0')
|
||||
|
||||
def get_next_value(self, date, field):
|
||||
start = field.get_value(date)
|
||||
minval = field.get_min(date)
|
||||
maxval = field.get_max(date)
|
||||
start = max(start, minval)
|
||||
|
||||
if not self.step:
|
||||
next = start
|
||||
else:
|
||||
distance_to_next = (self.step - (start - minval)) % self.step
|
||||
next = start + distance_to_next
|
||||
|
||||
if next <= maxval:
|
||||
return next
|
||||
|
||||
def __eq__(self, other):
|
||||
return isinstance(other, self.__class__) and self.step == other.step
|
||||
|
||||
def __str__(self):
|
||||
if self.step:
|
||||
return '*/%d' % self.step
|
||||
return '*'
|
||||
|
||||
def __repr__(self):
|
||||
return "%s(%s)" % (self.__class__.__name__, self.step)
|
||||
|
||||
|
||||
class RangeExpression(AllExpression):
|
||||
value_re = re.compile(
|
||||
r'(?P<first>\d+)(?:-(?P<last>\d+))?(?:/(?P<step>\d+))?$')
|
||||
|
||||
def __init__(self, first, last=None, step=None):
|
||||
AllExpression.__init__(self, step)
|
||||
first = asint(first)
|
||||
last = asint(last)
|
||||
if last is None and step is None:
|
||||
last = first
|
||||
if last is not None and first > last:
|
||||
raise ValueError('The minimum value in a range must not be higher than the maximum')
|
||||
self.first = first
|
||||
self.last = last
|
||||
|
||||
def get_next_value(self, date, field):
|
||||
startval = field.get_value(date)
|
||||
minval = field.get_min(date)
|
||||
maxval = field.get_max(date)
|
||||
|
||||
# Apply range limits
|
||||
minval = max(minval, self.first)
|
||||
maxval = min(maxval, self.last) if self.last is not None else maxval
|
||||
nextval = max(minval, startval)
|
||||
|
||||
# Apply the step if defined
|
||||
if self.step:
|
||||
distance_to_next = (self.step - (nextval - minval)) % self.step
|
||||
nextval += distance_to_next
|
||||
|
||||
return nextval if nextval <= maxval else None
|
||||
|
||||
def __eq__(self, other):
|
||||
return (isinstance(other, self.__class__) and self.first == other.first and
|
||||
self.last == other.last)
|
||||
|
||||
def __str__(self):
|
||||
if self.last != self.first and self.last is not None:
|
||||
range = '%d-%d' % (self.first, self.last)
|
||||
else:
|
||||
range = str(self.first)
|
||||
|
||||
if self.step:
|
||||
return '%s/%d' % (range, self.step)
|
||||
return range
|
||||
|
||||
def __repr__(self):
|
||||
args = [str(self.first)]
|
||||
if self.last != self.first and self.last is not None or self.step:
|
||||
args.append(str(self.last))
|
||||
if self.step:
|
||||
args.append(str(self.step))
|
||||
return "%s(%s)" % (self.__class__.__name__, ', '.join(args))
|
||||
|
||||
|
||||
class WeekdayRangeExpression(RangeExpression):
|
||||
value_re = re.compile(r'(?P<first>[a-z]+)(?:-(?P<last>[a-z]+))?', re.IGNORECASE)
|
||||
|
||||
def __init__(self, first, last=None):
|
||||
try:
|
||||
first_num = WEEKDAYS.index(first.lower())
|
||||
except ValueError:
|
||||
raise ValueError('Invalid weekday name "%s"' % first)
|
||||
|
||||
if last:
|
||||
try:
|
||||
last_num = WEEKDAYS.index(last.lower())
|
||||
except ValueError:
|
||||
raise ValueError('Invalid weekday name "%s"' % last)
|
||||
else:
|
||||
last_num = None
|
||||
|
||||
RangeExpression.__init__(self, first_num, last_num)
|
||||
|
||||
def __str__(self):
|
||||
if self.last != self.first and self.last is not None:
|
||||
return '%s-%s' % (WEEKDAYS[self.first], WEEKDAYS[self.last])
|
||||
return WEEKDAYS[self.first]
|
||||
|
||||
def __repr__(self):
|
||||
args = ["'%s'" % WEEKDAYS[self.first]]
|
||||
if self.last != self.first and self.last is not None:
|
||||
args.append("'%s'" % WEEKDAYS[self.last])
|
||||
return "%s(%s)" % (self.__class__.__name__, ', '.join(args))
|
||||
|
||||
|
||||
class WeekdayPositionExpression(AllExpression):
|
||||
options = ['1st', '2nd', '3rd', '4th', '5th', 'last']
|
||||
value_re = re.compile(r'(?P<option_name>%s) +(?P<weekday_name>(?:\d+|\w+))' %
|
||||
'|'.join(options), re.IGNORECASE)
|
||||
|
||||
def __init__(self, option_name, weekday_name):
|
||||
try:
|
||||
self.option_num = self.options.index(option_name.lower())
|
||||
except ValueError:
|
||||
raise ValueError('Invalid weekday position "%s"' % option_name)
|
||||
|
||||
try:
|
||||
self.weekday = WEEKDAYS.index(weekday_name.lower())
|
||||
except ValueError:
|
||||
raise ValueError('Invalid weekday name "%s"' % weekday_name)
|
||||
|
||||
def get_next_value(self, date, field):
|
||||
# Figure out the weekday of the month's first day and the number of days in that month
|
||||
first_day_wday, last_day = monthrange(date.year, date.month)
|
||||
|
||||
# Calculate which day of the month is the first of the target weekdays
|
||||
first_hit_day = self.weekday - first_day_wday + 1
|
||||
if first_hit_day <= 0:
|
||||
first_hit_day += 7
|
||||
|
||||
# Calculate what day of the month the target weekday would be
|
||||
if self.option_num < 5:
|
||||
target_day = first_hit_day + self.option_num * 7
|
||||
else:
|
||||
target_day = first_hit_day + ((last_day - first_hit_day) // 7) * 7
|
||||
|
||||
if target_day <= last_day and target_day >= date.day:
|
||||
return target_day
|
||||
|
||||
def __eq__(self, other):
|
||||
return (super(WeekdayPositionExpression, self).__eq__(other) and
|
||||
self.option_num == other.option_num and self.weekday == other.weekday)
|
||||
|
||||
def __str__(self):
|
||||
return '%s %s' % (self.options[self.option_num], WEEKDAYS[self.weekday])
|
||||
|
||||
def __repr__(self):
|
||||
return "%s('%s', '%s')" % (self.__class__.__name__, self.options[self.option_num],
|
||||
WEEKDAYS[self.weekday])
|
||||
|
||||
|
||||
class LastDayOfMonthExpression(AllExpression):
|
||||
value_re = re.compile(r'last', re.IGNORECASE)
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def get_next_value(self, date, field):
|
||||
return monthrange(date.year, date.month)[1]
|
||||
|
||||
def __str__(self):
|
||||
return 'last'
|
||||
|
||||
def __repr__(self):
|
||||
return "%s()" % self.__class__.__name__
|
|
@ -1,100 +0,0 @@
|
|||
"""Fields represent CronTrigger options which map to :class:`~datetime.datetime` fields."""
|
||||
|
||||
from calendar import monthrange
|
||||
|
||||
from apscheduler.triggers.cron.expressions import (
|
||||
AllExpression, RangeExpression, WeekdayPositionExpression, LastDayOfMonthExpression,
|
||||
WeekdayRangeExpression)
|
||||
|
||||
|
||||
__all__ = ('MIN_VALUES', 'MAX_VALUES', 'DEFAULT_VALUES', 'BaseField', 'WeekField',
|
||||
'DayOfMonthField', 'DayOfWeekField')
|
||||
|
||||
|
||||
MIN_VALUES = {'year': 1970, 'month': 1, 'day': 1, 'week': 1, 'day_of_week': 0, 'hour': 0,
|
||||
'minute': 0, 'second': 0}
|
||||
MAX_VALUES = {'year': 2 ** 63, 'month': 12, 'day:': 31, 'week': 53, 'day_of_week': 6, 'hour': 23,
|
||||
'minute': 59, 'second': 59}
|
||||
DEFAULT_VALUES = {'year': '*', 'month': 1, 'day': 1, 'week': '*', 'day_of_week': '*', 'hour': 0,
|
||||
'minute': 0, 'second': 0}
|
||||
|
||||
|
||||
class BaseField(object):
|
||||
REAL = True
|
||||
COMPILERS = [AllExpression, RangeExpression]
|
||||
|
||||
def __init__(self, name, exprs, is_default=False):
|
||||
self.name = name
|
||||
self.is_default = is_default
|
||||
self.compile_expressions(exprs)
|
||||
|
||||
def get_min(self, dateval):
|
||||
return MIN_VALUES[self.name]
|
||||
|
||||
def get_max(self, dateval):
|
||||
return MAX_VALUES[self.name]
|
||||
|
||||
def get_value(self, dateval):
|
||||
return getattr(dateval, self.name)
|
||||
|
||||
def get_next_value(self, dateval):
|
||||
smallest = None
|
||||
for expr in self.expressions:
|
||||
value = expr.get_next_value(dateval, self)
|
||||
if smallest is None or (value is not None and value < smallest):
|
||||
smallest = value
|
||||
|
||||
return smallest
|
||||
|
||||
def compile_expressions(self, exprs):
|
||||
self.expressions = []
|
||||
|
||||
# Split a comma-separated expression list, if any
|
||||
exprs = str(exprs).strip()
|
||||
if ',' in exprs:
|
||||
for expr in exprs.split(','):
|
||||
self.compile_expression(expr)
|
||||
else:
|
||||
self.compile_expression(exprs)
|
||||
|
||||
def compile_expression(self, expr):
|
||||
for compiler in self.COMPILERS:
|
||||
match = compiler.value_re.match(expr)
|
||||
if match:
|
||||
compiled_expr = compiler(**match.groupdict())
|
||||
self.expressions.append(compiled_expr)
|
||||
return
|
||||
|
||||
raise ValueError('Unrecognized expression "%s" for field "%s"' % (expr, self.name))
|
||||
|
||||
def __eq__(self, other):
|
||||
return isinstance(self, self.__class__) and self.expressions == other.expressions
|
||||
|
||||
def __str__(self):
|
||||
expr_strings = (str(e) for e in self.expressions)
|
||||
return ','.join(expr_strings)
|
||||
|
||||
def __repr__(self):
|
||||
return "%s('%s', '%s')" % (self.__class__.__name__, self.name, self)
|
||||
|
||||
|
||||
class WeekField(BaseField):
|
||||
REAL = False
|
||||
|
||||
def get_value(self, dateval):
|
||||
return dateval.isocalendar()[1]
|
||||
|
||||
|
||||
class DayOfMonthField(BaseField):
|
||||
COMPILERS = BaseField.COMPILERS + [WeekdayPositionExpression, LastDayOfMonthExpression]
|
||||
|
||||
def get_max(self, dateval):
|
||||
return monthrange(dateval.year, dateval.month)[1]
|
||||
|
||||
|
||||
class DayOfWeekField(BaseField):
|
||||
REAL = False
|
||||
COMPILERS = BaseField.COMPILERS + [WeekdayRangeExpression]
|
||||
|
||||
def get_value(self, dateval):
|
||||
return dateval.weekday()
|
|
@ -1,51 +0,0 @@
|
|||
from datetime import datetime
|
||||
|
||||
from tzlocal import get_localzone
|
||||
|
||||
from apscheduler.triggers.base import BaseTrigger
|
||||
from apscheduler.util import convert_to_datetime, datetime_repr, astimezone
|
||||
|
||||
|
||||
class DateTrigger(BaseTrigger):
|
||||
"""
|
||||
Triggers once on the given datetime. If ``run_date`` is left empty, current time is used.
|
||||
|
||||
:param datetime|str run_date: the date/time to run the job at
|
||||
:param datetime.tzinfo|str timezone: time zone for ``run_date`` if it doesn't have one already
|
||||
"""
|
||||
|
||||
__slots__ = 'run_date'
|
||||
|
||||
def __init__(self, run_date=None, timezone=None):
|
||||
timezone = astimezone(timezone) or get_localzone()
|
||||
if run_date is not None:
|
||||
self.run_date = convert_to_datetime(run_date, timezone, 'run_date')
|
||||
else:
|
||||
self.run_date = datetime.now(timezone)
|
||||
|
||||
def get_next_fire_time(self, previous_fire_time, now):
|
||||
return self.run_date if previous_fire_time is None else None
|
||||
|
||||
def __getstate__(self):
|
||||
return {
|
||||
'version': 1,
|
||||
'run_date': self.run_date
|
||||
}
|
||||
|
||||
def __setstate__(self, state):
|
||||
# This is for compatibility with APScheduler 3.0.x
|
||||
if isinstance(state, tuple):
|
||||
state = state[1]
|
||||
|
||||
if state.get('version', 1) > 1:
|
||||
raise ValueError(
|
||||
'Got serialized data for version %s of %s, but only version 1 can be handled' %
|
||||
(state['version'], self.__class__.__name__))
|
||||
|
||||
self.run_date = state['run_date']
|
||||
|
||||
def __str__(self):
|
||||
return 'date[%s]' % datetime_repr(self.run_date)
|
||||
|
||||
def __repr__(self):
|
||||
return "<%s (run_date='%s')>" % (self.__class__.__name__, datetime_repr(self.run_date))
|
|
@ -1,92 +0,0 @@
|
|||
from datetime import timedelta, datetime
|
||||
from math import ceil
|
||||
|
||||
from tzlocal import get_localzone
|
||||
|
||||
from apscheduler.triggers.base import BaseTrigger
|
||||
from apscheduler.util import convert_to_datetime, timedelta_seconds, datetime_repr, astimezone
|
||||
|
||||
|
||||
class IntervalTrigger(BaseTrigger):
|
||||
"""
|
||||
Triggers on specified intervals, starting on ``start_date`` if specified, ``datetime.now()`` +
|
||||
interval otherwise.
|
||||
|
||||
:param int weeks: number of weeks to wait
|
||||
:param int days: number of days to wait
|
||||
:param int hours: number of hours to wait
|
||||
:param int minutes: number of minutes to wait
|
||||
:param int seconds: number of seconds to wait
|
||||
:param datetime|str start_date: starting point for the interval calculation
|
||||
:param datetime|str end_date: latest possible date/time to trigger on
|
||||
:param datetime.tzinfo|str timezone: time zone to use for the date/time calculations
|
||||
"""
|
||||
|
||||
__slots__ = 'timezone', 'start_date', 'end_date', 'interval', 'interval_length'
|
||||
|
||||
def __init__(self, weeks=0, days=0, hours=0, minutes=0, seconds=0, start_date=None,
|
||||
end_date=None, timezone=None):
|
||||
self.interval = timedelta(weeks=weeks, days=days, hours=hours, minutes=minutes,
|
||||
seconds=seconds)
|
||||
self.interval_length = timedelta_seconds(self.interval)
|
||||
if self.interval_length == 0:
|
||||
self.interval = timedelta(seconds=1)
|
||||
self.interval_length = 1
|
||||
|
||||
if timezone:
|
||||
self.timezone = astimezone(timezone)
|
||||
elif isinstance(start_date, datetime) and start_date.tzinfo:
|
||||
self.timezone = start_date.tzinfo
|
||||
elif isinstance(end_date, datetime) and end_date.tzinfo:
|
||||
self.timezone = end_date.tzinfo
|
||||
else:
|
||||
self.timezone = get_localzone()
|
||||
|
||||
start_date = start_date or (datetime.now(self.timezone) + self.interval)
|
||||
self.start_date = convert_to_datetime(start_date, self.timezone, 'start_date')
|
||||
self.end_date = convert_to_datetime(end_date, self.timezone, 'end_date')
|
||||
|
||||
def get_next_fire_time(self, previous_fire_time, now):
|
||||
if previous_fire_time:
|
||||
next_fire_time = previous_fire_time + self.interval
|
||||
elif self.start_date > now:
|
||||
next_fire_time = self.start_date
|
||||
else:
|
||||
timediff_seconds = timedelta_seconds(now - self.start_date)
|
||||
next_interval_num = int(ceil(timediff_seconds / self.interval_length))
|
||||
next_fire_time = self.start_date + self.interval * next_interval_num
|
||||
|
||||
if not self.end_date or next_fire_time <= self.end_date:
|
||||
return self.timezone.normalize(next_fire_time)
|
||||
|
||||
def __getstate__(self):
|
||||
return {
|
||||
'version': 1,
|
||||
'timezone': self.timezone,
|
||||
'start_date': self.start_date,
|
||||
'end_date': self.end_date,
|
||||
'interval': self.interval
|
||||
}
|
||||
|
||||
def __setstate__(self, state):
|
||||
# This is for compatibility with APScheduler 3.0.x
|
||||
if isinstance(state, tuple):
|
||||
state = state[1]
|
||||
|
||||
if state.get('version', 1) > 1:
|
||||
raise ValueError(
|
||||
'Got serialized data for version %s of %s, but only version 1 can be handled' %
|
||||
(state['version'], self.__class__.__name__))
|
||||
|
||||
self.timezone = state['timezone']
|
||||
self.start_date = state['start_date']
|
||||
self.end_date = state['end_date']
|
||||
self.interval = state['interval']
|
||||
self.interval_length = timedelta_seconds(self.interval)
|
||||
|
||||
def __str__(self):
|
||||
return 'interval[%s]' % str(self.interval)
|
||||
|
||||
def __repr__(self):
|
||||
return "<%s (interval=%r, start_date='%s', timezone='%s')>" % (
|
||||
self.__class__.__name__, self.interval, datetime_repr(self.start_date), self.timezone)
|
|
@ -1,385 +0,0 @@
|
|||
"""This module contains several handy functions primarily meant for internal use."""
|
||||
|
||||
from __future__ import division
|
||||
from datetime import date, datetime, time, timedelta, tzinfo
|
||||
from calendar import timegm
|
||||
import re
|
||||
from functools import partial
|
||||
|
||||
from pytz import timezone, utc
|
||||
import six
|
||||
|
||||
try:
|
||||
from inspect import signature
|
||||
except ImportError: # pragma: nocover
|
||||
from funcsigs import signature
|
||||
|
||||
try:
|
||||
from threading import TIMEOUT_MAX
|
||||
except ImportError:
|
||||
TIMEOUT_MAX = 4294967 # Maximum value accepted by Event.wait() on Windows
|
||||
|
||||
__all__ = ('asint', 'asbool', 'astimezone', 'convert_to_datetime', 'datetime_to_utc_timestamp',
|
||||
'utc_timestamp_to_datetime', 'timedelta_seconds', 'datetime_ceil', 'get_callable_name',
|
||||
'obj_to_ref', 'ref_to_obj', 'maybe_ref', 'repr_escape', 'check_callable_args')
|
||||
|
||||
|
||||
class _Undefined(object):
|
||||
def __nonzero__(self):
|
||||
return False
|
||||
|
||||
def __bool__(self):
|
||||
return False
|
||||
|
||||
def __repr__(self):
|
||||
return '<undefined>'
|
||||
|
||||
|
||||
undefined = _Undefined() #: a unique object that only signifies that no value is defined
|
||||
|
||||
|
||||
def asint(text):
|
||||
"""
|
||||
Safely converts a string to an integer, returning ``None`` if the string is ``None``.
|
||||
|
||||
:type text: str
|
||||
:rtype: int
|
||||
|
||||
"""
|
||||
if text is not None:
|
||||
return int(text)
|
||||
|
||||
|
||||
def asbool(obj):
|
||||
"""
|
||||
Interprets an object as a boolean value.
|
||||
|
||||
:rtype: bool
|
||||
|
||||
"""
|
||||
if isinstance(obj, str):
|
||||
obj = obj.strip().lower()
|
||||
if obj in ('true', 'yes', 'on', 'y', 't', '1'):
|
||||
return True
|
||||
if obj in ('false', 'no', 'off', 'n', 'f', '0'):
|
||||
return False
|
||||
raise ValueError('Unable to interpret value "%s" as boolean' % obj)
|
||||
return bool(obj)
|
||||
|
||||
|
||||
def astimezone(obj):
|
||||
"""
|
||||
Interprets an object as a timezone.
|
||||
|
||||
:rtype: tzinfo
|
||||
|
||||
"""
|
||||
if isinstance(obj, six.string_types):
|
||||
return timezone(obj)
|
||||
if isinstance(obj, tzinfo):
|
||||
if not hasattr(obj, 'localize') or not hasattr(obj, 'normalize'):
|
||||
raise TypeError('Only timezones from the pytz library are supported')
|
||||
if obj.zone == 'local':
|
||||
raise ValueError(
|
||||
'Unable to determine the name of the local timezone -- you must explicitly '
|
||||
'specify the name of the local timezone. Please refrain from using timezones like '
|
||||
'EST to prevent problems with daylight saving time. Instead, use a locale based '
|
||||
'timezone name (such as Europe/Helsinki).')
|
||||
return obj
|
||||
if obj is not None:
|
||||
raise TypeError('Expected tzinfo, got %s instead' % obj.__class__.__name__)
|
||||
|
||||
|
||||
_DATE_REGEX = re.compile(
|
||||
r'(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})'
|
||||
r'(?: (?P<hour>\d{1,2}):(?P<minute>\d{1,2}):(?P<second>\d{1,2})'
|
||||
r'(?:\.(?P<microsecond>\d{1,6}))?)?')
|
||||
|
||||
|
||||
def convert_to_datetime(input, tz, arg_name):
|
||||
"""
|
||||
Converts the given object to a timezone aware datetime object.
|
||||
|
||||
If a timezone aware datetime object is passed, it is returned unmodified.
|
||||
If a native datetime object is passed, it is given the specified timezone.
|
||||
If the input is a string, it is parsed as a datetime with the given timezone.
|
||||
|
||||
Date strings are accepted in three different forms: date only (Y-m-d), date with time
|
||||
(Y-m-d H:M:S) or with date+time with microseconds (Y-m-d H:M:S.micro).
|
||||
|
||||
:param str|datetime input: the datetime or string to convert to a timezone aware datetime
|
||||
:param datetime.tzinfo tz: timezone to interpret ``input`` in
|
||||
:param str arg_name: the name of the argument (used in an error message)
|
||||
:rtype: datetime
|
||||
|
||||
"""
|
||||
if input is None:
|
||||
return
|
||||
elif isinstance(input, datetime):
|
||||
datetime_ = input
|
||||
elif isinstance(input, date):
|
||||
datetime_ = datetime.combine(input, time())
|
||||
elif isinstance(input, six.string_types):
|
||||
m = _DATE_REGEX.match(input)
|
||||
if not m:
|
||||
raise ValueError('Invalid date string')
|
||||
values = [(k, int(v or 0)) for k, v in m.groupdict().items()]
|
||||
values = dict(values)
|
||||
datetime_ = datetime(**values)
|
||||
else:
|
||||
raise TypeError('Unsupported type for %s: %s' % (arg_name, input.__class__.__name__))
|
||||
|
||||
if datetime_.tzinfo is not None:
|
||||
return datetime_
|
||||
if tz is None:
|
||||
raise ValueError(
|
||||
'The "tz" argument must be specified if %s has no timezone information' % arg_name)
|
||||
if isinstance(tz, six.string_types):
|
||||
tz = timezone(tz)
|
||||
|
||||
try:
|
||||
return tz.localize(datetime_, is_dst=None)
|
||||
except AttributeError:
|
||||
raise TypeError(
|
||||
'Only pytz timezones are supported (need the localize() and normalize() methods)')
|
||||
|
||||
|
||||
def datetime_to_utc_timestamp(timeval):
|
||||
"""
|
||||
Converts a datetime instance to a timestamp.
|
||||
|
||||
:type timeval: datetime
|
||||
:rtype: float
|
||||
|
||||
"""
|
||||
if timeval is not None:
|
||||
return timegm(timeval.utctimetuple()) + timeval.microsecond / 1000000
|
||||
|
||||
|
||||
def utc_timestamp_to_datetime(timestamp):
|
||||
"""
|
||||
Converts the given timestamp to a datetime instance.
|
||||
|
||||
:type timestamp: float
|
||||
:rtype: datetime
|
||||
|
||||
"""
|
||||
if timestamp is not None:
|
||||
return datetime.fromtimestamp(timestamp, utc)
|
||||
|
||||
|
||||
def timedelta_seconds(delta):
|
||||
"""
|
||||
Converts the given timedelta to seconds.
|
||||
|
||||
:type delta: timedelta
|
||||
:rtype: float
|
||||
|
||||
"""
|
||||
return delta.days * 24 * 60 * 60 + delta.seconds + \
|
||||
delta.microseconds / 1000000.0
|
||||
|
||||
|
||||
def datetime_ceil(dateval):
|
||||
"""
|
||||
Rounds the given datetime object upwards.
|
||||
|
||||
:type dateval: datetime
|
||||
|
||||
"""
|
||||
if dateval.microsecond > 0:
|
||||
return dateval + timedelta(seconds=1, microseconds=-dateval.microsecond)
|
||||
return dateval
|
||||
|
||||
|
||||
def datetime_repr(dateval):
|
||||
return dateval.strftime('%Y-%m-%d %H:%M:%S %Z') if dateval else 'None'
|
||||
|
||||
|
||||
def get_callable_name(func):
|
||||
"""
|
||||
Returns the best available display name for the given function/callable.
|
||||
|
||||
:rtype: str
|
||||
|
||||
"""
|
||||
# the easy case (on Python 3.3+)
|
||||
if hasattr(func, '__qualname__'):
|
||||
return func.__qualname__
|
||||
|
||||
# class methods, bound and unbound methods
|
||||
f_self = getattr(func, '__self__', None) or getattr(func, 'im_self', None)
|
||||
if f_self and hasattr(func, '__name__'):
|
||||
f_class = f_self if isinstance(f_self, type) else f_self.__class__
|
||||
else:
|
||||
f_class = getattr(func, 'im_class', None)
|
||||
|
||||
if f_class and hasattr(func, '__name__'):
|
||||
return '%s.%s' % (f_class.__name__, func.__name__)
|
||||
|
||||
# class or class instance
|
||||
if hasattr(func, '__call__'):
|
||||
# class
|
||||
if hasattr(func, '__name__'):
|
||||
return func.__name__
|
||||
|
||||
# instance of a class with a __call__ method
|
||||
return func.__class__.__name__
|
||||
|
||||
raise TypeError('Unable to determine a name for %r -- maybe it is not a callable?' % func)
|
||||
|
||||
|
||||
def obj_to_ref(obj):
|
||||
"""
|
||||
Returns the path to the given callable.
|
||||
|
||||
:rtype: str
|
||||
:raises TypeError: if the given object is not callable
|
||||
:raises ValueError: if the given object is a :class:`~functools.partial`, lambda or a nested
|
||||
function
|
||||
|
||||
"""
|
||||
if isinstance(obj, partial):
|
||||
raise ValueError('Cannot create a reference to a partial()')
|
||||
|
||||
name = get_callable_name(obj)
|
||||
if '<lambda>' in name:
|
||||
raise ValueError('Cannot create a reference to a lambda')
|
||||
if '<locals>' in name:
|
||||
raise ValueError('Cannot create a reference to a nested function')
|
||||
|
||||
return '%s:%s' % (obj.__module__, name)
|
||||
|
||||
|
||||
def ref_to_obj(ref):
|
||||
"""
|
||||
Returns the object pointed to by ``ref``.
|
||||
|
||||
:type ref: str
|
||||
|
||||
"""
|
||||
if not isinstance(ref, six.string_types):
|
||||
raise TypeError('References must be strings')
|
||||
if ':' not in ref:
|
||||
raise ValueError('Invalid reference')
|
||||
|
||||
modulename, rest = ref.split(':', 1)
|
||||
try:
|
||||
obj = __import__(modulename, fromlist=[rest])
|
||||
except ImportError:
|
||||
raise LookupError('Error resolving reference %s: could not import module' % ref)
|
||||
|
||||
try:
|
||||
for name in rest.split('.'):
|
||||
obj = getattr(obj, name)
|
||||
return obj
|
||||
except Exception:
|
||||
raise LookupError('Error resolving reference %s: error looking up object' % ref)
|
||||
|
||||
|
||||
def maybe_ref(ref):
|
||||
"""
|
||||
Returns the object that the given reference points to, if it is indeed a reference.
|
||||
If it is not a reference, the object is returned as-is.
|
||||
|
||||
"""
|
||||
if not isinstance(ref, str):
|
||||
return ref
|
||||
return ref_to_obj(ref)
|
||||
|
||||
|
||||
if six.PY2:
|
||||
def repr_escape(string):
|
||||
if isinstance(string, six.text_type):
|
||||
return string.encode('ascii', 'backslashreplace')
|
||||
return string
|
||||
else:
|
||||
def repr_escape(string):
|
||||
return string
|
||||
|
||||
|
||||
def check_callable_args(func, args, kwargs):
|
||||
"""
|
||||
Ensures that the given callable can be called with the given arguments.
|
||||
|
||||
:type args: tuple
|
||||
:type kwargs: dict
|
||||
|
||||
"""
|
||||
pos_kwargs_conflicts = [] # parameters that have a match in both args and kwargs
|
||||
positional_only_kwargs = [] # positional-only parameters that have a match in kwargs
|
||||
unsatisfied_args = [] # parameters in signature that don't have a match in args or kwargs
|
||||
unsatisfied_kwargs = [] # keyword-only arguments that don't have a match in kwargs
|
||||
unmatched_args = list(args) # args that didn't match any of the parameters in the signature
|
||||
# kwargs that didn't match any of the parameters in the signature
|
||||
unmatched_kwargs = list(kwargs)
|
||||
# indicates if the signature defines *args and **kwargs respectively
|
||||
has_varargs = has_var_kwargs = False
|
||||
|
||||
try:
|
||||
sig = signature(func)
|
||||
except ValueError:
|
||||
# signature() doesn't work against every kind of callable
|
||||
return
|
||||
|
||||
for param in six.itervalues(sig.parameters):
|
||||
if param.kind == param.POSITIONAL_OR_KEYWORD:
|
||||
if param.name in unmatched_kwargs and unmatched_args:
|
||||
pos_kwargs_conflicts.append(param.name)
|
||||
elif unmatched_args:
|
||||
del unmatched_args[0]
|
||||
elif param.name in unmatched_kwargs:
|
||||
unmatched_kwargs.remove(param.name)
|
||||
elif param.default is param.empty:
|
||||
unsatisfied_args.append(param.name)
|
||||
elif param.kind == param.POSITIONAL_ONLY:
|
||||
if unmatched_args:
|
||||
del unmatched_args[0]
|
||||
elif param.name in unmatched_kwargs:
|
||||
unmatched_kwargs.remove(param.name)
|
||||
positional_only_kwargs.append(param.name)
|
||||
elif param.default is param.empty:
|
||||
unsatisfied_args.append(param.name)
|
||||
elif param.kind == param.KEYWORD_ONLY:
|
||||
if param.name in unmatched_kwargs:
|
||||
unmatched_kwargs.remove(param.name)
|
||||
elif param.default is param.empty:
|
||||
unsatisfied_kwargs.append(param.name)
|
||||
elif param.kind == param.VAR_POSITIONAL:
|
||||
has_varargs = True
|
||||
elif param.kind == param.VAR_KEYWORD:
|
||||
has_var_kwargs = True
|
||||
|
||||
# Make sure there are no conflicts between args and kwargs
|
||||
if pos_kwargs_conflicts:
|
||||
raise ValueError('The following arguments are supplied in both args and kwargs: %s' %
|
||||
', '.join(pos_kwargs_conflicts))
|
||||
|
||||
# Check if keyword arguments are being fed to positional-only parameters
|
||||
if positional_only_kwargs:
|
||||
raise ValueError('The following arguments cannot be given as keyword arguments: %s' %
|
||||
', '.join(positional_only_kwargs))
|
||||
|
||||
# Check that the number of positional arguments minus the number of matched kwargs matches the
|
||||
# argspec
|
||||
if unsatisfied_args:
|
||||
raise ValueError('The following arguments have not been supplied: %s' %
|
||||
', '.join(unsatisfied_args))
|
||||
|
||||
# Check that all keyword-only arguments have been supplied
|
||||
if unsatisfied_kwargs:
|
||||
raise ValueError(
|
||||
'The following keyword-only arguments have not been supplied in kwargs: %s' %
|
||||
', '.join(unsatisfied_kwargs))
|
||||
|
||||
# Check that the callable can accept the given number of positional arguments
|
||||
if not has_varargs and unmatched_args:
|
||||
raise ValueError(
|
||||
'The list of positional arguments is longer than the target callable can handle '
|
||||
'(allowed: %d, given in args: %d)' % (len(args) - len(unmatched_args), len(args)))
|
||||
|
||||
# Check that the callable can accept the given keyword arguments
|
||||
if not has_var_kwargs and unmatched_kwargs:
|
||||
raise ValueError(
|
||||
'The target callable does not accept the following keyword arguments: %s' %
|
||||
', '.join(unmatched_kwargs))
|
2386
lib/argparse.py
2386
lib/argparse.py
File diff suppressed because it is too large
Load Diff
|
@ -1,529 +0,0 @@
|
|||
"""Beautiful Soup
|
||||
Elixir and Tonic
|
||||
"The Screen-Scraper's Friend"
|
||||
http://www.crummy.com/software/BeautifulSoup/
|
||||
|
||||
Beautiful Soup uses a pluggable XML or HTML parser to parse a
|
||||
(possibly invalid) document into a tree representation. Beautiful Soup
|
||||
provides methods and Pythonic idioms that make it easy to navigate,
|
||||
search, and modify the parse tree.
|
||||
|
||||
Beautiful Soup works with Python 2.7 and up. It works better if lxml
|
||||
and/or html5lib is installed.
|
||||
|
||||
For more than you ever wanted to know about Beautiful Soup, see the
|
||||
documentation:
|
||||
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
|
||||
"""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__author__ = "Leonard Richardson (leonardr@segfault.org)"
|
||||
__version__ = "4.6.0"
|
||||
__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
|
||||
__license__ = "MIT"
|
||||
|
||||
__all__ = ['BeautifulSoup']
|
||||
|
||||
import os
|
||||
import re
|
||||
import traceback
|
||||
import warnings
|
||||
|
||||
from .builder import builder_registry, ParserRejectedMarkup
|
||||
from .dammit import UnicodeDammit
|
||||
from .element import (
|
||||
CData,
|
||||
Comment,
|
||||
DEFAULT_OUTPUT_ENCODING,
|
||||
Declaration,
|
||||
Doctype,
|
||||
NavigableString,
|
||||
PageElement,
|
||||
ProcessingInstruction,
|
||||
ResultSet,
|
||||
SoupStrainer,
|
||||
Tag,
|
||||
)
|
||||
|
||||
# The very first thing we do is give a useful error if someone is
|
||||
# running this code under Python 3 without converting it.
|
||||
'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
|
||||
|
||||
class BeautifulSoup(Tag):
|
||||
"""
|
||||
This class defines the basic interface called by the tree builders.
|
||||
|
||||
These methods will be called by the parser:
|
||||
reset()
|
||||
feed(markup)
|
||||
|
||||
The tree builder may call these methods from its feed() implementation:
|
||||
handle_starttag(name, attrs) # See note about return value
|
||||
handle_endtag(name)
|
||||
handle_data(data) # Appends to the current data node
|
||||
endData(containerClass=NavigableString) # Ends the current data node
|
||||
|
||||
No matter how complicated the underlying parser is, you should be
|
||||
able to build a tree using 'start tag' events, 'end tag' events,
|
||||
'data' events, and "done with data" events.
|
||||
|
||||
If you encounter an empty-element tag (aka a self-closing tag,
|
||||
like HTML's <br> tag), call handle_starttag and then
|
||||
handle_endtag.
|
||||
"""
|
||||
ROOT_TAG_NAME = u'[document]'
|
||||
|
||||
# If the end-user gives no indication which tree builder they
|
||||
# want, look for one with these features.
|
||||
DEFAULT_BUILDER_FEATURES = ['html', 'fast']
|
||||
|
||||
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
|
||||
|
||||
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n"
|
||||
|
||||
def __init__(self, markup="", features=None, builder=None,
|
||||
parse_only=None, from_encoding=None, exclude_encodings=None,
|
||||
**kwargs):
|
||||
"""The Soup object is initialized as the 'root tag', and the
|
||||
provided markup (which can be a string or a file-like object)
|
||||
is fed into the underlying parser."""
|
||||
|
||||
if 'convertEntities' in kwargs:
|
||||
warnings.warn(
|
||||
"BS4 does not respect the convertEntities argument to the "
|
||||
"BeautifulSoup constructor. Entities are always converted "
|
||||
"to Unicode characters.")
|
||||
|
||||
if 'markupMassage' in kwargs:
|
||||
del kwargs['markupMassage']
|
||||
warnings.warn(
|
||||
"BS4 does not respect the markupMassage argument to the "
|
||||
"BeautifulSoup constructor. The tree builder is responsible "
|
||||
"for any necessary markup massage.")
|
||||
|
||||
if 'smartQuotesTo' in kwargs:
|
||||
del kwargs['smartQuotesTo']
|
||||
warnings.warn(
|
||||
"BS4 does not respect the smartQuotesTo argument to the "
|
||||
"BeautifulSoup constructor. Smart quotes are always converted "
|
||||
"to Unicode characters.")
|
||||
|
||||
if 'selfClosingTags' in kwargs:
|
||||
del kwargs['selfClosingTags']
|
||||
warnings.warn(
|
||||
"BS4 does not respect the selfClosingTags argument to the "
|
||||
"BeautifulSoup constructor. The tree builder is responsible "
|
||||
"for understanding self-closing tags.")
|
||||
|
||||
if 'isHTML' in kwargs:
|
||||
del kwargs['isHTML']
|
||||
warnings.warn(
|
||||
"BS4 does not respect the isHTML argument to the "
|
||||
"BeautifulSoup constructor. Suggest you use "
|
||||
"features='lxml' for HTML and features='lxml-xml' for "
|
||||
"XML.")
|
||||
|
||||
def deprecated_argument(old_name, new_name):
|
||||
if old_name in kwargs:
|
||||
warnings.warn(
|
||||
'The "%s" argument to the BeautifulSoup constructor '
|
||||
'has been renamed to "%s."' % (old_name, new_name))
|
||||
value = kwargs[old_name]
|
||||
del kwargs[old_name]
|
||||
return value
|
||||
return None
|
||||
|
||||
parse_only = parse_only or deprecated_argument(
|
||||
"parseOnlyThese", "parse_only")
|
||||
|
||||
from_encoding = from_encoding or deprecated_argument(
|
||||
"fromEncoding", "from_encoding")
|
||||
|
||||
if from_encoding and isinstance(markup, unicode):
|
||||
warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
|
||||
from_encoding = None
|
||||
|
||||
if len(kwargs) > 0:
|
||||
arg = kwargs.keys().pop()
|
||||
raise TypeError(
|
||||
"__init__() got an unexpected keyword argument '%s'" % arg)
|
||||
|
||||
if builder is None:
|
||||
original_features = features
|
||||
if isinstance(features, basestring):
|
||||
features = [features]
|
||||
if features is None or len(features) == 0:
|
||||
features = self.DEFAULT_BUILDER_FEATURES
|
||||
builder_class = builder_registry.lookup(*features)
|
||||
if builder_class is None:
|
||||
raise FeatureNotFound(
|
||||
"Couldn't find a tree builder with the features you "
|
||||
"requested: %s. Do you need to install a parser library?"
|
||||
% ",".join(features))
|
||||
builder = builder_class()
|
||||
if not (original_features == builder.NAME or
|
||||
original_features in builder.ALTERNATE_NAMES):
|
||||
if builder.is_xml:
|
||||
markup_type = "XML"
|
||||
else:
|
||||
markup_type = "HTML"
|
||||
|
||||
caller = traceback.extract_stack()[0]
|
||||
filename = caller[0]
|
||||
line_number = caller[1]
|
||||
warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
|
||||
filename=filename,
|
||||
line_number=line_number,
|
||||
parser=builder.NAME,
|
||||
markup_type=markup_type))
|
||||
|
||||
self.builder = builder
|
||||
self.is_xml = builder.is_xml
|
||||
self.known_xml = self.is_xml
|
||||
self.builder.soup = self
|
||||
|
||||
self.parse_only = parse_only
|
||||
|
||||
if hasattr(markup, 'read'): # It's a file-type object.
|
||||
markup = markup.read()
|
||||
elif len(markup) <= 256 and (
|
||||
(isinstance(markup, bytes) and not b'<' in markup)
|
||||
or (isinstance(markup, unicode) and not u'<' in markup)
|
||||
):
|
||||
# Print out warnings for a couple beginner problems
|
||||
# involving passing non-markup to Beautiful Soup.
|
||||
# Beautiful Soup will still parse the input as markup,
|
||||
# just in case that's what the user really wants.
|
||||
if (isinstance(markup, unicode)
|
||||
and not os.path.supports_unicode_filenames):
|
||||
possible_filename = markup.encode("utf8")
|
||||
else:
|
||||
possible_filename = markup
|
||||
is_file = False
|
||||
try:
|
||||
is_file = os.path.exists(possible_filename)
|
||||
except Exception, e:
|
||||
# This is almost certainly a problem involving
|
||||
# characters not valid in filenames on this
|
||||
# system. Just let it go.
|
||||
pass
|
||||
if is_file:
|
||||
if isinstance(markup, unicode):
|
||||
markup = markup.encode("utf8")
|
||||
warnings.warn(
|
||||
'"%s" looks like a filename, not markup. You should'
|
||||
' probably open this file and pass the filehandle into'
|
||||
' Beautiful Soup.' % markup)
|
||||
self._check_markup_is_url(markup)
|
||||
|
||||
for (self.markup, self.original_encoding, self.declared_html_encoding,
|
||||
self.contains_replacement_characters) in (
|
||||
self.builder.prepare_markup(
|
||||
markup, from_encoding, exclude_encodings=exclude_encodings)):
|
||||
self.reset()
|
||||
try:
|
||||
self._feed()
|
||||
break
|
||||
except ParserRejectedMarkup:
|
||||
pass
|
||||
|
||||
# Clear out the markup and remove the builder's circular
|
||||
# reference to this object.
|
||||
self.markup = None
|
||||
self.builder.soup = None
|
||||
|
||||
def __copy__(self):
|
||||
copy = type(self)(
|
||||
self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
|
||||
)
|
||||
|
||||
# Although we encoded the tree to UTF-8, that may not have
|
||||
# been the encoding of the original markup. Set the copy's
|
||||
# .original_encoding to reflect the original object's
|
||||
# .original_encoding.
|
||||
copy.original_encoding = self.original_encoding
|
||||
return copy
|
||||
|
||||
def __getstate__(self):
|
||||
# Frequently a tree builder can't be pickled.
|
||||
d = dict(self.__dict__)
|
||||
if 'builder' in d and not self.builder.picklable:
|
||||
d['builder'] = None
|
||||
return d
|
||||
|
||||
@staticmethod
|
||||
def _check_markup_is_url(markup):
|
||||
"""
|
||||
Check if markup looks like it's actually a url and raise a warning
|
||||
if so. Markup can be unicode or str (py2) / bytes (py3).
|
||||
"""
|
||||
if isinstance(markup, bytes):
|
||||
space = b' '
|
||||
cant_start_with = (b"http:", b"https:")
|
||||
elif isinstance(markup, unicode):
|
||||
space = u' '
|
||||
cant_start_with = (u"http:", u"https:")
|
||||
else:
|
||||
return
|
||||
|
||||
if any(markup.startswith(prefix) for prefix in cant_start_with):
|
||||
if not space in markup:
|
||||
if isinstance(markup, bytes):
|
||||
decoded_markup = markup.decode('utf-8', 'replace')
|
||||
else:
|
||||
decoded_markup = markup
|
||||
warnings.warn(
|
||||
'"%s" looks like a URL. Beautiful Soup is not an'
|
||||
' HTTP client. You should probably use an HTTP client like'
|
||||
' requests to get the document behind the URL, and feed'
|
||||
' that document to Beautiful Soup.' % decoded_markup
|
||||
)
|
||||
|
||||
def _feed(self):
|
||||
# Convert the document to Unicode.
|
||||
self.builder.reset()
|
||||
|
||||
self.builder.feed(self.markup)
|
||||
# Close out any unfinished strings and close all the open tags.
|
||||
self.endData()
|
||||
while self.currentTag.name != self.ROOT_TAG_NAME:
|
||||
self.popTag()
|
||||
|
||||
def reset(self):
|
||||
Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
|
||||
self.hidden = 1
|
||||
self.builder.reset()
|
||||
self.current_data = []
|
||||
self.currentTag = None
|
||||
self.tagStack = []
|
||||
self.preserve_whitespace_tag_stack = []
|
||||
self.pushTag(self)
|
||||
|
||||
def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
|
||||
"""Create a new tag associated with this soup."""
|
||||
return Tag(None, self.builder, name, namespace, nsprefix, attrs)
|
||||
|
||||
def new_string(self, s, subclass=NavigableString):
|
||||
"""Create a new NavigableString associated with this soup."""
|
||||
return subclass(s)
|
||||
|
||||
def insert_before(self, successor):
|
||||
raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
|
||||
|
||||
def insert_after(self, successor):
|
||||
raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
|
||||
|
||||
def popTag(self):
|
||||
tag = self.tagStack.pop()
|
||||
if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]:
|
||||
self.preserve_whitespace_tag_stack.pop()
|
||||
#print "Pop", tag.name
|
||||
if self.tagStack:
|
||||
self.currentTag = self.tagStack[-1]
|
||||
return self.currentTag
|
||||
|
||||
def pushTag(self, tag):
|
||||
#print "Push", tag.name
|
||||
if self.currentTag:
|
||||
self.currentTag.contents.append(tag)
|
||||
self.tagStack.append(tag)
|
||||
self.currentTag = self.tagStack[-1]
|
||||
if tag.name in self.builder.preserve_whitespace_tags:
|
||||
self.preserve_whitespace_tag_stack.append(tag)
|
||||
|
||||
def endData(self, containerClass=NavigableString):
|
||||
if self.current_data:
|
||||
current_data = u''.join(self.current_data)
|
||||
# If whitespace is not preserved, and this string contains
|
||||
# nothing but ASCII spaces, replace it with a single space
|
||||
# or newline.
|
||||
if not self.preserve_whitespace_tag_stack:
|
||||
strippable = True
|
||||
for i in current_data:
|
||||
if i not in self.ASCII_SPACES:
|
||||
strippable = False
|
||||
break
|
||||
if strippable:
|
||||
if '\n' in current_data:
|
||||
current_data = '\n'
|
||||
else:
|
||||
current_data = ' '
|
||||
|
||||
# Reset the data collector.
|
||||
self.current_data = []
|
||||
|
||||
# Should we add this string to the tree at all?
|
||||
if self.parse_only and len(self.tagStack) <= 1 and \
|
||||
(not self.parse_only.text or \
|
||||
not self.parse_only.search(current_data)):
|
||||
return
|
||||
|
||||
o = containerClass(current_data)
|
||||
self.object_was_parsed(o)
|
||||
|
||||
def object_was_parsed(self, o, parent=None, most_recent_element=None):
|
||||
"""Add an object to the parse tree."""
|
||||
parent = parent or self.currentTag
|
||||
previous_element = most_recent_element or self._most_recent_element
|
||||
|
||||
next_element = previous_sibling = next_sibling = None
|
||||
if isinstance(o, Tag):
|
||||
next_element = o.next_element
|
||||
next_sibling = o.next_sibling
|
||||
previous_sibling = o.previous_sibling
|
||||
if not previous_element:
|
||||
previous_element = o.previous_element
|
||||
|
||||
o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)
|
||||
|
||||
self._most_recent_element = o
|
||||
parent.contents.append(o)
|
||||
|
||||
if parent.next_sibling:
|
||||
# This node is being inserted into an element that has
|
||||
# already been parsed. Deal with any dangling references.
|
||||
index = len(parent.contents)-1
|
||||
while index >= 0:
|
||||
if parent.contents[index] is o:
|
||||
break
|
||||
index -= 1
|
||||
else:
|
||||
raise ValueError(
|
||||
"Error building tree: supposedly %r was inserted "
|
||||
"into %r after the fact, but I don't see it!" % (
|
||||
o, parent
|
||||
)
|
||||
)
|
||||
if index == 0:
|
||||
previous_element = parent
|
||||
previous_sibling = None
|
||||
else:
|
||||
previous_element = previous_sibling = parent.contents[index-1]
|
||||
if index == len(parent.contents)-1:
|
||||
next_element = parent.next_sibling
|
||||
next_sibling = None
|
||||
else:
|
||||
next_element = next_sibling = parent.contents[index+1]
|
||||
|
||||
o.previous_element = previous_element
|
||||
if previous_element:
|
||||
previous_element.next_element = o
|
||||
o.next_element = next_element
|
||||
if next_element:
|
||||
next_element.previous_element = o
|
||||
o.next_sibling = next_sibling
|
||||
if next_sibling:
|
||||
next_sibling.previous_sibling = o
|
||||
o.previous_sibling = previous_sibling
|
||||
if previous_sibling:
|
||||
previous_sibling.next_sibling = o
|
||||
|
||||
def _popToTag(self, name, nsprefix=None, inclusivePop=True):
|
||||
"""Pops the tag stack up to and including the most recent
|
||||
instance of the given tag. If inclusivePop is false, pops the tag
|
||||
stack up to but *not* including the most recent instqance of
|
||||
the given tag."""
|
||||
#print "Popping to %s" % name
|
||||
if name == self.ROOT_TAG_NAME:
|
||||
# The BeautifulSoup object itself can never be popped.
|
||||
return
|
||||
|
||||
most_recently_popped = None
|
||||
|
||||
stack_size = len(self.tagStack)
|
||||
for i in range(stack_size - 1, 0, -1):
|
||||
t = self.tagStack[i]
|
||||
if (name == t.name and nsprefix == t.prefix):
|
||||
if inclusivePop:
|
||||
most_recently_popped = self.popTag()
|
||||
break
|
||||
most_recently_popped = self.popTag()
|
||||
|
||||
return most_recently_popped
|
||||
|
||||
def handle_starttag(self, name, namespace, nsprefix, attrs):
|
||||
"""Push a start tag on to the stack.
|
||||
|
||||
If this method returns None, the tag was rejected by the
|
||||
SoupStrainer. You should proceed as if the tag had not occurred
|
||||
in the document. For instance, if this was a self-closing tag,
|
||||
don't call handle_endtag.
|
||||
"""
|
||||
|
||||
# print "Start tag %s: %s" % (name, attrs)
|
||||
self.endData()
|
||||
|
||||
if (self.parse_only and len(self.tagStack) <= 1
|
||||
and (self.parse_only.text
|
||||
or not self.parse_only.search_tag(name, attrs))):
|
||||
return None
|
||||
|
||||
tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
|
||||
self.currentTag, self._most_recent_element)
|
||||
if tag is None:
|
||||
return tag
|
||||
if self._most_recent_element:
|
||||
self._most_recent_element.next_element = tag
|
||||
self._most_recent_element = tag
|
||||
self.pushTag(tag)
|
||||
return tag
|
||||
|
||||
def handle_endtag(self, name, nsprefix=None):
|
||||
#print "End tag: " + name
|
||||
self.endData()
|
||||
self._popToTag(name, nsprefix)
|
||||
|
||||
def handle_data(self, data):
|
||||
self.current_data.append(data)
|
||||
|
||||
def decode(self, pretty_print=False,
|
||||
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
|
||||
formatter="minimal"):
|
||||
"""Returns a string or Unicode representation of this document.
|
||||
To get Unicode, pass None for encoding."""
|
||||
|
||||
if self.is_xml:
|
||||
# Print the XML declaration
|
||||
encoding_part = ''
|
||||
if eventual_encoding != None:
|
||||
encoding_part = ' encoding="%s"' % eventual_encoding
|
||||
prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
|
||||
else:
|
||||
prefix = u''
|
||||
if not pretty_print:
|
||||
indent_level = None
|
||||
else:
|
||||
indent_level = 0
|
||||
return prefix + super(BeautifulSoup, self).decode(
|
||||
indent_level, eventual_encoding, formatter)
|
||||
|
||||
# Alias to make it easier to type import: 'from bs4 import _soup'
|
||||
_s = BeautifulSoup
|
||||
_soup = BeautifulSoup
|
||||
|
||||
class BeautifulStoneSoup(BeautifulSoup):
|
||||
"""Deprecated interface to an XML parser."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
kwargs['features'] = 'xml'
|
||||
warnings.warn(
|
||||
'The BeautifulStoneSoup class is deprecated. Instead of using '
|
||||
'it, pass features="xml" into the BeautifulSoup constructor.')
|
||||
super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class StopParsing(Exception):
|
||||
pass
|
||||
|
||||
class FeatureNotFound(ValueError):
|
||||
pass
|
||||
|
||||
|
||||
#By default, act as an HTML pretty-printer.
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
soup = BeautifulSoup(sys.stdin)
|
||||
print soup.prettify()
|
|
@ -1,333 +0,0 @@
|
|||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
from collections import defaultdict
|
||||
import itertools
|
||||
import sys
|
||||
from bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
ContentMetaAttributeValue,
|
||||
HTMLAwareEntitySubstitution,
|
||||
whitespace_re
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'HTMLTreeBuilder',
|
||||
'SAXTreeBuilder',
|
||||
'TreeBuilder',
|
||||
'TreeBuilderRegistry',
|
||||
]
|
||||
|
||||
# Some useful features for a TreeBuilder to have.
|
||||
FAST = 'fast'
|
||||
PERMISSIVE = 'permissive'
|
||||
STRICT = 'strict'
|
||||
XML = 'xml'
|
||||
HTML = 'html'
|
||||
HTML_5 = 'html5'
|
||||
|
||||
|
||||
class TreeBuilderRegistry(object):
|
||||
|
||||
def __init__(self):
|
||||
self.builders_for_feature = defaultdict(list)
|
||||
self.builders = []
|
||||
|
||||
def register(self, treebuilder_class):
|
||||
"""Register a treebuilder based on its advertised features."""
|
||||
for feature in treebuilder_class.features:
|
||||
self.builders_for_feature[feature].insert(0, treebuilder_class)
|
||||
self.builders.insert(0, treebuilder_class)
|
||||
|
||||
def lookup(self, *features):
|
||||
if len(self.builders) == 0:
|
||||
# There are no builders at all.
|
||||
return None
|
||||
|
||||
if len(features) == 0:
|
||||
# They didn't ask for any features. Give them the most
|
||||
# recently registered builder.
|
||||
return self.builders[0]
|
||||
|
||||
# Go down the list of features in order, and eliminate any builders
|
||||
# that don't match every feature.
|
||||
features = list(features)
|
||||
features.reverse()
|
||||
candidates = None
|
||||
candidate_set = None
|
||||
while len(features) > 0:
|
||||
feature = features.pop()
|
||||
we_have_the_feature = self.builders_for_feature.get(feature, [])
|
||||
if len(we_have_the_feature) > 0:
|
||||
if candidates is None:
|
||||
candidates = we_have_the_feature
|
||||
candidate_set = set(candidates)
|
||||
else:
|
||||
# Eliminate any candidates that don't have this feature.
|
||||
candidate_set = candidate_set.intersection(
|
||||
set(we_have_the_feature))
|
||||
|
||||
# The only valid candidates are the ones in candidate_set.
|
||||
# Go through the original list of candidates and pick the first one
|
||||
# that's in candidate_set.
|
||||
if candidate_set is None:
|
||||
return None
|
||||
for candidate in candidates:
|
||||
if candidate in candidate_set:
|
||||
return candidate
|
||||
return None
|
||||
|
||||
# The BeautifulSoup class will take feature lists from developers and use them
|
||||
# to look up builders in this registry.
|
||||
builder_registry = TreeBuilderRegistry()
|
||||
|
||||
class TreeBuilder(object):
|
||||
"""Turn a document into a Beautiful Soup object tree."""
|
||||
|
||||
NAME = "[Unknown tree builder]"
|
||||
ALTERNATE_NAMES = []
|
||||
features = []
|
||||
|
||||
is_xml = False
|
||||
picklable = False
|
||||
preserve_whitespace_tags = set()
|
||||
empty_element_tags = None # A tag will be considered an empty-element
|
||||
# tag when and only when it has no contents.
|
||||
|
||||
# A value for these tag/attribute combinations is a space- or
|
||||
# comma-separated list of CDATA, rather than a single CDATA.
|
||||
cdata_list_attributes = {}
|
||||
|
||||
|
||||
def __init__(self):
|
||||
self.soup = None
|
||||
|
||||
def reset(self):
|
||||
pass
|
||||
|
||||
def can_be_empty_element(self, tag_name):
|
||||
"""Might a tag with this name be an empty-element tag?
|
||||
|
||||
The final markup may or may not actually present this tag as
|
||||
self-closing.
|
||||
|
||||
For instance: an HTMLBuilder does not consider a <p> tag to be
|
||||
an empty-element tag (it's not in
|
||||
HTMLBuilder.empty_element_tags). This means an empty <p> tag
|
||||
will be presented as "<p></p>", not "<p />".
|
||||
|
||||
The default implementation has no opinion about which tags are
|
||||
empty-element tags, so a tag will be presented as an
|
||||
empty-element tag if and only if it has no contents.
|
||||
"<foo></foo>" will become "<foo />", and "<foo>bar</foo>" will
|
||||
be left alone.
|
||||
"""
|
||||
if self.empty_element_tags is None:
|
||||
return True
|
||||
return tag_name in self.empty_element_tags
|
||||
|
||||
def feed(self, markup):
|
||||
raise NotImplementedError()
|
||||
|
||||
def prepare_markup(self, markup, user_specified_encoding=None,
|
||||
document_declared_encoding=None):
|
||||
return markup, None, None, False
|
||||
|
||||
def test_fragment_to_document(self, fragment):
|
||||
"""Wrap an HTML fragment to make it look like a document.
|
||||
|
||||
Different parsers do this differently. For instance, lxml
|
||||
introduces an empty <head> tag, and html5lib
|
||||
doesn't. Abstracting this away lets us write simple tests
|
||||
which run HTML fragments through the parser and compare the
|
||||
results against other HTML fragments.
|
||||
|
||||
This method should not be used outside of tests.
|
||||
"""
|
||||
return fragment
|
||||
|
||||
def set_up_substitutions(self, tag):
|
||||
return False
|
||||
|
||||
def _replace_cdata_list_attribute_values(self, tag_name, attrs):
|
||||
"""Replaces class="foo bar" with class=["foo", "bar"]
|
||||
|
||||
Modifies its input in place.
|
||||
"""
|
||||
if not attrs:
|
||||
return attrs
|
||||
if self.cdata_list_attributes:
|
||||
universal = self.cdata_list_attributes.get('*', [])
|
||||
tag_specific = self.cdata_list_attributes.get(
|
||||
tag_name.lower(), None)
|
||||
for attr in attrs.keys():
|
||||
if attr in universal or (tag_specific and attr in tag_specific):
|
||||
# We have a "class"-type attribute whose string
|
||||
# value is a whitespace-separated list of
|
||||
# values. Split it into a list.
|
||||
value = attrs[attr]
|
||||
if isinstance(value, basestring):
|
||||
values = whitespace_re.split(value)
|
||||
else:
|
||||
# html5lib sometimes calls setAttributes twice
|
||||
# for the same tag when rearranging the parse
|
||||
# tree. On the second call the attribute value
|
||||
# here is already a list. If this happens,
|
||||
# leave the value alone rather than trying to
|
||||
# split it again.
|
||||
values = value
|
||||
attrs[attr] = values
|
||||
return attrs
|
||||
|
||||
class SAXTreeBuilder(TreeBuilder):
|
||||
"""A Beautiful Soup treebuilder that listens for SAX events."""
|
||||
|
||||
def feed(self, markup):
|
||||
raise NotImplementedError()
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
def startElement(self, name, attrs):
|
||||
attrs = dict((key[1], value) for key, value in list(attrs.items()))
|
||||
#print "Start %s, %r" % (name, attrs)
|
||||
self.soup.handle_starttag(name, attrs)
|
||||
|
||||
def endElement(self, name):
|
||||
#print "End %s" % name
|
||||
self.soup.handle_endtag(name)
|
||||
|
||||
def startElementNS(self, nsTuple, nodeName, attrs):
|
||||
# Throw away (ns, nodeName) for now.
|
||||
self.startElement(nodeName, attrs)
|
||||
|
||||
def endElementNS(self, nsTuple, nodeName):
|
||||
# Throw away (ns, nodeName) for now.
|
||||
self.endElement(nodeName)
|
||||
#handler.endElementNS((ns, node.nodeName), node.nodeName)
|
||||
|
||||
def startPrefixMapping(self, prefix, nodeValue):
|
||||
# Ignore the prefix for now.
|
||||
pass
|
||||
|
||||
def endPrefixMapping(self, prefix):
|
||||
# Ignore the prefix for now.
|
||||
# handler.endPrefixMapping(prefix)
|
||||
pass
|
||||
|
||||
def characters(self, content):
|
||||
self.soup.handle_data(content)
|
||||
|
||||
def startDocument(self):
|
||||
pass
|
||||
|
||||
def endDocument(self):
|
||||
pass
|
||||
|
||||
|
||||
class HTMLTreeBuilder(TreeBuilder):
|
||||
"""This TreeBuilder knows facts about HTML.
|
||||
|
||||
Such as which tags are empty-element tags.
|
||||
"""
|
||||
|
||||
preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
|
||||
empty_element_tags = set([
|
||||
# These are from HTML5.
|
||||
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
|
||||
|
||||
# These are from HTML4, removed in HTML5.
|
||||
'spacer', 'frame'
|
||||
])
|
||||
|
||||
# The HTML standard defines these attributes as containing a
|
||||
# space-separated list of values, not a single value. That is,
|
||||
# class="foo bar" means that the 'class' attribute has two values,
|
||||
# 'foo' and 'bar', not the single value 'foo bar'. When we
|
||||
# encounter one of these attributes, we will parse its value into
|
||||
# a list of values if possible. Upon output, the list will be
|
||||
# converted back into a string.
|
||||
cdata_list_attributes = {
|
||||
"*" : ['class', 'accesskey', 'dropzone'],
|
||||
"a" : ['rel', 'rev'],
|
||||
"link" : ['rel', 'rev'],
|
||||
"td" : ["headers"],
|
||||
"th" : ["headers"],
|
||||
"td" : ["headers"],
|
||||
"form" : ["accept-charset"],
|
||||
"object" : ["archive"],
|
||||
|
||||
# These are HTML5 specific, as are *.accesskey and *.dropzone above.
|
||||
"area" : ["rel"],
|
||||
"icon" : ["sizes"],
|
||||
"iframe" : ["sandbox"],
|
||||
"output" : ["for"],
|
||||
}
|
||||
|
||||
def set_up_substitutions(self, tag):
|
||||
# We are only interested in <meta> tags
|
||||
if tag.name != 'meta':
|
||||
return False
|
||||
|
||||
http_equiv = tag.get('http-equiv')
|
||||
content = tag.get('content')
|
||||
charset = tag.get('charset')
|
||||
|
||||
# We are interested in <meta> tags that say what encoding the
|
||||
# document was originally in. This means HTML 5-style <meta>
|
||||
# tags that provide the "charset" attribute. It also means
|
||||
# HTML 4-style <meta> tags that provide the "content"
|
||||
# attribute and have "http-equiv" set to "content-type".
|
||||
#
|
||||
# In both cases we will replace the value of the appropriate
|
||||
# attribute with a standin object that can take on any
|
||||
# encoding.
|
||||
meta_encoding = None
|
||||
if charset is not None:
|
||||
# HTML 5 style:
|
||||
# <meta charset="utf8">
|
||||
meta_encoding = charset
|
||||
tag['charset'] = CharsetMetaAttributeValue(charset)
|
||||
|
||||
elif (content is not None and http_equiv is not None
|
||||
and http_equiv.lower() == 'content-type'):
|
||||
# HTML 4 style:
|
||||
# <meta http-equiv="content-type" content="text/html; charset=utf8">
|
||||
tag['content'] = ContentMetaAttributeValue(content)
|
||||
|
||||
return (meta_encoding is not None)
|
||||
|
||||
def register_treebuilders_from(module):
|
||||
"""Copy TreeBuilders from the given module into this module."""
|
||||
# I'm fairly sure this is not the best way to do this.
|
||||
this_module = sys.modules['bs4.builder']
|
||||
for name in module.__all__:
|
||||
obj = getattr(module, name)
|
||||
|
||||
if issubclass(obj, TreeBuilder):
|
||||
setattr(this_module, name, obj)
|
||||
this_module.__all__.append(name)
|
||||
# Register the builder while we're at it.
|
||||
this_module.builder_registry.register(obj)
|
||||
|
||||
class ParserRejectedMarkup(Exception):
|
||||
pass
|
||||
|
||||
# Builders are registered in reverse order of priority, so that custom
|
||||
# builder registrations will take precedence. In general, we want lxml
|
||||
# to take precedence over html5lib, because it's faster. And we only
|
||||
# want to use HTMLParser as a last result.
|
||||
from . import _htmlparser
|
||||
register_treebuilders_from(_htmlparser)
|
||||
try:
|
||||
from . import _html5lib
|
||||
register_treebuilders_from(_html5lib)
|
||||
except ImportError:
|
||||
# They don't have html5lib installed.
|
||||
pass
|
||||
try:
|
||||
from . import _lxml
|
||||
register_treebuilders_from(_lxml)
|
||||
except ImportError:
|
||||
# They don't have lxml installed.
|
||||
pass
|
|
@ -1,426 +0,0 @@
|
|||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__all__ = [
|
||||
'HTML5TreeBuilder',
|
||||
]
|
||||
|
||||
import warnings
|
||||
import re
|
||||
from bs4.builder import (
|
||||
PERMISSIVE,
|
||||
HTML,
|
||||
HTML_5,
|
||||
HTMLTreeBuilder,
|
||||
)
|
||||
from bs4.element import (
|
||||
NamespacedAttribute,
|
||||
whitespace_re,
|
||||
)
|
||||
import html5lib
|
||||
from html5lib.constants import (
|
||||
namespaces,
|
||||
prefixes,
|
||||
)
|
||||
from bs4.element import (
|
||||
Comment,
|
||||
Doctype,
|
||||
NavigableString,
|
||||
Tag,
|
||||
)
|
||||
|
||||
try:
|
||||
# Pre-0.99999999
|
||||
from html5lib.treebuilders import _base as treebuilder_base
|
||||
new_html5lib = False
|
||||
except ImportError, e:
|
||||
# 0.99999999 and up
|
||||
from html5lib.treebuilders import base as treebuilder_base
|
||||
new_html5lib = True
|
||||
|
||||
class HTML5TreeBuilder(HTMLTreeBuilder):
|
||||
"""Use html5lib to build a tree."""
|
||||
|
||||
NAME = "html5lib"
|
||||
|
||||
features = [NAME, PERMISSIVE, HTML_5, HTML]
|
||||
|
||||
def prepare_markup(self, markup, user_specified_encoding,
|
||||
document_declared_encoding=None, exclude_encodings=None):
|
||||
# Store the user-specified encoding for use later on.
|
||||
self.user_specified_encoding = user_specified_encoding
|
||||
|
||||
# document_declared_encoding and exclude_encodings aren't used
|
||||
# ATM because the html5lib TreeBuilder doesn't use
|
||||
# UnicodeDammit.
|
||||
if exclude_encodings:
|
||||
warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
|
||||
yield (markup, None, None, False)
|
||||
|
||||
# These methods are defined by Beautiful Soup.
|
||||
def feed(self, markup):
|
||||
if self.soup.parse_only is not None:
|
||||
warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
|
||||
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
|
||||
|
||||
extra_kwargs = dict()
|
||||
if not isinstance(markup, unicode):
|
||||
if new_html5lib:
|
||||
extra_kwargs['override_encoding'] = self.user_specified_encoding
|
||||
else:
|
||||
extra_kwargs['encoding'] = self.user_specified_encoding
|
||||
doc = parser.parse(markup, **extra_kwargs)
|
||||
|
||||
# Set the character encoding detected by the tokenizer.
|
||||
if isinstance(markup, unicode):
|
||||
# We need to special-case this because html5lib sets
|
||||
# charEncoding to UTF-8 if it gets Unicode input.
|
||||
doc.original_encoding = None
|
||||
else:
|
||||
original_encoding = parser.tokenizer.stream.charEncoding[0]
|
||||
if not isinstance(original_encoding, basestring):
|
||||
# In 0.99999999 and up, the encoding is an html5lib
|
||||
# Encoding object. We want to use a string for compatibility
|
||||
# with other tree builders.
|
||||
original_encoding = original_encoding.name
|
||||
doc.original_encoding = original_encoding
|
||||
|
||||
def create_treebuilder(self, namespaceHTMLElements):
|
||||
self.underlying_builder = TreeBuilderForHtml5lib(
|
||||
namespaceHTMLElements, self.soup)
|
||||
return self.underlying_builder
|
||||
|
||||
def test_fragment_to_document(self, fragment):
|
||||
"""See `TreeBuilder`."""
|
||||
return u'<html><head></head><body>%s</body></html>' % fragment
|
||||
|
||||
|
||||
class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
|
||||
|
||||
def __init__(self, namespaceHTMLElements, soup=None):
|
||||
if soup:
|
||||
self.soup = soup
|
||||
else:
|
||||
from bs4 import BeautifulSoup
|
||||
self.soup = BeautifulSoup("", "html.parser")
|
||||
super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
|
||||
|
||||
def documentClass(self):
|
||||
self.soup.reset()
|
||||
return Element(self.soup, self.soup, None)
|
||||
|
||||
def insertDoctype(self, token):
|
||||
name = token["name"]
|
||||
publicId = token["publicId"]
|
||||
systemId = token["systemId"]
|
||||
|
||||
doctype = Doctype.for_name_and_ids(name, publicId, systemId)
|
||||
self.soup.object_was_parsed(doctype)
|
||||
|
||||
def elementClass(self, name, namespace):
|
||||
tag = self.soup.new_tag(name, namespace)
|
||||
return Element(tag, self.soup, namespace)
|
||||
|
||||
def commentClass(self, data):
|
||||
return TextNode(Comment(data), self.soup)
|
||||
|
||||
def fragmentClass(self):
|
||||
from bs4 import BeautifulSoup
|
||||
self.soup = BeautifulSoup("", "html.parser")
|
||||
self.soup.name = "[document_fragment]"
|
||||
return Element(self.soup, self.soup, None)
|
||||
|
||||
def appendChild(self, node):
|
||||
# XXX This code is not covered by the BS4 tests.
|
||||
self.soup.append(node.element)
|
||||
|
||||
def getDocument(self):
|
||||
return self.soup
|
||||
|
||||
def getFragment(self):
|
||||
return treebuilder_base.TreeBuilder.getFragment(self).element
|
||||
|
||||
def testSerializer(self, element):
|
||||
from bs4 import BeautifulSoup
|
||||
rv = []
|
||||
doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')
|
||||
|
||||
def serializeElement(element, indent=0):
|
||||
if isinstance(element, BeautifulSoup):
|
||||
pass
|
||||
if isinstance(element, Doctype):
|
||||
m = doctype_re.match(element)
|
||||
if m:
|
||||
name = m.group(1)
|
||||
if m.lastindex > 1:
|
||||
publicId = m.group(2) or ""
|
||||
systemId = m.group(3) or m.group(4) or ""
|
||||
rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
|
||||
(' ' * indent, name, publicId, systemId))
|
||||
else:
|
||||
rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, name))
|
||||
else:
|
||||
rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
|
||||
elif isinstance(element, Comment):
|
||||
rv.append("|%s<!-- %s -->" % (' ' * indent, element))
|
||||
elif isinstance(element, NavigableString):
|
||||
rv.append("|%s\"%s\"" % (' ' * indent, element))
|
||||
else:
|
||||
if element.namespace:
|
||||
name = "%s %s" % (prefixes[element.namespace],
|
||||
element.name)
|
||||
else:
|
||||
name = element.name
|
||||
rv.append("|%s<%s>" % (' ' * indent, name))
|
||||
if element.attrs:
|
||||
attributes = []
|
||||
for name, value in element.attrs.items():
|
||||
if isinstance(name, NamespacedAttribute):
|
||||
name = "%s %s" % (prefixes[name.namespace], name.name)
|
||||
if isinstance(value, list):
|
||||
value = " ".join(value)
|
||||
attributes.append((name, value))
|
||||
|
||||
for name, value in sorted(attributes):
|
||||
rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
|
||||
indent += 2
|
||||
for child in element.children:
|
||||
serializeElement(child, indent)
|
||||
serializeElement(element, 0)
|
||||
|
||||
return "\n".join(rv)
|
||||
|
||||
class AttrList(object):
|
||||
def __init__(self, element):
|
||||
self.element = element
|
||||
self.attrs = dict(self.element.attrs)
|
||||
def __iter__(self):
|
||||
return list(self.attrs.items()).__iter__()
|
||||
def __setitem__(self, name, value):
|
||||
# If this attribute is a multi-valued attribute for this element,
|
||||
# turn its value into a list.
|
||||
list_attr = HTML5TreeBuilder.cdata_list_attributes
|
||||
if (name in list_attr['*']
|
||||
or (self.element.name in list_attr
|
||||
and name in list_attr[self.element.name])):
|
||||
# A node that is being cloned may have already undergone
|
||||
# this procedure.
|
||||
if not isinstance(value, list):
|
||||
value = whitespace_re.split(value)
|
||||
self.element[name] = value
|
||||
def items(self):
|
||||
return list(self.attrs.items())
|
||||
def keys(self):
|
||||
return list(self.attrs.keys())
|
||||
def __len__(self):
|
||||
return len(self.attrs)
|
||||
def __getitem__(self, name):
|
||||
return self.attrs[name]
|
||||
def __contains__(self, name):
|
||||
return name in list(self.attrs.keys())
|
||||
|
||||
|
||||
class Element(treebuilder_base.Node):
|
||||
def __init__(self, element, soup, namespace):
|
||||
treebuilder_base.Node.__init__(self, element.name)
|
||||
self.element = element
|
||||
self.soup = soup
|
||||
self.namespace = namespace
|
||||
|
||||
def appendChild(self, node):
|
||||
string_child = child = None
|
||||
if isinstance(node, basestring):
|
||||
# Some other piece of code decided to pass in a string
|
||||
# instead of creating a TextElement object to contain the
|
||||
# string.
|
||||
string_child = child = node
|
||||
elif isinstance(node, Tag):
|
||||
# Some other piece of code decided to pass in a Tag
|
||||
# instead of creating an Element object to contain the
|
||||
# Tag.
|
||||
child = node
|
||||
elif node.element.__class__ == NavigableString:
|
||||
string_child = child = node.element
|
||||
node.parent = self
|
||||
else:
|
||||
child = node.element
|
||||
node.parent = self
|
||||
|
||||
if not isinstance(child, basestring) and child.parent is not None:
|
||||
node.element.extract()
|
||||
|
||||
if (string_child and self.element.contents
|
||||
and self.element.contents[-1].__class__ == NavigableString):
|
||||
# We are appending a string onto another string.
|
||||
# TODO This has O(n^2) performance, for input like
|
||||
# "a</a>a</a>a</a>..."
|
||||
old_element = self.element.contents[-1]
|
||||
new_element = self.soup.new_string(old_element + string_child)
|
||||
old_element.replace_with(new_element)
|
||||
self.soup._most_recent_element = new_element
|
||||
else:
|
||||
if isinstance(node, basestring):
|
||||
# Create a brand new NavigableString from this string.
|
||||
child = self.soup.new_string(node)
|
||||
|
||||
# Tell Beautiful Soup to act as if it parsed this element
|
||||
# immediately after the parent's last descendant. (Or
|
||||
# immediately after the parent, if it has no children.)
|
||||
if self.element.contents:
|
||||
most_recent_element = self.element._last_descendant(False)
|
||||
elif self.element.next_element is not None:
|
||||
# Something from further ahead in the parse tree is
|
||||
# being inserted into this earlier element. This is
|
||||
# very annoying because it means an expensive search
|
||||
# for the last element in the tree.
|
||||
most_recent_element = self.soup._last_descendant()
|
||||
else:
|
||||
most_recent_element = self.element
|
||||
|
||||
self.soup.object_was_parsed(
|
||||
child, parent=self.element,
|
||||
most_recent_element=most_recent_element)
|
||||
|
||||
def getAttributes(self):
|
||||
if isinstance(self.element, Comment):
|
||||
return {}
|
||||
return AttrList(self.element)
|
||||
|
||||
def setAttributes(self, attributes):
|
||||
|
||||
if attributes is not None and len(attributes) > 0:
|
||||
|
||||
converted_attributes = []
|
||||
for name, value in list(attributes.items()):
|
||||
if isinstance(name, tuple):
|
||||
new_name = NamespacedAttribute(*name)
|
||||
del attributes[name]
|
||||
attributes[new_name] = value
|
||||
|
||||
self.soup.builder._replace_cdata_list_attribute_values(
|
||||
self.name, attributes)
|
||||
for name, value in attributes.items():
|
||||
self.element[name] = value
|
||||
|
||||
# The attributes may contain variables that need substitution.
|
||||
# Call set_up_substitutions manually.
|
||||
#
|
||||
# The Tag constructor called this method when the Tag was created,
|
||||
# but we just set/changed the attributes, so call it again.
|
||||
self.soup.builder.set_up_substitutions(self.element)
|
||||
attributes = property(getAttributes, setAttributes)
|
||||
|
||||
def insertText(self, data, insertBefore=None):
|
||||
text = TextNode(self.soup.new_string(data), self.soup)
|
||||
if insertBefore:
|
||||
self.insertBefore(text, insertBefore)
|
||||
else:
|
||||
self.appendChild(text)
|
||||
|
||||
def insertBefore(self, node, refNode):
|
||||
index = self.element.index(refNode.element)
|
||||
if (node.element.__class__ == NavigableString and self.element.contents
|
||||
and self.element.contents[index-1].__class__ == NavigableString):
|
||||
# (See comments in appendChild)
|
||||
old_node = self.element.contents[index-1]
|
||||
new_str = self.soup.new_string(old_node + node.element)
|
||||
old_node.replace_with(new_str)
|
||||
else:
|
||||
self.element.insert(index, node.element)
|
||||
node.parent = self
|
||||
|
||||
def removeChild(self, node):
|
||||
node.element.extract()
|
||||
|
||||
def reparentChildren(self, new_parent):
|
||||
"""Move all of this tag's children into another tag."""
|
||||
# print "MOVE", self.element.contents
|
||||
# print "FROM", self.element
|
||||
# print "TO", new_parent.element
|
||||
|
||||
element = self.element
|
||||
new_parent_element = new_parent.element
|
||||
# Determine what this tag's next_element will be once all the children
|
||||
# are removed.
|
||||
final_next_element = element.next_sibling
|
||||
|
||||
new_parents_last_descendant = new_parent_element._last_descendant(False, False)
|
||||
if len(new_parent_element.contents) > 0:
|
||||
# The new parent already contains children. We will be
|
||||
# appending this tag's children to the end.
|
||||
new_parents_last_child = new_parent_element.contents[-1]
|
||||
new_parents_last_descendant_next_element = new_parents_last_descendant.next_element
|
||||
else:
|
||||
# The new parent contains no children.
|
||||
new_parents_last_child = None
|
||||
new_parents_last_descendant_next_element = new_parent_element.next_element
|
||||
|
||||
to_append = element.contents
|
||||
if len(to_append) > 0:
|
||||
# Set the first child's previous_element and previous_sibling
|
||||
# to elements within the new parent
|
||||
first_child = to_append[0]
|
||||
if new_parents_last_descendant:
|
||||
first_child.previous_element = new_parents_last_descendant
|
||||
else:
|
||||
first_child.previous_element = new_parent_element
|
||||
first_child.previous_sibling = new_parents_last_child
|
||||
if new_parents_last_descendant:
|
||||
new_parents_last_descendant.next_element = first_child
|
||||
else:
|
||||
new_parent_element.next_element = first_child
|
||||
if new_parents_last_child:
|
||||
new_parents_last_child.next_sibling = first_child
|
||||
|
||||
# Find the very last element being moved. It is now the
|
||||
# parent's last descendant. It has no .next_sibling and
|
||||
# its .next_element is whatever the previous last
|
||||
# descendant had.
|
||||
last_childs_last_descendant = to_append[-1]._last_descendant(False, True)
|
||||
|
||||
last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
|
||||
if new_parents_last_descendant_next_element:
|
||||
# TODO: This code has no test coverage and I'm not sure
|
||||
# how to get html5lib to go through this path, but it's
|
||||
# just the other side of the previous line.
|
||||
new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant
|
||||
last_childs_last_descendant.next_sibling = None
|
||||
|
||||
for child in to_append:
|
||||
child.parent = new_parent_element
|
||||
new_parent_element.contents.append(child)
|
||||
|
||||
# Now that this element has no children, change its .next_element.
|
||||
element.contents = []
|
||||
element.next_element = final_next_element
|
||||
|
||||
# print "DONE WITH MOVE"
|
||||
# print "FROM", self.element
|
||||
# print "TO", new_parent_element
|
||||
|
||||
def cloneNode(self):
|
||||
tag = self.soup.new_tag(self.element.name, self.namespace)
|
||||
node = Element(tag, self.soup, self.namespace)
|
||||
for key,value in self.attributes:
|
||||
node.attributes[key] = value
|
||||
return node
|
||||
|
||||
def hasContent(self):
|
||||
return self.element.contents
|
||||
|
||||
def getNameTuple(self):
|
||||
if self.namespace == None:
|
||||
return namespaces["html"], self.name
|
||||
else:
|
||||
return self.namespace, self.name
|
||||
|
||||
nameTuple = property(getNameTuple)
|
||||
|
||||
class TextNode(Element):
|
||||
def __init__(self, element, soup):
|
||||
treebuilder_base.Node.__init__(self, None)
|
||||
self.element = element
|
||||
self.soup = soup
|
||||
|
||||
def cloneNode(self):
|
||||
raise NotImplementedError
|
|
@ -1,314 +0,0 @@
|
|||
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
__all__ = [
|
||||
'HTMLParserTreeBuilder',
|
||||
]
|
||||
|
||||
from HTMLParser import HTMLParser
|
||||
|
||||
try:
|
||||
from HTMLParser import HTMLParseError
|
||||
except ImportError, e:
|
||||
# HTMLParseError is removed in Python 3.5. Since it can never be
|
||||
# thrown in 3.5, we can just define our own class as a placeholder.
|
||||
class HTMLParseError(Exception):
|
||||
pass
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
|
||||
# argument, which we'd like to set to False. Unfortunately,
|
||||
# http://bugs.python.org/issue13273 makes strict=True a better bet
|
||||
# before Python 3.2.3.
|
||||
#
|
||||
# At the end of this file, we monkeypatch HTMLParser so that
|
||||
# strict=True works well on Python 3.2.2.
|
||||
major, minor, release = sys.version_info[:3]
|
||||
CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
|
||||
CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
|
||||
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
|
||||
|
||||
|
||||
from bs4.element import (
|
||||
CData,
|
||||
Comment,
|
||||
Declaration,
|
||||
Doctype,
|
||||
ProcessingInstruction,
|
||||
)
|
||||
from bs4.dammit import EntitySubstitution, UnicodeDammit
|
||||
|
||||
from bs4.builder import (
|
||||
HTML,
|
||||
HTMLTreeBuilder,
|
||||
STRICT,
|
||||
)
|
||||
|
||||
|
||||
HTMLPARSER = 'html.parser'
|
||||
|
||||
class BeautifulSoupHTMLParser(HTMLParser):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
HTMLParser.__init__(self, *args, **kwargs)
|
||||
|
||||
# Keep a list of empty-element tags that were encountered
|
||||
# without an explicit closing tag. If we encounter a closing tag
|
||||
# of this type, we'll associate it with one of those entries.
|
||||
#
|
||||
# This isn't a stack because we don't care about the
|
||||
# order. It's a list of closing tags we've already handled and
|
||||
# will ignore, assuming they ever show up.
|
||||
self.already_closed_empty_element = []
|
||||
|
||||
def handle_startendtag(self, name, attrs):
|
||||
# This is only called when the markup looks like
|
||||
# <tag/>.
|
||||
|
||||
# is_startend() tells handle_starttag not to close the tag
|
||||
# just because its name matches a known empty-element tag. We
|
||||
# know that this is an empty-element tag and we want to call
|
||||
# handle_endtag ourselves.
|
||||
tag = self.handle_starttag(name, attrs, handle_empty_element=False)
|
||||
self.handle_endtag(name)
|
||||
|
||||
def handle_starttag(self, name, attrs, handle_empty_element=True):
|
||||
# XXX namespace
|
||||
attr_dict = {}
|
||||
for key, value in attrs:
|
||||
# Change None attribute values to the empty string
|
||||
# for consistency with the other tree builders.
|
||||
if value is None:
|
||||
value = ''
|
||||
attr_dict[key] = value
|
||||
attrvalue = '""'
|
||||
#print "START", name
|
||||
tag = self.soup.handle_starttag(name, None, None, attr_dict)
|
||||
if tag and tag.is_empty_element and handle_empty_element:
|
||||
# Unlike other parsers, html.parser doesn't send separate end tag
|
||||
# events for empty-element tags. (It's handled in
|
||||
# handle_startendtag, but only if the original markup looked like
|
||||
# <tag/>.)
|
||||
#
|
||||
# So we need to call handle_endtag() ourselves. Since we
|
||||
# know the start event is identical to the end event, we
|
||||
# don't want handle_endtag() to cross off any previous end
|
||||
# events for tags of this name.
|
||||
self.handle_endtag(name, check_already_closed=False)
|
||||
|
||||
# But we might encounter an explicit closing tag for this tag
|
||||
# later on. If so, we want to ignore it.
|
||||
self.already_closed_empty_element.append(name)
|
||||
|
||||
def handle_endtag(self, name, check_already_closed=True):
|
||||
#print "END", name
|
||||
if check_already_closed and name in self.already_closed_empty_element:
|
||||
# This is a redundant end tag for an empty-element tag.
|
||||
# We've already called handle_endtag() for it, so just
|
||||
# check it off the list.
|
||||
# print "ALREADY CLOSED", name
|
||||
self.already_closed_empty_element.remove(name)
|
||||
else:
|
||||
self.soup.handle_endtag(name)
|
||||
|
||||
def handle_data(self, data):
|
||||
self.soup.handle_data(data)
|
||||
|
||||
def handle_charref(self, name):
|
||||
# XXX workaround for a bug in HTMLParser. Remove this once
|
||||
# it's fixed in all supported versions.
|
||||
# http://bugs.python.org/issue13633
|
||||
if name.startswith('x'):
|
||||
real_name = int(name.lstrip('x'), 16)
|
||||
elif name.startswith('X'):
|
||||
real_name = int(name.lstrip('X'), 16)
|
||||
else:
|
||||
real_name = int(name)
|
||||
|
||||
try:
|
||||
data = unichr(real_name)
|
||||
except (ValueError, OverflowError), e:
|
||||
data = u"\N{REPLACEMENT CHARACTER}"
|
||||
|
||||
self.handle_data(data)
|
||||
|
||||
def handle_entityref(self, name):
|
||||
character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
|
||||
if character is not None:
|
||||
data = character
|
||||
else:
|
||||
data = "&%s;" % name
|
||||
self.handle_data(data)
|
||||
|
||||
def handle_comment(self, data):
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(data)
|
||||
self.soup.endData(Comment)
|
||||
|
||||
def handle_decl(self, data):
|
||||
self.soup.endData()
|
||||
if data.startswith("DOCTYPE "):
|
||||
data = data[len("DOCTYPE "):]
|
||||
elif data == 'DOCTYPE':
|
||||
# i.e. "<!DOCTYPE>"
|
||||
data = ''
|
||||
self.soup.handle_data(data)
|
||||
self.soup.endData(Doctype)
|
||||
|
||||
def unknown_decl(self, data):
|
||||
if data.upper().startswith('CDATA['):
|
||||
cls = CData
|
||||
data = data[len('CDATA['):]
|
||||
else:
|
||||
cls = Declaration
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(data)
|
||||
self.soup.endData(cls)
|
||||
|
||||
def handle_pi(self, data):
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(data)
|
||||
self.soup.endData(ProcessingInstruction)
|
||||
|
||||
|
||||
class HTMLParserTreeBuilder(HTMLTreeBuilder):
|
||||
|
||||
is_xml = False
|
||||
picklable = True
|
||||
NAME = HTMLPARSER
|
||||
features = [NAME, HTML, STRICT]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
|
||||
kwargs['strict'] = False
|
||||
if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
|
||||
kwargs['convert_charrefs'] = False
|
||||
self.parser_args = (args, kwargs)
|
||||
|
||||
def prepare_markup(self, markup, user_specified_encoding=None,
|
||||
document_declared_encoding=None, exclude_encodings=None):
|
||||
"""
|
||||
:return: A 4-tuple (markup, original encoding, encoding
|
||||
declared within markup, whether any characters had to be
|
||||
replaced with REPLACEMENT CHARACTER).
|
||||
"""
|
||||
if isinstance(markup, unicode):
|
||||
yield (markup, None, None, False)
|
||||
return
|
||||
|
||||
try_encodings = [user_specified_encoding, document_declared_encoding]
|
||||
dammit = UnicodeDammit(markup, try_encodings, is_html=True,
|
||||
exclude_encodings=exclude_encodings)
|
||||
yield (dammit.markup, dammit.original_encoding,
|
||||
dammit.declared_html_encoding,
|
||||
dammit.contains_replacement_characters)
|
||||
|
||||
def feed(self, markup):
|
||||
args, kwargs = self.parser_args
|
||||
parser = BeautifulSoupHTMLParser(*args, **kwargs)
|
||||
parser.soup = self.soup
|
||||
try:
|
||||
parser.feed(markup)
|
||||
except HTMLParseError, e:
|
||||
warnings.warn(RuntimeWarning(
|
||||
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
|
||||
raise e
|
||||
parser.already_closed_empty_element = []
|
||||
|
||||
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
|
||||
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
|
||||
# string.
|
||||
#
|
||||
# XXX This code can be removed once most Python 3 users are on 3.2.3.
|
||||
if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
|
||||
import re
|
||||
attrfind_tolerant = re.compile(
|
||||
r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
|
||||
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
|
||||
HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
|
||||
|
||||
locatestarttagend = re.compile(r"""
|
||||
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
|
||||
(?:\s+ # whitespace before attribute name
|
||||
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
|
||||
(?:\s*=\s* # value indicator
|
||||
(?:'[^']*' # LITA-enclosed value
|
||||
|\"[^\"]*\" # LIT-enclosed value
|
||||
|[^'\">\s]+ # bare value
|
||||
)
|
||||
)?
|
||||
)
|
||||
)*
|
||||
\s* # trailing whitespace
|
||||
""", re.VERBOSE)
|
||||
BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
|
||||
|
||||
from html.parser import tagfind, attrfind
|
||||
|
||||
def parse_starttag(self, i):
|
||||
self.__starttag_text = None
|
||||
endpos = self.check_for_whole_start_tag(i)
|
||||
if endpos < 0:
|
||||
return endpos
|
||||
rawdata = self.rawdata
|
||||
self.__starttag_text = rawdata[i:endpos]
|
||||
|
||||
# Now parse the data between i+1 and j into a tag and attrs
|
||||
attrs = []
|
||||
match = tagfind.match(rawdata, i+1)
|
||||
assert match, 'unexpected call to parse_starttag()'
|
||||
k = match.end()
|
||||
self.lasttag = tag = rawdata[i+1:k].lower()
|
||||
while k < endpos:
|
||||
if self.strict:
|
||||
m = attrfind.match(rawdata, k)
|
||||
else:
|
||||
m = attrfind_tolerant.match(rawdata, k)
|
||||
if not m:
|
||||
break
|
||||
attrname, rest, attrvalue = m.group(1, 2, 3)
|
||||
if not rest:
|
||||
attrvalue = None
|
||||
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
|
||||
attrvalue[:1] == '"' == attrvalue[-1:]:
|
||||
attrvalue = attrvalue[1:-1]
|
||||
if attrvalue:
|
||||
attrvalue = self.unescape(attrvalue)
|
||||
attrs.append((attrname.lower(), attrvalue))
|
||||
k = m.end()
|
||||
|
||||
end = rawdata[k:endpos].strip()
|
||||
if end not in (">", "/>"):
|
||||
lineno, offset = self.getpos()
|
||||
if "\n" in self.__starttag_text:
|
||||
lineno = lineno + self.__starttag_text.count("\n")
|
||||
offset = len(self.__starttag_text) \
|
||||
- self.__starttag_text.rfind("\n")
|
||||
else:
|
||||
offset = offset + len(self.__starttag_text)
|
||||
if self.strict:
|
||||
self.error("junk characters in start tag: %r"
|
||||
% (rawdata[k:endpos][:20],))
|
||||
self.handle_data(rawdata[i:endpos])
|
||||
return endpos
|
||||
if end.endswith('/>'):
|
||||
# XHTML-style empty tag: <span attr="value" />
|
||||
self.handle_startendtag(tag, attrs)
|
||||
else:
|
||||
self.handle_starttag(tag, attrs)
|
||||
if tag in self.CDATA_CONTENT_ELEMENTS:
|
||||
self.set_cdata_mode(tag)
|
||||
return endpos
|
||||
|
||||
def set_cdata_mode(self, elem):
|
||||
self.cdata_elem = elem.lower()
|
||||
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
|
||||
|
||||
BeautifulSoupHTMLParser.parse_starttag = parse_starttag
|
||||
BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
|
||||
|
||||
CONSTRUCTOR_TAKES_STRICT = True
|
|
@ -1,258 +0,0 @@
|
|||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__all__ = [
|
||||
'LXMLTreeBuilderForXML',
|
||||
'LXMLTreeBuilder',
|
||||
]
|
||||
|
||||
from io import BytesIO
|
||||
from StringIO import StringIO
|
||||
import collections
|
||||
from lxml import etree
|
||||
from bs4.element import (
|
||||
Comment,
|
||||
Doctype,
|
||||
NamespacedAttribute,
|
||||
ProcessingInstruction,
|
||||
XMLProcessingInstruction,
|
||||
)
|
||||
from bs4.builder import (
|
||||
FAST,
|
||||
HTML,
|
||||
HTMLTreeBuilder,
|
||||
PERMISSIVE,
|
||||
ParserRejectedMarkup,
|
||||
TreeBuilder,
|
||||
XML)
|
||||
from bs4.dammit import EncodingDetector
|
||||
|
||||
LXML = 'lxml'
|
||||
|
||||
class LXMLTreeBuilderForXML(TreeBuilder):
|
||||
DEFAULT_PARSER_CLASS = etree.XMLParser
|
||||
|
||||
is_xml = True
|
||||
processing_instruction_class = XMLProcessingInstruction
|
||||
|
||||
NAME = "lxml-xml"
|
||||
ALTERNATE_NAMES = ["xml"]
|
||||
|
||||
# Well, it's permissive by XML parser standards.
|
||||
features = [NAME, LXML, XML, FAST, PERMISSIVE]
|
||||
|
||||
CHUNK_SIZE = 512
|
||||
|
||||
# This namespace mapping is specified in the XML Namespace
|
||||
# standard.
|
||||
DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}
|
||||
|
||||
def default_parser(self, encoding):
|
||||
# This can either return a parser object or a class, which
|
||||
# will be instantiated with default arguments.
|
||||
if self._default_parser is not None:
|
||||
return self._default_parser
|
||||
return etree.XMLParser(
|
||||
target=self, strip_cdata=False, recover=True, encoding=encoding)
|
||||
|
||||
def parser_for(self, encoding):
|
||||
# Use the default parser.
|
||||
parser = self.default_parser(encoding)
|
||||
|
||||
if isinstance(parser, collections.Callable):
|
||||
# Instantiate the parser with default arguments
|
||||
parser = parser(target=self, strip_cdata=False, encoding=encoding)
|
||||
return parser
|
||||
|
||||
def __init__(self, parser=None, empty_element_tags=None):
|
||||
# TODO: Issue a warning if parser is present but not a
|
||||
# callable, since that means there's no way to create new
|
||||
# parsers for different encodings.
|
||||
self._default_parser = parser
|
||||
if empty_element_tags is not None:
|
||||
self.empty_element_tags = set(empty_element_tags)
|
||||
self.soup = None
|
||||
self.nsmaps = [self.DEFAULT_NSMAPS]
|
||||
|
||||
def _getNsTag(self, tag):
|
||||
# Split the namespace URL out of a fully-qualified lxml tag
|
||||
# name. Copied from lxml's src/lxml/sax.py.
|
||||
if tag[0] == '{':
|
||||
return tuple(tag[1:].split('}', 1))
|
||||
else:
|
||||
return (None, tag)
|
||||
|
||||
def prepare_markup(self, markup, user_specified_encoding=None,
|
||||
exclude_encodings=None,
|
||||
document_declared_encoding=None):
|
||||
"""
|
||||
:yield: A series of 4-tuples.
|
||||
(markup, encoding, declared encoding,
|
||||
has undergone character replacement)
|
||||
|
||||
Each 4-tuple represents a strategy for parsing the document.
|
||||
"""
|
||||
# Instead of using UnicodeDammit to convert the bytestring to
|
||||
# Unicode using different encodings, use EncodingDetector to
|
||||
# iterate over the encodings, and tell lxml to try to parse
|
||||
# the document as each one in turn.
|
||||
is_html = not self.is_xml
|
||||
if is_html:
|
||||
self.processing_instruction_class = ProcessingInstruction
|
||||
else:
|
||||
self.processing_instruction_class = XMLProcessingInstruction
|
||||
|
||||
if isinstance(markup, unicode):
|
||||
# We were given Unicode. Maybe lxml can parse Unicode on
|
||||
# this system?
|
||||
yield markup, None, document_declared_encoding, False
|
||||
|
||||
if isinstance(markup, unicode):
|
||||
# No, apparently not. Convert the Unicode to UTF-8 and
|
||||
# tell lxml to parse it as UTF-8.
|
||||
yield (markup.encode("utf8"), "utf8",
|
||||
document_declared_encoding, False)
|
||||
|
||||
try_encodings = [user_specified_encoding, document_declared_encoding]
|
||||
detector = EncodingDetector(
|
||||
markup, try_encodings, is_html, exclude_encodings)
|
||||
for encoding in detector.encodings:
|
||||
yield (detector.markup, encoding, document_declared_encoding, False)
|
||||
|
||||
def feed(self, markup):
|
||||
if isinstance(markup, bytes):
|
||||
markup = BytesIO(markup)
|
||||
elif isinstance(markup, unicode):
|
||||
markup = StringIO(markup)
|
||||
|
||||
# Call feed() at least once, even if the markup is empty,
|
||||
# or the parser won't be initialized.
|
||||
data = markup.read(self.CHUNK_SIZE)
|
||||
try:
|
||||
self.parser = self.parser_for(self.soup.original_encoding)
|
||||
self.parser.feed(data)
|
||||
while len(data) != 0:
|
||||
# Now call feed() on the rest of the data, chunk by chunk.
|
||||
data = markup.read(self.CHUNK_SIZE)
|
||||
if len(data) != 0:
|
||||
self.parser.feed(data)
|
||||
self.parser.close()
|
||||
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
|
||||
raise ParserRejectedMarkup(str(e))
|
||||
|
||||
def close(self):
|
||||
self.nsmaps = [self.DEFAULT_NSMAPS]
|
||||
|
||||
def start(self, name, attrs, nsmap={}):
|
||||
# Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
|
||||
attrs = dict(attrs)
|
||||
nsprefix = None
|
||||
# Invert each namespace map as it comes in.
|
||||
if len(self.nsmaps) > 1:
|
||||
# There are no new namespaces for this tag, but
|
||||
# non-default namespaces are in play, so we need a
|
||||
# separate tag stack to know when they end.
|
||||
self.nsmaps.append(None)
|
||||
elif len(nsmap) > 0:
|
||||
# A new namespace mapping has come into play.
|
||||
inverted_nsmap = dict((value, key) for key, value in nsmap.items())
|
||||
self.nsmaps.append(inverted_nsmap)
|
||||
# Also treat the namespace mapping as a set of attributes on the
|
||||
# tag, so we can recreate it later.
|
||||
attrs = attrs.copy()
|
||||
for prefix, namespace in nsmap.items():
|
||||
attribute = NamespacedAttribute(
|
||||
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
|
||||
attrs[attribute] = namespace
|
||||
|
||||
# Namespaces are in play. Find any attributes that came in
|
||||
# from lxml with namespaces attached to their names, and
|
||||
# turn then into NamespacedAttribute objects.
|
||||
new_attrs = {}
|
||||
for attr, value in attrs.items():
|
||||
namespace, attr = self._getNsTag(attr)
|
||||
if namespace is None:
|
||||
new_attrs[attr] = value
|
||||
else:
|
||||
nsprefix = self._prefix_for_namespace(namespace)
|
||||
attr = NamespacedAttribute(nsprefix, attr, namespace)
|
||||
new_attrs[attr] = value
|
||||
attrs = new_attrs
|
||||
|
||||
namespace, name = self._getNsTag(name)
|
||||
nsprefix = self._prefix_for_namespace(namespace)
|
||||
self.soup.handle_starttag(name, namespace, nsprefix, attrs)
|
||||
|
||||
def _prefix_for_namespace(self, namespace):
|
||||
"""Find the currently active prefix for the given namespace."""
|
||||
if namespace is None:
|
||||
return None
|
||||
for inverted_nsmap in reversed(self.nsmaps):
|
||||
if inverted_nsmap is not None and namespace in inverted_nsmap:
|
||||
return inverted_nsmap[namespace]
|
||||
return None
|
||||
|
||||
def end(self, name):
|
||||
self.soup.endData()
|
||||
completed_tag = self.soup.tagStack[-1]
|
||||
namespace, name = self._getNsTag(name)
|
||||
nsprefix = None
|
||||
if namespace is not None:
|
||||
for inverted_nsmap in reversed(self.nsmaps):
|
||||
if inverted_nsmap is not None and namespace in inverted_nsmap:
|
||||
nsprefix = inverted_nsmap[namespace]
|
||||
break
|
||||
self.soup.handle_endtag(name, nsprefix)
|
||||
if len(self.nsmaps) > 1:
|
||||
# This tag, or one of its parents, introduced a namespace
|
||||
# mapping, so pop it off the stack.
|
||||
self.nsmaps.pop()
|
||||
|
||||
def pi(self, target, data):
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(target + ' ' + data)
|
||||
self.soup.endData(self.processing_instruction_class)
|
||||
|
||||
def data(self, content):
|
||||
self.soup.handle_data(content)
|
||||
|
||||
def doctype(self, name, pubid, system):
|
||||
self.soup.endData()
|
||||
doctype = Doctype.for_name_and_ids(name, pubid, system)
|
||||
self.soup.object_was_parsed(doctype)
|
||||
|
||||
def comment(self, content):
|
||||
"Handle comments as Comment objects."
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(content)
|
||||
self.soup.endData(Comment)
|
||||
|
||||
def test_fragment_to_document(self, fragment):
|
||||
"""See `TreeBuilder`."""
|
||||
return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
|
||||
|
||||
|
||||
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
|
||||
|
||||
NAME = LXML
|
||||
ALTERNATE_NAMES = ["lxml-html"]
|
||||
|
||||
features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
|
||||
is_xml = False
|
||||
processing_instruction_class = ProcessingInstruction
|
||||
|
||||
def default_parser(self, encoding):
|
||||
return etree.HTMLParser
|
||||
|
||||
def feed(self, markup):
|
||||
encoding = self.soup.original_encoding
|
||||
try:
|
||||
self.parser = self.parser_for(encoding)
|
||||
self.parser.feed(markup)
|
||||
self.parser.close()
|
||||
except (UnicodeDecodeError, LookupError, etree.ParserError), e:
|
||||
raise ParserRejectedMarkup(str(e))
|
||||
|
||||
|
||||
def test_fragment_to_document(self, fragment):
|
||||
"""See `TreeBuilder`."""
|
||||
return u'<html><body>%s</body></html>' % fragment
|
|
@ -1,842 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""Beautiful Soup bonus library: Unicode, Dammit
|
||||
|
||||
This library converts a bytestream to Unicode through any means
|
||||
necessary. It is heavily based on code from Mark Pilgrim's Universal
|
||||
Feed Parser. It works best on XML and HTML, but it does not rewrite the
|
||||
XML or HTML to reflect a new encoding; that's the tree builder's job.
|
||||
"""
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
import codecs
|
||||
from htmlentitydefs import codepoint2name
|
||||
import re
|
||||
import logging
|
||||
import string
|
||||
|
||||
# Import a library to autodetect character encodings.
|
||||
chardet_type = None
|
||||
try:
|
||||
# First try the fast C implementation.
|
||||
# PyPI package: cchardet
|
||||
import cchardet
|
||||
def chardet_dammit(s):
|
||||
return cchardet.detect(s)['encoding']
|
||||
except ImportError:
|
||||
try:
|
||||
# Fall back to the pure Python implementation
|
||||
# Debian package: python-chardet
|
||||
# PyPI package: chardet
|
||||
import chardet
|
||||
def chardet_dammit(s):
|
||||
return chardet.detect(s)['encoding']
|
||||
#import chardet.constants
|
||||
#chardet.constants._debug = 1
|
||||
except ImportError:
|
||||
# No chardet available.
|
||||
def chardet_dammit(s):
|
||||
return None
|
||||
|
||||
# Available from http://cjkpython.i18n.org/.
|
||||
try:
|
||||
import iconv_codec
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
xml_encoding_re = re.compile(
|
||||
'^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
|
||||
html_meta_re = re.compile(
|
||||
'<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
|
||||
|
||||
class EntitySubstitution(object):
|
||||
|
||||
"""Substitute XML or HTML entities for the corresponding characters."""
|
||||
|
||||
def _populate_class_variables():
|
||||
lookup = {}
|
||||
reverse_lookup = {}
|
||||
characters_for_re = []
|
||||
for codepoint, name in list(codepoint2name.items()):
|
||||
character = unichr(codepoint)
|
||||
if codepoint != 34:
|
||||
# There's no point in turning the quotation mark into
|
||||
# ", unless it happens within an attribute value, which
|
||||
# is handled elsewhere.
|
||||
characters_for_re.append(character)
|
||||
lookup[character] = name
|
||||
# But we do want to turn " into the quotation mark.
|
||||
reverse_lookup[name] = character
|
||||
re_definition = "[%s]" % "".join(characters_for_re)
|
||||
return lookup, reverse_lookup, re.compile(re_definition)
|
||||
(CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
|
||||
CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()
|
||||
|
||||
CHARACTER_TO_XML_ENTITY = {
|
||||
"'": "apos",
|
||||
'"': "quot",
|
||||
"&": "amp",
|
||||
"<": "lt",
|
||||
">": "gt",
|
||||
}
|
||||
|
||||
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
|
||||
"&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
|
||||
")")
|
||||
|
||||
AMPERSAND_OR_BRACKET = re.compile("([<>&])")
|
||||
|
||||
@classmethod
|
||||
def _substitute_html_entity(cls, matchobj):
|
||||
entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
|
||||
return "&%s;" % entity
|
||||
|
||||
@classmethod
|
||||
def _substitute_xml_entity(cls, matchobj):
|
||||
"""Used with a regular expression to substitute the
|
||||
appropriate XML entity for an XML special character."""
|
||||
entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
|
||||
return "&%s;" % entity
|
||||
|
||||
@classmethod
|
||||
def quoted_attribute_value(self, value):
|
||||
"""Make a value into a quoted XML attribute, possibly escaping it.
|
||||
|
||||
Most strings will be quoted using double quotes.
|
||||
|
||||
Bob's Bar -> "Bob's Bar"
|
||||
|
||||
If a string contains double quotes, it will be quoted using
|
||||
single quotes.
|
||||
|
||||
Welcome to "my bar" -> 'Welcome to "my bar"'
|
||||
|
||||
If a string contains both single and double quotes, the
|
||||
double quotes will be escaped, and the string will be quoted
|
||||
using double quotes.
|
||||
|
||||
Welcome to "Bob's Bar" -> "Welcome to "Bob's bar"
|
||||
"""
|
||||
quote_with = '"'
|
||||
if '"' in value:
|
||||
if "'" in value:
|
||||
# The string contains both single and double
|
||||
# quotes. Turn the double quotes into
|
||||
# entities. We quote the double quotes rather than
|
||||
# the single quotes because the entity name is
|
||||
# """ whether this is HTML or XML. If we
|
||||
# quoted the single quotes, we'd have to decide
|
||||
# between ' and &squot;.
|
||||
replace_with = """
|
||||
value = value.replace('"', replace_with)
|
||||
else:
|
||||
# There are double quotes but no single quotes.
|
||||
# We can use single quotes to quote the attribute.
|
||||
quote_with = "'"
|
||||
return quote_with + value + quote_with
|
||||
|
||||
@classmethod
|
||||
def substitute_xml(cls, value, make_quoted_attribute=False):
|
||||
"""Substitute XML entities for special XML characters.
|
||||
|
||||
:param value: A string to be substituted. The less-than sign
|
||||
will become <, the greater-than sign will become >,
|
||||
and any ampersands will become &. If you want ampersands
|
||||
that appear to be part of an entity definition to be left
|
||||
alone, use substitute_xml_containing_entities() instead.
|
||||
|
||||
:param make_quoted_attribute: If True, then the string will be
|
||||
quoted, as befits an attribute value.
|
||||
"""
|
||||
# Escape angle brackets and ampersands.
|
||||
value = cls.AMPERSAND_OR_BRACKET.sub(
|
||||
cls._substitute_xml_entity, value)
|
||||
|
||||
if make_quoted_attribute:
|
||||
value = cls.quoted_attribute_value(value)
|
||||
return value
|
||||
|
||||
@classmethod
|
||||
def substitute_xml_containing_entities(
|
||||
cls, value, make_quoted_attribute=False):
|
||||
"""Substitute XML entities for special XML characters.
|
||||
|
||||
:param value: A string to be substituted. The less-than sign will
|
||||
become <, the greater-than sign will become >, and any
|
||||
ampersands that are not part of an entity defition will
|
||||
become &.
|
||||
|
||||
:param make_quoted_attribute: If True, then the string will be
|
||||
quoted, as befits an attribute value.
|
||||
"""
|
||||
# Escape angle brackets, and ampersands that aren't part of
|
||||
# entities.
|
||||
value = cls.BARE_AMPERSAND_OR_BRACKET.sub(
|
||||
cls._substitute_xml_entity, value)
|
||||
|
||||
if make_quoted_attribute:
|
||||
value = cls.quoted_attribute_value(value)
|
||||
return value
|
||||
|
||||
@classmethod
|
||||
def substitute_html(cls, s):
|
||||
"""Replace certain Unicode characters with named HTML entities.
|
||||
|
||||
This differs from data.encode(encoding, 'xmlcharrefreplace')
|
||||
in that the goal is to make the result more readable (to those
|
||||
with ASCII displays) rather than to recover from
|
||||
errors. There's absolutely nothing wrong with a UTF-8 string
|
||||
containg a LATIN SMALL LETTER E WITH ACUTE, but replacing that
|
||||
character with "é" will make it more readable to some
|
||||
people.
|
||||
"""
|
||||
return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
|
||||
cls._substitute_html_entity, s)
|
||||
|
||||
|
||||
class EncodingDetector:
|
||||
"""Suggests a number of possible encodings for a bytestring.
|
||||
|
||||
Order of precedence:
|
||||
|
||||
1. Encodings you specifically tell EncodingDetector to try first
|
||||
(the override_encodings argument to the constructor).
|
||||
|
||||
2. An encoding declared within the bytestring itself, either in an
|
||||
XML declaration (if the bytestring is to be interpreted as an XML
|
||||
document), or in a <meta> tag (if the bytestring is to be
|
||||
interpreted as an HTML document.)
|
||||
|
||||
3. An encoding detected through textual analysis by chardet,
|
||||
cchardet, or a similar external library.
|
||||
|
||||
4. UTF-8.
|
||||
|
||||
5. Windows-1252.
|
||||
"""
|
||||
def __init__(self, markup, override_encodings=None, is_html=False,
|
||||
exclude_encodings=None):
|
||||
self.override_encodings = override_encodings or []
|
||||
exclude_encodings = exclude_encodings or []
|
||||
self.exclude_encodings = set([x.lower() for x in exclude_encodings])
|
||||
self.chardet_encoding = None
|
||||
self.is_html = is_html
|
||||
self.declared_encoding = None
|
||||
|
||||
# First order of business: strip a byte-order mark.
|
||||
self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup)
|
||||
|
||||
def _usable(self, encoding, tried):
|
||||
if encoding is not None:
|
||||
encoding = encoding.lower()
|
||||
if encoding in self.exclude_encodings:
|
||||
return False
|
||||
if encoding not in tried:
|
||||
tried.add(encoding)
|
||||
return True
|
||||
return False
|
||||
|
||||
@property
|
||||
def encodings(self):
|
||||
"""Yield a number of encodings that might work for this markup."""
|
||||
tried = set()
|
||||
for e in self.override_encodings:
|
||||
if self._usable(e, tried):
|
||||
yield e
|
||||
|
||||
# Did the document originally start with a byte-order mark
|
||||
# that indicated its encoding?
|
||||
if self._usable(self.sniffed_encoding, tried):
|
||||
yield self.sniffed_encoding
|
||||
|
||||
# Look within the document for an XML or HTML encoding
|
||||
# declaration.
|
||||
if self.declared_encoding is None:
|
||||
self.declared_encoding = self.find_declared_encoding(
|
||||
self.markup, self.is_html)
|
||||
if self._usable(self.declared_encoding, tried):
|
||||
yield self.declared_encoding
|
||||
|
||||
# Use third-party character set detection to guess at the
|
||||
# encoding.
|
||||
if self.chardet_encoding is None:
|
||||
self.chardet_encoding = chardet_dammit(self.markup)
|
||||
if self._usable(self.chardet_encoding, tried):
|
||||
yield self.chardet_encoding
|
||||
|
||||
# As a last-ditch effort, try utf-8 and windows-1252.
|
||||
for e in ('utf-8', 'windows-1252'):
|
||||
if self._usable(e, tried):
|
||||
yield e
|
||||
|
||||
@classmethod
|
||||
def strip_byte_order_mark(cls, data):
|
||||
"""If a byte-order mark is present, strip it and return the encoding it implies."""
|
||||
encoding = None
|
||||
if isinstance(data, unicode):
|
||||
# Unicode data cannot have a byte-order mark.
|
||||
return data, encoding
|
||||
if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
|
||||
and (data[2:4] != '\x00\x00'):
|
||||
encoding = 'utf-16be'
|
||||
data = data[2:]
|
||||
elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \
|
||||
and (data[2:4] != '\x00\x00'):
|
||||
encoding = 'utf-16le'
|
||||
data = data[2:]
|
||||
elif data[:3] == b'\xef\xbb\xbf':
|
||||
encoding = 'utf-8'
|
||||
data = data[3:]
|
||||
elif data[:4] == b'\x00\x00\xfe\xff':
|
||||
encoding = 'utf-32be'
|
||||
data = data[4:]
|
||||
elif data[:4] == b'\xff\xfe\x00\x00':
|
||||
encoding = 'utf-32le'
|
||||
data = data[4:]
|
||||
return data, encoding
|
||||
|
||||
@classmethod
|
||||
def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False):
|
||||
"""Given a document, tries to find its declared encoding.
|
||||
|
||||
An XML encoding is declared at the beginning of the document.
|
||||
|
||||
An HTML encoding is declared in a <meta> tag, hopefully near the
|
||||
beginning of the document.
|
||||
"""
|
||||
if search_entire_document:
|
||||
xml_endpos = html_endpos = len(markup)
|
||||
else:
|
||||
xml_endpos = 1024
|
||||
html_endpos = max(2048, int(len(markup) * 0.05))
|
||||
|
||||
declared_encoding = None
|
||||
declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
|
||||
if not declared_encoding_match and is_html:
|
||||
declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
|
||||
if declared_encoding_match is not None:
|
||||
declared_encoding = declared_encoding_match.groups()[0].decode(
|
||||
'ascii', 'replace')
|
||||
if declared_encoding:
|
||||
return declared_encoding.lower()
|
||||
return None
|
||||
|
||||
class UnicodeDammit:
|
||||
"""A class for detecting the encoding of a *ML document and
|
||||
converting it to a Unicode string. If the source encoding is
|
||||
windows-1252, can replace MS smart quotes with their HTML or XML
|
||||
equivalents."""
|
||||
|
||||
# This dictionary maps commonly seen values for "charset" in HTML
|
||||
# meta tags to the corresponding Python codec names. It only covers
|
||||
# values that aren't in Python's aliases and can't be determined
|
||||
# by the heuristics in find_codec.
|
||||
CHARSET_ALIASES = {"macintosh": "mac-roman",
|
||||
"x-sjis": "shift-jis"}
|
||||
|
||||
ENCODINGS_WITH_SMART_QUOTES = [
|
||||
"windows-1252",
|
||||
"iso-8859-1",
|
||||
"iso-8859-2",
|
||||
]
|
||||
|
||||
def __init__(self, markup, override_encodings=[],
|
||||
smart_quotes_to=None, is_html=False, exclude_encodings=[]):
|
||||
self.smart_quotes_to = smart_quotes_to
|
||||
self.tried_encodings = []
|
||||
self.contains_replacement_characters = False
|
||||
self.is_html = is_html
|
||||
self.log = logging.getLogger(__name__)
|
||||
self.detector = EncodingDetector(
|
||||
markup, override_encodings, is_html, exclude_encodings)
|
||||
|
||||
# Short-circuit if the data is in Unicode to begin with.
|
||||
if isinstance(markup, unicode) or markup == '':
|
||||
self.markup = markup
|
||||
self.unicode_markup = unicode(markup)
|
||||
self.original_encoding = None
|
||||
return
|
||||
|
||||
# The encoding detector may have stripped a byte-order mark.
|
||||
# Use the stripped markup from this point on.
|
||||
self.markup = self.detector.markup
|
||||
|
||||
u = None
|
||||
for encoding in self.detector.encodings:
|
||||
markup = self.detector.markup
|
||||
u = self._convert_from(encoding)
|
||||
if u is not None:
|
||||
break
|
||||
|
||||
if not u:
|
||||
# None of the encodings worked. As an absolute last resort,
|
||||
# try them again with character replacement.
|
||||
|
||||
for encoding in self.detector.encodings:
|
||||
if encoding != "ascii":
|
||||
u = self._convert_from(encoding, "replace")
|
||||
if u is not None:
|
||||
self.log.warning(
|
||||
"Some characters could not be decoded, and were "
|
||||
"replaced with REPLACEMENT CHARACTER."
|
||||
)
|
||||
self.contains_replacement_characters = True
|
||||
break
|
||||
|
||||
# If none of that worked, we could at this point force it to
|
||||
# ASCII, but that would destroy so much data that I think
|
||||
# giving up is better.
|
||||
self.unicode_markup = u
|
||||
if not u:
|
||||
self.original_encoding = None
|
||||
|
||||
def _sub_ms_char(self, match):
|
||||
"""Changes a MS smart quote character to an XML or HTML
|
||||
entity, or an ASCII character."""
|
||||
orig = match.group(1)
|
||||
if self.smart_quotes_to == 'ascii':
|
||||
sub = self.MS_CHARS_TO_ASCII.get(orig).encode()
|
||||
else:
|
||||
sub = self.MS_CHARS.get(orig)
|
||||
if type(sub) == tuple:
|
||||
if self.smart_quotes_to == 'xml':
|
||||
sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
|
||||
else:
|
||||
sub = '&'.encode() + sub[0].encode() + ';'.encode()
|
||||
else:
|
||||
sub = sub.encode()
|
||||
return sub
|
||||
|
||||
def _convert_from(self, proposed, errors="strict"):
|
||||
proposed = self.find_codec(proposed)
|
||||
if not proposed or (proposed, errors) in self.tried_encodings:
|
||||
return None
|
||||
self.tried_encodings.append((proposed, errors))
|
||||
markup = self.markup
|
||||
# Convert smart quotes to HTML if coming from an encoding
|
||||
# that might have them.
|
||||
if (self.smart_quotes_to is not None
|
||||
and proposed in self.ENCODINGS_WITH_SMART_QUOTES):
|
||||
smart_quotes_re = b"([\x80-\x9f])"
|
||||
smart_quotes_compiled = re.compile(smart_quotes_re)
|
||||
markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
|
||||
|
||||
try:
|
||||
#print "Trying to convert document to %s (errors=%s)" % (
|
||||
# proposed, errors)
|
||||
u = self._to_unicode(markup, proposed, errors)
|
||||
self.markup = u
|
||||
self.original_encoding = proposed
|
||||
except Exception as e:
|
||||
#print "That didn't work!"
|
||||
#print e
|
||||
return None
|
||||
#print "Correct encoding: %s" % proposed
|
||||
return self.markup
|
||||
|
||||
def _to_unicode(self, data, encoding, errors="strict"):
|
||||
'''Given a string and its encoding, decodes the string into Unicode.
|
||||
%encoding is a string recognized by encodings.aliases'''
|
||||
return unicode(data, encoding, errors)
|
||||
|
||||
@property
|
||||
def declared_html_encoding(self):
|
||||
if not self.is_html:
|
||||
return None
|
||||
return self.detector.declared_encoding
|
||||
|
||||
def find_codec(self, charset):
|
||||
value = (self._codec(self.CHARSET_ALIASES.get(charset, charset))
|
||||
or (charset and self._codec(charset.replace("-", "")))
|
||||
or (charset and self._codec(charset.replace("-", "_")))
|
||||
or (charset and charset.lower())
|
||||
or charset
|
||||
)
|
||||
if value:
|
||||
return value.lower()
|
||||
return None
|
||||
|
||||
def _codec(self, charset):
|
||||
if not charset:
|
||||
return charset
|
||||
codec = None
|
||||
try:
|
||||
codecs.lookup(charset)
|
||||
codec = charset
|
||||
except (LookupError, ValueError):
|
||||
pass
|
||||
return codec
|
||||
|
||||
|
||||
# A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities.
|
||||
MS_CHARS = {b'\x80': ('euro', '20AC'),
|
||||
b'\x81': ' ',
|
||||
b'\x82': ('sbquo', '201A'),
|
||||
b'\x83': ('fnof', '192'),
|
||||
b'\x84': ('bdquo', '201E'),
|
||||
b'\x85': ('hellip', '2026'),
|
||||
b'\x86': ('dagger', '2020'),
|
||||
b'\x87': ('Dagger', '2021'),
|
||||
b'\x88': ('circ', '2C6'),
|
||||
b'\x89': ('permil', '2030'),
|
||||
b'\x8A': ('Scaron', '160'),
|
||||
b'\x8B': ('lsaquo', '2039'),
|
||||
b'\x8C': ('OElig', '152'),
|
||||
b'\x8D': '?',
|
||||
b'\x8E': ('#x17D', '17D'),
|
||||
b'\x8F': '?',
|
||||
b'\x90': '?',
|
||||
b'\x91': ('lsquo', '2018'),
|
||||
b'\x92': ('rsquo', '2019'),
|
||||
b'\x93': ('ldquo', '201C'),
|
||||
b'\x94': ('rdquo', '201D'),
|
||||
b'\x95': ('bull', '2022'),
|
||||
b'\x96': ('ndash', '2013'),
|
||||
b'\x97': ('mdash', '2014'),
|
||||
b'\x98': ('tilde', '2DC'),
|
||||
b'\x99': ('trade', '2122'),
|
||||
b'\x9a': ('scaron', '161'),
|
||||
b'\x9b': ('rsaquo', '203A'),
|
||||
b'\x9c': ('oelig', '153'),
|
||||
b'\x9d': '?',
|
||||
b'\x9e': ('#x17E', '17E'),
|
||||
b'\x9f': ('Yuml', ''),}
|
||||
|
||||
# A parochial partial mapping of ISO-Latin-1 to ASCII. Contains
|
||||
# horrors like stripping diacritical marks to turn á into a, but also
|
||||
# contains non-horrors like turning “ into ".
|
||||
MS_CHARS_TO_ASCII = {
|
||||
b'\x80' : 'EUR',
|
||||
b'\x81' : ' ',
|
||||
b'\x82' : ',',
|
||||
b'\x83' : 'f',
|
||||
b'\x84' : ',,',
|
||||
b'\x85' : '...',
|
||||
b'\x86' : '+',
|
||||
b'\x87' : '++',
|
||||
b'\x88' : '^',
|
||||
b'\x89' : '%',
|
||||
b'\x8a' : 'S',
|
||||
b'\x8b' : '<',
|
||||
b'\x8c' : 'OE',
|
||||
b'\x8d' : '?',
|
||||
b'\x8e' : 'Z',
|
||||
b'\x8f' : '?',
|
||||
b'\x90' : '?',
|
||||
b'\x91' : "'",
|
||||
b'\x92' : "'",
|
||||
b'\x93' : '"',
|
||||
b'\x94' : '"',
|
||||
b'\x95' : '*',
|
||||
b'\x96' : '-',
|
||||
b'\x97' : '--',
|
||||
b'\x98' : '~',
|
||||
b'\x99' : '(TM)',
|
||||
b'\x9a' : 's',
|
||||
b'\x9b' : '>',
|
||||
b'\x9c' : 'oe',
|
||||
b'\x9d' : '?',
|
||||
b'\x9e' : 'z',
|
||||
b'\x9f' : 'Y',
|
||||
b'\xa0' : ' ',
|
||||
b'\xa1' : '!',
|
||||
b'\xa2' : 'c',
|
||||
b'\xa3' : 'GBP',
|
||||
b'\xa4' : '$', #This approximation is especially parochial--this is the
|
||||
#generic currency symbol.
|
||||
b'\xa5' : 'YEN',
|
||||
b'\xa6' : '|',
|
||||
b'\xa7' : 'S',
|
||||
b'\xa8' : '..',
|
||||
b'\xa9' : '',
|
||||
b'\xaa' : '(th)',
|
||||
b'\xab' : '<<',
|
||||
b'\xac' : '!',
|
||||
b'\xad' : ' ',
|
||||
b'\xae' : '(R)',
|
||||
b'\xaf' : '-',
|
||||
b'\xb0' : 'o',
|
||||
b'\xb1' : '+-',
|
||||
b'\xb2' : '2',
|
||||
b'\xb3' : '3',
|
||||
b'\xb4' : ("'", 'acute'),
|
||||
b'\xb5' : 'u',
|
||||
b'\xb6' : 'P',
|
||||
b'\xb7' : '*',
|
||||
b'\xb8' : ',',
|
||||
b'\xb9' : '1',
|
||||
b'\xba' : '(th)',
|
||||
b'\xbb' : '>>',
|
||||
b'\xbc' : '1/4',
|
||||
b'\xbd' : '1/2',
|
||||
b'\xbe' : '3/4',
|
||||
b'\xbf' : '?',
|
||||
b'\xc0' : 'A',
|
||||
b'\xc1' : 'A',
|
||||
b'\xc2' : 'A',
|
||||
b'\xc3' : 'A',
|
||||
b'\xc4' : 'A',
|
||||
b'\xc5' : 'A',
|
||||
b'\xc6' : 'AE',
|
||||
b'\xc7' : 'C',
|
||||
b'\xc8' : 'E',
|
||||
b'\xc9' : 'E',
|
||||
b'\xca' : 'E',
|
||||
b'\xcb' : 'E',
|
||||
b'\xcc' : 'I',
|
||||
b'\xcd' : 'I',
|
||||
b'\xce' : 'I',
|
||||
b'\xcf' : 'I',
|
||||
b'\xd0' : 'D',
|
||||
b'\xd1' : 'N',
|
||||
b'\xd2' : 'O',
|
||||
b'\xd3' : 'O',
|
||||
b'\xd4' : 'O',
|
||||
b'\xd5' : 'O',
|
||||
b'\xd6' : 'O',
|
||||
b'\xd7' : '*',
|
||||
b'\xd8' : 'O',
|
||||
b'\xd9' : 'U',
|
||||
b'\xda' : 'U',
|
||||
b'\xdb' : 'U',
|
||||
b'\xdc' : 'U',
|
||||
b'\xdd' : 'Y',
|
||||
b'\xde' : 'b',
|
||||
b'\xdf' : 'B',
|
||||
b'\xe0' : 'a',
|
||||
b'\xe1' : 'a',
|
||||
b'\xe2' : 'a',
|
||||
b'\xe3' : 'a',
|
||||
b'\xe4' : 'a',
|
||||
b'\xe5' : 'a',
|
||||
b'\xe6' : 'ae',
|
||||
b'\xe7' : 'c',
|
||||
b'\xe8' : 'e',
|
||||
b'\xe9' : 'e',
|
||||
b'\xea' : 'e',
|
||||
b'\xeb' : 'e',
|
||||
b'\xec' : 'i',
|
||||
b'\xed' : 'i',
|
||||
b'\xee' : 'i',
|
||||
b'\xef' : 'i',
|
||||
b'\xf0' : 'o',
|
||||
b'\xf1' : 'n',
|
||||
b'\xf2' : 'o',
|
||||
b'\xf3' : 'o',
|
||||
b'\xf4' : 'o',
|
||||
b'\xf5' : 'o',
|
||||
b'\xf6' : 'o',
|
||||
b'\xf7' : '/',
|
||||
b'\xf8' : 'o',
|
||||
b'\xf9' : 'u',
|
||||
b'\xfa' : 'u',
|
||||
b'\xfb' : 'u',
|
||||
b'\xfc' : 'u',
|
||||
b'\xfd' : 'y',
|
||||
b'\xfe' : 'b',
|
||||
b'\xff' : 'y',
|
||||
}
|
||||
|
||||
# A map used when removing rogue Windows-1252/ISO-8859-1
|
||||
# characters in otherwise UTF-8 documents.
|
||||
#
|
||||
# Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in
|
||||
# Windows-1252.
|
||||
WINDOWS_1252_TO_UTF8 = {
|
||||
0x80 : b'\xe2\x82\xac', # €
|
||||
0x82 : b'\xe2\x80\x9a', # ‚
|
||||
0x83 : b'\xc6\x92', # ƒ
|
||||
0x84 : b'\xe2\x80\x9e', # „
|
||||
0x85 : b'\xe2\x80\xa6', # …
|
||||
0x86 : b'\xe2\x80\xa0', # †
|
||||
0x87 : b'\xe2\x80\xa1', # ‡
|
||||
0x88 : b'\xcb\x86', # ˆ
|
||||
0x89 : b'\xe2\x80\xb0', # ‰
|
||||
0x8a : b'\xc5\xa0', # Š
|
||||
0x8b : b'\xe2\x80\xb9', # ‹
|
||||
0x8c : b'\xc5\x92', # Œ
|
||||
0x8e : b'\xc5\xbd', # Ž
|
||||
0x91 : b'\xe2\x80\x98', # ‘
|
||||
0x92 : b'\xe2\x80\x99', # ’
|
||||
0x93 : b'\xe2\x80\x9c', # “
|
||||
0x94 : b'\xe2\x80\x9d', # ”
|
||||
0x95 : b'\xe2\x80\xa2', # •
|
||||
0x96 : b'\xe2\x80\x93', # –
|
||||
0x97 : b'\xe2\x80\x94', # —
|
||||
0x98 : b'\xcb\x9c', # ˜
|
||||
0x99 : b'\xe2\x84\xa2', # ™
|
||||
0x9a : b'\xc5\xa1', # š
|
||||
0x9b : b'\xe2\x80\xba', # ›
|
||||
0x9c : b'\xc5\x93', # œ
|
||||
0x9e : b'\xc5\xbe', # ž
|
||||
0x9f : b'\xc5\xb8', # Ÿ
|
||||
0xa0 : b'\xc2\xa0', #
|
||||
0xa1 : b'\xc2\xa1', # ¡
|
||||
0xa2 : b'\xc2\xa2', # ¢
|
||||
0xa3 : b'\xc2\xa3', # £
|
||||
0xa4 : b'\xc2\xa4', # ¤
|
||||
0xa5 : b'\xc2\xa5', # ¥
|
||||
0xa6 : b'\xc2\xa6', # ¦
|
||||
0xa7 : b'\xc2\xa7', # §
|
||||
0xa8 : b'\xc2\xa8', # ¨
|
||||
0xa9 : b'\xc2\xa9', # ©
|
||||
0xaa : b'\xc2\xaa', # ª
|
||||
0xab : b'\xc2\xab', # «
|
||||
0xac : b'\xc2\xac', # ¬
|
||||
0xad : b'\xc2\xad', #
|
||||
0xae : b'\xc2\xae', # ®
|
||||
0xaf : b'\xc2\xaf', # ¯
|
||||
0xb0 : b'\xc2\xb0', # °
|
||||
0xb1 : b'\xc2\xb1', # ±
|
||||
0xb2 : b'\xc2\xb2', # ²
|
||||
0xb3 : b'\xc2\xb3', # ³
|
||||
0xb4 : b'\xc2\xb4', # ´
|
||||
0xb5 : b'\xc2\xb5', # µ
|
||||
0xb6 : b'\xc2\xb6', # ¶
|
||||
0xb7 : b'\xc2\xb7', # ·
|
||||
0xb8 : b'\xc2\xb8', # ¸
|
||||
0xb9 : b'\xc2\xb9', # ¹
|
||||
0xba : b'\xc2\xba', # º
|
||||
0xbb : b'\xc2\xbb', # »
|
||||
0xbc : b'\xc2\xbc', # ¼
|
||||
0xbd : b'\xc2\xbd', # ½
|
||||
0xbe : b'\xc2\xbe', # ¾
|
||||
0xbf : b'\xc2\xbf', # ¿
|
||||
0xc0 : b'\xc3\x80', # À
|
||||
0xc1 : b'\xc3\x81', # Á
|
||||
0xc2 : b'\xc3\x82', # Â
|
||||
0xc3 : b'\xc3\x83', # Ã
|
||||
0xc4 : b'\xc3\x84', # Ä
|
||||
0xc5 : b'\xc3\x85', # Å
|
||||
0xc6 : b'\xc3\x86', # Æ
|
||||
0xc7 : b'\xc3\x87', # Ç
|
||||
0xc8 : b'\xc3\x88', # È
|
||||
0xc9 : b'\xc3\x89', # É
|
||||
0xca : b'\xc3\x8a', # Ê
|
||||
0xcb : b'\xc3\x8b', # Ë
|
||||
0xcc : b'\xc3\x8c', # Ì
|
||||
0xcd : b'\xc3\x8d', # Í
|
||||
0xce : b'\xc3\x8e', # Î
|
||||
0xcf : b'\xc3\x8f', # Ï
|
||||
0xd0 : b'\xc3\x90', # Ð
|
||||
0xd1 : b'\xc3\x91', # Ñ
|
||||
0xd2 : b'\xc3\x92', # Ò
|
||||
0xd3 : b'\xc3\x93', # Ó
|
||||
0xd4 : b'\xc3\x94', # Ô
|
||||
0xd5 : b'\xc3\x95', # Õ
|
||||
0xd6 : b'\xc3\x96', # Ö
|
||||
0xd7 : b'\xc3\x97', # ×
|
||||
0xd8 : b'\xc3\x98', # Ø
|
||||
0xd9 : b'\xc3\x99', # Ù
|
||||
0xda : b'\xc3\x9a', # Ú
|
||||
0xdb : b'\xc3\x9b', # Û
|
||||
0xdc : b'\xc3\x9c', # Ü
|
||||
0xdd : b'\xc3\x9d', # Ý
|
||||
0xde : b'\xc3\x9e', # Þ
|
||||
0xdf : b'\xc3\x9f', # ß
|
||||
0xe0 : b'\xc3\xa0', # à
|
||||
0xe1 : b'\xa1', # á
|
||||
0xe2 : b'\xc3\xa2', # â
|
||||
0xe3 : b'\xc3\xa3', # ã
|
||||
0xe4 : b'\xc3\xa4', # ä
|
||||
0xe5 : b'\xc3\xa5', # å
|
||||
0xe6 : b'\xc3\xa6', # æ
|
||||
0xe7 : b'\xc3\xa7', # ç
|
||||
0xe8 : b'\xc3\xa8', # è
|
||||
0xe9 : b'\xc3\xa9', # é
|
||||
0xea : b'\xc3\xaa', # ê
|
||||
0xeb : b'\xc3\xab', # ë
|
||||
0xec : b'\xc3\xac', # ì
|
||||
0xed : b'\xc3\xad', # í
|
||||
0xee : b'\xc3\xae', # î
|
||||
0xef : b'\xc3\xaf', # ï
|
||||
0xf0 : b'\xc3\xb0', # ð
|
||||
0xf1 : b'\xc3\xb1', # ñ
|
||||
0xf2 : b'\xc3\xb2', # ò
|
||||
0xf3 : b'\xc3\xb3', # ó
|
||||
0xf4 : b'\xc3\xb4', # ô
|
||||
0xf5 : b'\xc3\xb5', # õ
|
||||
0xf6 : b'\xc3\xb6', # ö
|
||||
0xf7 : b'\xc3\xb7', # ÷
|
||||
0xf8 : b'\xc3\xb8', # ø
|
||||
0xf9 : b'\xc3\xb9', # ù
|
||||
0xfa : b'\xc3\xba', # ú
|
||||
0xfb : b'\xc3\xbb', # û
|
||||
0xfc : b'\xc3\xbc', # ü
|
||||
0xfd : b'\xc3\xbd', # ý
|
||||
0xfe : b'\xc3\xbe', # þ
|
||||
}
|
||||
|
||||
MULTIBYTE_MARKERS_AND_SIZES = [
|
||||
(0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF
|
||||
(0xe0, 0xef, 3), # 3-byte characters start with E0-EF
|
||||
(0xf0, 0xf4, 4), # 4-byte characters start with F0-F4
|
||||
]
|
||||
|
||||
FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0]
|
||||
LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1]
|
||||
|
||||
@classmethod
|
||||
def detwingle(cls, in_bytes, main_encoding="utf8",
|
||||
embedded_encoding="windows-1252"):
|
||||
"""Fix characters from one encoding embedded in some other encoding.
|
||||
|
||||
Currently the only situation supported is Windows-1252 (or its
|
||||
subset ISO-8859-1), embedded in UTF-8.
|
||||
|
||||
The input must be a bytestring. If you've already converted
|
||||
the document to Unicode, you're too late.
|
||||
|
||||
The output is a bytestring in which `embedded_encoding`
|
||||
characters have been converted to their `main_encoding`
|
||||
equivalents.
|
||||
"""
|
||||
if embedded_encoding.replace('_', '-').lower() not in (
|
||||
'windows-1252', 'windows_1252'):
|
||||
raise NotImplementedError(
|
||||
"Windows-1252 and ISO-8859-1 are the only currently supported "
|
||||
"embedded encodings.")
|
||||
|
||||
if main_encoding.lower() not in ('utf8', 'utf-8'):
|
||||
raise NotImplementedError(
|
||||
"UTF-8 is the only currently supported main encoding.")
|
||||
|
||||
byte_chunks = []
|
||||
|
||||
chunk_start = 0
|
||||
pos = 0
|
||||
while pos < len(in_bytes):
|
||||
byte = in_bytes[pos]
|
||||
if not isinstance(byte, int):
|
||||
# Python 2.x
|
||||
byte = ord(byte)
|
||||
if (byte >= cls.FIRST_MULTIBYTE_MARKER
|
||||
and byte <= cls.LAST_MULTIBYTE_MARKER):
|
||||
# This is the start of a UTF-8 multibyte character. Skip
|
||||
# to the end.
|
||||
for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES:
|
||||
if byte >= start and byte <= end:
|
||||
pos += size
|
||||
break
|
||||
elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8:
|
||||
# We found a Windows-1252 character!
|
||||
# Save the string up to this point as a chunk.
|
||||
byte_chunks.append(in_bytes[chunk_start:pos])
|
||||
|
||||
# Now translate the Windows-1252 character into UTF-8
|
||||
# and add it as another, one-byte chunk.
|
||||
byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte])
|
||||
pos += 1
|
||||
chunk_start = pos
|
||||
else:
|
||||
# Go on to the next character.
|
||||
pos += 1
|
||||
if chunk_start == 0:
|
||||
# The string is unchanged.
|
||||
return in_bytes
|
||||
else:
|
||||
# Store the final chunk.
|
||||
byte_chunks.append(in_bytes[chunk_start:])
|
||||
return b''.join(byte_chunks)
|
||||
|
|
@ -1,219 +0,0 @@
|
|||
"""Diagnostic functions, mainly for use when doing tech support."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
import cProfile
|
||||
from StringIO import StringIO
|
||||
from HTMLParser import HTMLParser
|
||||
import bs4
|
||||
from bs4 import BeautifulSoup, __version__
|
||||
from bs4.builder import builder_registry
|
||||
|
||||
import os
|
||||
import pstats
|
||||
import random
|
||||
import tempfile
|
||||
import time
|
||||
import traceback
|
||||
import sys
|
||||
import cProfile
|
||||
|
||||
def diagnose(data):
|
||||
"""Diagnostic suite for isolating common problems."""
|
||||
print "Diagnostic running on Beautiful Soup %s" % __version__
|
||||
print "Python version %s" % sys.version
|
||||
|
||||
basic_parsers = ["html.parser", "html5lib", "lxml"]
|
||||
for name in basic_parsers:
|
||||
for builder in builder_registry.builders:
|
||||
if name in builder.features:
|
||||
break
|
||||
else:
|
||||
basic_parsers.remove(name)
|
||||
print (
|
||||
"I noticed that %s is not installed. Installing it may help." %
|
||||
name)
|
||||
|
||||
if 'lxml' in basic_parsers:
|
||||
basic_parsers.append(["lxml", "xml"])
|
||||
try:
|
||||
from lxml import etree
|
||||
print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
|
||||
except ImportError, e:
|
||||
print (
|
||||
"lxml is not installed or couldn't be imported.")
|
||||
|
||||
|
||||
if 'html5lib' in basic_parsers:
|
||||
try:
|
||||
import html5lib
|
||||
print "Found html5lib version %s" % html5lib.__version__
|
||||
except ImportError, e:
|
||||
print (
|
||||
"html5lib is not installed or couldn't be imported.")
|
||||
|
||||
if hasattr(data, 'read'):
|
||||
data = data.read()
|
||||
elif os.path.exists(data):
|
||||
print '"%s" looks like a filename. Reading data from the file.' % data
|
||||
with open(data) as fp:
|
||||
data = fp.read()
|
||||
elif data.startswith("http:") or data.startswith("https:"):
|
||||
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
|
||||
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
|
||||
return
|
||||
print
|
||||
|
||||
for parser in basic_parsers:
|
||||
print "Trying to parse your markup with %s" % parser
|
||||
success = False
|
||||
try:
|
||||
soup = BeautifulSoup(data, parser)
|
||||
success = True
|
||||
except Exception, e:
|
||||
print "%s could not parse the markup." % parser
|
||||
traceback.print_exc()
|
||||
if success:
|
||||
print "Here's what %s did with the markup:" % parser
|
||||
print soup.prettify()
|
||||
|
||||
print "-" * 80
|
||||
|
||||
def lxml_trace(data, html=True, **kwargs):
|
||||
"""Print out the lxml events that occur during parsing.
|
||||
|
||||
This lets you see how lxml parses a document when no Beautiful
|
||||
Soup code is running.
|
||||
"""
|
||||
from lxml import etree
|
||||
for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
|
||||
print("%s, %4s, %s" % (event, element.tag, element.text))
|
||||
|
||||
class AnnouncingParser(HTMLParser):
|
||||
"""Announces HTMLParser parse events, without doing anything else."""
|
||||
|
||||
def _p(self, s):
|
||||
print(s)
|
||||
|
||||
def handle_starttag(self, name, attrs):
|
||||
self._p("%s START" % name)
|
||||
|
||||
def handle_endtag(self, name):
|
||||
self._p("%s END" % name)
|
||||
|
||||
def handle_data(self, data):
|
||||
self._p("%s DATA" % data)
|
||||
|
||||
def handle_charref(self, name):
|
||||
self._p("%s CHARREF" % name)
|
||||
|
||||
def handle_entityref(self, name):
|
||||
self._p("%s ENTITYREF" % name)
|
||||
|
||||
def handle_comment(self, data):
|
||||
self._p("%s COMMENT" % data)
|
||||
|
||||
def handle_decl(self, data):
|
||||
self._p("%s DECL" % data)
|
||||
|
||||
def unknown_decl(self, data):
|
||||
self._p("%s UNKNOWN-DECL" % data)
|
||||
|
||||
def handle_pi(self, data):
|
||||
self._p("%s PI" % data)
|
||||
|
||||
def htmlparser_trace(data):
|
||||
"""Print out the HTMLParser events that occur during parsing.
|
||||
|
||||
This lets you see how HTMLParser parses a document when no
|
||||
Beautiful Soup code is running.
|
||||
"""
|
||||
parser = AnnouncingParser()
|
||||
parser.feed(data)
|
||||
|
||||
_vowels = "aeiou"
|
||||
_consonants = "bcdfghjklmnpqrstvwxyz"
|
||||
|
||||
def rword(length=5):
|
||||
"Generate a random word-like string."
|
||||
s = ''
|
||||
for i in range(length):
|
||||
if i % 2 == 0:
|
||||
t = _consonants
|
||||
else:
|
||||
t = _vowels
|
||||
s += random.choice(t)
|
||||
return s
|
||||
|
||||
def rsentence(length=4):
|
||||
"Generate a random sentence-like string."
|
||||
return " ".join(rword(random.randint(4,9)) for i in range(length))
|
||||
|
||||
def rdoc(num_elements=1000):
|
||||
"""Randomly generate an invalid HTML document."""
|
||||
tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table']
|
||||
elements = []
|
||||
for i in range(num_elements):
|
||||
choice = random.randint(0,3)
|
||||
if choice == 0:
|
||||
# New tag.
|
||||
tag_name = random.choice(tag_names)
|
||||
elements.append("<%s>" % tag_name)
|
||||
elif choice == 1:
|
||||
elements.append(rsentence(random.randint(1,4)))
|
||||
elif choice == 2:
|
||||
# Close a tag.
|
||||
tag_name = random.choice(tag_names)
|
||||
elements.append("</%s>" % tag_name)
|
||||
return "<html>" + "\n".join(elements) + "</html>"
|
||||
|
||||
def benchmark_parsers(num_elements=100000):
|
||||
"""Very basic head-to-head performance benchmark."""
|
||||
print "Comparative parser benchmark on Beautiful Soup %s" % __version__
|
||||
data = rdoc(num_elements)
|
||||
print "Generated a large invalid HTML document (%d bytes)." % len(data)
|
||||
|
||||
for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
|
||||
success = False
|
||||
try:
|
||||
a = time.time()
|
||||
soup = BeautifulSoup(data, parser)
|
||||
b = time.time()
|
||||
success = True
|
||||
except Exception, e:
|
||||
print "%s could not parse the markup." % parser
|
||||
traceback.print_exc()
|
||||
if success:
|
||||
print "BS4+%s parsed the markup in %.2fs." % (parser, b-a)
|
||||
|
||||
from lxml import etree
|
||||
a = time.time()
|
||||
etree.HTML(data)
|
||||
b = time.time()
|
||||
print "Raw lxml parsed the markup in %.2fs." % (b-a)
|
||||
|
||||
import html5lib
|
||||
parser = html5lib.HTMLParser()
|
||||
a = time.time()
|
||||
parser.parse(data)
|
||||
b = time.time()
|
||||
print "Raw html5lib parsed the markup in %.2fs." % (b-a)
|
||||
|
||||
def profile(num_elements=100000, parser="lxml"):
|
||||
|
||||
filehandle = tempfile.NamedTemporaryFile()
|
||||
filename = filehandle.name
|
||||
|
||||
data = rdoc(num_elements)
|
||||
vars = dict(bs4=bs4, data=data, parser=parser)
|
||||
cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename)
|
||||
|
||||
stats = pstats.Stats(filename)
|
||||
# stats.strip_dirs()
|
||||
stats.sort_stats("cumulative")
|
||||
stats.print_stats('_html5lib|bs4', 50)
|
||||
|
||||
if __name__ == '__main__':
|
||||
diagnose(sys.stdin.read())
|
1808
lib/bs4/element.py
1808
lib/bs4/element.py
File diff suppressed because it is too large
Load Diff
|
@ -1,770 +0,0 @@
|
|||
"""Helper classes for tests."""
|
||||
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
__license__ = "MIT"
|
||||
|
||||
import pickle
|
||||
import copy
|
||||
import functools
|
||||
import unittest
|
||||
from unittest import TestCase
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
Comment,
|
||||
ContentMetaAttributeValue,
|
||||
Doctype,
|
||||
SoupStrainer,
|
||||
)
|
||||
|
||||
from bs4.builder import HTMLParserTreeBuilder
|
||||
default_builder = HTMLParserTreeBuilder
|
||||
|
||||
|
||||
class SoupTest(unittest.TestCase):
|
||||
|
||||
@property
|
||||
def default_builder(self):
|
||||
return default_builder()
|
||||
|
||||
def soup(self, markup, **kwargs):
|
||||
"""Build a Beautiful Soup object from markup."""
|
||||
builder = kwargs.pop('builder', self.default_builder)
|
||||
return BeautifulSoup(markup, builder=builder, **kwargs)
|
||||
|
||||
def document_for(self, markup):
|
||||
"""Turn an HTML fragment into a document.
|
||||
|
||||
The details depend on the builder.
|
||||
"""
|
||||
return self.default_builder.test_fragment_to_document(markup)
|
||||
|
||||
def assertSoupEquals(self, to_parse, compare_parsed_to=None):
|
||||
builder = self.default_builder
|
||||
obj = BeautifulSoup(to_parse, builder=builder)
|
||||
if compare_parsed_to is None:
|
||||
compare_parsed_to = to_parse
|
||||
|
||||
self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
|
||||
|
||||
def assertConnectedness(self, element):
|
||||
"""Ensure that next_element and previous_element are properly
|
||||
set for all descendants of the given element.
|
||||
"""
|
||||
earlier = None
|
||||
for e in element.descendants:
|
||||
if earlier:
|
||||
self.assertEqual(e, earlier.next_element)
|
||||
self.assertEqual(earlier, e.previous_element)
|
||||
earlier = e
|
||||
|
||||
class HTMLTreeBuilderSmokeTest(object):
|
||||
|
||||
"""A basic test of a treebuilder's competence.
|
||||
|
||||
Any HTML treebuilder, present or future, should be able to pass
|
||||
these tests. With invalid markup, there's room for interpretation,
|
||||
and different parsers can handle it differently. But with the
|
||||
markup in these tests, there's not much room for interpretation.
|
||||
"""
|
||||
|
||||
def test_empty_element_tags(self):
|
||||
"""Verify that all HTML4 and HTML5 empty element (aka void element) tags
|
||||
are handled correctly.
|
||||
"""
|
||||
for name in [
|
||||
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
|
||||
'spacer', 'frame'
|
||||
]:
|
||||
soup = self.soup("")
|
||||
new_tag = soup.new_tag(name)
|
||||
self.assertEqual(True, new_tag.is_empty_element)
|
||||
|
||||
def test_pickle_and_unpickle_identity(self):
|
||||
# Pickling a tree, then unpickling it, yields a tree identical
|
||||
# to the original.
|
||||
tree = self.soup("<a><b>foo</a>")
|
||||
dumped = pickle.dumps(tree, 2)
|
||||
loaded = pickle.loads(dumped)
|
||||
self.assertEqual(loaded.__class__, BeautifulSoup)
|
||||
self.assertEqual(loaded.decode(), tree.decode())
|
||||
|
||||
def assertDoctypeHandled(self, doctype_fragment):
|
||||
"""Assert that a given doctype string is handled correctly."""
|
||||
doctype_str, soup = self._document_with_doctype(doctype_fragment)
|
||||
|
||||
# Make sure a Doctype object was created.
|
||||
doctype = soup.contents[0]
|
||||
self.assertEqual(doctype.__class__, Doctype)
|
||||
self.assertEqual(doctype, doctype_fragment)
|
||||
self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)
|
||||
|
||||
# Make sure that the doctype was correctly associated with the
|
||||
# parse tree and that the rest of the document parsed.
|
||||
self.assertEqual(soup.p.contents[0], 'foo')
|
||||
|
||||
def _document_with_doctype(self, doctype_fragment):
|
||||
"""Generate and parse a document with the given doctype."""
|
||||
doctype = '<!DOCTYPE %s>' % doctype_fragment
|
||||
markup = doctype + '\n<p>foo</p>'
|
||||
soup = self.soup(markup)
|
||||
return doctype, soup
|
||||
|
||||
def test_normal_doctypes(self):
|
||||
"""Make sure normal, everyday HTML doctypes are handled correctly."""
|
||||
self.assertDoctypeHandled("html")
|
||||
self.assertDoctypeHandled(
|
||||
'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
|
||||
|
||||
def test_empty_doctype(self):
|
||||
soup = self.soup("<!DOCTYPE>")
|
||||
doctype = soup.contents[0]
|
||||
self.assertEqual("", doctype.strip())
|
||||
|
||||
def test_public_doctype_with_url(self):
|
||||
doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
|
||||
self.assertDoctypeHandled(doctype)
|
||||
|
||||
def test_system_doctype(self):
|
||||
self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
|
||||
|
||||
def test_namespaced_system_doctype(self):
|
||||
# We can handle a namespaced doctype with a system ID.
|
||||
self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
|
||||
|
||||
def test_namespaced_public_doctype(self):
|
||||
# Test a namespaced doctype with a public id.
|
||||
self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
|
||||
|
||||
def test_real_xhtml_document(self):
|
||||
"""A real XHTML document should come out more or less the same as it went in."""
|
||||
markup = b"""<?xml version="1.0" encoding="utf-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>Hello.</title></head>
|
||||
<body>Goodbye.</body>
|
||||
</html>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(
|
||||
soup.encode("utf-8").replace(b"\n", b""),
|
||||
markup.replace(b"\n", b""))
|
||||
|
||||
def test_processing_instruction(self):
|
||||
# We test both Unicode and bytestring to verify that
|
||||
# process_markup correctly sets processing_instruction_class
|
||||
# even when the markup is already Unicode and there is no
|
||||
# need to process anything.
|
||||
markup = u"""<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.decode())
|
||||
|
||||
markup = b"""<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
|
||||
def test_deepcopy(self):
|
||||
"""Make sure you can copy the tree builder.
|
||||
|
||||
This is important because the builder is part of a
|
||||
BeautifulSoup object, and we want to be able to copy that.
|
||||
"""
|
||||
copy.deepcopy(self.default_builder)
|
||||
|
||||
def test_p_tag_is_never_empty_element(self):
|
||||
"""A <p> tag is never designated as an empty-element tag.
|
||||
|
||||
Even if the markup shows it as an empty-element tag, it
|
||||
shouldn't be presented that way.
|
||||
"""
|
||||
soup = self.soup("<p/>")
|
||||
self.assertFalse(soup.p.is_empty_element)
|
||||
self.assertEqual(str(soup.p), "<p></p>")
|
||||
|
||||
def test_unclosed_tags_get_closed(self):
|
||||
"""A tag that's not closed by the end of the document should be closed.
|
||||
|
||||
This applies to all tags except empty-element tags.
|
||||
"""
|
||||
self.assertSoupEquals("<p>", "<p></p>")
|
||||
self.assertSoupEquals("<b>", "<b></b>")
|
||||
|
||||
self.assertSoupEquals("<br>", "<br/>")
|
||||
|
||||
def test_br_is_always_empty_element_tag(self):
|
||||
"""A <br> tag is designated as an empty-element tag.
|
||||
|
||||
Some parsers treat <br></br> as one <br/> tag, some parsers as
|
||||
two tags, but it should always be an empty-element tag.
|
||||
"""
|
||||
soup = self.soup("<br></br>")
|
||||
self.assertTrue(soup.br.is_empty_element)
|
||||
self.assertEqual(str(soup.br), "<br/>")
|
||||
|
||||
def test_nested_formatting_elements(self):
|
||||
self.assertSoupEquals("<em><em></em></em>")
|
||||
|
||||
def test_double_head(self):
|
||||
html = '''<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Ordinary HEAD element test</title>
|
||||
</head>
|
||||
<script type="text/javascript">
|
||||
alert("Help!");
|
||||
</script>
|
||||
<body>
|
||||
Hello, world!
|
||||
</body>
|
||||
</html>
|
||||
'''
|
||||
soup = self.soup(html)
|
||||
self.assertEqual("text/javascript", soup.find('script')['type'])
|
||||
|
||||
def test_comment(self):
|
||||
# Comments are represented as Comment objects.
|
||||
markup = "<p>foo<!--foobar-->baz</p>"
|
||||
self.assertSoupEquals(markup)
|
||||
|
||||
soup = self.soup(markup)
|
||||
comment = soup.find(text="foobar")
|
||||
self.assertEqual(comment.__class__, Comment)
|
||||
|
||||
# The comment is properly integrated into the tree.
|
||||
foo = soup.find(text="foo")
|
||||
self.assertEqual(comment, foo.next_element)
|
||||
baz = soup.find(text="baz")
|
||||
self.assertEqual(comment, baz.previous_element)
|
||||
|
||||
def test_preserved_whitespace_in_pre_and_textarea(self):
|
||||
"""Whitespace must be preserved in <pre> and <textarea> tags,
|
||||
even if that would mean not prettifying the markup.
|
||||
"""
|
||||
pre_markup = "<pre> </pre>"
|
||||
textarea_markup = "<textarea> woo\nwoo </textarea>"
|
||||
self.assertSoupEquals(pre_markup)
|
||||
self.assertSoupEquals(textarea_markup)
|
||||
|
||||
soup = self.soup(pre_markup)
|
||||
self.assertEqual(soup.pre.prettify(), pre_markup)
|
||||
|
||||
soup = self.soup(textarea_markup)
|
||||
self.assertEqual(soup.textarea.prettify(), textarea_markup)
|
||||
|
||||
soup = self.soup("<textarea></textarea>")
|
||||
self.assertEqual(soup.textarea.prettify(), "<textarea></textarea>")
|
||||
|
||||
def test_nested_inline_elements(self):
|
||||
"""Inline elements can be nested indefinitely."""
|
||||
b_tag = "<b>Inside a B tag</b>"
|
||||
self.assertSoupEquals(b_tag)
|
||||
|
||||
nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
|
||||
self.assertSoupEquals(nested_b_tag)
|
||||
|
||||
double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
|
||||
self.assertSoupEquals(nested_b_tag)
|
||||
|
||||
def test_nested_block_level_elements(self):
|
||||
"""Block elements can be nested."""
|
||||
soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
|
||||
blockquote = soup.blockquote
|
||||
self.assertEqual(blockquote.p.b.string, 'Foo')
|
||||
self.assertEqual(blockquote.b.string, 'Foo')
|
||||
|
||||
def test_correctly_nested_tables(self):
|
||||
"""One table can go inside another one."""
|
||||
markup = ('<table id="1">'
|
||||
'<tr>'
|
||||
"<td>Here's another table:"
|
||||
'<table id="2">'
|
||||
'<tr><td>foo</td></tr>'
|
||||
'</table></td>')
|
||||
|
||||
self.assertSoupEquals(
|
||||
markup,
|
||||
'<table id="1"><tr><td>Here\'s another table:'
|
||||
'<table id="2"><tr><td>foo</td></tr></table>'
|
||||
'</td></tr></table>')
|
||||
|
||||
self.assertSoupEquals(
|
||||
"<table><thead><tr><td>Foo</td></tr></thead>"
|
||||
"<tbody><tr><td>Bar</td></tr></tbody>"
|
||||
"<tfoot><tr><td>Baz</td></tr></tfoot></table>")
|
||||
|
||||
def test_deeply_nested_multivalued_attribute(self):
|
||||
# html5lib can set the attributes of the same tag many times
|
||||
# as it rearranges the tree. This has caused problems with
|
||||
# multivalued attributes.
|
||||
markup = '<table><div><div class="css"></div></div></table>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(["css"], soup.div.div['class'])
|
||||
|
||||
def test_multivalued_attribute_on_html(self):
|
||||
# html5lib uses a different API to set the attributes ot the
|
||||
# <html> tag. This has caused problems with multivalued
|
||||
# attributes.
|
||||
markup = '<html class="a b"></html>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(["a", "b"], soup.html['class'])
|
||||
|
||||
def test_angle_brackets_in_attribute_values_are_escaped(self):
|
||||
self.assertSoupEquals('<a b="<a>"></a>', '<a b="<a>"></a>')
|
||||
|
||||
def test_entities_in_attributes_converted_to_unicode(self):
|
||||
expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
|
||||
def test_entities_in_text_converted_to_unicode(self):
|
||||
expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
|
||||
def test_quot_entity_converted_to_quotation_mark(self):
|
||||
self.assertSoupEquals("<p>I said "good day!"</p>",
|
||||
'<p>I said "good day!"</p>')
|
||||
|
||||
def test_out_of_range_entity(self):
|
||||
expect = u"\N{REPLACEMENT CHARACTER}"
|
||||
self.assertSoupEquals("�", expect)
|
||||
self.assertSoupEquals("�", expect)
|
||||
self.assertSoupEquals("�", expect)
|
||||
|
||||
def test_multipart_strings(self):
|
||||
"Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
|
||||
soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
|
||||
self.assertEqual("p", soup.h2.string.next_element.name)
|
||||
self.assertEqual("p", soup.p.name)
|
||||
self.assertConnectedness(soup)
|
||||
|
||||
def test_empty_element_tags(self):
|
||||
"""Verify consistent handling of empty-element tags,
|
||||
no matter how they come in through the markup.
|
||||
"""
|
||||
self.assertSoupEquals('<br/><br/><br/>', "<br/><br/><br/>")
|
||||
self.assertSoupEquals('<br /><br /><br />', "<br/><br/><br/>")
|
||||
|
||||
def test_head_tag_between_head_and_body(self):
|
||||
"Prevent recurrence of a bug in the html5lib treebuilder."
|
||||
content = """<html><head></head>
|
||||
<link></link>
|
||||
<body>foo</body>
|
||||
</html>
|
||||
"""
|
||||
soup = self.soup(content)
|
||||
self.assertNotEqual(None, soup.html.body)
|
||||
self.assertConnectedness(soup)
|
||||
|
||||
def test_multiple_copies_of_a_tag(self):
|
||||
"Prevent recurrence of a bug in the html5lib treebuilder."
|
||||
content = """<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
<article id="a" >
|
||||
<div><a href="1"></div>
|
||||
<footer>
|
||||
<a href="2"></a>
|
||||
</footer>
|
||||
</article>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
soup = self.soup(content)
|
||||
self.assertConnectedness(soup.article)
|
||||
|
||||
def test_basic_namespaces(self):
|
||||
"""Parsers don't need to *understand* namespaces, but at the
|
||||
very least they should not choke on namespaces or lose
|
||||
data."""
|
||||
|
||||
markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode())
|
||||
html = soup.html
|
||||
self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns'])
|
||||
self.assertEqual(
|
||||
'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml'])
|
||||
self.assertEqual(
|
||||
'http://www.w3.org/2000/svg', soup.html['xmlns:svg'])
|
||||
|
||||
def test_multivalued_attribute_value_becomes_list(self):
|
||||
markup = b'<a class="foo bar">'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(['foo', 'bar'], soup.a['class'])
|
||||
|
||||
#
|
||||
# Generally speaking, tests below this point are more tests of
|
||||
# Beautiful Soup than tests of the tree builders. But parsers are
|
||||
# weird, so we run these tests separately for every tree builder
|
||||
# to detect any differences between them.
|
||||
#
|
||||
|
||||
def test_can_parse_unicode_document(self):
|
||||
# A seemingly innocuous document... but it's in Unicode! And
|
||||
# it contains characters that can't be represented in the
|
||||
# encoding found in the declaration! The horror!
|
||||
markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
|
||||
|
||||
def test_soupstrainer(self):
|
||||
"""Parsers should be able to work with SoupStrainers."""
|
||||
strainer = SoupStrainer("b")
|
||||
soup = self.soup("A <b>bold</b> <meta/> <i>statement</i>",
|
||||
parse_only=strainer)
|
||||
self.assertEqual(soup.decode(), "<b>bold</b>")
|
||||
|
||||
def test_single_quote_attribute_values_become_double_quotes(self):
|
||||
self.assertSoupEquals("<foo attr='bar'></foo>",
|
||||
'<foo attr="bar"></foo>')
|
||||
|
||||
def test_attribute_values_with_nested_quotes_are_left_alone(self):
|
||||
text = """<foo attr='bar "brawls" happen'>a</foo>"""
|
||||
self.assertSoupEquals(text)
|
||||
|
||||
def test_attribute_values_with_double_nested_quotes_get_quoted(self):
|
||||
text = """<foo attr='bar "brawls" happen'>a</foo>"""
|
||||
soup = self.soup(text)
|
||||
soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
|
||||
self.assertSoupEquals(
|
||||
soup.foo.decode(),
|
||||
"""<foo attr="Brawls happen at "Bob\'s Bar"">a</foo>""")
|
||||
|
||||
def test_ampersand_in_attribute_value_gets_escaped(self):
|
||||
self.assertSoupEquals('<this is="really messed up & stuff"></this>',
|
||||
'<this is="really messed up & stuff"></this>')
|
||||
|
||||
self.assertSoupEquals(
|
||||
'<a href="http://example.org?a=1&b=2;3">foo</a>',
|
||||
'<a href="http://example.org?a=1&b=2;3">foo</a>')
|
||||
|
||||
def test_escaped_ampersand_in_attribute_value_is_left_alone(self):
|
||||
self.assertSoupEquals('<a href="http://example.org?a=1&b=2;3"></a>')
|
||||
|
||||
def test_entities_in_strings_converted_during_parsing(self):
|
||||
# Both XML and HTML entities are converted to Unicode characters
|
||||
# during parsing.
|
||||
text = "<p><<sacré bleu!>></p>"
|
||||
expected = u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>"
|
||||
self.assertSoupEquals(text, expected)
|
||||
|
||||
def test_smart_quotes_converted_on_the_way_in(self):
|
||||
# Microsoft smart quotes are converted to Unicode characters during
|
||||
# parsing.
|
||||
quote = b"<p>\x91Foo\x92</p>"
|
||||
soup = self.soup(quote)
|
||||
self.assertEqual(
|
||||
soup.p.string,
|
||||
u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
|
||||
|
||||
def test_non_breaking_spaces_converted_on_the_way_in(self):
|
||||
soup = self.soup("<a> </a>")
|
||||
self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
|
||||
|
||||
def test_entities_converted_on_the_way_out(self):
|
||||
text = "<p><<sacré bleu!>></p>"
|
||||
expected = u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>".encode("utf-8")
|
||||
soup = self.soup(text)
|
||||
self.assertEqual(soup.p.encode("utf-8"), expected)
|
||||
|
||||
def test_real_iso_latin_document(self):
|
||||
# Smoke test of interrelated functionality, using an
|
||||
# easy-to-understand document.
|
||||
|
||||
# Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
|
||||
unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
|
||||
|
||||
# That's because we're going to encode it into ISO-Latin-1, and use
|
||||
# that to test.
|
||||
iso_latin_html = unicode_html.encode("iso-8859-1")
|
||||
|
||||
# Parse the ISO-Latin-1 HTML.
|
||||
soup = self.soup(iso_latin_html)
|
||||
# Encode it to UTF-8.
|
||||
result = soup.encode("utf-8")
|
||||
|
||||
# What do we expect the result to look like? Well, it would
|
||||
# look like unicode_html, except that the META tag would say
|
||||
# UTF-8 instead of ISO-Latin-1.
|
||||
expected = unicode_html.replace("ISO-Latin-1", "utf-8")
|
||||
|
||||
# And, of course, it would be in UTF-8, not Unicode.
|
||||
expected = expected.encode("utf-8")
|
||||
|
||||
# Ta-da!
|
||||
self.assertEqual(result, expected)
|
||||
|
||||
def test_real_shift_jis_document(self):
|
||||
# Smoke test to make sure the parser can handle a document in
|
||||
# Shift-JIS encoding, without choking.
|
||||
shift_jis_html = (
|
||||
b'<html><head></head><body><pre>'
|
||||
b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
|
||||
b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
|
||||
b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
|
||||
b'</pre></body></html>')
|
||||
unicode_html = shift_jis_html.decode("shift-jis")
|
||||
soup = self.soup(unicode_html)
|
||||
|
||||
# Make sure the parse tree is correctly encoded to various
|
||||
# encodings.
|
||||
self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8"))
|
||||
self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp"))
|
||||
|
||||
def test_real_hebrew_document(self):
|
||||
# A real-world test to make sure we can convert ISO-8859-9 (a
|
||||
# Hebrew encoding) to UTF-8.
|
||||
hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
|
||||
soup = self.soup(
|
||||
hebrew_document, from_encoding="iso8859-8")
|
||||
# Some tree builders call it iso8859-8, others call it iso-8859-9.
|
||||
# That's not a difference we really care about.
|
||||
assert soup.original_encoding in ('iso8859-8', 'iso-8859-8')
|
||||
self.assertEqual(
|
||||
soup.encode('utf-8'),
|
||||
hebrew_document.decode("iso8859-8").encode("utf-8"))
|
||||
|
||||
def test_meta_tag_reflects_current_encoding(self):
|
||||
# Here's the <meta> tag saying that a document is
|
||||
# encoded in Shift-JIS.
|
||||
meta_tag = ('<meta content="text/html; charset=x-sjis" '
|
||||
'http-equiv="Content-type"/>')
|
||||
|
||||
# Here's a document incorporating that meta tag.
|
||||
shift_jis_html = (
|
||||
'<html><head>\n%s\n'
|
||||
'<meta http-equiv="Content-language" content="ja"/>'
|
||||
'</head><body>Shift-JIS markup goes here.') % meta_tag
|
||||
soup = self.soup(shift_jis_html)
|
||||
|
||||
# Parse the document, and the charset is seemingly unaffected.
|
||||
parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
|
||||
content = parsed_meta['content']
|
||||
self.assertEqual('text/html; charset=x-sjis', content)
|
||||
|
||||
# But that value is actually a ContentMetaAttributeValue object.
|
||||
self.assertTrue(isinstance(content, ContentMetaAttributeValue))
|
||||
|
||||
# And it will take on a value that reflects its current
|
||||
# encoding.
|
||||
self.assertEqual('text/html; charset=utf8', content.encode("utf8"))
|
||||
|
||||
# For the rest of the story, see TestSubstitutions in
|
||||
# test_tree.py.
|
||||
|
||||
def test_html5_style_meta_tag_reflects_current_encoding(self):
|
||||
# Here's the <meta> tag saying that a document is
|
||||
# encoded in Shift-JIS.
|
||||
meta_tag = ('<meta id="encoding" charset="x-sjis" />')
|
||||
|
||||
# Here's a document incorporating that meta tag.
|
||||
shift_jis_html = (
|
||||
'<html><head>\n%s\n'
|
||||
'<meta http-equiv="Content-language" content="ja"/>'
|
||||
'</head><body>Shift-JIS markup goes here.') % meta_tag
|
||||
soup = self.soup(shift_jis_html)
|
||||
|
||||
# Parse the document, and the charset is seemingly unaffected.
|
||||
parsed_meta = soup.find('meta', id="encoding")
|
||||
charset = parsed_meta['charset']
|
||||
self.assertEqual('x-sjis', charset)
|
||||
|
||||
# But that value is actually a CharsetMetaAttributeValue object.
|
||||
self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))
|
||||
|
||||
# And it will take on a value that reflects its current
|
||||
# encoding.
|
||||
self.assertEqual('utf8', charset.encode("utf8"))
|
||||
|
||||
def test_tag_with_no_attributes_can_have_attributes_added(self):
|
||||
data = self.soup("<a>text</a>")
|
||||
data.a['foo'] = 'bar'
|
||||
self.assertEqual('<a foo="bar">text</a>', data.a.decode())
|
||||
|
||||
class XMLTreeBuilderSmokeTest(object):
|
||||
|
||||
def test_pickle_and_unpickle_identity(self):
|
||||
# Pickling a tree, then unpickling it, yields a tree identical
|
||||
# to the original.
|
||||
tree = self.soup("<a><b>foo</a>")
|
||||
dumped = pickle.dumps(tree, 2)
|
||||
loaded = pickle.loads(dumped)
|
||||
self.assertEqual(loaded.__class__, BeautifulSoup)
|
||||
self.assertEqual(loaded.decode(), tree.decode())
|
||||
|
||||
def test_docstring_generated(self):
|
||||
soup = self.soup("<root/>")
|
||||
self.assertEqual(
|
||||
soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')
|
||||
|
||||
def test_xml_declaration(self):
|
||||
markup = b"""<?xml version="1.0" encoding="utf8"?>\n<foo/>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
|
||||
def test_processing_instruction(self):
|
||||
markup = b"""<?xml version="1.0" encoding="utf8"?>\n<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(markup, soup.encode("utf8"))
|
||||
|
||||
def test_real_xhtml_document(self):
|
||||
"""A real XHTML document should come out *exactly* the same as it went in."""
|
||||
markup = b"""<?xml version="1.0" encoding="utf-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>Hello.</title></head>
|
||||
<body>Goodbye.</body>
|
||||
</html>"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(
|
||||
soup.encode("utf-8"), markup)
|
||||
|
||||
def test_formatter_processes_script_tag_for_xml_documents(self):
|
||||
doc = """
|
||||
<script type="text/javascript">
|
||||
</script>
|
||||
"""
|
||||
soup = BeautifulSoup(doc, "lxml-xml")
|
||||
# lxml would have stripped this while parsing, but we can add
|
||||
# it later.
|
||||
soup.script.string = 'console.log("< < hey > > ");'
|
||||
encoded = soup.encode()
|
||||
self.assertTrue(b"< < hey > >" in encoded)
|
||||
|
||||
def test_can_parse_unicode_document(self):
|
||||
markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
|
||||
|
||||
def test_popping_namespaced_tag(self):
|
||||
markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(
|
||||
unicode(soup.rss), markup)
|
||||
|
||||
def test_docstring_includes_correct_encoding(self):
|
||||
soup = self.soup("<root/>")
|
||||
self.assertEqual(
|
||||
soup.encode("latin1"),
|
||||
b'<?xml version="1.0" encoding="latin1"?>\n<root/>')
|
||||
|
||||
def test_large_xml_document(self):
|
||||
"""A large XML document should come out the same as it went in."""
|
||||
markup = (b'<?xml version="1.0" encoding="utf-8"?>\n<root>'
|
||||
+ b'0' * (2**12)
|
||||
+ b'</root>')
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(soup.encode("utf-8"), markup)
|
||||
|
||||
|
||||
def test_tags_are_empty_element_if_and_only_if_they_are_empty(self):
|
||||
self.assertSoupEquals("<p>", "<p/>")
|
||||
self.assertSoupEquals("<p>foo</p>")
|
||||
|
||||
def test_namespaces_are_preserved(self):
|
||||
markup = '<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>'
|
||||
soup = self.soup(markup)
|
||||
root = soup.root
|
||||
self.assertEqual("http://example.com/", root['xmlns:a'])
|
||||
self.assertEqual("http://example.net/", root['xmlns:b'])
|
||||
|
||||
def test_closing_namespaced_tag(self):
|
||||
markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(unicode(soup.p), markup)
|
||||
|
||||
def test_namespaced_attributes(self):
|
||||
markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(unicode(soup.foo), markup)
|
||||
|
||||
def test_namespaced_attributes_xml_namespace(self):
|
||||
markup = '<foo xml:lang="fr">bar</foo>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(unicode(soup.foo), markup)
|
||||
|
||||
def test_find_by_prefixed_name(self):
|
||||
doc = """<?xml version="1.0" encoding="utf-8"?>
|
||||
<Document xmlns="http://example.com/ns0"
|
||||
xmlns:ns1="http://example.com/ns1"
|
||||
xmlns:ns2="http://example.com/ns2"
|
||||
<ns1:tag>foo</ns1:tag>
|
||||
<ns1:tag>bar</ns1:tag>
|
||||
<ns2:tag key="value">baz</ns2:tag>
|
||||
</Document>
|
||||
"""
|
||||
soup = self.soup(doc)
|
||||
|
||||
# There are three <tag> tags.
|
||||
self.assertEqual(3, len(soup.find_all('tag')))
|
||||
|
||||
# But two of them are ns1:tag and one of them is ns2:tag.
|
||||
self.assertEqual(2, len(soup.find_all('ns1:tag')))
|
||||
self.assertEqual(1, len(soup.find_all('ns2:tag')))
|
||||
|
||||
self.assertEqual(1, len(soup.find_all('ns2:tag', key='value')))
|
||||
self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag'])))
|
||||
|
||||
def test_copy_tag_preserves_namespace(self):
|
||||
xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<w:document xmlns:w="http://example.com/ns0"/>"""
|
||||
|
||||
soup = self.soup(xml)
|
||||
tag = soup.document
|
||||
duplicate = copy.copy(tag)
|
||||
|
||||
# The two tags have the same namespace prefix.
|
||||
self.assertEqual(tag.prefix, duplicate.prefix)
|
||||
|
||||
|
||||
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
|
||||
"""Smoke test for a tree builder that supports HTML5."""
|
||||
|
||||
def test_real_xhtml_document(self):
|
||||
# Since XHTML is not HTML5, HTML5 parsers are not tested to handle
|
||||
# XHTML documents in any particular way.
|
||||
pass
|
||||
|
||||
def test_html_tags_have_namespace(self):
|
||||
markup = "<a>"
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace)
|
||||
|
||||
def test_svg_tags_have_namespace(self):
|
||||
markup = '<svg><circle/></svg>'
|
||||
soup = self.soup(markup)
|
||||
namespace = "http://www.w3.org/2000/svg"
|
||||
self.assertEqual(namespace, soup.svg.namespace)
|
||||
self.assertEqual(namespace, soup.circle.namespace)
|
||||
|
||||
|
||||
def test_mathml_tags_have_namespace(self):
|
||||
markup = '<math><msqrt>5</msqrt></math>'
|
||||
soup = self.soup(markup)
|
||||
namespace = 'http://www.w3.org/1998/Math/MathML'
|
||||
self.assertEqual(namespace, soup.math.namespace)
|
||||
self.assertEqual(namespace, soup.msqrt.namespace)
|
||||
|
||||
def test_xml_declaration_becomes_comment(self):
|
||||
markup = '<?xml version="1.0" encoding="utf-8"?><html></html>'
|
||||
soup = self.soup(markup)
|
||||
self.assertTrue(isinstance(soup.contents[0], Comment))
|
||||
self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?')
|
||||
self.assertEqual("html", soup.contents[0].next_element.name)
|
||||
|
||||
def skipIf(condition, reason):
|
||||
def nothing(test, *args, **kwargs):
|
||||
return None
|
||||
|
||||
def decorator(test_item):
|
||||
if condition:
|
||||
return nothing
|
||||
else:
|
||||
return test_item
|
||||
|
||||
return decorator
|
|
@ -1 +0,0 @@
|
|||
"The beautifulsoup tests."
|
|
@ -1,147 +0,0 @@
|
|||
"""Tests of the builder registry."""
|
||||
|
||||
import unittest
|
||||
import warnings
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.builder import (
|
||||
builder_registry as registry,
|
||||
HTMLParserTreeBuilder,
|
||||
TreeBuilderRegistry,
|
||||
)
|
||||
|
||||
try:
|
||||
from bs4.builder import HTML5TreeBuilder
|
||||
HTML5LIB_PRESENT = True
|
||||
except ImportError:
|
||||
HTML5LIB_PRESENT = False
|
||||
|
||||
try:
|
||||
from bs4.builder import (
|
||||
LXMLTreeBuilderForXML,
|
||||
LXMLTreeBuilder,
|
||||
)
|
||||
LXML_PRESENT = True
|
||||
except ImportError:
|
||||
LXML_PRESENT = False
|
||||
|
||||
|
||||
class BuiltInRegistryTest(unittest.TestCase):
|
||||
"""Test the built-in registry with the default builders registered."""
|
||||
|
||||
def test_combination(self):
|
||||
if LXML_PRESENT:
|
||||
self.assertEqual(registry.lookup('fast', 'html'),
|
||||
LXMLTreeBuilder)
|
||||
|
||||
if LXML_PRESENT:
|
||||
self.assertEqual(registry.lookup('permissive', 'xml'),
|
||||
LXMLTreeBuilderForXML)
|
||||
self.assertEqual(registry.lookup('strict', 'html'),
|
||||
HTMLParserTreeBuilder)
|
||||
if HTML5LIB_PRESENT:
|
||||
self.assertEqual(registry.lookup('html5lib', 'html'),
|
||||
HTML5TreeBuilder)
|
||||
|
||||
def test_lookup_by_markup_type(self):
|
||||
if LXML_PRESENT:
|
||||
self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
|
||||
self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
|
||||
else:
|
||||
self.assertEqual(registry.lookup('xml'), None)
|
||||
if HTML5LIB_PRESENT:
|
||||
self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
|
||||
else:
|
||||
self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder)
|
||||
|
||||
def test_named_library(self):
|
||||
if LXML_PRESENT:
|
||||
self.assertEqual(registry.lookup('lxml', 'xml'),
|
||||
LXMLTreeBuilderForXML)
|
||||
self.assertEqual(registry.lookup('lxml', 'html'),
|
||||
LXMLTreeBuilder)
|
||||
if HTML5LIB_PRESENT:
|
||||
self.assertEqual(registry.lookup('html5lib'),
|
||||
HTML5TreeBuilder)
|
||||
|
||||
self.assertEqual(registry.lookup('html.parser'),
|
||||
HTMLParserTreeBuilder)
|
||||
|
||||
def test_beautifulsoup_constructor_does_lookup(self):
|
||||
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
# This will create a warning about not explicitly
|
||||
# specifying a parser, but we'll ignore it.
|
||||
|
||||
# You can pass in a string.
|
||||
BeautifulSoup("", features="html")
|
||||
# Or a list of strings.
|
||||
BeautifulSoup("", features=["html", "fast"])
|
||||
|
||||
# You'll get an exception if BS can't find an appropriate
|
||||
# builder.
|
||||
self.assertRaises(ValueError, BeautifulSoup,
|
||||
"", features="no-such-feature")
|
||||
|
||||
class RegistryTest(unittest.TestCase):
|
||||
"""Test the TreeBuilderRegistry class in general."""
|
||||
|
||||
def setUp(self):
|
||||
self.registry = TreeBuilderRegistry()
|
||||
|
||||
def builder_for_features(self, *feature_list):
|
||||
cls = type('Builder_' + '_'.join(feature_list),
|
||||
(object,), {'features' : feature_list})
|
||||
|
||||
self.registry.register(cls)
|
||||
return cls
|
||||
|
||||
def test_register_with_no_features(self):
|
||||
builder = self.builder_for_features()
|
||||
|
||||
# Since the builder advertises no features, you can't find it
|
||||
# by looking up features.
|
||||
self.assertEqual(self.registry.lookup('foo'), None)
|
||||
|
||||
# But you can find it by doing a lookup with no features, if
|
||||
# this happens to be the only registered builder.
|
||||
self.assertEqual(self.registry.lookup(), builder)
|
||||
|
||||
def test_register_with_features_makes_lookup_succeed(self):
|
||||
builder = self.builder_for_features('foo', 'bar')
|
||||
self.assertEqual(self.registry.lookup('foo'), builder)
|
||||
self.assertEqual(self.registry.lookup('bar'), builder)
|
||||
|
||||
def test_lookup_fails_when_no_builder_implements_feature(self):
|
||||
builder = self.builder_for_features('foo', 'bar')
|
||||
self.assertEqual(self.registry.lookup('baz'), None)
|
||||
|
||||
def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
|
||||
builder1 = self.builder_for_features('foo')
|
||||
builder2 = self.builder_for_features('bar')
|
||||
self.assertEqual(self.registry.lookup(), builder2)
|
||||
|
||||
def test_lookup_fails_when_no_tree_builders_registered(self):
|
||||
self.assertEqual(self.registry.lookup(), None)
|
||||
|
||||
def test_lookup_gets_most_recent_builder_supporting_all_features(self):
|
||||
has_one = self.builder_for_features('foo')
|
||||
has_the_other = self.builder_for_features('bar')
|
||||
has_both_early = self.builder_for_features('foo', 'bar', 'baz')
|
||||
has_both_late = self.builder_for_features('foo', 'bar', 'quux')
|
||||
lacks_one = self.builder_for_features('bar')
|
||||
has_the_other = self.builder_for_features('foo')
|
||||
|
||||
# There are two builders featuring 'foo' and 'bar', but
|
||||
# the one that also features 'quux' was registered later.
|
||||
self.assertEqual(self.registry.lookup('foo', 'bar'),
|
||||
has_both_late)
|
||||
|
||||
# There is only one builder featuring 'foo', 'bar', and 'baz'.
|
||||
self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
|
||||
has_both_early)
|
||||
|
||||
def test_lookup_fails_when_cannot_reconcile_requested_features(self):
|
||||
builder1 = self.builder_for_features('foo', 'bar')
|
||||
builder2 = self.builder_for_features('foo', 'baz')
|
||||
self.assertEqual(self.registry.lookup('bar', 'baz'), None)
|
|
@ -1,36 +0,0 @@
|
|||
"Test harness for doctests."
|
||||
|
||||
# pylint: disable-msg=E0611,W0142
|
||||
|
||||
__metaclass__ = type
|
||||
__all__ = [
|
||||
'additional_tests',
|
||||
]
|
||||
|
||||
import atexit
|
||||
import doctest
|
||||
import os
|
||||
#from pkg_resources import (
|
||||
# resource_filename, resource_exists, resource_listdir, cleanup_resources)
|
||||
import unittest
|
||||
|
||||
DOCTEST_FLAGS = (
|
||||
doctest.ELLIPSIS |
|
||||
doctest.NORMALIZE_WHITESPACE |
|
||||
doctest.REPORT_NDIFF)
|
||||
|
||||
|
||||
# def additional_tests():
|
||||
# "Run the doc tests (README.txt and docs/*, if any exist)"
|
||||
# doctest_files = [
|
||||
# os.path.abspath(resource_filename('bs4', 'README.txt'))]
|
||||
# if resource_exists('bs4', 'docs'):
|
||||
# for name in resource_listdir('bs4', 'docs'):
|
||||
# if name.endswith('.txt'):
|
||||
# doctest_files.append(
|
||||
# os.path.abspath(
|
||||
# resource_filename('bs4', 'docs/%s' % name)))
|
||||
# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS)
|
||||
# atexit.register(cleanup_resources)
|
||||
# return unittest.TestSuite((
|
||||
# doctest.DocFileSuite(*doctest_files, **kwargs)))
|
|
@ -1,130 +0,0 @@
|
|||
"""Tests to ensure that the html5lib tree builder generates good trees."""
|
||||
|
||||
import warnings
|
||||
|
||||
try:
|
||||
from bs4.builder import HTML5TreeBuilder
|
||||
HTML5LIB_PRESENT = True
|
||||
except ImportError, e:
|
||||
HTML5LIB_PRESENT = False
|
||||
from bs4.element import SoupStrainer
|
||||
from bs4.testing import (
|
||||
HTML5TreeBuilderSmokeTest,
|
||||
SoupTest,
|
||||
skipIf,
|
||||
)
|
||||
|
||||
@skipIf(
|
||||
not HTML5LIB_PRESENT,
|
||||
"html5lib seems not to be present, not testing its tree builder.")
|
||||
class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
|
||||
"""See ``HTML5TreeBuilderSmokeTest``."""
|
||||
|
||||
@property
|
||||
def default_builder(self):
|
||||
return HTML5TreeBuilder()
|
||||
|
||||
def test_soupstrainer(self):
|
||||
# The html5lib tree builder does not support SoupStrainers.
|
||||
strainer = SoupStrainer("b")
|
||||
markup = "<p>A <b>bold</b> statement.</p>"
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup(markup, parse_only=strainer)
|
||||
self.assertEqual(
|
||||
soup.decode(), self.document_for(markup))
|
||||
|
||||
self.assertTrue(
|
||||
"the html5lib tree builder doesn't support parse_only" in
|
||||
str(w[0].message))
|
||||
|
||||
def test_correctly_nested_tables(self):
|
||||
"""html5lib inserts <tbody> tags where other parsers don't."""
|
||||
markup = ('<table id="1">'
|
||||
'<tr>'
|
||||
"<td>Here's another table:"
|
||||
'<table id="2">'
|
||||
'<tr><td>foo</td></tr>'
|
||||
'</table></td>')
|
||||
|
||||
self.assertSoupEquals(
|
||||
markup,
|
||||
'<table id="1"><tbody><tr><td>Here\'s another table:'
|
||||
'<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
|
||||
'</td></tr></tbody></table>')
|
||||
|
||||
self.assertSoupEquals(
|
||||
"<table><thead><tr><td>Foo</td></tr></thead>"
|
||||
"<tbody><tr><td>Bar</td></tr></tbody>"
|
||||
"<tfoot><tr><td>Baz</td></tr></tfoot></table>")
|
||||
|
||||
def test_xml_declaration_followed_by_doctype(self):
|
||||
markup = '''<?xml version="1.0" encoding="utf-8"?>
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
</head>
|
||||
<body>
|
||||
<p>foo</p>
|
||||
</body>
|
||||
</html>'''
|
||||
soup = self.soup(markup)
|
||||
# Verify that we can reach the <p> tag; this means the tree is connected.
|
||||
self.assertEqual(b"<p>foo</p>", soup.p.encode())
|
||||
|
||||
def test_reparented_markup(self):
|
||||
markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
|
||||
self.assertEqual(2, len(soup.find_all('p')))
|
||||
|
||||
|
||||
def test_reparented_markup_ends_with_whitespace(self):
|
||||
markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
|
||||
self.assertEqual(2, len(soup.find_all('p')))
|
||||
|
||||
def test_reparented_markup_containing_identical_whitespace_nodes(self):
|
||||
"""Verify that we keep the two whitespace nodes in this
|
||||
document distinct when reparenting the adjacent <tbody> tags.
|
||||
"""
|
||||
markup = '<table> <tbody><tbody><ims></tbody> </table>'
|
||||
soup = self.soup(markup)
|
||||
space1, space2 = soup.find_all(string=' ')
|
||||
tbody1, tbody2 = soup.find_all('tbody')
|
||||
assert space1.next_element is tbody1
|
||||
assert tbody2.next_element is space2
|
||||
|
||||
def test_reparented_markup_containing_children(self):
|
||||
markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>'
|
||||
soup = self.soup(markup)
|
||||
noscript = soup.noscript
|
||||
self.assertEqual("target", noscript.next_element)
|
||||
target = soup.find(string='target')
|
||||
|
||||
# The 'aftermath' string was duplicated; we want the second one.
|
||||
final_aftermath = soup.find_all(string='aftermath')[-1]
|
||||
|
||||
# The <noscript> tag was moved beneath a copy of the <a> tag,
|
||||
# but the 'target' string within is still connected to the
|
||||
# (second) 'aftermath' string.
|
||||
self.assertEqual(final_aftermath, target.next_element)
|
||||
self.assertEqual(target, final_aftermath.previous_element)
|
||||
|
||||
def test_processing_instruction(self):
|
||||
"""Processing instructions become comments."""
|
||||
markup = b"""<?PITarget PIContent?>"""
|
||||
soup = self.soup(markup)
|
||||
assert str(soup).startswith("<!--?PITarget PIContent?-->")
|
||||
|
||||
def test_cloned_multivalue_node(self):
|
||||
markup = b"""<a class="my_class"><p></a>"""
|
||||
soup = self.soup(markup)
|
||||
a1, a2 = soup.find_all('a')
|
||||
self.assertEqual(a1, a2)
|
||||
assert a1 is not a2
|
||||
|
||||
def test_foster_parenting(self):
|
||||
markup = b"""<table><td></tbody>A"""
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
|
|
@ -1,34 +0,0 @@
|
|||
"""Tests to ensure that the html.parser tree builder generates good
|
||||
trees."""
|
||||
|
||||
from pdb import set_trace
|
||||
import pickle
|
||||
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
|
||||
from bs4.builder import HTMLParserTreeBuilder
|
||||
|
||||
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||
|
||||
@property
|
||||
def default_builder(self):
|
||||
return HTMLParserTreeBuilder()
|
||||
|
||||
def test_namespaced_system_doctype(self):
|
||||
# html.parser can't handle namespaced doctypes, so skip this one.
|
||||
pass
|
||||
|
||||
def test_namespaced_public_doctype(self):
|
||||
# html.parser can't handle namespaced doctypes, so skip this one.
|
||||
pass
|
||||
|
||||
def test_builder_is_pickled(self):
|
||||
"""Unlike most tree builders, HTMLParserTreeBuilder and will
|
||||
be restored after pickling.
|
||||
"""
|
||||
tree = self.soup("<a><b>foo</a>")
|
||||
dumped = pickle.dumps(tree, 2)
|
||||
loaded = pickle.loads(dumped)
|
||||
self.assertTrue(isinstance(loaded.builder, type(tree.builder)))
|
||||
|
||||
def test_redundant_empty_element_closing_tags(self):
|
||||
self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>")
|
||||
self.assertSoupEquals('</br></br></br>', "")
|
|
@ -1,76 +0,0 @@
|
|||
"""Tests to ensure that the lxml tree builder generates good trees."""
|
||||
|
||||
import re
|
||||
import warnings
|
||||
|
||||
try:
|
||||
import lxml.etree
|
||||
LXML_PRESENT = True
|
||||
LXML_VERSION = lxml.etree.LXML_VERSION
|
||||
except ImportError, e:
|
||||
LXML_PRESENT = False
|
||||
LXML_VERSION = (0,)
|
||||
|
||||
if LXML_PRESENT:
|
||||
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
||||
|
||||
from bs4 import (
|
||||
BeautifulSoup,
|
||||
BeautifulStoneSoup,
|
||||
)
|
||||
from bs4.element import Comment, Doctype, SoupStrainer
|
||||
from bs4.testing import skipIf
|
||||
from bs4.tests import test_htmlparser
|
||||
from bs4.testing import (
|
||||
HTMLTreeBuilderSmokeTest,
|
||||
XMLTreeBuilderSmokeTest,
|
||||
SoupTest,
|
||||
skipIf,
|
||||
)
|
||||
|
||||
@skipIf(
|
||||
not LXML_PRESENT,
|
||||
"lxml seems not to be present, not testing its tree builder.")
|
||||
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||
"""See ``HTMLTreeBuilderSmokeTest``."""
|
||||
|
||||
@property
|
||||
def default_builder(self):
|
||||
return LXMLTreeBuilder()
|
||||
|
||||
def test_out_of_range_entity(self):
|
||||
self.assertSoupEquals(
|
||||
"<p>foo�bar</p>", "<p>foobar</p>")
|
||||
self.assertSoupEquals(
|
||||
"<p>foo�bar</p>", "<p>foobar</p>")
|
||||
self.assertSoupEquals(
|
||||
"<p>foo�bar</p>", "<p>foobar</p>")
|
||||
|
||||
# In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
|
||||
# test if an old version of lxml is installed.
|
||||
|
||||
@skipIf(
|
||||
not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
|
||||
"Skipping doctype test for old version of lxml to avoid segfault.")
|
||||
def test_empty_doctype(self):
|
||||
soup = self.soup("<!DOCTYPE>")
|
||||
doctype = soup.contents[0]
|
||||
self.assertEqual("", doctype.strip())
|
||||
|
||||
def test_beautifulstonesoup_is_xml_parser(self):
|
||||
# Make sure that the deprecated BSS class uses an xml builder
|
||||
# if one is installed.
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = BeautifulStoneSoup("<b />")
|
||||
self.assertEqual(u"<b/>", unicode(soup.b))
|
||||
self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
|
||||
|
||||
@skipIf(
|
||||
not LXML_PRESENT,
|
||||
"lxml seems not to be present, not testing its XML tree builder.")
|
||||
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
|
||||
"""See ``HTMLTreeBuilderSmokeTest``."""
|
||||
|
||||
@property
|
||||
def default_builder(self):
|
||||
return LXMLTreeBuilderForXML()
|
|
@ -1,501 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""Tests of Beautiful Soup as a whole."""
|
||||
|
||||
from pdb import set_trace
|
||||
import logging
|
||||
import unittest
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
from bs4 import (
|
||||
BeautifulSoup,
|
||||
BeautifulStoneSoup,
|
||||
)
|
||||
from bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
ContentMetaAttributeValue,
|
||||
SoupStrainer,
|
||||
NamespacedAttribute,
|
||||
)
|
||||
import bs4.dammit
|
||||
from bs4.dammit import (
|
||||
EntitySubstitution,
|
||||
UnicodeDammit,
|
||||
EncodingDetector,
|
||||
)
|
||||
from bs4.testing import (
|
||||
SoupTest,
|
||||
skipIf,
|
||||
)
|
||||
import warnings
|
||||
|
||||
try:
|
||||
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
||||
LXML_PRESENT = True
|
||||
except ImportError, e:
|
||||
LXML_PRESENT = False
|
||||
|
||||
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
|
||||
|
||||
class TestConstructor(SoupTest):
|
||||
|
||||
def test_short_unicode_input(self):
|
||||
data = u"<h1>éé</h1>"
|
||||
soup = self.soup(data)
|
||||
self.assertEqual(u"éé", soup.h1.string)
|
||||
|
||||
def test_embedded_null(self):
|
||||
data = u"<h1>foo\0bar</h1>"
|
||||
soup = self.soup(data)
|
||||
self.assertEqual(u"foo\0bar", soup.h1.string)
|
||||
|
||||
def test_exclude_encodings(self):
|
||||
utf8_data = u"Räksmörgås".encode("utf-8")
|
||||
soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
|
||||
self.assertEqual("windows-1252", soup.original_encoding)
|
||||
|
||||
|
||||
class TestWarnings(SoupTest):
|
||||
|
||||
def _no_parser_specified(self, s, is_there=True):
|
||||
v = s.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:80])
|
||||
self.assertTrue(v)
|
||||
|
||||
def test_warning_if_no_parser_specified(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("<a><b></b></a>")
|
||||
msg = str(w[0].message)
|
||||
self._assert_no_parser_specified(msg)
|
||||
|
||||
def test_warning_if_parser_specified_too_vague(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("<a><b></b></a>", "html")
|
||||
msg = str(w[0].message)
|
||||
self._assert_no_parser_specified(msg)
|
||||
|
||||
def test_no_warning_if_explicit_parser_specified(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("<a><b></b></a>", "html.parser")
|
||||
self.assertEqual([], w)
|
||||
|
||||
def test_parseOnlyThese_renamed_to_parse_only(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
|
||||
msg = str(w[0].message)
|
||||
self.assertTrue("parseOnlyThese" in msg)
|
||||
self.assertTrue("parse_only" in msg)
|
||||
self.assertEqual(b"<b></b>", soup.encode())
|
||||
|
||||
def test_fromEncoding_renamed_to_from_encoding(self):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
utf8 = b"\xc3\xa9"
|
||||
soup = self.soup(utf8, fromEncoding="utf8")
|
||||
msg = str(w[0].message)
|
||||
self.assertTrue("fromEncoding" in msg)
|
||||
self.assertTrue("from_encoding" in msg)
|
||||
self.assertEqual("utf8", soup.original_encoding)
|
||||
|
||||
def test_unrecognized_keyword_argument(self):
|
||||
self.assertRaises(
|
||||
TypeError, self.soup, "<a>", no_such_argument=True)
|
||||
|
||||
class TestWarnings(SoupTest):
|
||||
|
||||
def test_disk_file_warning(self):
|
||||
filehandle = tempfile.NamedTemporaryFile()
|
||||
filename = filehandle.name
|
||||
try:
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup(filename)
|
||||
msg = str(w[0].message)
|
||||
self.assertTrue("looks like a filename" in msg)
|
||||
finally:
|
||||
filehandle.close()
|
||||
|
||||
# The file no longer exists, so Beautiful Soup will no longer issue the warning.
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
soup = self.soup(filename)
|
||||
self.assertEqual(0, len(w))
|
||||
|
||||
def test_url_warning_with_bytes_url(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(b"http://www.crummybytes.com/")
|
||||
# Be aware this isn't the only warning that can be raised during
|
||||
# execution..
|
||||
self.assertTrue(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_unicode_url(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
# note - this url must differ from the bytes one otherwise
|
||||
# python's warnings system swallows the second warning
|
||||
soup = self.soup(u"http://www.crummyunicode.com/")
|
||||
self.assertTrue(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_bytes_and_space(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(b"http://www.crummybytes.com/ is great")
|
||||
self.assertFalse(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
def test_url_warning_with_unicode_and_space(self):
|
||||
with warnings.catch_warnings(record=True) as warning_list:
|
||||
soup = self.soup(u"http://www.crummyuncode.com/ is great")
|
||||
self.assertFalse(any("looks like a URL" in str(w.message)
|
||||
for w in warning_list))
|
||||
|
||||
|
||||
class TestSelectiveParsing(SoupTest):
|
||||
|
||||
def test_parse_with_soupstrainer(self):
|
||||
markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
|
||||
strainer = SoupStrainer("b")
|
||||
soup = self.soup(markup, parse_only=strainer)
|
||||
self.assertEqual(soup.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")
|
||||
|
||||
|
||||
class TestEntitySubstitution(unittest.TestCase):
|
||||
"""Standalone tests of the EntitySubstitution class."""
|
||||
def setUp(self):
|
||||
self.sub = EntitySubstitution
|
||||
|
||||
def test_simple_html_substitution(self):
|
||||
# Unicode characters corresponding to named HTML entites
|
||||
# are substituted, and no others.
|
||||
s = u"foo\u2200\N{SNOWMAN}\u00f5bar"
|
||||
self.assertEqual(self.sub.substitute_html(s),
|
||||
u"foo∀\N{SNOWMAN}õbar")
|
||||
|
||||
def test_smart_quote_substitution(self):
|
||||
# MS smart quotes are a common source of frustration, so we
|
||||
# give them a special test.
|
||||
quotes = b"\x91\x92foo\x93\x94"
|
||||
dammit = UnicodeDammit(quotes)
|
||||
self.assertEqual(self.sub.substitute_html(dammit.markup),
|
||||
"‘’foo“”")
|
||||
|
||||
def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
|
||||
s = 'Welcome to "my bar"'
|
||||
self.assertEqual(self.sub.substitute_xml(s, False), s)
|
||||
|
||||
def test_xml_attribute_quoting_normally_uses_double_quotes(self):
|
||||
self.assertEqual(self.sub.substitute_xml("Welcome", True),
|
||||
'"Welcome"')
|
||||
self.assertEqual(self.sub.substitute_xml("Bob's Bar", True),
|
||||
'"Bob\'s Bar"')
|
||||
|
||||
def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
|
||||
s = 'Welcome to "my bar"'
|
||||
self.assertEqual(self.sub.substitute_xml(s, True),
|
||||
"'Welcome to \"my bar\"'")
|
||||
|
||||
def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
|
||||
s = 'Welcome to "Bob\'s Bar"'
|
||||
self.assertEqual(
|
||||
self.sub.substitute_xml(s, True),
|
||||
'"Welcome to "Bob\'s Bar""')
|
||||
|
||||
def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
|
||||
quoted = 'Welcome to "Bob\'s Bar"'
|
||||
self.assertEqual(self.sub.substitute_xml(quoted), quoted)
|
||||
|
||||
def test_xml_quoting_handles_angle_brackets(self):
|
||||
self.assertEqual(
|
||||
self.sub.substitute_xml("foo<bar>"),
|
||||
"foo<bar>")
|
||||
|
||||
def test_xml_quoting_handles_ampersands(self):
|
||||
self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&T")
|
||||
|
||||
def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self):
|
||||
self.assertEqual(
|
||||
self.sub.substitute_xml("ÁT&T"),
|
||||
"&Aacute;T&T")
|
||||
|
||||
def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self):
|
||||
self.assertEqual(
|
||||
self.sub.substitute_xml_containing_entities("ÁT&T"),
|
||||
"ÁT&T")
|
||||
|
||||
def test_quotes_not_html_substituted(self):
|
||||
"""There's no need to do this except inside attribute values."""
|
||||
text = 'Bob\'s "bar"'
|
||||
self.assertEqual(self.sub.substitute_html(text), text)
|
||||
|
||||
|
||||
class TestEncodingConversion(SoupTest):
|
||||
# Test Beautiful Soup's ability to decode and encode from various
|
||||
# encodings.
|
||||
|
||||
def setUp(self):
|
||||
super(TestEncodingConversion, self).setUp()
|
||||
self.unicode_data = u'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
|
||||
self.utf8_data = self.unicode_data.encode("utf-8")
|
||||
# Just so you know what it looks like.
|
||||
self.assertEqual(
|
||||
self.utf8_data,
|
||||
b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>')
|
||||
|
||||
def test_ascii_in_unicode_out(self):
|
||||
# ASCII input is converted to Unicode. The original_encoding
|
||||
# attribute is set to 'utf-8', a superset of ASCII.
|
||||
chardet = bs4.dammit.chardet_dammit
|
||||
logging.disable(logging.WARNING)
|
||||
try:
|
||||
def noop(str):
|
||||
return None
|
||||
# Disable chardet, which will realize that the ASCII is ASCII.
|
||||
bs4.dammit.chardet_dammit = noop
|
||||
ascii = b"<foo>a</foo>"
|
||||
soup_from_ascii = self.soup(ascii)
|
||||
unicode_output = soup_from_ascii.decode()
|
||||
self.assertTrue(isinstance(unicode_output, unicode))
|
||||
self.assertEqual(unicode_output, self.document_for(ascii.decode()))
|
||||
self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
|
||||
finally:
|
||||
logging.disable(logging.NOTSET)
|
||||
bs4.dammit.chardet_dammit = chardet
|
||||
|
||||
def test_unicode_in_unicode_out(self):
|
||||
# Unicode input is left alone. The original_encoding attribute
|
||||
# is not set.
|
||||
soup_from_unicode = self.soup(self.unicode_data)
|
||||
self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
|
||||
self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!')
|
||||
self.assertEqual(soup_from_unicode.original_encoding, None)
|
||||
|
||||
def test_utf8_in_unicode_out(self):
|
||||
# UTF-8 input is converted to Unicode. The original_encoding
|
||||
# attribute is set.
|
||||
soup_from_utf8 = self.soup(self.utf8_data)
|
||||
self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
|
||||
self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!')
|
||||
|
||||
def test_utf8_out(self):
|
||||
# The internal data structures can be encoded as UTF-8.
|
||||
soup_from_unicode = self.soup(self.unicode_data)
|
||||
self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
|
||||
|
||||
@skipIf(
|
||||
PYTHON_3_PRE_3_2,
|
||||
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
|
||||
def test_attribute_name_containing_unicode_characters(self):
|
||||
markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
|
||||
self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
|
||||
|
||||
class TestUnicodeDammit(unittest.TestCase):
|
||||
"""Standalone tests of UnicodeDammit."""
|
||||
|
||||
def test_unicode_input(self):
|
||||
markup = u"I'm already Unicode! \N{SNOWMAN}"
|
||||
dammit = UnicodeDammit(markup)
|
||||
self.assertEqual(dammit.unicode_markup, markup)
|
||||
|
||||
def test_smart_quotes_to_unicode(self):
|
||||
markup = b"<foo>\x91\x92\x93\x94</foo>"
|
||||
dammit = UnicodeDammit(markup)
|
||||
self.assertEqual(
|
||||
dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
|
||||
|
||||
def test_smart_quotes_to_xml_entities(self):
|
||||
markup = b"<foo>\x91\x92\x93\x94</foo>"
|
||||
dammit = UnicodeDammit(markup, smart_quotes_to="xml")
|
||||
self.assertEqual(
|
||||
dammit.unicode_markup, "<foo>‘’“”</foo>")
|
||||
|
||||
def test_smart_quotes_to_html_entities(self):
|
||||
markup = b"<foo>\x91\x92\x93\x94</foo>"
|
||||
dammit = UnicodeDammit(markup, smart_quotes_to="html")
|
||||
self.assertEqual(
|
||||
dammit.unicode_markup, "<foo>‘’“”</foo>")
|
||||
|
||||
def test_smart_quotes_to_ascii(self):
|
||||
markup = b"<foo>\x91\x92\x93\x94</foo>"
|
||||
dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
|
||||
self.assertEqual(
|
||||
dammit.unicode_markup, """<foo>''""</foo>""")
|
||||
|
||||
def test_detect_utf8(self):
|
||||
utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
|
||||
dammit = UnicodeDammit(utf8)
|
||||
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
|
||||
self.assertEqual(dammit.unicode_markup, u'Sacr\xe9 bleu! \N{SNOWMAN}')
|
||||
|
||||
|
||||
def test_convert_hebrew(self):
|
||||
hebrew = b"\xed\xe5\xec\xf9"
|
||||
dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
|
||||
self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
|
||||
self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
|
||||
|
||||
def test_dont_see_smart_quotes_where_there_are_none(self):
|
||||
utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
|
||||
dammit = UnicodeDammit(utf_8)
|
||||
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
|
||||
self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)
|
||||
|
||||
def test_ignore_inappropriate_codecs(self):
|
||||
utf8_data = u"Räksmörgås".encode("utf-8")
|
||||
dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
|
||||
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
|
||||
|
||||
def test_ignore_invalid_codecs(self):
|
||||
utf8_data = u"Räksmörgås".encode("utf-8")
|
||||
for bad_encoding in ['.utf8', '...', 'utF---16.!']:
|
||||
dammit = UnicodeDammit(utf8_data, [bad_encoding])
|
||||
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
|
||||
|
||||
def test_exclude_encodings(self):
|
||||
# This is UTF-8.
|
||||
utf8_data = u"Räksmörgås".encode("utf-8")
|
||||
|
||||
# But if we exclude UTF-8 from consideration, the guess is
|
||||
# Windows-1252.
|
||||
dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8"])
|
||||
self.assertEqual(dammit.original_encoding.lower(), 'windows-1252')
|
||||
|
||||
# And if we exclude that, there is no valid guess at all.
|
||||
dammit = UnicodeDammit(
|
||||
utf8_data, exclude_encodings=["utf-8", "windows-1252"])
|
||||
self.assertEqual(dammit.original_encoding, None)
|
||||
|
||||
def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self):
|
||||
detected = EncodingDetector(
|
||||
b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
|
||||
encodings = list(detected.encodings)
|
||||
assert u'utf-\N{REPLACEMENT CHARACTER}' in encodings
|
||||
|
||||
def test_detect_html5_style_meta_tag(self):
|
||||
|
||||
for data in (
|
||||
b'<html><meta charset="euc-jp" /></html>',
|
||||
b"<html><meta charset='euc-jp' /></html>",
|
||||
b"<html><meta charset=euc-jp /></html>",
|
||||
b"<html><meta charset=euc-jp/></html>"):
|
||||
dammit = UnicodeDammit(data, is_html=True)
|
||||
self.assertEqual(
|
||||
"euc-jp", dammit.original_encoding)
|
||||
|
||||
def test_last_ditch_entity_replacement(self):
|
||||
# This is a UTF-8 document that contains bytestrings
|
||||
# completely incompatible with UTF-8 (ie. encoded with some other
|
||||
# encoding).
|
||||
#
|
||||
# Since there is no consistent encoding for the document,
|
||||
# Unicode, Dammit will eventually encode the document as UTF-8
|
||||
# and encode the incompatible characters as REPLACEMENT
|
||||
# CHARACTER.
|
||||
#
|
||||
# If chardet is installed, it will detect that the document
|
||||
# can be converted into ISO-8859-1 without errors. This happens
|
||||
# to be the wrong encoding, but it is a consistent encoding, so the
|
||||
# code we're testing here won't run.
|
||||
#
|
||||
# So we temporarily disable chardet if it's present.
|
||||
doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
|
||||
<html><b>\330\250\330\252\330\261</b>
|
||||
<i>\310\322\321\220\312\321\355\344</i></html>"""
|
||||
chardet = bs4.dammit.chardet_dammit
|
||||
logging.disable(logging.WARNING)
|
||||
try:
|
||||
def noop(str):
|
||||
return None
|
||||
bs4.dammit.chardet_dammit = noop
|
||||
dammit = UnicodeDammit(doc)
|
||||
self.assertEqual(True, dammit.contains_replacement_characters)
|
||||
self.assertTrue(u"\ufffd" in dammit.unicode_markup)
|
||||
|
||||
soup = BeautifulSoup(doc, "html.parser")
|
||||
self.assertTrue(soup.contains_replacement_characters)
|
||||
finally:
|
||||
logging.disable(logging.NOTSET)
|
||||
bs4.dammit.chardet_dammit = chardet
|
||||
|
||||
def test_byte_order_mark_removed(self):
|
||||
# A document written in UTF-16LE will have its byte order marker stripped.
|
||||
data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
|
||||
dammit = UnicodeDammit(data)
|
||||
self.assertEqual(u"<a>áé</a>", dammit.unicode_markup)
|
||||
self.assertEqual("utf-16le", dammit.original_encoding)
|
||||
|
||||
def test_detwingle(self):
|
||||
# Here's a UTF8 document.
|
||||
utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")
|
||||
|
||||
# Here's a Windows-1252 document.
|
||||
windows_1252 = (
|
||||
u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
|
||||
u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
|
||||
|
||||
# Through some unholy alchemy, they've been stuck together.
|
||||
doc = utf8 + windows_1252 + utf8
|
||||
|
||||
# The document can't be turned into UTF-8:
|
||||
self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")
|
||||
|
||||
# Unicode, Dammit thinks the whole document is Windows-1252,
|
||||
# and decodes it into "☃☃☃“Hi, I like Windows!”☃☃☃"
|
||||
|
||||
# But if we run it through fix_embedded_windows_1252, it's fixed:
|
||||
|
||||
fixed = UnicodeDammit.detwingle(doc)
|
||||
self.assertEqual(
|
||||
u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
|
||||
|
||||
def test_detwingle_ignores_multibyte_characters(self):
|
||||
# Each of these characters has a UTF-8 representation ending
|
||||
# in \x93. \x93 is a smart quote if interpreted as
|
||||
# Windows-1252. But our code knows to skip over multibyte
|
||||
# UTF-8 characters, so they'll survive the process unscathed.
|
||||
for tricky_unicode_char in (
|
||||
u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
|
||||
u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
|
||||
u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
|
||||
):
|
||||
input = tricky_unicode_char.encode("utf8")
|
||||
self.assertTrue(input.endswith(b'\x93'))
|
||||
output = UnicodeDammit.detwingle(input)
|
||||
self.assertEqual(output, input)
|
||||
|
||||
class TestNamedspacedAttribute(SoupTest):
|
||||
|
||||
def test_name_may_be_none(self):
|
||||
a = NamespacedAttribute("xmlns", None)
|
||||
self.assertEqual(a, "xmlns")
|
||||
|
||||
def test_attribute_is_equivalent_to_colon_separated_string(self):
|
||||
a = NamespacedAttribute("a", "b")
|
||||
self.assertEqual("a:b", a)
|
||||
|
||||
def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
|
||||
a = NamespacedAttribute("a", "b", "c")
|
||||
b = NamespacedAttribute("a", "b", "c")
|
||||
self.assertEqual(a, b)
|
||||
|
||||
# The actual namespace is not considered.
|
||||
c = NamespacedAttribute("a", "b", None)
|
||||
self.assertEqual(a, c)
|
||||
|
||||
# But name and prefix are important.
|
||||
d = NamespacedAttribute("a", "z", "c")
|
||||
self.assertNotEqual(a, d)
|
||||
|
||||
e = NamespacedAttribute("z", "b", "c")
|
||||
self.assertNotEqual(a, e)
|
||||
|
||||
|
||||
class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):
|
||||
|
||||
def test_content_meta_attribute_value(self):
|
||||
value = CharsetMetaAttributeValue("euc-jp")
|
||||
self.assertEqual("euc-jp", value)
|
||||
self.assertEqual("euc-jp", value.original_value)
|
||||
self.assertEqual("utf8", value.encode("utf8"))
|
||||
|
||||
|
||||
def test_content_meta_attribute_value(self):
|
||||
value = ContentMetaAttributeValue("text/html; charset=euc-jp")
|
||||
self.assertEqual("text/html; charset=euc-jp", value)
|
||||
self.assertEqual("text/html; charset=euc-jp", value.original_value)
|
||||
self.assertEqual("text/html; charset=utf8", value.encode("utf8"))
|
File diff suppressed because it is too large
Load Diff
|
@ -1,21 +0,0 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Anorov
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
|
@ -1,266 +0,0 @@
|
|||
import logging
|
||||
import random
|
||||
import re
|
||||
import subprocess
|
||||
import copy
|
||||
import time
|
||||
|
||||
from requests.sessions import Session
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
try:
|
||||
from urlparse import urlparse
|
||||
from urlparse import urlunparse
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlunparse
|
||||
|
||||
__version__ = "1.9.7"
|
||||
|
||||
DEFAULT_USER_AGENTS = [
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/65.0.3325.181 Chrome/65.0.3325.181 Safari/537.36",
|
||||
"Mozilla/5.0 (Linux; Android 7.0; Moto G (5) Build/NPPS25.137-93-8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.137 Mobile Safari/537.36",
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B554a Safari/9537.53",
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:59.0) Gecko/20100101 Firefox/59.0",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0"
|
||||
]
|
||||
|
||||
BUG_REPORT = """\
|
||||
Cloudflare may have changed their technique, or there may be a bug in the script.
|
||||
|
||||
Please read https://github.com/Anorov/cloudflare-scrape#updates, then file a \
|
||||
bug report at https://github.com/Anorov/cloudflare-scrape/issues."\
|
||||
"""
|
||||
|
||||
ANSWER_ACCEPT_ERROR = """\
|
||||
The challenge answer was not properly accepted by Cloudflare. This can occur if \
|
||||
the target website is under heavy load, or if Cloudflare is experiencing issues. You can
|
||||
potentially resolve this by increasing the challenge answer delay (default: 8 seconds). \
|
||||
For example: cfscrape.create_scraper(delay=15)
|
||||
|
||||
If increasing the delay does not help, please open a GitHub issue at \
|
||||
https://github.com/Anorov/cloudflare-scrape/issues\
|
||||
"""
|
||||
|
||||
class CloudflareScraper(Session):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.default_delay = 8
|
||||
self.delay = kwargs.pop("delay", self.default_delay)
|
||||
super(CloudflareScraper, self).__init__(*args, **kwargs)
|
||||
|
||||
if "requests" in self.headers["User-Agent"]:
|
||||
# Set a random User-Agent if no custom User-Agent has been set
|
||||
self.headers["User-Agent"] = random.choice(DEFAULT_USER_AGENTS)
|
||||
|
||||
def is_cloudflare_challenge(self, resp):
|
||||
return (
|
||||
resp.status_code == 503
|
||||
and resp.headers.get("Server", "").startswith("cloudflare")
|
||||
and b"jschl_vc" in resp.content
|
||||
and b"jschl_answer" in resp.content
|
||||
)
|
||||
|
||||
def request(self, method, url, *args, **kwargs):
|
||||
self.headers = (
|
||||
OrderedDict(
|
||||
[
|
||||
('User-Agent', self.headers['User-Agent']),
|
||||
('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
|
||||
('Accept-Language', 'en-US,en;q=0.5'),
|
||||
('Accept-Encoding', 'gzip, deflate'),
|
||||
('Connection', 'close'),
|
||||
('Upgrade-Insecure-Requests', '1')
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
resp = super(CloudflareScraper, self).request(method, url, *args, **kwargs)
|
||||
|
||||
# Check if Cloudflare anti-bot is on
|
||||
if self.is_cloudflare_challenge(resp):
|
||||
resp = self.solve_cf_challenge(resp, **kwargs)
|
||||
|
||||
return resp
|
||||
|
||||
def solve_cf_challenge(self, resp, **original_kwargs):
|
||||
start_time = time.time()
|
||||
|
||||
body = resp.text
|
||||
parsed_url = urlparse(resp.url)
|
||||
domain = parsed_url.netloc
|
||||
submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme, domain)
|
||||
|
||||
cloudflare_kwargs = copy.deepcopy(original_kwargs)
|
||||
params = cloudflare_kwargs.setdefault("params", {})
|
||||
headers = cloudflare_kwargs.setdefault("headers", {})
|
||||
headers["Referer"] = resp.url
|
||||
|
||||
try:
|
||||
params["s"] = re.search(r'name="s"\svalue="(?P<s_value>[^"]+)', body).group('s_value')
|
||||
params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"', body).group(1)
|
||||
params["pass"] = re.search(r'name="pass" value="(.+?)"', body).group(1)
|
||||
except Exception as e:
|
||||
# Something is wrong with the page.
|
||||
# This may indicate Cloudflare has changed their anti-bot
|
||||
# technique. If you see this and are running the latest version,
|
||||
# please open a GitHub issue so I can update the code accordingly.
|
||||
raise ValueError("Unable to parse Cloudflare anti-bots page: %s %s" % (e.message, BUG_REPORT))
|
||||
|
||||
# Solve the Javascript challenge
|
||||
params["jschl_answer"] = self.solve_challenge(body, domain)
|
||||
|
||||
# Check if the default delay has been overridden. If not, use the delay required by
|
||||
# cloudflare.
|
||||
if self.delay == self.default_delay:
|
||||
try:
|
||||
self.delay = float(re.search(r"submit\(\);\r?\n\s*},\s*([0-9]+)", body).group(1)) / float(1000)
|
||||
except:
|
||||
pass
|
||||
|
||||
# Requests transforms any request into a GET after a redirect,
|
||||
# so the redirect has to be handled manually here to allow for
|
||||
# performing other types of requests even as the first request.
|
||||
method = resp.request.method
|
||||
cloudflare_kwargs["allow_redirects"] = False
|
||||
|
||||
end_time = time.time()
|
||||
# Cloudflare requires a delay before solving the challenge
|
||||
time.sleep(self.delay - (end_time - start_time))
|
||||
|
||||
redirect = self.request(method, submit_url, **cloudflare_kwargs)
|
||||
redirect_location = urlparse(redirect.headers["Location"])
|
||||
if not redirect_location.netloc:
|
||||
redirect_url = urlunparse((parsed_url.scheme, domain, redirect_location.path, redirect_location.params, redirect_location.query, redirect_location.fragment))
|
||||
return self.request(method, redirect_url, **original_kwargs)
|
||||
return self.request(method, redirect.headers["Location"], **original_kwargs)
|
||||
|
||||
def solve_challenge(self, body, domain):
|
||||
try:
|
||||
js = re.search(r"setTimeout\(function\(\){\s+(var "
|
||||
"s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n", body).group(1)
|
||||
except Exception:
|
||||
raise ValueError("Unable to identify Cloudflare IUAM Javascript on website. %s" % BUG_REPORT)
|
||||
|
||||
js = re.sub(r"a\.value = (.+\.toFixed\(10\);).+", r"\1", js)
|
||||
# Match code that accesses the DOM and remove it, but without stripping too much.
|
||||
try:
|
||||
solution_name = re.search("s,t,o,p,b,r,e,a,k,i,n,g,f,\s*(.+)\s*=", js).groups(1)
|
||||
match = re.search("(.*};)\n\s*(t\s*=(.+))\n\s*(;%s.*)" % (solution_name), js, re.M | re.I | re.DOTALL).groups()
|
||||
js = match[0] + match[-1]
|
||||
except Exception:
|
||||
raise ValueError("Error parsing Cloudflare IUAM Javascript challenge. %s" % BUG_REPORT)
|
||||
js = js.replace("t.length", str(len(domain)))
|
||||
|
||||
# Strip characters that could be used to exit the string context
|
||||
# These characters are not currently used in Cloudflare's arithmetic snippet
|
||||
js = re.sub(r"[\n\\']", "", js)
|
||||
|
||||
if "toFixed" not in js:
|
||||
raise ValueError("Error parsing Cloudflare IUAM Javascript challenge. %s" % BUG_REPORT)
|
||||
|
||||
# 2019-03-20: Cloudflare sometimes stores part of the challenge in a div which is later
|
||||
# added using document.getElementById(x).innerHTML, so it is necessary to simulate that
|
||||
# method and value.
|
||||
try:
|
||||
# Find the id of the div in the javascript code.
|
||||
k = re.search(r"k\s+=\s+'([^']+)';", body).group(1)
|
||||
# Find the div with that id and store its content.
|
||||
val = re.search(r'<div(.*)id="%s"(.*)>(.*)</div>' % (k), body).group(3)
|
||||
except Exception:
|
||||
# If not available, either the code has been modified again, or the old
|
||||
# style challenge is used.
|
||||
k = ''
|
||||
val = ''
|
||||
|
||||
# Use vm.runInNewContext to safely evaluate code
|
||||
# The sandboxed code cannot use the Node.js standard library
|
||||
# Add the atob method which is now used by Cloudflares code, but is not available in all node versions.
|
||||
simulate_document_js = 'var document= {getElementById: function(x) { return {innerHTML:"%s"};}}' % (val)
|
||||
atob_js = 'var atob = function(str) {return Buffer.from(str, "base64").toString("binary");}'
|
||||
# t is not defined, so we have to define it and set it to the domain name.
|
||||
js = '%s;%s;var t="%s";%s' % (simulate_document_js,atob_js,domain,js)
|
||||
buffer_js = "var Buffer = require('buffer').Buffer"
|
||||
# Pass Buffer into the new context, so it is available for atob.
|
||||
js = "%s;console.log(require('vm').runInNewContext('%s', {'Buffer':Buffer,'g':String.fromCharCode}, {timeout: 5000}));" % (buffer_js, js)
|
||||
|
||||
try:
|
||||
result = subprocess.check_output(["node", "-e", js]).strip()
|
||||
except OSError as e:
|
||||
if e.errno == 2:
|
||||
raise EnvironmentError("Missing Node.js runtime. Node is required and must be in the PATH (check with `node -v`). Your Node binary may be called `nodejs` rather than `node`, in which case you may need to run `apt-get install nodejs-legacy` on some Debian-based systems. (Please read the cfscrape"
|
||||
" README's Dependencies section: https://github.com/Anorov/cloudflare-scrape#dependencies.")
|
||||
raise
|
||||
except Exception:
|
||||
logging.error("Error executing Cloudflare IUAM Javascript. %s" % BUG_REPORT)
|
||||
raise
|
||||
|
||||
try:
|
||||
float(result)
|
||||
except Exception:
|
||||
raise ValueError("Cloudflare IUAM challenge returned unexpected answer. %s" % BUG_REPORT)
|
||||
|
||||
return result
|
||||
|
||||
@classmethod
|
||||
def create_scraper(cls, sess=None, **kwargs):
|
||||
"""
|
||||
Convenience function for creating a ready-to-go CloudflareScraper object.
|
||||
"""
|
||||
scraper = cls(**kwargs)
|
||||
|
||||
if sess:
|
||||
attrs = ["auth", "cert", "cookies", "headers", "hooks", "params", "proxies", "data"]
|
||||
for attr in attrs:
|
||||
val = getattr(sess, attr, None)
|
||||
if val:
|
||||
setattr(scraper, attr, val)
|
||||
|
||||
return scraper
|
||||
|
||||
|
||||
## Functions for integrating cloudflare-scrape with other applications and scripts
|
||||
|
||||
@classmethod
|
||||
def get_tokens(cls, url, user_agent=None, **kwargs):
|
||||
scraper = cls.create_scraper()
|
||||
if user_agent:
|
||||
scraper.headers["User-Agent"] = user_agent
|
||||
|
||||
try:
|
||||
resp = scraper.get(url, **kwargs)
|
||||
resp.raise_for_status()
|
||||
except Exception as e:
|
||||
logging.error("'%s' returned an error. Could not collect tokens." % url)
|
||||
raise
|
||||
|
||||
domain = urlparse(resp.url).netloc
|
||||
cookie_domain = None
|
||||
|
||||
for d in scraper.cookies.list_domains():
|
||||
if d.startswith(".") and d in ("." + domain):
|
||||
cookie_domain = d
|
||||
break
|
||||
else:
|
||||
raise ValueError("Unable to find Cloudflare cookies. Does the site actually have Cloudflare IUAM (\"I'm Under Attack Mode\") enabled?")
|
||||
|
||||
return ({
|
||||
"__cfduid": scraper.cookies.get("__cfduid", "", domain=cookie_domain),
|
||||
"cf_clearance": scraper.cookies.get("cf_clearance", "", domain=cookie_domain)
|
||||
},
|
||||
scraper.headers["User-Agent"]
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def get_cookie_string(cls, url, user_agent=None, **kwargs):
|
||||
"""
|
||||
Convenience function for building a Cookie HTTP header value.
|
||||
"""
|
||||
tokens, user_agent = cls.get_tokens(url, user_agent=user_agent, **kwargs)
|
||||
return "; ".join("=".join(pair) for pair in tokens.items()), user_agent
|
||||
|
||||
create_scraper = CloudflareScraper.create_scraper
|
||||
get_tokens = CloudflareScraper.get_tokens
|
||||
get_cookie_string = CloudflareScraper.get_cookie_string
|
|
@ -1,25 +0,0 @@
|
|||
Copyright (c) 2004-2011, CherryPy Team (team@cherrypy.org)
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of the CherryPy Team nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -1,652 +0,0 @@
|
|||
"""CherryPy is a pythonic, object-oriented HTTP framework.
|
||||
|
||||
|
||||
CherryPy consists of not one, but four separate API layers.
|
||||
|
||||
The APPLICATION LAYER is the simplest. CherryPy applications are written as
|
||||
a tree of classes and methods, where each branch in the tree corresponds to
|
||||
a branch in the URL path. Each method is a 'page handler', which receives
|
||||
GET and POST params as keyword arguments, and returns or yields the (HTML)
|
||||
body of the response. The special method name 'index' is used for paths
|
||||
that end in a slash, and the special method name 'default' is used to
|
||||
handle multiple paths via a single handler. This layer also includes:
|
||||
|
||||
* the 'exposed' attribute (and cherrypy.expose)
|
||||
* cherrypy.quickstart()
|
||||
* _cp_config attributes
|
||||
* cherrypy.tools (including cherrypy.session)
|
||||
* cherrypy.url()
|
||||
|
||||
The ENVIRONMENT LAYER is used by developers at all levels. It provides
|
||||
information about the current request and response, plus the application
|
||||
and server environment, via a (default) set of top-level objects:
|
||||
|
||||
* cherrypy.request
|
||||
* cherrypy.response
|
||||
* cherrypy.engine
|
||||
* cherrypy.server
|
||||
* cherrypy.tree
|
||||
* cherrypy.config
|
||||
* cherrypy.thread_data
|
||||
* cherrypy.log
|
||||
* cherrypy.HTTPError, NotFound, and HTTPRedirect
|
||||
* cherrypy.lib
|
||||
|
||||
The EXTENSION LAYER allows advanced users to construct and share their own
|
||||
plugins. It consists of:
|
||||
|
||||
* Hook API
|
||||
* Tool API
|
||||
* Toolbox API
|
||||
* Dispatch API
|
||||
* Config Namespace API
|
||||
|
||||
Finally, there is the CORE LAYER, which uses the core API's to construct
|
||||
the default components which are available at higher layers. You can think
|
||||
of the default components as the 'reference implementation' for CherryPy.
|
||||
Megaframeworks (and advanced users) may replace the default components
|
||||
with customized or extended components. The core API's are:
|
||||
|
||||
* Application API
|
||||
* Engine API
|
||||
* Request API
|
||||
* Server API
|
||||
* WSGI API
|
||||
|
||||
These API's are described in the `CherryPy specification <https://bitbucket.org/cherrypy/cherrypy/wiki/CherryPySpec>`_.
|
||||
"""
|
||||
|
||||
__version__ = "3.6.0"
|
||||
|
||||
from cherrypy._cpcompat import urljoin as _urljoin, urlencode as _urlencode
|
||||
from cherrypy._cpcompat import basestring, unicodestr, set
|
||||
|
||||
from cherrypy._cperror import HTTPError, HTTPRedirect, InternalRedirect
|
||||
from cherrypy._cperror import NotFound, CherryPyException, TimeoutError
|
||||
|
||||
from cherrypy import _cpdispatch as dispatch
|
||||
|
||||
from cherrypy import _cptools
|
||||
tools = _cptools.default_toolbox
|
||||
Tool = _cptools.Tool
|
||||
|
||||
from cherrypy import _cprequest
|
||||
from cherrypy.lib import httputil as _httputil
|
||||
|
||||
from cherrypy import _cptree
|
||||
tree = _cptree.Tree()
|
||||
from cherrypy._cptree import Application
|
||||
from cherrypy import _cpwsgi as wsgi
|
||||
|
||||
from cherrypy import process
|
||||
try:
|
||||
from cherrypy.process import win32
|
||||
engine = win32.Win32Bus()
|
||||
engine.console_control_handler = win32.ConsoleCtrlHandler(engine)
|
||||
del win32
|
||||
except ImportError:
|
||||
engine = process.bus
|
||||
|
||||
|
||||
# Timeout monitor. We add two channels to the engine
|
||||
# to which cherrypy.Application will publish.
|
||||
engine.listeners['before_request'] = set()
|
||||
engine.listeners['after_request'] = set()
|
||||
|
||||
|
||||
class _TimeoutMonitor(process.plugins.Monitor):
|
||||
|
||||
def __init__(self, bus):
|
||||
self.servings = []
|
||||
process.plugins.Monitor.__init__(self, bus, self.run)
|
||||
|
||||
def before_request(self):
|
||||
self.servings.append((serving.request, serving.response))
|
||||
|
||||
def after_request(self):
|
||||
try:
|
||||
self.servings.remove((serving.request, serving.response))
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
def run(self):
|
||||
"""Check timeout on all responses. (Internal)"""
|
||||
for req, resp in self.servings:
|
||||
resp.check_timeout()
|
||||
engine.timeout_monitor = _TimeoutMonitor(engine)
|
||||
engine.timeout_monitor.subscribe()
|
||||
|
||||
engine.autoreload = process.plugins.Autoreloader(engine)
|
||||
engine.autoreload.subscribe()
|
||||
|
||||
engine.thread_manager = process.plugins.ThreadManager(engine)
|
||||
engine.thread_manager.subscribe()
|
||||
|
||||
engine.signal_handler = process.plugins.SignalHandler(engine)
|
||||
|
||||
|
||||
class _HandleSignalsPlugin(object):
|
||||
|
||||
"""Handle signals from other processes based on the configured
|
||||
platform handlers above."""
|
||||
|
||||
def __init__(self, bus):
|
||||
self.bus = bus
|
||||
|
||||
def subscribe(self):
|
||||
"""Add the handlers based on the platform"""
|
||||
if hasattr(self.bus, "signal_handler"):
|
||||
self.bus.signal_handler.subscribe()
|
||||
if hasattr(self.bus, "console_control_handler"):
|
||||
self.bus.console_control_handler.subscribe()
|
||||
|
||||
engine.signals = _HandleSignalsPlugin(engine)
|
||||
|
||||
|
||||
from cherrypy import _cpserver
|
||||
server = _cpserver.Server()
|
||||
server.subscribe()
|
||||
|
||||
|
||||
def quickstart(root=None, script_name="", config=None):
|
||||
"""Mount the given root, start the builtin server (and engine), then block.
|
||||
|
||||
root: an instance of a "controller class" (a collection of page handler
|
||||
methods) which represents the root of the application.
|
||||
script_name: a string containing the "mount point" of the application.
|
||||
This should start with a slash, and be the path portion of the URL
|
||||
at which to mount the given root. For example, if root.index() will
|
||||
handle requests to "http://www.example.com:8080/dept/app1/", then
|
||||
the script_name argument would be "/dept/app1".
|
||||
|
||||
It MUST NOT end in a slash. If the script_name refers to the root
|
||||
of the URI, it MUST be an empty string (not "/").
|
||||
config: a file or dict containing application config. If this contains
|
||||
a [global] section, those entries will be used in the global
|
||||
(site-wide) config.
|
||||
"""
|
||||
if config:
|
||||
_global_conf_alias.update(config)
|
||||
|
||||
tree.mount(root, script_name, config)
|
||||
|
||||
engine.signals.subscribe()
|
||||
engine.start()
|
||||
engine.block()
|
||||
|
||||
|
||||
from cherrypy._cpcompat import threadlocal as _local
|
||||
|
||||
|
||||
class _Serving(_local):
|
||||
|
||||
"""An interface for registering request and response objects.
|
||||
|
||||
Rather than have a separate "thread local" object for the request and
|
||||
the response, this class works as a single threadlocal container for
|
||||
both objects (and any others which developers wish to define). In this
|
||||
way, we can easily dump those objects when we stop/start a new HTTP
|
||||
conversation, yet still refer to them as module-level globals in a
|
||||
thread-safe way.
|
||||
"""
|
||||
|
||||
request = _cprequest.Request(_httputil.Host("127.0.0.1", 80),
|
||||
_httputil.Host("127.0.0.1", 1111))
|
||||
"""
|
||||
The request object for the current thread. In the main thread,
|
||||
and any threads which are not receiving HTTP requests, this is None."""
|
||||
|
||||
response = _cprequest.Response()
|
||||
"""
|
||||
The response object for the current thread. In the main thread,
|
||||
and any threads which are not receiving HTTP requests, this is None."""
|
||||
|
||||
def load(self, request, response):
|
||||
self.request = request
|
||||
self.response = response
|
||||
|
||||
def clear(self):
|
||||
"""Remove all attributes of self."""
|
||||
self.__dict__.clear()
|
||||
|
||||
serving = _Serving()
|
||||
|
||||
|
||||
class _ThreadLocalProxy(object):
|
||||
|
||||
__slots__ = ['__attrname__', '__dict__']
|
||||
|
||||
def __init__(self, attrname):
|
||||
self.__attrname__ = attrname
|
||||
|
||||
def __getattr__(self, name):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
return getattr(child, name)
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
if name in ("__attrname__", ):
|
||||
object.__setattr__(self, name, value)
|
||||
else:
|
||||
child = getattr(serving, self.__attrname__)
|
||||
setattr(child, name, value)
|
||||
|
||||
def __delattr__(self, name):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
delattr(child, name)
|
||||
|
||||
def _get_dict(self):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
d = child.__class__.__dict__.copy()
|
||||
d.update(child.__dict__)
|
||||
return d
|
||||
__dict__ = property(_get_dict)
|
||||
|
||||
def __getitem__(self, key):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
return child[key]
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
child[key] = value
|
||||
|
||||
def __delitem__(self, key):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
del child[key]
|
||||
|
||||
def __contains__(self, key):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
return key in child
|
||||
|
||||
def __len__(self):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
return len(child)
|
||||
|
||||
def __nonzero__(self):
|
||||
child = getattr(serving, self.__attrname__)
|
||||
return bool(child)
|
||||
# Python 3
|
||||
__bool__ = __nonzero__
|
||||
|
||||
# Create request and response object (the same objects will be used
|
||||
# throughout the entire life of the webserver, but will redirect
|
||||
# to the "serving" object)
|
||||
request = _ThreadLocalProxy('request')
|
||||
response = _ThreadLocalProxy('response')
|
||||
|
||||
# Create thread_data object as a thread-specific all-purpose storage
|
||||
|
||||
|
||||
class _ThreadData(_local):
|
||||
|
||||
"""A container for thread-specific data."""
|
||||
thread_data = _ThreadData()
|
||||
|
||||
|
||||
# Monkeypatch pydoc to allow help() to go through the threadlocal proxy.
|
||||
# Jan 2007: no Googleable examples of anyone else replacing pydoc.resolve.
|
||||
# The only other way would be to change what is returned from type(request)
|
||||
# and that's not possible in pure Python (you'd have to fake ob_type).
|
||||
def _cherrypy_pydoc_resolve(thing, forceload=0):
|
||||
"""Given an object or a path to an object, get the object and its name."""
|
||||
if isinstance(thing, _ThreadLocalProxy):
|
||||
thing = getattr(serving, thing.__attrname__)
|
||||
return _pydoc._builtin_resolve(thing, forceload)
|
||||
|
||||
try:
|
||||
import pydoc as _pydoc
|
||||
_pydoc._builtin_resolve = _pydoc.resolve
|
||||
_pydoc.resolve = _cherrypy_pydoc_resolve
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
from cherrypy import _cplogging
|
||||
|
||||
|
||||
class _GlobalLogManager(_cplogging.LogManager):
|
||||
|
||||
"""A site-wide LogManager; routes to app.log or global log as appropriate.
|
||||
|
||||
This :class:`LogManager<cherrypy._cplogging.LogManager>` implements
|
||||
cherrypy.log() and cherrypy.log.access(). If either
|
||||
function is called during a request, the message will be sent to the
|
||||
logger for the current Application. If they are called outside of a
|
||||
request, the message will be sent to the site-wide logger.
|
||||
"""
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
"""Log the given message to the app.log or global log as appropriate.
|
||||
"""
|
||||
# Do NOT use try/except here. See
|
||||
# https://bitbucket.org/cherrypy/cherrypy/issue/945
|
||||
if hasattr(request, 'app') and hasattr(request.app, 'log'):
|
||||
log = request.app.log
|
||||
else:
|
||||
log = self
|
||||
return log.error(*args, **kwargs)
|
||||
|
||||
def access(self):
|
||||
"""Log an access message to the app.log or global log as appropriate.
|
||||
"""
|
||||
try:
|
||||
return request.app.log.access()
|
||||
except AttributeError:
|
||||
return _cplogging.LogManager.access(self)
|
||||
|
||||
|
||||
log = _GlobalLogManager()
|
||||
# Set a default screen handler on the global log.
|
||||
log.screen = True
|
||||
log.error_file = ''
|
||||
# Using an access file makes CP about 10% slower. Leave off by default.
|
||||
log.access_file = ''
|
||||
|
||||
|
||||
def _buslog(msg, level):
|
||||
log.error(msg, 'ENGINE', severity=level)
|
||||
engine.subscribe('log', _buslog)
|
||||
|
||||
# Helper functions for CP apps #
|
||||
|
||||
|
||||
def expose(func=None, alias=None):
|
||||
"""Expose the function, optionally providing an alias or set of aliases."""
|
||||
def expose_(func):
|
||||
func.exposed = True
|
||||
if alias is not None:
|
||||
if isinstance(alias, basestring):
|
||||
parents[alias.replace(".", "_")] = func
|
||||
else:
|
||||
for a in alias:
|
||||
parents[a.replace(".", "_")] = func
|
||||
return func
|
||||
|
||||
import sys
|
||||
import types
|
||||
if isinstance(func, (types.FunctionType, types.MethodType)):
|
||||
if alias is None:
|
||||
# @expose
|
||||
func.exposed = True
|
||||
return func
|
||||
else:
|
||||
# func = expose(func, alias)
|
||||
parents = sys._getframe(1).f_locals
|
||||
return expose_(func)
|
||||
elif func is None:
|
||||
if alias is None:
|
||||
# @expose()
|
||||
parents = sys._getframe(1).f_locals
|
||||
return expose_
|
||||
else:
|
||||
# @expose(alias="alias") or
|
||||
# @expose(alias=["alias1", "alias2"])
|
||||
parents = sys._getframe(1).f_locals
|
||||
return expose_
|
||||
else:
|
||||
# @expose("alias") or
|
||||
# @expose(["alias1", "alias2"])
|
||||
parents = sys._getframe(1).f_locals
|
||||
alias = func
|
||||
return expose_
|
||||
|
||||
|
||||
def popargs(*args, **kwargs):
|
||||
"""A decorator for _cp_dispatch
|
||||
(cherrypy.dispatch.Dispatcher.dispatch_method_name).
|
||||
|
||||
Optional keyword argument: handler=(Object or Function)
|
||||
|
||||
Provides a _cp_dispatch function that pops off path segments into
|
||||
cherrypy.request.params under the names specified. The dispatch
|
||||
is then forwarded on to the next vpath element.
|
||||
|
||||
Note that any existing (and exposed) member function of the class that
|
||||
popargs is applied to will override that value of the argument. For
|
||||
instance, if you have a method named "list" on the class decorated with
|
||||
popargs, then accessing "/list" will call that function instead of popping
|
||||
it off as the requested parameter. This restriction applies to all
|
||||
_cp_dispatch functions. The only way around this restriction is to create
|
||||
a "blank class" whose only function is to provide _cp_dispatch.
|
||||
|
||||
If there are path elements after the arguments, or more arguments
|
||||
are requested than are available in the vpath, then the 'handler'
|
||||
keyword argument specifies the next object to handle the parameterized
|
||||
request. If handler is not specified or is None, then self is used.
|
||||
If handler is a function rather than an instance, then that function
|
||||
will be called with the args specified and the return value from that
|
||||
function used as the next object INSTEAD of adding the parameters to
|
||||
cherrypy.request.args.
|
||||
|
||||
This decorator may be used in one of two ways:
|
||||
|
||||
As a class decorator:
|
||||
@cherrypy.popargs('year', 'month', 'day')
|
||||
class Blog:
|
||||
def index(self, year=None, month=None, day=None):
|
||||
#Process the parameters here; any url like
|
||||
#/, /2009, /2009/12, or /2009/12/31
|
||||
#will fill in the appropriate parameters.
|
||||
|
||||
def create(self):
|
||||
#This link will still be available at /create. Defined functions
|
||||
#take precedence over arguments.
|
||||
|
||||
Or as a member of a class:
|
||||
class Blog:
|
||||
_cp_dispatch = cherrypy.popargs('year', 'month', 'day')
|
||||
#...
|
||||
|
||||
The handler argument may be used to mix arguments with built in functions.
|
||||
For instance, the following setup allows different activities at the
|
||||
day, month, and year level:
|
||||
|
||||
class DayHandler:
|
||||
def index(self, year, month, day):
|
||||
#Do something with this day; probably list entries
|
||||
|
||||
def delete(self, year, month, day):
|
||||
#Delete all entries for this day
|
||||
|
||||
@cherrypy.popargs('day', handler=DayHandler())
|
||||
class MonthHandler:
|
||||
def index(self, year, month):
|
||||
#Do something with this month; probably list entries
|
||||
|
||||
def delete(self, year, month):
|
||||
#Delete all entries for this month
|
||||
|
||||
@cherrypy.popargs('month', handler=MonthHandler())
|
||||
class YearHandler:
|
||||
def index(self, year):
|
||||
#Do something with this year
|
||||
|
||||
#...
|
||||
|
||||
@cherrypy.popargs('year', handler=YearHandler())
|
||||
class Root:
|
||||
def index(self):
|
||||
#...
|
||||
|
||||
"""
|
||||
|
||||
# Since keyword arg comes after *args, we have to process it ourselves
|
||||
# for lower versions of python.
|
||||
|
||||
handler = None
|
||||
handler_call = False
|
||||
for k, v in kwargs.items():
|
||||
if k == 'handler':
|
||||
handler = v
|
||||
else:
|
||||
raise TypeError(
|
||||
"cherrypy.popargs() got an unexpected keyword argument '{0}'"
|
||||
.format(k)
|
||||
)
|
||||
|
||||
import inspect
|
||||
|
||||
if handler is not None \
|
||||
and (hasattr(handler, '__call__') or inspect.isclass(handler)):
|
||||
handler_call = True
|
||||
|
||||
def decorated(cls_or_self=None, vpath=None):
|
||||
if inspect.isclass(cls_or_self):
|
||||
# cherrypy.popargs is a class decorator
|
||||
cls = cls_or_self
|
||||
setattr(cls, dispatch.Dispatcher.dispatch_method_name, decorated)
|
||||
return cls
|
||||
|
||||
# We're in the actual function
|
||||
self = cls_or_self
|
||||
parms = {}
|
||||
for arg in args:
|
||||
if not vpath:
|
||||
break
|
||||
parms[arg] = vpath.pop(0)
|
||||
|
||||
if handler is not None:
|
||||
if handler_call:
|
||||
return handler(**parms)
|
||||
else:
|
||||
request.params.update(parms)
|
||||
return handler
|
||||
|
||||
request.params.update(parms)
|
||||
|
||||
# If we are the ultimate handler, then to prevent our _cp_dispatch
|
||||
# from being called again, we will resolve remaining elements through
|
||||
# getattr() directly.
|
||||
if vpath:
|
||||
return getattr(self, vpath.pop(0), None)
|
||||
else:
|
||||
return self
|
||||
|
||||
return decorated
|
||||
|
||||
|
||||
def url(path="", qs="", script_name=None, base=None, relative=None):
|
||||
"""Create an absolute URL for the given path.
|
||||
|
||||
If 'path' starts with a slash ('/'), this will return
|
||||
(base + script_name + path + qs).
|
||||
If it does not start with a slash, this returns
|
||||
(base + script_name [+ request.path_info] + path + qs).
|
||||
|
||||
If script_name is None, cherrypy.request will be used
|
||||
to find a script_name, if available.
|
||||
|
||||
If base is None, cherrypy.request.base will be used (if available).
|
||||
Note that you can use cherrypy.tools.proxy to change this.
|
||||
|
||||
Finally, note that this function can be used to obtain an absolute URL
|
||||
for the current request path (minus the querystring) by passing no args.
|
||||
If you call url(qs=cherrypy.request.query_string), you should get the
|
||||
original browser URL (assuming no internal redirections).
|
||||
|
||||
If relative is None or not provided, request.app.relative_urls will
|
||||
be used (if available, else False). If False, the output will be an
|
||||
absolute URL (including the scheme, host, vhost, and script_name).
|
||||
If True, the output will instead be a URL that is relative to the
|
||||
current request path, perhaps including '..' atoms. If relative is
|
||||
the string 'server', the output will instead be a URL that is
|
||||
relative to the server root; i.e., it will start with a slash.
|
||||
"""
|
||||
if isinstance(qs, (tuple, list, dict)):
|
||||
qs = _urlencode(qs)
|
||||
if qs:
|
||||
qs = '?' + qs
|
||||
|
||||
if request.app:
|
||||
if not path.startswith("/"):
|
||||
# Append/remove trailing slash from path_info as needed
|
||||
# (this is to support mistyped URL's without redirecting;
|
||||
# if you want to redirect, use tools.trailing_slash).
|
||||
pi = request.path_info
|
||||
if request.is_index is True:
|
||||
if not pi.endswith('/'):
|
||||
pi = pi + '/'
|
||||
elif request.is_index is False:
|
||||
if pi.endswith('/') and pi != '/':
|
||||
pi = pi[:-1]
|
||||
|
||||
if path == "":
|
||||
path = pi
|
||||
else:
|
||||
path = _urljoin(pi, path)
|
||||
|
||||
if script_name is None:
|
||||
script_name = request.script_name
|
||||
if base is None:
|
||||
base = request.base
|
||||
|
||||
newurl = base + script_name + path + qs
|
||||
else:
|
||||
# No request.app (we're being called outside a request).
|
||||
# We'll have to guess the base from server.* attributes.
|
||||
# This will produce very different results from the above
|
||||
# if you're using vhosts or tools.proxy.
|
||||
if base is None:
|
||||
base = server.base()
|
||||
|
||||
path = (script_name or "") + path
|
||||
newurl = base + path + qs
|
||||
|
||||
if './' in newurl:
|
||||
# Normalize the URL by removing ./ and ../
|
||||
atoms = []
|
||||
for atom in newurl.split('/'):
|
||||
if atom == '.':
|
||||
pass
|
||||
elif atom == '..':
|
||||
atoms.pop()
|
||||
else:
|
||||
atoms.append(atom)
|
||||
newurl = '/'.join(atoms)
|
||||
|
||||
# At this point, we should have a fully-qualified absolute URL.
|
||||
|
||||
if relative is None:
|
||||
relative = getattr(request.app, "relative_urls", False)
|
||||
|
||||
# See http://www.ietf.org/rfc/rfc2396.txt
|
||||
if relative == 'server':
|
||||
# "A relative reference beginning with a single slash character is
|
||||
# termed an absolute-path reference, as defined by <abs_path>..."
|
||||
# This is also sometimes called "server-relative".
|
||||
newurl = '/' + '/'.join(newurl.split('/', 3)[3:])
|
||||
elif relative:
|
||||
# "A relative reference that does not begin with a scheme name
|
||||
# or a slash character is termed a relative-path reference."
|
||||
old = url(relative=False).split('/')[:-1]
|
||||
new = newurl.split('/')
|
||||
while old and new:
|
||||
a, b = old[0], new[0]
|
||||
if a != b:
|
||||
break
|
||||
old.pop(0)
|
||||
new.pop(0)
|
||||
new = (['..'] * len(old)) + new
|
||||
newurl = '/'.join(new)
|
||||
|
||||
return newurl
|
||||
|
||||
|
||||
# import _cpconfig last so it can reference other top-level objects
|
||||
from cherrypy import _cpconfig
|
||||
# Use _global_conf_alias so quickstart can use 'config' as an arg
|
||||
# without shadowing cherrypy.config.
|
||||
config = _global_conf_alias = _cpconfig.Config()
|
||||
config.defaults = {
|
||||
'tools.log_tracebacks.on': True,
|
||||
'tools.log_headers.on': True,
|
||||
'tools.trailing_slash.on': True,
|
||||
'tools.encode.on': True
|
||||
}
|
||||
config.namespaces["log"] = lambda k, v: setattr(log, k, v)
|
||||
config.namespaces["checker"] = lambda k, v: setattr(checker, k, v)
|
||||
# Must reset to get our defaults applied.
|
||||
config.reset()
|
||||
|
||||
from cherrypy import _cpchecker
|
||||
checker = _cpchecker.Checker()
|
||||
engine.subscribe('start', checker)
|
|
@ -1,332 +0,0 @@
|
|||
import os
|
||||
import warnings
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import iteritems, copykeys, builtins
|
||||
|
||||
|
||||
class Checker(object):
|
||||
|
||||
"""A checker for CherryPy sites and their mounted applications.
|
||||
|
||||
When this object is called at engine startup, it executes each
|
||||
of its own methods whose names start with ``check_``. If you wish
|
||||
to disable selected checks, simply add a line in your global
|
||||
config which sets the appropriate method to False::
|
||||
|
||||
[global]
|
||||
checker.check_skipped_app_config = False
|
||||
|
||||
You may also dynamically add or replace ``check_*`` methods in this way.
|
||||
"""
|
||||
|
||||
on = True
|
||||
"""If True (the default), run all checks; if False, turn off all checks."""
|
||||
|
||||
def __init__(self):
|
||||
self._populate_known_types()
|
||||
|
||||
def __call__(self):
|
||||
"""Run all check_* methods."""
|
||||
if self.on:
|
||||
oldformatwarning = warnings.formatwarning
|
||||
warnings.formatwarning = self.formatwarning
|
||||
try:
|
||||
for name in dir(self):
|
||||
if name.startswith("check_"):
|
||||
method = getattr(self, name)
|
||||
if method and hasattr(method, '__call__'):
|
||||
method()
|
||||
finally:
|
||||
warnings.formatwarning = oldformatwarning
|
||||
|
||||
def formatwarning(self, message, category, filename, lineno, line=None):
|
||||
"""Function to format a warning."""
|
||||
return "CherryPy Checker:\n%s\n\n" % message
|
||||
|
||||
# This value should be set inside _cpconfig.
|
||||
global_config_contained_paths = False
|
||||
|
||||
def check_app_config_entries_dont_start_with_script_name(self):
|
||||
"""Check for Application config with sections that repeat script_name.
|
||||
"""
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
if not app.config:
|
||||
continue
|
||||
if sn == '':
|
||||
continue
|
||||
sn_atoms = sn.strip("/").split("/")
|
||||
for key in app.config.keys():
|
||||
key_atoms = key.strip("/").split("/")
|
||||
if key_atoms[:len(sn_atoms)] == sn_atoms:
|
||||
warnings.warn(
|
||||
"The application mounted at %r has config "
|
||||
"entries that start with its script name: %r" % (sn,
|
||||
key))
|
||||
|
||||
def check_site_config_entries_in_app_config(self):
|
||||
"""Check for mounted Applications that have site-scoped config."""
|
||||
for sn, app in iteritems(cherrypy.tree.apps):
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
|
||||
msg = []
|
||||
for section, entries in iteritems(app.config):
|
||||
if section.startswith('/'):
|
||||
for key, value in iteritems(entries):
|
||||
for n in ("engine.", "server.", "tree.", "checker."):
|
||||
if key.startswith(n):
|
||||
msg.append("[%s] %s = %s" %
|
||||
(section, key, value))
|
||||
if msg:
|
||||
msg.insert(0,
|
||||
"The application mounted at %r contains the "
|
||||
"following config entries, which are only allowed "
|
||||
"in site-wide config. Move them to a [global] "
|
||||
"section and pass them to cherrypy.config.update() "
|
||||
"instead of tree.mount()." % sn)
|
||||
warnings.warn(os.linesep.join(msg))
|
||||
|
||||
def check_skipped_app_config(self):
|
||||
"""Check for mounted Applications that have no config."""
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
if not app.config:
|
||||
msg = "The Application mounted at %r has an empty config." % sn
|
||||
if self.global_config_contained_paths:
|
||||
msg += (" It looks like the config you passed to "
|
||||
"cherrypy.config.update() contains application-"
|
||||
"specific sections. You must explicitly pass "
|
||||
"application config via "
|
||||
"cherrypy.tree.mount(..., config=app_config)")
|
||||
warnings.warn(msg)
|
||||
return
|
||||
|
||||
def check_app_config_brackets(self):
|
||||
"""Check for Application config with extraneous brackets in section
|
||||
names.
|
||||
"""
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
if not app.config:
|
||||
continue
|
||||
for key in app.config.keys():
|
||||
if key.startswith("[") or key.endswith("]"):
|
||||
warnings.warn(
|
||||
"The application mounted at %r has config "
|
||||
"section names with extraneous brackets: %r. "
|
||||
"Config *files* need brackets; config *dicts* "
|
||||
"(e.g. passed to tree.mount) do not." % (sn, key))
|
||||
|
||||
def check_static_paths(self):
|
||||
"""Check Application config for incorrect static paths."""
|
||||
# Use the dummy Request object in the main thread.
|
||||
request = cherrypy.request
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
request.app = app
|
||||
for section in app.config:
|
||||
# get_resource will populate request.config
|
||||
request.get_resource(section + "/dummy.html")
|
||||
conf = request.config.get
|
||||
|
||||
if conf("tools.staticdir.on", False):
|
||||
msg = ""
|
||||
root = conf("tools.staticdir.root")
|
||||
dir = conf("tools.staticdir.dir")
|
||||
if dir is None:
|
||||
msg = "tools.staticdir.dir is not set."
|
||||
else:
|
||||
fulldir = ""
|
||||
if os.path.isabs(dir):
|
||||
fulldir = dir
|
||||
if root:
|
||||
msg = ("dir is an absolute path, even "
|
||||
"though a root is provided.")
|
||||
testdir = os.path.join(root, dir[1:])
|
||||
if os.path.exists(testdir):
|
||||
msg += (
|
||||
"\nIf you meant to serve the "
|
||||
"filesystem folder at %r, remove the "
|
||||
"leading slash from dir." % (testdir,))
|
||||
else:
|
||||
if not root:
|
||||
msg = (
|
||||
"dir is a relative path and "
|
||||
"no root provided.")
|
||||
else:
|
||||
fulldir = os.path.join(root, dir)
|
||||
if not os.path.isabs(fulldir):
|
||||
msg = ("%r is not an absolute path." % (
|
||||
fulldir,))
|
||||
|
||||
if fulldir and not os.path.exists(fulldir):
|
||||
if msg:
|
||||
msg += "\n"
|
||||
msg += ("%r (root + dir) is not an existing "
|
||||
"filesystem path." % fulldir)
|
||||
|
||||
if msg:
|
||||
warnings.warn("%s\nsection: [%s]\nroot: %r\ndir: %r"
|
||||
% (msg, section, root, dir))
|
||||
|
||||
# -------------------------- Compatibility -------------------------- #
|
||||
obsolete = {
|
||||
'server.default_content_type': 'tools.response_headers.headers',
|
||||
'log_access_file': 'log.access_file',
|
||||
'log_config_options': None,
|
||||
'log_file': 'log.error_file',
|
||||
'log_file_not_found': None,
|
||||
'log_request_headers': 'tools.log_headers.on',
|
||||
'log_to_screen': 'log.screen',
|
||||
'show_tracebacks': 'request.show_tracebacks',
|
||||
'throw_errors': 'request.throw_errors',
|
||||
'profiler.on': ('cherrypy.tree.mount(profiler.make_app('
|
||||
'cherrypy.Application(Root())))'),
|
||||
}
|
||||
|
||||
deprecated = {}
|
||||
|
||||
def _compat(self, config):
|
||||
"""Process config and warn on each obsolete or deprecated entry."""
|
||||
for section, conf in config.items():
|
||||
if isinstance(conf, dict):
|
||||
for k, v in conf.items():
|
||||
if k in self.obsolete:
|
||||
warnings.warn("%r is obsolete. Use %r instead.\n"
|
||||
"section: [%s]" %
|
||||
(k, self.obsolete[k], section))
|
||||
elif k in self.deprecated:
|
||||
warnings.warn("%r is deprecated. Use %r instead.\n"
|
||||
"section: [%s]" %
|
||||
(k, self.deprecated[k], section))
|
||||
else:
|
||||
if section in self.obsolete:
|
||||
warnings.warn("%r is obsolete. Use %r instead."
|
||||
% (section, self.obsolete[section]))
|
||||
elif section in self.deprecated:
|
||||
warnings.warn("%r is deprecated. Use %r instead."
|
||||
% (section, self.deprecated[section]))
|
||||
|
||||
def check_compatibility(self):
|
||||
"""Process config and warn on each obsolete or deprecated entry."""
|
||||
self._compat(cherrypy.config)
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
self._compat(app.config)
|
||||
|
||||
# ------------------------ Known Namespaces ------------------------ #
|
||||
extra_config_namespaces = []
|
||||
|
||||
def _known_ns(self, app):
|
||||
ns = ["wsgi"]
|
||||
ns.extend(copykeys(app.toolboxes))
|
||||
ns.extend(copykeys(app.namespaces))
|
||||
ns.extend(copykeys(app.request_class.namespaces))
|
||||
ns.extend(copykeys(cherrypy.config.namespaces))
|
||||
ns += self.extra_config_namespaces
|
||||
|
||||
for section, conf in app.config.items():
|
||||
is_path_section = section.startswith("/")
|
||||
if is_path_section and isinstance(conf, dict):
|
||||
for k, v in conf.items():
|
||||
atoms = k.split(".")
|
||||
if len(atoms) > 1:
|
||||
if atoms[0] not in ns:
|
||||
# Spit out a special warning if a known
|
||||
# namespace is preceded by "cherrypy."
|
||||
if atoms[0] == "cherrypy" and atoms[1] in ns:
|
||||
msg = (
|
||||
"The config entry %r is invalid; "
|
||||
"try %r instead.\nsection: [%s]"
|
||||
% (k, ".".join(atoms[1:]), section))
|
||||
else:
|
||||
msg = (
|
||||
"The config entry %r is invalid, "
|
||||
"because the %r config namespace "
|
||||
"is unknown.\n"
|
||||
"section: [%s]" % (k, atoms[0], section))
|
||||
warnings.warn(msg)
|
||||
elif atoms[0] == "tools":
|
||||
if atoms[1] not in dir(cherrypy.tools):
|
||||
msg = (
|
||||
"The config entry %r may be invalid, "
|
||||
"because the %r tool was not found.\n"
|
||||
"section: [%s]" % (k, atoms[1], section))
|
||||
warnings.warn(msg)
|
||||
|
||||
def check_config_namespaces(self):
|
||||
"""Process config and warn on each unknown config namespace."""
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
self._known_ns(app)
|
||||
|
||||
# -------------------------- Config Types -------------------------- #
|
||||
known_config_types = {}
|
||||
|
||||
def _populate_known_types(self):
|
||||
b = [x for x in vars(builtins).values()
|
||||
if type(x) is type(str)]
|
||||
|
||||
def traverse(obj, namespace):
|
||||
for name in dir(obj):
|
||||
# Hack for 3.2's warning about body_params
|
||||
if name == 'body_params':
|
||||
continue
|
||||
vtype = type(getattr(obj, name, None))
|
||||
if vtype in b:
|
||||
self.known_config_types[namespace + "." + name] = vtype
|
||||
|
||||
traverse(cherrypy.request, "request")
|
||||
traverse(cherrypy.response, "response")
|
||||
traverse(cherrypy.server, "server")
|
||||
traverse(cherrypy.engine, "engine")
|
||||
traverse(cherrypy.log, "log")
|
||||
|
||||
def _known_types(self, config):
|
||||
msg = ("The config entry %r in section %r is of type %r, "
|
||||
"which does not match the expected type %r.")
|
||||
|
||||
for section, conf in config.items():
|
||||
if isinstance(conf, dict):
|
||||
for k, v in conf.items():
|
||||
if v is not None:
|
||||
expected_type = self.known_config_types.get(k, None)
|
||||
vtype = type(v)
|
||||
if expected_type and vtype != expected_type:
|
||||
warnings.warn(msg % (k, section, vtype.__name__,
|
||||
expected_type.__name__))
|
||||
else:
|
||||
k, v = section, conf
|
||||
if v is not None:
|
||||
expected_type = self.known_config_types.get(k, None)
|
||||
vtype = type(v)
|
||||
if expected_type and vtype != expected_type:
|
||||
warnings.warn(msg % (k, section, vtype.__name__,
|
||||
expected_type.__name__))
|
||||
|
||||
def check_config_types(self):
|
||||
"""Assert that config values are of the same type as default values."""
|
||||
self._known_types(cherrypy.config)
|
||||
for sn, app in cherrypy.tree.apps.items():
|
||||
if not isinstance(app, cherrypy.Application):
|
||||
continue
|
||||
self._known_types(app.config)
|
||||
|
||||
# -------------------- Specific config warnings -------------------- #
|
||||
def check_localhost(self):
|
||||
"""Warn if any socket_host is 'localhost'. See #711."""
|
||||
for k, v in cherrypy.config.items():
|
||||
if k == 'server.socket_host' and v == 'localhost':
|
||||
warnings.warn("The use of 'localhost' as a socket host can "
|
||||
"cause problems on newer systems, since "
|
||||
"'localhost' can map to either an IPv4 or an "
|
||||
"IPv6 address. You should use '127.0.0.1' "
|
||||
"or '[::1]' instead.")
|
|
@ -1,383 +0,0 @@
|
|||
"""Compatibility code for using CherryPy with various versions of Python.
|
||||
|
||||
CherryPy 3.2 is compatible with Python versions 2.3+. This module provides a
|
||||
useful abstraction over the differences between Python versions, sometimes by
|
||||
preferring a newer idiom, sometimes an older one, and sometimes a custom one.
|
||||
|
||||
In particular, Python 2 uses str and '' for byte strings, while Python 3
|
||||
uses str and '' for unicode strings. We will call each of these the 'native
|
||||
string' type for each version. Because of this major difference, this module
|
||||
provides new 'bytestr', 'unicodestr', and 'nativestr' attributes, as well as
|
||||
two functions: 'ntob', which translates native strings (of type 'str') into
|
||||
byte strings regardless of Python version, and 'ntou', which translates native
|
||||
strings to unicode strings. This also provides a 'BytesIO' name for dealing
|
||||
specifically with bytes, and a 'StringIO' name for dealing with native strings.
|
||||
It also provides a 'base64_decode' function with native strings as input and
|
||||
output.
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
py3k = True
|
||||
bytestr = bytes
|
||||
unicodestr = str
|
||||
nativestr = unicodestr
|
||||
basestring = (bytes, str)
|
||||
|
||||
def ntob(n, encoding='ISO-8859-1'):
|
||||
"""Return the given native string as a byte string in the given
|
||||
encoding.
|
||||
"""
|
||||
assert_native(n)
|
||||
# In Python 3, the native string type is unicode
|
||||
return n.encode(encoding)
|
||||
|
||||
def ntou(n, encoding='ISO-8859-1'):
|
||||
"""Return the given native string as a unicode string with the given
|
||||
encoding.
|
||||
"""
|
||||
assert_native(n)
|
||||
# In Python 3, the native string type is unicode
|
||||
return n
|
||||
|
||||
def tonative(n, encoding='ISO-8859-1'):
|
||||
"""Return the given string as a native string in the given encoding."""
|
||||
# In Python 3, the native string type is unicode
|
||||
if isinstance(n, bytes):
|
||||
return n.decode(encoding)
|
||||
return n
|
||||
# type("")
|
||||
from io import StringIO
|
||||
# bytes:
|
||||
from io import BytesIO as BytesIO
|
||||
else:
|
||||
# Python 2
|
||||
py3k = False
|
||||
bytestr = str
|
||||
unicodestr = unicode
|
||||
nativestr = bytestr
|
||||
basestring = basestring
|
||||
|
||||
def ntob(n, encoding='ISO-8859-1'):
|
||||
"""Return the given native string as a byte string in the given
|
||||
encoding.
|
||||
"""
|
||||
assert_native(n)
|
||||
# In Python 2, the native string type is bytes. Assume it's already
|
||||
# in the given encoding, which for ISO-8859-1 is almost always what
|
||||
# was intended.
|
||||
return n
|
||||
|
||||
def ntou(n, encoding='ISO-8859-1'):
|
||||
"""Return the given native string as a unicode string with the given
|
||||
encoding.
|
||||
"""
|
||||
assert_native(n)
|
||||
# In Python 2, the native string type is bytes.
|
||||
# First, check for the special encoding 'escape'. The test suite uses
|
||||
# this to signal that it wants to pass a string with embedded \uXXXX
|
||||
# escapes, but without having to prefix it with u'' for Python 2,
|
||||
# but no prefix for Python 3.
|
||||
if encoding == 'escape':
|
||||
return unicode(
|
||||
re.sub(r'\\u([0-9a-zA-Z]{4})',
|
||||
lambda m: unichr(int(m.group(1), 16)),
|
||||
n.decode('ISO-8859-1')))
|
||||
# Assume it's already in the given encoding, which for ISO-8859-1
|
||||
# is almost always what was intended.
|
||||
return n.decode(encoding)
|
||||
|
||||
def tonative(n, encoding='ISO-8859-1'):
|
||||
"""Return the given string as a native string in the given encoding."""
|
||||
# In Python 2, the native string type is bytes.
|
||||
if isinstance(n, unicode):
|
||||
return n.encode(encoding)
|
||||
return n
|
||||
try:
|
||||
# type("")
|
||||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
# type("")
|
||||
from StringIO import StringIO
|
||||
# bytes:
|
||||
BytesIO = StringIO
|
||||
|
||||
|
||||
def assert_native(n):
|
||||
if not isinstance(n, nativestr):
|
||||
raise TypeError("n must be a native str (got %s)" % type(n).__name__)
|
||||
|
||||
try:
|
||||
set = set
|
||||
except NameError:
|
||||
from sets import Set as set
|
||||
|
||||
try:
|
||||
# Python 3.1+
|
||||
from base64 import decodebytes as _base64_decodebytes
|
||||
except ImportError:
|
||||
# Python 3.0-
|
||||
# since CherryPy claims compability with Python 2.3, we must use
|
||||
# the legacy API of base64
|
||||
from base64 import decodestring as _base64_decodebytes
|
||||
|
||||
|
||||
def base64_decode(n, encoding='ISO-8859-1'):
|
||||
"""Return the native string base64-decoded (as a native string)."""
|
||||
if isinstance(n, unicodestr):
|
||||
b = n.encode(encoding)
|
||||
else:
|
||||
b = n
|
||||
b = _base64_decodebytes(b)
|
||||
if nativestr is unicodestr:
|
||||
return b.decode(encoding)
|
||||
else:
|
||||
return b
|
||||
|
||||
try:
|
||||
# Python 2.5+
|
||||
from hashlib import md5
|
||||
except ImportError:
|
||||
from md5 import new as md5
|
||||
|
||||
try:
|
||||
# Python 2.5+
|
||||
from hashlib import sha1 as sha
|
||||
except ImportError:
|
||||
from sha import new as sha
|
||||
|
||||
try:
|
||||
sorted = sorted
|
||||
except NameError:
|
||||
def sorted(i):
|
||||
i = i[:]
|
||||
i.sort()
|
||||
return i
|
||||
|
||||
try:
|
||||
reversed = reversed
|
||||
except NameError:
|
||||
def reversed(x):
|
||||
i = len(x)
|
||||
while i > 0:
|
||||
i -= 1
|
||||
yield x[i]
|
||||
|
||||
try:
|
||||
# Python 3
|
||||
from urllib.parse import urljoin, urlencode
|
||||
from urllib.parse import quote, quote_plus
|
||||
from urllib.request import unquote, urlopen
|
||||
from urllib.request import parse_http_list, parse_keqv_list
|
||||
except ImportError:
|
||||
# Python 2
|
||||
from urlparse import urljoin
|
||||
from urllib import urlencode, urlopen
|
||||
from urllib import quote, quote_plus
|
||||
from urllib import unquote
|
||||
from urllib2 import parse_http_list, parse_keqv_list
|
||||
|
||||
try:
|
||||
from threading import local as threadlocal
|
||||
except ImportError:
|
||||
from cherrypy._cpthreadinglocal import local as threadlocal
|
||||
|
||||
try:
|
||||
dict.iteritems
|
||||
# Python 2
|
||||
iteritems = lambda d: d.iteritems()
|
||||
copyitems = lambda d: d.items()
|
||||
except AttributeError:
|
||||
# Python 3
|
||||
iteritems = lambda d: d.items()
|
||||
copyitems = lambda d: list(d.items())
|
||||
|
||||
try:
|
||||
dict.iterkeys
|
||||
# Python 2
|
||||
iterkeys = lambda d: d.iterkeys()
|
||||
copykeys = lambda d: d.keys()
|
||||
except AttributeError:
|
||||
# Python 3
|
||||
iterkeys = lambda d: d.keys()
|
||||
copykeys = lambda d: list(d.keys())
|
||||
|
||||
try:
|
||||
dict.itervalues
|
||||
# Python 2
|
||||
itervalues = lambda d: d.itervalues()
|
||||
copyvalues = lambda d: d.values()
|
||||
except AttributeError:
|
||||
# Python 3
|
||||
itervalues = lambda d: d.values()
|
||||
copyvalues = lambda d: list(d.values())
|
||||
|
||||
try:
|
||||
# Python 3
|
||||
import builtins
|
||||
except ImportError:
|
||||
# Python 2
|
||||
import __builtin__ as builtins
|
||||
|
||||
try:
|
||||
# Python 2. We try Python 2 first clients on Python 2
|
||||
# don't try to import the 'http' module from cherrypy.lib
|
||||
from Cookie import SimpleCookie, CookieError
|
||||
from httplib import BadStatusLine, HTTPConnection, IncompleteRead
|
||||
from httplib import NotConnected
|
||||
from BaseHTTPServer import BaseHTTPRequestHandler
|
||||
except ImportError:
|
||||
# Python 3
|
||||
from http.cookies import SimpleCookie, CookieError
|
||||
from http.client import BadStatusLine, HTTPConnection, IncompleteRead
|
||||
from http.client import NotConnected
|
||||
from http.server import BaseHTTPRequestHandler
|
||||
|
||||
# Some platforms don't expose HTTPSConnection, so handle it separately
|
||||
if py3k:
|
||||
try:
|
||||
from http.client import HTTPSConnection
|
||||
except ImportError:
|
||||
# Some platforms which don't have SSL don't expose HTTPSConnection
|
||||
HTTPSConnection = None
|
||||
else:
|
||||
try:
|
||||
from httplib import HTTPSConnection
|
||||
except ImportError:
|
||||
HTTPSConnection = None
|
||||
|
||||
try:
|
||||
# Python 2
|
||||
xrange = xrange
|
||||
except NameError:
|
||||
# Python 3
|
||||
xrange = range
|
||||
|
||||
import threading
|
||||
if hasattr(threading.Thread, "daemon"):
|
||||
# Python 2.6+
|
||||
def get_daemon(t):
|
||||
return t.daemon
|
||||
|
||||
def set_daemon(t, val):
|
||||
t.daemon = val
|
||||
else:
|
||||
def get_daemon(t):
|
||||
return t.isDaemon()
|
||||
|
||||
def set_daemon(t, val):
|
||||
t.setDaemon(val)
|
||||
|
||||
try:
|
||||
from email.utils import formatdate
|
||||
|
||||
def HTTPDate(timeval=None):
|
||||
return formatdate(timeval, usegmt=True)
|
||||
except ImportError:
|
||||
from rfc822 import formatdate as HTTPDate
|
||||
|
||||
try:
|
||||
# Python 3
|
||||
from urllib.parse import unquote as parse_unquote
|
||||
|
||||
def unquote_qs(atom, encoding, errors='strict'):
|
||||
return parse_unquote(
|
||||
atom.replace('+', ' '),
|
||||
encoding=encoding,
|
||||
errors=errors)
|
||||
except ImportError:
|
||||
# Python 2
|
||||
from urllib import unquote as parse_unquote
|
||||
|
||||
def unquote_qs(atom, encoding, errors='strict'):
|
||||
return parse_unquote(atom.replace('+', ' ')).decode(encoding, errors)
|
||||
|
||||
try:
|
||||
# Prefer simplejson, which is usually more advanced than the builtin
|
||||
# module.
|
||||
import simplejson as json
|
||||
json_decode = json.JSONDecoder().decode
|
||||
_json_encode = json.JSONEncoder().iterencode
|
||||
except ImportError:
|
||||
if sys.version_info >= (2, 6):
|
||||
# Python >=2.6 : json is part of the standard library
|
||||
import json
|
||||
json_decode = json.JSONDecoder().decode
|
||||
_json_encode = json.JSONEncoder().iterencode
|
||||
else:
|
||||
json = None
|
||||
|
||||
def json_decode(s):
|
||||
raise ValueError('No JSON library is available')
|
||||
|
||||
def _json_encode(s):
|
||||
raise ValueError('No JSON library is available')
|
||||
finally:
|
||||
if json and py3k:
|
||||
# The two Python 3 implementations (simplejson/json)
|
||||
# outputs str. We need bytes.
|
||||
def json_encode(value):
|
||||
for chunk in _json_encode(value):
|
||||
yield chunk.encode('utf8')
|
||||
else:
|
||||
json_encode = _json_encode
|
||||
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError:
|
||||
# In Python 2, pickle is a Python version.
|
||||
# In Python 3, pickle is the sped-up C version.
|
||||
import pickle
|
||||
|
||||
try:
|
||||
os.urandom(20)
|
||||
import binascii
|
||||
|
||||
def random20():
|
||||
return binascii.hexlify(os.urandom(20)).decode('ascii')
|
||||
except (AttributeError, NotImplementedError):
|
||||
import random
|
||||
# os.urandom not available until Python 2.4. Fall back to random.random.
|
||||
|
||||
def random20():
|
||||
return sha('%s' % random.random()).hexdigest()
|
||||
|
||||
try:
|
||||
from _thread import get_ident as get_thread_ident
|
||||
except ImportError:
|
||||
from thread import get_ident as get_thread_ident
|
||||
|
||||
try:
|
||||
# Python 3
|
||||
next = next
|
||||
except NameError:
|
||||
# Python 2
|
||||
def next(i):
|
||||
return i.next()
|
||||
|
||||
if sys.version_info >= (3, 3):
|
||||
Timer = threading.Timer
|
||||
Event = threading.Event
|
||||
else:
|
||||
# Python 3.2 and earlier
|
||||
Timer = threading._Timer
|
||||
Event = threading._Event
|
||||
|
||||
# Prior to Python 2.6, the Thread class did not have a .daemon property.
|
||||
# This mix-in adds that property.
|
||||
|
||||
|
||||
class SetDaemonProperty:
|
||||
|
||||
def __get_daemon(self):
|
||||
return self.isDaemon()
|
||||
|
||||
def __set_daemon(self, daemon):
|
||||
self.setDaemon(daemon)
|
||||
|
||||
if sys.version_info < (2, 6):
|
||||
daemon = property(__get_daemon, __set_daemon)
|
File diff suppressed because it is too large
Load Diff
|
@ -1,317 +0,0 @@
|
|||
"""
|
||||
Configuration system for CherryPy.
|
||||
|
||||
Configuration in CherryPy is implemented via dictionaries. Keys are strings
|
||||
which name the mapped value, which may be of any type.
|
||||
|
||||
|
||||
Architecture
|
||||
------------
|
||||
|
||||
CherryPy Requests are part of an Application, which runs in a global context,
|
||||
and configuration data may apply to any of those three scopes:
|
||||
|
||||
Global
|
||||
Configuration entries which apply everywhere are stored in
|
||||
cherrypy.config.
|
||||
|
||||
Application
|
||||
Entries which apply to each mounted application are stored
|
||||
on the Application object itself, as 'app.config'. This is a two-level
|
||||
dict where each key is a path, or "relative URL" (for example, "/" or
|
||||
"/path/to/my/page"), and each value is a config dict. Usually, this
|
||||
data is provided in the call to tree.mount(root(), config=conf),
|
||||
although you may also use app.merge(conf).
|
||||
|
||||
Request
|
||||
Each Request object possesses a single 'Request.config' dict.
|
||||
Early in the request process, this dict is populated by merging global
|
||||
config entries, Application entries (whose path equals or is a parent
|
||||
of Request.path_info), and any config acquired while looking up the
|
||||
page handler (see next).
|
||||
|
||||
|
||||
Declaration
|
||||
-----------
|
||||
|
||||
Configuration data may be supplied as a Python dictionary, as a filename,
|
||||
or as an open file object. When you supply a filename or file, CherryPy
|
||||
uses Python's builtin ConfigParser; you declare Application config by
|
||||
writing each path as a section header::
|
||||
|
||||
[/path/to/my/page]
|
||||
request.stream = True
|
||||
|
||||
To declare global configuration entries, place them in a [global] section.
|
||||
|
||||
You may also declare config entries directly on the classes and methods
|
||||
(page handlers) that make up your CherryPy application via the ``_cp_config``
|
||||
attribute. For example::
|
||||
|
||||
class Demo:
|
||||
_cp_config = {'tools.gzip.on': True}
|
||||
|
||||
def index(self):
|
||||
return "Hello world"
|
||||
index.exposed = True
|
||||
index._cp_config = {'request.show_tracebacks': False}
|
||||
|
||||
.. note::
|
||||
|
||||
This behavior is only guaranteed for the default dispatcher.
|
||||
Other dispatchers may have different restrictions on where
|
||||
you can attach _cp_config attributes.
|
||||
|
||||
|
||||
Namespaces
|
||||
----------
|
||||
|
||||
Configuration keys are separated into namespaces by the first "." in the key.
|
||||
Current namespaces:
|
||||
|
||||
engine
|
||||
Controls the 'application engine', including autoreload.
|
||||
These can only be declared in the global config.
|
||||
|
||||
tree
|
||||
Grafts cherrypy.Application objects onto cherrypy.tree.
|
||||
These can only be declared in the global config.
|
||||
|
||||
hooks
|
||||
Declares additional request-processing functions.
|
||||
|
||||
log
|
||||
Configures the logging for each application.
|
||||
These can only be declared in the global or / config.
|
||||
|
||||
request
|
||||
Adds attributes to each Request.
|
||||
|
||||
response
|
||||
Adds attributes to each Response.
|
||||
|
||||
server
|
||||
Controls the default HTTP server via cherrypy.server.
|
||||
These can only be declared in the global config.
|
||||
|
||||
tools
|
||||
Runs and configures additional request-processing packages.
|
||||
|
||||
wsgi
|
||||
Adds WSGI middleware to an Application's "pipeline".
|
||||
These can only be declared in the app's root config ("/").
|
||||
|
||||
checker
|
||||
Controls the 'checker', which looks for common errors in
|
||||
app state (including config) when the engine starts.
|
||||
Global config only.
|
||||
|
||||
The only key that does not exist in a namespace is the "environment" entry.
|
||||
This special entry 'imports' other config entries from a template stored in
|
||||
cherrypy._cpconfig.environments[environment]. It only applies to the global
|
||||
config, and only when you use cherrypy.config.update.
|
||||
|
||||
You can define your own namespaces to be called at the Global, Application,
|
||||
or Request level, by adding a named handler to cherrypy.config.namespaces,
|
||||
app.namespaces, or app.request_class.namespaces. The name can
|
||||
be any string, and the handler must be either a callable or a (Python 2.5
|
||||
style) context manager.
|
||||
"""
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import set, basestring
|
||||
from cherrypy.lib import reprconf
|
||||
|
||||
# Deprecated in CherryPy 3.2--remove in 3.3
|
||||
NamespaceSet = reprconf.NamespaceSet
|
||||
|
||||
|
||||
def merge(base, other):
|
||||
"""Merge one app config (from a dict, file, or filename) into another.
|
||||
|
||||
If the given config is a filename, it will be appended to
|
||||
the list of files to monitor for "autoreload" changes.
|
||||
"""
|
||||
if isinstance(other, basestring):
|
||||
cherrypy.engine.autoreload.files.add(other)
|
||||
|
||||
# Load other into base
|
||||
for section, value_map in reprconf.as_dict(other).items():
|
||||
if not isinstance(value_map, dict):
|
||||
raise ValueError(
|
||||
"Application config must include section headers, but the "
|
||||
"config you tried to merge doesn't have any sections. "
|
||||
"Wrap your config in another dict with paths as section "
|
||||
"headers, for example: {'/': config}.")
|
||||
base.setdefault(section, {}).update(value_map)
|
||||
|
||||
|
||||
class Config(reprconf.Config):
|
||||
|
||||
"""The 'global' configuration data for the entire CherryPy process."""
|
||||
|
||||
def update(self, config):
|
||||
"""Update self from a dict, file or filename."""
|
||||
if isinstance(config, basestring):
|
||||
# Filename
|
||||
cherrypy.engine.autoreload.files.add(config)
|
||||
reprconf.Config.update(self, config)
|
||||
|
||||
def _apply(self, config):
|
||||
"""Update self from a dict."""
|
||||
if isinstance(config.get("global"), dict):
|
||||
if len(config) > 1:
|
||||
cherrypy.checker.global_config_contained_paths = True
|
||||
config = config["global"]
|
||||
if 'tools.staticdir.dir' in config:
|
||||
config['tools.staticdir.section'] = "global"
|
||||
reprconf.Config._apply(self, config)
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
"""Decorator for page handlers to set _cp_config."""
|
||||
if args:
|
||||
raise TypeError(
|
||||
"The cherrypy.config decorator does not accept positional "
|
||||
"arguments; you must use keyword arguments.")
|
||||
|
||||
def tool_decorator(f):
|
||||
if not hasattr(f, "_cp_config"):
|
||||
f._cp_config = {}
|
||||
for k, v in kwargs.items():
|
||||
f._cp_config[k] = v
|
||||
return f
|
||||
return tool_decorator
|
||||
|
||||
|
||||
# Sphinx begin config.environments
|
||||
Config.environments = environments = {
|
||||
"staging": {
|
||||
'engine.autoreload.on': False,
|
||||
'checker.on': False,
|
||||
'tools.log_headers.on': False,
|
||||
'request.show_tracebacks': False,
|
||||
'request.show_mismatched_params': False,
|
||||
},
|
||||
"production": {
|
||||
'engine.autoreload.on': False,
|
||||
'checker.on': False,
|
||||
'tools.log_headers.on': False,
|
||||
'request.show_tracebacks': False,
|
||||
'request.show_mismatched_params': False,
|
||||
'log.screen': False,
|
||||
},
|
||||
"embedded": {
|
||||
# For use with CherryPy embedded in another deployment stack.
|
||||
'engine.autoreload.on': False,
|
||||
'checker.on': False,
|
||||
'tools.log_headers.on': False,
|
||||
'request.show_tracebacks': False,
|
||||
'request.show_mismatched_params': False,
|
||||
'log.screen': False,
|
||||
'engine.SIGHUP': None,
|
||||
'engine.SIGTERM': None,
|
||||
},
|
||||
"test_suite": {
|
||||
'engine.autoreload.on': False,
|
||||
'checker.on': False,
|
||||
'tools.log_headers.on': False,
|
||||
'request.show_tracebacks': True,
|
||||
'request.show_mismatched_params': True,
|
||||
'log.screen': False,
|
||||
},
|
||||
}
|
||||
# Sphinx end config.environments
|
||||
|
||||
|
||||
def _server_namespace_handler(k, v):
|
||||
"""Config handler for the "server" namespace."""
|
||||
atoms = k.split(".", 1)
|
||||
if len(atoms) > 1:
|
||||
# Special-case config keys of the form 'server.servername.socket_port'
|
||||
# to configure additional HTTP servers.
|
||||
if not hasattr(cherrypy, "servers"):
|
||||
cherrypy.servers = {}
|
||||
|
||||
servername, k = atoms
|
||||
if servername not in cherrypy.servers:
|
||||
from cherrypy import _cpserver
|
||||
cherrypy.servers[servername] = _cpserver.Server()
|
||||
# On by default, but 'on = False' can unsubscribe it (see below).
|
||||
cherrypy.servers[servername].subscribe()
|
||||
|
||||
if k == 'on':
|
||||
if v:
|
||||
cherrypy.servers[servername].subscribe()
|
||||
else:
|
||||
cherrypy.servers[servername].unsubscribe()
|
||||
else:
|
||||
setattr(cherrypy.servers[servername], k, v)
|
||||
else:
|
||||
setattr(cherrypy.server, k, v)
|
||||
Config.namespaces["server"] = _server_namespace_handler
|
||||
|
||||
|
||||
def _engine_namespace_handler(k, v):
|
||||
"""Backward compatibility handler for the "engine" namespace."""
|
||||
engine = cherrypy.engine
|
||||
|
||||
deprecated = {
|
||||
'autoreload_on': 'autoreload.on',
|
||||
'autoreload_frequency': 'autoreload.frequency',
|
||||
'autoreload_match': 'autoreload.match',
|
||||
'reload_files': 'autoreload.files',
|
||||
'deadlock_poll_freq': 'timeout_monitor.frequency'
|
||||
}
|
||||
|
||||
if k in deprecated:
|
||||
engine.log(
|
||||
'WARNING: Use of engine.%s is deprecated and will be removed in a '
|
||||
'future version. Use engine.%s instead.' % (k, deprecated[k]))
|
||||
|
||||
if k == 'autoreload_on':
|
||||
if v:
|
||||
engine.autoreload.subscribe()
|
||||
else:
|
||||
engine.autoreload.unsubscribe()
|
||||
elif k == 'autoreload_frequency':
|
||||
engine.autoreload.frequency = v
|
||||
elif k == 'autoreload_match':
|
||||
engine.autoreload.match = v
|
||||
elif k == 'reload_files':
|
||||
engine.autoreload.files = set(v)
|
||||
elif k == 'deadlock_poll_freq':
|
||||
engine.timeout_monitor.frequency = v
|
||||
elif k == 'SIGHUP':
|
||||
engine.listeners['SIGHUP'] = set([v])
|
||||
elif k == 'SIGTERM':
|
||||
engine.listeners['SIGTERM'] = set([v])
|
||||
elif "." in k:
|
||||
plugin, attrname = k.split(".", 1)
|
||||
plugin = getattr(engine, plugin)
|
||||
if attrname == 'on':
|
||||
if v and hasattr(getattr(plugin, 'subscribe', None), '__call__'):
|
||||
plugin.subscribe()
|
||||
return
|
||||
elif (
|
||||
(not v) and
|
||||
hasattr(getattr(plugin, 'unsubscribe', None), '__call__')
|
||||
):
|
||||
plugin.unsubscribe()
|
||||
return
|
||||
setattr(plugin, attrname, v)
|
||||
else:
|
||||
setattr(engine, k, v)
|
||||
Config.namespaces["engine"] = _engine_namespace_handler
|
||||
|
||||
|
||||
def _tree_namespace_handler(k, v):
|
||||
"""Namespace handler for the 'tree' config namespace."""
|
||||
if isinstance(v, dict):
|
||||
for script_name, app in v.items():
|
||||
cherrypy.tree.graft(app, script_name)
|
||||
cherrypy.engine.log("Mounted: %s on %s" %
|
||||
(app, script_name or "/"))
|
||||
else:
|
||||
cherrypy.tree.graft(v, v.script_name)
|
||||
cherrypy.engine.log("Mounted: %s on %s" % (v, v.script_name or "/"))
|
||||
Config.namespaces["tree"] = _tree_namespace_handler
|
|
@ -1,686 +0,0 @@
|
|||
"""CherryPy dispatchers.
|
||||
|
||||
A 'dispatcher' is the object which looks up the 'page handler' callable
|
||||
and collects config for the current request based on the path_info, other
|
||||
request attributes, and the application architecture. The core calls the
|
||||
dispatcher as early as possible, passing it a 'path_info' argument.
|
||||
|
||||
The default dispatcher discovers the page handler by matching path_info
|
||||
to a hierarchical arrangement of objects, starting at request.app.root.
|
||||
"""
|
||||
|
||||
import string
|
||||
import sys
|
||||
import types
|
||||
try:
|
||||
classtype = (type, types.ClassType)
|
||||
except AttributeError:
|
||||
classtype = type
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import set
|
||||
|
||||
|
||||
class PageHandler(object):
|
||||
|
||||
"""Callable which sets response.body."""
|
||||
|
||||
def __init__(self, callable, *args, **kwargs):
|
||||
self.callable = callable
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
|
||||
def get_args(self):
|
||||
return cherrypy.serving.request.args
|
||||
|
||||
def set_args(self, args):
|
||||
cherrypy.serving.request.args = args
|
||||
return cherrypy.serving.request.args
|
||||
|
||||
args = property(
|
||||
get_args,
|
||||
set_args,
|
||||
doc="The ordered args should be accessible from post dispatch hooks"
|
||||
)
|
||||
|
||||
def get_kwargs(self):
|
||||
return cherrypy.serving.request.kwargs
|
||||
|
||||
def set_kwargs(self, kwargs):
|
||||
cherrypy.serving.request.kwargs = kwargs
|
||||
return cherrypy.serving.request.kwargs
|
||||
|
||||
kwargs = property(
|
||||
get_kwargs,
|
||||
set_kwargs,
|
||||
doc="The named kwargs should be accessible from post dispatch hooks"
|
||||
)
|
||||
|
||||
def __call__(self):
|
||||
try:
|
||||
return self.callable(*self.args, **self.kwargs)
|
||||
except TypeError:
|
||||
x = sys.exc_info()[1]
|
||||
try:
|
||||
test_callable_spec(self.callable, self.args, self.kwargs)
|
||||
except cherrypy.HTTPError:
|
||||
raise sys.exc_info()[1]
|
||||
except:
|
||||
raise x
|
||||
raise
|
||||
|
||||
|
||||
def test_callable_spec(callable, callable_args, callable_kwargs):
|
||||
"""
|
||||
Inspect callable and test to see if the given args are suitable for it.
|
||||
|
||||
When an error occurs during the handler's invoking stage there are 2
|
||||
erroneous cases:
|
||||
1. Too many parameters passed to a function which doesn't define
|
||||
one of *args or **kwargs.
|
||||
2. Too little parameters are passed to the function.
|
||||
|
||||
There are 3 sources of parameters to a cherrypy handler.
|
||||
1. query string parameters are passed as keyword parameters to the
|
||||
handler.
|
||||
2. body parameters are also passed as keyword parameters.
|
||||
3. when partial matching occurs, the final path atoms are passed as
|
||||
positional args.
|
||||
Both the query string and path atoms are part of the URI. If they are
|
||||
incorrect, then a 404 Not Found should be raised. Conversely the body
|
||||
parameters are part of the request; if they are invalid a 400 Bad Request.
|
||||
"""
|
||||
show_mismatched_params = getattr(
|
||||
cherrypy.serving.request, 'show_mismatched_params', False)
|
||||
try:
|
||||
(args, varargs, varkw, defaults) = getargspec(callable)
|
||||
except TypeError:
|
||||
if isinstance(callable, object) and hasattr(callable, '__call__'):
|
||||
(args, varargs, varkw,
|
||||
defaults) = getargspec(callable.__call__)
|
||||
else:
|
||||
# If it wasn't one of our own types, re-raise
|
||||
# the original error
|
||||
raise
|
||||
|
||||
if args and args[0] == 'self':
|
||||
args = args[1:]
|
||||
|
||||
arg_usage = dict([(arg, 0,) for arg in args])
|
||||
vararg_usage = 0
|
||||
varkw_usage = 0
|
||||
extra_kwargs = set()
|
||||
|
||||
for i, value in enumerate(callable_args):
|
||||
try:
|
||||
arg_usage[args[i]] += 1
|
||||
except IndexError:
|
||||
vararg_usage += 1
|
||||
|
||||
for key in callable_kwargs.keys():
|
||||
try:
|
||||
arg_usage[key] += 1
|
||||
except KeyError:
|
||||
varkw_usage += 1
|
||||
extra_kwargs.add(key)
|
||||
|
||||
# figure out which args have defaults.
|
||||
args_with_defaults = args[-len(defaults or []):]
|
||||
for i, val in enumerate(defaults or []):
|
||||
# Defaults take effect only when the arg hasn't been used yet.
|
||||
if arg_usage[args_with_defaults[i]] == 0:
|
||||
arg_usage[args_with_defaults[i]] += 1
|
||||
|
||||
missing_args = []
|
||||
multiple_args = []
|
||||
for key, usage in arg_usage.items():
|
||||
if usage == 0:
|
||||
missing_args.append(key)
|
||||
elif usage > 1:
|
||||
multiple_args.append(key)
|
||||
|
||||
if missing_args:
|
||||
# In the case where the method allows body arguments
|
||||
# there are 3 potential errors:
|
||||
# 1. not enough query string parameters -> 404
|
||||
# 2. not enough body parameters -> 400
|
||||
# 3. not enough path parts (partial matches) -> 404
|
||||
#
|
||||
# We can't actually tell which case it is,
|
||||
# so I'm raising a 404 because that covers 2/3 of the
|
||||
# possibilities
|
||||
#
|
||||
# In the case where the method does not allow body
|
||||
# arguments it's definitely a 404.
|
||||
message = None
|
||||
if show_mismatched_params:
|
||||
message = "Missing parameters: %s" % ",".join(missing_args)
|
||||
raise cherrypy.HTTPError(404, message=message)
|
||||
|
||||
# the extra positional arguments come from the path - 404 Not Found
|
||||
if not varargs and vararg_usage > 0:
|
||||
raise cherrypy.HTTPError(404)
|
||||
|
||||
body_params = cherrypy.serving.request.body.params or {}
|
||||
body_params = set(body_params.keys())
|
||||
qs_params = set(callable_kwargs.keys()) - body_params
|
||||
|
||||
if multiple_args:
|
||||
if qs_params.intersection(set(multiple_args)):
|
||||
# If any of the multiple parameters came from the query string then
|
||||
# it's a 404 Not Found
|
||||
error = 404
|
||||
else:
|
||||
# Otherwise it's a 400 Bad Request
|
||||
error = 400
|
||||
|
||||
message = None
|
||||
if show_mismatched_params:
|
||||
message = "Multiple values for parameters: "\
|
||||
"%s" % ",".join(multiple_args)
|
||||
raise cherrypy.HTTPError(error, message=message)
|
||||
|
||||
if not varkw and varkw_usage > 0:
|
||||
|
||||
# If there were extra query string parameters, it's a 404 Not Found
|
||||
extra_qs_params = set(qs_params).intersection(extra_kwargs)
|
||||
if extra_qs_params:
|
||||
message = None
|
||||
if show_mismatched_params:
|
||||
message = "Unexpected query string "\
|
||||
"parameters: %s" % ", ".join(extra_qs_params)
|
||||
raise cherrypy.HTTPError(404, message=message)
|
||||
|
||||
# If there were any extra body parameters, it's a 400 Not Found
|
||||
extra_body_params = set(body_params).intersection(extra_kwargs)
|
||||
if extra_body_params:
|
||||
message = None
|
||||
if show_mismatched_params:
|
||||
message = "Unexpected body parameters: "\
|
||||
"%s" % ", ".join(extra_body_params)
|
||||
raise cherrypy.HTTPError(400, message=message)
|
||||
|
||||
|
||||
try:
|
||||
import inspect
|
||||
except ImportError:
|
||||
test_callable_spec = lambda callable, args, kwargs: None
|
||||
else:
|
||||
getargspec = inspect.getargspec
|
||||
# Python 3 requires using getfullargspec if keyword-only arguments are present
|
||||
if hasattr(inspect, 'getfullargspec'):
|
||||
def getargspec(callable):
|
||||
return inspect.getfullargspec(callable)[:4]
|
||||
|
||||
|
||||
class LateParamPageHandler(PageHandler):
|
||||
|
||||
"""When passing cherrypy.request.params to the page handler, we do not
|
||||
want to capture that dict too early; we want to give tools like the
|
||||
decoding tool a chance to modify the params dict in-between the lookup
|
||||
of the handler and the actual calling of the handler. This subclass
|
||||
takes that into account, and allows request.params to be 'bound late'
|
||||
(it's more complicated than that, but that's the effect).
|
||||
"""
|
||||
|
||||
def _get_kwargs(self):
|
||||
kwargs = cherrypy.serving.request.params.copy()
|
||||
if self._kwargs:
|
||||
kwargs.update(self._kwargs)
|
||||
return kwargs
|
||||
|
||||
def _set_kwargs(self, kwargs):
|
||||
cherrypy.serving.request.kwargs = kwargs
|
||||
self._kwargs = kwargs
|
||||
|
||||
kwargs = property(_get_kwargs, _set_kwargs,
|
||||
doc='page handler kwargs (with '
|
||||
'cherrypy.request.params copied in)')
|
||||
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
punctuation_to_underscores = string.maketrans(
|
||||
string.punctuation, '_' * len(string.punctuation))
|
||||
|
||||
def validate_translator(t):
|
||||
if not isinstance(t, str) or len(t) != 256:
|
||||
raise ValueError(
|
||||
"The translate argument must be a str of len 256.")
|
||||
else:
|
||||
punctuation_to_underscores = str.maketrans(
|
||||
string.punctuation, '_' * len(string.punctuation))
|
||||
|
||||
def validate_translator(t):
|
||||
if not isinstance(t, dict):
|
||||
raise ValueError("The translate argument must be a dict.")
|
||||
|
||||
|
||||
class Dispatcher(object):
|
||||
|
||||
"""CherryPy Dispatcher which walks a tree of objects to find a handler.
|
||||
|
||||
The tree is rooted at cherrypy.request.app.root, and each hierarchical
|
||||
component in the path_info argument is matched to a corresponding nested
|
||||
attribute of the root object. Matching handlers must have an 'exposed'
|
||||
attribute which evaluates to True. The special method name "index"
|
||||
matches a URI which ends in a slash ("/"). The special method name
|
||||
"default" may match a portion of the path_info (but only when no longer
|
||||
substring of the path_info matches some other object).
|
||||
|
||||
This is the default, built-in dispatcher for CherryPy.
|
||||
"""
|
||||
|
||||
dispatch_method_name = '_cp_dispatch'
|
||||
"""
|
||||
The name of the dispatch method that nodes may optionally implement
|
||||
to provide their own dynamic dispatch algorithm.
|
||||
"""
|
||||
|
||||
def __init__(self, dispatch_method_name=None,
|
||||
translate=punctuation_to_underscores):
|
||||
validate_translator(translate)
|
||||
self.translate = translate
|
||||
if dispatch_method_name:
|
||||
self.dispatch_method_name = dispatch_method_name
|
||||
|
||||
def __call__(self, path_info):
|
||||
"""Set handler and config for the current request."""
|
||||
request = cherrypy.serving.request
|
||||
func, vpath = self.find_handler(path_info)
|
||||
|
||||
if func:
|
||||
# Decode any leftover %2F in the virtual_path atoms.
|
||||
vpath = [x.replace("%2F", "/") for x in vpath]
|
||||
request.handler = LateParamPageHandler(func, *vpath)
|
||||
else:
|
||||
request.handler = cherrypy.NotFound()
|
||||
|
||||
def find_handler(self, path):
|
||||
"""Return the appropriate page handler, plus any virtual path.
|
||||
|
||||
This will return two objects. The first will be a callable,
|
||||
which can be used to generate page output. Any parameters from
|
||||
the query string or request body will be sent to that callable
|
||||
as keyword arguments.
|
||||
|
||||
The callable is found by traversing the application's tree,
|
||||
starting from cherrypy.request.app.root, and matching path
|
||||
components to successive objects in the tree. For example, the
|
||||
URL "/path/to/handler" might return root.path.to.handler.
|
||||
|
||||
The second object returned will be a list of names which are
|
||||
'virtual path' components: parts of the URL which are dynamic,
|
||||
and were not used when looking up the handler.
|
||||
These virtual path components are passed to the handler as
|
||||
positional arguments.
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
app = request.app
|
||||
root = app.root
|
||||
dispatch_name = self.dispatch_method_name
|
||||
|
||||
# Get config for the root object/path.
|
||||
fullpath = [x for x in path.strip('/').split('/') if x] + ['index']
|
||||
fullpath_len = len(fullpath)
|
||||
segleft = fullpath_len
|
||||
nodeconf = {}
|
||||
if hasattr(root, "_cp_config"):
|
||||
nodeconf.update(root._cp_config)
|
||||
if "/" in app.config:
|
||||
nodeconf.update(app.config["/"])
|
||||
object_trail = [['root', root, nodeconf, segleft]]
|
||||
|
||||
node = root
|
||||
iternames = fullpath[:]
|
||||
while iternames:
|
||||
name = iternames[0]
|
||||
# map to legal Python identifiers (e.g. replace '.' with '_')
|
||||
objname = name.translate(self.translate)
|
||||
|
||||
nodeconf = {}
|
||||
subnode = getattr(node, objname, None)
|
||||
pre_len = len(iternames)
|
||||
if subnode is None:
|
||||
dispatch = getattr(node, dispatch_name, None)
|
||||
if dispatch and hasattr(dispatch, '__call__') and not \
|
||||
getattr(dispatch, 'exposed', False) and \
|
||||
pre_len > 1:
|
||||
# Don't expose the hidden 'index' token to _cp_dispatch
|
||||
# We skip this if pre_len == 1 since it makes no sense
|
||||
# to call a dispatcher when we have no tokens left.
|
||||
index_name = iternames.pop()
|
||||
subnode = dispatch(vpath=iternames)
|
||||
iternames.append(index_name)
|
||||
else:
|
||||
# We didn't find a path, but keep processing in case there
|
||||
# is a default() handler.
|
||||
iternames.pop(0)
|
||||
else:
|
||||
# We found the path, remove the vpath entry
|
||||
iternames.pop(0)
|
||||
segleft = len(iternames)
|
||||
if segleft > pre_len:
|
||||
# No path segment was removed. Raise an error.
|
||||
raise cherrypy.CherryPyException(
|
||||
"A vpath segment was added. Custom dispatchers may only "
|
||||
+ "remove elements. While trying to process "
|
||||
+ "{0} in {1}".format(name, fullpath)
|
||||
)
|
||||
elif segleft == pre_len:
|
||||
# Assume that the handler used the current path segment, but
|
||||
# did not pop it. This allows things like
|
||||
# return getattr(self, vpath[0], None)
|
||||
iternames.pop(0)
|
||||
segleft -= 1
|
||||
node = subnode
|
||||
|
||||
if node is not None:
|
||||
# Get _cp_config attached to this node.
|
||||
if hasattr(node, "_cp_config"):
|
||||
nodeconf.update(node._cp_config)
|
||||
|
||||
# Mix in values from app.config for this path.
|
||||
existing_len = fullpath_len - pre_len
|
||||
if existing_len != 0:
|
||||
curpath = '/' + '/'.join(fullpath[0:existing_len])
|
||||
else:
|
||||
curpath = ''
|
||||
new_segs = fullpath[fullpath_len - pre_len:fullpath_len - segleft]
|
||||
for seg in new_segs:
|
||||
curpath += '/' + seg
|
||||
if curpath in app.config:
|
||||
nodeconf.update(app.config[curpath])
|
||||
|
||||
object_trail.append([name, node, nodeconf, segleft])
|
||||
|
||||
def set_conf():
|
||||
"""Collapse all object_trail config into cherrypy.request.config.
|
||||
"""
|
||||
base = cherrypy.config.copy()
|
||||
# Note that we merge the config from each node
|
||||
# even if that node was None.
|
||||
for name, obj, conf, segleft in object_trail:
|
||||
base.update(conf)
|
||||
if 'tools.staticdir.dir' in conf:
|
||||
base['tools.staticdir.section'] = '/' + \
|
||||
'/'.join(fullpath[0:fullpath_len - segleft])
|
||||
return base
|
||||
|
||||
# Try successive objects (reverse order)
|
||||
num_candidates = len(object_trail) - 1
|
||||
for i in range(num_candidates, -1, -1):
|
||||
|
||||
name, candidate, nodeconf, segleft = object_trail[i]
|
||||
if candidate is None:
|
||||
continue
|
||||
|
||||
# Try a "default" method on the current leaf.
|
||||
if hasattr(candidate, "default"):
|
||||
defhandler = candidate.default
|
||||
if getattr(defhandler, 'exposed', False):
|
||||
# Insert any extra _cp_config from the default handler.
|
||||
conf = getattr(defhandler, "_cp_config", {})
|
||||
object_trail.insert(
|
||||
i + 1, ["default", defhandler, conf, segleft])
|
||||
request.config = set_conf()
|
||||
# See https://bitbucket.org/cherrypy/cherrypy/issue/613
|
||||
request.is_index = path.endswith("/")
|
||||
return defhandler, fullpath[fullpath_len - segleft:-1]
|
||||
|
||||
# Uncomment the next line to restrict positional params to
|
||||
# "default".
|
||||
# if i < num_candidates - 2: continue
|
||||
|
||||
# Try the current leaf.
|
||||
if getattr(candidate, 'exposed', False):
|
||||
request.config = set_conf()
|
||||
if i == num_candidates:
|
||||
# We found the extra ".index". Mark request so tools
|
||||
# can redirect if path_info has no trailing slash.
|
||||
request.is_index = True
|
||||
else:
|
||||
# We're not at an 'index' handler. Mark request so tools
|
||||
# can redirect if path_info has NO trailing slash.
|
||||
# Note that this also includes handlers which take
|
||||
# positional parameters (virtual paths).
|
||||
request.is_index = False
|
||||
return candidate, fullpath[fullpath_len - segleft:-1]
|
||||
|
||||
# We didn't find anything
|
||||
request.config = set_conf()
|
||||
return None, []
|
||||
|
||||
|
||||
class MethodDispatcher(Dispatcher):
|
||||
|
||||
"""Additional dispatch based on cherrypy.request.method.upper().
|
||||
|
||||
Methods named GET, POST, etc will be called on an exposed class.
|
||||
The method names must be all caps; the appropriate Allow header
|
||||
will be output showing all capitalized method names as allowable
|
||||
HTTP verbs.
|
||||
|
||||
Note that the containing class must be exposed, not the methods.
|
||||
"""
|
||||
|
||||
def __call__(self, path_info):
|
||||
"""Set handler and config for the current request."""
|
||||
request = cherrypy.serving.request
|
||||
resource, vpath = self.find_handler(path_info)
|
||||
|
||||
if resource:
|
||||
# Set Allow header
|
||||
avail = [m for m in dir(resource) if m.isupper()]
|
||||
if "GET" in avail and "HEAD" not in avail:
|
||||
avail.append("HEAD")
|
||||
avail.sort()
|
||||
cherrypy.serving.response.headers['Allow'] = ", ".join(avail)
|
||||
|
||||
# Find the subhandler
|
||||
meth = request.method.upper()
|
||||
func = getattr(resource, meth, None)
|
||||
if func is None and meth == "HEAD":
|
||||
func = getattr(resource, "GET", None)
|
||||
if func:
|
||||
# Grab any _cp_config on the subhandler.
|
||||
if hasattr(func, "_cp_config"):
|
||||
request.config.update(func._cp_config)
|
||||
|
||||
# Decode any leftover %2F in the virtual_path atoms.
|
||||
vpath = [x.replace("%2F", "/") for x in vpath]
|
||||
request.handler = LateParamPageHandler(func, *vpath)
|
||||
else:
|
||||
request.handler = cherrypy.HTTPError(405)
|
||||
else:
|
||||
request.handler = cherrypy.NotFound()
|
||||
|
||||
|
||||
class RoutesDispatcher(object):
|
||||
|
||||
"""A Routes based dispatcher for CherryPy."""
|
||||
|
||||
def __init__(self, full_result=False, **mapper_options):
|
||||
"""
|
||||
Routes dispatcher
|
||||
|
||||
Set full_result to True if you wish the controller
|
||||
and the action to be passed on to the page handler
|
||||
parameters. By default they won't be.
|
||||
"""
|
||||
import routes
|
||||
self.full_result = full_result
|
||||
self.controllers = {}
|
||||
self.mapper = routes.Mapper(**mapper_options)
|
||||
self.mapper.controller_scan = self.controllers.keys
|
||||
|
||||
def connect(self, name, route, controller, **kwargs):
|
||||
self.controllers[name] = controller
|
||||
self.mapper.connect(name, route, controller=name, **kwargs)
|
||||
|
||||
def redirect(self, url):
|
||||
raise cherrypy.HTTPRedirect(url)
|
||||
|
||||
def __call__(self, path_info):
|
||||
"""Set handler and config for the current request."""
|
||||
func = self.find_handler(path_info)
|
||||
if func:
|
||||
cherrypy.serving.request.handler = LateParamPageHandler(func)
|
||||
else:
|
||||
cherrypy.serving.request.handler = cherrypy.NotFound()
|
||||
|
||||
def find_handler(self, path_info):
|
||||
"""Find the right page handler, and set request.config."""
|
||||
import routes
|
||||
|
||||
request = cherrypy.serving.request
|
||||
|
||||
config = routes.request_config()
|
||||
config.mapper = self.mapper
|
||||
if hasattr(request, 'wsgi_environ'):
|
||||
config.environ = request.wsgi_environ
|
||||
config.host = request.headers.get('Host', None)
|
||||
config.protocol = request.scheme
|
||||
config.redirect = self.redirect
|
||||
|
||||
result = self.mapper.match(path_info)
|
||||
|
||||
config.mapper_dict = result
|
||||
params = {}
|
||||
if result:
|
||||
params = result.copy()
|
||||
if not self.full_result:
|
||||
params.pop('controller', None)
|
||||
params.pop('action', None)
|
||||
request.params.update(params)
|
||||
|
||||
# Get config for the root object/path.
|
||||
request.config = base = cherrypy.config.copy()
|
||||
curpath = ""
|
||||
|
||||
def merge(nodeconf):
|
||||
if 'tools.staticdir.dir' in nodeconf:
|
||||
nodeconf['tools.staticdir.section'] = curpath or "/"
|
||||
base.update(nodeconf)
|
||||
|
||||
app = request.app
|
||||
root = app.root
|
||||
if hasattr(root, "_cp_config"):
|
||||
merge(root._cp_config)
|
||||
if "/" in app.config:
|
||||
merge(app.config["/"])
|
||||
|
||||
# Mix in values from app.config.
|
||||
atoms = [x for x in path_info.split("/") if x]
|
||||
if atoms:
|
||||
last = atoms.pop()
|
||||
else:
|
||||
last = None
|
||||
for atom in atoms:
|
||||
curpath = "/".join((curpath, atom))
|
||||
if curpath in app.config:
|
||||
merge(app.config[curpath])
|
||||
|
||||
handler = None
|
||||
if result:
|
||||
controller = result.get('controller')
|
||||
controller = self.controllers.get(controller, controller)
|
||||
if controller:
|
||||
if isinstance(controller, classtype):
|
||||
controller = controller()
|
||||
# Get config from the controller.
|
||||
if hasattr(controller, "_cp_config"):
|
||||
merge(controller._cp_config)
|
||||
|
||||
action = result.get('action')
|
||||
if action is not None:
|
||||
handler = getattr(controller, action, None)
|
||||
# Get config from the handler
|
||||
if hasattr(handler, "_cp_config"):
|
||||
merge(handler._cp_config)
|
||||
else:
|
||||
handler = controller
|
||||
|
||||
# Do the last path atom here so it can
|
||||
# override the controller's _cp_config.
|
||||
if last:
|
||||
curpath = "/".join((curpath, last))
|
||||
if curpath in app.config:
|
||||
merge(app.config[curpath])
|
||||
|
||||
return handler
|
||||
|
||||
|
||||
def XMLRPCDispatcher(next_dispatcher=Dispatcher()):
|
||||
from cherrypy.lib import xmlrpcutil
|
||||
|
||||
def xmlrpc_dispatch(path_info):
|
||||
path_info = xmlrpcutil.patched_path(path_info)
|
||||
return next_dispatcher(path_info)
|
||||
return xmlrpc_dispatch
|
||||
|
||||
|
||||
def VirtualHost(next_dispatcher=Dispatcher(), use_x_forwarded_host=True,
|
||||
**domains):
|
||||
"""
|
||||
Select a different handler based on the Host header.
|
||||
|
||||
This can be useful when running multiple sites within one CP server.
|
||||
It allows several domains to point to different parts of a single
|
||||
website structure. For example::
|
||||
|
||||
http://www.domain.example -> root
|
||||
http://www.domain2.example -> root/domain2/
|
||||
http://www.domain2.example:443 -> root/secure
|
||||
|
||||
can be accomplished via the following config::
|
||||
|
||||
[/]
|
||||
request.dispatch = cherrypy.dispatch.VirtualHost(
|
||||
**{'www.domain2.example': '/domain2',
|
||||
'www.domain2.example:443': '/secure',
|
||||
})
|
||||
|
||||
next_dispatcher
|
||||
The next dispatcher object in the dispatch chain.
|
||||
The VirtualHost dispatcher adds a prefix to the URL and calls
|
||||
another dispatcher. Defaults to cherrypy.dispatch.Dispatcher().
|
||||
|
||||
use_x_forwarded_host
|
||||
If True (the default), any "X-Forwarded-Host"
|
||||
request header will be used instead of the "Host" header. This
|
||||
is commonly added by HTTP servers (such as Apache) when proxying.
|
||||
|
||||
``**domains``
|
||||
A dict of {host header value: virtual prefix} pairs.
|
||||
The incoming "Host" request header is looked up in this dict,
|
||||
and, if a match is found, the corresponding "virtual prefix"
|
||||
value will be prepended to the URL path before calling the
|
||||
next dispatcher. Note that you often need separate entries
|
||||
for "example.com" and "www.example.com". In addition, "Host"
|
||||
headers may contain the port number.
|
||||
"""
|
||||
from cherrypy.lib import httputil
|
||||
|
||||
def vhost_dispatch(path_info):
|
||||
request = cherrypy.serving.request
|
||||
header = request.headers.get
|
||||
|
||||
domain = header('Host', '')
|
||||
if use_x_forwarded_host:
|
||||
domain = header("X-Forwarded-Host", domain)
|
||||
|
||||
prefix = domains.get(domain, "")
|
||||
if prefix:
|
||||
path_info = httputil.urljoin(prefix, path_info)
|
||||
|
||||
result = next_dispatcher(path_info)
|
||||
|
||||
# Touch up staticdir config. See
|
||||
# https://bitbucket.org/cherrypy/cherrypy/issue/614.
|
||||
section = request.config.get('tools.staticdir.section')
|
||||
if section:
|
||||
section = section[len(prefix):]
|
||||
request.config['tools.staticdir.section'] = section
|
||||
|
||||
return result
|
||||
return vhost_dispatch
|
|
@ -1,609 +0,0 @@
|
|||
"""Exception classes for CherryPy.
|
||||
|
||||
CherryPy provides (and uses) exceptions for declaring that the HTTP response
|
||||
should be a status other than the default "200 OK". You can ``raise`` them like
|
||||
normal Python exceptions. You can also call them and they will raise
|
||||
themselves; this means you can set an
|
||||
:class:`HTTPError<cherrypy._cperror.HTTPError>`
|
||||
or :class:`HTTPRedirect<cherrypy._cperror.HTTPRedirect>` as the
|
||||
:attr:`request.handler<cherrypy._cprequest.Request.handler>`.
|
||||
|
||||
.. _redirectingpost:
|
||||
|
||||
Redirecting POST
|
||||
================
|
||||
|
||||
When you GET a resource and are redirected by the server to another Location,
|
||||
there's generally no problem since GET is both a "safe method" (there should
|
||||
be no side-effects) and an "idempotent method" (multiple calls are no different
|
||||
than a single call).
|
||||
|
||||
POST, however, is neither safe nor idempotent--if you
|
||||
charge a credit card, you don't want to be charged twice by a redirect!
|
||||
|
||||
For this reason, *none* of the 3xx responses permit a user-agent (browser) to
|
||||
resubmit a POST on redirection without first confirming the action with the
|
||||
user:
|
||||
|
||||
===== ================================= ===========
|
||||
300 Multiple Choices Confirm with the user
|
||||
301 Moved Permanently Confirm with the user
|
||||
302 Found (Object moved temporarily) Confirm with the user
|
||||
303 See Other GET the new URI--no confirmation
|
||||
304 Not modified (for conditional GET only--POST should not raise this error)
|
||||
305 Use Proxy Confirm with the user
|
||||
307 Temporary Redirect Confirm with the user
|
||||
===== ================================= ===========
|
||||
|
||||
However, browsers have historically implemented these restrictions poorly;
|
||||
in particular, many browsers do not force the user to confirm 301, 302
|
||||
or 307 when redirecting POST. For this reason, CherryPy defaults to 303,
|
||||
which most user-agents appear to have implemented correctly. Therefore, if
|
||||
you raise HTTPRedirect for a POST request, the user-agent will most likely
|
||||
attempt to GET the new URI (without asking for confirmation from the user).
|
||||
We realize this is confusing for developers, but it's the safest thing we
|
||||
could do. You are of course free to raise ``HTTPRedirect(uri, status=302)``
|
||||
or any other 3xx status if you know what you're doing, but given the
|
||||
environment, we couldn't let any of those be the default.
|
||||
|
||||
Custom Error Handling
|
||||
=====================
|
||||
|
||||
.. image:: /refman/cperrors.gif
|
||||
|
||||
Anticipated HTTP responses
|
||||
--------------------------
|
||||
|
||||
The 'error_page' config namespace can be used to provide custom HTML output for
|
||||
expected responses (like 404 Not Found). Supply a filename from which the
|
||||
output will be read. The contents will be interpolated with the values
|
||||
%(status)s, %(message)s, %(traceback)s, and %(version)s using plain old Python
|
||||
`string formatting <http://docs.python.org/2/library/stdtypes.html#string-formatting-operations>`_.
|
||||
|
||||
::
|
||||
|
||||
_cp_config = {
|
||||
'error_page.404': os.path.join(localDir, "static/index.html")
|
||||
}
|
||||
|
||||
|
||||
Beginning in version 3.1, you may also provide a function or other callable as
|
||||
an error_page entry. It will be passed the same status, message, traceback and
|
||||
version arguments that are interpolated into templates::
|
||||
|
||||
def error_page_402(status, message, traceback, version):
|
||||
return "Error %s - Well, I'm very sorry but you haven't paid!" % status
|
||||
cherrypy.config.update({'error_page.402': error_page_402})
|
||||
|
||||
Also in 3.1, in addition to the numbered error codes, you may also supply
|
||||
"error_page.default" to handle all codes which do not have their own error_page
|
||||
entry.
|
||||
|
||||
|
||||
|
||||
Unanticipated errors
|
||||
--------------------
|
||||
|
||||
CherryPy also has a generic error handling mechanism: whenever an unanticipated
|
||||
error occurs in your code, it will call
|
||||
:func:`Request.error_response<cherrypy._cprequest.Request.error_response>` to
|
||||
set the response status, headers, and body. By default, this is the same
|
||||
output as
|
||||
:class:`HTTPError(500) <cherrypy._cperror.HTTPError>`. If you want to provide
|
||||
some other behavior, you generally replace "request.error_response".
|
||||
|
||||
Here is some sample code that shows how to display a custom error message and
|
||||
send an e-mail containing the error::
|
||||
|
||||
from cherrypy import _cperror
|
||||
|
||||
def handle_error():
|
||||
cherrypy.response.status = 500
|
||||
cherrypy.response.body = [
|
||||
"<html><body>Sorry, an error occured</body></html>"
|
||||
]
|
||||
sendMail('error@domain.com',
|
||||
'Error in your web app',
|
||||
_cperror.format_exc())
|
||||
|
||||
class Root:
|
||||
_cp_config = {'request.error_response': handle_error}
|
||||
|
||||
|
||||
Note that you have to explicitly set
|
||||
:attr:`response.body <cherrypy._cprequest.Response.body>`
|
||||
and not simply return an error message as a result.
|
||||
"""
|
||||
|
||||
from cgi import escape as _escape
|
||||
from sys import exc_info as _exc_info
|
||||
from traceback import format_exception as _format_exception
|
||||
from cherrypy._cpcompat import basestring, bytestr, iteritems, ntob
|
||||
from cherrypy._cpcompat import tonative, urljoin as _urljoin
|
||||
from cherrypy.lib import httputil as _httputil
|
||||
|
||||
|
||||
class CherryPyException(Exception):
|
||||
|
||||
"""A base class for CherryPy exceptions."""
|
||||
pass
|
||||
|
||||
|
||||
class TimeoutError(CherryPyException):
|
||||
|
||||
"""Exception raised when Response.timed_out is detected."""
|
||||
pass
|
||||
|
||||
|
||||
class InternalRedirect(CherryPyException):
|
||||
|
||||
"""Exception raised to switch to the handler for a different URL.
|
||||
|
||||
This exception will redirect processing to another path within the site
|
||||
(without informing the client). Provide the new path as an argument when
|
||||
raising the exception. Provide any params in the querystring for the new
|
||||
URL.
|
||||
"""
|
||||
|
||||
def __init__(self, path, query_string=""):
|
||||
import cherrypy
|
||||
self.request = cherrypy.serving.request
|
||||
|
||||
self.query_string = query_string
|
||||
if "?" in path:
|
||||
# Separate any params included in the path
|
||||
path, self.query_string = path.split("?", 1)
|
||||
|
||||
# Note that urljoin will "do the right thing" whether url is:
|
||||
# 1. a URL relative to root (e.g. "/dummy")
|
||||
# 2. a URL relative to the current path
|
||||
# Note that any query string will be discarded.
|
||||
path = _urljoin(self.request.path_info, path)
|
||||
|
||||
# Set a 'path' member attribute so that code which traps this
|
||||
# error can have access to it.
|
||||
self.path = path
|
||||
|
||||
CherryPyException.__init__(self, path, self.query_string)
|
||||
|
||||
|
||||
class HTTPRedirect(CherryPyException):
|
||||
|
||||
"""Exception raised when the request should be redirected.
|
||||
|
||||
This exception will force a HTTP redirect to the URL or URL's you give it.
|
||||
The new URL must be passed as the first argument to the Exception,
|
||||
e.g., HTTPRedirect(newUrl). Multiple URLs are allowed in a list.
|
||||
If a URL is absolute, it will be used as-is. If it is relative, it is
|
||||
assumed to be relative to the current cherrypy.request.path_info.
|
||||
|
||||
If one of the provided URL is a unicode object, it will be encoded
|
||||
using the default encoding or the one passed in parameter.
|
||||
|
||||
There are multiple types of redirect, from which you can select via the
|
||||
``status`` argument. If you do not provide a ``status`` arg, it defaults to
|
||||
303 (or 302 if responding with HTTP/1.0).
|
||||
|
||||
Examples::
|
||||
|
||||
raise cherrypy.HTTPRedirect("")
|
||||
raise cherrypy.HTTPRedirect("/abs/path", 307)
|
||||
raise cherrypy.HTTPRedirect(["path1", "path2?a=1&b=2"], 301)
|
||||
|
||||
See :ref:`redirectingpost` for additional caveats.
|
||||
"""
|
||||
|
||||
status = None
|
||||
"""The integer HTTP status code to emit."""
|
||||
|
||||
urls = None
|
||||
"""The list of URL's to emit."""
|
||||
|
||||
encoding = 'utf-8'
|
||||
"""The encoding when passed urls are not native strings"""
|
||||
|
||||
def __init__(self, urls, status=None, encoding=None):
|
||||
import cherrypy
|
||||
request = cherrypy.serving.request
|
||||
|
||||
if isinstance(urls, basestring):
|
||||
urls = [urls]
|
||||
|
||||
abs_urls = []
|
||||
for url in urls:
|
||||
url = tonative(url, encoding or self.encoding)
|
||||
|
||||
# Note that urljoin will "do the right thing" whether url is:
|
||||
# 1. a complete URL with host (e.g. "http://www.example.com/test")
|
||||
# 2. a URL relative to root (e.g. "/dummy")
|
||||
# 3. a URL relative to the current path
|
||||
# Note that any query string in cherrypy.request is discarded.
|
||||
url = _urljoin(cherrypy.url(), url)
|
||||
abs_urls.append(url)
|
||||
self.urls = abs_urls
|
||||
|
||||
# RFC 2616 indicates a 301 response code fits our goal; however,
|
||||
# browser support for 301 is quite messy. Do 302/303 instead. See
|
||||
# http://www.alanflavell.org.uk/www/post-redirect.html
|
||||
if status is None:
|
||||
if request.protocol >= (1, 1):
|
||||
status = 303
|
||||
else:
|
||||
status = 302
|
||||
else:
|
||||
status = int(status)
|
||||
if status < 300 or status > 399:
|
||||
raise ValueError("status must be between 300 and 399.")
|
||||
|
||||
self.status = status
|
||||
CherryPyException.__init__(self, abs_urls, status)
|
||||
|
||||
def set_response(self):
|
||||
"""Modify cherrypy.response status, headers, and body to represent
|
||||
self.
|
||||
|
||||
CherryPy uses this internally, but you can also use it to create an
|
||||
HTTPRedirect object and set its output without *raising* the exception.
|
||||
"""
|
||||
import cherrypy
|
||||
response = cherrypy.serving.response
|
||||
response.status = status = self.status
|
||||
|
||||
if status in (300, 301, 302, 303, 307):
|
||||
response.headers['Content-Type'] = "text/html;charset=utf-8"
|
||||
# "The ... URI SHOULD be given by the Location field
|
||||
# in the response."
|
||||
response.headers['Location'] = self.urls[0]
|
||||
|
||||
# "Unless the request method was HEAD, the entity of the response
|
||||
# SHOULD contain a short hypertext note with a hyperlink to the
|
||||
# new URI(s)."
|
||||
msg = {
|
||||
300: "This resource can be found at ",
|
||||
301: "This resource has permanently moved to ",
|
||||
302: "This resource resides temporarily at ",
|
||||
303: "This resource can be found at ",
|
||||
307: "This resource has moved temporarily to ",
|
||||
}[status]
|
||||
msg += '<a href=%s>%s</a>.'
|
||||
from xml.sax import saxutils
|
||||
msgs = [msg % (saxutils.quoteattr(u), u) for u in self.urls]
|
||||
response.body = ntob("<br />\n".join(msgs), 'utf-8')
|
||||
# Previous code may have set C-L, so we have to reset it
|
||||
# (allow finalize to set it).
|
||||
response.headers.pop('Content-Length', None)
|
||||
elif status == 304:
|
||||
# Not Modified.
|
||||
# "The response MUST include the following header fields:
|
||||
# Date, unless its omission is required by section 14.18.1"
|
||||
# The "Date" header should have been set in Response.__init__
|
||||
|
||||
# "...the response SHOULD NOT include other entity-headers."
|
||||
for key in ('Allow', 'Content-Encoding', 'Content-Language',
|
||||
'Content-Length', 'Content-Location', 'Content-MD5',
|
||||
'Content-Range', 'Content-Type', 'Expires',
|
||||
'Last-Modified'):
|
||||
if key in response.headers:
|
||||
del response.headers[key]
|
||||
|
||||
# "The 304 response MUST NOT contain a message-body."
|
||||
response.body = None
|
||||
# Previous code may have set C-L, so we have to reset it.
|
||||
response.headers.pop('Content-Length', None)
|
||||
elif status == 305:
|
||||
# Use Proxy.
|
||||
# self.urls[0] should be the URI of the proxy.
|
||||
response.headers['Location'] = self.urls[0]
|
||||
response.body = None
|
||||
# Previous code may have set C-L, so we have to reset it.
|
||||
response.headers.pop('Content-Length', None)
|
||||
else:
|
||||
raise ValueError("The %s status code is unknown." % status)
|
||||
|
||||
def __call__(self):
|
||||
"""Use this exception as a request.handler (raise self)."""
|
||||
raise self
|
||||
|
||||
|
||||
def clean_headers(status):
|
||||
"""Remove any headers which should not apply to an error response."""
|
||||
import cherrypy
|
||||
|
||||
response = cherrypy.serving.response
|
||||
|
||||
# Remove headers which applied to the original content,
|
||||
# but do not apply to the error page.
|
||||
respheaders = response.headers
|
||||
for key in ["Accept-Ranges", "Age", "ETag", "Location", "Retry-After",
|
||||
"Vary", "Content-Encoding", "Content-Length", "Expires",
|
||||
"Content-Location", "Content-MD5", "Last-Modified"]:
|
||||
if key in respheaders:
|
||||
del respheaders[key]
|
||||
|
||||
if status != 416:
|
||||
# A server sending a response with status code 416 (Requested
|
||||
# range not satisfiable) SHOULD include a Content-Range field
|
||||
# with a byte-range-resp-spec of "*". The instance-length
|
||||
# specifies the current length of the selected resource.
|
||||
# A response with status code 206 (Partial Content) MUST NOT
|
||||
# include a Content-Range field with a byte-range- resp-spec of "*".
|
||||
if "Content-Range" in respheaders:
|
||||
del respheaders["Content-Range"]
|
||||
|
||||
|
||||
class HTTPError(CherryPyException):
|
||||
|
||||
"""Exception used to return an HTTP error code (4xx-5xx) to the client.
|
||||
|
||||
This exception can be used to automatically send a response using a
|
||||
http status code, with an appropriate error page. It takes an optional
|
||||
``status`` argument (which must be between 400 and 599); it defaults to 500
|
||||
("Internal Server Error"). It also takes an optional ``message`` argument,
|
||||
which will be returned in the response body. See
|
||||
`RFC2616 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4>`_
|
||||
for a complete list of available error codes and when to use them.
|
||||
|
||||
Examples::
|
||||
|
||||
raise cherrypy.HTTPError(403)
|
||||
raise cherrypy.HTTPError(
|
||||
"403 Forbidden", "You are not allowed to access this resource.")
|
||||
"""
|
||||
|
||||
status = None
|
||||
"""The HTTP status code. May be of type int or str (with a Reason-Phrase).
|
||||
"""
|
||||
|
||||
code = None
|
||||
"""The integer HTTP status code."""
|
||||
|
||||
reason = None
|
||||
"""The HTTP Reason-Phrase string."""
|
||||
|
||||
def __init__(self, status=500, message=None):
|
||||
self.status = status
|
||||
try:
|
||||
self.code, self.reason, defaultmsg = _httputil.valid_status(status)
|
||||
except ValueError:
|
||||
raise self.__class__(500, _exc_info()[1].args[0])
|
||||
|
||||
if self.code < 400 or self.code > 599:
|
||||
raise ValueError("status must be between 400 and 599.")
|
||||
|
||||
# See http://www.python.org/dev/peps/pep-0352/
|
||||
# self.message = message
|
||||
self._message = message or defaultmsg
|
||||
CherryPyException.__init__(self, status, message)
|
||||
|
||||
def set_response(self):
|
||||
"""Modify cherrypy.response status, headers, and body to represent
|
||||
self.
|
||||
|
||||
CherryPy uses this internally, but you can also use it to create an
|
||||
HTTPError object and set its output without *raising* the exception.
|
||||
"""
|
||||
import cherrypy
|
||||
|
||||
response = cherrypy.serving.response
|
||||
|
||||
clean_headers(self.code)
|
||||
|
||||
# In all cases, finalize will be called after this method,
|
||||
# so don't bother cleaning up response values here.
|
||||
response.status = self.status
|
||||
tb = None
|
||||
if cherrypy.serving.request.show_tracebacks:
|
||||
tb = format_exc()
|
||||
|
||||
response.headers.pop('Content-Length', None)
|
||||
|
||||
content = self.get_error_page(self.status, traceback=tb,
|
||||
message=self._message)
|
||||
response.body = content
|
||||
|
||||
_be_ie_unfriendly(self.code)
|
||||
|
||||
def get_error_page(self, *args, **kwargs):
|
||||
return get_error_page(*args, **kwargs)
|
||||
|
||||
def __call__(self):
|
||||
"""Use this exception as a request.handler (raise self)."""
|
||||
raise self
|
||||
|
||||
|
||||
class NotFound(HTTPError):
|
||||
|
||||
"""Exception raised when a URL could not be mapped to any handler (404).
|
||||
|
||||
This is equivalent to raising
|
||||
:class:`HTTPError("404 Not Found") <cherrypy._cperror.HTTPError>`.
|
||||
"""
|
||||
|
||||
def __init__(self, path=None):
|
||||
if path is None:
|
||||
import cherrypy
|
||||
request = cherrypy.serving.request
|
||||
path = request.script_name + request.path_info
|
||||
self.args = (path,)
|
||||
HTTPError.__init__(self, 404, "The path '%s' was not found." % path)
|
||||
|
||||
|
||||
_HTTPErrorTemplate = '''<!DOCTYPE html PUBLIC
|
||||
"-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
|
||||
<title>%(status)s</title>
|
||||
<style type="text/css">
|
||||
#powered_by {
|
||||
margin-top: 20px;
|
||||
border-top: 2px solid black;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
#traceback {
|
||||
color: red;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h2>%(status)s</h2>
|
||||
<p>%(message)s</p>
|
||||
<pre id="traceback">%(traceback)s</pre>
|
||||
<div id="powered_by">
|
||||
<span>
|
||||
Powered by <a href="http://www.cherrypy.org">CherryPy %(version)s</a>
|
||||
</span>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
'''
|
||||
|
||||
|
||||
def get_error_page(status, **kwargs):
|
||||
"""Return an HTML page, containing a pretty error response.
|
||||
|
||||
status should be an int or a str.
|
||||
kwargs will be interpolated into the page template.
|
||||
"""
|
||||
import cherrypy
|
||||
|
||||
try:
|
||||
code, reason, message = _httputil.valid_status(status)
|
||||
except ValueError:
|
||||
raise cherrypy.HTTPError(500, _exc_info()[1].args[0])
|
||||
|
||||
# We can't use setdefault here, because some
|
||||
# callers send None for kwarg values.
|
||||
if kwargs.get('status') is None:
|
||||
kwargs['status'] = "%s %s" % (code, reason)
|
||||
if kwargs.get('message') is None:
|
||||
kwargs['message'] = message
|
||||
if kwargs.get('traceback') is None:
|
||||
kwargs['traceback'] = ''
|
||||
if kwargs.get('version') is None:
|
||||
kwargs['version'] = cherrypy.__version__
|
||||
|
||||
for k, v in iteritems(kwargs):
|
||||
if v is None:
|
||||
kwargs[k] = ""
|
||||
else:
|
||||
kwargs[k] = _escape(kwargs[k])
|
||||
|
||||
# Use a custom template or callable for the error page?
|
||||
pages = cherrypy.serving.request.error_page
|
||||
error_page = pages.get(code) or pages.get('default')
|
||||
|
||||
# Default template, can be overridden below.
|
||||
template = _HTTPErrorTemplate
|
||||
if error_page:
|
||||
try:
|
||||
if hasattr(error_page, '__call__'):
|
||||
# The caller function may be setting headers manually,
|
||||
# so we delegate to it completely. We may be returning
|
||||
# an iterator as well as a string here.
|
||||
#
|
||||
# We *must* make sure any content is not unicode.
|
||||
result = error_page(**kwargs)
|
||||
if cherrypy.lib.is_iterator(result):
|
||||
from cherrypy.lib.encoding import UTF8StreamEncoder
|
||||
return UTF8StreamEncoder(result)
|
||||
elif isinstance(result, cherrypy._cpcompat.unicodestr):
|
||||
return result.encode('utf-8')
|
||||
else:
|
||||
if not isinstance(result, cherrypy._cpcompat.bytestr):
|
||||
raise ValueError('error page function did not '
|
||||
'return a bytestring, unicodestring or an '
|
||||
'iterator - returned object of type %s.'
|
||||
% (type(result).__name__))
|
||||
return result
|
||||
else:
|
||||
# Load the template from this path.
|
||||
template = tonative(open(error_page, 'rb').read())
|
||||
except:
|
||||
e = _format_exception(*_exc_info())[-1]
|
||||
m = kwargs['message']
|
||||
if m:
|
||||
m += "<br />"
|
||||
m += "In addition, the custom error page failed:\n<br />%s" % e
|
||||
kwargs['message'] = m
|
||||
|
||||
response = cherrypy.serving.response
|
||||
response.headers['Content-Type'] = "text/html;charset=utf-8"
|
||||
result = template % kwargs
|
||||
return result.encode('utf-8')
|
||||
|
||||
|
||||
|
||||
_ie_friendly_error_sizes = {
|
||||
400: 512, 403: 256, 404: 512, 405: 256,
|
||||
406: 512, 408: 512, 409: 512, 410: 256,
|
||||
500: 512, 501: 512, 505: 512,
|
||||
}
|
||||
|
||||
|
||||
def _be_ie_unfriendly(status):
|
||||
import cherrypy
|
||||
response = cherrypy.serving.response
|
||||
|
||||
# For some statuses, Internet Explorer 5+ shows "friendly error
|
||||
# messages" instead of our response.body if the body is smaller
|
||||
# than a given size. Fix this by returning a body over that size
|
||||
# (by adding whitespace).
|
||||
# See http://support.microsoft.com/kb/q218155/
|
||||
s = _ie_friendly_error_sizes.get(status, 0)
|
||||
if s:
|
||||
s += 1
|
||||
# Since we are issuing an HTTP error status, we assume that
|
||||
# the entity is short, and we should just collapse it.
|
||||
content = response.collapse_body()
|
||||
l = len(content)
|
||||
if l and l < s:
|
||||
# IN ADDITION: the response must be written to IE
|
||||
# in one chunk or it will still get replaced! Bah.
|
||||
content = content + (ntob(" ") * (s - l))
|
||||
response.body = content
|
||||
response.headers['Content-Length'] = str(len(content))
|
||||
|
||||
|
||||
def format_exc(exc=None):
|
||||
"""Return exc (or sys.exc_info if None), formatted."""
|
||||
try:
|
||||
if exc is None:
|
||||
exc = _exc_info()
|
||||
if exc == (None, None, None):
|
||||
return ""
|
||||
import traceback
|
||||
return "".join(traceback.format_exception(*exc))
|
||||
finally:
|
||||
del exc
|
||||
|
||||
|
||||
def bare_error(extrabody=None):
|
||||
"""Produce status, headers, body for a critical error.
|
||||
|
||||
Returns a triple without calling any other questionable functions,
|
||||
so it should be as error-free as possible. Call it from an HTTP server
|
||||
if you get errors outside of the request.
|
||||
|
||||
If extrabody is None, a friendly but rather unhelpful error message
|
||||
is set in the body. If extrabody is a string, it will be appended
|
||||
as-is to the body.
|
||||
"""
|
||||
|
||||
# The whole point of this function is to be a last line-of-defense
|
||||
# in handling errors. That is, it must not raise any errors itself;
|
||||
# it cannot be allowed to fail. Therefore, don't add to it!
|
||||
# In particular, don't call any other CP functions.
|
||||
|
||||
body = ntob("Unrecoverable error in the server.")
|
||||
if extrabody is not None:
|
||||
if not isinstance(extrabody, bytestr):
|
||||
extrabody = extrabody.encode('utf-8')
|
||||
body += ntob("\n") + extrabody
|
||||
|
||||
return (ntob("500 Internal Server Error"),
|
||||
[(ntob('Content-Type'), ntob('text/plain')),
|
||||
(ntob('Content-Length'), ntob(str(len(body)), 'ISO-8859-1'))],
|
||||
[body])
|
|
@ -1,459 +0,0 @@
|
|||
"""
|
||||
Simple config
|
||||
=============
|
||||
|
||||
Although CherryPy uses the :mod:`Python logging module <logging>`, it does so
|
||||
behind the scenes so that simple logging is simple, but complicated logging
|
||||
is still possible. "Simple" logging means that you can log to the screen
|
||||
(i.e. console/stdout) or to a file, and that you can easily have separate
|
||||
error and access log files.
|
||||
|
||||
Here are the simplified logging settings. You use these by adding lines to
|
||||
your config file or dict. You should set these at either the global level or
|
||||
per application (see next), but generally not both.
|
||||
|
||||
* ``log.screen``: Set this to True to have both "error" and "access" messages
|
||||
printed to stdout.
|
||||
* ``log.access_file``: Set this to an absolute filename where you want
|
||||
"access" messages written.
|
||||
* ``log.error_file``: Set this to an absolute filename where you want "error"
|
||||
messages written.
|
||||
|
||||
Many events are automatically logged; to log your own application events, call
|
||||
:func:`cherrypy.log`.
|
||||
|
||||
Architecture
|
||||
============
|
||||
|
||||
Separate scopes
|
||||
---------------
|
||||
|
||||
CherryPy provides log managers at both the global and application layers.
|
||||
This means you can have one set of logging rules for your entire site,
|
||||
and another set of rules specific to each application. The global log
|
||||
manager is found at :func:`cherrypy.log`, and the log manager for each
|
||||
application is found at :attr:`app.log<cherrypy._cptree.Application.log>`.
|
||||
If you're inside a request, the latter is reachable from
|
||||
``cherrypy.request.app.log``; if you're outside a request, you'll have to
|
||||
obtain a reference to the ``app``: either the return value of
|
||||
:func:`tree.mount()<cherrypy._cptree.Tree.mount>` or, if you used
|
||||
:func:`quickstart()<cherrypy.quickstart>` instead, via
|
||||
``cherrypy.tree.apps['/']``.
|
||||
|
||||
By default, the global logs are named "cherrypy.error" and "cherrypy.access",
|
||||
and the application logs are named "cherrypy.error.2378745" and
|
||||
"cherrypy.access.2378745" (the number is the id of the Application object).
|
||||
This means that the application logs "bubble up" to the site logs, so if your
|
||||
application has no log handlers, the site-level handlers will still log the
|
||||
messages.
|
||||
|
||||
Errors vs. Access
|
||||
-----------------
|
||||
|
||||
Each log manager handles both "access" messages (one per HTTP request) and
|
||||
"error" messages (everything else). Note that the "error" log is not just for
|
||||
errors! The format of access messages is highly formalized, but the error log
|
||||
isn't--it receives messages from a variety of sources (including full error
|
||||
tracebacks, if enabled).
|
||||
|
||||
If you are logging the access log and error log to the same source, then there
|
||||
is a possibility that a specially crafted error message may replicate an access
|
||||
log message as described in CWE-117. In this case it is the application
|
||||
developer's responsibility to manually escape data before using CherryPy's log()
|
||||
functionality, or they may create an application that is vulnerable to CWE-117.
|
||||
This would be achieved by using a custom handler escape any special characters,
|
||||
and attached as described below.
|
||||
|
||||
Custom Handlers
|
||||
===============
|
||||
|
||||
The simple settings above work by manipulating Python's standard :mod:`logging`
|
||||
module. So when you need something more complex, the full power of the standard
|
||||
module is yours to exploit. You can borrow or create custom handlers, formats,
|
||||
filters, and much more. Here's an example that skips the standard FileHandler
|
||||
and uses a RotatingFileHandler instead:
|
||||
|
||||
::
|
||||
|
||||
#python
|
||||
log = app.log
|
||||
|
||||
# Remove the default FileHandlers if present.
|
||||
log.error_file = ""
|
||||
log.access_file = ""
|
||||
|
||||
maxBytes = getattr(log, "rot_maxBytes", 10000000)
|
||||
backupCount = getattr(log, "rot_backupCount", 1000)
|
||||
|
||||
# Make a new RotatingFileHandler for the error log.
|
||||
fname = getattr(log, "rot_error_file", "error.log")
|
||||
h = handlers.RotatingFileHandler(fname, 'a', maxBytes, backupCount)
|
||||
h.setLevel(DEBUG)
|
||||
h.setFormatter(_cplogging.logfmt)
|
||||
log.error_log.addHandler(h)
|
||||
|
||||
# Make a new RotatingFileHandler for the access log.
|
||||
fname = getattr(log, "rot_access_file", "access.log")
|
||||
h = handlers.RotatingFileHandler(fname, 'a', maxBytes, backupCount)
|
||||
h.setLevel(DEBUG)
|
||||
h.setFormatter(_cplogging.logfmt)
|
||||
log.access_log.addHandler(h)
|
||||
|
||||
|
||||
The ``rot_*`` attributes are pulled straight from the application log object.
|
||||
Since "log.*" config entries simply set attributes on the log object, you can
|
||||
add custom attributes to your heart's content. Note that these handlers are
|
||||
used ''instead'' of the default, simple handlers outlined above (so don't set
|
||||
the "log.error_file" config entry, for example).
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
# Silence the no-handlers "warning" (stderr write!) in stdlib logging
|
||||
logging.Logger.manager.emittedNoHandlerWarning = 1
|
||||
logfmt = logging.Formatter("%(message)s")
|
||||
import os
|
||||
import sys
|
||||
|
||||
import cherrypy
|
||||
from cherrypy import _cperror
|
||||
from cherrypy._cpcompat import ntob, py3k
|
||||
|
||||
|
||||
class NullHandler(logging.Handler):
|
||||
|
||||
"""A no-op logging handler to silence the logging.lastResort handler."""
|
||||
|
||||
def handle(self, record):
|
||||
pass
|
||||
|
||||
def emit(self, record):
|
||||
pass
|
||||
|
||||
def createLock(self):
|
||||
self.lock = None
|
||||
|
||||
|
||||
class LogManager(object):
|
||||
|
||||
"""An object to assist both simple and advanced logging.
|
||||
|
||||
``cherrypy.log`` is an instance of this class.
|
||||
"""
|
||||
|
||||
appid = None
|
||||
"""The id() of the Application object which owns this log manager. If this
|
||||
is a global log manager, appid is None."""
|
||||
|
||||
error_log = None
|
||||
"""The actual :class:`logging.Logger` instance for error messages."""
|
||||
|
||||
access_log = None
|
||||
"""The actual :class:`logging.Logger` instance for access messages."""
|
||||
|
||||
if py3k:
|
||||
access_log_format = \
|
||||
'{h} {l} {u} {t} "{r}" {s} {b} "{f}" "{a}"'
|
||||
else:
|
||||
access_log_format = \
|
||||
'%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'
|
||||
|
||||
logger_root = None
|
||||
"""The "top-level" logger name.
|
||||
|
||||
This string will be used as the first segment in the Logger names.
|
||||
The default is "cherrypy", for example, in which case the Logger names
|
||||
will be of the form::
|
||||
|
||||
cherrypy.error.<appid>
|
||||
cherrypy.access.<appid>
|
||||
"""
|
||||
|
||||
def __init__(self, appid=None, logger_root="cherrypy"):
|
||||
self.logger_root = logger_root
|
||||
self.appid = appid
|
||||
if appid is None:
|
||||
self.error_log = logging.getLogger("%s.error" % logger_root)
|
||||
self.access_log = logging.getLogger("%s.access" % logger_root)
|
||||
else:
|
||||
self.error_log = logging.getLogger(
|
||||
"%s.error.%s" % (logger_root, appid))
|
||||
self.access_log = logging.getLogger(
|
||||
"%s.access.%s" % (logger_root, appid))
|
||||
self.error_log.setLevel(logging.INFO)
|
||||
self.access_log.setLevel(logging.INFO)
|
||||
|
||||
# Silence the no-handlers "warning" (stderr write!) in stdlib logging
|
||||
self.error_log.addHandler(NullHandler())
|
||||
self.access_log.addHandler(NullHandler())
|
||||
|
||||
cherrypy.engine.subscribe('graceful', self.reopen_files)
|
||||
|
||||
def reopen_files(self):
|
||||
"""Close and reopen all file handlers."""
|
||||
for log in (self.error_log, self.access_log):
|
||||
for h in log.handlers:
|
||||
if isinstance(h, logging.FileHandler):
|
||||
h.acquire()
|
||||
h.stream.close()
|
||||
h.stream = open(h.baseFilename, h.mode)
|
||||
h.release()
|
||||
|
||||
def error(self, msg='', context='', severity=logging.INFO,
|
||||
traceback=False):
|
||||
"""Write the given ``msg`` to the error log.
|
||||
|
||||
This is not just for errors! Applications may call this at any time
|
||||
to log application-specific information.
|
||||
|
||||
If ``traceback`` is True, the traceback of the current exception
|
||||
(if any) will be appended to ``msg``.
|
||||
"""
|
||||
if traceback:
|
||||
msg += _cperror.format_exc()
|
||||
self.error_log.log(severity, ' '.join((self.time(), context, msg)))
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
"""An alias for ``error``."""
|
||||
return self.error(*args, **kwargs)
|
||||
|
||||
def access(self):
|
||||
"""Write to the access log (in Apache/NCSA Combined Log format).
|
||||
|
||||
See the
|
||||
`apache documentation <http://httpd.apache.org/docs/current/logs.html#combined>`_
|
||||
for format details.
|
||||
|
||||
CherryPy calls this automatically for you. Note there are no arguments;
|
||||
it collects the data itself from
|
||||
:class:`cherrypy.request<cherrypy._cprequest.Request>`.
|
||||
|
||||
Like Apache started doing in 2.0.46, non-printable and other special
|
||||
characters in %r (and we expand that to all parts) are escaped using
|
||||
\\xhh sequences, where hh stands for the hexadecimal representation
|
||||
of the raw byte. Exceptions from this rule are " and \\, which are
|
||||
escaped by prepending a backslash, and all whitespace characters,
|
||||
which are written in their C-style notation (\\n, \\t, etc).
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
remote = request.remote
|
||||
response = cherrypy.serving.response
|
||||
outheaders = response.headers
|
||||
inheaders = request.headers
|
||||
if response.output_status is None:
|
||||
status = "-"
|
||||
else:
|
||||
status = response.output_status.split(ntob(" "), 1)[0]
|
||||
if py3k:
|
||||
status = status.decode('ISO-8859-1')
|
||||
|
||||
atoms = {'h': remote.name or remote.ip,
|
||||
'l': '-',
|
||||
'u': getattr(request, "login", None) or "-",
|
||||
't': self.time(),
|
||||
'r': request.request_line,
|
||||
's': status,
|
||||
'b': dict.get(outheaders, 'Content-Length', '') or "-",
|
||||
'f': dict.get(inheaders, 'Referer', ''),
|
||||
'a': dict.get(inheaders, 'User-Agent', ''),
|
||||
'o': dict.get(inheaders, 'Host', '-'),
|
||||
}
|
||||
if py3k:
|
||||
for k, v in atoms.items():
|
||||
if not isinstance(v, str):
|
||||
v = str(v)
|
||||
v = v.replace('"', '\\"').encode('utf8')
|
||||
# Fortunately, repr(str) escapes unprintable chars, \n, \t, etc
|
||||
# and backslash for us. All we have to do is strip the quotes.
|
||||
v = repr(v)[2:-1]
|
||||
|
||||
# in python 3.0 the repr of bytes (as returned by encode)
|
||||
# uses double \'s. But then the logger escapes them yet, again
|
||||
# resulting in quadruple slashes. Remove the extra one here.
|
||||
v = v.replace('\\\\', '\\')
|
||||
|
||||
# Escape double-quote.
|
||||
atoms[k] = v
|
||||
|
||||
try:
|
||||
self.access_log.log(
|
||||
logging.INFO, self.access_log_format.format(**atoms))
|
||||
except:
|
||||
self(traceback=True)
|
||||
else:
|
||||
for k, v in atoms.items():
|
||||
if isinstance(v, unicode):
|
||||
v = v.encode('utf8')
|
||||
elif not isinstance(v, str):
|
||||
v = str(v)
|
||||
# Fortunately, repr(str) escapes unprintable chars, \n, \t, etc
|
||||
# and backslash for us. All we have to do is strip the quotes.
|
||||
v = repr(v)[1:-1]
|
||||
# Escape double-quote.
|
||||
atoms[k] = v.replace('"', '\\"')
|
||||
|
||||
try:
|
||||
self.access_log.log(
|
||||
logging.INFO, self.access_log_format % atoms)
|
||||
except:
|
||||
self(traceback=True)
|
||||
|
||||
def time(self):
|
||||
"""Return now() in Apache Common Log Format (no timezone)."""
|
||||
now = datetime.datetime.now()
|
||||
monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
|
||||
'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
|
||||
month = monthnames[now.month - 1].capitalize()
|
||||
return ('[%02d/%s/%04d:%02d:%02d:%02d]' %
|
||||
(now.day, month, now.year, now.hour, now.minute, now.second))
|
||||
|
||||
def _get_builtin_handler(self, log, key):
|
||||
for h in log.handlers:
|
||||
if getattr(h, "_cpbuiltin", None) == key:
|
||||
return h
|
||||
|
||||
# ------------------------- Screen handlers ------------------------- #
|
||||
def _set_screen_handler(self, log, enable, stream=None):
|
||||
h = self._get_builtin_handler(log, "screen")
|
||||
if enable:
|
||||
if not h:
|
||||
if stream is None:
|
||||
stream = sys.stderr
|
||||
h = logging.StreamHandler(stream)
|
||||
h.setFormatter(logfmt)
|
||||
h._cpbuiltin = "screen"
|
||||
log.addHandler(h)
|
||||
elif h:
|
||||
log.handlers.remove(h)
|
||||
|
||||
def _get_screen(self):
|
||||
h = self._get_builtin_handler
|
||||
has_h = h(self.error_log, "screen") or h(self.access_log, "screen")
|
||||
return bool(has_h)
|
||||
|
||||
def _set_screen(self, newvalue):
|
||||
self._set_screen_handler(self.error_log, newvalue, stream=sys.stderr)
|
||||
self._set_screen_handler(self.access_log, newvalue, stream=sys.stdout)
|
||||
screen = property(_get_screen, _set_screen,
|
||||
doc="""Turn stderr/stdout logging on or off.
|
||||
|
||||
If you set this to True, it'll add the appropriate StreamHandler for
|
||||
you. If you set it to False, it will remove the handler.
|
||||
""")
|
||||
|
||||
# -------------------------- File handlers -------------------------- #
|
||||
|
||||
def _add_builtin_file_handler(self, log, fname):
|
||||
h = logging.FileHandler(fname)
|
||||
h.setFormatter(logfmt)
|
||||
h._cpbuiltin = "file"
|
||||
log.addHandler(h)
|
||||
|
||||
def _set_file_handler(self, log, filename):
|
||||
h = self._get_builtin_handler(log, "file")
|
||||
if filename:
|
||||
if h:
|
||||
if h.baseFilename != os.path.abspath(filename):
|
||||
h.close()
|
||||
log.handlers.remove(h)
|
||||
self._add_builtin_file_handler(log, filename)
|
||||
else:
|
||||
self._add_builtin_file_handler(log, filename)
|
||||
else:
|
||||
if h:
|
||||
h.close()
|
||||
log.handlers.remove(h)
|
||||
|
||||
def _get_error_file(self):
|
||||
h = self._get_builtin_handler(self.error_log, "file")
|
||||
if h:
|
||||
return h.baseFilename
|
||||
return ''
|
||||
|
||||
def _set_error_file(self, newvalue):
|
||||
self._set_file_handler(self.error_log, newvalue)
|
||||
error_file = property(_get_error_file, _set_error_file,
|
||||
doc="""The filename for self.error_log.
|
||||
|
||||
If you set this to a string, it'll add the appropriate FileHandler for
|
||||
you. If you set it to ``None`` or ``''``, it will remove the handler.
|
||||
""")
|
||||
|
||||
def _get_access_file(self):
|
||||
h = self._get_builtin_handler(self.access_log, "file")
|
||||
if h:
|
||||
return h.baseFilename
|
||||
return ''
|
||||
|
||||
def _set_access_file(self, newvalue):
|
||||
self._set_file_handler(self.access_log, newvalue)
|
||||
access_file = property(_get_access_file, _set_access_file,
|
||||
doc="""The filename for self.access_log.
|
||||
|
||||
If you set this to a string, it'll add the appropriate FileHandler for
|
||||
you. If you set it to ``None`` or ``''``, it will remove the handler.
|
||||
""")
|
||||
|
||||
# ------------------------- WSGI handlers ------------------------- #
|
||||
|
||||
def _set_wsgi_handler(self, log, enable):
|
||||
h = self._get_builtin_handler(log, "wsgi")
|
||||
if enable:
|
||||
if not h:
|
||||
h = WSGIErrorHandler()
|
||||
h.setFormatter(logfmt)
|
||||
h._cpbuiltin = "wsgi"
|
||||
log.addHandler(h)
|
||||
elif h:
|
||||
log.handlers.remove(h)
|
||||
|
||||
def _get_wsgi(self):
|
||||
return bool(self._get_builtin_handler(self.error_log, "wsgi"))
|
||||
|
||||
def _set_wsgi(self, newvalue):
|
||||
self._set_wsgi_handler(self.error_log, newvalue)
|
||||
wsgi = property(_get_wsgi, _set_wsgi,
|
||||
doc="""Write errors to wsgi.errors.
|
||||
|
||||
If you set this to True, it'll add the appropriate
|
||||
:class:`WSGIErrorHandler<cherrypy._cplogging.WSGIErrorHandler>` for you
|
||||
(which writes errors to ``wsgi.errors``).
|
||||
If you set it to False, it will remove the handler.
|
||||
""")
|
||||
|
||||
|
||||
class WSGIErrorHandler(logging.Handler):
|
||||
|
||||
"A handler class which writes logging records to environ['wsgi.errors']."
|
||||
|
||||
def flush(self):
|
||||
"""Flushes the stream."""
|
||||
try:
|
||||
stream = cherrypy.serving.request.wsgi_environ.get('wsgi.errors')
|
||||
except (AttributeError, KeyError):
|
||||
pass
|
||||
else:
|
||||
stream.flush()
|
||||
|
||||
def emit(self, record):
|
||||
"""Emit a record."""
|
||||
try:
|
||||
stream = cherrypy.serving.request.wsgi_environ.get('wsgi.errors')
|
||||
except (AttributeError, KeyError):
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
msg = self.format(record)
|
||||
fs = "%s\n"
|
||||
import types
|
||||
# if no unicode support...
|
||||
if not hasattr(types, "UnicodeType"):
|
||||
stream.write(fs % msg)
|
||||
else:
|
||||
try:
|
||||
stream.write(fs % msg)
|
||||
except UnicodeError:
|
||||
stream.write(fs % msg.encode("UTF-8"))
|
||||
self.flush()
|
||||
except:
|
||||
self.handleError(record)
|
|
@ -1,353 +0,0 @@
|
|||
"""Native adapter for serving CherryPy via mod_python
|
||||
|
||||
Basic usage:
|
||||
|
||||
##########################################
|
||||
# Application in a module called myapp.py
|
||||
##########################################
|
||||
|
||||
import cherrypy
|
||||
|
||||
class Root:
|
||||
@cherrypy.expose
|
||||
def index(self):
|
||||
return 'Hi there, Ho there, Hey there'
|
||||
|
||||
|
||||
# We will use this method from the mod_python configuration
|
||||
# as the entry point to our application
|
||||
def setup_server():
|
||||
cherrypy.tree.mount(Root())
|
||||
cherrypy.config.update({'environment': 'production',
|
||||
'log.screen': False,
|
||||
'show_tracebacks': False})
|
||||
|
||||
##########################################
|
||||
# mod_python settings for apache2
|
||||
# This should reside in your httpd.conf
|
||||
# or a file that will be loaded at
|
||||
# apache startup
|
||||
##########################################
|
||||
|
||||
# Start
|
||||
DocumentRoot "/"
|
||||
Listen 8080
|
||||
LoadModule python_module /usr/lib/apache2/modules/mod_python.so
|
||||
|
||||
<Location "/">
|
||||
PythonPath "sys.path+['/path/to/my/application']"
|
||||
SetHandler python-program
|
||||
PythonHandler cherrypy._cpmodpy::handler
|
||||
PythonOption cherrypy.setup myapp::setup_server
|
||||
PythonDebug On
|
||||
</Location>
|
||||
# End
|
||||
|
||||
The actual path to your mod_python.so is dependent on your
|
||||
environment. In this case we suppose a global mod_python
|
||||
installation on a Linux distribution such as Ubuntu.
|
||||
|
||||
We do set the PythonPath configuration setting so that
|
||||
your application can be found by from the user running
|
||||
the apache2 instance. Of course if your application
|
||||
resides in the global site-package this won't be needed.
|
||||
|
||||
Then restart apache2 and access http://127.0.0.1:8080
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import BytesIO, copyitems, ntob
|
||||
from cherrypy._cperror import format_exc, bare_error
|
||||
from cherrypy.lib import httputil
|
||||
|
||||
|
||||
# ------------------------------ Request-handling
|
||||
|
||||
|
||||
def setup(req):
|
||||
from mod_python import apache
|
||||
|
||||
# Run any setup functions defined by a "PythonOption cherrypy.setup"
|
||||
# directive.
|
||||
options = req.get_options()
|
||||
if 'cherrypy.setup' in options:
|
||||
for function in options['cherrypy.setup'].split():
|
||||
atoms = function.split('::', 1)
|
||||
if len(atoms) == 1:
|
||||
mod = __import__(atoms[0], globals(), locals())
|
||||
else:
|
||||
modname, fname = atoms
|
||||
mod = __import__(modname, globals(), locals(), [fname])
|
||||
func = getattr(mod, fname)
|
||||
func()
|
||||
|
||||
cherrypy.config.update({'log.screen': False,
|
||||
"tools.ignore_headers.on": True,
|
||||
"tools.ignore_headers.headers": ['Range'],
|
||||
})
|
||||
|
||||
engine = cherrypy.engine
|
||||
if hasattr(engine, "signal_handler"):
|
||||
engine.signal_handler.unsubscribe()
|
||||
if hasattr(engine, "console_control_handler"):
|
||||
engine.console_control_handler.unsubscribe()
|
||||
engine.autoreload.unsubscribe()
|
||||
cherrypy.server.unsubscribe()
|
||||
|
||||
def _log(msg, level):
|
||||
newlevel = apache.APLOG_ERR
|
||||
if logging.DEBUG >= level:
|
||||
newlevel = apache.APLOG_DEBUG
|
||||
elif logging.INFO >= level:
|
||||
newlevel = apache.APLOG_INFO
|
||||
elif logging.WARNING >= level:
|
||||
newlevel = apache.APLOG_WARNING
|
||||
# On Windows, req.server is required or the msg will vanish. See
|
||||
# http://www.modpython.org/pipermail/mod_python/2003-October/014291.html
|
||||
# Also, "When server is not specified...LogLevel does not apply..."
|
||||
apache.log_error(msg, newlevel, req.server)
|
||||
engine.subscribe('log', _log)
|
||||
|
||||
engine.start()
|
||||
|
||||
def cherrypy_cleanup(data):
|
||||
engine.exit()
|
||||
try:
|
||||
# apache.register_cleanup wasn't available until 3.1.4.
|
||||
apache.register_cleanup(cherrypy_cleanup)
|
||||
except AttributeError:
|
||||
req.server.register_cleanup(req, cherrypy_cleanup)
|
||||
|
||||
|
||||
class _ReadOnlyRequest:
|
||||
expose = ('read', 'readline', 'readlines')
|
||||
|
||||
def __init__(self, req):
|
||||
for method in self.expose:
|
||||
self.__dict__[method] = getattr(req, method)
|
||||
|
||||
|
||||
recursive = False
|
||||
|
||||
_isSetUp = False
|
||||
|
||||
|
||||
def handler(req):
|
||||
from mod_python import apache
|
||||
try:
|
||||
global _isSetUp
|
||||
if not _isSetUp:
|
||||
setup(req)
|
||||
_isSetUp = True
|
||||
|
||||
# Obtain a Request object from CherryPy
|
||||
local = req.connection.local_addr
|
||||
local = httputil.Host(
|
||||
local[0], local[1], req.connection.local_host or "")
|
||||
remote = req.connection.remote_addr
|
||||
remote = httputil.Host(
|
||||
remote[0], remote[1], req.connection.remote_host or "")
|
||||
|
||||
scheme = req.parsed_uri[0] or 'http'
|
||||
req.get_basic_auth_pw()
|
||||
|
||||
try:
|
||||
# apache.mpm_query only became available in mod_python 3.1
|
||||
q = apache.mpm_query
|
||||
threaded = q(apache.AP_MPMQ_IS_THREADED)
|
||||
forked = q(apache.AP_MPMQ_IS_FORKED)
|
||||
except AttributeError:
|
||||
bad_value = ("You must provide a PythonOption '%s', "
|
||||
"either 'on' or 'off', when running a version "
|
||||
"of mod_python < 3.1")
|
||||
|
||||
threaded = options.get('multithread', '').lower()
|
||||
if threaded == 'on':
|
||||
threaded = True
|
||||
elif threaded == 'off':
|
||||
threaded = False
|
||||
else:
|
||||
raise ValueError(bad_value % "multithread")
|
||||
|
||||
forked = options.get('multiprocess', '').lower()
|
||||
if forked == 'on':
|
||||
forked = True
|
||||
elif forked == 'off':
|
||||
forked = False
|
||||
else:
|
||||
raise ValueError(bad_value % "multiprocess")
|
||||
|
||||
sn = cherrypy.tree.script_name(req.uri or "/")
|
||||
if sn is None:
|
||||
send_response(req, '404 Not Found', [], '')
|
||||
else:
|
||||
app = cherrypy.tree.apps[sn]
|
||||
method = req.method
|
||||
path = req.uri
|
||||
qs = req.args or ""
|
||||
reqproto = req.protocol
|
||||
headers = copyitems(req.headers_in)
|
||||
rfile = _ReadOnlyRequest(req)
|
||||
prev = None
|
||||
|
||||
try:
|
||||
redirections = []
|
||||
while True:
|
||||
request, response = app.get_serving(local, remote, scheme,
|
||||
"HTTP/1.1")
|
||||
request.login = req.user
|
||||
request.multithread = bool(threaded)
|
||||
request.multiprocess = bool(forked)
|
||||
request.app = app
|
||||
request.prev = prev
|
||||
|
||||
# Run the CherryPy Request object and obtain the response
|
||||
try:
|
||||
request.run(method, path, qs, reqproto, headers, rfile)
|
||||
break
|
||||
except cherrypy.InternalRedirect:
|
||||
ir = sys.exc_info()[1]
|
||||
app.release_serving()
|
||||
prev = request
|
||||
|
||||
if not recursive:
|
||||
if ir.path in redirections:
|
||||
raise RuntimeError(
|
||||
"InternalRedirector visited the same URL "
|
||||
"twice: %r" % ir.path)
|
||||
else:
|
||||
# Add the *previous* path_info + qs to
|
||||
# redirections.
|
||||
if qs:
|
||||
qs = "?" + qs
|
||||
redirections.append(sn + path + qs)
|
||||
|
||||
# Munge environment and try again.
|
||||
method = "GET"
|
||||
path = ir.path
|
||||
qs = ir.query_string
|
||||
rfile = BytesIO()
|
||||
|
||||
send_response(
|
||||
req, response.output_status, response.header_list,
|
||||
response.body, response.stream)
|
||||
finally:
|
||||
app.release_serving()
|
||||
except:
|
||||
tb = format_exc()
|
||||
cherrypy.log(tb, 'MOD_PYTHON', severity=logging.ERROR)
|
||||
s, h, b = bare_error()
|
||||
send_response(req, s, h, b)
|
||||
return apache.OK
|
||||
|
||||
|
||||
def send_response(req, status, headers, body, stream=False):
|
||||
# Set response status
|
||||
req.status = int(status[:3])
|
||||
|
||||
# Set response headers
|
||||
req.content_type = "text/plain"
|
||||
for header, value in headers:
|
||||
if header.lower() == 'content-type':
|
||||
req.content_type = value
|
||||
continue
|
||||
req.headers_out.add(header, value)
|
||||
|
||||
if stream:
|
||||
# Flush now so the status and headers are sent immediately.
|
||||
req.flush()
|
||||
|
||||
# Set response body
|
||||
if isinstance(body, basestring):
|
||||
req.write(body)
|
||||
else:
|
||||
for seg in body:
|
||||
req.write(seg)
|
||||
|
||||
|
||||
# --------------- Startup tools for CherryPy + mod_python --------------- #
|
||||
import os
|
||||
import re
|
||||
try:
|
||||
import subprocess
|
||||
|
||||
def popen(fullcmd):
|
||||
p = subprocess.Popen(fullcmd, shell=True,
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
close_fds=True)
|
||||
return p.stdout
|
||||
except ImportError:
|
||||
def popen(fullcmd):
|
||||
pipein, pipeout = os.popen4(fullcmd)
|
||||
return pipeout
|
||||
|
||||
|
||||
def read_process(cmd, args=""):
|
||||
fullcmd = "%s %s" % (cmd, args)
|
||||
pipeout = popen(fullcmd)
|
||||
try:
|
||||
firstline = pipeout.readline()
|
||||
cmd_not_found = re.search(
|
||||
ntob("(not recognized|No such file|not found)"),
|
||||
firstline,
|
||||
re.IGNORECASE
|
||||
)
|
||||
if cmd_not_found:
|
||||
raise IOError('%s must be on your system path.' % cmd)
|
||||
output = firstline + pipeout.read()
|
||||
finally:
|
||||
pipeout.close()
|
||||
return output
|
||||
|
||||
|
||||
class ModPythonServer(object):
|
||||
|
||||
template = """
|
||||
# Apache2 server configuration file for running CherryPy with mod_python.
|
||||
|
||||
DocumentRoot "/"
|
||||
Listen %(port)s
|
||||
LoadModule python_module modules/mod_python.so
|
||||
|
||||
<Location %(loc)s>
|
||||
SetHandler python-program
|
||||
PythonHandler %(handler)s
|
||||
PythonDebug On
|
||||
%(opts)s
|
||||
</Location>
|
||||
"""
|
||||
|
||||
def __init__(self, loc="/", port=80, opts=None, apache_path="apache",
|
||||
handler="cherrypy._cpmodpy::handler"):
|
||||
self.loc = loc
|
||||
self.port = port
|
||||
self.opts = opts
|
||||
self.apache_path = apache_path
|
||||
self.handler = handler
|
||||
|
||||
def start(self):
|
||||
opts = "".join([" PythonOption %s %s\n" % (k, v)
|
||||
for k, v in self.opts])
|
||||
conf_data = self.template % {"port": self.port,
|
||||
"loc": self.loc,
|
||||
"opts": opts,
|
||||
"handler": self.handler,
|
||||
}
|
||||
|
||||
mpconf = os.path.join(os.path.dirname(__file__), "cpmodpy.conf")
|
||||
f = open(mpconf, 'wb')
|
||||
try:
|
||||
f.write(conf_data)
|
||||
finally:
|
||||
f.close()
|
||||
|
||||
response = read_process(self.apache_path, "-k start -f %s" % mpconf)
|
||||
self.ready = True
|
||||
return response
|
||||
|
||||
def stop(self):
|
||||
os.popen("apache -k stop")
|
||||
self.ready = False
|
|
@ -1,154 +0,0 @@
|
|||
"""Native adapter for serving CherryPy via its builtin server."""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import BytesIO
|
||||
from cherrypy._cperror import format_exc, bare_error
|
||||
from cherrypy.lib import httputil
|
||||
from cherrypy import wsgiserver
|
||||
|
||||
|
||||
class NativeGateway(wsgiserver.Gateway):
|
||||
|
||||
recursive = False
|
||||
|
||||
def respond(self):
|
||||
req = self.req
|
||||
try:
|
||||
# Obtain a Request object from CherryPy
|
||||
local = req.server.bind_addr
|
||||
local = httputil.Host(local[0], local[1], "")
|
||||
remote = req.conn.remote_addr, req.conn.remote_port
|
||||
remote = httputil.Host(remote[0], remote[1], "")
|
||||
|
||||
scheme = req.scheme
|
||||
sn = cherrypy.tree.script_name(req.uri or "/")
|
||||
if sn is None:
|
||||
self.send_response('404 Not Found', [], [''])
|
||||
else:
|
||||
app = cherrypy.tree.apps[sn]
|
||||
method = req.method
|
||||
path = req.path
|
||||
qs = req.qs or ""
|
||||
headers = req.inheaders.items()
|
||||
rfile = req.rfile
|
||||
prev = None
|
||||
|
||||
try:
|
||||
redirections = []
|
||||
while True:
|
||||
request, response = app.get_serving(
|
||||
local, remote, scheme, "HTTP/1.1")
|
||||
request.multithread = True
|
||||
request.multiprocess = False
|
||||
request.app = app
|
||||
request.prev = prev
|
||||
|
||||
# Run the CherryPy Request object and obtain the
|
||||
# response
|
||||
try:
|
||||
request.run(method, path, qs,
|
||||
req.request_protocol, headers, rfile)
|
||||
break
|
||||
except cherrypy.InternalRedirect:
|
||||
ir = sys.exc_info()[1]
|
||||
app.release_serving()
|
||||
prev = request
|
||||
|
||||
if not self.recursive:
|
||||
if ir.path in redirections:
|
||||
raise RuntimeError(
|
||||
"InternalRedirector visited the same "
|
||||
"URL twice: %r" % ir.path)
|
||||
else:
|
||||
# Add the *previous* path_info + qs to
|
||||
# redirections.
|
||||
if qs:
|
||||
qs = "?" + qs
|
||||
redirections.append(sn + path + qs)
|
||||
|
||||
# Munge environment and try again.
|
||||
method = "GET"
|
||||
path = ir.path
|
||||
qs = ir.query_string
|
||||
rfile = BytesIO()
|
||||
|
||||
self.send_response(
|
||||
response.output_status, response.header_list,
|
||||
response.body)
|
||||
finally:
|
||||
app.release_serving()
|
||||
except:
|
||||
tb = format_exc()
|
||||
# print tb
|
||||
cherrypy.log(tb, 'NATIVE_ADAPTER', severity=logging.ERROR)
|
||||
s, h, b = bare_error()
|
||||
self.send_response(s, h, b)
|
||||
|
||||
def send_response(self, status, headers, body):
|
||||
req = self.req
|
||||
|
||||
# Set response status
|
||||
req.status = str(status or "500 Server Error")
|
||||
|
||||
# Set response headers
|
||||
for header, value in headers:
|
||||
req.outheaders.append((header, value))
|
||||
if (req.ready and not req.sent_headers):
|
||||
req.sent_headers = True
|
||||
req.send_headers()
|
||||
|
||||
# Set response body
|
||||
for seg in body:
|
||||
req.write(seg)
|
||||
|
||||
|
||||
class CPHTTPServer(wsgiserver.HTTPServer):
|
||||
|
||||
"""Wrapper for wsgiserver.HTTPServer.
|
||||
|
||||
wsgiserver has been designed to not reference CherryPy in any way,
|
||||
so that it can be used in other frameworks and applications.
|
||||
Therefore, we wrap it here, so we can apply some attributes
|
||||
from config -> cherrypy.server -> HTTPServer.
|
||||
"""
|
||||
|
||||
def __init__(self, server_adapter=cherrypy.server):
|
||||
self.server_adapter = server_adapter
|
||||
|
||||
server_name = (self.server_adapter.socket_host or
|
||||
self.server_adapter.socket_file or
|
||||
None)
|
||||
|
||||
wsgiserver.HTTPServer.__init__(
|
||||
self, server_adapter.bind_addr, NativeGateway,
|
||||
minthreads=server_adapter.thread_pool,
|
||||
maxthreads=server_adapter.thread_pool_max,
|
||||
server_name=server_name)
|
||||
|
||||
self.max_request_header_size = (
|
||||
self.server_adapter.max_request_header_size or 0)
|
||||
self.max_request_body_size = (
|
||||
self.server_adapter.max_request_body_size or 0)
|
||||
self.request_queue_size = self.server_adapter.socket_queue_size
|
||||
self.timeout = self.server_adapter.socket_timeout
|
||||
self.shutdown_timeout = self.server_adapter.shutdown_timeout
|
||||
self.protocol = self.server_adapter.protocol_version
|
||||
self.nodelay = self.server_adapter.nodelay
|
||||
|
||||
ssl_module = self.server_adapter.ssl_module or 'pyopenssl'
|
||||
if self.server_adapter.ssl_context:
|
||||
adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module)
|
||||
self.ssl_adapter = adapter_class(
|
||||
self.server_adapter.ssl_certificate,
|
||||
self.server_adapter.ssl_private_key,
|
||||
self.server_adapter.ssl_certificate_chain)
|
||||
self.ssl_adapter.context = self.server_adapter.ssl_context
|
||||
elif self.server_adapter.ssl_certificate:
|
||||
adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module)
|
||||
self.ssl_adapter = adapter_class(
|
||||
self.server_adapter.ssl_certificate,
|
||||
self.server_adapter.ssl_private_key,
|
||||
self.server_adapter.ssl_certificate_chain)
|
File diff suppressed because it is too large
Load Diff
|
@ -1,973 +0,0 @@
|
|||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import warnings
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import basestring, copykeys, ntob, unicodestr
|
||||
from cherrypy._cpcompat import SimpleCookie, CookieError, py3k
|
||||
from cherrypy import _cpreqbody, _cpconfig
|
||||
from cherrypy._cperror import format_exc, bare_error
|
||||
from cherrypy.lib import httputil, file_generator
|
||||
|
||||
|
||||
class Hook(object):
|
||||
|
||||
"""A callback and its metadata: failsafe, priority, and kwargs."""
|
||||
|
||||
callback = None
|
||||
"""
|
||||
The bare callable that this Hook object is wrapping, which will
|
||||
be called when the Hook is called."""
|
||||
|
||||
failsafe = False
|
||||
"""
|
||||
If True, the callback is guaranteed to run even if other callbacks
|
||||
from the same call point raise exceptions."""
|
||||
|
||||
priority = 50
|
||||
"""
|
||||
Defines the order of execution for a list of Hooks. Priority numbers
|
||||
should be limited to the closed interval [0, 100], but values outside
|
||||
this range are acceptable, as are fractional values."""
|
||||
|
||||
kwargs = {}
|
||||
"""
|
||||
A set of keyword arguments that will be passed to the
|
||||
callable on each call."""
|
||||
|
||||
def __init__(self, callback, failsafe=None, priority=None, **kwargs):
|
||||
self.callback = callback
|
||||
|
||||
if failsafe is None:
|
||||
failsafe = getattr(callback, "failsafe", False)
|
||||
self.failsafe = failsafe
|
||||
|
||||
if priority is None:
|
||||
priority = getattr(callback, "priority", 50)
|
||||
self.priority = priority
|
||||
|
||||
self.kwargs = kwargs
|
||||
|
||||
def __lt__(self, other):
|
||||
# Python 3
|
||||
return self.priority < other.priority
|
||||
|
||||
def __cmp__(self, other):
|
||||
# Python 2
|
||||
return cmp(self.priority, other.priority)
|
||||
|
||||
def __call__(self):
|
||||
"""Run self.callback(**self.kwargs)."""
|
||||
return self.callback(**self.kwargs)
|
||||
|
||||
def __repr__(self):
|
||||
cls = self.__class__
|
||||
return ("%s.%s(callback=%r, failsafe=%r, priority=%r, %s)"
|
||||
% (cls.__module__, cls.__name__, self.callback,
|
||||
self.failsafe, self.priority,
|
||||
", ".join(['%s=%r' % (k, v)
|
||||
for k, v in self.kwargs.items()])))
|
||||
|
||||
|
||||
class HookMap(dict):
|
||||
|
||||
"""A map of call points to lists of callbacks (Hook objects)."""
|
||||
|
||||
def __new__(cls, points=None):
|
||||
d = dict.__new__(cls)
|
||||
for p in points or []:
|
||||
d[p] = []
|
||||
return d
|
||||
|
||||
def __init__(self, *a, **kw):
|
||||
pass
|
||||
|
||||
def attach(self, point, callback, failsafe=None, priority=None, **kwargs):
|
||||
"""Append a new Hook made from the supplied arguments."""
|
||||
self[point].append(Hook(callback, failsafe, priority, **kwargs))
|
||||
|
||||
def run(self, point):
|
||||
"""Execute all registered Hooks (callbacks) for the given point."""
|
||||
exc = None
|
||||
hooks = self[point]
|
||||
hooks.sort()
|
||||
for hook in hooks:
|
||||
# Some hooks are guaranteed to run even if others at
|
||||
# the same hookpoint fail. We will still log the failure,
|
||||
# but proceed on to the next hook. The only way
|
||||
# to stop all processing from one of these hooks is
|
||||
# to raise SystemExit and stop the whole server.
|
||||
if exc is None or hook.failsafe:
|
||||
try:
|
||||
hook()
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
raise
|
||||
except (cherrypy.HTTPError, cherrypy.HTTPRedirect,
|
||||
cherrypy.InternalRedirect):
|
||||
exc = sys.exc_info()[1]
|
||||
except:
|
||||
exc = sys.exc_info()[1]
|
||||
cherrypy.log(traceback=True, severity=40)
|
||||
if exc:
|
||||
raise exc
|
||||
|
||||
def __copy__(self):
|
||||
newmap = self.__class__()
|
||||
# We can't just use 'update' because we want copies of the
|
||||
# mutable values (each is a list) as well.
|
||||
for k, v in self.items():
|
||||
newmap[k] = v[:]
|
||||
return newmap
|
||||
copy = __copy__
|
||||
|
||||
def __repr__(self):
|
||||
cls = self.__class__
|
||||
return "%s.%s(points=%r)" % (
|
||||
cls.__module__,
|
||||
cls.__name__,
|
||||
copykeys(self)
|
||||
)
|
||||
|
||||
|
||||
# Config namespace handlers
|
||||
|
||||
def hooks_namespace(k, v):
|
||||
"""Attach bare hooks declared in config."""
|
||||
# Use split again to allow multiple hooks for a single
|
||||
# hookpoint per path (e.g. "hooks.before_handler.1").
|
||||
# Little-known fact you only get from reading source ;)
|
||||
hookpoint = k.split(".", 1)[0]
|
||||
if isinstance(v, basestring):
|
||||
v = cherrypy.lib.attributes(v)
|
||||
if not isinstance(v, Hook):
|
||||
v = Hook(v)
|
||||
cherrypy.serving.request.hooks[hookpoint].append(v)
|
||||
|
||||
|
||||
def request_namespace(k, v):
|
||||
"""Attach request attributes declared in config."""
|
||||
# Provides config entries to set request.body attrs (like
|
||||
# attempt_charsets).
|
||||
if k[:5] == 'body.':
|
||||
setattr(cherrypy.serving.request.body, k[5:], v)
|
||||
else:
|
||||
setattr(cherrypy.serving.request, k, v)
|
||||
|
||||
|
||||
def response_namespace(k, v):
|
||||
"""Attach response attributes declared in config."""
|
||||
# Provides config entries to set default response headers
|
||||
# http://cherrypy.org/ticket/889
|
||||
if k[:8] == 'headers.':
|
||||
cherrypy.serving.response.headers[k.split('.', 1)[1]] = v
|
||||
else:
|
||||
setattr(cherrypy.serving.response, k, v)
|
||||
|
||||
|
||||
def error_page_namespace(k, v):
|
||||
"""Attach error pages declared in config."""
|
||||
if k != 'default':
|
||||
k = int(k)
|
||||
cherrypy.serving.request.error_page[k] = v
|
||||
|
||||
|
||||
hookpoints = ['on_start_resource', 'before_request_body',
|
||||
'before_handler', 'before_finalize',
|
||||
'on_end_resource', 'on_end_request',
|
||||
'before_error_response', 'after_error_response']
|
||||
|
||||
|
||||
class Request(object):
|
||||
|
||||
"""An HTTP request.
|
||||
|
||||
This object represents the metadata of an HTTP request message;
|
||||
that is, it contains attributes which describe the environment
|
||||
in which the request URL, headers, and body were sent (if you
|
||||
want tools to interpret the headers and body, those are elsewhere,
|
||||
mostly in Tools). This 'metadata' consists of socket data,
|
||||
transport characteristics, and the Request-Line. This object
|
||||
also contains data regarding the configuration in effect for
|
||||
the given URL, and the execution plan for generating a response.
|
||||
"""
|
||||
|
||||
prev = None
|
||||
"""
|
||||
The previous Request object (if any). This should be None
|
||||
unless we are processing an InternalRedirect."""
|
||||
|
||||
# Conversation/connection attributes
|
||||
local = httputil.Host("127.0.0.1", 80)
|
||||
"An httputil.Host(ip, port, hostname) object for the server socket."
|
||||
|
||||
remote = httputil.Host("127.0.0.1", 1111)
|
||||
"An httputil.Host(ip, port, hostname) object for the client socket."
|
||||
|
||||
scheme = "http"
|
||||
"""
|
||||
The protocol used between client and server. In most cases,
|
||||
this will be either 'http' or 'https'."""
|
||||
|
||||
server_protocol = "HTTP/1.1"
|
||||
"""
|
||||
The HTTP version for which the HTTP server is at least
|
||||
conditionally compliant."""
|
||||
|
||||
base = ""
|
||||
"""The (scheme://host) portion of the requested URL.
|
||||
In some cases (e.g. when proxying via mod_rewrite), this may contain
|
||||
path segments which cherrypy.url uses when constructing url's, but
|
||||
which otherwise are ignored by CherryPy. Regardless, this value
|
||||
MUST NOT end in a slash."""
|
||||
|
||||
# Request-Line attributes
|
||||
request_line = ""
|
||||
"""
|
||||
The complete Request-Line received from the client. This is a
|
||||
single string consisting of the request method, URI, and protocol
|
||||
version (joined by spaces). Any final CRLF is removed."""
|
||||
|
||||
method = "GET"
|
||||
"""
|
||||
Indicates the HTTP method to be performed on the resource identified
|
||||
by the Request-URI. Common methods include GET, HEAD, POST, PUT, and
|
||||
DELETE. CherryPy allows any extension method; however, various HTTP
|
||||
servers and gateways may restrict the set of allowable methods.
|
||||
CherryPy applications SHOULD restrict the set (on a per-URI basis)."""
|
||||
|
||||
query_string = ""
|
||||
"""
|
||||
The query component of the Request-URI, a string of information to be
|
||||
interpreted by the resource. The query portion of a URI follows the
|
||||
path component, and is separated by a '?'. For example, the URI
|
||||
'http://www.cherrypy.org/wiki?a=3&b=4' has the query component,
|
||||
'a=3&b=4'."""
|
||||
|
||||
query_string_encoding = 'utf8'
|
||||
"""
|
||||
The encoding expected for query string arguments after % HEX HEX decoding).
|
||||
If a query string is provided that cannot be decoded with this encoding,
|
||||
404 is raised (since technically it's a different URI). If you want
|
||||
arbitrary encodings to not error, set this to 'Latin-1'; you can then
|
||||
encode back to bytes and re-decode to whatever encoding you like later.
|
||||
"""
|
||||
|
||||
protocol = (1, 1)
|
||||
"""The HTTP protocol version corresponding to the set
|
||||
of features which should be allowed in the response. If BOTH
|
||||
the client's request message AND the server's level of HTTP
|
||||
compliance is HTTP/1.1, this attribute will be the tuple (1, 1).
|
||||
If either is 1.0, this attribute will be the tuple (1, 0).
|
||||
Lower HTTP protocol versions are not explicitly supported."""
|
||||
|
||||
params = {}
|
||||
"""
|
||||
A dict which combines query string (GET) and request entity (POST)
|
||||
variables. This is populated in two stages: GET params are added
|
||||
before the 'on_start_resource' hook, and POST params are added
|
||||
between the 'before_request_body' and 'before_handler' hooks."""
|
||||
|
||||
# Message attributes
|
||||
header_list = []
|
||||
"""
|
||||
A list of the HTTP request headers as (name, value) tuples.
|
||||
In general, you should use request.headers (a dict) instead."""
|
||||
|
||||
headers = httputil.HeaderMap()
|
||||
"""
|
||||
A dict-like object containing the request headers. Keys are header
|
||||
names (in Title-Case format); however, you may get and set them in
|
||||
a case-insensitive manner. That is, headers['Content-Type'] and
|
||||
headers['content-type'] refer to the same value. Values are header
|
||||
values (decoded according to :rfc:`2047` if necessary). See also:
|
||||
httputil.HeaderMap, httputil.HeaderElement."""
|
||||
|
||||
cookie = SimpleCookie()
|
||||
"""See help(Cookie)."""
|
||||
|
||||
rfile = None
|
||||
"""
|
||||
If the request included an entity (body), it will be available
|
||||
as a stream in this attribute. However, the rfile will normally
|
||||
be read for you between the 'before_request_body' hook and the
|
||||
'before_handler' hook, and the resulting string is placed into
|
||||
either request.params or the request.body attribute.
|
||||
|
||||
You may disable the automatic consumption of the rfile by setting
|
||||
request.process_request_body to False, either in config for the desired
|
||||
path, or in an 'on_start_resource' or 'before_request_body' hook.
|
||||
|
||||
WARNING: In almost every case, you should not attempt to read from the
|
||||
rfile stream after CherryPy's automatic mechanism has read it. If you
|
||||
turn off the automatic parsing of rfile, you should read exactly the
|
||||
number of bytes specified in request.headers['Content-Length'].
|
||||
Ignoring either of these warnings may result in a hung request thread
|
||||
or in corruption of the next (pipelined) request.
|
||||
"""
|
||||
|
||||
process_request_body = True
|
||||
"""
|
||||
If True, the rfile (if any) is automatically read and parsed,
|
||||
and the result placed into request.params or request.body."""
|
||||
|
||||
methods_with_bodies = ("POST", "PUT")
|
||||
"""
|
||||
A sequence of HTTP methods for which CherryPy will automatically
|
||||
attempt to read a body from the rfile. If you are going to change
|
||||
this property, modify it on the configuration (recommended)
|
||||
or on the "hook point" `on_start_resource`.
|
||||
"""
|
||||
|
||||
body = None
|
||||
"""
|
||||
If the request Content-Type is 'application/x-www-form-urlencoded'
|
||||
or multipart, this will be None. Otherwise, this will be an instance
|
||||
of :class:`RequestBody<cherrypy._cpreqbody.RequestBody>` (which you
|
||||
can .read()); this value is set between the 'before_request_body' and
|
||||
'before_handler' hooks (assuming that process_request_body is True)."""
|
||||
|
||||
# Dispatch attributes
|
||||
dispatch = cherrypy.dispatch.Dispatcher()
|
||||
"""
|
||||
The object which looks up the 'page handler' callable and collects
|
||||
config for the current request based on the path_info, other
|
||||
request attributes, and the application architecture. The core
|
||||
calls the dispatcher as early as possible, passing it a 'path_info'
|
||||
argument.
|
||||
|
||||
The default dispatcher discovers the page handler by matching path_info
|
||||
to a hierarchical arrangement of objects, starting at request.app.root.
|
||||
See help(cherrypy.dispatch) for more information."""
|
||||
|
||||
script_name = ""
|
||||
"""
|
||||
The 'mount point' of the application which is handling this request.
|
||||
|
||||
This attribute MUST NOT end in a slash. If the script_name refers to
|
||||
the root of the URI, it MUST be an empty string (not "/").
|
||||
"""
|
||||
|
||||
path_info = "/"
|
||||
"""
|
||||
The 'relative path' portion of the Request-URI. This is relative
|
||||
to the script_name ('mount point') of the application which is
|
||||
handling this request."""
|
||||
|
||||
login = None
|
||||
"""
|
||||
When authentication is used during the request processing this is
|
||||
set to 'False' if it failed and to the 'username' value if it succeeded.
|
||||
The default 'None' implies that no authentication happened."""
|
||||
|
||||
# Note that cherrypy.url uses "if request.app:" to determine whether
|
||||
# the call is during a real HTTP request or not. So leave this None.
|
||||
app = None
|
||||
"""The cherrypy.Application object which is handling this request."""
|
||||
|
||||
handler = None
|
||||
"""
|
||||
The function, method, or other callable which CherryPy will call to
|
||||
produce the response. The discovery of the handler and the arguments
|
||||
it will receive are determined by the request.dispatch object.
|
||||
By default, the handler is discovered by walking a tree of objects
|
||||
starting at request.app.root, and is then passed all HTTP params
|
||||
(from the query string and POST body) as keyword arguments."""
|
||||
|
||||
toolmaps = {}
|
||||
"""
|
||||
A nested dict of all Toolboxes and Tools in effect for this request,
|
||||
of the form: {Toolbox.namespace: {Tool.name: config dict}}."""
|
||||
|
||||
config = None
|
||||
"""
|
||||
A flat dict of all configuration entries which apply to the
|
||||
current request. These entries are collected from global config,
|
||||
application config (based on request.path_info), and from handler
|
||||
config (exactly how is governed by the request.dispatch object in
|
||||
effect for this request; by default, handler config can be attached
|
||||
anywhere in the tree between request.app.root and the final handler,
|
||||
and inherits downward)."""
|
||||
|
||||
is_index = None
|
||||
"""
|
||||
This will be True if the current request is mapped to an 'index'
|
||||
resource handler (also, a 'default' handler if path_info ends with
|
||||
a slash). The value may be used to automatically redirect the
|
||||
user-agent to a 'more canonical' URL which either adds or removes
|
||||
the trailing slash. See cherrypy.tools.trailing_slash."""
|
||||
|
||||
hooks = HookMap(hookpoints)
|
||||
"""
|
||||
A HookMap (dict-like object) of the form: {hookpoint: [hook, ...]}.
|
||||
Each key is a str naming the hook point, and each value is a list
|
||||
of hooks which will be called at that hook point during this request.
|
||||
The list of hooks is generally populated as early as possible (mostly
|
||||
from Tools specified in config), but may be extended at any time.
|
||||
See also: _cprequest.Hook, _cprequest.HookMap, and cherrypy.tools."""
|
||||
|
||||
error_response = cherrypy.HTTPError(500).set_response
|
||||
"""
|
||||
The no-arg callable which will handle unexpected, untrapped errors
|
||||
during request processing. This is not used for expected exceptions
|
||||
(like NotFound, HTTPError, or HTTPRedirect) which are raised in
|
||||
response to expected conditions (those should be customized either
|
||||
via request.error_page or by overriding HTTPError.set_response).
|
||||
By default, error_response uses HTTPError(500) to return a generic
|
||||
error response to the user-agent."""
|
||||
|
||||
error_page = {}
|
||||
"""
|
||||
A dict of {error code: response filename or callable} pairs.
|
||||
|
||||
The error code must be an int representing a given HTTP error code,
|
||||
or the string 'default', which will be used if no matching entry
|
||||
is found for a given numeric code.
|
||||
|
||||
If a filename is provided, the file should contain a Python string-
|
||||
formatting template, and can expect by default to receive format
|
||||
values with the mapping keys %(status)s, %(message)s, %(traceback)s,
|
||||
and %(version)s. The set of format mappings can be extended by
|
||||
overriding HTTPError.set_response.
|
||||
|
||||
If a callable is provided, it will be called by default with keyword
|
||||
arguments 'status', 'message', 'traceback', and 'version', as for a
|
||||
string-formatting template. The callable must return a string or
|
||||
iterable of strings which will be set to response.body. It may also
|
||||
override headers or perform any other processing.
|
||||
|
||||
If no entry is given for an error code, and no 'default' entry exists,
|
||||
a default template will be used.
|
||||
"""
|
||||
|
||||
show_tracebacks = True
|
||||
"""
|
||||
If True, unexpected errors encountered during request processing will
|
||||
include a traceback in the response body."""
|
||||
|
||||
show_mismatched_params = True
|
||||
"""
|
||||
If True, mismatched parameters encountered during PageHandler invocation
|
||||
processing will be included in the response body."""
|
||||
|
||||
throws = (KeyboardInterrupt, SystemExit, cherrypy.InternalRedirect)
|
||||
"""The sequence of exceptions which Request.run does not trap."""
|
||||
|
||||
throw_errors = False
|
||||
"""
|
||||
If True, Request.run will not trap any errors (except HTTPRedirect and
|
||||
HTTPError, which are more properly called 'exceptions', not errors)."""
|
||||
|
||||
closed = False
|
||||
"""True once the close method has been called, False otherwise."""
|
||||
|
||||
stage = None
|
||||
"""
|
||||
A string containing the stage reached in the request-handling process.
|
||||
This is useful when debugging a live server with hung requests."""
|
||||
|
||||
namespaces = _cpconfig.NamespaceSet(
|
||||
**{"hooks": hooks_namespace,
|
||||
"request": request_namespace,
|
||||
"response": response_namespace,
|
||||
"error_page": error_page_namespace,
|
||||
"tools": cherrypy.tools,
|
||||
})
|
||||
|
||||
def __init__(self, local_host, remote_host, scheme="http",
|
||||
server_protocol="HTTP/1.1"):
|
||||
"""Populate a new Request object.
|
||||
|
||||
local_host should be an httputil.Host object with the server info.
|
||||
remote_host should be an httputil.Host object with the client info.
|
||||
scheme should be a string, either "http" or "https".
|
||||
"""
|
||||
self.local = local_host
|
||||
self.remote = remote_host
|
||||
self.scheme = scheme
|
||||
self.server_protocol = server_protocol
|
||||
|
||||
self.closed = False
|
||||
|
||||
# Put a *copy* of the class error_page into self.
|
||||
self.error_page = self.error_page.copy()
|
||||
|
||||
# Put a *copy* of the class namespaces into self.
|
||||
self.namespaces = self.namespaces.copy()
|
||||
|
||||
self.stage = None
|
||||
|
||||
def close(self):
|
||||
"""Run cleanup code. (Core)"""
|
||||
if not self.closed:
|
||||
self.closed = True
|
||||
self.stage = 'on_end_request'
|
||||
self.hooks.run('on_end_request')
|
||||
self.stage = 'close'
|
||||
|
||||
def run(self, method, path, query_string, req_protocol, headers, rfile):
|
||||
r"""Process the Request. (Core)
|
||||
|
||||
method, path, query_string, and req_protocol should be pulled directly
|
||||
from the Request-Line (e.g. "GET /path?key=val HTTP/1.0").
|
||||
|
||||
path
|
||||
This should be %XX-unquoted, but query_string should not be.
|
||||
|
||||
When using Python 2, they both MUST be byte strings,
|
||||
not unicode strings.
|
||||
|
||||
When using Python 3, they both MUST be unicode strings,
|
||||
not byte strings, and preferably not bytes \x00-\xFF
|
||||
disguised as unicode.
|
||||
|
||||
headers
|
||||
A list of (name, value) tuples.
|
||||
|
||||
rfile
|
||||
A file-like object containing the HTTP request entity.
|
||||
|
||||
When run() is done, the returned object should have 3 attributes:
|
||||
|
||||
* status, e.g. "200 OK"
|
||||
* header_list, a list of (name, value) tuples
|
||||
* body, an iterable yielding strings
|
||||
|
||||
Consumer code (HTTP servers) should then access these response
|
||||
attributes to build the outbound stream.
|
||||
|
||||
"""
|
||||
response = cherrypy.serving.response
|
||||
self.stage = 'run'
|
||||
try:
|
||||
self.error_response = cherrypy.HTTPError(500).set_response
|
||||
|
||||
self.method = method
|
||||
path = path or "/"
|
||||
self.query_string = query_string or ''
|
||||
self.params = {}
|
||||
|
||||
# Compare request and server HTTP protocol versions, in case our
|
||||
# server does not support the requested protocol. Limit our output
|
||||
# to min(req, server). We want the following output:
|
||||
# request server actual written supported response
|
||||
# protocol protocol response protocol feature set
|
||||
# a 1.0 1.0 1.0 1.0
|
||||
# b 1.0 1.1 1.1 1.0
|
||||
# c 1.1 1.0 1.0 1.0
|
||||
# d 1.1 1.1 1.1 1.1
|
||||
# Notice that, in (b), the response will be "HTTP/1.1" even though
|
||||
# the client only understands 1.0. RFC 2616 10.5.6 says we should
|
||||
# only return 505 if the _major_ version is different.
|
||||
rp = int(req_protocol[5]), int(req_protocol[7])
|
||||
sp = int(self.server_protocol[5]), int(self.server_protocol[7])
|
||||
self.protocol = min(rp, sp)
|
||||
response.headers.protocol = self.protocol
|
||||
|
||||
# Rebuild first line of the request (e.g. "GET /path HTTP/1.0").
|
||||
url = path
|
||||
if query_string:
|
||||
url += '?' + query_string
|
||||
self.request_line = '%s %s %s' % (method, url, req_protocol)
|
||||
|
||||
self.header_list = list(headers)
|
||||
self.headers = httputil.HeaderMap()
|
||||
|
||||
self.rfile = rfile
|
||||
self.body = None
|
||||
|
||||
self.cookie = SimpleCookie()
|
||||
self.handler = None
|
||||
|
||||
# path_info should be the path from the
|
||||
# app root (script_name) to the handler.
|
||||
self.script_name = self.app.script_name
|
||||
self.path_info = pi = path[len(self.script_name):]
|
||||
|
||||
self.stage = 'respond'
|
||||
self.respond(pi)
|
||||
|
||||
except self.throws:
|
||||
raise
|
||||
except:
|
||||
if self.throw_errors:
|
||||
raise
|
||||
else:
|
||||
# Failure in setup, error handler or finalize. Bypass them.
|
||||
# Can't use handle_error because we may not have hooks yet.
|
||||
cherrypy.log(traceback=True, severity=40)
|
||||
if self.show_tracebacks:
|
||||
body = format_exc()
|
||||
else:
|
||||
body = ""
|
||||
r = bare_error(body)
|
||||
response.output_status, response.header_list, response.body = r
|
||||
|
||||
if self.method == "HEAD":
|
||||
# HEAD requests MUST NOT return a message-body in the response.
|
||||
response.body = []
|
||||
|
||||
try:
|
||||
cherrypy.log.access()
|
||||
except:
|
||||
cherrypy.log.error(traceback=True)
|
||||
|
||||
if response.timed_out:
|
||||
raise cherrypy.TimeoutError()
|
||||
|
||||
return response
|
||||
|
||||
# Uncomment for stage debugging
|
||||
# stage = property(lambda self: self._stage, lambda self, v: print(v))
|
||||
|
||||
def respond(self, path_info):
|
||||
"""Generate a response for the resource at self.path_info. (Core)"""
|
||||
response = cherrypy.serving.response
|
||||
try:
|
||||
try:
|
||||
try:
|
||||
if self.app is None:
|
||||
raise cherrypy.NotFound()
|
||||
|
||||
# Get the 'Host' header, so we can HTTPRedirect properly.
|
||||
self.stage = 'process_headers'
|
||||
self.process_headers()
|
||||
|
||||
# Make a copy of the class hooks
|
||||
self.hooks = self.__class__.hooks.copy()
|
||||
self.toolmaps = {}
|
||||
|
||||
self.stage = 'get_resource'
|
||||
self.get_resource(path_info)
|
||||
|
||||
self.body = _cpreqbody.RequestBody(
|
||||
self.rfile, self.headers, request_params=self.params)
|
||||
|
||||
self.namespaces(self.config)
|
||||
|
||||
self.stage = 'on_start_resource'
|
||||
self.hooks.run('on_start_resource')
|
||||
|
||||
# Parse the querystring
|
||||
self.stage = 'process_query_string'
|
||||
self.process_query_string()
|
||||
|
||||
# Process the body
|
||||
if self.process_request_body:
|
||||
if self.method not in self.methods_with_bodies:
|
||||
self.process_request_body = False
|
||||
self.stage = 'before_request_body'
|
||||
self.hooks.run('before_request_body')
|
||||
if self.process_request_body:
|
||||
self.body.process()
|
||||
|
||||
# Run the handler
|
||||
self.stage = 'before_handler'
|
||||
self.hooks.run('before_handler')
|
||||
if self.handler:
|
||||
self.stage = 'handler'
|
||||
response.body = self.handler()
|
||||
|
||||
# Finalize
|
||||
self.stage = 'before_finalize'
|
||||
self.hooks.run('before_finalize')
|
||||
response.finalize()
|
||||
except (cherrypy.HTTPRedirect, cherrypy.HTTPError):
|
||||
inst = sys.exc_info()[1]
|
||||
inst.set_response()
|
||||
self.stage = 'before_finalize (HTTPError)'
|
||||
self.hooks.run('before_finalize')
|
||||
response.finalize()
|
||||
finally:
|
||||
self.stage = 'on_end_resource'
|
||||
self.hooks.run('on_end_resource')
|
||||
except self.throws:
|
||||
raise
|
||||
except:
|
||||
if self.throw_errors:
|
||||
raise
|
||||
self.handle_error()
|
||||
|
||||
def process_query_string(self):
|
||||
"""Parse the query string into Python structures. (Core)"""
|
||||
try:
|
||||
p = httputil.parse_query_string(
|
||||
self.query_string, encoding=self.query_string_encoding)
|
||||
except UnicodeDecodeError:
|
||||
raise cherrypy.HTTPError(
|
||||
404, "The given query string could not be processed. Query "
|
||||
"strings for this resource must be encoded with %r." %
|
||||
self.query_string_encoding)
|
||||
|
||||
# Python 2 only: keyword arguments must be byte strings (type 'str').
|
||||
if not py3k:
|
||||
for key, value in p.items():
|
||||
if isinstance(key, unicode):
|
||||
del p[key]
|
||||
p[key.encode(self.query_string_encoding)] = value
|
||||
self.params.update(p)
|
||||
|
||||
def process_headers(self):
|
||||
"""Parse HTTP header data into Python structures. (Core)"""
|
||||
# Process the headers into self.headers
|
||||
headers = self.headers
|
||||
for name, value in self.header_list:
|
||||
# Call title() now (and use dict.__method__(headers))
|
||||
# so title doesn't have to be called twice.
|
||||
name = name.title()
|
||||
value = value.strip()
|
||||
|
||||
# Warning: if there is more than one header entry for cookies
|
||||
# (AFAIK, only Konqueror does that), only the last one will
|
||||
# remain in headers (but they will be correctly stored in
|
||||
# request.cookie).
|
||||
if "=?" in value:
|
||||
dict.__setitem__(headers, name, httputil.decode_TEXT(value))
|
||||
else:
|
||||
dict.__setitem__(headers, name, value)
|
||||
|
||||
# Handle cookies differently because on Konqueror, multiple
|
||||
# cookies come on different lines with the same key
|
||||
if name == 'Cookie':
|
||||
try:
|
||||
self.cookie.load(value)
|
||||
except CookieError:
|
||||
msg = "Illegal cookie name %s" % value.split('=')[0]
|
||||
raise cherrypy.HTTPError(400, msg)
|
||||
|
||||
if not dict.__contains__(headers, 'Host'):
|
||||
# All Internet-based HTTP/1.1 servers MUST respond with a 400
|
||||
# (Bad Request) status code to any HTTP/1.1 request message
|
||||
# which lacks a Host header field.
|
||||
if self.protocol >= (1, 1):
|
||||
msg = "HTTP/1.1 requires a 'Host' request header."
|
||||
raise cherrypy.HTTPError(400, msg)
|
||||
host = dict.get(headers, 'Host')
|
||||
if not host:
|
||||
host = self.local.name or self.local.ip
|
||||
self.base = "%s://%s" % (self.scheme, host)
|
||||
|
||||
def get_resource(self, path):
|
||||
"""Call a dispatcher (which sets self.handler and .config). (Core)"""
|
||||
# First, see if there is a custom dispatch at this URI. Custom
|
||||
# dispatchers can only be specified in app.config, not in _cp_config
|
||||
# (since custom dispatchers may not even have an app.root).
|
||||
dispatch = self.app.find_config(
|
||||
path, "request.dispatch", self.dispatch)
|
||||
|
||||
# dispatch() should set self.handler and self.config
|
||||
dispatch(path)
|
||||
|
||||
def handle_error(self):
|
||||
"""Handle the last unanticipated exception. (Core)"""
|
||||
try:
|
||||
self.hooks.run("before_error_response")
|
||||
if self.error_response:
|
||||
self.error_response()
|
||||
self.hooks.run("after_error_response")
|
||||
cherrypy.serving.response.finalize()
|
||||
except cherrypy.HTTPRedirect:
|
||||
inst = sys.exc_info()[1]
|
||||
inst.set_response()
|
||||
cherrypy.serving.response.finalize()
|
||||
|
||||
# ------------------------- Properties ------------------------- #
|
||||
|
||||
def _get_body_params(self):
|
||||
warnings.warn(
|
||||
"body_params is deprecated in CherryPy 3.2, will be removed in "
|
||||
"CherryPy 3.3.",
|
||||
DeprecationWarning
|
||||
)
|
||||
return self.body.params
|
||||
body_params = property(_get_body_params,
|
||||
doc="""
|
||||
If the request Content-Type is 'application/x-www-form-urlencoded' or
|
||||
multipart, this will be a dict of the params pulled from the entity
|
||||
body; that is, it will be the portion of request.params that come
|
||||
from the message body (sometimes called "POST params", although they
|
||||
can be sent with various HTTP method verbs). This value is set between
|
||||
the 'before_request_body' and 'before_handler' hooks (assuming that
|
||||
process_request_body is True).
|
||||
|
||||
Deprecated in 3.2, will be removed for 3.3 in favor of
|
||||
:attr:`request.body.params<cherrypy._cprequest.RequestBody.params>`.""")
|
||||
|
||||
|
||||
class ResponseBody(object):
|
||||
|
||||
"""The body of the HTTP response (the response entity)."""
|
||||
|
||||
if py3k:
|
||||
unicode_err = ("Page handlers MUST return bytes. Use tools.encode "
|
||||
"if you wish to return unicode.")
|
||||
|
||||
def __get__(self, obj, objclass=None):
|
||||
if obj is None:
|
||||
# When calling on the class instead of an instance...
|
||||
return self
|
||||
else:
|
||||
return obj._body
|
||||
|
||||
def __set__(self, obj, value):
|
||||
# Convert the given value to an iterable object.
|
||||
if py3k and isinstance(value, str):
|
||||
raise ValueError(self.unicode_err)
|
||||
|
||||
if isinstance(value, basestring):
|
||||
# strings get wrapped in a list because iterating over a single
|
||||
# item list is much faster than iterating over every character
|
||||
# in a long string.
|
||||
if value:
|
||||
value = [value]
|
||||
else:
|
||||
# [''] doesn't evaluate to False, so replace it with [].
|
||||
value = []
|
||||
elif py3k and isinstance(value, list):
|
||||
# every item in a list must be bytes...
|
||||
for i, item in enumerate(value):
|
||||
if isinstance(item, str):
|
||||
raise ValueError(self.unicode_err)
|
||||
# Don't use isinstance here; io.IOBase which has an ABC takes
|
||||
# 1000 times as long as, say, isinstance(value, str)
|
||||
elif hasattr(value, 'read'):
|
||||
value = file_generator(value)
|
||||
elif value is None:
|
||||
value = []
|
||||
obj._body = value
|
||||
|
||||
|
||||
class Response(object):
|
||||
|
||||
"""An HTTP Response, including status, headers, and body."""
|
||||
|
||||
status = ""
|
||||
"""The HTTP Status-Code and Reason-Phrase."""
|
||||
|
||||
header_list = []
|
||||
"""
|
||||
A list of the HTTP response headers as (name, value) tuples.
|
||||
In general, you should use response.headers (a dict) instead. This
|
||||
attribute is generated from response.headers and is not valid until
|
||||
after the finalize phase."""
|
||||
|
||||
headers = httputil.HeaderMap()
|
||||
"""
|
||||
A dict-like object containing the response headers. Keys are header
|
||||
names (in Title-Case format); however, you may get and set them in
|
||||
a case-insensitive manner. That is, headers['Content-Type'] and
|
||||
headers['content-type'] refer to the same value. Values are header
|
||||
values (decoded according to :rfc:`2047` if necessary).
|
||||
|
||||
.. seealso:: classes :class:`HeaderMap`, :class:`HeaderElement`
|
||||
"""
|
||||
|
||||
cookie = SimpleCookie()
|
||||
"""See help(Cookie)."""
|
||||
|
||||
body = ResponseBody()
|
||||
"""The body (entity) of the HTTP response."""
|
||||
|
||||
time = None
|
||||
"""The value of time.time() when created. Use in HTTP dates."""
|
||||
|
||||
timeout = 300
|
||||
"""Seconds after which the response will be aborted."""
|
||||
|
||||
timed_out = False
|
||||
"""
|
||||
Flag to indicate the response should be aborted, because it has
|
||||
exceeded its timeout."""
|
||||
|
||||
stream = False
|
||||
"""If False, buffer the response body."""
|
||||
|
||||
def __init__(self):
|
||||
self.status = None
|
||||
self.header_list = None
|
||||
self._body = []
|
||||
self.time = time.time()
|
||||
|
||||
self.headers = httputil.HeaderMap()
|
||||
# Since we know all our keys are titled strings, we can
|
||||
# bypass HeaderMap.update and get a big speed boost.
|
||||
dict.update(self.headers, {
|
||||
"Content-Type": 'text/html',
|
||||
"Server": "CherryPy/" + cherrypy.__version__,
|
||||
"Date": httputil.HTTPDate(self.time),
|
||||
})
|
||||
self.cookie = SimpleCookie()
|
||||
|
||||
def collapse_body(self):
|
||||
"""Collapse self.body to a single string; replace it and return it."""
|
||||
if isinstance(self.body, basestring):
|
||||
return self.body
|
||||
|
||||
newbody = []
|
||||
for chunk in self.body:
|
||||
if py3k and not isinstance(chunk, bytes):
|
||||
raise TypeError("Chunk %s is not of type 'bytes'." %
|
||||
repr(chunk))
|
||||
newbody.append(chunk)
|
||||
newbody = ntob('').join(newbody)
|
||||
|
||||
self.body = newbody
|
||||
return newbody
|
||||
|
||||
def finalize(self):
|
||||
"""Transform headers (and cookies) into self.header_list. (Core)"""
|
||||
try:
|
||||
code, reason, _ = httputil.valid_status(self.status)
|
||||
except ValueError:
|
||||
raise cherrypy.HTTPError(500, sys.exc_info()[1].args[0])
|
||||
|
||||
headers = self.headers
|
||||
|
||||
self.status = "%s %s" % (code, reason)
|
||||
self.output_status = ntob(str(code), 'ascii') + \
|
||||
ntob(" ") + headers.encode(reason)
|
||||
|
||||
if self.stream:
|
||||
# The upshot: wsgiserver will chunk the response if
|
||||
# you pop Content-Length (or set it explicitly to None).
|
||||
# Note that lib.static sets C-L to the file's st_size.
|
||||
if dict.get(headers, 'Content-Length') is None:
|
||||
dict.pop(headers, 'Content-Length', None)
|
||||
elif code < 200 or code in (204, 205, 304):
|
||||
# "All 1xx (informational), 204 (no content),
|
||||
# and 304 (not modified) responses MUST NOT
|
||||
# include a message-body."
|
||||
dict.pop(headers, 'Content-Length', None)
|
||||
self.body = ntob("")
|
||||
else:
|
||||
# Responses which are not streamed should have a Content-Length,
|
||||
# but allow user code to set Content-Length if desired.
|
||||
if dict.get(headers, 'Content-Length') is None:
|
||||
content = self.collapse_body()
|
||||
dict.__setitem__(headers, 'Content-Length', len(content))
|
||||
|
||||
# Transform our header dict into a list of tuples.
|
||||
self.header_list = h = headers.output()
|
||||
|
||||
cookie = self.cookie.output()
|
||||
if cookie:
|
||||
for line in cookie.split("\n"):
|
||||
if line.endswith("\r"):
|
||||
# Python 2.4 emits cookies joined by LF but 2.5+ by CRLF.
|
||||
line = line[:-1]
|
||||
name, value = line.split(": ", 1)
|
||||
if isinstance(name, unicodestr):
|
||||
name = name.encode("ISO-8859-1")
|
||||
if isinstance(value, unicodestr):
|
||||
value = headers.encode(value)
|
||||
h.append((name, value))
|
||||
|
||||
def check_timeout(self):
|
||||
"""If now > self.time + self.timeout, set self.timed_out.
|
||||
|
||||
This purposefully sets a flag, rather than raising an error,
|
||||
so that a monitor thread can interrupt the Response thread.
|
||||
"""
|
||||
if time.time() > self.time + self.timeout:
|
||||
self.timed_out = True
|
|
@ -1,226 +0,0 @@
|
|||
"""Manage HTTP servers with CherryPy."""
|
||||
|
||||
import warnings
|
||||
|
||||
import cherrypy
|
||||
from cherrypy.lib import attributes
|
||||
from cherrypy._cpcompat import basestring, py3k
|
||||
|
||||
# We import * because we want to export check_port
|
||||
# et al as attributes of this module.
|
||||
from cherrypy.process.servers import *
|
||||
|
||||
|
||||
class Server(ServerAdapter):
|
||||
|
||||
"""An adapter for an HTTP server.
|
||||
|
||||
You can set attributes (like socket_host and socket_port)
|
||||
on *this* object (which is probably cherrypy.server), and call
|
||||
quickstart. For example::
|
||||
|
||||
cherrypy.server.socket_port = 80
|
||||
cherrypy.quickstart()
|
||||
"""
|
||||
|
||||
socket_port = 8080
|
||||
"""The TCP port on which to listen for connections."""
|
||||
|
||||
_socket_host = '127.0.0.1'
|
||||
|
||||
def _get_socket_host(self):
|
||||
return self._socket_host
|
||||
|
||||
def _set_socket_host(self, value):
|
||||
if value == '':
|
||||
raise ValueError("The empty string ('') is not an allowed value. "
|
||||
"Use '0.0.0.0' instead to listen on all active "
|
||||
"interfaces (INADDR_ANY).")
|
||||
self._socket_host = value
|
||||
socket_host = property(
|
||||
_get_socket_host,
|
||||
_set_socket_host,
|
||||
doc="""The hostname or IP address on which to listen for connections.
|
||||
|
||||
Host values may be any IPv4 or IPv6 address, or any valid hostname.
|
||||
The string 'localhost' is a synonym for '127.0.0.1' (or '::1', if
|
||||
your hosts file prefers IPv6). The string '0.0.0.0' is a special
|
||||
IPv4 entry meaning "any active interface" (INADDR_ANY), and '::'
|
||||
is the similar IN6ADDR_ANY for IPv6. The empty string or None are
|
||||
not allowed.""")
|
||||
|
||||
socket_file = None
|
||||
"""If given, the name of the UNIX socket to use instead of TCP/IP.
|
||||
|
||||
When this option is not None, the `socket_host` and `socket_port` options
|
||||
are ignored."""
|
||||
|
||||
socket_queue_size = 5
|
||||
"""The 'backlog' argument to socket.listen(); specifies the maximum number
|
||||
of queued connections (default 5)."""
|
||||
|
||||
socket_timeout = 10
|
||||
"""The timeout in seconds for accepted connections (default 10)."""
|
||||
|
||||
accepted_queue_size = -1
|
||||
"""The maximum number of requests which will be queued up before
|
||||
the server refuses to accept it (default -1, meaning no limit)."""
|
||||
|
||||
accepted_queue_timeout = 10
|
||||
"""The timeout in seconds for attempting to add a request to the
|
||||
queue when the queue is full (default 10)."""
|
||||
|
||||
shutdown_timeout = 5
|
||||
"""The time to wait for HTTP worker threads to clean up."""
|
||||
|
||||
protocol_version = 'HTTP/1.1'
|
||||
"""The version string to write in the Status-Line of all HTTP responses,
|
||||
for example, "HTTP/1.1" (the default). Depending on the HTTP server used,
|
||||
this should also limit the supported features used in the response."""
|
||||
|
||||
thread_pool = 10
|
||||
"""The number of worker threads to start up in the pool."""
|
||||
|
||||
thread_pool_max = -1
|
||||
"""The maximum size of the worker-thread pool. Use -1 to indicate no limit.
|
||||
"""
|
||||
|
||||
max_request_header_size = 500 * 1024
|
||||
"""The maximum number of bytes allowable in the request headers.
|
||||
If exceeded, the HTTP server should return "413 Request Entity Too Large".
|
||||
"""
|
||||
|
||||
max_request_body_size = 100 * 1024 * 1024
|
||||
"""The maximum number of bytes allowable in the request body. If exceeded,
|
||||
the HTTP server should return "413 Request Entity Too Large"."""
|
||||
|
||||
instance = None
|
||||
"""If not None, this should be an HTTP server instance (such as
|
||||
CPWSGIServer) which cherrypy.server will control. Use this when you need
|
||||
more control over object instantiation than is available in the various
|
||||
configuration options."""
|
||||
|
||||
ssl_context = None
|
||||
"""When using PyOpenSSL, an instance of SSL.Context."""
|
||||
|
||||
ssl_certificate = None
|
||||
"""The filename of the SSL certificate to use."""
|
||||
|
||||
ssl_certificate_chain = None
|
||||
"""When using PyOpenSSL, the certificate chain to pass to
|
||||
Context.load_verify_locations."""
|
||||
|
||||
ssl_private_key = None
|
||||
"""The filename of the private key to use with SSL."""
|
||||
|
||||
if py3k:
|
||||
ssl_module = 'builtin'
|
||||
"""The name of a registered SSL adaptation module to use with
|
||||
the builtin WSGI server. Builtin options are: 'builtin' (to
|
||||
use the SSL library built into recent versions of Python).
|
||||
You may also register your own classes in the
|
||||
wsgiserver.ssl_adapters dict."""
|
||||
else:
|
||||
ssl_module = 'pyopenssl'
|
||||
"""The name of a registered SSL adaptation module to use with the
|
||||
builtin WSGI server. Builtin options are 'builtin' (to use the SSL
|
||||
library built into recent versions of Python) and 'pyopenssl' (to
|
||||
use the PyOpenSSL project, which you must install separately). You
|
||||
may also register your own classes in the wsgiserver.ssl_adapters
|
||||
dict."""
|
||||
|
||||
statistics = False
|
||||
"""Turns statistics-gathering on or off for aware HTTP servers."""
|
||||
|
||||
nodelay = True
|
||||
"""If True (the default since 3.1), sets the TCP_NODELAY socket option."""
|
||||
|
||||
wsgi_version = (1, 0)
|
||||
"""The WSGI version tuple to use with the builtin WSGI server.
|
||||
The provided options are (1, 0) [which includes support for PEP 3333,
|
||||
which declares it covers WSGI version 1.0.1 but still mandates the
|
||||
wsgi.version (1, 0)] and ('u', 0), an experimental unicode version.
|
||||
You may create and register your own experimental versions of the WSGI
|
||||
protocol by adding custom classes to the wsgiserver.wsgi_gateways dict."""
|
||||
|
||||
def __init__(self):
|
||||
self.bus = cherrypy.engine
|
||||
self.httpserver = None
|
||||
self.interrupt = None
|
||||
self.running = False
|
||||
|
||||
def httpserver_from_self(self, httpserver=None):
|
||||
"""Return a (httpserver, bind_addr) pair based on self attributes."""
|
||||
if httpserver is None:
|
||||
httpserver = self.instance
|
||||
if httpserver is None:
|
||||
from cherrypy import _cpwsgi_server
|
||||
httpserver = _cpwsgi_server.CPWSGIServer(self)
|
||||
if isinstance(httpserver, basestring):
|
||||
# Is anyone using this? Can I add an arg?
|
||||
httpserver = attributes(httpserver)(self)
|
||||
return httpserver, self.bind_addr
|
||||
|
||||
def start(self):
|
||||
"""Start the HTTP server."""
|
||||
if not self.httpserver:
|
||||
self.httpserver, self.bind_addr = self.httpserver_from_self()
|
||||
ServerAdapter.start(self)
|
||||
start.priority = 75
|
||||
|
||||
def _get_bind_addr(self):
|
||||
if self.socket_file:
|
||||
return self.socket_file
|
||||
if self.socket_host is None and self.socket_port is None:
|
||||
return None
|
||||
return (self.socket_host, self.socket_port)
|
||||
|
||||
def _set_bind_addr(self, value):
|
||||
if value is None:
|
||||
self.socket_file = None
|
||||
self.socket_host = None
|
||||
self.socket_port = None
|
||||
elif isinstance(value, basestring):
|
||||
self.socket_file = value
|
||||
self.socket_host = None
|
||||
self.socket_port = None
|
||||
else:
|
||||
try:
|
||||
self.socket_host, self.socket_port = value
|
||||
self.socket_file = None
|
||||
except ValueError:
|
||||
raise ValueError("bind_addr must be a (host, port) tuple "
|
||||
"(for TCP sockets) or a string (for Unix "
|
||||
"domain sockets), not %r" % value)
|
||||
bind_addr = property(
|
||||
_get_bind_addr,
|
||||
_set_bind_addr,
|
||||
doc='A (host, port) tuple for TCP sockets or '
|
||||
'a str for Unix domain sockets.')
|
||||
|
||||
def base(self):
|
||||
"""Return the base (scheme://host[:port] or sock file) for this server.
|
||||
"""
|
||||
if self.socket_file:
|
||||
return self.socket_file
|
||||
|
||||
host = self.socket_host
|
||||
if host in ('0.0.0.0', '::'):
|
||||
# 0.0.0.0 is INADDR_ANY and :: is IN6ADDR_ANY.
|
||||
# Look up the host name, which should be the
|
||||
# safest thing to spit out in a URL.
|
||||
import socket
|
||||
host = socket.gethostname()
|
||||
|
||||
port = self.socket_port
|
||||
|
||||
if self.ssl_certificate:
|
||||
scheme = "https"
|
||||
if port != 443:
|
||||
host += ":%s" % port
|
||||
else:
|
||||
scheme = "http"
|
||||
if port != 80:
|
||||
host += ":%s" % port
|
||||
|
||||
return "%s://%s" % (scheme, host)
|
|
@ -1,241 +0,0 @@
|
|||
# This is a backport of Python-2.4's threading.local() implementation
|
||||
|
||||
"""Thread-local objects
|
||||
|
||||
(Note that this module provides a Python version of thread
|
||||
threading.local class. Depending on the version of Python you're
|
||||
using, there may be a faster one available. You should always import
|
||||
the local class from threading.)
|
||||
|
||||
Thread-local objects support the management of thread-local data.
|
||||
If you have data that you want to be local to a thread, simply create
|
||||
a thread-local object and use its attributes:
|
||||
|
||||
>>> mydata = local()
|
||||
>>> mydata.number = 42
|
||||
>>> mydata.number
|
||||
42
|
||||
|
||||
You can also access the local-object's dictionary:
|
||||
|
||||
>>> mydata.__dict__
|
||||
{'number': 42}
|
||||
>>> mydata.__dict__.setdefault('widgets', [])
|
||||
[]
|
||||
>>> mydata.widgets
|
||||
[]
|
||||
|
||||
What's important about thread-local objects is that their data are
|
||||
local to a thread. If we access the data in a different thread:
|
||||
|
||||
>>> log = []
|
||||
>>> def f():
|
||||
... items = mydata.__dict__.items()
|
||||
... items.sort()
|
||||
... log.append(items)
|
||||
... mydata.number = 11
|
||||
... log.append(mydata.number)
|
||||
|
||||
>>> import threading
|
||||
>>> thread = threading.Thread(target=f)
|
||||
>>> thread.start()
|
||||
>>> thread.join()
|
||||
>>> log
|
||||
[[], 11]
|
||||
|
||||
we get different data. Furthermore, changes made in the other thread
|
||||
don't affect data seen in this thread:
|
||||
|
||||
>>> mydata.number
|
||||
42
|
||||
|
||||
Of course, values you get from a local object, including a __dict__
|
||||
attribute, are for whatever thread was current at the time the
|
||||
attribute was read. For that reason, you generally don't want to save
|
||||
these values across threads, as they apply only to the thread they
|
||||
came from.
|
||||
|
||||
You can create custom local objects by subclassing the local class:
|
||||
|
||||
>>> class MyLocal(local):
|
||||
... number = 2
|
||||
... initialized = False
|
||||
... def __init__(self, **kw):
|
||||
... if self.initialized:
|
||||
... raise SystemError('__init__ called too many times')
|
||||
... self.initialized = True
|
||||
... self.__dict__.update(kw)
|
||||
... def squared(self):
|
||||
... return self.number ** 2
|
||||
|
||||
This can be useful to support default values, methods and
|
||||
initialization. Note that if you define an __init__ method, it will be
|
||||
called each time the local object is used in a separate thread. This
|
||||
is necessary to initialize each thread's dictionary.
|
||||
|
||||
Now if we create a local object:
|
||||
|
||||
>>> mydata = MyLocal(color='red')
|
||||
|
||||
Now we have a default number:
|
||||
|
||||
>>> mydata.number
|
||||
2
|
||||
|
||||
an initial color:
|
||||
|
||||
>>> mydata.color
|
||||
'red'
|
||||
>>> del mydata.color
|
||||
|
||||
And a method that operates on the data:
|
||||
|
||||
>>> mydata.squared()
|
||||
4
|
||||
|
||||
As before, we can access the data in a separate thread:
|
||||
|
||||
>>> log = []
|
||||
>>> thread = threading.Thread(target=f)
|
||||
>>> thread.start()
|
||||
>>> thread.join()
|
||||
>>> log
|
||||
[[('color', 'red'), ('initialized', True)], 11]
|
||||
|
||||
without affecting this thread's data:
|
||||
|
||||
>>> mydata.number
|
||||
2
|
||||
>>> mydata.color
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
AttributeError: 'MyLocal' object has no attribute 'color'
|
||||
|
||||
Note that subclasses can define slots, but they are not thread
|
||||
local. They are shared across threads:
|
||||
|
||||
>>> class MyLocal(local):
|
||||
... __slots__ = 'number'
|
||||
|
||||
>>> mydata = MyLocal()
|
||||
>>> mydata.number = 42
|
||||
>>> mydata.color = 'red'
|
||||
|
||||
So, the separate thread:
|
||||
|
||||
>>> thread = threading.Thread(target=f)
|
||||
>>> thread.start()
|
||||
>>> thread.join()
|
||||
|
||||
affects what we see:
|
||||
|
||||
>>> mydata.number
|
||||
11
|
||||
|
||||
>>> del mydata
|
||||
"""
|
||||
|
||||
# Threading import is at end
|
||||
|
||||
|
||||
class _localbase(object):
|
||||
__slots__ = '_local__key', '_local__args', '_local__lock'
|
||||
|
||||
def __new__(cls, *args, **kw):
|
||||
self = object.__new__(cls)
|
||||
key = 'thread.local.' + str(id(self))
|
||||
object.__setattr__(self, '_local__key', key)
|
||||
object.__setattr__(self, '_local__args', (args, kw))
|
||||
object.__setattr__(self, '_local__lock', RLock())
|
||||
|
||||
if args or kw and (cls.__init__ is object.__init__):
|
||||
raise TypeError("Initialization arguments are not supported")
|
||||
|
||||
# We need to create the thread dict in anticipation of
|
||||
# __init__ being called, to make sure we don't call it
|
||||
# again ourselves.
|
||||
dict = object.__getattribute__(self, '__dict__')
|
||||
currentThread().__dict__[key] = dict
|
||||
|
||||
return self
|
||||
|
||||
|
||||
def _patch(self):
|
||||
key = object.__getattribute__(self, '_local__key')
|
||||
d = currentThread().__dict__.get(key)
|
||||
if d is None:
|
||||
d = {}
|
||||
currentThread().__dict__[key] = d
|
||||
object.__setattr__(self, '__dict__', d)
|
||||
|
||||
# we have a new instance dict, so call out __init__ if we have
|
||||
# one
|
||||
cls = type(self)
|
||||
if cls.__init__ is not object.__init__:
|
||||
args, kw = object.__getattribute__(self, '_local__args')
|
||||
cls.__init__(self, *args, **kw)
|
||||
else:
|
||||
object.__setattr__(self, '__dict__', d)
|
||||
|
||||
|
||||
class local(_localbase):
|
||||
|
||||
def __getattribute__(self, name):
|
||||
lock = object.__getattribute__(self, '_local__lock')
|
||||
lock.acquire()
|
||||
try:
|
||||
_patch(self)
|
||||
return object.__getattribute__(self, name)
|
||||
finally:
|
||||
lock.release()
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
lock = object.__getattribute__(self, '_local__lock')
|
||||
lock.acquire()
|
||||
try:
|
||||
_patch(self)
|
||||
return object.__setattr__(self, name, value)
|
||||
finally:
|
||||
lock.release()
|
||||
|
||||
def __delattr__(self, name):
|
||||
lock = object.__getattribute__(self, '_local__lock')
|
||||
lock.acquire()
|
||||
try:
|
||||
_patch(self)
|
||||
return object.__delattr__(self, name)
|
||||
finally:
|
||||
lock.release()
|
||||
|
||||
def __del__():
|
||||
threading_enumerate = enumerate
|
||||
__getattribute__ = object.__getattribute__
|
||||
|
||||
def __del__(self):
|
||||
key = __getattribute__(self, '_local__key')
|
||||
|
||||
try:
|
||||
threads = list(threading_enumerate())
|
||||
except:
|
||||
# if enumerate fails, as it seems to do during
|
||||
# shutdown, we'll skip cleanup under the assumption
|
||||
# that there is nothing to clean up
|
||||
return
|
||||
|
||||
for thread in threads:
|
||||
try:
|
||||
__dict__ = thread.__dict__
|
||||
except AttributeError:
|
||||
# Thread is dying, rest in peace
|
||||
continue
|
||||
|
||||
if key in __dict__:
|
||||
try:
|
||||
del __dict__[key]
|
||||
except KeyError:
|
||||
pass # didn't have anything in this thread
|
||||
|
||||
return __del__
|
||||
__del__ = __del__()
|
||||
|
||||
from threading import currentThread, enumerate, RLock
|
|
@ -1,529 +0,0 @@
|
|||
"""CherryPy tools. A "tool" is any helper, adapted to CP.
|
||||
|
||||
Tools are usually designed to be used in a variety of ways (although some
|
||||
may only offer one if they choose):
|
||||
|
||||
Library calls
|
||||
All tools are callables that can be used wherever needed.
|
||||
The arguments are straightforward and should be detailed within the
|
||||
docstring.
|
||||
|
||||
Function decorators
|
||||
All tools, when called, may be used as decorators which configure
|
||||
individual CherryPy page handlers (methods on the CherryPy tree).
|
||||
That is, "@tools.anytool()" should "turn on" the tool via the
|
||||
decorated function's _cp_config attribute.
|
||||
|
||||
CherryPy config
|
||||
If a tool exposes a "_setup" callable, it will be called
|
||||
once per Request (if the feature is "turned on" via config).
|
||||
|
||||
Tools may be implemented as any object with a namespace. The builtins
|
||||
are generally either modules or instances of the tools.Tool class.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
import cherrypy
|
||||
|
||||
|
||||
def _getargs(func):
|
||||
"""Return the names of all static arguments to the given function."""
|
||||
# Use this instead of importing inspect for less mem overhead.
|
||||
import types
|
||||
if sys.version_info >= (3, 0):
|
||||
if isinstance(func, types.MethodType):
|
||||
func = func.__func__
|
||||
co = func.__code__
|
||||
else:
|
||||
if isinstance(func, types.MethodType):
|
||||
func = func.im_func
|
||||
co = func.func_code
|
||||
return co.co_varnames[:co.co_argcount]
|
||||
|
||||
|
||||
_attr_error = (
|
||||
"CherryPy Tools cannot be turned on directly. Instead, turn them "
|
||||
"on via config, or use them as decorators on your page handlers."
|
||||
)
|
||||
|
||||
|
||||
class Tool(object):
|
||||
|
||||
"""A registered function for use with CherryPy request-processing hooks.
|
||||
|
||||
help(tool.callable) should give you more information about this Tool.
|
||||
"""
|
||||
|
||||
namespace = "tools"
|
||||
|
||||
def __init__(self, point, callable, name=None, priority=50):
|
||||
self._point = point
|
||||
self.callable = callable
|
||||
self._name = name
|
||||
self._priority = priority
|
||||
self.__doc__ = self.callable.__doc__
|
||||
self._setargs()
|
||||
|
||||
def _get_on(self):
|
||||
raise AttributeError(_attr_error)
|
||||
|
||||
def _set_on(self, value):
|
||||
raise AttributeError(_attr_error)
|
||||
on = property(_get_on, _set_on)
|
||||
|
||||
def _setargs(self):
|
||||
"""Copy func parameter names to obj attributes."""
|
||||
try:
|
||||
for arg in _getargs(self.callable):
|
||||
setattr(self, arg, None)
|
||||
except (TypeError, AttributeError):
|
||||
if hasattr(self.callable, "__call__"):
|
||||
for arg in _getargs(self.callable.__call__):
|
||||
setattr(self, arg, None)
|
||||
# IronPython 1.0 raises NotImplementedError because
|
||||
# inspect.getargspec tries to access Python bytecode
|
||||
# in co_code attribute.
|
||||
except NotImplementedError:
|
||||
pass
|
||||
# IronPython 1B1 may raise IndexError in some cases,
|
||||
# but if we trap it here it doesn't prevent CP from working.
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
def _merged_args(self, d=None):
|
||||
"""Return a dict of configuration entries for this Tool."""
|
||||
if d:
|
||||
conf = d.copy()
|
||||
else:
|
||||
conf = {}
|
||||
|
||||
tm = cherrypy.serving.request.toolmaps[self.namespace]
|
||||
if self._name in tm:
|
||||
conf.update(tm[self._name])
|
||||
|
||||
if "on" in conf:
|
||||
del conf["on"]
|
||||
|
||||
return conf
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
"""Compile-time decorator (turn on the tool in config).
|
||||
|
||||
For example::
|
||||
|
||||
@tools.proxy()
|
||||
def whats_my_base(self):
|
||||
return cherrypy.request.base
|
||||
whats_my_base.exposed = True
|
||||
"""
|
||||
if args:
|
||||
raise TypeError("The %r Tool does not accept positional "
|
||||
"arguments; you must use keyword arguments."
|
||||
% self._name)
|
||||
|
||||
def tool_decorator(f):
|
||||
if not hasattr(f, "_cp_config"):
|
||||
f._cp_config = {}
|
||||
subspace = self.namespace + "." + self._name + "."
|
||||
f._cp_config[subspace + "on"] = True
|
||||
for k, v in kwargs.items():
|
||||
f._cp_config[subspace + k] = v
|
||||
return f
|
||||
return tool_decorator
|
||||
|
||||
def _setup(self):
|
||||
"""Hook this tool into cherrypy.request.
|
||||
|
||||
The standard CherryPy request object will automatically call this
|
||||
method when the tool is "turned on" in config.
|
||||
"""
|
||||
conf = self._merged_args()
|
||||
p = conf.pop("priority", None)
|
||||
if p is None:
|
||||
p = getattr(self.callable, "priority", self._priority)
|
||||
cherrypy.serving.request.hooks.attach(self._point, self.callable,
|
||||
priority=p, **conf)
|
||||
|
||||
|
||||
class HandlerTool(Tool):
|
||||
|
||||
"""Tool which is called 'before main', that may skip normal handlers.
|
||||
|
||||
If the tool successfully handles the request (by setting response.body),
|
||||
if should return True. This will cause CherryPy to skip any 'normal' page
|
||||
handler. If the tool did not handle the request, it should return False
|
||||
to tell CherryPy to continue on and call the normal page handler. If the
|
||||
tool is declared AS a page handler (see the 'handler' method), returning
|
||||
False will raise NotFound.
|
||||
"""
|
||||
|
||||
def __init__(self, callable, name=None):
|
||||
Tool.__init__(self, 'before_handler', callable, name)
|
||||
|
||||
def handler(self, *args, **kwargs):
|
||||
"""Use this tool as a CherryPy page handler.
|
||||
|
||||
For example::
|
||||
|
||||
class Root:
|
||||
nav = tools.staticdir.handler(section="/nav", dir="nav",
|
||||
root=absDir)
|
||||
"""
|
||||
def handle_func(*a, **kw):
|
||||
handled = self.callable(*args, **self._merged_args(kwargs))
|
||||
if not handled:
|
||||
raise cherrypy.NotFound()
|
||||
return cherrypy.serving.response.body
|
||||
handle_func.exposed = True
|
||||
return handle_func
|
||||
|
||||
def _wrapper(self, **kwargs):
|
||||
if self.callable(**kwargs):
|
||||
cherrypy.serving.request.handler = None
|
||||
|
||||
def _setup(self):
|
||||
"""Hook this tool into cherrypy.request.
|
||||
|
||||
The standard CherryPy request object will automatically call this
|
||||
method when the tool is "turned on" in config.
|
||||
"""
|
||||
conf = self._merged_args()
|
||||
p = conf.pop("priority", None)
|
||||
if p is None:
|
||||
p = getattr(self.callable, "priority", self._priority)
|
||||
cherrypy.serving.request.hooks.attach(self._point, self._wrapper,
|
||||
priority=p, **conf)
|
||||
|
||||
|
||||
class HandlerWrapperTool(Tool):
|
||||
|
||||
"""Tool which wraps request.handler in a provided wrapper function.
|
||||
|
||||
The 'newhandler' arg must be a handler wrapper function that takes a
|
||||
'next_handler' argument, plus ``*args`` and ``**kwargs``. Like all
|
||||
page handler
|
||||
functions, it must return an iterable for use as cherrypy.response.body.
|
||||
|
||||
For example, to allow your 'inner' page handlers to return dicts
|
||||
which then get interpolated into a template::
|
||||
|
||||
def interpolator(next_handler, *args, **kwargs):
|
||||
filename = cherrypy.request.config.get('template')
|
||||
cherrypy.response.template = env.get_template(filename)
|
||||
response_dict = next_handler(*args, **kwargs)
|
||||
return cherrypy.response.template.render(**response_dict)
|
||||
cherrypy.tools.jinja = HandlerWrapperTool(interpolator)
|
||||
"""
|
||||
|
||||
def __init__(self, newhandler, point='before_handler', name=None,
|
||||
priority=50):
|
||||
self.newhandler = newhandler
|
||||
self._point = point
|
||||
self._name = name
|
||||
self._priority = priority
|
||||
|
||||
def callable(self, *args, **kwargs):
|
||||
innerfunc = cherrypy.serving.request.handler
|
||||
|
||||
def wrap(*args, **kwargs):
|
||||
return self.newhandler(innerfunc, *args, **kwargs)
|
||||
cherrypy.serving.request.handler = wrap
|
||||
|
||||
|
||||
class ErrorTool(Tool):
|
||||
|
||||
"""Tool which is used to replace the default request.error_response."""
|
||||
|
||||
def __init__(self, callable, name=None):
|
||||
Tool.__init__(self, None, callable, name)
|
||||
|
||||
def _wrapper(self):
|
||||
self.callable(**self._merged_args())
|
||||
|
||||
def _setup(self):
|
||||
"""Hook this tool into cherrypy.request.
|
||||
|
||||
The standard CherryPy request object will automatically call this
|
||||
method when the tool is "turned on" in config.
|
||||
"""
|
||||
cherrypy.serving.request.error_response = self._wrapper
|
||||
|
||||
|
||||
# Builtin tools #
|
||||
|
||||
from cherrypy.lib import cptools, encoding, auth, static, jsontools
|
||||
from cherrypy.lib import sessions as _sessions, xmlrpcutil as _xmlrpc
|
||||
from cherrypy.lib import caching as _caching
|
||||
from cherrypy.lib import auth_basic, auth_digest
|
||||
|
||||
|
||||
class SessionTool(Tool):
|
||||
|
||||
"""Session Tool for CherryPy.
|
||||
|
||||
sessions.locking
|
||||
When 'implicit' (the default), the session will be locked for you,
|
||||
just before running the page handler.
|
||||
|
||||
When 'early', the session will be locked before reading the request
|
||||
body. This is off by default for safety reasons; for example,
|
||||
a large upload would block the session, denying an AJAX
|
||||
progress meter
|
||||
(`issue <https://bitbucket.org/cherrypy/cherrypy/issue/630>`_).
|
||||
|
||||
When 'explicit' (or any other value), you need to call
|
||||
cherrypy.session.acquire_lock() yourself before using
|
||||
session data.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
# _sessions.init must be bound after headers are read
|
||||
Tool.__init__(self, 'before_request_body', _sessions.init)
|
||||
|
||||
def _lock_session(self):
|
||||
cherrypy.serving.session.acquire_lock()
|
||||
|
||||
def _setup(self):
|
||||
"""Hook this tool into cherrypy.request.
|
||||
|
||||
The standard CherryPy request object will automatically call this
|
||||
method when the tool is "turned on" in config.
|
||||
"""
|
||||
hooks = cherrypy.serving.request.hooks
|
||||
|
||||
conf = self._merged_args()
|
||||
|
||||
p = conf.pop("priority", None)
|
||||
if p is None:
|
||||
p = getattr(self.callable, "priority", self._priority)
|
||||
|
||||
hooks.attach(self._point, self.callable, priority=p, **conf)
|
||||
|
||||
locking = conf.pop('locking', 'implicit')
|
||||
if locking == 'implicit':
|
||||
hooks.attach('before_handler', self._lock_session)
|
||||
elif locking == 'early':
|
||||
# Lock before the request body (but after _sessions.init runs!)
|
||||
hooks.attach('before_request_body', self._lock_session,
|
||||
priority=60)
|
||||
else:
|
||||
# Don't lock
|
||||
pass
|
||||
|
||||
hooks.attach('before_finalize', _sessions.save)
|
||||
hooks.attach('on_end_request', _sessions.close)
|
||||
|
||||
def regenerate(self):
|
||||
"""Drop the current session and make a new one (with a new id)."""
|
||||
sess = cherrypy.serving.session
|
||||
sess.regenerate()
|
||||
|
||||
# Grab cookie-relevant tool args
|
||||
conf = dict([(k, v) for k, v in self._merged_args().items()
|
||||
if k in ('path', 'path_header', 'name', 'timeout',
|
||||
'domain', 'secure')])
|
||||
_sessions.set_response_cookie(**conf)
|
||||
|
||||
|
||||
class XMLRPCController(object):
|
||||
|
||||
"""A Controller (page handler collection) for XML-RPC.
|
||||
|
||||
To use it, have your controllers subclass this base class (it will
|
||||
turn on the tool for you).
|
||||
|
||||
You can also supply the following optional config entries::
|
||||
|
||||
tools.xmlrpc.encoding: 'utf-8'
|
||||
tools.xmlrpc.allow_none: 0
|
||||
|
||||
XML-RPC is a rather discontinuous layer over HTTP; dispatching to the
|
||||
appropriate handler must first be performed according to the URL, and
|
||||
then a second dispatch step must take place according to the RPC method
|
||||
specified in the request body. It also allows a superfluous "/RPC2"
|
||||
prefix in the URL, supplies its own handler args in the body, and
|
||||
requires a 200 OK "Fault" response instead of 404 when the desired
|
||||
method is not found.
|
||||
|
||||
Therefore, XML-RPC cannot be implemented for CherryPy via a Tool alone.
|
||||
This Controller acts as the dispatch target for the first half (based
|
||||
on the URL); it then reads the RPC method from the request body and
|
||||
does its own second dispatch step based on that method. It also reads
|
||||
body params, and returns a Fault on error.
|
||||
|
||||
The XMLRPCDispatcher strips any /RPC2 prefix; if you aren't using /RPC2
|
||||
in your URL's, you can safely skip turning on the XMLRPCDispatcher.
|
||||
Otherwise, you need to use declare it in config::
|
||||
|
||||
request.dispatch: cherrypy.dispatch.XMLRPCDispatcher()
|
||||
"""
|
||||
|
||||
# Note we're hard-coding this into the 'tools' namespace. We could do
|
||||
# a huge amount of work to make it relocatable, but the only reason why
|
||||
# would be if someone actually disabled the default_toolbox. Meh.
|
||||
_cp_config = {'tools.xmlrpc.on': True}
|
||||
|
||||
def default(self, *vpath, **params):
|
||||
rpcparams, rpcmethod = _xmlrpc.process_body()
|
||||
|
||||
subhandler = self
|
||||
for attr in str(rpcmethod).split('.'):
|
||||
subhandler = getattr(subhandler, attr, None)
|
||||
|
||||
if subhandler and getattr(subhandler, "exposed", False):
|
||||
body = subhandler(*(vpath + rpcparams), **params)
|
||||
|
||||
else:
|
||||
# https://bitbucket.org/cherrypy/cherrypy/issue/533
|
||||
# if a method is not found, an xmlrpclib.Fault should be returned
|
||||
# raising an exception here will do that; see
|
||||
# cherrypy.lib.xmlrpcutil.on_error
|
||||
raise Exception('method "%s" is not supported' % attr)
|
||||
|
||||
conf = cherrypy.serving.request.toolmaps['tools'].get("xmlrpc", {})
|
||||
_xmlrpc.respond(body,
|
||||
conf.get('encoding', 'utf-8'),
|
||||
conf.get('allow_none', 0))
|
||||
return cherrypy.serving.response.body
|
||||
default.exposed = True
|
||||
|
||||
|
||||
class SessionAuthTool(HandlerTool):
|
||||
|
||||
def _setargs(self):
|
||||
for name in dir(cptools.SessionAuth):
|
||||
if not name.startswith("__"):
|
||||
setattr(self, name, None)
|
||||
|
||||
|
||||
class CachingTool(Tool):
|
||||
|
||||
"""Caching Tool for CherryPy."""
|
||||
|
||||
def _wrapper(self, **kwargs):
|
||||
request = cherrypy.serving.request
|
||||
if _caching.get(**kwargs):
|
||||
request.handler = None
|
||||
else:
|
||||
if request.cacheable:
|
||||
# Note the devious technique here of adding hooks on the fly
|
||||
request.hooks.attach('before_finalize', _caching.tee_output,
|
||||
priority=90)
|
||||
_wrapper.priority = 20
|
||||
|
||||
def _setup(self):
|
||||
"""Hook caching into cherrypy.request."""
|
||||
conf = self._merged_args()
|
||||
|
||||
p = conf.pop("priority", None)
|
||||
cherrypy.serving.request.hooks.attach('before_handler', self._wrapper,
|
||||
priority=p, **conf)
|
||||
|
||||
|
||||
class Toolbox(object):
|
||||
|
||||
"""A collection of Tools.
|
||||
|
||||
This object also functions as a config namespace handler for itself.
|
||||
Custom toolboxes should be added to each Application's toolboxes dict.
|
||||
"""
|
||||
|
||||
def __init__(self, namespace):
|
||||
self.namespace = namespace
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
# If the Tool._name is None, supply it from the attribute name.
|
||||
if isinstance(value, Tool):
|
||||
if value._name is None:
|
||||
value._name = name
|
||||
value.namespace = self.namespace
|
||||
object.__setattr__(self, name, value)
|
||||
|
||||
def __enter__(self):
|
||||
"""Populate request.toolmaps from tools specified in config."""
|
||||
cherrypy.serving.request.toolmaps[self.namespace] = map = {}
|
||||
|
||||
def populate(k, v):
|
||||
toolname, arg = k.split(".", 1)
|
||||
bucket = map.setdefault(toolname, {})
|
||||
bucket[arg] = v
|
||||
return populate
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Run tool._setup() for each tool in our toolmap."""
|
||||
map = cherrypy.serving.request.toolmaps.get(self.namespace)
|
||||
if map:
|
||||
for name, settings in map.items():
|
||||
if settings.get("on", False):
|
||||
tool = getattr(self, name)
|
||||
tool._setup()
|
||||
|
||||
|
||||
class DeprecatedTool(Tool):
|
||||
|
||||
_name = None
|
||||
warnmsg = "This Tool is deprecated."
|
||||
|
||||
def __init__(self, point, warnmsg=None):
|
||||
self.point = point
|
||||
if warnmsg is not None:
|
||||
self.warnmsg = warnmsg
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
warnings.warn(self.warnmsg)
|
||||
|
||||
def tool_decorator(f):
|
||||
return f
|
||||
return tool_decorator
|
||||
|
||||
def _setup(self):
|
||||
warnings.warn(self.warnmsg)
|
||||
|
||||
|
||||
default_toolbox = _d = Toolbox("tools")
|
||||
_d.session_auth = SessionAuthTool(cptools.session_auth)
|
||||
_d.allow = Tool('on_start_resource', cptools.allow)
|
||||
_d.proxy = Tool('before_request_body', cptools.proxy, priority=30)
|
||||
_d.response_headers = Tool('on_start_resource', cptools.response_headers)
|
||||
_d.log_tracebacks = Tool('before_error_response', cptools.log_traceback)
|
||||
_d.log_headers = Tool('before_error_response', cptools.log_request_headers)
|
||||
_d.log_hooks = Tool('on_end_request', cptools.log_hooks, priority=100)
|
||||
_d.err_redirect = ErrorTool(cptools.redirect)
|
||||
_d.etags = Tool('before_finalize', cptools.validate_etags, priority=75)
|
||||
_d.decode = Tool('before_request_body', encoding.decode)
|
||||
# the order of encoding, gzip, caching is important
|
||||
_d.encode = Tool('before_handler', encoding.ResponseEncoder, priority=70)
|
||||
_d.gzip = Tool('before_finalize', encoding.gzip, priority=80)
|
||||
_d.staticdir = HandlerTool(static.staticdir)
|
||||
_d.staticfile = HandlerTool(static.staticfile)
|
||||
_d.sessions = SessionTool()
|
||||
_d.xmlrpc = ErrorTool(_xmlrpc.on_error)
|
||||
_d.caching = CachingTool('before_handler', _caching.get, 'caching')
|
||||
_d.expires = Tool('before_finalize', _caching.expires)
|
||||
_d.tidy = DeprecatedTool(
|
||||
'before_finalize',
|
||||
"The tidy tool has been removed from the standard distribution of "
|
||||
"CherryPy. The most recent version can be found at "
|
||||
"http://tools.cherrypy.org/browser.")
|
||||
_d.nsgmls = DeprecatedTool(
|
||||
'before_finalize',
|
||||
"The nsgmls tool has been removed from the standard distribution of "
|
||||
"CherryPy. The most recent version can be found at "
|
||||
"http://tools.cherrypy.org/browser.")
|
||||
_d.ignore_headers = Tool('before_request_body', cptools.ignore_headers)
|
||||
_d.referer = Tool('before_request_body', cptools.referer)
|
||||
_d.basic_auth = Tool('on_start_resource', auth.basic_auth)
|
||||
_d.digest_auth = Tool('on_start_resource', auth.digest_auth)
|
||||
_d.trailing_slash = Tool('before_handler', cptools.trailing_slash, priority=60)
|
||||
_d.flatten = Tool('before_finalize', cptools.flatten)
|
||||
_d.accept = Tool('on_start_resource', cptools.accept)
|
||||
_d.redirect = Tool('on_start_resource', cptools.redirect)
|
||||
_d.autovary = Tool('on_start_resource', cptools.autovary, priority=0)
|
||||
_d.json_in = Tool('before_request_body', jsontools.json_in, priority=30)
|
||||
_d.json_out = Tool('before_handler', jsontools.json_out, priority=30)
|
||||
_d.auth_basic = Tool('before_handler', auth_basic.basic_auth, priority=1)
|
||||
_d.auth_digest = Tool('before_handler', auth_digest.digest_auth, priority=1)
|
||||
|
||||
del _d, cptools, encoding, auth, static
|
|
@ -1,299 +0,0 @@
|
|||
"""CherryPy Application and Tree objects."""
|
||||
|
||||
import os
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import ntou, py3k
|
||||
from cherrypy import _cpconfig, _cplogging, _cprequest, _cpwsgi, tools
|
||||
from cherrypy.lib import httputil
|
||||
|
||||
|
||||
class Application(object):
|
||||
|
||||
"""A CherryPy Application.
|
||||
|
||||
Servers and gateways should not instantiate Request objects directly.
|
||||
Instead, they should ask an Application object for a request object.
|
||||
|
||||
An instance of this class may also be used as a WSGI callable
|
||||
(WSGI application object) for itself.
|
||||
"""
|
||||
|
||||
root = None
|
||||
"""The top-most container of page handlers for this app. Handlers should
|
||||
be arranged in a hierarchy of attributes, matching the expected URI
|
||||
hierarchy; the default dispatcher then searches this hierarchy for a
|
||||
matching handler. When using a dispatcher other than the default,
|
||||
this value may be None."""
|
||||
|
||||
config = {}
|
||||
"""A dict of {path: pathconf} pairs, where 'pathconf' is itself a dict
|
||||
of {key: value} pairs."""
|
||||
|
||||
namespaces = _cpconfig.NamespaceSet()
|
||||
toolboxes = {'tools': cherrypy.tools}
|
||||
|
||||
log = None
|
||||
"""A LogManager instance. See _cplogging."""
|
||||
|
||||
wsgiapp = None
|
||||
"""A CPWSGIApp instance. See _cpwsgi."""
|
||||
|
||||
request_class = _cprequest.Request
|
||||
response_class = _cprequest.Response
|
||||
|
||||
relative_urls = False
|
||||
|
||||
def __init__(self, root, script_name="", config=None):
|
||||
self.log = _cplogging.LogManager(id(self), cherrypy.log.logger_root)
|
||||
self.root = root
|
||||
self.script_name = script_name
|
||||
self.wsgiapp = _cpwsgi.CPWSGIApp(self)
|
||||
|
||||
self.namespaces = self.namespaces.copy()
|
||||
self.namespaces["log"] = lambda k, v: setattr(self.log, k, v)
|
||||
self.namespaces["wsgi"] = self.wsgiapp.namespace_handler
|
||||
|
||||
self.config = self.__class__.config.copy()
|
||||
if config:
|
||||
self.merge(config)
|
||||
|
||||
def __repr__(self):
|
||||
return "%s.%s(%r, %r)" % (self.__module__, self.__class__.__name__,
|
||||
self.root, self.script_name)
|
||||
|
||||
script_name_doc = """The URI "mount point" for this app. A mount point
|
||||
is that portion of the URI which is constant for all URIs that are
|
||||
serviced by this application; it does not include scheme, host, or proxy
|
||||
("virtual host") portions of the URI.
|
||||
|
||||
For example, if script_name is "/my/cool/app", then the URL
|
||||
"http://www.example.com/my/cool/app/page1" might be handled by a
|
||||
"page1" method on the root object.
|
||||
|
||||
The value of script_name MUST NOT end in a slash. If the script_name
|
||||
refers to the root of the URI, it MUST be an empty string (not "/").
|
||||
|
||||
If script_name is explicitly set to None, then the script_name will be
|
||||
provided for each call from request.wsgi_environ['SCRIPT_NAME'].
|
||||
"""
|
||||
|
||||
def _get_script_name(self):
|
||||
if self._script_name is not None:
|
||||
return self._script_name
|
||||
|
||||
# A `_script_name` with a value of None signals that the script name
|
||||
# should be pulled from WSGI environ.
|
||||
return cherrypy.serving.request.wsgi_environ['SCRIPT_NAME'].rstrip("/")
|
||||
|
||||
def _set_script_name(self, value):
|
||||
if value:
|
||||
value = value.rstrip("/")
|
||||
self._script_name = value
|
||||
script_name = property(fget=_get_script_name, fset=_set_script_name,
|
||||
doc=script_name_doc)
|
||||
|
||||
def merge(self, config):
|
||||
"""Merge the given config into self.config."""
|
||||
_cpconfig.merge(self.config, config)
|
||||
|
||||
# Handle namespaces specified in config.
|
||||
self.namespaces(self.config.get("/", {}))
|
||||
|
||||
def find_config(self, path, key, default=None):
|
||||
"""Return the most-specific value for key along path, or default."""
|
||||
trail = path or "/"
|
||||
while trail:
|
||||
nodeconf = self.config.get(trail, {})
|
||||
|
||||
if key in nodeconf:
|
||||
return nodeconf[key]
|
||||
|
||||
lastslash = trail.rfind("/")
|
||||
if lastslash == -1:
|
||||
break
|
||||
elif lastslash == 0 and trail != "/":
|
||||
trail = "/"
|
||||
else:
|
||||
trail = trail[:lastslash]
|
||||
|
||||
return default
|
||||
|
||||
def get_serving(self, local, remote, scheme, sproto):
|
||||
"""Create and return a Request and Response object."""
|
||||
req = self.request_class(local, remote, scheme, sproto)
|
||||
req.app = self
|
||||
|
||||
for name, toolbox in self.toolboxes.items():
|
||||
req.namespaces[name] = toolbox
|
||||
|
||||
resp = self.response_class()
|
||||
cherrypy.serving.load(req, resp)
|
||||
cherrypy.engine.publish('acquire_thread')
|
||||
cherrypy.engine.publish('before_request')
|
||||
|
||||
return req, resp
|
||||
|
||||
def release_serving(self):
|
||||
"""Release the current serving (request and response)."""
|
||||
req = cherrypy.serving.request
|
||||
|
||||
cherrypy.engine.publish('after_request')
|
||||
|
||||
try:
|
||||
req.close()
|
||||
except:
|
||||
cherrypy.log(traceback=True, severity=40)
|
||||
|
||||
cherrypy.serving.clear()
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
return self.wsgiapp(environ, start_response)
|
||||
|
||||
|
||||
class Tree(object):
|
||||
|
||||
"""A registry of CherryPy applications, mounted at diverse points.
|
||||
|
||||
An instance of this class may also be used as a WSGI callable
|
||||
(WSGI application object), in which case it dispatches to all
|
||||
mounted apps.
|
||||
"""
|
||||
|
||||
apps = {}
|
||||
"""
|
||||
A dict of the form {script name: application}, where "script name"
|
||||
is a string declaring the URI mount point (no trailing slash), and
|
||||
"application" is an instance of cherrypy.Application (or an arbitrary
|
||||
WSGI callable if you happen to be using a WSGI server)."""
|
||||
|
||||
def __init__(self):
|
||||
self.apps = {}
|
||||
|
||||
def mount(self, root, script_name="", config=None):
|
||||
"""Mount a new app from a root object, script_name, and config.
|
||||
|
||||
root
|
||||
An instance of a "controller class" (a collection of page
|
||||
handler methods) which represents the root of the application.
|
||||
This may also be an Application instance, or None if using
|
||||
a dispatcher other than the default.
|
||||
|
||||
script_name
|
||||
A string containing the "mount point" of the application.
|
||||
This should start with a slash, and be the path portion of the
|
||||
URL at which to mount the given root. For example, if root.index()
|
||||
will handle requests to "http://www.example.com:8080/dept/app1/",
|
||||
then the script_name argument would be "/dept/app1".
|
||||
|
||||
It MUST NOT end in a slash. If the script_name refers to the
|
||||
root of the URI, it MUST be an empty string (not "/").
|
||||
|
||||
config
|
||||
A file or dict containing application config.
|
||||
"""
|
||||
if script_name is None:
|
||||
raise TypeError(
|
||||
"The 'script_name' argument may not be None. Application "
|
||||
"objects may, however, possess a script_name of None (in "
|
||||
"order to inpect the WSGI environ for SCRIPT_NAME upon each "
|
||||
"request). You cannot mount such Applications on this Tree; "
|
||||
"you must pass them to a WSGI server interface directly.")
|
||||
|
||||
# Next line both 1) strips trailing slash and 2) maps "/" -> "".
|
||||
script_name = script_name.rstrip("/")
|
||||
|
||||
if isinstance(root, Application):
|
||||
app = root
|
||||
if script_name != "" and script_name != app.script_name:
|
||||
raise ValueError(
|
||||
"Cannot specify a different script name and pass an "
|
||||
"Application instance to cherrypy.mount")
|
||||
script_name = app.script_name
|
||||
else:
|
||||
app = Application(root, script_name)
|
||||
|
||||
# If mounted at "", add favicon.ico
|
||||
if (script_name == "" and root is not None
|
||||
and not hasattr(root, "favicon_ico")):
|
||||
favicon = os.path.join(os.getcwd(), os.path.dirname(__file__),
|
||||
"favicon.ico")
|
||||
root.favicon_ico = tools.staticfile.handler(favicon)
|
||||
|
||||
if config:
|
||||
app.merge(config)
|
||||
|
||||
self.apps[script_name] = app
|
||||
|
||||
return app
|
||||
|
||||
def graft(self, wsgi_callable, script_name=""):
|
||||
"""Mount a wsgi callable at the given script_name."""
|
||||
# Next line both 1) strips trailing slash and 2) maps "/" -> "".
|
||||
script_name = script_name.rstrip("/")
|
||||
self.apps[script_name] = wsgi_callable
|
||||
|
||||
def script_name(self, path=None):
|
||||
"""The script_name of the app at the given path, or None.
|
||||
|
||||
If path is None, cherrypy.request is used.
|
||||
"""
|
||||
if path is None:
|
||||
try:
|
||||
request = cherrypy.serving.request
|
||||
path = httputil.urljoin(request.script_name,
|
||||
request.path_info)
|
||||
except AttributeError:
|
||||
return None
|
||||
|
||||
while True:
|
||||
if path in self.apps:
|
||||
return path
|
||||
|
||||
if path == "":
|
||||
return None
|
||||
|
||||
# Move one node up the tree and try again.
|
||||
path = path[:path.rfind("/")]
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
# If you're calling this, then you're probably setting SCRIPT_NAME
|
||||
# to '' (some WSGI servers always set SCRIPT_NAME to '').
|
||||
# Try to look up the app using the full path.
|
||||
env1x = environ
|
||||
if environ.get(ntou('wsgi.version')) == (ntou('u'), 0):
|
||||
env1x = _cpwsgi.downgrade_wsgi_ux_to_1x(environ)
|
||||
path = httputil.urljoin(env1x.get('SCRIPT_NAME', ''),
|
||||
env1x.get('PATH_INFO', ''))
|
||||
sn = self.script_name(path or "/")
|
||||
if sn is None:
|
||||
start_response('404 Not Found', [])
|
||||
return []
|
||||
|
||||
app = self.apps[sn]
|
||||
|
||||
# Correct the SCRIPT_NAME and PATH_INFO environ entries.
|
||||
environ = environ.copy()
|
||||
if not py3k:
|
||||
if environ.get(ntou('wsgi.version')) == (ntou('u'), 0):
|
||||
# Python 2/WSGI u.0: all strings MUST be of type unicode
|
||||
enc = environ[ntou('wsgi.url_encoding')]
|
||||
environ[ntou('SCRIPT_NAME')] = sn.decode(enc)
|
||||
environ[ntou('PATH_INFO')] = path[
|
||||
len(sn.rstrip("/")):].decode(enc)
|
||||
else:
|
||||
# Python 2/WSGI 1.x: all strings MUST be of type str
|
||||
environ['SCRIPT_NAME'] = sn
|
||||
environ['PATH_INFO'] = path[len(sn.rstrip("/")):]
|
||||
else:
|
||||
if environ.get(ntou('wsgi.version')) == (ntou('u'), 0):
|
||||
# Python 3/WSGI u.0: all strings MUST be full unicode
|
||||
environ['SCRIPT_NAME'] = sn
|
||||
environ['PATH_INFO'] = path[len(sn.rstrip("/")):]
|
||||
else:
|
||||
# Python 3/WSGI 1.x: all strings MUST be ISO-8859-1 str
|
||||
environ['SCRIPT_NAME'] = sn.encode(
|
||||
'utf-8').decode('ISO-8859-1')
|
||||
environ['PATH_INFO'] = path[
|
||||
len(sn.rstrip("/")):].encode('utf-8').decode('ISO-8859-1')
|
||||
return app(environ, start_response)
|
|
@ -1,438 +0,0 @@
|
|||
"""WSGI interface (see PEP 333 and 3333).
|
||||
|
||||
Note that WSGI environ keys and values are 'native strings'; that is,
|
||||
whatever the type of "" is. For Python 2, that's a byte string; for Python 3,
|
||||
it's a unicode string. But PEP 3333 says: "even if Python's str type is
|
||||
actually Unicode "under the hood", the content of native strings must
|
||||
still be translatable to bytes via the Latin-1 encoding!"
|
||||
"""
|
||||
|
||||
import sys as _sys
|
||||
|
||||
import cherrypy as _cherrypy
|
||||
from cherrypy._cpcompat import BytesIO, bytestr, ntob, ntou, py3k, unicodestr
|
||||
from cherrypy import _cperror
|
||||
from cherrypy.lib import httputil
|
||||
from cherrypy.lib import is_closable_iterator
|
||||
|
||||
def downgrade_wsgi_ux_to_1x(environ):
|
||||
"""Return a new environ dict for WSGI 1.x from the given WSGI u.x environ.
|
||||
"""
|
||||
env1x = {}
|
||||
|
||||
url_encoding = environ[ntou('wsgi.url_encoding')]
|
||||
for k, v in list(environ.items()):
|
||||
if k in [ntou('PATH_INFO'), ntou('SCRIPT_NAME'), ntou('QUERY_STRING')]:
|
||||
v = v.encode(url_encoding)
|
||||
elif isinstance(v, unicodestr):
|
||||
v = v.encode('ISO-8859-1')
|
||||
env1x[k.encode('ISO-8859-1')] = v
|
||||
|
||||
return env1x
|
||||
|
||||
|
||||
class VirtualHost(object):
|
||||
|
||||
"""Select a different WSGI application based on the Host header.
|
||||
|
||||
This can be useful when running multiple sites within one CP server.
|
||||
It allows several domains to point to different applications. For example::
|
||||
|
||||
root = Root()
|
||||
RootApp = cherrypy.Application(root)
|
||||
Domain2App = cherrypy.Application(root)
|
||||
SecureApp = cherrypy.Application(Secure())
|
||||
|
||||
vhost = cherrypy._cpwsgi.VirtualHost(RootApp,
|
||||
domains={'www.domain2.example': Domain2App,
|
||||
'www.domain2.example:443': SecureApp,
|
||||
})
|
||||
|
||||
cherrypy.tree.graft(vhost)
|
||||
"""
|
||||
default = None
|
||||
"""Required. The default WSGI application."""
|
||||
|
||||
use_x_forwarded_host = True
|
||||
"""If True (the default), any "X-Forwarded-Host"
|
||||
request header will be used instead of the "Host" header. This
|
||||
is commonly added by HTTP servers (such as Apache) when proxying."""
|
||||
|
||||
domains = {}
|
||||
"""A dict of {host header value: application} pairs.
|
||||
The incoming "Host" request header is looked up in this dict,
|
||||
and, if a match is found, the corresponding WSGI application
|
||||
will be called instead of the default. Note that you often need
|
||||
separate entries for "example.com" and "www.example.com".
|
||||
In addition, "Host" headers may contain the port number.
|
||||
"""
|
||||
|
||||
def __init__(self, default, domains=None, use_x_forwarded_host=True):
|
||||
self.default = default
|
||||
self.domains = domains or {}
|
||||
self.use_x_forwarded_host = use_x_forwarded_host
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
domain = environ.get('HTTP_HOST', '')
|
||||
if self.use_x_forwarded_host:
|
||||
domain = environ.get("HTTP_X_FORWARDED_HOST", domain)
|
||||
|
||||
nextapp = self.domains.get(domain)
|
||||
if nextapp is None:
|
||||
nextapp = self.default
|
||||
return nextapp(environ, start_response)
|
||||
|
||||
|
||||
class InternalRedirector(object):
|
||||
|
||||
"""WSGI middleware that handles raised cherrypy.InternalRedirect."""
|
||||
|
||||
def __init__(self, nextapp, recursive=False):
|
||||
self.nextapp = nextapp
|
||||
self.recursive = recursive
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
redirections = []
|
||||
while True:
|
||||
environ = environ.copy()
|
||||
try:
|
||||
return self.nextapp(environ, start_response)
|
||||
except _cherrypy.InternalRedirect:
|
||||
ir = _sys.exc_info()[1]
|
||||
sn = environ.get('SCRIPT_NAME', '')
|
||||
path = environ.get('PATH_INFO', '')
|
||||
qs = environ.get('QUERY_STRING', '')
|
||||
|
||||
# Add the *previous* path_info + qs to redirections.
|
||||
old_uri = sn + path
|
||||
if qs:
|
||||
old_uri += "?" + qs
|
||||
redirections.append(old_uri)
|
||||
|
||||
if not self.recursive:
|
||||
# Check to see if the new URI has been redirected to
|
||||
# already
|
||||
new_uri = sn + ir.path
|
||||
if ir.query_string:
|
||||
new_uri += "?" + ir.query_string
|
||||
if new_uri in redirections:
|
||||
ir.request.close()
|
||||
raise RuntimeError("InternalRedirector visited the "
|
||||
"same URL twice: %r" % new_uri)
|
||||
|
||||
# Munge the environment and try again.
|
||||
environ['REQUEST_METHOD'] = "GET"
|
||||
environ['PATH_INFO'] = ir.path
|
||||
environ['QUERY_STRING'] = ir.query_string
|
||||
environ['wsgi.input'] = BytesIO()
|
||||
environ['CONTENT_LENGTH'] = "0"
|
||||
environ['cherrypy.previous_request'] = ir.request
|
||||
|
||||
|
||||
class ExceptionTrapper(object):
|
||||
|
||||
"""WSGI middleware that traps exceptions."""
|
||||
|
||||
def __init__(self, nextapp, throws=(KeyboardInterrupt, SystemExit)):
|
||||
self.nextapp = nextapp
|
||||
self.throws = throws
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
return _TrappedResponse(
|
||||
self.nextapp,
|
||||
environ,
|
||||
start_response,
|
||||
self.throws
|
||||
)
|
||||
|
||||
|
||||
class _TrappedResponse(object):
|
||||
|
||||
response = iter([])
|
||||
|
||||
def __init__(self, nextapp, environ, start_response, throws):
|
||||
self.nextapp = nextapp
|
||||
self.environ = environ
|
||||
self.start_response = start_response
|
||||
self.throws = throws
|
||||
self.started_response = False
|
||||
self.response = self.trap(
|
||||
self.nextapp, self.environ, self.start_response)
|
||||
self.iter_response = iter(self.response)
|
||||
|
||||
def __iter__(self):
|
||||
self.started_response = True
|
||||
return self
|
||||
|
||||
if py3k:
|
||||
def __next__(self):
|
||||
return self.trap(next, self.iter_response)
|
||||
else:
|
||||
def next(self):
|
||||
return self.trap(self.iter_response.next)
|
||||
|
||||
def close(self):
|
||||
if hasattr(self.response, 'close'):
|
||||
self.response.close()
|
||||
|
||||
def trap(self, func, *args, **kwargs):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except self.throws:
|
||||
raise
|
||||
except StopIteration:
|
||||
raise
|
||||
except:
|
||||
tb = _cperror.format_exc()
|
||||
#print('trapped (started %s):' % self.started_response, tb)
|
||||
_cherrypy.log(tb, severity=40)
|
||||
if not _cherrypy.request.show_tracebacks:
|
||||
tb = ""
|
||||
s, h, b = _cperror.bare_error(tb)
|
||||
if py3k:
|
||||
# What fun.
|
||||
s = s.decode('ISO-8859-1')
|
||||
h = [(k.decode('ISO-8859-1'), v.decode('ISO-8859-1'))
|
||||
for k, v in h]
|
||||
if self.started_response:
|
||||
# Empty our iterable (so future calls raise StopIteration)
|
||||
self.iter_response = iter([])
|
||||
else:
|
||||
self.iter_response = iter(b)
|
||||
|
||||
try:
|
||||
self.start_response(s, h, _sys.exc_info())
|
||||
except:
|
||||
# "The application must not trap any exceptions raised by
|
||||
# start_response, if it called start_response with exc_info.
|
||||
# Instead, it should allow such exceptions to propagate
|
||||
# back to the server or gateway."
|
||||
# But we still log and call close() to clean up ourselves.
|
||||
_cherrypy.log(traceback=True, severity=40)
|
||||
raise
|
||||
|
||||
if self.started_response:
|
||||
return ntob("").join(b)
|
||||
else:
|
||||
return b
|
||||
|
||||
|
||||
# WSGI-to-CP Adapter #
|
||||
|
||||
|
||||
class AppResponse(object):
|
||||
|
||||
"""WSGI response iterable for CherryPy applications."""
|
||||
|
||||
def __init__(self, environ, start_response, cpapp):
|
||||
self.cpapp = cpapp
|
||||
try:
|
||||
if not py3k:
|
||||
if environ.get(ntou('wsgi.version')) == (ntou('u'), 0):
|
||||
environ = downgrade_wsgi_ux_to_1x(environ)
|
||||
self.environ = environ
|
||||
self.run()
|
||||
|
||||
r = _cherrypy.serving.response
|
||||
|
||||
outstatus = r.output_status
|
||||
if not isinstance(outstatus, bytestr):
|
||||
raise TypeError("response.output_status is not a byte string.")
|
||||
|
||||
outheaders = []
|
||||
for k, v in r.header_list:
|
||||
if not isinstance(k, bytestr):
|
||||
raise TypeError(
|
||||
"response.header_list key %r is not a byte string." %
|
||||
k)
|
||||
if not isinstance(v, bytestr):
|
||||
raise TypeError(
|
||||
"response.header_list value %r is not a byte string." %
|
||||
v)
|
||||
outheaders.append((k, v))
|
||||
|
||||
if py3k:
|
||||
# According to PEP 3333, when using Python 3, the response
|
||||
# status and headers must be bytes masquerading as unicode;
|
||||
# that is, they must be of type "str" but are restricted to
|
||||
# code points in the "latin-1" set.
|
||||
outstatus = outstatus.decode('ISO-8859-1')
|
||||
outheaders = [(k.decode('ISO-8859-1'), v.decode('ISO-8859-1'))
|
||||
for k, v in outheaders]
|
||||
|
||||
self.iter_response = iter(r.body)
|
||||
self.write = start_response(outstatus, outheaders)
|
||||
except:
|
||||
self.close()
|
||||
raise
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
if py3k:
|
||||
def __next__(self):
|
||||
return next(self.iter_response)
|
||||
else:
|
||||
def next(self):
|
||||
return self.iter_response.next()
|
||||
|
||||
def close(self):
|
||||
"""Close and de-reference the current request and response. (Core)"""
|
||||
streaming = _cherrypy.serving.response.stream
|
||||
self.cpapp.release_serving()
|
||||
|
||||
# We avoid the expense of examining the iterator to see if it's
|
||||
# closable unless we are streaming the response, as that's the
|
||||
# only situation where we are going to have an iterator which
|
||||
# may not have been exhausted yet.
|
||||
if streaming and is_closable_iterator(self.iter_response):
|
||||
iter_close = self.iter_response.close
|
||||
try:
|
||||
iter_close()
|
||||
except Exception:
|
||||
_cherrypy.log(traceback=True, severity=40)
|
||||
|
||||
def run(self):
|
||||
"""Create a Request object using environ."""
|
||||
env = self.environ.get
|
||||
|
||||
local = httputil.Host('', int(env('SERVER_PORT', 80)),
|
||||
env('SERVER_NAME', ''))
|
||||
remote = httputil.Host(env('REMOTE_ADDR', ''),
|
||||
int(env('REMOTE_PORT', -1) or -1),
|
||||
env('REMOTE_HOST', ''))
|
||||
scheme = env('wsgi.url_scheme')
|
||||
sproto = env('ACTUAL_SERVER_PROTOCOL', "HTTP/1.1")
|
||||
request, resp = self.cpapp.get_serving(local, remote, scheme, sproto)
|
||||
|
||||
# LOGON_USER is served by IIS, and is the name of the
|
||||
# user after having been mapped to a local account.
|
||||
# Both IIS and Apache set REMOTE_USER, when possible.
|
||||
request.login = env('LOGON_USER') or env('REMOTE_USER') or None
|
||||
request.multithread = self.environ['wsgi.multithread']
|
||||
request.multiprocess = self.environ['wsgi.multiprocess']
|
||||
request.wsgi_environ = self.environ
|
||||
request.prev = env('cherrypy.previous_request', None)
|
||||
|
||||
meth = self.environ['REQUEST_METHOD']
|
||||
|
||||
path = httputil.urljoin(self.environ.get('SCRIPT_NAME', ''),
|
||||
self.environ.get('PATH_INFO', ''))
|
||||
qs = self.environ.get('QUERY_STRING', '')
|
||||
|
||||
if py3k:
|
||||
# This isn't perfect; if the given PATH_INFO is in the
|
||||
# wrong encoding, it may fail to match the appropriate config
|
||||
# section URI. But meh.
|
||||
old_enc = self.environ.get('wsgi.url_encoding', 'ISO-8859-1')
|
||||
new_enc = self.cpapp.find_config(self.environ.get('PATH_INFO', ''),
|
||||
"request.uri_encoding", 'utf-8')
|
||||
if new_enc.lower() != old_enc.lower():
|
||||
# Even though the path and qs are unicode, the WSGI server
|
||||
# is required by PEP 3333 to coerce them to ISO-8859-1
|
||||
# masquerading as unicode. So we have to encode back to
|
||||
# bytes and then decode again using the "correct" encoding.
|
||||
try:
|
||||
u_path = path.encode(old_enc).decode(new_enc)
|
||||
u_qs = qs.encode(old_enc).decode(new_enc)
|
||||
except (UnicodeEncodeError, UnicodeDecodeError):
|
||||
# Just pass them through without transcoding and hope.
|
||||
pass
|
||||
else:
|
||||
# Only set transcoded values if they both succeed.
|
||||
path = u_path
|
||||
qs = u_qs
|
||||
|
||||
rproto = self.environ.get('SERVER_PROTOCOL')
|
||||
headers = self.translate_headers(self.environ)
|
||||
rfile = self.environ['wsgi.input']
|
||||
request.run(meth, path, qs, rproto, headers, rfile)
|
||||
|
||||
headerNames = {'HTTP_CGI_AUTHORIZATION': 'Authorization',
|
||||
'CONTENT_LENGTH': 'Content-Length',
|
||||
'CONTENT_TYPE': 'Content-Type',
|
||||
'REMOTE_HOST': 'Remote-Host',
|
||||
'REMOTE_ADDR': 'Remote-Addr',
|
||||
}
|
||||
|
||||
def translate_headers(self, environ):
|
||||
"""Translate CGI-environ header names to HTTP header names."""
|
||||
for cgiName in environ:
|
||||
# We assume all incoming header keys are uppercase already.
|
||||
if cgiName in self.headerNames:
|
||||
yield self.headerNames[cgiName], environ[cgiName]
|
||||
elif cgiName[:5] == "HTTP_":
|
||||
# Hackish attempt at recovering original header names.
|
||||
translatedHeader = cgiName[5:].replace("_", "-")
|
||||
yield translatedHeader, environ[cgiName]
|
||||
|
||||
|
||||
class CPWSGIApp(object):
|
||||
|
||||
"""A WSGI application object for a CherryPy Application."""
|
||||
|
||||
pipeline = [('ExceptionTrapper', ExceptionTrapper),
|
||||
('InternalRedirector', InternalRedirector),
|
||||
]
|
||||
"""A list of (name, wsgiapp) pairs. Each 'wsgiapp' MUST be a
|
||||
constructor that takes an initial, positional 'nextapp' argument,
|
||||
plus optional keyword arguments, and returns a WSGI application
|
||||
(that takes environ and start_response arguments). The 'name' can
|
||||
be any you choose, and will correspond to keys in self.config."""
|
||||
|
||||
head = None
|
||||
"""Rather than nest all apps in the pipeline on each call, it's only
|
||||
done the first time, and the result is memoized into self.head. Set
|
||||
this to None again if you change self.pipeline after calling self."""
|
||||
|
||||
config = {}
|
||||
"""A dict whose keys match names listed in the pipeline. Each
|
||||
value is a further dict which will be passed to the corresponding
|
||||
named WSGI callable (from the pipeline) as keyword arguments."""
|
||||
|
||||
response_class = AppResponse
|
||||
"""The class to instantiate and return as the next app in the WSGI chain.
|
||||
"""
|
||||
|
||||
def __init__(self, cpapp, pipeline=None):
|
||||
self.cpapp = cpapp
|
||||
self.pipeline = self.pipeline[:]
|
||||
if pipeline:
|
||||
self.pipeline.extend(pipeline)
|
||||
self.config = self.config.copy()
|
||||
|
||||
def tail(self, environ, start_response):
|
||||
"""WSGI application callable for the actual CherryPy application.
|
||||
|
||||
You probably shouldn't call this; call self.__call__ instead,
|
||||
so that any WSGI middleware in self.pipeline can run first.
|
||||
"""
|
||||
return self.response_class(environ, start_response, self.cpapp)
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
head = self.head
|
||||
if head is None:
|
||||
# Create and nest the WSGI apps in our pipeline (in reverse order).
|
||||
# Then memoize the result in self.head.
|
||||
head = self.tail
|
||||
for name, callable in self.pipeline[::-1]:
|
||||
conf = self.config.get(name, {})
|
||||
head = callable(head, **conf)
|
||||
self.head = head
|
||||
return head(environ, start_response)
|
||||
|
||||
def namespace_handler(self, k, v):
|
||||
"""Config handler for the 'wsgi' namespace."""
|
||||
if k == "pipeline":
|
||||
# Note this allows multiple 'wsgi.pipeline' config entries
|
||||
# (but each entry will be processed in a 'random' order).
|
||||
# It should also allow developers to set default middleware
|
||||
# in code (passed to self.__init__) that deployers can add to
|
||||
# (but not remove) via config.
|
||||
self.pipeline.extend(v)
|
||||
elif k == "response_class":
|
||||
self.response_class = v
|
||||
else:
|
||||
name, arg = k.split(".", 1)
|
||||
bucket = self.config.setdefault(name, {})
|
||||
bucket[arg] = v
|
|
@ -1,70 +0,0 @@
|
|||
"""WSGI server interface (see PEP 333). This adds some CP-specific bits to
|
||||
the framework-agnostic wsgiserver package.
|
||||
"""
|
||||
import sys
|
||||
|
||||
import cherrypy
|
||||
from cherrypy import wsgiserver
|
||||
|
||||
|
||||
class CPWSGIServer(wsgiserver.CherryPyWSGIServer):
|
||||
|
||||
"""Wrapper for wsgiserver.CherryPyWSGIServer.
|
||||
|
||||
wsgiserver has been designed to not reference CherryPy in any way,
|
||||
so that it can be used in other frameworks and applications. Therefore,
|
||||
we wrap it here, so we can set our own mount points from cherrypy.tree
|
||||
and apply some attributes from config -> cherrypy.server -> wsgiserver.
|
||||
"""
|
||||
|
||||
def __init__(self, server_adapter=cherrypy.server):
|
||||
self.server_adapter = server_adapter
|
||||
self.max_request_header_size = (
|
||||
self.server_adapter.max_request_header_size or 0
|
||||
)
|
||||
self.max_request_body_size = (
|
||||
self.server_adapter.max_request_body_size or 0
|
||||
)
|
||||
|
||||
server_name = (self.server_adapter.socket_host or
|
||||
self.server_adapter.socket_file or
|
||||
None)
|
||||
|
||||
self.wsgi_version = self.server_adapter.wsgi_version
|
||||
s = wsgiserver.CherryPyWSGIServer
|
||||
s.__init__(self, server_adapter.bind_addr, cherrypy.tree,
|
||||
self.server_adapter.thread_pool,
|
||||
server_name,
|
||||
max=self.server_adapter.thread_pool_max,
|
||||
request_queue_size=self.server_adapter.socket_queue_size,
|
||||
timeout=self.server_adapter.socket_timeout,
|
||||
shutdown_timeout=self.server_adapter.shutdown_timeout,
|
||||
accepted_queue_size=self.server_adapter.accepted_queue_size,
|
||||
accepted_queue_timeout=self.server_adapter.accepted_queue_timeout,
|
||||
)
|
||||
self.protocol = self.server_adapter.protocol_version
|
||||
self.nodelay = self.server_adapter.nodelay
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
ssl_module = self.server_adapter.ssl_module or 'builtin'
|
||||
else:
|
||||
ssl_module = self.server_adapter.ssl_module or 'pyopenssl'
|
||||
if self.server_adapter.ssl_context:
|
||||
adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module)
|
||||
self.ssl_adapter = adapter_class(
|
||||
self.server_adapter.ssl_certificate,
|
||||
self.server_adapter.ssl_private_key,
|
||||
self.server_adapter.ssl_certificate_chain)
|
||||
self.ssl_adapter.context = self.server_adapter.ssl_context
|
||||
elif self.server_adapter.ssl_certificate:
|
||||
adapter_class = wsgiserver.get_ssl_adapter_class(ssl_module)
|
||||
self.ssl_adapter = adapter_class(
|
||||
self.server_adapter.ssl_certificate,
|
||||
self.server_adapter.ssl_private_key,
|
||||
self.server_adapter.ssl_certificate_chain)
|
||||
|
||||
self.stats['Enabled'] = getattr(
|
||||
self.server_adapter, 'statistics', False)
|
||||
|
||||
def error_log(self, msg="", level=20, traceback=False):
|
||||
cherrypy.engine.log(msg, level, traceback)
|
|
@ -1,110 +0,0 @@
|
|||
#! /usr/bin/env python
|
||||
"""The CherryPy daemon."""
|
||||
|
||||
import sys
|
||||
|
||||
import cherrypy
|
||||
from cherrypy.process import plugins, servers
|
||||
from cherrypy import Application
|
||||
|
||||
|
||||
def start(configfiles=None, daemonize=False, environment=None,
|
||||
fastcgi=False, scgi=False, pidfile=None, imports=None,
|
||||
cgi=False):
|
||||
"""Subscribe all engine plugins and start the engine."""
|
||||
sys.path = [''] + sys.path
|
||||
for i in imports or []:
|
||||
exec("import %s" % i)
|
||||
|
||||
for c in configfiles or []:
|
||||
cherrypy.config.update(c)
|
||||
# If there's only one app mounted, merge config into it.
|
||||
if len(cherrypy.tree.apps) == 1:
|
||||
for app in cherrypy.tree.apps.values():
|
||||
if isinstance(app, Application):
|
||||
app.merge(c)
|
||||
|
||||
engine = cherrypy.engine
|
||||
|
||||
if environment is not None:
|
||||
cherrypy.config.update({'environment': environment})
|
||||
|
||||
# Only daemonize if asked to.
|
||||
if daemonize:
|
||||
# Don't print anything to stdout/sterr.
|
||||
cherrypy.config.update({'log.screen': False})
|
||||
plugins.Daemonizer(engine).subscribe()
|
||||
|
||||
if pidfile:
|
||||
plugins.PIDFile(engine, pidfile).subscribe()
|
||||
|
||||
if hasattr(engine, "signal_handler"):
|
||||
engine.signal_handler.subscribe()
|
||||
if hasattr(engine, "console_control_handler"):
|
||||
engine.console_control_handler.subscribe()
|
||||
|
||||
if (fastcgi and (scgi or cgi)) or (scgi and cgi):
|
||||
cherrypy.log.error("You may only specify one of the cgi, fastcgi, and "
|
||||
"scgi options.", 'ENGINE')
|
||||
sys.exit(1)
|
||||
elif fastcgi or scgi or cgi:
|
||||
# Turn off autoreload when using *cgi.
|
||||
cherrypy.config.update({'engine.autoreload_on': False})
|
||||
# Turn off the default HTTP server (which is subscribed by default).
|
||||
cherrypy.server.unsubscribe()
|
||||
|
||||
addr = cherrypy.server.bind_addr
|
||||
if fastcgi:
|
||||
f = servers.FlupFCGIServer(application=cherrypy.tree,
|
||||
bindAddress=addr)
|
||||
elif scgi:
|
||||
f = servers.FlupSCGIServer(application=cherrypy.tree,
|
||||
bindAddress=addr)
|
||||
else:
|
||||
f = servers.FlupCGIServer(application=cherrypy.tree,
|
||||
bindAddress=addr)
|
||||
s = servers.ServerAdapter(engine, httpserver=f, bind_addr=addr)
|
||||
s.subscribe()
|
||||
|
||||
# Always start the engine; this will start all other services
|
||||
try:
|
||||
engine.start()
|
||||
except:
|
||||
# Assume the error has been logged already via bus.log.
|
||||
sys.exit(1)
|
||||
else:
|
||||
engine.block()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from optparse import OptionParser
|
||||
|
||||
p = OptionParser()
|
||||
p.add_option('-c', '--config', action="append", dest='config',
|
||||
help="specify config file(s)")
|
||||
p.add_option('-d', action="store_true", dest='daemonize',
|
||||
help="run the server as a daemon")
|
||||
p.add_option('-e', '--environment', dest='environment', default=None,
|
||||
help="apply the given config environment")
|
||||
p.add_option('-f', action="store_true", dest='fastcgi',
|
||||
help="start a fastcgi server instead of the default HTTP "
|
||||
"server")
|
||||
p.add_option('-s', action="store_true", dest='scgi',
|
||||
help="start a scgi server instead of the default HTTP server")
|
||||
p.add_option('-x', action="store_true", dest='cgi',
|
||||
help="start a cgi server instead of the default HTTP server")
|
||||
p.add_option('-i', '--import', action="append", dest='imports',
|
||||
help="specify modules to import")
|
||||
p.add_option('-p', '--pidfile', dest='pidfile', default=None,
|
||||
help="store the process id in the given file")
|
||||
p.add_option('-P', '--Path', action="append", dest='Path',
|
||||
help="add the given paths to sys.path")
|
||||
options, args = p.parse_args()
|
||||
|
||||
if options.Path:
|
||||
for p in options.Path:
|
||||
sys.path.insert(0, p)
|
||||
|
||||
start(options.config, options.daemonize,
|
||||
options.environment, options.fastcgi, options.scgi,
|
||||
options.pidfile, options.imports, options.cgi)
|
Binary file not shown.
Before Width: | Height: | Size: 1.4 KiB |
|
@ -1,85 +0,0 @@
|
|||
"""CherryPy Library"""
|
||||
|
||||
# Deprecated in CherryPy 3.2 -- remove in CherryPy 3.3
|
||||
from cherrypy.lib.reprconf import unrepr, modules, attributes
|
||||
|
||||
def is_iterator(obj):
|
||||
'''Returns a boolean indicating if the object provided implements
|
||||
the iterator protocol (i.e. like a generator). This will return
|
||||
false for objects which iterable, but not iterators themselves.'''
|
||||
from types import GeneratorType
|
||||
if isinstance(obj, GeneratorType):
|
||||
return True
|
||||
elif not hasattr(obj, '__iter__'):
|
||||
return False
|
||||
else:
|
||||
# Types which implement the protocol must return themselves when
|
||||
# invoking 'iter' upon them.
|
||||
return iter(obj) is obj
|
||||
|
||||
def is_closable_iterator(obj):
|
||||
|
||||
# Not an iterator.
|
||||
if not is_iterator(obj):
|
||||
return False
|
||||
|
||||
# A generator - the easiest thing to deal with.
|
||||
import inspect
|
||||
if inspect.isgenerator(obj):
|
||||
return True
|
||||
|
||||
# A custom iterator. Look for a close method...
|
||||
if not (hasattr(obj, 'close') and callable(obj.close)):
|
||||
return False
|
||||
|
||||
# ... which doesn't require any arguments.
|
||||
try:
|
||||
inspect.getcallargs(obj.close)
|
||||
except TypeError:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
class file_generator(object):
|
||||
|
||||
"""Yield the given input (a file object) in chunks (default 64k). (Core)"""
|
||||
|
||||
def __init__(self, input, chunkSize=65536):
|
||||
self.input = input
|
||||
self.chunkSize = chunkSize
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
chunk = self.input.read(self.chunkSize)
|
||||
if chunk:
|
||||
return chunk
|
||||
else:
|
||||
if hasattr(self.input, 'close'):
|
||||
self.input.close()
|
||||
raise StopIteration()
|
||||
next = __next__
|
||||
|
||||
|
||||
def file_generator_limited(fileobj, count, chunk_size=65536):
|
||||
"""Yield the given file object in chunks, stopping after `count`
|
||||
bytes has been emitted. Default chunk size is 64kB. (Core)
|
||||
"""
|
||||
remaining = count
|
||||
while remaining > 0:
|
||||
chunk = fileobj.read(min(chunk_size, remaining))
|
||||
chunklen = len(chunk)
|
||||
if chunklen == 0:
|
||||
return
|
||||
remaining -= chunklen
|
||||
yield chunk
|
||||
|
||||
|
||||
def set_vary_header(response, header_name):
|
||||
"Add a Vary header to a response"
|
||||
varies = response.headers.get("Vary", "")
|
||||
varies = [x.strip() for x in varies.split(",") if x.strip()]
|
||||
if header_name not in varies:
|
||||
varies.append(header_name)
|
||||
response.headers['Vary'] = ", ".join(varies)
|
|
@ -1,97 +0,0 @@
|
|||
import cherrypy
|
||||
from cherrypy.lib import httpauth
|
||||
|
||||
|
||||
def check_auth(users, encrypt=None, realm=None):
|
||||
"""If an authorization header contains credentials, return True or False.
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
if 'authorization' in request.headers:
|
||||
# make sure the provided credentials are correctly set
|
||||
ah = httpauth.parseAuthorization(request.headers['authorization'])
|
||||
if ah is None:
|
||||
raise cherrypy.HTTPError(400, 'Bad Request')
|
||||
|
||||
if not encrypt:
|
||||
encrypt = httpauth.DIGEST_AUTH_ENCODERS[httpauth.MD5]
|
||||
|
||||
if hasattr(users, '__call__'):
|
||||
try:
|
||||
# backward compatibility
|
||||
users = users() # expect it to return a dictionary
|
||||
|
||||
if not isinstance(users, dict):
|
||||
raise ValueError(
|
||||
"Authentication users must be a dictionary")
|
||||
|
||||
# fetch the user password
|
||||
password = users.get(ah["username"], None)
|
||||
except TypeError:
|
||||
# returns a password (encrypted or clear text)
|
||||
password = users(ah["username"])
|
||||
else:
|
||||
if not isinstance(users, dict):
|
||||
raise ValueError("Authentication users must be a dictionary")
|
||||
|
||||
# fetch the user password
|
||||
password = users.get(ah["username"], None)
|
||||
|
||||
# validate the authorization by re-computing it here
|
||||
# and compare it with what the user-agent provided
|
||||
if httpauth.checkResponse(ah, password, method=request.method,
|
||||
encrypt=encrypt, realm=realm):
|
||||
request.login = ah["username"]
|
||||
return True
|
||||
|
||||
request.login = False
|
||||
return False
|
||||
|
||||
|
||||
def basic_auth(realm, users, encrypt=None, debug=False):
|
||||
"""If auth fails, raise 401 with a basic authentication header.
|
||||
|
||||
realm
|
||||
A string containing the authentication realm.
|
||||
|
||||
users
|
||||
A dict of the form: {username: password} or a callable returning
|
||||
a dict.
|
||||
|
||||
encrypt
|
||||
callable used to encrypt the password returned from the user-agent.
|
||||
if None it defaults to a md5 encryption.
|
||||
|
||||
"""
|
||||
if check_auth(users, encrypt):
|
||||
if debug:
|
||||
cherrypy.log('Auth successful', 'TOOLS.BASIC_AUTH')
|
||||
return
|
||||
|
||||
# inform the user-agent this path is protected
|
||||
cherrypy.serving.response.headers[
|
||||
'www-authenticate'] = httpauth.basicAuth(realm)
|
||||
|
||||
raise cherrypy.HTTPError(
|
||||
401, "You are not authorized to access that resource")
|
||||
|
||||
|
||||
def digest_auth(realm, users, debug=False):
|
||||
"""If auth fails, raise 401 with a digest authentication header.
|
||||
|
||||
realm
|
||||
A string containing the authentication realm.
|
||||
users
|
||||
A dict of the form: {username: password} or a callable returning
|
||||
a dict.
|
||||
"""
|
||||
if check_auth(users, realm=realm):
|
||||
if debug:
|
||||
cherrypy.log('Auth successful', 'TOOLS.DIGEST_AUTH')
|
||||
return
|
||||
|
||||
# inform the user-agent this path is protected
|
||||
cherrypy.serving.response.headers[
|
||||
'www-authenticate'] = httpauth.digestAuth(realm)
|
||||
|
||||
raise cherrypy.HTTPError(
|
||||
401, "You are not authorized to access that resource")
|
|
@ -1,90 +0,0 @@
|
|||
# This file is part of CherryPy <http://www.cherrypy.org/>
|
||||
# -*- coding: utf-8 -*-
|
||||
# vim:ts=4:sw=4:expandtab:fileencoding=utf-8
|
||||
|
||||
__doc__ = """This module provides a CherryPy 3.x tool which implements
|
||||
the server-side of HTTP Basic Access Authentication, as described in
|
||||
:rfc:`2617`.
|
||||
|
||||
Example usage, using the built-in checkpassword_dict function which uses a dict
|
||||
as the credentials store::
|
||||
|
||||
userpassdict = {'bird' : 'bebop', 'ornette' : 'wayout'}
|
||||
checkpassword = cherrypy.lib.auth_basic.checkpassword_dict(userpassdict)
|
||||
basic_auth = {'tools.auth_basic.on': True,
|
||||
'tools.auth_basic.realm': 'earth',
|
||||
'tools.auth_basic.checkpassword': checkpassword,
|
||||
}
|
||||
app_config = { '/' : basic_auth }
|
||||
|
||||
"""
|
||||
|
||||
__author__ = 'visteya'
|
||||
__date__ = 'April 2009'
|
||||
|
||||
import binascii
|
||||
from cherrypy._cpcompat import base64_decode
|
||||
import cherrypy
|
||||
|
||||
|
||||
def checkpassword_dict(user_password_dict):
|
||||
"""Returns a checkpassword function which checks credentials
|
||||
against a dictionary of the form: {username : password}.
|
||||
|
||||
If you want a simple dictionary-based authentication scheme, use
|
||||
checkpassword_dict(my_credentials_dict) as the value for the
|
||||
checkpassword argument to basic_auth().
|
||||
"""
|
||||
def checkpassword(realm, user, password):
|
||||
p = user_password_dict.get(user)
|
||||
return p and p == password or False
|
||||
|
||||
return checkpassword
|
||||
|
||||
|
||||
def basic_auth(realm, checkpassword, debug=False):
|
||||
"""A CherryPy tool which hooks at before_handler to perform
|
||||
HTTP Basic Access Authentication, as specified in :rfc:`2617`.
|
||||
|
||||
If the request has an 'authorization' header with a 'Basic' scheme, this
|
||||
tool attempts to authenticate the credentials supplied in that header. If
|
||||
the request has no 'authorization' header, or if it does but the scheme is
|
||||
not 'Basic', or if authentication fails, the tool sends a 401 response with
|
||||
a 'WWW-Authenticate' Basic header.
|
||||
|
||||
realm
|
||||
A string containing the authentication realm.
|
||||
|
||||
checkpassword
|
||||
A callable which checks the authentication credentials.
|
||||
Its signature is checkpassword(realm, username, password). where
|
||||
username and password are the values obtained from the request's
|
||||
'authorization' header. If authentication succeeds, checkpassword
|
||||
returns True, else it returns False.
|
||||
|
||||
"""
|
||||
|
||||
if '"' in realm:
|
||||
raise ValueError('Realm cannot contain the " (quote) character.')
|
||||
request = cherrypy.serving.request
|
||||
|
||||
auth_header = request.headers.get('authorization')
|
||||
if auth_header is not None:
|
||||
try:
|
||||
scheme, params = auth_header.split(' ', 1)
|
||||
if scheme.lower() == 'basic':
|
||||
username, password = base64_decode(params).split(':', 1)
|
||||
if checkpassword(realm, username, password):
|
||||
if debug:
|
||||
cherrypy.log('Auth succeeded', 'TOOLS.AUTH_BASIC')
|
||||
request.login = username
|
||||
return # successful authentication
|
||||
# split() error, base64.decodestring() error
|
||||
except (ValueError, binascii.Error):
|
||||
raise cherrypy.HTTPError(400, 'Bad Request')
|
||||
|
||||
# Respond with 401 status and a WWW-Authenticate header
|
||||
cherrypy.serving.response.headers[
|
||||
'www-authenticate'] = 'Basic realm="%s"' % realm
|
||||
raise cherrypy.HTTPError(
|
||||
401, "You are not authorized to access that resource")
|
|
@ -1,390 +0,0 @@
|
|||
# This file is part of CherryPy <http://www.cherrypy.org/>
|
||||
# -*- coding: utf-8 -*-
|
||||
# vim:ts=4:sw=4:expandtab:fileencoding=utf-8
|
||||
|
||||
__doc__ = """An implementation of the server-side of HTTP Digest Access
|
||||
Authentication, which is described in :rfc:`2617`.
|
||||
|
||||
Example usage, using the built-in get_ha1_dict_plain function which uses a dict
|
||||
of plaintext passwords as the credentials store::
|
||||
|
||||
userpassdict = {'alice' : '4x5istwelve'}
|
||||
get_ha1 = cherrypy.lib.auth_digest.get_ha1_dict_plain(userpassdict)
|
||||
digest_auth = {'tools.auth_digest.on': True,
|
||||
'tools.auth_digest.realm': 'wonderland',
|
||||
'tools.auth_digest.get_ha1': get_ha1,
|
||||
'tools.auth_digest.key': 'a565c27146791cfb',
|
||||
}
|
||||
app_config = { '/' : digest_auth }
|
||||
"""
|
||||
|
||||
__author__ = 'visteya'
|
||||
__date__ = 'April 2009'
|
||||
|
||||
|
||||
import time
|
||||
from cherrypy._cpcompat import parse_http_list, parse_keqv_list
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import md5, ntob
|
||||
md5_hex = lambda s: md5(ntob(s)).hexdigest()
|
||||
|
||||
qop_auth = 'auth'
|
||||
qop_auth_int = 'auth-int'
|
||||
valid_qops = (qop_auth, qop_auth_int)
|
||||
|
||||
valid_algorithms = ('MD5', 'MD5-sess')
|
||||
|
||||
|
||||
def TRACE(msg):
|
||||
cherrypy.log(msg, context='TOOLS.AUTH_DIGEST')
|
||||
|
||||
# Three helper functions for users of the tool, providing three variants
|
||||
# of get_ha1() functions for three different kinds of credential stores.
|
||||
|
||||
|
||||
def get_ha1_dict_plain(user_password_dict):
|
||||
"""Returns a get_ha1 function which obtains a plaintext password from a
|
||||
dictionary of the form: {username : password}.
|
||||
|
||||
If you want a simple dictionary-based authentication scheme, with plaintext
|
||||
passwords, use get_ha1_dict_plain(my_userpass_dict) as the value for the
|
||||
get_ha1 argument to digest_auth().
|
||||
"""
|
||||
def get_ha1(realm, username):
|
||||
password = user_password_dict.get(username)
|
||||
if password:
|
||||
return md5_hex('%s:%s:%s' % (username, realm, password))
|
||||
return None
|
||||
|
||||
return get_ha1
|
||||
|
||||
|
||||
def get_ha1_dict(user_ha1_dict):
|
||||
"""Returns a get_ha1 function which obtains a HA1 password hash from a
|
||||
dictionary of the form: {username : HA1}.
|
||||
|
||||
If you want a dictionary-based authentication scheme, but with
|
||||
pre-computed HA1 hashes instead of plain-text passwords, use
|
||||
get_ha1_dict(my_userha1_dict) as the value for the get_ha1
|
||||
argument to digest_auth().
|
||||
"""
|
||||
def get_ha1(realm, username):
|
||||
return user_ha1_dict.get(username)
|
||||
|
||||
return get_ha1
|
||||
|
||||
|
||||
def get_ha1_file_htdigest(filename):
|
||||
"""Returns a get_ha1 function which obtains a HA1 password hash from a
|
||||
flat file with lines of the same format as that produced by the Apache
|
||||
htdigest utility. For example, for realm 'wonderland', username 'alice',
|
||||
and password '4x5istwelve', the htdigest line would be::
|
||||
|
||||
alice:wonderland:3238cdfe91a8b2ed8e39646921a02d4c
|
||||
|
||||
If you want to use an Apache htdigest file as the credentials store,
|
||||
then use get_ha1_file_htdigest(my_htdigest_file) as the value for the
|
||||
get_ha1 argument to digest_auth(). It is recommended that the filename
|
||||
argument be an absolute path, to avoid problems.
|
||||
"""
|
||||
def get_ha1(realm, username):
|
||||
result = None
|
||||
f = open(filename, 'r')
|
||||
for line in f:
|
||||
u, r, ha1 = line.rstrip().split(':')
|
||||
if u == username and r == realm:
|
||||
result = ha1
|
||||
break
|
||||
f.close()
|
||||
return result
|
||||
|
||||
return get_ha1
|
||||
|
||||
|
||||
def synthesize_nonce(s, key, timestamp=None):
|
||||
"""Synthesize a nonce value which resists spoofing and can be checked
|
||||
for staleness. Returns a string suitable as the value for 'nonce' in
|
||||
the www-authenticate header.
|
||||
|
||||
s
|
||||
A string related to the resource, such as the hostname of the server.
|
||||
|
||||
key
|
||||
A secret string known only to the server.
|
||||
|
||||
timestamp
|
||||
An integer seconds-since-the-epoch timestamp
|
||||
|
||||
"""
|
||||
if timestamp is None:
|
||||
timestamp = int(time.time())
|
||||
h = md5_hex('%s:%s:%s' % (timestamp, s, key))
|
||||
nonce = '%s:%s' % (timestamp, h)
|
||||
return nonce
|
||||
|
||||
|
||||
def H(s):
|
||||
"""The hash function H"""
|
||||
return md5_hex(s)
|
||||
|
||||
|
||||
class HttpDigestAuthorization (object):
|
||||
|
||||
"""Class to parse a Digest Authorization header and perform re-calculation
|
||||
of the digest.
|
||||
"""
|
||||
|
||||
def errmsg(self, s):
|
||||
return 'Digest Authorization header: %s' % s
|
||||
|
||||
def __init__(self, auth_header, http_method, debug=False):
|
||||
self.http_method = http_method
|
||||
self.debug = debug
|
||||
scheme, params = auth_header.split(" ", 1)
|
||||
self.scheme = scheme.lower()
|
||||
if self.scheme != 'digest':
|
||||
raise ValueError('Authorization scheme is not "Digest"')
|
||||
|
||||
self.auth_header = auth_header
|
||||
|
||||
# make a dict of the params
|
||||
items = parse_http_list(params)
|
||||
paramsd = parse_keqv_list(items)
|
||||
|
||||
self.realm = paramsd.get('realm')
|
||||
self.username = paramsd.get('username')
|
||||
self.nonce = paramsd.get('nonce')
|
||||
self.uri = paramsd.get('uri')
|
||||
self.method = paramsd.get('method')
|
||||
self.response = paramsd.get('response') # the response digest
|
||||
self.algorithm = paramsd.get('algorithm', 'MD5').upper()
|
||||
self.cnonce = paramsd.get('cnonce')
|
||||
self.opaque = paramsd.get('opaque')
|
||||
self.qop = paramsd.get('qop') # qop
|
||||
self.nc = paramsd.get('nc') # nonce count
|
||||
|
||||
# perform some correctness checks
|
||||
if self.algorithm not in valid_algorithms:
|
||||
raise ValueError(
|
||||
self.errmsg("Unsupported value for algorithm: '%s'" %
|
||||
self.algorithm))
|
||||
|
||||
has_reqd = (
|
||||
self.username and
|
||||
self.realm and
|
||||
self.nonce and
|
||||
self.uri and
|
||||
self.response
|
||||
)
|
||||
if not has_reqd:
|
||||
raise ValueError(
|
||||
self.errmsg("Not all required parameters are present."))
|
||||
|
||||
if self.qop:
|
||||
if self.qop not in valid_qops:
|
||||
raise ValueError(
|
||||
self.errmsg("Unsupported value for qop: '%s'" % self.qop))
|
||||
if not (self.cnonce and self.nc):
|
||||
raise ValueError(
|
||||
self.errmsg("If qop is sent then "
|
||||
"cnonce and nc MUST be present"))
|
||||
else:
|
||||
if self.cnonce or self.nc:
|
||||
raise ValueError(
|
||||
self.errmsg("If qop is not sent, "
|
||||
"neither cnonce nor nc can be present"))
|
||||
|
||||
def __str__(self):
|
||||
return 'authorization : %s' % self.auth_header
|
||||
|
||||
def validate_nonce(self, s, key):
|
||||
"""Validate the nonce.
|
||||
Returns True if nonce was generated by synthesize_nonce() and the
|
||||
timestamp is not spoofed, else returns False.
|
||||
|
||||
s
|
||||
A string related to the resource, such as the hostname of
|
||||
the server.
|
||||
|
||||
key
|
||||
A secret string known only to the server.
|
||||
|
||||
Both s and key must be the same values which were used to synthesize
|
||||
the nonce we are trying to validate.
|
||||
"""
|
||||
try:
|
||||
timestamp, hashpart = self.nonce.split(':', 1)
|
||||
s_timestamp, s_hashpart = synthesize_nonce(
|
||||
s, key, timestamp).split(':', 1)
|
||||
is_valid = s_hashpart == hashpart
|
||||
if self.debug:
|
||||
TRACE('validate_nonce: %s' % is_valid)
|
||||
return is_valid
|
||||
except ValueError: # split() error
|
||||
pass
|
||||
return False
|
||||
|
||||
def is_nonce_stale(self, max_age_seconds=600):
|
||||
"""Returns True if a validated nonce is stale. The nonce contains a
|
||||
timestamp in plaintext and also a secure hash of the timestamp.
|
||||
You should first validate the nonce to ensure the plaintext
|
||||
timestamp is not spoofed.
|
||||
"""
|
||||
try:
|
||||
timestamp, hashpart = self.nonce.split(':', 1)
|
||||
if int(timestamp) + max_age_seconds > int(time.time()):
|
||||
return False
|
||||
except ValueError: # int() error
|
||||
pass
|
||||
if self.debug:
|
||||
TRACE("nonce is stale")
|
||||
return True
|
||||
|
||||
def HA2(self, entity_body=''):
|
||||
"""Returns the H(A2) string. See :rfc:`2617` section 3.2.2.3."""
|
||||
# RFC 2617 3.2.2.3
|
||||
# If the "qop" directive's value is "auth" or is unspecified,
|
||||
# then A2 is:
|
||||
# A2 = method ":" digest-uri-value
|
||||
#
|
||||
# If the "qop" value is "auth-int", then A2 is:
|
||||
# A2 = method ":" digest-uri-value ":" H(entity-body)
|
||||
if self.qop is None or self.qop == "auth":
|
||||
a2 = '%s:%s' % (self.http_method, self.uri)
|
||||
elif self.qop == "auth-int":
|
||||
a2 = "%s:%s:%s" % (self.http_method, self.uri, H(entity_body))
|
||||
else:
|
||||
# in theory, this should never happen, since I validate qop in
|
||||
# __init__()
|
||||
raise ValueError(self.errmsg("Unrecognized value for qop!"))
|
||||
return H(a2)
|
||||
|
||||
def request_digest(self, ha1, entity_body=''):
|
||||
"""Calculates the Request-Digest. See :rfc:`2617` section 3.2.2.1.
|
||||
|
||||
ha1
|
||||
The HA1 string obtained from the credentials store.
|
||||
|
||||
entity_body
|
||||
If 'qop' is set to 'auth-int', then A2 includes a hash
|
||||
of the "entity body". The entity body is the part of the
|
||||
message which follows the HTTP headers. See :rfc:`2617` section
|
||||
4.3. This refers to the entity the user agent sent in the
|
||||
request which has the Authorization header. Typically GET
|
||||
requests don't have an entity, and POST requests do.
|
||||
|
||||
"""
|
||||
ha2 = self.HA2(entity_body)
|
||||
# Request-Digest -- RFC 2617 3.2.2.1
|
||||
if self.qop:
|
||||
req = "%s:%s:%s:%s:%s" % (
|
||||
self.nonce, self.nc, self.cnonce, self.qop, ha2)
|
||||
else:
|
||||
req = "%s:%s" % (self.nonce, ha2)
|
||||
|
||||
# RFC 2617 3.2.2.2
|
||||
#
|
||||
# If the "algorithm" directive's value is "MD5" or is unspecified,
|
||||
# then A1 is:
|
||||
# A1 = unq(username-value) ":" unq(realm-value) ":" passwd
|
||||
#
|
||||
# If the "algorithm" directive's value is "MD5-sess", then A1 is
|
||||
# calculated only once - on the first request by the client following
|
||||
# receipt of a WWW-Authenticate challenge from the server.
|
||||
# A1 = H( unq(username-value) ":" unq(realm-value) ":" passwd )
|
||||
# ":" unq(nonce-value) ":" unq(cnonce-value)
|
||||
if self.algorithm == 'MD5-sess':
|
||||
ha1 = H('%s:%s:%s' % (ha1, self.nonce, self.cnonce))
|
||||
|
||||
digest = H('%s:%s' % (ha1, req))
|
||||
return digest
|
||||
|
||||
|
||||
def www_authenticate(realm, key, algorithm='MD5', nonce=None, qop=qop_auth,
|
||||
stale=False):
|
||||
"""Constructs a WWW-Authenticate header for Digest authentication."""
|
||||
if qop not in valid_qops:
|
||||
raise ValueError("Unsupported value for qop: '%s'" % qop)
|
||||
if algorithm not in valid_algorithms:
|
||||
raise ValueError("Unsupported value for algorithm: '%s'" % algorithm)
|
||||
|
||||
if nonce is None:
|
||||
nonce = synthesize_nonce(realm, key)
|
||||
s = 'Digest realm="%s", nonce="%s", algorithm="%s", qop="%s"' % (
|
||||
realm, nonce, algorithm, qop)
|
||||
if stale:
|
||||
s += ', stale="true"'
|
||||
return s
|
||||
|
||||
|
||||
def digest_auth(realm, get_ha1, key, debug=False):
|
||||
"""A CherryPy tool which hooks at before_handler to perform
|
||||
HTTP Digest Access Authentication, as specified in :rfc:`2617`.
|
||||
|
||||
If the request has an 'authorization' header with a 'Digest' scheme,
|
||||
this tool authenticates the credentials supplied in that header.
|
||||
If the request has no 'authorization' header, or if it does but the
|
||||
scheme is not "Digest", or if authentication fails, the tool sends
|
||||
a 401 response with a 'WWW-Authenticate' Digest header.
|
||||
|
||||
realm
|
||||
A string containing the authentication realm.
|
||||
|
||||
get_ha1
|
||||
A callable which looks up a username in a credentials store
|
||||
and returns the HA1 string, which is defined in the RFC to be
|
||||
MD5(username : realm : password). The function's signature is:
|
||||
``get_ha1(realm, username)``
|
||||
where username is obtained from the request's 'authorization' header.
|
||||
If username is not found in the credentials store, get_ha1() returns
|
||||
None.
|
||||
|
||||
key
|
||||
A secret string known only to the server, used in the synthesis
|
||||
of nonces.
|
||||
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
|
||||
auth_header = request.headers.get('authorization')
|
||||
nonce_is_stale = False
|
||||
if auth_header is not None:
|
||||
try:
|
||||
auth = HttpDigestAuthorization(
|
||||
auth_header, request.method, debug=debug)
|
||||
except ValueError:
|
||||
raise cherrypy.HTTPError(
|
||||
400, "The Authorization header could not be parsed.")
|
||||
|
||||
if debug:
|
||||
TRACE(str(auth))
|
||||
|
||||
if auth.validate_nonce(realm, key):
|
||||
ha1 = get_ha1(realm, auth.username)
|
||||
if ha1 is not None:
|
||||
# note that for request.body to be available we need to
|
||||
# hook in at before_handler, not on_start_resource like
|
||||
# 3.1.x digest_auth does.
|
||||
digest = auth.request_digest(ha1, entity_body=request.body)
|
||||
if digest == auth.response: # authenticated
|
||||
if debug:
|
||||
TRACE("digest matches auth.response")
|
||||
# Now check if nonce is stale.
|
||||
# The choice of ten minutes' lifetime for nonce is somewhat
|
||||
# arbitrary
|
||||
nonce_is_stale = auth.is_nonce_stale(max_age_seconds=600)
|
||||
if not nonce_is_stale:
|
||||
request.login = auth.username
|
||||
if debug:
|
||||
TRACE("authentication of %s successful" %
|
||||
auth.username)
|
||||
return
|
||||
|
||||
# Respond with 401 status and a WWW-Authenticate header
|
||||
header = www_authenticate(realm, key, stale=nonce_is_stale)
|
||||
if debug:
|
||||
TRACE(header)
|
||||
cherrypy.serving.response.headers['WWW-Authenticate'] = header
|
||||
raise cherrypy.HTTPError(
|
||||
401, "You are not authorized to access that resource")
|
|
@ -1,470 +0,0 @@
|
|||
"""
|
||||
CherryPy implements a simple caching system as a pluggable Tool. This tool
|
||||
tries to be an (in-process) HTTP/1.1-compliant cache. It's not quite there
|
||||
yet, but it's probably good enough for most sites.
|
||||
|
||||
In general, GET responses are cached (along with selecting headers) and, if
|
||||
another request arrives for the same resource, the caching Tool will return 304
|
||||
Not Modified if possible, or serve the cached response otherwise. It also sets
|
||||
request.cached to True if serving a cached representation, and sets
|
||||
request.cacheable to False (so it doesn't get cached again).
|
||||
|
||||
If POST, PUT, or DELETE requests are made for a cached resource, they
|
||||
invalidate (delete) any cached response.
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
Configuration file example::
|
||||
|
||||
[/]
|
||||
tools.caching.on = True
|
||||
tools.caching.delay = 3600
|
||||
|
||||
You may use a class other than the default
|
||||
:class:`MemoryCache<cherrypy.lib.caching.MemoryCache>` by supplying the config
|
||||
entry ``cache_class``; supply the full dotted name of the replacement class
|
||||
as the config value. It must implement the basic methods ``get``, ``put``,
|
||||
``delete``, and ``clear``.
|
||||
|
||||
You may set any attribute, including overriding methods, on the cache
|
||||
instance by providing them in config. The above sets the
|
||||
:attr:`delay<cherrypy.lib.caching.MemoryCache.delay>` attribute, for example.
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
|
||||
import cherrypy
|
||||
from cherrypy.lib import cptools, httputil
|
||||
from cherrypy._cpcompat import copyitems, ntob, set_daemon, sorted, Event
|
||||
|
||||
|
||||
class Cache(object):
|
||||
|
||||
"""Base class for Cache implementations."""
|
||||
|
||||
def get(self):
|
||||
"""Return the current variant if in the cache, else None."""
|
||||
raise NotImplemented
|
||||
|
||||
def put(self, obj, size):
|
||||
"""Store the current variant in the cache."""
|
||||
raise NotImplemented
|
||||
|
||||
def delete(self):
|
||||
"""Remove ALL cached variants of the current resource."""
|
||||
raise NotImplemented
|
||||
|
||||
def clear(self):
|
||||
"""Reset the cache to its initial, empty state."""
|
||||
raise NotImplemented
|
||||
|
||||
|
||||
# ------------------------------ Memory Cache ------------------------------- #
|
||||
class AntiStampedeCache(dict):
|
||||
|
||||
"""A storage system for cached items which reduces stampede collisions."""
|
||||
|
||||
def wait(self, key, timeout=5, debug=False):
|
||||
"""Return the cached value for the given key, or None.
|
||||
|
||||
If timeout is not None, and the value is already
|
||||
being calculated by another thread, wait until the given timeout has
|
||||
elapsed. If the value is available before the timeout expires, it is
|
||||
returned. If not, None is returned, and a sentinel placed in the cache
|
||||
to signal other threads to wait.
|
||||
|
||||
If timeout is None, no waiting is performed nor sentinels used.
|
||||
"""
|
||||
value = self.get(key)
|
||||
if isinstance(value, Event):
|
||||
if timeout is None:
|
||||
# Ignore the other thread and recalc it ourselves.
|
||||
if debug:
|
||||
cherrypy.log('No timeout', 'TOOLS.CACHING')
|
||||
return None
|
||||
|
||||
# Wait until it's done or times out.
|
||||
if debug:
|
||||
cherrypy.log('Waiting up to %s seconds' %
|
||||
timeout, 'TOOLS.CACHING')
|
||||
value.wait(timeout)
|
||||
if value.result is not None:
|
||||
# The other thread finished its calculation. Use it.
|
||||
if debug:
|
||||
cherrypy.log('Result!', 'TOOLS.CACHING')
|
||||
return value.result
|
||||
# Timed out. Stick an Event in the slot so other threads wait
|
||||
# on this one to finish calculating the value.
|
||||
if debug:
|
||||
cherrypy.log('Timed out', 'TOOLS.CACHING')
|
||||
e = threading.Event()
|
||||
e.result = None
|
||||
dict.__setitem__(self, key, e)
|
||||
|
||||
return None
|
||||
elif value is None:
|
||||
# Stick an Event in the slot so other threads wait
|
||||
# on this one to finish calculating the value.
|
||||
if debug:
|
||||
cherrypy.log('Timed out', 'TOOLS.CACHING')
|
||||
e = threading.Event()
|
||||
e.result = None
|
||||
dict.__setitem__(self, key, e)
|
||||
return value
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
"""Set the cached value for the given key."""
|
||||
existing = self.get(key)
|
||||
dict.__setitem__(self, key, value)
|
||||
if isinstance(existing, Event):
|
||||
# Set Event.result so other threads waiting on it have
|
||||
# immediate access without needing to poll the cache again.
|
||||
existing.result = value
|
||||
existing.set()
|
||||
|
||||
|
||||
class MemoryCache(Cache):
|
||||
|
||||
"""An in-memory cache for varying response content.
|
||||
|
||||
Each key in self.store is a URI, and each value is an AntiStampedeCache.
|
||||
The response for any given URI may vary based on the values of
|
||||
"selecting request headers"; that is, those named in the Vary
|
||||
response header. We assume the list of header names to be constant
|
||||
for each URI throughout the lifetime of the application, and store
|
||||
that list in ``self.store[uri].selecting_headers``.
|
||||
|
||||
The items contained in ``self.store[uri]`` have keys which are tuples of
|
||||
request header values (in the same order as the names in its
|
||||
selecting_headers), and values which are the actual responses.
|
||||
"""
|
||||
|
||||
maxobjects = 1000
|
||||
"""The maximum number of cached objects; defaults to 1000."""
|
||||
|
||||
maxobj_size = 100000
|
||||
"""The maximum size of each cached object in bytes; defaults to 100 KB."""
|
||||
|
||||
maxsize = 10000000
|
||||
"""The maximum size of the entire cache in bytes; defaults to 10 MB."""
|
||||
|
||||
delay = 600
|
||||
"""Seconds until the cached content expires; defaults to 600 (10 minutes).
|
||||
"""
|
||||
|
||||
antistampede_timeout = 5
|
||||
"""Seconds to wait for other threads to release a cache lock."""
|
||||
|
||||
expire_freq = 0.1
|
||||
"""Seconds to sleep between cache expiration sweeps."""
|
||||
|
||||
debug = False
|
||||
|
||||
def __init__(self):
|
||||
self.clear()
|
||||
|
||||
# Run self.expire_cache in a separate daemon thread.
|
||||
t = threading.Thread(target=self.expire_cache, name='expire_cache')
|
||||
self.expiration_thread = t
|
||||
set_daemon(t, True)
|
||||
t.start()
|
||||
|
||||
def clear(self):
|
||||
"""Reset the cache to its initial, empty state."""
|
||||
self.store = {}
|
||||
self.expirations = {}
|
||||
self.tot_puts = 0
|
||||
self.tot_gets = 0
|
||||
self.tot_hist = 0
|
||||
self.tot_expires = 0
|
||||
self.tot_non_modified = 0
|
||||
self.cursize = 0
|
||||
|
||||
def expire_cache(self):
|
||||
"""Continuously examine cached objects, expiring stale ones.
|
||||
|
||||
This function is designed to be run in its own daemon thread,
|
||||
referenced at ``self.expiration_thread``.
|
||||
"""
|
||||
# It's possible that "time" will be set to None
|
||||
# arbitrarily, so we check "while time" to avoid exceptions.
|
||||
# See tickets #99 and #180 for more information.
|
||||
while time:
|
||||
now = time.time()
|
||||
# Must make a copy of expirations so it doesn't change size
|
||||
# during iteration
|
||||
for expiration_time, objects in copyitems(self.expirations):
|
||||
if expiration_time <= now:
|
||||
for obj_size, uri, sel_header_values in objects:
|
||||
try:
|
||||
del self.store[uri][tuple(sel_header_values)]
|
||||
self.tot_expires += 1
|
||||
self.cursize -= obj_size
|
||||
except KeyError:
|
||||
# the key may have been deleted elsewhere
|
||||
pass
|
||||
del self.expirations[expiration_time]
|
||||
time.sleep(self.expire_freq)
|
||||
|
||||
def get(self):
|
||||
"""Return the current variant if in the cache, else None."""
|
||||
request = cherrypy.serving.request
|
||||
self.tot_gets += 1
|
||||
|
||||
uri = cherrypy.url(qs=request.query_string)
|
||||
uricache = self.store.get(uri)
|
||||
if uricache is None:
|
||||
return None
|
||||
|
||||
header_values = [request.headers.get(h, '')
|
||||
for h in uricache.selecting_headers]
|
||||
variant = uricache.wait(key=tuple(sorted(header_values)),
|
||||
timeout=self.antistampede_timeout,
|
||||
debug=self.debug)
|
||||
if variant is not None:
|
||||
self.tot_hist += 1
|
||||
return variant
|
||||
|
||||
def put(self, variant, size):
|
||||
"""Store the current variant in the cache."""
|
||||
request = cherrypy.serving.request
|
||||
response = cherrypy.serving.response
|
||||
|
||||
uri = cherrypy.url(qs=request.query_string)
|
||||
uricache = self.store.get(uri)
|
||||
if uricache is None:
|
||||
uricache = AntiStampedeCache()
|
||||
uricache.selecting_headers = [
|
||||
e.value for e in response.headers.elements('Vary')]
|
||||
self.store[uri] = uricache
|
||||
|
||||
if len(self.store) < self.maxobjects:
|
||||
total_size = self.cursize + size
|
||||
|
||||
# checks if there's space for the object
|
||||
if (size < self.maxobj_size and total_size < self.maxsize):
|
||||
# add to the expirations list
|
||||
expiration_time = response.time + self.delay
|
||||
bucket = self.expirations.setdefault(expiration_time, [])
|
||||
bucket.append((size, uri, uricache.selecting_headers))
|
||||
|
||||
# add to the cache
|
||||
header_values = [request.headers.get(h, '')
|
||||
for h in uricache.selecting_headers]
|
||||
uricache[tuple(sorted(header_values))] = variant
|
||||
self.tot_puts += 1
|
||||
self.cursize = total_size
|
||||
|
||||
def delete(self):
|
||||
"""Remove ALL cached variants of the current resource."""
|
||||
uri = cherrypy.url(qs=cherrypy.serving.request.query_string)
|
||||
self.store.pop(uri, None)
|
||||
|
||||
|
||||
def get(invalid_methods=("POST", "PUT", "DELETE"), debug=False, **kwargs):
|
||||
"""Try to obtain cached output. If fresh enough, raise HTTPError(304).
|
||||
|
||||
If POST, PUT, or DELETE:
|
||||
* invalidates (deletes) any cached response for this resource
|
||||
* sets request.cached = False
|
||||
* sets request.cacheable = False
|
||||
|
||||
else if a cached copy exists:
|
||||
* sets request.cached = True
|
||||
* sets request.cacheable = False
|
||||
* sets response.headers to the cached values
|
||||
* checks the cached Last-Modified response header against the
|
||||
current If-(Un)Modified-Since request headers; raises 304
|
||||
if necessary.
|
||||
* sets response.status and response.body to the cached values
|
||||
* returns True
|
||||
|
||||
otherwise:
|
||||
* sets request.cached = False
|
||||
* sets request.cacheable = True
|
||||
* returns False
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
response = cherrypy.serving.response
|
||||
|
||||
if not hasattr(cherrypy, "_cache"):
|
||||
# Make a process-wide Cache object.
|
||||
cherrypy._cache = kwargs.pop("cache_class", MemoryCache)()
|
||||
|
||||
# Take all remaining kwargs and set them on the Cache object.
|
||||
for k, v in kwargs.items():
|
||||
setattr(cherrypy._cache, k, v)
|
||||
cherrypy._cache.debug = debug
|
||||
|
||||
# POST, PUT, DELETE should invalidate (delete) the cached copy.
|
||||
# See http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.10.
|
||||
if request.method in invalid_methods:
|
||||
if debug:
|
||||
cherrypy.log('request.method %r in invalid_methods %r' %
|
||||
(request.method, invalid_methods), 'TOOLS.CACHING')
|
||||
cherrypy._cache.delete()
|
||||
request.cached = False
|
||||
request.cacheable = False
|
||||
return False
|
||||
|
||||
if 'no-cache' in [e.value for e in request.headers.elements('Pragma')]:
|
||||
request.cached = False
|
||||
request.cacheable = True
|
||||
return False
|
||||
|
||||
cache_data = cherrypy._cache.get()
|
||||
request.cached = bool(cache_data)
|
||||
request.cacheable = not request.cached
|
||||
if request.cached:
|
||||
# Serve the cached copy.
|
||||
max_age = cherrypy._cache.delay
|
||||
for v in [e.value for e in request.headers.elements('Cache-Control')]:
|
||||
atoms = v.split('=', 1)
|
||||
directive = atoms.pop(0)
|
||||
if directive == 'max-age':
|
||||
if len(atoms) != 1 or not atoms[0].isdigit():
|
||||
raise cherrypy.HTTPError(
|
||||
400, "Invalid Cache-Control header")
|
||||
max_age = int(atoms[0])
|
||||
break
|
||||
elif directive == 'no-cache':
|
||||
if debug:
|
||||
cherrypy.log(
|
||||
'Ignoring cache due to Cache-Control: no-cache',
|
||||
'TOOLS.CACHING')
|
||||
request.cached = False
|
||||
request.cacheable = True
|
||||
return False
|
||||
|
||||
if debug:
|
||||
cherrypy.log('Reading response from cache', 'TOOLS.CACHING')
|
||||
s, h, b, create_time = cache_data
|
||||
age = int(response.time - create_time)
|
||||
if (age > max_age):
|
||||
if debug:
|
||||
cherrypy.log('Ignoring cache due to age > %d' % max_age,
|
||||
'TOOLS.CACHING')
|
||||
request.cached = False
|
||||
request.cacheable = True
|
||||
return False
|
||||
|
||||
# Copy the response headers. See
|
||||
# https://bitbucket.org/cherrypy/cherrypy/issue/721.
|
||||
response.headers = rh = httputil.HeaderMap()
|
||||
for k in h:
|
||||
dict.__setitem__(rh, k, dict.__getitem__(h, k))
|
||||
|
||||
# Add the required Age header
|
||||
response.headers["Age"] = str(age)
|
||||
|
||||
try:
|
||||
# Note that validate_since depends on a Last-Modified header;
|
||||
# this was put into the cached copy, and should have been
|
||||
# resurrected just above (response.headers = cache_data[1]).
|
||||
cptools.validate_since()
|
||||
except cherrypy.HTTPRedirect:
|
||||
x = sys.exc_info()[1]
|
||||
if x.status == 304:
|
||||
cherrypy._cache.tot_non_modified += 1
|
||||
raise
|
||||
|
||||
# serve it & get out from the request
|
||||
response.status = s
|
||||
response.body = b
|
||||
else:
|
||||
if debug:
|
||||
cherrypy.log('request is not cached', 'TOOLS.CACHING')
|
||||
return request.cached
|
||||
|
||||
|
||||
def tee_output():
|
||||
"""Tee response output to cache storage. Internal."""
|
||||
# Used by CachingTool by attaching to request.hooks
|
||||
|
||||
request = cherrypy.serving.request
|
||||
if 'no-store' in request.headers.values('Cache-Control'):
|
||||
return
|
||||
|
||||
def tee(body):
|
||||
"""Tee response.body into a list."""
|
||||
if ('no-cache' in response.headers.values('Pragma') or
|
||||
'no-store' in response.headers.values('Cache-Control')):
|
||||
for chunk in body:
|
||||
yield chunk
|
||||
return
|
||||
|
||||
output = []
|
||||
for chunk in body:
|
||||
output.append(chunk)
|
||||
yield chunk
|
||||
|
||||
# save the cache data
|
||||
body = ntob('').join(output)
|
||||
cherrypy._cache.put((response.status, response.headers or {},
|
||||
body, response.time), len(body))
|
||||
|
||||
response = cherrypy.serving.response
|
||||
response.body = tee(response.body)
|
||||
|
||||
|
||||
def expires(secs=0, force=False, debug=False):
|
||||
"""Tool for influencing cache mechanisms using the 'Expires' header.
|
||||
|
||||
secs
|
||||
Must be either an int or a datetime.timedelta, and indicates the
|
||||
number of seconds between response.time and when the response should
|
||||
expire. The 'Expires' header will be set to response.time + secs.
|
||||
If secs is zero, the 'Expires' header is set one year in the past, and
|
||||
the following "cache prevention" headers are also set:
|
||||
|
||||
* Pragma: no-cache
|
||||
* Cache-Control': no-cache, must-revalidate
|
||||
|
||||
force
|
||||
If False, the following headers are checked:
|
||||
|
||||
* Etag
|
||||
* Last-Modified
|
||||
* Age
|
||||
* Expires
|
||||
|
||||
If any are already present, none of the above response headers are set.
|
||||
|
||||
"""
|
||||
|
||||
response = cherrypy.serving.response
|
||||
headers = response.headers
|
||||
|
||||
cacheable = False
|
||||
if not force:
|
||||
# some header names that indicate that the response can be cached
|
||||
for indicator in ('Etag', 'Last-Modified', 'Age', 'Expires'):
|
||||
if indicator in headers:
|
||||
cacheable = True
|
||||
break
|
||||
|
||||
if not cacheable and not force:
|
||||
if debug:
|
||||
cherrypy.log('request is not cacheable', 'TOOLS.EXPIRES')
|
||||
else:
|
||||
if debug:
|
||||
cherrypy.log('request is cacheable', 'TOOLS.EXPIRES')
|
||||
if isinstance(secs, datetime.timedelta):
|
||||
secs = (86400 * secs.days) + secs.seconds
|
||||
|
||||
if secs == 0:
|
||||
if force or ("Pragma" not in headers):
|
||||
headers["Pragma"] = "no-cache"
|
||||
if cherrypy.serving.request.protocol >= (1, 1):
|
||||
if force or "Cache-Control" not in headers:
|
||||
headers["Cache-Control"] = "no-cache, must-revalidate"
|
||||
# Set an explicit Expires date in the past.
|
||||
expiry = httputil.HTTPDate(1169942400.0)
|
||||
else:
|
||||
expiry = httputil.HTTPDate(response.time + secs)
|
||||
if force or "Expires" not in headers:
|
||||
headers["Expires"] = expiry
|
|
@ -1,387 +0,0 @@
|
|||
"""Code-coverage tools for CherryPy.
|
||||
|
||||
To use this module, or the coverage tools in the test suite,
|
||||
you need to download 'coverage.py', either Gareth Rees' `original
|
||||
implementation <http://www.garethrees.org/2001/12/04/python-coverage/>`_
|
||||
or Ned Batchelder's `enhanced version:
|
||||
<http://www.nedbatchelder.com/code/modules/coverage.html>`_
|
||||
|
||||
To turn on coverage tracing, use the following code::
|
||||
|
||||
cherrypy.engine.subscribe('start', covercp.start)
|
||||
|
||||
DO NOT subscribe anything on the 'start_thread' channel, as previously
|
||||
recommended. Calling start once in the main thread should be sufficient
|
||||
to start coverage on all threads. Calling start again in each thread
|
||||
effectively clears any coverage data gathered up to that point.
|
||||
|
||||
Run your code, then use the ``covercp.serve()`` function to browse the
|
||||
results in a web browser. If you run this module from the command line,
|
||||
it will call ``serve()`` for you.
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import cgi
|
||||
from cherrypy._cpcompat import quote_plus
|
||||
import os
|
||||
import os.path
|
||||
localFile = os.path.join(os.path.dirname(__file__), "coverage.cache")
|
||||
|
||||
the_coverage = None
|
||||
try:
|
||||
from coverage import coverage
|
||||
the_coverage = coverage(data_file=localFile)
|
||||
|
||||
def start():
|
||||
the_coverage.start()
|
||||
except ImportError:
|
||||
# Setting the_coverage to None will raise errors
|
||||
# that need to be trapped downstream.
|
||||
the_coverage = None
|
||||
|
||||
import warnings
|
||||
warnings.warn(
|
||||
"No code coverage will be performed; "
|
||||
"coverage.py could not be imported.")
|
||||
|
||||
def start():
|
||||
pass
|
||||
start.priority = 20
|
||||
|
||||
TEMPLATE_MENU = """<html>
|
||||
<head>
|
||||
<title>CherryPy Coverage Menu</title>
|
||||
<style>
|
||||
body {font: 9pt Arial, serif;}
|
||||
#tree {
|
||||
font-size: 8pt;
|
||||
font-family: Andale Mono, monospace;
|
||||
white-space: pre;
|
||||
}
|
||||
#tree a:active, a:focus {
|
||||
background-color: black;
|
||||
padding: 1px;
|
||||
color: white;
|
||||
border: 0px solid #9999FF;
|
||||
-moz-outline-style: none;
|
||||
}
|
||||
.fail { color: red;}
|
||||
.pass { color: #888;}
|
||||
#pct { text-align: right;}
|
||||
h3 {
|
||||
font-size: small;
|
||||
font-weight: bold;
|
||||
font-style: italic;
|
||||
margin-top: 5px;
|
||||
}
|
||||
input { border: 1px solid #ccc; padding: 2px; }
|
||||
.directory {
|
||||
color: #933;
|
||||
font-style: italic;
|
||||
font-weight: bold;
|
||||
font-size: 10pt;
|
||||
}
|
||||
.file {
|
||||
color: #400;
|
||||
}
|
||||
a { text-decoration: none; }
|
||||
#crumbs {
|
||||
color: white;
|
||||
font-size: 8pt;
|
||||
font-family: Andale Mono, monospace;
|
||||
width: 100%;
|
||||
background-color: black;
|
||||
}
|
||||
#crumbs a {
|
||||
color: #f88;
|
||||
}
|
||||
#options {
|
||||
line-height: 2.3em;
|
||||
border: 1px solid black;
|
||||
background-color: #eee;
|
||||
padding: 4px;
|
||||
}
|
||||
#exclude {
|
||||
width: 100%;
|
||||
margin-bottom: 3px;
|
||||
border: 1px solid #999;
|
||||
}
|
||||
#submit {
|
||||
background-color: black;
|
||||
color: white;
|
||||
border: 0;
|
||||
margin-bottom: -9px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h2>CherryPy Coverage</h2>"""
|
||||
|
||||
TEMPLATE_FORM = """
|
||||
<div id="options">
|
||||
<form action='menu' method=GET>
|
||||
<input type='hidden' name='base' value='%(base)s' />
|
||||
Show percentages
|
||||
<input type='checkbox' %(showpct)s name='showpct' value='checked' /><br />
|
||||
Hide files over
|
||||
<input type='text' id='pct' name='pct' value='%(pct)s' size='3' />%%<br />
|
||||
Exclude files matching<br />
|
||||
<input type='text' id='exclude' name='exclude'
|
||||
value='%(exclude)s' size='20' />
|
||||
<br />
|
||||
|
||||
<input type='submit' value='Change view' id="submit"/>
|
||||
</form>
|
||||
</div>"""
|
||||
|
||||
TEMPLATE_FRAMESET = """<html>
|
||||
<head><title>CherryPy coverage data</title></head>
|
||||
<frameset cols='250, 1*'>
|
||||
<frame src='menu?base=%s' />
|
||||
<frame name='main' src='' />
|
||||
</frameset>
|
||||
</html>
|
||||
"""
|
||||
|
||||
TEMPLATE_COVERAGE = """<html>
|
||||
<head>
|
||||
<title>Coverage for %(name)s</title>
|
||||
<style>
|
||||
h2 { margin-bottom: .25em; }
|
||||
p { margin: .25em; }
|
||||
.covered { color: #000; background-color: #fff; }
|
||||
.notcovered { color: #fee; background-color: #500; }
|
||||
.excluded { color: #00f; background-color: #fff; }
|
||||
table .covered, table .notcovered, table .excluded
|
||||
{ font-family: Andale Mono, monospace;
|
||||
font-size: 10pt; white-space: pre; }
|
||||
|
||||
.lineno { background-color: #eee;}
|
||||
.notcovered .lineno { background-color: #000;}
|
||||
table { border-collapse: collapse;
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h2>%(name)s</h2>
|
||||
<p>%(fullpath)s</p>
|
||||
<p>Coverage: %(pc)s%%</p>"""
|
||||
|
||||
TEMPLATE_LOC_COVERED = """<tr class="covered">
|
||||
<td class="lineno">%s </td>
|
||||
<td>%s</td>
|
||||
</tr>\n"""
|
||||
TEMPLATE_LOC_NOT_COVERED = """<tr class="notcovered">
|
||||
<td class="lineno">%s </td>
|
||||
<td>%s</td>
|
||||
</tr>\n"""
|
||||
TEMPLATE_LOC_EXCLUDED = """<tr class="excluded">
|
||||
<td class="lineno">%s </td>
|
||||
<td>%s</td>
|
||||
</tr>\n"""
|
||||
|
||||
TEMPLATE_ITEM = (
|
||||
"%s%s<a class='file' href='report?name=%s' target='main'>%s</a>\n"
|
||||
)
|
||||
|
||||
|
||||
def _percent(statements, missing):
|
||||
s = len(statements)
|
||||
e = s - len(missing)
|
||||
if s > 0:
|
||||
return int(round(100.0 * e / s))
|
||||
return 0
|
||||
|
||||
|
||||
def _show_branch(root, base, path, pct=0, showpct=False, exclude="",
|
||||
coverage=the_coverage):
|
||||
|
||||
# Show the directory name and any of our children
|
||||
dirs = [k for k, v in root.items() if v]
|
||||
dirs.sort()
|
||||
for name in dirs:
|
||||
newpath = os.path.join(path, name)
|
||||
|
||||
if newpath.lower().startswith(base):
|
||||
relpath = newpath[len(base):]
|
||||
yield "| " * relpath.count(os.sep)
|
||||
yield (
|
||||
"<a class='directory' "
|
||||
"href='menu?base=%s&exclude=%s'>%s</a>\n" %
|
||||
(newpath, quote_plus(exclude), name)
|
||||
)
|
||||
|
||||
for chunk in _show_branch(
|
||||
root[name], base, newpath, pct, showpct,
|
||||
exclude, coverage=coverage
|
||||
):
|
||||
yield chunk
|
||||
|
||||
# Now list the files
|
||||
if path.lower().startswith(base):
|
||||
relpath = path[len(base):]
|
||||
files = [k for k, v in root.items() if not v]
|
||||
files.sort()
|
||||
for name in files:
|
||||
newpath = os.path.join(path, name)
|
||||
|
||||
pc_str = ""
|
||||
if showpct:
|
||||
try:
|
||||
_, statements, _, missing, _ = coverage.analysis2(newpath)
|
||||
except:
|
||||
# Yes, we really want to pass on all errors.
|
||||
pass
|
||||
else:
|
||||
pc = _percent(statements, missing)
|
||||
pc_str = ("%3d%% " % pc).replace(' ', ' ')
|
||||
if pc < float(pct) or pc == -1:
|
||||
pc_str = "<span class='fail'>%s</span>" % pc_str
|
||||
else:
|
||||
pc_str = "<span class='pass'>%s</span>" % pc_str
|
||||
|
||||
yield TEMPLATE_ITEM % ("| " * (relpath.count(os.sep) + 1),
|
||||
pc_str, newpath, name)
|
||||
|
||||
|
||||
def _skip_file(path, exclude):
|
||||
if exclude:
|
||||
return bool(re.search(exclude, path))
|
||||
|
||||
|
||||
def _graft(path, tree):
|
||||
d = tree
|
||||
|
||||
p = path
|
||||
atoms = []
|
||||
while True:
|
||||
p, tail = os.path.split(p)
|
||||
if not tail:
|
||||
break
|
||||
atoms.append(tail)
|
||||
atoms.append(p)
|
||||
if p != "/":
|
||||
atoms.append("/")
|
||||
|
||||
atoms.reverse()
|
||||
for node in atoms:
|
||||
if node:
|
||||
d = d.setdefault(node, {})
|
||||
|
||||
|
||||
def get_tree(base, exclude, coverage=the_coverage):
|
||||
"""Return covered module names as a nested dict."""
|
||||
tree = {}
|
||||
runs = coverage.data.executed_files()
|
||||
for path in runs:
|
||||
if not _skip_file(path, exclude) and not os.path.isdir(path):
|
||||
_graft(path, tree)
|
||||
return tree
|
||||
|
||||
|
||||
class CoverStats(object):
|
||||
|
||||
def __init__(self, coverage, root=None):
|
||||
self.coverage = coverage
|
||||
if root is None:
|
||||
# Guess initial depth. Files outside this path will not be
|
||||
# reachable from the web interface.
|
||||
import cherrypy
|
||||
root = os.path.dirname(cherrypy.__file__)
|
||||
self.root = root
|
||||
|
||||
def index(self):
|
||||
return TEMPLATE_FRAMESET % self.root.lower()
|
||||
index.exposed = True
|
||||
|
||||
def menu(self, base="/", pct="50", showpct="",
|
||||
exclude=r'python\d\.\d|test|tut\d|tutorial'):
|
||||
|
||||
# The coverage module uses all-lower-case names.
|
||||
base = base.lower().rstrip(os.sep)
|
||||
|
||||
yield TEMPLATE_MENU
|
||||
yield TEMPLATE_FORM % locals()
|
||||
|
||||
# Start by showing links for parent paths
|
||||
yield "<div id='crumbs'>"
|
||||
path = ""
|
||||
atoms = base.split(os.sep)
|
||||
atoms.pop()
|
||||
for atom in atoms:
|
||||
path += atom + os.sep
|
||||
yield ("<a href='menu?base=%s&exclude=%s'>%s</a> %s"
|
||||
% (path, quote_plus(exclude), atom, os.sep))
|
||||
yield "</div>"
|
||||
|
||||
yield "<div id='tree'>"
|
||||
|
||||
# Then display the tree
|
||||
tree = get_tree(base, exclude, self.coverage)
|
||||
if not tree:
|
||||
yield "<p>No modules covered.</p>"
|
||||
else:
|
||||
for chunk in _show_branch(tree, base, "/", pct,
|
||||
showpct == 'checked', exclude,
|
||||
coverage=self.coverage):
|
||||
yield chunk
|
||||
|
||||
yield "</div>"
|
||||
yield "</body></html>"
|
||||
menu.exposed = True
|
||||
|
||||
def annotated_file(self, filename, statements, excluded, missing):
|
||||
source = open(filename, 'r')
|
||||
buffer = []
|
||||
for lineno, line in enumerate(source.readlines()):
|
||||
lineno += 1
|
||||
line = line.strip("\n\r")
|
||||
empty_the_buffer = True
|
||||
if lineno in excluded:
|
||||
template = TEMPLATE_LOC_EXCLUDED
|
||||
elif lineno in missing:
|
||||
template = TEMPLATE_LOC_NOT_COVERED
|
||||
elif lineno in statements:
|
||||
template = TEMPLATE_LOC_COVERED
|
||||
else:
|
||||
empty_the_buffer = False
|
||||
buffer.append((lineno, line))
|
||||
if empty_the_buffer:
|
||||
for lno, pastline in buffer:
|
||||
yield template % (lno, cgi.escape(pastline))
|
||||
buffer = []
|
||||
yield template % (lineno, cgi.escape(line))
|
||||
|
||||
def report(self, name):
|
||||
filename, statements, excluded, missing, _ = self.coverage.analysis2(
|
||||
name)
|
||||
pc = _percent(statements, missing)
|
||||
yield TEMPLATE_COVERAGE % dict(name=os.path.basename(name),
|
||||
fullpath=name,
|
||||
pc=pc)
|
||||
yield '<table>\n'
|
||||
for line in self.annotated_file(filename, statements, excluded,
|
||||
missing):
|
||||
yield line
|
||||
yield '</table>'
|
||||
yield '</body>'
|
||||
yield '</html>'
|
||||
report.exposed = True
|
||||
|
||||
|
||||
def serve(path=localFile, port=8080, root=None):
|
||||
if coverage is None:
|
||||
raise ImportError("The coverage module could not be imported.")
|
||||
from coverage import coverage
|
||||
cov = coverage(data_file=path)
|
||||
cov.load()
|
||||
|
||||
import cherrypy
|
||||
cherrypy.config.update({'server.socket_port': int(port),
|
||||
'server.thread_pool': 10,
|
||||
'environment': "production",
|
||||
})
|
||||
cherrypy.quickstart(CoverStats(cov, root))
|
||||
|
||||
if __name__ == "__main__":
|
||||
serve(*tuple(sys.argv[1:]))
|
|
@ -1,687 +0,0 @@
|
|||
"""CPStats, a package for collecting and reporting on program statistics.
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
Statistics about program operation are an invaluable monitoring and debugging
|
||||
tool. Unfortunately, the gathering and reporting of these critical values is
|
||||
usually ad-hoc. This package aims to add a centralized place for gathering
|
||||
statistical performance data, a structure for recording that data which
|
||||
provides for extrapolation of that data into more useful information,
|
||||
and a method of serving that data to both human investigators and
|
||||
monitoring software. Let's examine each of those in more detail.
|
||||
|
||||
Data Gathering
|
||||
--------------
|
||||
|
||||
Just as Python's `logging` module provides a common importable for gathering
|
||||
and sending messages, performance statistics would benefit from a similar
|
||||
common mechanism, and one that does *not* require each package which wishes
|
||||
to collect stats to import a third-party module. Therefore, we choose to
|
||||
re-use the `logging` module by adding a `statistics` object to it.
|
||||
|
||||
That `logging.statistics` object is a nested dict. It is not a custom class,
|
||||
because that would:
|
||||
|
||||
1. require libraries and applications to import a third-party module in
|
||||
order to participate
|
||||
2. inhibit innovation in extrapolation approaches and in reporting tools, and
|
||||
3. be slow.
|
||||
|
||||
There are, however, some specifications regarding the structure of the dict.::
|
||||
|
||||
{
|
||||
+----"SQLAlchemy": {
|
||||
| "Inserts": 4389745,
|
||||
| "Inserts per Second":
|
||||
| lambda s: s["Inserts"] / (time() - s["Start"]),
|
||||
| C +---"Table Statistics": {
|
||||
| o | "widgets": {-----------+
|
||||
N | l | "Rows": 1.3M, | Record
|
||||
a | l | "Inserts": 400, |
|
||||
m | e | },---------------------+
|
||||
e | c | "froobles": {
|
||||
s | t | "Rows": 7845,
|
||||
p | i | "Inserts": 0,
|
||||
a | o | },
|
||||
c | n +---},
|
||||
e | "Slow Queries":
|
||||
| [{"Query": "SELECT * FROM widgets;",
|
||||
| "Processing Time": 47.840923343,
|
||||
| },
|
||||
| ],
|
||||
+----},
|
||||
}
|
||||
|
||||
The `logging.statistics` dict has four levels. The topmost level is nothing
|
||||
more than a set of names to introduce modularity, usually along the lines of
|
||||
package names. If the SQLAlchemy project wanted to participate, for example,
|
||||
it might populate the item `logging.statistics['SQLAlchemy']`, whose value
|
||||
would be a second-layer dict we call a "namespace". Namespaces help multiple
|
||||
packages to avoid collisions over key names, and make reports easier to read,
|
||||
to boot. The maintainers of SQLAlchemy should feel free to use more than one
|
||||
namespace if needed (such as 'SQLAlchemy ORM'). Note that there are no case
|
||||
or other syntax constraints on the namespace names; they should be chosen
|
||||
to be maximally readable by humans (neither too short nor too long).
|
||||
|
||||
Each namespace, then, is a dict of named statistical values, such as
|
||||
'Requests/sec' or 'Uptime'. You should choose names which will look
|
||||
good on a report: spaces and capitalization are just fine.
|
||||
|
||||
In addition to scalars, values in a namespace MAY be a (third-layer)
|
||||
dict, or a list, called a "collection". For example, the CherryPy
|
||||
:class:`StatsTool` keeps track of what each request is doing (or has most
|
||||
recently done) in a 'Requests' collection, where each key is a thread ID; each
|
||||
value in the subdict MUST be a fourth dict (whew!) of statistical data about
|
||||
each thread. We call each subdict in the collection a "record". Similarly,
|
||||
the :class:`StatsTool` also keeps a list of slow queries, where each record
|
||||
contains data about each slow query, in order.
|
||||
|
||||
Values in a namespace or record may also be functions, which brings us to:
|
||||
|
||||
Extrapolation
|
||||
-------------
|
||||
|
||||
The collection of statistical data needs to be fast, as close to unnoticeable
|
||||
as possible to the host program. That requires us to minimize I/O, for example,
|
||||
but in Python it also means we need to minimize function calls. So when you
|
||||
are designing your namespace and record values, try to insert the most basic
|
||||
scalar values you already have on hand.
|
||||
|
||||
When it comes time to report on the gathered data, however, we usually have
|
||||
much more freedom in what we can calculate. Therefore, whenever reporting
|
||||
tools (like the provided :class:`StatsPage` CherryPy class) fetch the contents
|
||||
of `logging.statistics` for reporting, they first call
|
||||
`extrapolate_statistics` (passing the whole `statistics` dict as the only
|
||||
argument). This makes a deep copy of the statistics dict so that the
|
||||
reporting tool can both iterate over it and even change it without harming
|
||||
the original. But it also expands any functions in the dict by calling them.
|
||||
For example, you might have a 'Current Time' entry in the namespace with the
|
||||
value "lambda scope: time.time()". The "scope" parameter is the current
|
||||
namespace dict (or record, if we're currently expanding one of those
|
||||
instead), allowing you access to existing static entries. If you're truly
|
||||
evil, you can even modify more than one entry at a time.
|
||||
|
||||
However, don't try to calculate an entry and then use its value in further
|
||||
extrapolations; the order in which the functions are called is not guaranteed.
|
||||
This can lead to a certain amount of duplicated work (or a redesign of your
|
||||
schema), but that's better than complicating the spec.
|
||||
|
||||
After the whole thing has been extrapolated, it's time for:
|
||||
|
||||
Reporting
|
||||
---------
|
||||
|
||||
The :class:`StatsPage` class grabs the `logging.statistics` dict, extrapolates
|
||||
it all, and then transforms it to HTML for easy viewing. Each namespace gets
|
||||
its own header and attribute table, plus an extra table for each collection.
|
||||
This is NOT part of the statistics specification; other tools can format how
|
||||
they like.
|
||||
|
||||
You can control which columns are output and how they are formatted by updating
|
||||
StatsPage.formatting, which is a dict that mirrors the keys and nesting of
|
||||
`logging.statistics`. The difference is that, instead of data values, it has
|
||||
formatting values. Use None for a given key to indicate to the StatsPage that a
|
||||
given column should not be output. Use a string with formatting
|
||||
(such as '%.3f') to interpolate the value(s), or use a callable (such as
|
||||
lambda v: v.isoformat()) for more advanced formatting. Any entry which is not
|
||||
mentioned in the formatting dict is output unchanged.
|
||||
|
||||
Monitoring
|
||||
----------
|
||||
|
||||
Although the HTML output takes pains to assign unique id's to each <td> with
|
||||
statistical data, you're probably better off fetching /cpstats/data, which
|
||||
outputs the whole (extrapolated) `logging.statistics` dict in JSON format.
|
||||
That is probably easier to parse, and doesn't have any formatting controls,
|
||||
so you get the "original" data in a consistently-serialized format.
|
||||
Note: there's no treatment yet for datetime objects. Try time.time() instead
|
||||
for now if you can. Nagios will probably thank you.
|
||||
|
||||
Turning Collection Off
|
||||
----------------------
|
||||
|
||||
It is recommended each namespace have an "Enabled" item which, if False,
|
||||
stops collection (but not reporting) of statistical data. Applications
|
||||
SHOULD provide controls to pause and resume collection by setting these
|
||||
entries to False or True, if present.
|
||||
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
To collect statistics on CherryPy applications::
|
||||
|
||||
from cherrypy.lib import cpstats
|
||||
appconfig['/']['tools.cpstats.on'] = True
|
||||
|
||||
To collect statistics on your own code::
|
||||
|
||||
import logging
|
||||
# Initialize the repository
|
||||
if not hasattr(logging, 'statistics'): logging.statistics = {}
|
||||
# Initialize my namespace
|
||||
mystats = logging.statistics.setdefault('My Stuff', {})
|
||||
# Initialize my namespace's scalars and collections
|
||||
mystats.update({
|
||||
'Enabled': True,
|
||||
'Start Time': time.time(),
|
||||
'Important Events': 0,
|
||||
'Events/Second': lambda s: (
|
||||
(s['Important Events'] / (time.time() - s['Start Time']))),
|
||||
})
|
||||
...
|
||||
for event in events:
|
||||
...
|
||||
# Collect stats
|
||||
if mystats.get('Enabled', False):
|
||||
mystats['Important Events'] += 1
|
||||
|
||||
To report statistics::
|
||||
|
||||
root.cpstats = cpstats.StatsPage()
|
||||
|
||||
To format statistics reports::
|
||||
|
||||
See 'Reporting', above.
|
||||
|
||||
"""
|
||||
|
||||
# ------------------------------- Statistics -------------------------------- #
|
||||
|
||||
import logging
|
||||
if not hasattr(logging, 'statistics'):
|
||||
logging.statistics = {}
|
||||
|
||||
|
||||
def extrapolate_statistics(scope):
|
||||
"""Return an extrapolated copy of the given scope."""
|
||||
c = {}
|
||||
for k, v in list(scope.items()):
|
||||
if isinstance(v, dict):
|
||||
v = extrapolate_statistics(v)
|
||||
elif isinstance(v, (list, tuple)):
|
||||
v = [extrapolate_statistics(record) for record in v]
|
||||
elif hasattr(v, '__call__'):
|
||||
v = v(scope)
|
||||
c[k] = v
|
||||
return c
|
||||
|
||||
|
||||
# -------------------- CherryPy Applications Statistics --------------------- #
|
||||
|
||||
import threading
|
||||
import time
|
||||
|
||||
import cherrypy
|
||||
|
||||
appstats = logging.statistics.setdefault('CherryPy Applications', {})
|
||||
appstats.update({
|
||||
'Enabled': True,
|
||||
'Bytes Read/Request': lambda s: (
|
||||
s['Total Requests'] and
|
||||
(s['Total Bytes Read'] / float(s['Total Requests'])) or
|
||||
0.0
|
||||
),
|
||||
'Bytes Read/Second': lambda s: s['Total Bytes Read'] / s['Uptime'](s),
|
||||
'Bytes Written/Request': lambda s: (
|
||||
s['Total Requests'] and
|
||||
(s['Total Bytes Written'] / float(s['Total Requests'])) or
|
||||
0.0
|
||||
),
|
||||
'Bytes Written/Second': lambda s: (
|
||||
s['Total Bytes Written'] / s['Uptime'](s)
|
||||
),
|
||||
'Current Time': lambda s: time.time(),
|
||||
'Current Requests': 0,
|
||||
'Requests/Second': lambda s: float(s['Total Requests']) / s['Uptime'](s),
|
||||
'Server Version': cherrypy.__version__,
|
||||
'Start Time': time.time(),
|
||||
'Total Bytes Read': 0,
|
||||
'Total Bytes Written': 0,
|
||||
'Total Requests': 0,
|
||||
'Total Time': 0,
|
||||
'Uptime': lambda s: time.time() - s['Start Time'],
|
||||
'Requests': {},
|
||||
})
|
||||
|
||||
proc_time = lambda s: time.time() - s['Start Time']
|
||||
|
||||
|
||||
class ByteCountWrapper(object):
|
||||
|
||||
"""Wraps a file-like object, counting the number of bytes read."""
|
||||
|
||||
def __init__(self, rfile):
|
||||
self.rfile = rfile
|
||||
self.bytes_read = 0
|
||||
|
||||
def read(self, size=-1):
|
||||
data = self.rfile.read(size)
|
||||
self.bytes_read += len(data)
|
||||
return data
|
||||
|
||||
def readline(self, size=-1):
|
||||
data = self.rfile.readline(size)
|
||||
self.bytes_read += len(data)
|
||||
return data
|
||||
|
||||
def readlines(self, sizehint=0):
|
||||
# Shamelessly stolen from StringIO
|
||||
total = 0
|
||||
lines = []
|
||||
line = self.readline()
|
||||
while line:
|
||||
lines.append(line)
|
||||
total += len(line)
|
||||
if 0 < sizehint <= total:
|
||||
break
|
||||
line = self.readline()
|
||||
return lines
|
||||
|
||||
def close(self):
|
||||
self.rfile.close()
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def next(self):
|
||||
data = self.rfile.next()
|
||||
self.bytes_read += len(data)
|
||||
return data
|
||||
|
||||
|
||||
average_uriset_time = lambda s: s['Count'] and (s['Sum'] / s['Count']) or 0
|
||||
|
||||
|
||||
class StatsTool(cherrypy.Tool):
|
||||
|
||||
"""Record various information about the current request."""
|
||||
|
||||
def __init__(self):
|
||||
cherrypy.Tool.__init__(self, 'on_end_request', self.record_stop)
|
||||
|
||||
def _setup(self):
|
||||
"""Hook this tool into cherrypy.request.
|
||||
|
||||
The standard CherryPy request object will automatically call this
|
||||
method when the tool is "turned on" in config.
|
||||
"""
|
||||
if appstats.get('Enabled', False):
|
||||
cherrypy.Tool._setup(self)
|
||||
self.record_start()
|
||||
|
||||
def record_start(self):
|
||||
"""Record the beginning of a request."""
|
||||
request = cherrypy.serving.request
|
||||
if not hasattr(request.rfile, 'bytes_read'):
|
||||
request.rfile = ByteCountWrapper(request.rfile)
|
||||
request.body.fp = request.rfile
|
||||
|
||||
r = request.remote
|
||||
|
||||
appstats['Current Requests'] += 1
|
||||
appstats['Total Requests'] += 1
|
||||
appstats['Requests'][threading._get_ident()] = {
|
||||
'Bytes Read': None,
|
||||
'Bytes Written': None,
|
||||
# Use a lambda so the ip gets updated by tools.proxy later
|
||||
'Client': lambda s: '%s:%s' % (r.ip, r.port),
|
||||
'End Time': None,
|
||||
'Processing Time': proc_time,
|
||||
'Request-Line': request.request_line,
|
||||
'Response Status': None,
|
||||
'Start Time': time.time(),
|
||||
}
|
||||
|
||||
def record_stop(
|
||||
self, uriset=None, slow_queries=1.0, slow_queries_count=100,
|
||||
debug=False, **kwargs):
|
||||
"""Record the end of a request."""
|
||||
resp = cherrypy.serving.response
|
||||
w = appstats['Requests'][threading._get_ident()]
|
||||
|
||||
r = cherrypy.request.rfile.bytes_read
|
||||
w['Bytes Read'] = r
|
||||
appstats['Total Bytes Read'] += r
|
||||
|
||||
if resp.stream:
|
||||
w['Bytes Written'] = 'chunked'
|
||||
else:
|
||||
cl = int(resp.headers.get('Content-Length', 0))
|
||||
w['Bytes Written'] = cl
|
||||
appstats['Total Bytes Written'] += cl
|
||||
|
||||
w['Response Status'] = getattr(
|
||||
resp, 'output_status', None) or resp.status
|
||||
|
||||
w['End Time'] = time.time()
|
||||
p = w['End Time'] - w['Start Time']
|
||||
w['Processing Time'] = p
|
||||
appstats['Total Time'] += p
|
||||
|
||||
appstats['Current Requests'] -= 1
|
||||
|
||||
if debug:
|
||||
cherrypy.log('Stats recorded: %s' % repr(w), 'TOOLS.CPSTATS')
|
||||
|
||||
if uriset:
|
||||
rs = appstats.setdefault('URI Set Tracking', {})
|
||||
r = rs.setdefault(uriset, {
|
||||
'Min': None, 'Max': None, 'Count': 0, 'Sum': 0,
|
||||
'Avg': average_uriset_time})
|
||||
if r['Min'] is None or p < r['Min']:
|
||||
r['Min'] = p
|
||||
if r['Max'] is None or p > r['Max']:
|
||||
r['Max'] = p
|
||||
r['Count'] += 1
|
||||
r['Sum'] += p
|
||||
|
||||
if slow_queries and p > slow_queries:
|
||||
sq = appstats.setdefault('Slow Queries', [])
|
||||
sq.append(w.copy())
|
||||
if len(sq) > slow_queries_count:
|
||||
sq.pop(0)
|
||||
|
||||
|
||||
import cherrypy
|
||||
cherrypy.tools.cpstats = StatsTool()
|
||||
|
||||
|
||||
# ---------------------- CherryPy Statistics Reporting ---------------------- #
|
||||
|
||||
import os
|
||||
thisdir = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
try:
|
||||
import json
|
||||
except ImportError:
|
||||
try:
|
||||
import simplejson as json
|
||||
except ImportError:
|
||||
json = None
|
||||
|
||||
|
||||
missing = object()
|
||||
|
||||
locale_date = lambda v: time.strftime('%c', time.gmtime(v))
|
||||
iso_format = lambda v: time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(v))
|
||||
|
||||
|
||||
def pause_resume(ns):
|
||||
def _pause_resume(enabled):
|
||||
pause_disabled = ''
|
||||
resume_disabled = ''
|
||||
if enabled:
|
||||
resume_disabled = 'disabled="disabled" '
|
||||
else:
|
||||
pause_disabled = 'disabled="disabled" '
|
||||
return """
|
||||
<form action="pause" method="POST" style="display:inline">
|
||||
<input type="hidden" name="namespace" value="%s" />
|
||||
<input type="submit" value="Pause" %s/>
|
||||
</form>
|
||||
<form action="resume" method="POST" style="display:inline">
|
||||
<input type="hidden" name="namespace" value="%s" />
|
||||
<input type="submit" value="Resume" %s/>
|
||||
</form>
|
||||
""" % (ns, pause_disabled, ns, resume_disabled)
|
||||
return _pause_resume
|
||||
|
||||
|
||||
class StatsPage(object):
|
||||
|
||||
formatting = {
|
||||
'CherryPy Applications': {
|
||||
'Enabled': pause_resume('CherryPy Applications'),
|
||||
'Bytes Read/Request': '%.3f',
|
||||
'Bytes Read/Second': '%.3f',
|
||||
'Bytes Written/Request': '%.3f',
|
||||
'Bytes Written/Second': '%.3f',
|
||||
'Current Time': iso_format,
|
||||
'Requests/Second': '%.3f',
|
||||
'Start Time': iso_format,
|
||||
'Total Time': '%.3f',
|
||||
'Uptime': '%.3f',
|
||||
'Slow Queries': {
|
||||
'End Time': None,
|
||||
'Processing Time': '%.3f',
|
||||
'Start Time': iso_format,
|
||||
},
|
||||
'URI Set Tracking': {
|
||||
'Avg': '%.3f',
|
||||
'Max': '%.3f',
|
||||
'Min': '%.3f',
|
||||
'Sum': '%.3f',
|
||||
},
|
||||
'Requests': {
|
||||
'Bytes Read': '%s',
|
||||
'Bytes Written': '%s',
|
||||
'End Time': None,
|
||||
'Processing Time': '%.3f',
|
||||
'Start Time': None,
|
||||
},
|
||||
},
|
||||
'CherryPy WSGIServer': {
|
||||
'Enabled': pause_resume('CherryPy WSGIServer'),
|
||||
'Connections/second': '%.3f',
|
||||
'Start time': iso_format,
|
||||
},
|
||||
}
|
||||
|
||||
def index(self):
|
||||
# Transform the raw data into pretty output for HTML
|
||||
yield """
|
||||
<html>
|
||||
<head>
|
||||
<title>Statistics</title>
|
||||
<style>
|
||||
|
||||
th, td {
|
||||
padding: 0.25em 0.5em;
|
||||
border: 1px solid #666699;
|
||||
}
|
||||
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
table.stats1 {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
table.stats1 th {
|
||||
font-weight: bold;
|
||||
text-align: right;
|
||||
background-color: #CCD5DD;
|
||||
}
|
||||
|
||||
table.stats2, h2 {
|
||||
margin-left: 50px;
|
||||
}
|
||||
|
||||
table.stats2 th {
|
||||
font-weight: bold;
|
||||
text-align: center;
|
||||
background-color: #CCD5DD;
|
||||
}
|
||||
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
"""
|
||||
for title, scalars, collections in self.get_namespaces():
|
||||
yield """
|
||||
<h1>%s</h1>
|
||||
|
||||
<table class='stats1'>
|
||||
<tbody>
|
||||
""" % title
|
||||
for i, (key, value) in enumerate(scalars):
|
||||
colnum = i % 3
|
||||
if colnum == 0:
|
||||
yield """
|
||||
<tr>"""
|
||||
yield (
|
||||
"""
|
||||
<th>%(key)s</th><td id='%(title)s-%(key)s'>%(value)s</td>""" %
|
||||
vars()
|
||||
)
|
||||
if colnum == 2:
|
||||
yield """
|
||||
</tr>"""
|
||||
|
||||
if colnum == 0:
|
||||
yield """
|
||||
<th></th><td></td>
|
||||
<th></th><td></td>
|
||||
</tr>"""
|
||||
elif colnum == 1:
|
||||
yield """
|
||||
<th></th><td></td>
|
||||
</tr>"""
|
||||
yield """
|
||||
</tbody>
|
||||
</table>"""
|
||||
|
||||
for subtitle, headers, subrows in collections:
|
||||
yield """
|
||||
<h2>%s</h2>
|
||||
<table class='stats2'>
|
||||
<thead>
|
||||
<tr>""" % subtitle
|
||||
for key in headers:
|
||||
yield """
|
||||
<th>%s</th>""" % key
|
||||
yield """
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>"""
|
||||
for subrow in subrows:
|
||||
yield """
|
||||
<tr>"""
|
||||
for value in subrow:
|
||||
yield """
|
||||
<td>%s</td>""" % value
|
||||
yield """
|
||||
</tr>"""
|
||||
yield """
|
||||
</tbody>
|
||||
</table>"""
|
||||
yield """
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
index.exposed = True
|
||||
|
||||
def get_namespaces(self):
|
||||
"""Yield (title, scalars, collections) for each namespace."""
|
||||
s = extrapolate_statistics(logging.statistics)
|
||||
for title, ns in sorted(s.items()):
|
||||
scalars = []
|
||||
collections = []
|
||||
ns_fmt = self.formatting.get(title, {})
|
||||
for k, v in sorted(ns.items()):
|
||||
fmt = ns_fmt.get(k, {})
|
||||
if isinstance(v, dict):
|
||||
headers, subrows = self.get_dict_collection(v, fmt)
|
||||
collections.append((k, ['ID'] + headers, subrows))
|
||||
elif isinstance(v, (list, tuple)):
|
||||
headers, subrows = self.get_list_collection(v, fmt)
|
||||
collections.append((k, headers, subrows))
|
||||
else:
|
||||
format = ns_fmt.get(k, missing)
|
||||
if format is None:
|
||||
# Don't output this column.
|
||||
continue
|
||||
if hasattr(format, '__call__'):
|
||||
v = format(v)
|
||||
elif format is not missing:
|
||||
v = format % v
|
||||
scalars.append((k, v))
|
||||
yield title, scalars, collections
|
||||
|
||||
def get_dict_collection(self, v, formatting):
|
||||
"""Return ([headers], [rows]) for the given collection."""
|
||||
# E.g., the 'Requests' dict.
|
||||
headers = []
|
||||
for record in v.itervalues():
|
||||
for k3 in record:
|
||||
format = formatting.get(k3, missing)
|
||||
if format is None:
|
||||
# Don't output this column.
|
||||
continue
|
||||
if k3 not in headers:
|
||||
headers.append(k3)
|
||||
headers.sort()
|
||||
|
||||
subrows = []
|
||||
for k2, record in sorted(v.items()):
|
||||
subrow = [k2]
|
||||
for k3 in headers:
|
||||
v3 = record.get(k3, '')
|
||||
format = formatting.get(k3, missing)
|
||||
if format is None:
|
||||
# Don't output this column.
|
||||
continue
|
||||
if hasattr(format, '__call__'):
|
||||
v3 = format(v3)
|
||||
elif format is not missing:
|
||||
v3 = format % v3
|
||||
subrow.append(v3)
|
||||
subrows.append(subrow)
|
||||
|
||||
return headers, subrows
|
||||
|
||||
def get_list_collection(self, v, formatting):
|
||||
"""Return ([headers], [subrows]) for the given collection."""
|
||||
# E.g., the 'Slow Queries' list.
|
||||
headers = []
|
||||
for record in v:
|
||||
for k3 in record:
|
||||
format = formatting.get(k3, missing)
|
||||
if format is None:
|
||||
# Don't output this column.
|
||||
continue
|
||||
if k3 not in headers:
|
||||
headers.append(k3)
|
||||
headers.sort()
|
||||
|
||||
subrows = []
|
||||
for record in v:
|
||||
subrow = []
|
||||
for k3 in headers:
|
||||
v3 = record.get(k3, '')
|
||||
format = formatting.get(k3, missing)
|
||||
if format is None:
|
||||
# Don't output this column.
|
||||
continue
|
||||
if hasattr(format, '__call__'):
|
||||
v3 = format(v3)
|
||||
elif format is not missing:
|
||||
v3 = format % v3
|
||||
subrow.append(v3)
|
||||
subrows.append(subrow)
|
||||
|
||||
return headers, subrows
|
||||
|
||||
if json is not None:
|
||||
def data(self):
|
||||
s = extrapolate_statistics(logging.statistics)
|
||||
cherrypy.response.headers['Content-Type'] = 'application/json'
|
||||
return json.dumps(s, sort_keys=True, indent=4)
|
||||
data.exposed = True
|
||||
|
||||
def pause(self, namespace):
|
||||
logging.statistics.get(namespace, {})['Enabled'] = False
|
||||
raise cherrypy.HTTPRedirect('./')
|
||||
pause.exposed = True
|
||||
pause.cp_config = {'tools.allow.on': True,
|
||||
'tools.allow.methods': ['POST']}
|
||||
|
||||
def resume(self, namespace):
|
||||
logging.statistics.get(namespace, {})['Enabled'] = True
|
||||
raise cherrypy.HTTPRedirect('./')
|
||||
resume.exposed = True
|
||||
resume.cp_config = {'tools.allow.on': True,
|
||||
'tools.allow.methods': ['POST']}
|
|
@ -1,630 +0,0 @@
|
|||
"""Functions for builtin CherryPy tools."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
import cherrypy
|
||||
from cherrypy._cpcompat import basestring, md5, set, unicodestr
|
||||
from cherrypy.lib import httputil as _httputil
|
||||
from cherrypy.lib import is_iterator
|
||||
|
||||
|
||||
# Conditional HTTP request support #
|
||||
|
||||
def validate_etags(autotags=False, debug=False):
|
||||
"""Validate the current ETag against If-Match, If-None-Match headers.
|
||||
|
||||
If autotags is True, an ETag response-header value will be provided
|
||||
from an MD5 hash of the response body (unless some other code has
|
||||
already provided an ETag header). If False (the default), the ETag
|
||||
will not be automatic.
|
||||
|
||||
WARNING: the autotags feature is not designed for URL's which allow
|
||||
methods other than GET. For example, if a POST to the same URL returns
|
||||
no content, the automatic ETag will be incorrect, breaking a fundamental
|
||||
use for entity tags in a possibly destructive fashion. Likewise, if you
|
||||
raise 304 Not Modified, the response body will be empty, the ETag hash
|
||||
will be incorrect, and your application will break.
|
||||
See :rfc:`2616` Section 14.24.
|
||||
"""
|
||||
response = cherrypy.serving.response
|
||||
|
||||
# Guard against being run twice.
|
||||
if hasattr(response, "ETag"):
|
||||
return
|
||||
|
||||
status, reason, msg = _httputil.valid_status(response.status)
|
||||
|
||||
etag = response.headers.get('ETag')
|
||||
|
||||
# Automatic ETag generation. See warning in docstring.
|
||||
if etag:
|
||||
if debug:
|
||||
cherrypy.log('ETag already set: %s' % etag, 'TOOLS.ETAGS')
|
||||
elif not autotags:
|
||||
if debug:
|
||||
cherrypy.log('Autotags off', 'TOOLS.ETAGS')
|
||||
elif status != 200:
|
||||
if debug:
|
||||
cherrypy.log('Status not 200', 'TOOLS.ETAGS')
|
||||
else:
|
||||
etag = response.collapse_body()
|
||||
etag = '"%s"' % md5(etag).hexdigest()
|
||||
if debug:
|
||||
cherrypy.log('Setting ETag: %s' % etag, 'TOOLS.ETAGS')
|
||||
response.headers['ETag'] = etag
|
||||
|
||||
response.ETag = etag
|
||||
|
||||
# "If the request would, without the If-Match header field, result in
|
||||
# anything other than a 2xx or 412 status, then the If-Match header
|
||||
# MUST be ignored."
|
||||
if debug:
|
||||
cherrypy.log('Status: %s' % status, 'TOOLS.ETAGS')
|
||||
if status >= 200 and status <= 299:
|
||||
request = cherrypy.serving.request
|
||||
|
||||
conditions = request.headers.elements('If-Match') or []
|
||||
conditions = [str(x) for x in conditions]
|
||||
if debug:
|
||||
cherrypy.log('If-Match conditions: %s' % repr(conditions),
|
||||
'TOOLS.ETAGS')
|
||||
if conditions and not (conditions == ["*"] or etag in conditions):
|
||||
raise cherrypy.HTTPError(412, "If-Match failed: ETag %r did "
|
||||
"not match %r" % (etag, conditions))
|
||||
|
||||
conditions = request.headers.elements('If-None-Match') or []
|
||||
conditions = [str(x) for x in conditions]
|
||||
if debug:
|
||||
cherrypy.log('If-None-Match conditions: %s' % repr(conditions),
|
||||
'TOOLS.ETAGS')
|
||||
if conditions == ["*"] or etag in conditions:
|
||||
if debug:
|
||||
cherrypy.log('request.method: %s' %
|
||||
request.method, 'TOOLS.ETAGS')
|
||||
if request.method in ("GET", "HEAD"):
|
||||
raise cherrypy.HTTPRedirect([], 304)
|
||||
else:
|
||||
raise cherrypy.HTTPError(412, "If-None-Match failed: ETag %r "
|
||||
"matched %r" % (etag, conditions))
|
||||
|
||||
|
||||
def validate_since():
|
||||
"""Validate the current Last-Modified against If-Modified-Since headers.
|
||||
|
||||
If no code has set the Last-Modified response header, then no validation
|
||||
will be performed.
|
||||
"""
|
||||
response = cherrypy.serving.response
|
||||
lastmod = response.headers.get('Last-Modified')
|
||||
if lastmod:
|
||||
status, reason, msg = _httputil.valid_status(response.status)
|
||||
|
||||
request = cherrypy.serving.request
|
||||
|
||||
since = request.headers.get('If-Unmodified-Since')
|
||||
if since and since != lastmod:
|
||||
if (status >= 200 and status <= 299) or status == 412:
|
||||
raise cherrypy.HTTPError(412)
|
||||
|
||||
since = request.headers.get('If-Modified-Since')
|
||||
if since and since == lastmod:
|
||||
if (status >= 200 and status <= 299) or status == 304:
|
||||
if request.method in ("GET", "HEAD"):
|
||||
raise cherrypy.HTTPRedirect([], 304)
|
||||
else:
|
||||
raise cherrypy.HTTPError(412)
|
||||
|
||||
|
||||
# Tool code #
|
||||
|
||||
def allow(methods=None, debug=False):
|
||||
"""Raise 405 if request.method not in methods (default ['GET', 'HEAD']).
|
||||
|
||||
The given methods are case-insensitive, and may be in any order.
|
||||
If only one method is allowed, you may supply a single string;
|
||||
if more than one, supply a list of strings.
|
||||
|
||||
Regardless of whether the current method is allowed or not, this
|
||||
also emits an 'Allow' response header, containing the given methods.
|
||||
"""
|
||||
if not isinstance(methods, (tuple, list)):
|
||||
methods = [methods]
|
||||
methods = [m.upper() for m in methods if m]
|
||||
if not methods:
|
||||
methods = ['GET', 'HEAD']
|
||||
elif 'GET' in methods and 'HEAD' not in methods:
|
||||
methods.append('HEAD')
|
||||
|
||||
cherrypy.response.headers['Allow'] = ', '.join(methods)
|
||||
if cherrypy.request.method not in methods:
|
||||
if debug:
|
||||
cherrypy.log('request.method %r not in methods %r' %
|
||||
(cherrypy.request.method, methods), 'TOOLS.ALLOW')
|
||||
raise cherrypy.HTTPError(405)
|
||||
else:
|
||||
if debug:
|
||||
cherrypy.log('request.method %r in methods %r' %
|
||||
(cherrypy.request.method, methods), 'TOOLS.ALLOW')
|
||||
|
||||
|
||||
def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For',
|
||||
scheme='X-Forwarded-Proto', debug=False):
|
||||
"""Change the base URL (scheme://host[:port][/path]).
|
||||
|
||||
For running a CP server behind Apache, lighttpd, or other HTTP server.
|
||||
|
||||
For Apache and lighttpd, you should leave the 'local' argument at the
|
||||
default value of 'X-Forwarded-Host'. For Squid, you probably want to set
|
||||
tools.proxy.local = 'Origin'.
|
||||
|
||||
If you want the new request.base to include path info (not just the host),
|
||||
you must explicitly set base to the full base path, and ALSO set 'local'
|
||||
to '', so that the X-Forwarded-Host request header (which never includes
|
||||
path info) does not override it. Regardless, the value for 'base' MUST
|
||||
NOT end in a slash.
|
||||
|
||||
cherrypy.request.remote.ip (the IP address of the client) will be
|
||||
rewritten if the header specified by the 'remote' arg is valid.
|
||||
By default, 'remote' is set to 'X-Forwarded-For'. If you do not
|
||||
want to rewrite remote.ip, set the 'remote' arg to an empty string.
|
||||
"""
|
||||
|
||||
request = cherrypy.serving.request
|
||||
|
||||
if scheme:
|
||||
s = request.headers.get(scheme, None)
|
||||
if debug:
|
||||
cherrypy.log('Testing scheme %r:%r' % (scheme, s), 'TOOLS.PROXY')
|
||||
if s == 'on' and 'ssl' in scheme.lower():
|
||||
# This handles e.g. webfaction's 'X-Forwarded-Ssl: on' header
|
||||
scheme = 'https'
|
||||
else:
|
||||
# This is for lighttpd/pound/Mongrel's 'X-Forwarded-Proto: https'
|
||||
scheme = s
|
||||
if not scheme:
|
||||
scheme = request.base[:request.base.find("://")]
|
||||
|
||||
if local:
|
||||
lbase = request.headers.get(local, None)
|
||||
if debug:
|
||||
cherrypy.log('Testing local %r:%r' % (local, lbase), 'TOOLS.PROXY')
|
||||
if lbase is not None:
|
||||
base = lbase.split(',')[0]
|
||||
if not base:
|
||||
port = request.local.port
|
||||
if port == 80:
|
||||
base = '127.0.0.1'
|
||||
else:
|
||||
base = '127.0.0.1:%s' % port
|
||||
|
||||
if base.find("://") == -1:
|
||||
# add http:// or https:// if needed
|
||||
base = scheme + "://" + base
|
||||
|
||||
request.base = base
|
||||
|
||||
if remote:
|
||||
xff = request.headers.get(remote)
|
||||
if debug:
|
||||
cherrypy.log('Testing remote %r:%r' % (remote, xff), 'TOOLS.PROXY')
|
||||
if xff:
|
||||
if remote == 'X-Forwarded-For':
|
||||
#Bug #1268
|
||||
xff = xff.split(',')[0].strip()
|
||||
request.remote.ip = xff
|
||||
|
||||
|
||||
def ignore_headers(headers=('Range',), debug=False):
|
||||
"""Delete request headers whose field names are included in 'headers'.
|
||||
|
||||
This is a useful tool for working behind certain HTTP servers;
|
||||
for example, Apache duplicates the work that CP does for 'Range'
|
||||
headers, and will doubly-truncate the response.
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
for name in headers:
|
||||
if name in request.headers:
|
||||
if debug:
|
||||
cherrypy.log('Ignoring request header %r' % name,
|
||||
'TOOLS.IGNORE_HEADERS')
|
||||
del request.headers[name]
|
||||
|
||||
|
||||
def response_headers(headers=None, debug=False):
|
||||
"""Set headers on the response."""
|
||||
if debug:
|
||||
cherrypy.log('Setting response headers: %s' % repr(headers),
|
||||
'TOOLS.RESPONSE_HEADERS')
|
||||
for name, value in (headers or []):
|
||||
cherrypy.serving.response.headers[name] = value
|
||||
response_headers.failsafe = True
|
||||
|
||||
|
||||
def referer(pattern, accept=True, accept_missing=False, error=403,
|
||||
message='Forbidden Referer header.', debug=False):
|
||||
"""Raise HTTPError if Referer header does/does not match the given pattern.
|
||||
|
||||
pattern
|
||||
A regular expression pattern to test against the Referer.
|
||||
|
||||
accept
|
||||
If True, the Referer must match the pattern; if False,
|
||||
the Referer must NOT match the pattern.
|
||||
|
||||
accept_missing
|
||||
If True, permit requests with no Referer header.
|
||||
|
||||
error
|
||||
The HTTP error code to return to the client on failure.
|
||||
|
||||
message
|
||||
A string to include in the response body on failure.
|
||||
|
||||
"""
|
||||
try:
|
||||
ref = cherrypy.serving.request.headers['Referer']
|
||||
match = bool(re.match(pattern, ref))
|
||||
if debug:
|
||||
cherrypy.log('Referer %r matches %r' % (ref, pattern),
|
||||
'TOOLS.REFERER')
|
||||
if accept == match:
|
||||
return
|
||||
except KeyError:
|
||||
if debug:
|
||||
cherrypy.log('No Referer header', 'TOOLS.REFERER')
|
||||
if accept_missing:
|
||||
return
|
||||
|
||||
raise cherrypy.HTTPError(error, message)
|
||||
|
||||
|
||||
class SessionAuth(object):
|
||||
|
||||
"""Assert that the user is logged in."""
|
||||
|
||||
session_key = "username"
|
||||
debug = False
|
||||
|
||||
def check_username_and_password(self, username, password):
|
||||
pass
|
||||
|
||||
def anonymous(self):
|
||||
"""Provide a temporary user name for anonymous users."""
|
||||
pass
|
||||
|
||||
def on_login(self, username):
|
||||
pass
|
||||
|
||||
def on_logout(self, username):
|
||||
pass
|
||||
|
||||
def on_check(self, username):
|
||||
pass
|
||||
|
||||
def login_screen(self, from_page='..', username='', error_msg='',
|
||||
**kwargs):
|
||||
return (unicodestr("""<html><body>
|
||||
Message: %(error_msg)s
|
||||
<form method="post" action="do_login">
|
||||
Login: <input type="text" name="username" value="%(username)s" size="10" />
|
||||
<br />
|
||||
Password: <input type="password" name="password" size="10" />
|
||||
<br />
|
||||
<input type="hidden" name="from_page" value="%(from_page)s" />
|
||||
<br />
|
||||
<input type="submit" />
|
||||
</form>
|
||||
</body></html>""") % vars()).encode("utf-8")
|
||||
|
||||
def do_login(self, username, password, from_page='..', **kwargs):
|
||||
"""Login. May raise redirect, or return True if request handled."""
|
||||
response = cherrypy.serving.response
|
||||
error_msg = self.check_username_and_password(username, password)
|
||||
if error_msg:
|
||||
body = self.login_screen(from_page, username, error_msg)
|
||||
response.body = body
|
||||
if "Content-Length" in response.headers:
|
||||
# Delete Content-Length header so finalize() recalcs it.
|
||||
del response.headers["Content-Length"]
|
||||
return True
|
||||
else:
|
||||
cherrypy.serving.request.login = username
|
||||
cherrypy.session[self.session_key] = username
|
||||
self.on_login(username)
|
||||
raise cherrypy.HTTPRedirect(from_page or "/")
|
||||
|
||||
def do_logout(self, from_page='..', **kwargs):
|
||||
"""Logout. May raise redirect, or return True if request handled."""
|
||||
sess = cherrypy.session
|
||||
username = sess.get(self.session_key)
|
||||
sess[self.session_key] = None
|
||||
if username:
|
||||
cherrypy.serving.request.login = None
|
||||
self.on_logout(username)
|
||||
raise cherrypy.HTTPRedirect(from_page)
|
||||
|
||||
def do_check(self):
|
||||
"""Assert username. Raise redirect, or return True if request handled.
|
||||
"""
|
||||
sess = cherrypy.session
|
||||
request = cherrypy.serving.request
|
||||
response = cherrypy.serving.response
|
||||
|
||||
username = sess.get(self.session_key)
|
||||
if not username:
|
||||
sess[self.session_key] = username = self.anonymous()
|
||||
self._debug_message('No session[username], trying anonymous')
|
||||
if not username:
|
||||
url = cherrypy.url(qs=request.query_string)
|
||||
self._debug_message(
|
||||
'No username, routing to login_screen with from_page %(url)r',
|
||||
locals(),
|
||||
)
|
||||
response.body = self.login_screen(url)
|
||||
if "Content-Length" in response.headers:
|
||||
# Delete Content-Length header so finalize() recalcs it.
|
||||
del response.headers["Content-Length"]
|
||||
return True
|
||||
self._debug_message('Setting request.login to %(username)r', locals())
|
||||
request.login = username
|
||||
self.on_check(username)
|
||||
|
||||
def _debug_message(self, template, context={}):
|
||||
if not self.debug:
|
||||
return
|
||||
cherrypy.log(template % context, 'TOOLS.SESSAUTH')
|
||||
|
||||
def run(self):
|
||||
request = cherrypy.serving.request
|
||||
response = cherrypy.serving.response
|
||||
|
||||
path = request.path_info
|
||||
if path.endswith('login_screen'):
|
||||
self._debug_message('routing %(path)r to login_screen', locals())
|
||||
response.body = self.login_screen()
|
||||
return True
|
||||
elif path.endswith('do_login'):
|
||||
if request.method != 'POST':
|
||||
response.headers['Allow'] = "POST"
|
||||
self._debug_message('do_login requires POST')
|
||||
raise cherrypy.HTTPError(405)
|
||||
self._debug_message('routing %(path)r to do_login', locals())
|
||||
return self.do_login(**request.params)
|
||||
elif path.endswith('do_logout'):
|
||||
if request.method != 'POST':
|
||||
response.headers['Allow'] = "POST"
|
||||
raise cherrypy.HTTPError(405)
|
||||
self._debug_message('routing %(path)r to do_logout', locals())
|
||||
return self.do_logout(**request.params)
|
||||
else:
|
||||
self._debug_message('No special path, running do_check')
|
||||
return self.do_check()
|
||||
|
||||
|
||||
def session_auth(**kwargs):
|
||||
sa = SessionAuth()
|
||||
for k, v in kwargs.items():
|
||||
setattr(sa, k, v)
|
||||
return sa.run()
|
||||
session_auth.__doc__ = """Session authentication hook.
|
||||
|
||||
Any attribute of the SessionAuth class may be overridden via a keyword arg
|
||||
to this function:
|
||||
|
||||
""" + "\n".join(["%s: %s" % (k, type(getattr(SessionAuth, k)).__name__)
|
||||
for k in dir(SessionAuth) if not k.startswith("__")])
|
||||
|
||||
|
||||
def log_traceback(severity=logging.ERROR, debug=False):
|
||||
"""Write the last error's traceback to the cherrypy error log."""
|
||||
cherrypy.log("", "HTTP", severity=severity, traceback=True)
|
||||
|
||||
|
||||
def log_request_headers(debug=False):
|
||||
"""Write request headers to the cherrypy error log."""
|
||||
h = [" %s: %s" % (k, v) for k, v in cherrypy.serving.request.header_list]
|
||||
cherrypy.log('\nRequest Headers:\n' + '\n'.join(h), "HTTP")
|
||||
|
||||
|
||||
def log_hooks(debug=False):
|
||||
"""Write request.hooks to the cherrypy error log."""
|
||||
request = cherrypy.serving.request
|
||||
|
||||
msg = []
|
||||
# Sort by the standard points if possible.
|
||||
from cherrypy import _cprequest
|
||||
points = _cprequest.hookpoints
|
||||
for k in request.hooks.keys():
|
||||
if k not in points:
|
||||
points.append(k)
|
||||
|
||||
for k in points:
|
||||
msg.append(" %s:" % k)
|
||||
v = request.hooks.get(k, [])
|
||||
v.sort()
|
||||
for h in v:
|
||||
msg.append(" %r" % h)
|
||||
cherrypy.log('\nRequest Hooks for ' + cherrypy.url() +
|
||||
':\n' + '\n'.join(msg), "HTTP")
|
||||
|
||||
|
||||
def redirect(url='', internal=True, debug=False):
|
||||
"""Raise InternalRedirect or HTTPRedirect to the given url."""
|
||||
if debug:
|
||||
cherrypy.log('Redirecting %sto: %s' %
|
||||
({True: 'internal ', False: ''}[internal], url),
|
||||
'TOOLS.REDIRECT')
|
||||
if internal:
|
||||
raise cherrypy.InternalRedirect(url)
|
||||
else:
|
||||
raise cherrypy.HTTPRedirect(url)
|
||||
|
||||
|
||||
def trailing_slash(missing=True, extra=False, status=None, debug=False):
|
||||
"""Redirect if path_info has (missing|extra) trailing slash."""
|
||||
request = cherrypy.serving.request
|
||||
pi = request.path_info
|
||||
|
||||
if debug:
|
||||
cherrypy.log('is_index: %r, missing: %r, extra: %r, path_info: %r' %
|
||||
(request.is_index, missing, extra, pi),
|
||||
'TOOLS.TRAILING_SLASH')
|
||||
if request.is_index is True:
|
||||
if missing:
|
||||
if not pi.endswith('/'):
|
||||
new_url = cherrypy.url(pi + '/', request.query_string)
|
||||
raise cherrypy.HTTPRedirect(new_url, status=status or 301)
|
||||
elif request.is_index is False:
|
||||
if extra:
|
||||
# If pi == '/', don't redirect to ''!
|
||||
if pi.endswith('/') and pi != '/':
|
||||
new_url = cherrypy.url(pi[:-1], request.query_string)
|
||||
raise cherrypy.HTTPRedirect(new_url, status=status or 301)
|
||||
|
||||
|
||||
def flatten(debug=False):
|
||||
"""Wrap response.body in a generator that recursively iterates over body.
|
||||
|
||||
This allows cherrypy.response.body to consist of 'nested generators';
|
||||
that is, a set of generators that yield generators.
|
||||
"""
|
||||
def flattener(input):
|
||||
numchunks = 0
|
||||
for x in input:
|
||||
if not is_iterator(x):
|
||||
numchunks += 1
|
||||
yield x
|
||||
else:
|
||||
for y in flattener(x):
|
||||
numchunks += 1
|
||||
yield y
|
||||
if debug:
|
||||
cherrypy.log('Flattened %d chunks' % numchunks, 'TOOLS.FLATTEN')
|
||||
response = cherrypy.serving.response
|
||||
response.body = flattener(response.body)
|
||||
|
||||
|
||||
def accept(media=None, debug=False):
|
||||
"""Return the client's preferred media-type (from the given Content-Types).
|
||||
|
||||
If 'media' is None (the default), no test will be performed.
|
||||
|
||||
If 'media' is provided, it should be the Content-Type value (as a string)
|
||||
or values (as a list or tuple of strings) which the current resource
|
||||
can emit. The client's acceptable media ranges (as declared in the
|
||||
Accept request header) will be matched in order to these Content-Type
|
||||
values; the first such string is returned. That is, the return value
|
||||
will always be one of the strings provided in the 'media' arg (or None
|
||||
if 'media' is None).
|
||||
|
||||
If no match is found, then HTTPError 406 (Not Acceptable) is raised.
|
||||
Note that most web browsers send */* as a (low-quality) acceptable
|
||||
media range, which should match any Content-Type. In addition, "...if
|
||||
no Accept header field is present, then it is assumed that the client
|
||||
accepts all media types."
|
||||
|
||||
Matching types are checked in order of client preference first,
|
||||
and then in the order of the given 'media' values.
|
||||
|
||||
Note that this function does not honor accept-params (other than "q").
|
||||
"""
|
||||
if not media:
|
||||
return
|
||||
if isinstance(media, basestring):
|
||||
media = [media]
|
||||
request = cherrypy.serving.request
|
||||
|
||||
# Parse the Accept request header, and try to match one
|
||||
# of the requested media-ranges (in order of preference).
|
||||
ranges = request.headers.elements('Accept')
|
||||
if not ranges:
|
||||
# Any media type is acceptable.
|
||||
if debug:
|
||||
cherrypy.log('No Accept header elements', 'TOOLS.ACCEPT')
|
||||
return media[0]
|
||||
else:
|
||||
# Note that 'ranges' is sorted in order of preference
|
||||
for element in ranges:
|
||||
if element.qvalue > 0:
|
||||
if element.value == "*/*":
|
||||
# Matches any type or subtype
|
||||
if debug:
|
||||
cherrypy.log('Match due to */*', 'TOOLS.ACCEPT')
|
||||
return media[0]
|
||||
elif element.value.endswith("/*"):
|
||||
# Matches any subtype
|
||||
mtype = element.value[:-1] # Keep the slash
|
||||
for m in media:
|
||||
if m.startswith(mtype):
|
||||
if debug:
|
||||
cherrypy.log('Match due to %s' % element.value,
|
||||
'TOOLS.ACCEPT')
|
||||
return m
|
||||
else:
|
||||
# Matches exact value
|
||||
if element.value in media:
|
||||
if debug:
|
||||
cherrypy.log('Match due to %s' % element.value,
|
||||
'TOOLS.ACCEPT')
|
||||
return element.value
|
||||
|
||||
# No suitable media-range found.
|
||||
ah = request.headers.get('Accept')
|
||||
if ah is None:
|
||||
msg = "Your client did not send an Accept header."
|
||||
else:
|
||||
msg = "Your client sent this Accept header: %s." % ah
|
||||
msg += (" But this resource only emits these media types: %s." %
|
||||
", ".join(media))
|
||||
raise cherrypy.HTTPError(406, msg)
|
||||
|
||||
|
||||
class MonitoredHeaderMap(_httputil.HeaderMap):
|
||||
|
||||
def __init__(self):
|
||||
self.accessed_headers = set()
|
||||
|
||||
def __getitem__(self, key):
|
||||
self.accessed_headers.add(key)
|
||||
return _httputil.HeaderMap.__getitem__(self, key)
|
||||
|
||||
def __contains__(self, key):
|
||||
self.accessed_headers.add(key)
|
||||
return _httputil.HeaderMap.__contains__(self, key)
|
||||
|
||||
def get(self, key, default=None):
|
||||
self.accessed_headers.add(key)
|
||||
return _httputil.HeaderMap.get(self, key, default=default)
|
||||
|
||||
if hasattr({}, 'has_key'):
|
||||
# Python 2
|
||||
def has_key(self, key):
|
||||
self.accessed_headers.add(key)
|
||||
return _httputil.HeaderMap.has_key(self, key)
|
||||
|
||||
|
||||
def autovary(ignore=None, debug=False):
|
||||
"""Auto-populate the Vary response header based on request.header access.
|
||||
"""
|
||||
request = cherrypy.serving.request
|
||||
|
||||
req_h = request.headers
|
||||
request.headers = MonitoredHeaderMap()
|
||||
request.headers.update(req_h)
|
||||
if ignore is None:
|
||||
ignore = set(['Content-Disposition', 'Content-Length', 'Content-Type'])
|
||||
|
||||
def set_response_header():
|
||||
resp_h = cherrypy.serving.response.headers
|
||||
v = set([e.value for e in resp_h.elements('Vary')])
|
||||
if debug:
|
||||
cherrypy.log(
|
||||
'Accessed headers: %s' % request.headers.accessed_headers,
|
||||
'TOOLS.AUTOVARY')
|
||||
v = v.union(request.headers.accessed_headers)
|
||||
v = v.difference(ignore)
|
||||
v = list(v)
|
||||
v.sort()
|
||||
resp_h['Vary'] = ', '.join(v)
|
||||
request.hooks.attach('before_finalize', set_response_header, 95)
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue