bazarr/libs/urllib3/fields.py

273 lines
8.4 KiB
Python
Raw Normal View History

from __future__ import absolute_import
import email.utils
import mimetypes
2019-09-18 15:30:46 +00:00
import re
from .packages import six
def guess_content_type(filename, default='application/octet-stream'):
    """
    Guess the "Content-Type" of a file from its name.

    :param filename:
        The filename whose "Content-Type" is looked up using
        :mod:`mimetypes`. A falsy value (``None``, ``""``) skips the lookup.
    :param default:
        The value returned when no "Content-Type" can be guessed.
    :returns:
        The guessed MIME type, or ``default`` if nothing was guessed.
    """
    if not filename:
        return default

    # guess_type() returns a (type, encoding) pair; only the type is used.
    guessed_type, _encoding = mimetypes.guess_type(filename)
    return guessed_type or default
2019-09-18 15:30:46 +00:00
def format_header_param_rfc2231(name, value):
    """
    Format and quote a single header parameter using the strategy defined
    in RFC 2231.

    Particularly useful for header parameters which might contain
    non-ASCII values, like file names. This follows RFC 2388 Section 4.4.

    :param name:
        The name of the parameter, a string expected to be ASCII only.
    :param value:
        The value of the parameter, provided as ``bytes`` or ``str``.
    :returns:
        An RFC-2231-formatted unicode string.
    """
    if isinstance(value, six.binary_type):
        value = value.decode("utf-8")

    # Values free of quotes, backslashes and line breaks can be emitted as a
    # plain quoted string, provided the whole parameter is pure ASCII.
    contains_special = any(ch in value for ch in '"\\\r\n')
    if not contains_special:
        quoted = u'%s="%s"' % (name, value)
        try:
            quoted.encode('ascii')
            return quoted
        except (UnicodeEncodeError, UnicodeDecodeError):
            # Not representable in ASCII: fall through to RFC 2231 encoding.
            pass

    running_py2 = not six.PY3
    if running_py2:
        value = value.encode('utf-8')

    # encode_rfc2231 accepts an encoded string and returns an ascii-encoded
    # string in Python 2 but accepts and returns unicode strings in Python 3
    encoded = email.utils.encode_rfc2231(value, 'utf-8')
    param = '%s*=%s' % (name, encoded)

    if running_py2:
        param = param.decode('utf-8')

    return param
2019-09-18 15:30:46 +00:00
# Maps each character that is escaped inside a quoted header parameter value
# to the text that replaces it.
_HTML5_REPLACEMENTS = {
    # Replace the double quote with its percent-encoded form.
    u"\u0022": u"%22",
    # Replace "\" with "\\".
    u"\u005C": u"\u005C\u005C",
}

# All control characters from 0x00 to 0x1F *except* 0x1B, each mapped to its
# two-digit uppercase percent-encoding (e.g. 0x0A -> "%0A").
_HTML5_REPLACEMENTS.update({
    six.unichr(cc): u"%{:02X}".format(cc)
    for cc
    in range(0x00, 0x1F+1)
    if cc not in (0x1B,)
})
def _replace_multiple(value, needles_and_replacements):
def replacer(match):
return needles_and_replacements[match.group(0)]
pattern = re.compile(
r"|".join([
re.escape(needle) for needle in needles_and_replacements.keys()
])
)
result = pattern.sub(replacer, value)
return result
def format_header_param_html5(name, value):
    """
    Format and quote a single header parameter using the HTML5 strategy.

    Particularly useful for header parameters which might contain
    non-ASCII values, like file names. This follows the `HTML5 Working Draft
    Section 4.10.22.7`_ and matches the behavior of curl and modern browsers.

    .. _HTML5 Working Draft Section 4.10.22.7:
        https://w3c.github.io/html/sec-forms.html#multipart-form-data

    :param name:
        The name of the parameter, a string expected to be ASCII only.
    :param value:
        The value of the parameter, provided as ``bytes`` or ``str``.
    :returns:
        A unicode string, stripped of troublesome characters.
    """
    # Byte strings are decoded as UTF-8; text is used as given.
    if isinstance(value, six.binary_type):
        text = value.decode("utf-8")
    else:
        text = value

    escaped = _replace_multiple(text, _HTML5_REPLACEMENTS)
    return u'%s="%s"' % (name, escaped)
# For backwards-compatibility: keep the ``format_header_param`` name available
# as an alias of the HTML5 formatter.
format_header_param = format_header_param_html5
class RequestField(object):
    """
    A data container for request body parameters.

    :param name:
        The name of this request field. Must be unicode.
    :param data:
        The data/value body.
    :param filename:
        An optional filename of the request field. Must be unicode.
    :param headers:
        An optional dict-like object of headers to initially use for the field.
    :param header_formatter:
        An optional callable that is used to encode and format the headers. By
        default, this is :func:`format_header_param_html5`.
    """

    def __init__(
            self,
            name,
            data,
            filename=None,
            headers=None,
            header_formatter=format_header_param_html5):
        self._name = name
        self._filename = filename
        self.data = data
        # Copy the supplied headers so the caller's mapping is never mutated.
        self.headers = dict(headers) if headers else {}
        self.header_formatter = header_formatter

    @classmethod
    def from_tuples(
            cls,
            fieldname,
            value,
            header_formatter=format_header_param_html5):
        """
        A :class:`~urllib3.fields.RequestField` factory from old-style tuple
        parameters.

        Supports constructing :class:`~urllib3.fields.RequestField` from
        parameter of key/value strings AND key/filetuple. A filetuple is a
        (filename, data, MIME type) tuple where the MIME type is optional.
        For example::

            'foo': 'bar',
            'fakefile': ('foofile.txt', 'contents of foofile'),
            'realfile': ('barfile.txt', open('realfile').read()),
            'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
            'nonamefile': 'contents of nonamefile field',

        Field names and filenames must be unicode.
        """
        if not isinstance(value, tuple):
            # Plain value: no filename and no content type.
            filename, data, content_type = None, value, None
        elif len(value) == 3:
            filename, data, content_type = value
        else:
            # Two-item filetuple: derive the MIME type from the filename.
            filename, data = value
            content_type = guess_content_type(filename)

        field = cls(
            fieldname, data, filename=filename, header_formatter=header_formatter)
        field.make_multipart(content_type=content_type)
        return field

    def _render_part(self, name, value):
        """
        Overridable hook that formats a single header parameter by
        delegating to ``self.header_formatter``.

        :param name:
            The name of the parameter, a string expected to be ASCII only.
        :param value:
            The value of the parameter, provided as a unicode string.
        """
        return self.header_formatter(name, value)

    def _render_parts(self, header_parts):
        """
        Format and quote the parameters of a single header.

        Useful for single headers that are composed of multiple items. E.g.,
        'Content-Disposition' fields.

        :param header_parts:
            A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
            as `k1="v1"; k2="v2"; ...`. Pairs whose value is ``None`` are
            skipped.
        """
        if isinstance(header_parts, dict):
            pairs = header_parts.items()
        else:
            pairs = header_parts

        rendered = [
            self._render_part(part_name, part_value)
            for part_name, part_value in pairs
            if part_value is not None
        ]
        return u'; '.join(rendered)

    def render_headers(self):
        """
        Render the headers for this request field.

        'Content-Disposition', 'Content-Type' and 'Content-Location' come
        first, in that order, followed by any remaining headers. Headers with
        a falsy value are omitted.
        """
        priority_keys = ['Content-Disposition', 'Content-Type', 'Content-Location']

        lines = [
            u'%s: %s' % (key, self.headers[key])
            for key in priority_keys
            if self.headers.get(key, False)
        ]
        lines.extend(
            u'%s: %s' % (header_name, header_value)
            for header_name, header_value in self.headers.items()
            if header_name not in priority_keys and header_value
        )

        # The extra element makes the joined output end with a blank line.
        lines.append(u'\r\n')
        return u'\r\n'.join(lines)

    def make_multipart(self, content_disposition=None, content_type=None,
                       content_location=None):
        """
        Make this request field into a multipart request field.

        This method overrides "Content-Disposition", "Content-Type" and
        "Content-Location" headers to the request parameter.

        :param content_disposition:
            The 'Content-Disposition' of the request body. Falls back to
            ``form-data`` when not given.
        :param content_type:
            The 'Content-Type' of the request body.
        :param content_location:
            The 'Content-Location' of the request body.
        """
        self.headers['Content-Disposition'] = content_disposition or u'form-data'

        # Append the rendered name/filename parameters after a "; " separator.
        disposition_params = self._render_parts(
            ((u'name', self._name), (u'filename', self._filename))
        )
        self.headers['Content-Disposition'] += u'; ' + disposition_params

        self.headers['Content-Type'] = content_type
        self.headers['Content-Location'] = content_location