2019-04-11 00:02:14 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""Utilities for dealing with streamed requests."""
|
2022-01-24 04:07:52 +00:00
|
|
|
import collections
|
2019-04-11 00:02:14 +00:00
|
|
|
import os.path
|
|
|
|
import re
|
|
|
|
|
|
|
|
from .. import exceptions as exc
|
|
|
|
|
|
|
|
# Header-parsing patterns borrowed from werkzeug/http.py
# (commit cd2c97bb0a076da2322f11adce0b2731f9193396, lines 62-64).

# A double-quoted string that may contain backslash-escaped characters.
_QUOTED_STRING_RE = r'"[^"\\]*(?:\\.[^"\\]*)*"'

# One ``; key`` or ``; key=value`` piece of an option header. Group 1 is the
# key and group 2 the value (``None`` when the piece has no ``=value`` part);
# either may be a quoted string, which is captured with its quotes intact.
_OPTION_HEADER_PIECE_RE = re.compile(
    r';\s*(%s|[^\s;=]+)\s*(?:=\s*(%s|[^;]+))?\s*' % ((_QUOTED_STRING_RE,) * 2)
)

# Chunk size used when the caller of ``stream_response_to_file`` gives none.
_DEFAULT_CHUNKSIZE = 512
|
|
|
|
|
|
|
|
|
|
|
|
def _get_filename(content_disposition):
    """Extract the bare file name from a ``Content-Disposition`` header value.

    The header is scanned for its first usable ``filename`` parameter. The
    parameter name is compared case-insensitively. A quoted value has its
    surrounding double quotes removed and its backslash escapes undone, and
    an unquoted value has surrounding whitespace stripped. Any directory
    components are then discarded so only the final path component remains.

    :param str content_disposition: Raw value of the header.
    :returns: The file name, or ``None`` when the header carries no
        ``filename`` parameter with a value. The result may be an empty
        string if the value consists only of a directory path.
    :rtype: str or None
    """
    for match in _OPTION_HEADER_PIECE_RE.finditer(content_disposition):
        key, value = match.groups()
        # ``value`` is None for a bare ``; filename`` piece without ``=...``;
        # passing that to os.path.split would raise instead of reporting
        # "no filename".
        if key.lower() != 'filename' or value is None:
            continue

        # The unquoted alternative of the regex captures trailing whitespace.
        value = value.strip()
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            # The regex captures a quoted string verbatim: drop the quotes
            # and turn each ``\x`` escape back into ``x``.
            value = re.sub(r'\\(.)', r'\1', value[1:-1])

        # ignore any directory paths in the filename
        return os.path.split(value)[1]
    return None
|
|
|
|
|
|
|
|
|
|
|
|
def get_download_file_path(response, path):
    """
    Work out which file path a response should be downloaded to.

    Three cases are handled:

    * ``path`` names something that is not an existing directory: it is
      treated as the full file path and returned unchanged.
    * ``path`` is an existing directory: the file name reported by the
      server in the ``Content-Disposition`` header is joined onto it.
    * ``path`` is empty or None: the server-reported file name is returned
      on its own, i.e. relative to the process' current working directory.

    :param response: A Response object from requests
    :type response: requests.models.Response
    :param str path: Directory or file path.
    :returns: full file path to download as
    :rtype: str
    :raises: :class:`requests_toolbelt.exceptions.StreamingError` when the
        file name has to come from the response but the header provides none.
    """
    is_directory = path and os.path.isdir(path)

    # An explicit, non-directory path is taken as the destination file as-is.
    if path and not is_directory:
        return path

    # Otherwise the file name has to come from the server.
    disposition = response.headers.get('content-disposition', '')
    server_name = _get_filename(disposition)
    if not server_name:
        raise exc.StreamingError('No filename given to stream response to')

    if is_directory:
        # Download into the directory the caller pointed at.
        return os.path.join(path, server_name)

    # No path at all: fall back to the current working directory.
    return server_name
|
|
|
|
|
|
|
|
|
|
|
|
def stream_response_to_file(response, path=None, chunksize=_DEFAULT_CHUNKSIZE):
    """Stream a response body to the specified file.

    Either use the ``path`` provided or use the name provided in the
    ``Content-Disposition`` header.

    .. warning::

        If you pass this function an open file-like object as the ``path``
        parameter, the function will not close that file for you.

    .. warning::

        This function will not automatically close the response object
        passed in as the ``response`` parameter.

    If a ``path`` parameter is a directory, this function will parse the
    ``Content-Disposition`` header on the response to determine the name of the
    file as reported by the server, and return a file path in the specified
    directory. If no ``path`` parameter is supplied, this function will default
    to the process' current working directory.

    .. code-block:: python

        import requests
        from requests_toolbelt import exceptions
        from requests_toolbelt.downloadutils import stream

        r = requests.get(url, stream=True)
        try:
            filename = stream.stream_response_to_file(r)
        except exceptions.StreamingError as e:
            # The toolbelt could not find the filename in the
            # Content-Disposition
            print(e)

    You can also specify the filename as a string. This will be passed to
    the built-in :func:`open` and we will read the content into the file.

    .. code-block:: python

        import requests
        from requests_toolbelt.downloadutils import stream

        r = requests.get(url, stream=True)
        filename = stream.stream_response_to_file(r, path='myfile')

    If the calculated download file path already exists, this function will
    raise a StreamingError.

    Instead, if you want to manage the file object yourself, you need to
    provide either a :class:`io.BytesIO` object or a file opened with the
    `'b'` flag. See the two examples below for more details.

    .. code-block:: python

        import requests
        from requests_toolbelt.downloadutils import stream

        with open('myfile', 'wb') as fd:
            r = requests.get(url, stream=True)
            filename = stream.stream_response_to_file(r, path=fd)

        print('{0} saved to {1}'.format(url, filename))

    .. code-block:: python

        import io
        import requests
        from requests_toolbelt.downloadutils import stream

        b = io.BytesIO()
        r = requests.get(url, stream=True)
        filename = stream.stream_response_to_file(r, path=b)
        assert filename is None

    :param response: A Response object from requests
    :type response: requests.models.Response
    :param path: *(optional)*, Either a string with the path to the location
        to save the response content, or a file-like object expecting bytes.
    :type path: :class:`str`, or object with a :meth:`write`
    :param int chunksize: (optional), Size of chunk to attempt to stream
        (default 512B).
    :returns: The name of the file, if one can be determined, else None
    :rtype: str
    :raises: :class:`requests_toolbelt.exceptions.StreamingError`
    """
    # Anything exposing a callable ``write`` is treated as an already-open
    # file object owned by the caller. The built-in ``callable`` is used
    # because the ``collections.Callable`` alias no longer exists on
    # Python 3.10+.
    pre_opened = callable(getattr(path, 'write', None))

    if pre_opened:
        fd = path
        filename = getattr(fd, 'name', None)
    else:
        filename = get_download_file_path(response, path)
        if os.path.exists(filename):
            raise exc.StreamingError("File already exists: %s" % filename)
        fd = open(filename, 'wb')

    try:
        for chunk in response.iter_content(chunk_size=chunksize):
            fd.write(chunk)
    finally:
        # Only close what this function opened, and do so even when reading
        # the response or writing a chunk fails part-way through.
        if not pre_opened:
            fd.close()

    return filename
|