2020-06-10 16:04:54 +00:00
|
|
|
"""
|
|
|
|
Thin wrappers around `concurrent.futures`.
|
|
|
|
"""
|
|
|
|
from __future__ import absolute_import
|
2021-12-01 20:47:00 +00:00
|
|
|
|
|
|
|
from contextlib import contextmanager
|
|
|
|
|
|
|
|
from ..auto import tqdm as tqdm_auto
|
|
|
|
from ..std import TqdmWarning
|
|
|
|
|
2020-06-10 16:04:54 +00:00
|
|
|
try:
|
|
|
|
from operator import length_hint
|
|
|
|
except ImportError:
|
|
|
|
def length_hint(it, default=0):
|
|
|
|
"""Returns `len(it)`, falling back to `default`"""
|
|
|
|
try:
|
|
|
|
return len(it)
|
|
|
|
except TypeError:
|
|
|
|
return default
|
|
|
|
try:
|
|
|
|
from os import cpu_count
|
|
|
|
except ImportError:
|
|
|
|
try:
|
|
|
|
from multiprocessing import cpu_count
|
|
|
|
except ImportError:
|
|
|
|
def cpu_count():
|
|
|
|
return 4
|
|
|
|
import sys
|
2021-12-01 20:47:00 +00:00
|
|
|
|
2020-06-10 16:04:54 +00:00
|
|
|
__author__ = {"github.com/": ["casperdcl"]}
|
|
|
|
__all__ = ['thread_map', 'process_map']
|
|
|
|
|
|
|
|
|
2021-12-01 20:47:00 +00:00
|
|
|
@contextmanager
|
|
|
|
def ensure_lock(tqdm_class, lock_name=""):
|
|
|
|
"""get (create if necessary) and then restore `tqdm_class`'s lock"""
|
|
|
|
old_lock = getattr(tqdm_class, '_lock', None) # don't create a new lock
|
|
|
|
lock = old_lock or tqdm_class.get_lock() # maybe create a new lock
|
|
|
|
lock = getattr(lock, lock_name, lock) # maybe subtype
|
|
|
|
tqdm_class.set_lock(lock)
|
|
|
|
yield lock
|
|
|
|
if old_lock is None:
|
|
|
|
del tqdm_class._lock
|
|
|
|
else:
|
|
|
|
tqdm_class.set_lock(old_lock)
|
|
|
|
|
|
|
|
|
2020-06-10 16:04:54 +00:00
|
|
|
def _executor_map(PoolExecutor, fn, *iterables, **tqdm_kwargs):
|
|
|
|
"""
|
|
|
|
Implementation of `thread_map` and `process_map`.
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
----------
|
|
|
|
tqdm_class : [default: tqdm.auto.tqdm].
|
|
|
|
max_workers : [default: min(32, cpu_count() + 4)].
|
|
|
|
chunksize : [default: 1].
|
2021-12-01 20:47:00 +00:00
|
|
|
lock_name : [default: "":str].
|
2020-06-10 16:04:54 +00:00
|
|
|
"""
|
2021-12-01 20:47:00 +00:00
|
|
|
kwargs = tqdm_kwargs.copy()
|
2020-06-10 16:04:54 +00:00
|
|
|
if "total" not in kwargs:
|
2021-12-01 20:47:00 +00:00
|
|
|
kwargs["total"] = length_hint(iterables[0])
|
2020-06-10 16:04:54 +00:00
|
|
|
tqdm_class = kwargs.pop("tqdm_class", tqdm_auto)
|
|
|
|
max_workers = kwargs.pop("max_workers", min(32, cpu_count() + 4))
|
|
|
|
chunksize = kwargs.pop("chunksize", 1)
|
2021-12-01 20:47:00 +00:00
|
|
|
lock_name = kwargs.pop("lock_name", "")
|
|
|
|
with ensure_lock(tqdm_class, lock_name=lock_name) as lk:
|
|
|
|
pool_kwargs = {'max_workers': max_workers}
|
|
|
|
sys_version = sys.version_info[:2]
|
|
|
|
if sys_version >= (3, 7):
|
|
|
|
# share lock in case workers are already using `tqdm`
|
|
|
|
pool_kwargs.update(initializer=tqdm_class.set_lock, initargs=(lk,))
|
|
|
|
map_args = {}
|
|
|
|
if not (3, 0) < sys_version < (3, 5):
|
|
|
|
map_args.update(chunksize=chunksize)
|
|
|
|
with PoolExecutor(**pool_kwargs) as ex:
|
|
|
|
return list(tqdm_class(ex.map(fn, *iterables, **map_args), **kwargs))
|
2020-06-10 16:04:54 +00:00
|
|
|
|
|
|
|
|
|
|
|
def thread_map(fn, *iterables, **tqdm_kwargs):
|
|
|
|
"""
|
|
|
|
Equivalent of `list(map(fn, *iterables))`
|
|
|
|
driven by `concurrent.futures.ThreadPoolExecutor`.
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
----------
|
2021-12-01 20:47:00 +00:00
|
|
|
tqdm_class : optional
|
2020-06-10 16:04:54 +00:00
|
|
|
`tqdm` class to use for bars [default: tqdm.auto.tqdm].
|
2021-12-01 20:47:00 +00:00
|
|
|
max_workers : int, optional
|
2020-06-10 16:04:54 +00:00
|
|
|
Maximum number of workers to spawn; passed to
|
|
|
|
`concurrent.futures.ThreadPoolExecutor.__init__`.
|
|
|
|
[default: max(32, cpu_count() + 4)].
|
|
|
|
"""
|
|
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
return _executor_map(ThreadPoolExecutor, fn, *iterables, **tqdm_kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
def process_map(fn, *iterables, **tqdm_kwargs):
|
|
|
|
"""
|
|
|
|
Equivalent of `list(map(fn, *iterables))`
|
|
|
|
driven by `concurrent.futures.ProcessPoolExecutor`.
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
----------
|
|
|
|
tqdm_class : optional
|
|
|
|
`tqdm` class to use for bars [default: tqdm.auto.tqdm].
|
2021-12-01 20:47:00 +00:00
|
|
|
max_workers : int, optional
|
2020-06-10 16:04:54 +00:00
|
|
|
Maximum number of workers to spawn; passed to
|
|
|
|
`concurrent.futures.ProcessPoolExecutor.__init__`.
|
|
|
|
[default: min(32, cpu_count() + 4)].
|
2021-12-01 20:47:00 +00:00
|
|
|
chunksize : int, optional
|
2020-06-10 16:04:54 +00:00
|
|
|
Size of chunks sent to worker processes; passed to
|
|
|
|
`concurrent.futures.ProcessPoolExecutor.map`. [default: 1].
|
2021-12-01 20:47:00 +00:00
|
|
|
lock_name : str, optional
|
|
|
|
Member of `tqdm_class.get_lock()` to use [default: mp_lock].
|
2020-06-10 16:04:54 +00:00
|
|
|
"""
|
|
|
|
from concurrent.futures import ProcessPoolExecutor
|
|
|
|
if iterables and "chunksize" not in tqdm_kwargs:
|
|
|
|
# default `chunksize=1` has poor performance for large iterables
|
|
|
|
# (most time spent dispatching items to workers).
|
|
|
|
longest_iterable_len = max(map(length_hint, iterables))
|
|
|
|
if longest_iterable_len > 1000:
|
|
|
|
from warnings import warn
|
|
|
|
warn("Iterable length %d > 1000 but `chunksize` is not set."
|
|
|
|
" This may seriously degrade multiprocess performance."
|
|
|
|
" Set `chunksize=1` or more." % longest_iterable_len,
|
|
|
|
TqdmWarning, stacklevel=2)
|
2021-12-01 20:47:00 +00:00
|
|
|
if "lock_name" not in tqdm_kwargs:
|
|
|
|
tqdm_kwargs = tqdm_kwargs.copy()
|
|
|
|
tqdm_kwargs["lock_name"] = "mp_lock"
|
2020-06-10 16:04:54 +00:00
|
|
|
return _executor_map(ProcessPoolExecutor, fn, *iterables, **tqdm_kwargs)
|