from __future__ import annotations # built-in import json import math from collections import defaultdict from timeit import timeit from typing import Iterable, Iterator, NamedTuple # external from tabulate import tabulate # app from .libraries import LIBRARIES_PATH, prototype # python3 -m textdistance.benchmark libraries = prototype.clone() class Lib(NamedTuple): algorithm: str library: str function: str time: float setup: str @property def row(self) -> tuple[str, ...]: time = '' if math.isinf(self.time) else f'{self.time:0.05f}' return (self.algorithm, self.library.split('.')[0], time) INTERNAL_SETUP = """ from textdistance import {} as cls func = cls(external=False) """ STMT = """ func('text', 'test') func('qwer', 'asdf') func('a' * 15, 'b' * 15) """ RUNS = 4000 class Benchmark: @staticmethod def get_installed() -> Iterator[Lib]: for alg in libraries.get_algorithms(): for lib in libraries.get_libs(alg): # try load function if not lib.get_function(): print(f'WARNING: cannot get func for {lib}') continue # return library info yield Lib( algorithm=alg, library=lib.module_name, function=lib.func_name, time=float('Inf'), setup=lib.setup, ) @staticmethod def get_external_benchmark(installed: Iterable[Lib]) -> Iterator[Lib]: for lib in installed: time = timeit( stmt=STMT, setup=lib.setup, number=RUNS, ) yield lib._replace(time=time) @staticmethod def get_internal_benchmark() -> Iterator[Lib]: for alg in libraries.get_algorithms(): setup = f'func = __import__("textdistance").{alg}(external=False)' yield Lib( algorithm=alg, library='**textdistance**', function=alg, time=timeit( stmt=STMT, setup=setup, number=RUNS, ), setup=setup, ) @staticmethod def filter_benchmark( external: Iterable[Lib], internal: Iterable[Lib], ) -> Iterator[Lib]: limits = {i.algorithm: i.time for i in internal} return filter(lambda x: x.time < limits[x.algorithm], external) @staticmethod def get_table(libs: list[Lib]) -> str: table = tabulate( [lib.row for lib in libs], headers=['algorithm', 'library', 'time'], tablefmt='github', ) table += f'\nTotal: {len(libs)} libs.\n\n' return table @staticmethod def save(libs: Iterable[Lib]) -> None: data = defaultdict(list) for lib in libs: data[lib.algorithm].append([lib.library, lib.function]) with LIBRARIES_PATH.open('w', encoding='utf8') as f: json.dump(obj=data, fp=f, indent=2, sort_keys=True) @classmethod def run(cls) -> None: print('# Installed libraries:\n') installed = list(cls.get_installed()) installed.sort() print(cls.get_table(installed)) print('# Benchmarks (with textdistance):\n') benchmark = list(cls.get_external_benchmark(installed)) benchmark_internal = list(cls.get_internal_benchmark()) benchmark += benchmark_internal benchmark.sort(key=lambda x: (x.algorithm, x.time)) print(cls.get_table(benchmark)) benchmark = list(cls.filter_benchmark(benchmark, benchmark_internal)) cls.save(benchmark) if __name__ == '__main__': Benchmark.run()