Source code for pybe.benchmark
from typing import List, Callable, Dict, Union, Optional
import pandas as pd
from tqdm import tqdm
import multiprocessing as mp
class Benchmark:
    """Benchmark any Python function.

    pybe.Benchmark allows you to:

    * **benchmark** any Python function (returning a dictionary of real-valued outputs)
    * **store** the results in a csv (default) or excel file and
    * **read** from previous benchmark results.

    .. epigraph::

        **How it works:**
        Specify a list of inputs and apply a given function to those inputs a
        specified number of times.

    Parameters
    ----------
    benchmark_csv_file_path : Optional[str]
        path of a csv file holding previous benchmark results; if given, the
        file is loaded as the result of this instance
    """

    def __init__(self,
                 benchmark_csv_file_path: Optional[str] = None,
                 ):
        # Always define the attribute so that a fresh instance fails with a
        # clear message (see ``result``) instead of an opaque attribute lookup.
        self._result: Optional[pd.DataFrame] = None
        if benchmark_csv_file_path is not None:
            self.read_from_csv(benchmark_csv_file_path)

    def __call__(self,
                 function: Callable[..., Dict[str, float]],
                 inputs: List[Union[str, float]],
                 name: str,
                 number_runs: int = 10,
                 store: bool = True,
                 parallel: bool = False,
                 ):
        """Benchmark a function

        Parameters
        ----------
        function : Callable[..., Dict[str, float]]
            function to be benchmarked which returns a dictionary with keys the name of the output (string)
            and value the value of the function (float)
        inputs : List[Union[str, float]]
            inputs on which the function is to be benchmarked stored as a list of strings or floats
        name : str
            name of the benchmark; stored in the ``Name`` column of every row and used
            as file name when ``store`` is true
        number_runs : int
            number of runs for each input
        store : bool
            if true, store the output of the benchmark as ``<name>.csv``
        parallel : bool
            if true, run the benchmark in parallel (using multiprocessing)

        Raises
        ------
        RuntimeError
            if the parallel run fails with a RuntimeError (typically because the
            call is not protected by ``if __name__ == '__main__':``)

        Examples
        --------
        Initialize the benchmark class.

        >>> benchmark = Benchmark()

        Define the function to be benchmarked. This function must take a single argument (float or string) and return
        a dictionary where each key represents one output

        >>> def test_function(i: int):
        ...     return {"value": i}

        Define the list of inputs

        >>> inputs = [1, 2, 3]

        Specify the number of runs

        >>> number_runs = 3

        Run the benchmark

        >>> benchmark(function=test_function,
        ...           name="test-benchmark",
        ...           inputs=inputs,
        ...           number_runs=number_runs)
        """
        self._name = name
        self._inputs = inputs
        # TODO: test what if dict contains numpy array or others...
        if parallel:
            records = self._run_parallel(function, inputs, name, number_runs)
        else:
            records = [
                self._make_record(function(x), x, name)
                for x in tqdm(inputs)
                for _ in range(number_runs)
            ]
        # Both execution modes end up in the same long format: one row per run.
        self._result = self._build_frame(records)
        self._name_outputs = list(self._result.columns)
        if store:
            self.to_csv(name)

    @staticmethod
    def _make_record(output: Dict[str, float], x: Union[str, float], name: str) -> Dict:
        """Return a copy of one function output extended by the ``Input`` and ``Name`` columns."""
        # Copy instead of mutating: the benchmarked function may hand out the
        # same dictionary object on every call.
        return {**output, 'Input': x, 'Name': name}

    @staticmethod
    def _build_frame(records: List[Dict]) -> pd.DataFrame:
        """Turn the collected per-run records into the result DataFrame."""
        if not records:
            # No inputs or no runs: keep the bookkeeping columns so that the
            # result can still be stored and queried.
            return pd.DataFrame(columns=['Input', 'Name'])
        return pd.DataFrame(records)

    @staticmethod
    def _run_parallel(function: Callable[..., Dict[str, float]],
                      inputs: List[Union[str, float]],
                      name: str,
                      number_runs: int,
                      ) -> List[Dict]:
        """Evaluate ``function`` ``number_runs`` times per input in a process pool."""
        # TODO: doesnt work for generating instances of classes
        records = []
        try:
            # One pool for the whole benchmark instead of one pool per input.
            with mp.Pool() as pool:
                for x in tqdm(inputs):
                    outputs = pool.map(function, [x] * number_runs)
                    records.extend(Benchmark._make_record(output, x, name) for output in outputs)
        except RuntimeError as err:
            raise RuntimeError(
                "benchmark(test_function) needs to be inside the if __name__ == '__main__': "
                'clause to prevent spawning infinite processes.'
            ) from err
        return records

    def to_excel(self, name: str = 'benchmark'):
        """Save results to excel (xlsx) file ``<name>.xlsx`` (relative to the current working directory)

        Parameters
        ----------
        name : str
            name of the benchmark
        """
        self.result.to_excel(f'{name}.xlsx')

    def to_csv(self, name: str = 'benchmark'):
        """Save results to csv file ``<name>.csv`` (relative to the current working directory)

        Parameters
        ----------
        name : str
            name of the benchmark
        """
        self.result.to_csv(f'{name}.csv', index=False)

    @property
    def result(self) -> pd.DataFrame:
        """Return the result of the benchmark as a pandas DataFrame

        Returns
        -------
        pd.DataFrame
            result of the benchmark (one row per run)

        Raises
        ------
        AttributeError
            if no benchmark has been run or read yet
        """
        # getattr keeps this working for instances created without __init__.
        result = getattr(self, '_result', None)
        if result is None:
            raise AttributeError(
                'No benchmark result available: run the benchmark or read a csv file first.'
            )
        return result

    def read_from_csv(self, benchmark_csv_file_path: str):
        """Read previous results from the given csv file and store them in this instance

        Parameters
        ----------
        benchmark_csv_file_path : str
            path of benchmark csv file

        Examples
        --------
        >>> benchmark = Benchmark()  # initialize benchmark instance
        >>> benchmark.read_from_csv(benchmark_csv_file_path="./benchmark.csv")  # read results
        >>> print(benchmark.result)  # print result
        """
        self._result = pd.read_csv(benchmark_csv_file_path)

    def return_outputs(self, input: float):  # noqa: A002 - public parameter name
        """Return all rows of the result that belong to the given input

        Parameters
        ----------
        input : float
            input value to select (compared against the ``Input`` column)

        Returns
        -------
        pd.DataFrame
            rows of the result whose ``Input`` equals ``input``
        """
        return self.result.loc[self.result['Input'] == input]

    @property
    def inputs(self) -> List[Union[str, float]]:
        """Return the list of inputs of the benchmark

        Returns
        -------
        List[Union[str, float]]
            distinct inputs of the benchmark in order of first appearance
        """
        # unique() keeps the order of first appearance; a set would make the
        # order depend on hashing.
        return list(self.result['Input'].unique())

    @property
    def name_outputs(self) -> List[str]:
        """Return the list of names of outputs of the benchmark

        Returns
        -------
        List[str]
            name of outputs (all columns except ``Input`` and ``Name``)
        """
        return list(self.result.columns.drop(['Input', 'Name']))

    @property
    def means(self) -> pd.DataFrame:
        """Return the means of the outputs as pandas DataFrame

        Returns
        -------
        pd.DataFrame
            means of the numeric outputs, one row per input
        """
        means = self.result.groupby(['Input']).mean(numeric_only=True).reset_index()
        means['Name'] = self.name
        return means

    @property
    def std(self) -> pd.DataFrame:
        """Return the standard deviation of the outputs as pandas DataFrame

        Returns
        -------
        pd.DataFrame
            std of the numeric outputs, one row per input
        """
        std = self.result.groupby(['Input']).std(numeric_only=True).reset_index()
        std['Name'] = self.name
        return std

    @property
    def name(self) -> str:
        """Return the name of the benchmark

        Returns
        -------
        str
            name of the benchmark (taken from the first row of the ``Name`` column)
        """
        return self.result['Name'].values[0]