How to use ld15iqr method in pytest-benchmark

Best Python code snippet using pytest-benchmark

benchmark.py

Source:benchmark.py Github

copy

Full Screen

# Copyright 2021 Xilinx Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Driver script that runs and aggregates Proteus benchmarks from three
sources (pytest-benchmark, wrk, and native C++ executables) and prints the
combined results as rich tables."""

import argparse
import copy
from enum import Enum, auto
import functools
import ipaddress
import json
import itertools
import math
import os
import pathlib
import pprint
import socket
import statistics
import subprocess
import sys
from typing import Optional

import proteus
from proteus.server import Server
from proteus.rest import Client
import pytest_benchmark.utils
from _pytest.mark import KeywordMatcher
from _pytest.mark.expression import Expression
from rich.console import Console
from rich.table import Table
from rich.progress import Progress
import yaml


class Highlight(Enum):
    """How a column's extreme values should be highlighted when printed."""

    smallest = auto()
    largest = auto()
    none = auto()


class Options:
    """Attribute-style read-only view over a plain options dict."""

    def __init__(self, options) -> None:
        self.options = options

    def __getattr__(self, name):
        if name in self.options:
            return self.options[name]
        raise AttributeError(f"'Options' object has no attribute '{name}'")


class Config:
    """Benchmark configuration loaded from a YAML file."""

    def __init__(self, path) -> None:
        with open(path, "r") as f:
            self.config = yaml.safe_load(f)
        self.pytest = Options(self.config["pytest"])
        self.wrk = Options(self.config["wrk"])
        self.cpp = Options(self.config["cpp"])

    @property
    def benchmarks(self):
        return self.config["benchmarks"]

    @benchmarks.setter
    def benchmarks(self, value):
        self.config["benchmarks"] = value

    @property
    def repeat(self):
        return self.config["repeat_count"]

    @property
    def verbosity(self):
        return self.config["verbosity"]

    @property
    def http_address(self):
        return self.config["http_address"]

    @property
    def start_server(self):
        return self.config["start_local_server"]

    def __str__(self):
        return pprint.pformat(self.config, 2)


class Benchmark:
    """
    This class holds a single group's benchmark results. Each statistic and
    metadata associated with a benchmark is stored in lists internally so it
    can be plotted in order when printing the table to the console.
    """

    def __init__(self, benchmark: dict) -> None:
        """
        Initialize the internal lists using the stats from the initial benchmark
        used to create this object

        Args:
            benchmark (dict): The dict defining the benchmark. This is in the
                same format as output from pytest_benchmark.
        """
        try:
            load = benchmark["load"]
            benchmark_type = benchmark["type"]
            benchmark_config = benchmark["config"]
            self.stats = {
                "load": [load],
                "type": [benchmark_type],
                "config": [benchmark_config],
                "name": [benchmark["name"]],
                "min": [benchmark["stats"]["min"]],
                "max": [benchmark["stats"]["max"]],
                "mean": [benchmark["stats"]["mean"]],
                "stddev": [benchmark["stats"]["stddev"]],
                "rounds": [benchmark["stats"]["rounds"]],
                "median": [benchmark["stats"]["median"]],
                "iqr": [benchmark["stats"]["iqr"]],
                "q1": [benchmark["stats"]["q1"]],
                "q3": [benchmark["stats"]["q3"]],
                "iqr_outliers": [benchmark["stats"]["iqr_outliers"]],
                "stddev_outliers": [benchmark["stats"]["stddev_outliers"]],
                "outliers": [benchmark["stats"]["outliers"]],
                "ld15iqr": [benchmark["stats"]["ld15iqr"]],
                "hd15iqr": [benchmark["stats"]["hd15iqr"]],
                "ops": [benchmark["stats"]["ops"]],
                "ops_uncertainty": [benchmark["stats"]["ops_uncertainty"]],
                "total": [benchmark["stats"]["total"]],
                "iterations": [benchmark["stats"]["iterations"]],
            }
        except KeyError:
            print(
                "This benchmark cannot be opened as it has not been normalized due to an error"
            )
            sys.exit(1)
        # formatted (stringified) stats computed lazily by finalize()
        self._final_stats = {}

    def add(self, benchmark: dict):
        """
        Add a benchmark to this group

        Args:
            benchmark (dict): The dict defining the benchmark. This is in the
                same format as output from pytest_benchmark.
        """
        load = benchmark["load"]
        benchmark_type = benchmark["type"]
        benchmark_config = benchmark["config"]
        self.stats["name"].append(benchmark["name"])
        self.stats["load"].append(load)
        self.stats["type"].append(benchmark_type)
        self.stats["config"].append(benchmark_config)
        # the metadata keys above are not present in benchmark["stats"], so
        # they raise KeyError here and are skipped by the loop
        for key in self.stats.keys():
            try:
                self.stats[key].append(benchmark["stats"][key])
            except KeyError:
                continue

    @staticmethod
    def _data_format(arg):
        """
        Define the highlighting and formatting rules for the different metadata
        in the benchmarks

        Args:
            arg (str): The metadata type to format (e.g. load, min, max etc.)

        Returns:
            partial([data]): A function that accepts [data] of the type of arg and
                returns it as a list of formatted strings for rich to print out
        """
        data_format = {
            "load": {"highlight": Highlight.none, "format": "d"},
            "name": {"highlight": Highlight.none, "format": "s"},
            "type": {"highlight": Highlight.none, "format": "s"},
            "config": {"highlight": Highlight.none, "format": "s"},
            "min": {"highlight": Highlight.smallest, "format": "1.2e"},
            "max": {"highlight": Highlight.smallest, "format": "1.2e"},
            "mean": {"highlight": Highlight.smallest, "format": "1.2e"},
            "stddev": {"highlight": Highlight.smallest, "format": "1.2e"},
            "rounds": {"highlight": Highlight.none, "format": "1.2e"},
            "median": {"highlight": Highlight.smallest, "format": "1.2e"},
            "iqr": {"highlight": Highlight.smallest, "format": "1.2e"},
            "q1": {"highlight": Highlight.smallest, "format": "1.2e"},
            "q3": {"highlight": Highlight.smallest, "format": "1.2e"},
            "iqr_outliers": {"highlight": Highlight.none, "format": "1.2e"},
            "stddev_outliers": {"highlight": Highlight.none, "format": "1.2e"},
            "outliers": {"highlight": Highlight.none, "format": "1.2e"},
            "ld15iqr": {"highlight": Highlight.none, "format": "1.2e"},
            "hd15iqr": {"highlight": Highlight.none, "format": "1.2e"},
            "ops": {"highlight": Highlight.largest, "format": ",.2f"},
            "ops_uncertainty": {"highlight": Highlight.smallest, "format": ",.2f"},
            "total": {"highlight": Highlight.none, "format": "1.2e"},
            "iterations": {"highlight": Highlight.none, "format": "1.2e"},
        }

        def format_func(colors, data):
            """
            Formats the list of data using the given colors to highlight the min
            or max if formatting is enabled for the data type

            Args:
                colors (iterable): Iterable of colors (first is used for min,
                    second for max) or None
                data (iterable): List of data to format

            Returns:
                list: Formatted data in rich format
            """
            numeric_values = [
                value for value in data if isinstance(value, (int, float))
            ]
            if numeric_values:
                min_value = min(numeric_values)
                max_value = max(numeric_values)
                # highlighting a single value is meaningless
                enable_colors = len(numeric_values) != 1
            formatted_data = []
            for datum in data:
                if isinstance(datum, (int, float)):
                    if colors is not None and enable_colors:
                        if datum == min_value:
                            formatted_data.append(
                                f"[{colors[0]}]{datum:{data_format[arg]['format']}}[/{colors[0]}]"
                            )
                        elif datum == max_value:
                            formatted_data.append(
                                f"[{colors[1]}]{datum:{data_format[arg]['format']}}[/{colors[1]}]"
                            )
                        else:
                            formatted_data.append(
                                f"{datum:{data_format[arg]['format']}}"
                            )
                    else:
                        formatted_data.append(f"{datum:{data_format[arg]['format']}}")
                else:
                    formatted_data.append(str(datum))
            return formatted_data

        if data_format[arg]["highlight"] != Highlight.none:
            if data_format[arg]["highlight"] == Highlight.smallest:
                return functools.partial(format_func, ("green", "red"))
            return functools.partial(format_func, ("red", "green"))
        return functools.partial(format_func, None)

    def finalize(self, *args):
        """
        This should be called prior to printing the benchmark. The arguments to this
        function are the stats that are going to be printed. For these stats,
        it computes the formatted strings and saves them for rich to print later.
        """
        for arg in args:
            format_func = self._data_format(arg)
            self._final_stats[arg] = format_func(self.stats[arg])

    def has_type(self, search_str: str):
        # case-insensitive substring match against all benchmark types
        for benchmark_type in self.stats["type"]:
            if search_str.upper() in str(benchmark_type).upper():
                return True
        return False

    def get_row(self):
        """
        Get rows of data for this benchmark based on what was done by finalize()

        Yields:
            list[str]: one row
        """
        for index, _ in enumerate(self.stats["min"]):
            yield [
                str(self._final_stats[arg][index]) for arg in self._final_stats.keys()
            ]


class Benchmarks:
    """
    This class holds all the benchmarks and can be used to print them to console.
    """

    def __init__(self, benchmark, path=None, normalize=False) -> None:
        if normalize:
            normalized_benchmark = self._normalize(benchmark)
            self._benchmark = normalized_benchmark
        else:
            self._benchmark = benchmark
        self.benchmarks = {}
        self.path = path
        # _normalize mutates in place, so iterating the argument is safe
        for test in benchmark["benchmarks"]:
            self._analyze(test)

    @property
    def machine_info(self):
        return self._benchmark["machine_info"]

    @property
    def commit_info(self):
        return self._benchmark["commit_info"]

    def _analyze(self, test: dict):
        """
        For each benchmark, add a new Benchmark object for it if it's new or add
        it to an existing object

        Args:
            test (dict): The dict defining the benchmark. This is in the
                same format as output from pytest_benchmark.
        """
        group = test["group"]
        if group not in self.benchmarks:
            self.benchmarks[group] = Benchmark(test)
        else:
            self.benchmarks[group].add(test)

    def _normalize(self, benchmarks: dict):
        """Fill in the load/type/config/ops_uncertainty fields that raw
        pytest_benchmark output lacks so all benchmarks share one schema."""
        normalized_benchmarks = []
        for benchmark in benchmarks["benchmarks"]:
            if "load" not in benchmark:
                benchmark["load"] = 1
            if "type" not in benchmark:
                if "type" in benchmark["extra_info"]:
                    benchmark["type"] = benchmark["extra_info"]["type"]
                else:
                    benchmark["type"] = "Unknown"
            if "config" not in benchmark:
                if "config" in benchmark["extra_info"]:
                    benchmark["config"] = benchmark["extra_info"]["config"]
                else:
                    benchmark["config"] = "Unknown"
            if "ops_uncertainty" not in benchmark["stats"]:
                # spread between nominal ops and ops at one stddev slower
                benchmark["stats"]["ops_uncertainty"] = benchmark["stats"]["ops"] - (
                    1 / (benchmark["stats"]["mean"] + benchmark["stats"]["stddev"])
                )
            normalized_benchmarks.append(benchmark)
        benchmarks["benchmarks"] = normalized_benchmarks
        return benchmarks

    def add(self, benchmark):
        self._benchmark["benchmarks"].append(benchmark)
        self._analyze(benchmark)

    def get(self) -> list:
        return self._benchmark["benchmarks"]

    def has_type(self, search_str):
        for _, benchmark in self.benchmarks.items():
            if benchmark.has_type(search_str):
                return True
        return False

    def write(self, path=None):
        """Serialize the raw benchmark data to JSON at path (or self.path)."""
        if path is None:
            path = self.path
        if path is None:
            # not an f-string: there is nothing to interpolate
            print("No path specified to write to")
            return
        with open(path, "w") as f:
            json.dump(self._benchmark, f, indent=4)

    def clear(self):
        self.benchmarks = {}
        self._benchmark["benchmarks"] = []

    def print(self):
        """Print a legend (when needed) and one rich table per group."""
        console = Console()
        table = Table(
            show_header=True,
            header_style="bold magenta",
            title="Legend",
            title_style="bold yellow",
        )
        table.add_column("Term")
        table.add_column("Meaning", max_width=80)
        show_legend = False
        if self.has_type("wrk"):
            table.add_row(
                "Tx:Cy:tz",
                "Indicates wrk's configuration i.e. that the wrk application was run with x threads, y total TCP connections and for z time",
            )
            show_legend = True
        if self.has_type("cpp"):
            table.add_row(
                "Ix:Ty",
                "Indicates the cpp benchmark's configuration i.e. it was run with x images and y threads",
            )
            show_legend = True
        if show_legend:
            console.print(table)
        for key, value in self.benchmarks.items():
            table = Table(
                show_header=True,
                header_style="bold magenta",
                title=key,
                title_style="bold yellow",
            )
            table.add_column("Name")
            table.add_column("Type")
            table.add_column("Config")
            table.add_column("Workers")
            table.add_column("Min (s)")
            table.add_column("Max (s)")
            table.add_column("Mean (s)")
            table.add_column("StdDev (s)")
            table.add_column("Op/s")
            table.add_column("Op/s +/-")
            value.finalize(
                "name",
                "type",
                "config",
                "load",
                "min",
                "max",
                "mean",
                "stddev",
                "ops",
                "ops_uncertainty",
            )
            for row in value.get_row():
                table.add_row(*row)
            console.print(table)

    def __str__(self):
        return json.dumps(self._benchmark)


def get_last_benchmark_path(index) -> Optional[str]:
    """
    Get the path to the last generated benchmark in the benchmark directory

    Args:
        index (int): return the nth benchmark (0 means newest)

    Returns:
        Optional[str]: Path to the benchmark if found
    """
    benchmark_dir = os.getenv("PROTEUS_ROOT") + "/tests/.benchmarks"
    machine_id = pytest_benchmark.utils.get_machine_id()
    # rglob returns a generator, which is always truthy; materialize it so an
    # empty directory correctly returns None instead of raising IndexError
    list_of_files = list(pathlib.Path(f"{benchmark_dir}/{machine_id}").rglob("*.json"))
    if list_of_files:
        return sorted(list_of_files, key=os.path.getctime, reverse=True)[index]
    return None


def get_benchmark(
    path: Optional[str] = None, index=0, normalize=False
) -> Optional[Benchmarks]:
    """
    Get the last nth generated benchmark from the directory

    Args:
        path (Optional[str], optional): Get a specific benchmark. Defaults to None.
        index (int, optional): Get the nth newest benchmark. Defaults to 0.
        normalize (bool, optional): Normalize the benchmark. Defaults to False.

    Returns:
        Optional[Benchmarks]: The chosen benchmark or None if an empty directory is used
    """
    if path is not None:
        last_benchmark = path
    else:
        last_benchmark = get_last_benchmark_path(index)
    if last_benchmark is not None:
        with open(last_benchmark, "r") as f:
            return Benchmarks(json.load(f), last_benchmark, normalize)
    return None


def parse_wrk_output(output: str) -> dict:
    """
    Parse the terminal output of wrk into a dictionary. The output format is
    defined by write_lua() in the tests.

    Args:
        output (str): Raw terminal output from wrk

    Returns:
        dict: Organized results from wrk
    """
    lines = output.split("\n")
    # the last three non-empty lines are CSV: latencies, requests, summaries
    raw_stats = lines[-4:-1]
    latencies = raw_stats[0].split(",")
    requests = raw_stats[1].split(",")
    summaries = raw_stats[2].split(",")
    stats = {
        "latencies": {},
        "requests": {},
        "summaries": {},
    }
    metrics = ["min", "max", "mean", "stdev"]
    metrics_summary = ["duration", "requests", "bytes"]
    # min/max are integers; mean/stdev are floats
    for latency, label in zip(latencies[:2], metrics[:2]):
        stats["latencies"][label] = int(latency)
    for latency, label in zip(latencies[2:], metrics[2:]):
        stats["latencies"][label] = float(latency)
    for request, label in zip(requests[:2], metrics[:2]):
        stats["requests"][label] = int(request)
    for request, label in zip(requests[2:], metrics[2:]):
        stats["requests"][label] = float(request)
    for summary, label in zip(summaries, metrics_summary):
        stats["summaries"][label] = int(summary)
    return stats


# Template benchmark used to give wrk/cpp results the same schema as
# pytest_benchmark output; stats values are overwritten before use.
BASE_BENCHMARK = {
    "group": "facedetect_dpucadf8h",
    "name": "test_benchmark_facedetect_dpucadf8h_1",
    "fullname": "test_facedetect.py::TestInferImageFacedetectDPUCADF8H::test_benchmark_facedetect_dpucadf8h_1",
    "params": None,
    "param": None,
    "stats": {
        "min": 0.0855791429639794,
        "max": 0.10293014405760914,
        "mean": 0.093361033460083,
        "stddev": 0.005477697831711018,
        "rounds": 11,
        "median": 0.09155373001703992,
        "iqr": 0.005927730540861376,
        "q1": 0.09056435847014654,
        "q3": 0.09649208901100792,
        "iqr_outliers": 0,
        "stddev_outliers": 4,
        "outliers": "4;0",
        "ld15iqr": 0.0855791429639794,
        "hd15iqr": 0.10293014405760914,
        "ops": 10.71110679625837,
        "ops_uncertainty": 10.71110679625837,
        "total": 1.026971368060913,
        "iterations": 1,
    },
}


def make_wrk_benchmarks(raw_stats, benchmark, wrk_config, load):
    """Convert combined wrk stats into a pytest_benchmark-style dict.

    Returns a 1-tuple so callers can iterate uniformly if more derived
    tables (e.g. per-thread latencies) are re-enabled later.
    """
    wrk_setting = f"T{wrk_config[0]}:C{wrk_config[1]}:t{wrk_config[2]}"
    benchmark_wrk = copy.deepcopy(benchmark)
    # clear other stats from pytest-benchmark
    del benchmark_wrk["extra_info"]
    del benchmark_wrk["options"]
    benchmark_wrk["load"] = load
    benchmark_wrk["type"] = "rest (wrk)"
    benchmark_wrk["config"] = wrk_setting
    benchmark_wrk["name"] = benchmark["name"]
    benchmark_wrk["stats"]["rounds"] = 1
    benchmark_wrk["stats"]["median"] = None
    benchmark_wrk["stats"]["iqr"] = None
    benchmark_wrk["stats"]["q1"] = None
    benchmark_wrk["stats"]["q3"] = None
    benchmark_wrk["stats"]["iqr_outliers"] = None
    benchmark_wrk["stats"]["stddev_outliers"] = None
    benchmark_wrk["stats"]["outliers"] = None
    benchmark_wrk["stats"]["ld15iqr"] = None
    benchmark_wrk["stats"]["hd15iqr"] = None
    benchmark_wrk["stats"]["total"] = None
    benchmark_wrk["stats"]["iterations"] = 1
    # NOTE: derived per-thread latency/request tables were removed here as
    # they were unused; see repository history if they need to be restored
    benchmark_summary = copy.deepcopy(benchmark_wrk)
    # wrk reports latencies in microseconds; convert to seconds
    benchmark_summary["stats"]["min"] = raw_stats["latencies"]["min"] / 1e6
    benchmark_summary["stats"]["max"] = raw_stats["latencies"]["max"] / 1e6
    benchmark_summary["stats"]["mean"] = raw_stats["latencies"]["mean"] / 1e6
    benchmark_summary["stats"]["stddev"] = raw_stats["latencies"]["stdev"] / 1e6
    benchmark_summary["stats"]["ops"] = raw_stats["summaries"]["requests"] / (
        raw_stats["summaries"]["duration"] / 1e6
    )
    # wrk's stddev and means are per thread which doesn't correlate directly to the mean
    # so calculating uncertainty with them is not accurate
    benchmark_summary["stats"]["ops_uncertainty"] = "N/A"
    return (benchmark_summary,)


def combine_wrk_stats(samples):
    """Aggregate repeated wrk runs into one stats dict."""
    stats = {
        "latencies": {},
        "requests": {},
        "summaries": {},
    }
    stats["latencies"]["min"] = min(x["latencies"]["min"] for x in samples)
    stats["latencies"]["max"] = max(x["latencies"]["max"] for x in samples)
    stats["latencies"]["mean"] = statistics.mean(
        x["latencies"]["mean"] for x in samples
    )
    # pooled std dev
    stats["latencies"]["stdev"] = math.sqrt(
        sum(math.pow(x["latencies"]["stdev"], 2) for x in samples)
    )
    stats["summaries"]["requests"] = sum(x["summaries"]["requests"] for x in samples)
    stats["summaries"]["duration"] = sum(x["summaries"]["duration"] for x in samples)
    return stats


def wrk_benchmarks(config: Config, benchmarks: Benchmarks):
    """Run wrk load tests for every benchmark that declares a lua script.

    For each benchmark, each configured worker count is loaded into the
    server and every (threads, connections, time) combination is run
    repeat_count times; combined results are appended to benchmarks.
    """
    client = Client(config.http_address)
    addr = socket.gethostbyname(config.http_address.split(":")[0])
    if not ipaddress.ip_address(addr).is_loopback:
        # remote server: it must already be running
        assert client.server_live()
        server = None
    else:
        server = Server()
    wrk_options = config.wrk
    with Progress() as progress:
        task0 = progress.add_task(
            "Running wrk benchmarks...", total=len(benchmarks.get())
        )
        for benchmark in benchmarks.get():
            try:
                extra_info = benchmark["extra_info"]
            except KeyError:
                progress.update(task0, advance=1)
                continue
            if "lua" in extra_info:
                lua_file = (
                    os.getenv("PROTEUS_ROOT")
                    + f"/tests/workers/{extra_info['lua']}.lua"
                )
                if not os.path.exists(lua_file):
                    print(f"Lua file not found, skipping: {lua_file}")
                    continue
                if wrk_options.workers is not None:
                    loads = wrk_options.workers
                else:
                    loads = [1]
                if server is not None and config.start_server:
                    server.start(True)
                    client.wait_until_live()
                else:
                    assert client.server_live()
                repeat_wrk_count = config.repeat
                task1 = progress.add_task(
                    f"Running {benchmark['name']}", total=len(loads)
                )
                for load in loads:
                    model = extra_info["model"]
                    parameters = extra_info["parameters"]
                    # disable sharing so each load() gets its own worker
                    if parameters is None:
                        parameters = {"share": False}
                    else:
                        parameters["share"] = False
                    for _ in range(load):
                        client.load(model, parameters)
                    while not client.model_ready(model):
                        pass
                    infer_endpoint = client.get_address("infer", model)
                    total = (
                        len(wrk_options.threads)
                        * len(wrk_options.connections)
                        * len(wrk_options.time)
                    )
                    task2 = progress.add_task(
                        f"Running with {load} worker(s)...", total=total
                    )
                    for wrk_config in itertools.product(
                        wrk_options.threads, wrk_options.connections, wrk_options.time
                    ):
                        # TODO(varunsh): we need to check how many requests actually succeeded
                        wrk_command = [
                            "wrk",
                            f"-t{wrk_config[0]}",
                            f"-c{wrk_config[1]}",
                            f"-d{wrk_config[2]}",
                            "-s",
                            lua_file,
                            infer_endpoint,
                        ]
                        if config.verbosity > 0:
                            print(f"wrk command: \n {' '.join(wrk_command)}")
                        wrk_stats = []
                        task3 = progress.add_task(
                            f"Running wrk: threads: {wrk_config[0]}, TCP connections: {wrk_config[1]}, time: {wrk_config[2]}",
                            total=repeat_wrk_count,
                        )
                        for _ in range(repeat_wrk_count):
                            try:
                                ret = subprocess.run(
                                    wrk_command,
                                    check=True,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                )
                            except subprocess.CalledProcessError as ex:
                                print(ex.stdout)
                                print(ex.stderr)
                                sys.exit(ex.returncode)
                            wrk_output = ret.stdout.decode("utf-8")
                            wrk_stats.append(parse_wrk_output(wrk_output))
                            progress.update(task3, advance=1)
                        raw_stats = combine_wrk_stats(wrk_stats)
                        new_benchmarks = make_wrk_benchmarks(
                            raw_stats, benchmark, wrk_config, load
                        )
                        for wrk_benchmark in new_benchmarks:
                            benchmarks.add(wrk_benchmark)
                        progress.update(task2, advance=1)
                    for _ in range(load):
                        client.unload(model)
                    progress.update(task1, advance=1)
                if server is not None and config.start_server:
                    server.stop()
                    client.wait_until_stop()
            progress.update(task0, advance=1)
    return benchmarks


def parse_cpp_output(output: str) -> dict:
    """
    Parse the terminal output of cpp tests into a dictionary

    Args:
        output (str): Raw terminal output from the executable

    Returns:
        dict: Organized results from the executable
    """
    lines = output.split("\n")
    raw_stats = lines[-3:-1]
    queries = raw_stats[0].split(" ")[4]
    time = raw_stats[0].split(" ")[6]
    queries_per_sec = raw_stats[1].split(" ")[4]
    stats = {
        "queries": int(queries),
        "time": float(time) / 1000,  # convert to seconds
        "qps": float(queries_per_sec),
    }
    return stats


def combine_cpp_stats(samples):
    """Aggregate repeated cpp benchmark runs into one stats dict."""
    stats = {}
    stats["min"] = min(x["time"] / x["queries"] for x in samples)
    stats["max"] = max(x["time"] / x["queries"] for x in samples)
    stats["mean"] = statistics.mean(x["time"] / x["queries"] for x in samples)
    if len(samples) > 1:
        stats["stdev"] = statistics.stdev(
            [x["time"] / x["queries"] for x in samples], stats["mean"]
        )
    else:
        # stdev of a single sample is undefined; treat as zero spread
        stats["stdev"] = 0
    stats["ops"] = statistics.mean(x["qps"] for x in samples)
    return stats


def make_cpp_benchmarks(raw_stats, path: pathlib.Path, cpp_config, repeat):
    """Convert combined cpp stats into a pytest_benchmark-style dict."""
    cpp_setting = f"I{cpp_config[0]}:T{cpp_config[1]}"
    name = path.stem
    benchmark = copy.deepcopy(BASE_BENCHMARK)
    if name.startswith("test_"):
        benchmark["group"] = name[len("test_") :]
    else:
        benchmark["group"] = name
    benchmark["type"] = "native (cpp)"
    benchmark["config"] = cpp_setting
    benchmark["fullname"] = str(path)
    benchmark["load"] = cpp_config[2]
    benchmark["name"] = name
    if cpp_config[3]:
        benchmark["name"] += " (reference)"
    benchmark["stats"]["rounds"] = 1
    benchmark["stats"]["median"] = None
    benchmark["stats"]["iqr"] = None
    benchmark["stats"]["q1"] = None
    benchmark["stats"]["q3"] = None
    benchmark["stats"]["iqr_outliers"] = None
    benchmark["stats"]["stddev_outliers"] = None
    benchmark["stats"]["outliers"] = None
    benchmark["stats"]["ld15iqr"] = None
    benchmark["stats"]["hd15iqr"] = None
    benchmark["stats"]["total"] = None
    benchmark["stats"]["iterations"] = repeat
    benchmark["stats"]["min"] = raw_stats["min"]
    benchmark["stats"]["max"] = raw_stats["max"]
    benchmark["stats"]["mean"] = raw_stats["mean"]
    benchmark["stats"]["stddev"] = raw_stats["stdev"]
    benchmark["stats"]["ops"] = raw_stats["ops"]
    mean = benchmark["stats"]["mean"]
    stddev = benchmark["stats"]["stddev"]
    ops = benchmark["stats"]["ops"]
    # spread between nominal ops and ops at one stddev slower
    benchmark["stats"]["ops_uncertainty"] = ops - (1 / (mean + stddev))
    return benchmark


def get_benchmark_exe(path: pathlib.Path):
    """Return the built executable for a cpp source marked '@brief Benchmark',
    or None if the source is not a benchmark or has not been built."""
    relative_path_to_exe = (
        str(path.parent)[len(os.getenv("PROTEUS_ROOT")) :] + f"/{path.stem}"
    )
    benchmark_path = os.getenv("PROTEUS_ROOT") + f"/build/Release{relative_path_to_exe}"
    if not os.path.exists(benchmark_path):
        return None
    with open(path, "r") as f:
        for line in f:
            if "@brief Benchmark" in line:
                return benchmark_path
            # the marker must appear before the first include to count
            if line.strip().startswith("#include"):
                return None
    return None


def cpp_benchmarks(config: Config, benchmarks: Benchmarks):
    """Run the native cpp benchmark executables and append their results."""
    benchmarks_to_run = set()
    benchmark_dir = os.getenv("PROTEUS_ROOT") + "/tests"
    accept_all_benchmarks = config.benchmarks is None
    for path in pathlib.Path(benchmark_dir).rglob("*.cpp"):
        if not accept_all_benchmarks:
            # reuse pytest's -k expression matching for name filtering
            expression = Expression.compile(config.benchmarks)
            if not expression.evaluate(KeywordMatcher([path.stem])):
                continue
        benchmark = get_benchmark_exe(path)
        if benchmark is not None:
            benchmarks_to_run.add(benchmark)
    cpp_options = config.cpp
    repeat_count = config.repeat
    with Progress() as progress:
        task0 = progress.add_task(
            "Running cpp benchmarks...", total=len(benchmarks_to_run)
        )
        for benchmark in benchmarks_to_run:
            # only pass flags that this executable's --help actually lists
            valid_flags = [""]
            for flag in cpp_options.flags:
                if not flag:
                    continue
                cpp_command = [benchmark, "--help"]
                ret = subprocess.run(cpp_command, check=True, stdout=subprocess.PIPE)
                help_output = ret.stdout.decode("utf-8")
                if help_output.find(flag.split(" ")[0]) != -1:
                    valid_flags.append(flag)
            total = (
                len(cpp_options.images)
                * len(cpp_options.threads)
                * len(cpp_options.workers)
                * len(valid_flags)
            )
            task1 = progress.add_task(f"Running {benchmark}", total=total)
            for cpp_config in itertools.product(
                cpp_options.images,
                cpp_options.threads,
                cpp_options.workers,
                valid_flags,
            ):
                cpp_command = [
                    benchmark,
                    "-i",
                    str(cpp_config[0]),
                    "-t",
                    str(cpp_config[1]),
                    "-r",
                    str(cpp_config[2]),
                    cpp_config[3],
                ]
                cpp_stats = []
                task2 = progress.add_task(
                    f"Configuration: Images: {cpp_config[0]}, Threads: {cpp_config[1]}, Workers: {cpp_config[2]}, Flags: {cpp_config[3]}",
                    total=repeat_count,
                )
                if config.verbosity > 0:
                    print(f"cpp command: \n {' '.join(cpp_command)}")
                for _ in range(repeat_count):
                    ret = subprocess.run(
                        cpp_command, check=True, stdout=subprocess.PIPE
                    )
                    cpp_output = ret.stdout.decode("utf-8")
                    cpp_stats.append(parse_cpp_output(cpp_output))
                    progress.update(task2, advance=1)
                raw_stats = combine_cpp_stats(cpp_stats)
                cpp_benchmark = make_cpp_benchmarks(
                    raw_stats, pathlib.Path(benchmark), cpp_config, repeat_count
                )
                benchmarks.add(cpp_benchmark)
                progress.update(task1, advance=1)
            progress.update(task0, advance=1)
    return benchmarks


def pytest_benchmarks(config: Config, quiet=False):
    """Run the pytest-based benchmarks through test.sh and save the results."""
    hostname, port = config.http_address.split(":")
    client = Client(config.http_address)
    addr = socket.gethostbyname(hostname)
    if not ipaddress.ip_address(addr).is_loopback or not config.start_server:
        try:
            assert client.server_live()
        except proteus.exceptions.ConnectionError:
            print(
                f"Cannot connect to HTTP server at {config.http_address}. Check the address or set it to start automatically"
            )
            sys.exit(1)
    cmd = (
        os.getenv("PROTEUS_ROOT")
        + f"/tests/test.sh --benchmark only --hostname {hostname} --http_port {port}"
    )
    if config.benchmarks:
        cmd += f' -k "{config.benchmarks}"'
    try:
        if not quiet:
            subprocess.run(
                ["/bin/bash", "-c", cmd],
                stdout=sys.stdout,
                stderr=subprocess.PIPE,
                check=True,
            )
        else:
            subprocess.run(
                ["/bin/bash", "-c", cmd],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=True,
            )
    except subprocess.CalledProcessError as ex:
        print("Running pytest tests failed. Exiting benchmarking")
        sys.exit(ex.returncode)
    benchmarks = get_benchmark(normalize=True)
    benchmarks.write()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run Proteus benchmarking")
    parser.add_argument(
        "-k",
        action="store",
        default="",
        help="choose which benchmarks to run by substring",
    )
    parser.add_argument(
        "-print",
        action="store",
        default=None,
        type=int,
        help="print the last nth benchmark and exit",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        default=False,
        help="don't prompt to verify configuration",
    )
    args = parser.parse_args()
    if args.print is not None:
        benchmarks = get_benchmark(index=int(args.print))
        benchmarks.print()
        sys.exit(0)
    config = Config(os.getenv("PROTEUS_ROOT") + "/tools/benchmark.yml")
    if args.k:
        config.benchmarks = args.k
    if not args.force:
        print(config)
        retval = input("Run benchmarking with this configuration? [Y/N] ")
        if retval.upper() != "Y":
            print("Exiting...")
            sys.exit(0)
    # if wrk tests are run, we need to also run pytest tests
    if config.pytest.enabled or config.wrk.enabled:
        pytest_benchmarks(config)
    else:
        # if pytest is disabled, run anyway with a small test suite to create a
        # new benchmark file that can be populated by later benchmarks
        tmp = config.benchmarks
        config.benchmarks = "echo"
        pytest_benchmarks(config, True)
        config.benchmarks = tmp
        benchmarks = get_benchmark(normalize=True)
        benchmarks.clear()
        benchmarks.write()
    # normalize the pytest generated benchmarks to be in line with the others
    benchmarks = get_benchmark(normalize=True)
    if config.wrk.enabled:
        benchmarks = wrk_benchmarks(config, benchmarks)
    if config.cpp.enabled:
        benchmarks = cpp_benchmarks(config, benchmarks)
    benchmarks.write()

Full Screen

Full Screen

stats.py

Source:stats.py Github

copy

Full Screen

...64 @cached_property65 def median(self):66 return statistics.median(self.data)67 @cached_property68 def ld15iqr(self):69 """70 Tukey-style Lowest Datum within 1.5 IQR under Q1.71 """72 if len(self.data) == 1:73 return self.data[0]74 else:75 return self.sorted_data[bisect_left(self.sorted_data, self.q1 - 1.5 * self.iqr)]76 @cached_property77 def hd15iqr(self):78 """79 Tukey-style Highest Datum within 1.5 IQR over Q3.80 """81 if len(self.data) == 1:82 return self.data[0]...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub — from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.

Run pytest-benchmark automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 minutes of automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful