diff --git a/README.md b/README.md
index 1b66e2f..2656029 100644
--- a/README.md
+++ b/README.md
@@ -65,7 +65,7 @@ Running the **simple.conf** job:
 ## Mandatory
 - python >= 3.9
 - [python dependencies](./requirements/base.in)
-- turbostat >= 2022.07.28
+- turbostat >= 2022.04.16
 - numactl
 - dmidecode
 - util-linux >= 2.32
@@ -75,4 +75,4 @@ Running the **simple.conf** job:
 ## Optional
 - ipmitool
 - ilorest (for HPE servers)
-- stress-ng >= 0.17.04
\ No newline at end of file
+- stress-ng >= 0.17.04
diff --git a/hwbench/bench/engine.py b/hwbench/bench/engine.py
index bd44efa..110e030 100644
--- a/hwbench/bench/engine.py
+++ b/hwbench/bench/engine.py
@@ -3,6 +3,7 @@
 from typing import Optional
 
 from ..utils.external import External
+from ..utils.helpers import fatal
 from .parameters import BenchmarkParameters
 
 
@@ -47,6 +48,12 @@ def __init__(
         self.engine_name = name
         self.binary = binary
         self.modules = modules
+        # FIXME: If the import is done at the file level, the mocking is lost here
+        # So I'm importing is_binary_available just before the call :/
+        from ..utils.helpers import is_binary_available
+
+        if not is_binary_available(self.binary):
+            fatal(f"Engine {name} requires '{binary}' binary, please install it.")
 
     def get_binary(self) -> str:
         return self.binary
diff --git a/hwbench/bench/monitoring.py b/hwbench/bench/monitoring.py
index 65c7957..ee27d68 100644
--- a/hwbench/bench/monitoring.py
+++ b/hwbench/bench/monitoring.py
@@ -57,21 +57,16 @@ def __set_metric(self, metric: Metrics, value: dict[str, dict[str, MonitorMetric
 
     def prepare(self):
         """Preparing the monitoring"""
-        # Let's be sure the monitoring is functional by
-        # - checking the BMC is actually connected to the network
-        if self.vendor.get_bmc().get_ip() == "0.0.0.0":
-            h.fatal("BMC has no IP, monitoring will not be possible")
-        print(
-            f"Starting monitoring for {self.vendor.name()} vendor with {self.vendor.get_bmc().get_ip()}"
-        )
+        v = self.vendor
+        bmc = self.vendor.get_bmc()
 
-        def check_monitoring(metric: Metrics):
+        def check_monitoring(source: str, metric: Metrics):
             data = self.get_metric(metric)
             if not len(data):
                 h.fatal(f"Cannot detect {str(metric)} metrics")
 
             print(
-                f"Monitoring {str(metric)} metrics:"
+                f"Monitoring/{source}: {str(metric)} metrics:"
                 + ", ".join(
                     [f"{len(data[pc])}x{pc}" for pc in data if len(data[pc]) > 0]
                 )
@@ -79,30 +74,35 @@ def check_monitoring(metric: Metrics):
 
         # - checking if the CPU monitoring works
         if self.hardware.cpu.get_arch() == "x86_64":
+            print("Monitoring/turbostat: initialize")
             self.turbostat = Turbostat(
                 self.hardware,
                 self.get_metric(Metrics.FREQ),
                 self.get_metric(Metrics.POWER_CONSUMPTION),
             )
-            check_monitoring(Metrics.FREQ)
+            check_monitoring("turbostat", Metrics.FREQ)
+
+        print(
+            f"Monitoring/BMC: initialize {v.name()} vendor with {bmc.get_driver_name()} driver @ {bmc.get_ip()}"
+        )
 
         # - checking if the bmc monitoring works
         # These calls will also initialize the datastructures out of the monitoring loop
         self.vendor.get_bmc().read_thermals(self.get_metric(Metrics.THERMAL))
-        check_monitoring(Metrics.THERMAL)
+        check_monitoring("BMC", Metrics.THERMAL)
 
         self.vendor.get_bmc().read_fans(self.get_metric(Metrics.FANS))
-        check_monitoring(Metrics.FANS)
+        check_monitoring("BMC", Metrics.FANS)
 
         self.vendor.get_bmc().read_power_consumption(
             self.get_metric(Metrics.POWER_CONSUMPTION)
         )
-        check_monitoring(Metrics.POWER_CONSUMPTION)
+        check_monitoring("BMC", Metrics.POWER_CONSUMPTION)
 
         self.vendor.get_bmc().read_power_supplies(
             self.get_metric(Metrics.POWER_SUPPLIES)
         )
-        check_monitoring(Metrics.POWER_SUPPLIES)
+        check_monitoring("BMC", Metrics.POWER_SUPPLIES)
 
     def __monitor_bmc(self):
         """Monitor the bmc metrics"""
diff --git a/hwbench/bench/test_benchmarks_common.py b/hwbench/bench/test_benchmarks_common.py
index 35818ab..14cadd6 100644
--- a/hwbench/bench/test_benchmarks_common.py
+++ b/hwbench/bench/test_benchmarks_common.py
@@ -59,11 +59,15 @@ def get_benches(self):
 
     def parse_jobs_config(self, validate_parameters=True):
         # We need to mock turbostat when parsing config with monitoring
-        # We mock the run() command to get a constant output
-        with patch("hwbench.environment.turbostat.Turbostat.run") as ts:
-            with open("tests/parsing/turbostat/run", "r") as f:
-                ts.return_value = ast.literal_eval(f.read())
-                return self.benches.parse_jobs_config(validate_parameters)
+        with patch("hwbench.utils.helpers.is_binary_available") as iba:
+            iba.return_value = True
+            # We mock the run() and check_version() command to get a constant output
+            with patch("hwbench.environment.turbostat.Turbostat.check_version") as cv:
+                cv.return_value = True
+                with patch("hwbench.environment.turbostat.Turbostat.run") as ts:
+                    with open("tests/parsing/turbostat/run", "r") as f:
+                        ts.return_value = ast.literal_eval(f.read())
+                        return self.benches.parse_jobs_config(validate_parameters)
 
     def get_jobs_config(self) -> config.Config:
         return self.jobs_config
diff --git a/hwbench/config/test_parse.py b/hwbench/config/test_parse.py
index 0afc0e7..7690e5c 100644
--- a/hwbench/config/test_parse.py
+++ b/hwbench/config/test_parse.py
@@ -47,7 +47,9 @@ def test_keywords(self):
                     .read_bytes()
                     .split(b":", 1)
                 )
-                self.get_jobs_config().validate_sections()
+                with patch("hwbench.utils.helpers.is_binary_available") as iba:
+                    iba.return_value = True
+                    self.get_jobs_config().validate_sections()
         except Exception as exc:
             assert False, f"'validate_sections' detected a syntax error {exc}"
 
diff --git a/hwbench/engines/test_parse.py b/hwbench/engines/test_parse.py
index 7e33d5a..ce7d315 100644
--- a/hwbench/engines/test_parse.py
+++ b/hwbench/engines/test_parse.py
@@ -22,13 +22,17 @@
 def mock_engine(version: str) -> StressNG:
     # We need to patch list_module_parameters() function
     # to avoid considering the local stress-ng binary
-    with patch("hwbench.engines.stressng.EngineModuleCpu.list_module_parameters") as p:
-        p.return_value = (
-            pathlib.Path(f"./tests/parsing/stressngmethods/{version}/stdout")
-            .read_bytes()
-            .split(b":", 1)
-        )
-        return StressNG()
+    with patch("hwbench.utils.helpers.is_binary_available") as iba:
+        iba.return_value = True
+        with patch(
+            "hwbench.engines.stressng.EngineModuleCpu.list_module_parameters"
+        ) as p:
+            p.return_value = (
+                pathlib.Path(f"./tests/parsing/stressngmethods/{version}/stdout")
+                .read_bytes()
+                .split(b":", 1)
+            )
+            return StressNG()
 
 
 class TestParse(unittest.TestCase):
diff --git a/hwbench/environment/hardware.py b/hwbench/environment/hardware.py
index 122abe6..7520905 100644
--- a/hwbench/environment/hardware.py
+++ b/hwbench/environment/hardware.py
@@ -52,6 +52,7 @@ def dump(self) -> dict[str, Optional[str | int] | dict]:
         return {
             "dmi": self.dmi.dump(),
             "cpu": self.cpu.dump(),
+            "bmc": self.vendor.get_bmc().dump(),
         }
 
     def cpu_flags(self) -> list[str]:
diff --git a/hwbench/environment/turbostat.py b/hwbench/environment/turbostat.py
index c7af416..8a48793 100644
--- a/hwbench/environment/turbostat.py
+++ b/hwbench/environment/turbostat.py
@@ -1,8 +1,11 @@
 import os
+import re
 import subprocess
 from enum import Enum
+from packaging.version import Version
 from ..environment.hardware import BaseHardware
 from ..bench.monitoring_structs import MonitorMetric, CPUContext, PowerContext
+from ..utils.helpers import is_binary_available, fatal
 
 CORE = "core"
 PACKAGE = "package"
@@ -48,6 +51,7 @@ def __init__(
             CPUSTATS.CORE_WATTS,
             CPUSTATS.PACKAGE_WATTS,
         }
+        self.min_release = Version("2022.04.16")
         self.header = ""
         self.freq_metrics = freq_metrics
         self.power_metrics = power_metrics
@@ -55,9 +59,43 @@ def __init__(
         self.process: subprocess.Popen[bytes] = None  # type: ignore[assignment]
         self.freq_metrics[str(CPUContext.CPU)] = {}  # type: ignore[no-redef]
         self.power_metrics[str(PowerContext.CPU)] = {}  # type: ignore[no-redef]
+        # Let's make a first quick run to detect system
+        self.check_version()
 
         self.pre_run()
 
+    def check_version(self):
+        """Check that turbostat is installed and at least as recent as self.min_release"""
+        english_env = os.environ.copy()
+        english_env["LC_ALL"] = "C"
+
+        if not is_binary_available("turbostat"):
+            fatal("Missing turbostat binary, please install it.")
+
+        self.process = subprocess.Popen(
+            ["turbostat", "--version"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            env=english_env,
+            stdin=subprocess.DEVNULL,
+        )
+        # turbostat version 2022.04.16 - Len Brown
+        match = re.search(
+            r"turbostat version (?P<version>[0-9]+\.[0-9]+\.[0-9]+).*",
+            str(self.get_process_output()),
+        )
+
+        # The match must be validated before it is dereferenced
+        if not match:
+            fatal("Monitoring/turbostat: Cannot detect turbostat version")
+        current_version = Version(match.group("version"))
+
+        print(f"Monitoring/turbostat: Detected release {current_version}")
+        if current_version < self.min_release:
+            fatal(
+                f"Monitoring/turbostat: minimal expected release is {self.min_release}"
+            )
+
     def reset_metrics(self, power_metrics=None):
         if power_metrics is not None:
             self.power_metrics = power_metrics
@@ -115,14 +153,14 @@ def run(self, interval: float = 1, wait=False):
             "-c",
             f"{self.hardware.get_cpu().get_logical_cores_count()-1}",
             "turbostat",
-            "-c",
+            "--cpu",
             "core",
-            "-q",
+            "--quiet",
             "--interval",
             str(interval),
-            "-n",
+            "--num_iterations",
             "1",
-            "-s",
+            "--show",
         ]
         sensors = ""
         for sensor in CPUSTATS:
@@ -189,9 +227,17 @@ def parse(self):
             items = line.split()
             core_nb = items[int(self.__get_field_position(CPUSTATS.CPU))]
             if self.has(CPUSTATS.CORE_WATTS):
-                self.power_metrics[str(PowerContext.CPU)][f"Core_{core_nb}"].add(
-                    float(items[int(self.__get_field_position(CPUSTATS.CORE_WATTS))])
-                )
+                try:
+                    self.power_metrics[str(PowerContext.CPU)][f"Core_{core_nb}"].add(
+                        float(
+                            items[int(self.__get_field_position(CPUSTATS.CORE_WATTS))]
+                        )
+                    )
+                except IndexError:
+                    # Some processors reports the corewatt in the header but not for all cores ...
+                    # So let's ignore if the metrics does not exist for this core
+                    pass
+
             self.freq_metrics[str(CPUContext.CPU)][f"Core_{core_nb}"].add(
                 float(items[int(self.__get_field_position(CPUSTATS.BUSY_MHZ))])
             )
diff --git a/hwbench/environment/vendors/vendor.py b/hwbench/environment/vendors/vendor.py
index 344bfe2..087773b 100644
--- a/hwbench/environment/vendors/vendor.py
+++ b/hwbench/environment/vendors/vendor.py
@@ -64,6 +64,14 @@ def get_ip(self) -> str:
 
         return ip
 
+    def get_driver_name(self) -> str:
+        """Return the BMC driver name"""
+        return type(self).__name__
+
+    def dump(self) -> dict[str, str]:
+        """Return the dump of the BMC"""
+        return {"driver": self.get_driver_name()}
+
     def connect_redfish(self):
         """Connect to the BMC using Redfish."""
         if not self.vendor.get_monitoring_config_filename():
diff --git a/hwbench/tuning/scheduler.py b/hwbench/tuning/scheduler.py
index 69f7217..6d5ab23 100644
--- a/hwbench/tuning/scheduler.py
+++ b/hwbench/tuning/scheduler.py
@@ -15,6 +15,9 @@ def run(self):
             for dirname in dirnames:
                 diskdir = pathlib.Path(rootpath) / dirname
                 file = diskdir / "queue/scheduler"
+                # Some block devices like zram do not have scheduler
+                if not os.path.isfile(file):
+                    continue
                 previous = file.read_text(encoding="utf-8").rstrip()
                 # see https://docs.kernel.org/block/switching-sched.html
                 # for deeper explanation
diff --git a/hwbench/utils/external.py b/hwbench/utils/external.py
index 19682f0..41f5bab 100644
--- a/hwbench/utils/external.py
+++ b/hwbench/utils/external.py
@@ -2,6 +2,7 @@
 import pathlib
 import subprocess
 from abc import abstractmethod, ABC
+from .helpers import fatal
 
 
 class External(ABC):
@@ -39,24 +40,27 @@ def run(self):
         """Returns the output of parse_cmd (a json-able type)"""
         english_env = os.environ.copy()
         english_env["LC_ALL"] = "C"
-        if self.run_cmd_version():
-            ver = subprocess.run(
-                self.run_cmd_version(),
+        try:
+            if self.run_cmd_version():
+                ver = subprocess.run(
+                    self.run_cmd_version(),
+                    capture_output=True,
+                    cwd=self.out_dir,
+                    env=english_env,
+                    stdin=subprocess.DEVNULL,
+                )
+                self._write_output("version-stdout", ver.stdout)
+                self._write_output("version-stderr", ver.stderr)
+                self.parse_version(ver.stdout, ver.stderr)
+            out = subprocess.run(
+                self.run_cmd(),
                 capture_output=True,
                 cwd=self.out_dir,
                 env=english_env,
                 stdin=subprocess.DEVNULL,
             )
-            self._write_output("version-stdout", ver.stdout)
-            self._write_output("version-stderr", ver.stderr)
-            self.parse_version(ver.stdout, ver.stderr)
-        out = subprocess.run(
-            self.run_cmd(),
-            capture_output=True,
-            cwd=self.out_dir,
-            env=english_env,
-            stdin=subprocess.DEVNULL,
-        )
+        except FileNotFoundError as e:
+            fatal(f"Missing {e.filename} binary, please install it.")
 
         # save outputs
         self._write_output("stdout", out.stdout)
diff --git a/hwbench/utils/helpers.py b/hwbench/utils/helpers.py
index 31b1e0a..b9dacd5 100644
--- a/hwbench/utils/helpers.py
+++ b/hwbench/utils/helpers.py
@@ -2,6 +2,7 @@
 import logging
 import sys
 from datetime import timedelta
+from shutil import which
 from typing import NoReturn
 
 
@@ -24,3 +25,8 @@ def time_to_next_sync(safe_start=True):
     # Let's bump to the next minute o'clock
     next_sync += timedelta(seconds=60 - next_sync.second)
     return (next_sync - now).total_seconds(), next_sync
+
+
+def is_binary_available(binary_name: str) -> bool:
+    """A function to check if a binary is available"""
+    return which(binary_name) is not None