From ffc59dd1c0606930c6aff29dee55b36cb64e13dc Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Tue, 28 May 2024 14:47:56 +0200 Subject: [PATCH 1/9] hwbench: Report BMC driver in use When starting hwbench or when reading a result file, there is no mention of the BMC driver used. This could be useful to understand some metrics or even for hwgraph to take some decision. This commit is : - adding BMC.get_driver_name() to report the class name as the driver name - adding a BMC.dump() so the driver name can be added in the result file. The hardware data structure looks like the following : "hardware": { "dmi": { "vendor": "Dell Inc.", "product": "PowerEdge C6615", "serial": "XXXXXX", "bios": { "version": "1.2.3", "release": "1.2" }, "chassis": { "product": "PowerEdge C6600", "serial": "XXXXXX" }, "sysconf_threads": 128 }, "cpu": { "vendor": "AuthenticAMD", "model": "AMD EPYC 8534P 64-Core Processor", "logical_cores": 128, "physical_cores": 64, "numa_domains": 8, "sockets": 1 }, "bmc": { "driver": "IDRAC" } - updating the startup message to indicate which driver is used, a typical output looks like : python3 -m hwbench.hwbench -j configs/mini.conf -m monitoring.cfg Starting monitoring for DELL vendor with driver IDRAC @ 10.168.97.148 ... 
Signed-off-by: Erwan Velu --- hwbench/bench/monitoring.py | 6 ++++-- hwbench/environment/hardware.py | 1 + hwbench/environment/vendors/vendor.py | 8 ++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/hwbench/bench/monitoring.py b/hwbench/bench/monitoring.py index 65c7957..bdc4c02 100644 --- a/hwbench/bench/monitoring.py +++ b/hwbench/bench/monitoring.py @@ -57,12 +57,14 @@ def __set_metric(self, metric: Metrics, value: dict[str, dict[str, MonitorMetric def prepare(self): """Preparing the monitoring""" + v = self.vendor + bmc = self.vendor.get_bmc() # Let's be sure the monitoring is functional by # - checking the BMC is actually connected to the network - if self.vendor.get_bmc().get_ip() == "0.0.0.0": + if bmc.get_ip() == "0.0.0.0": h.fatal("BMC has no IP, monitoring will not be possible") print( - f"Starting monitoring for {self.vendor.name()} vendor with {self.vendor.get_bmc().get_ip()}" + f"Starting monitoring for {v.name()} vendor with {bmc.get_driver_name()} driver @ {bmc.get_ip()}" ) def check_monitoring(metric: Metrics): diff --git a/hwbench/environment/hardware.py b/hwbench/environment/hardware.py index 122abe6..7520905 100644 --- a/hwbench/environment/hardware.py +++ b/hwbench/environment/hardware.py @@ -52,6 +52,7 @@ def dump(self) -> dict[str, Optional[str | int] | dict]: return { "dmi": self.dmi.dump(), "cpu": self.cpu.dump(), + "bmc": self.vendor.get_bmc().dump(), } def cpu_flags(self) -> list[str]: diff --git a/hwbench/environment/vendors/vendor.py b/hwbench/environment/vendors/vendor.py index 344bfe2..087773b 100644 --- a/hwbench/environment/vendors/vendor.py +++ b/hwbench/environment/vendors/vendor.py @@ -64,6 +64,14 @@ def get_ip(self) -> str: return ip + def get_driver_name(self) -> str: + """Return the BMC driver name""" + return type(self).__name__ + + def dump(self) -> dict[str, str]: + """Return the dump of the BMC""" + return {"driver": self.get_driver_name()} + def connect_redfish(self): """Connect to the BMC using 
Redfish.""" if not self.vendor.get_monitoring_config_filename(): From 6ae594b9e85b3d4733f27f07a8783ba878aaedf9 Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Tue, 28 May 2024 16:31:03 +0200 Subject: [PATCH 2/9] hwbench: Do not crash if block device has no scheduler Some block devices like zram do not have any scheduler. This case made hwbench crash at startup. This commit simply ignores block devices with no scheduler. Signed-off-by: Erwan Velu --- hwbench/tuning/scheduler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hwbench/tuning/scheduler.py b/hwbench/tuning/scheduler.py index 69f7217..6d5ab23 100644 --- a/hwbench/tuning/scheduler.py +++ b/hwbench/tuning/scheduler.py @@ -15,6 +15,9 @@ def run(self): for dirname in dirnames: diskdir = pathlib.Path(rootpath) / dirname file = diskdir / "queue/scheduler" + # Some block devices like zram do not have scheduler + if not os.path.isfile(file): + continue previous = file.read_text(encoding="utf-8").rstrip() # see https://docs.kernel.org/block/switching-sched.html # for deeper explanation From bafe4cf528abdf20ceea82fdfe57d2aa1e76a71a Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Tue, 28 May 2024 17:41:21 +0200 Subject: [PATCH 3/9] hwbench: Adding helper to check if a binary is installed When an engine is using a 3rd-party binary, it's mandatory to test its presence, otherwise the code will crash. 
This commit is : - adding a new helper (is_binary_available) to check if a binary is available - Add a generic check for engines Signed-off-by: Erwan Velu --- hwbench/bench/engine.py | 7 +++++++ hwbench/bench/test_benchmarks_common.py | 10 ++++++---- hwbench/config/test_parse.py | 4 +++- hwbench/engines/test_parse.py | 18 +++++++++++------- hwbench/utils/helpers.py | 6 ++++++ 5 files changed, 33 insertions(+), 12 deletions(-) diff --git a/hwbench/bench/engine.py b/hwbench/bench/engine.py index bd44efa..110e030 100644 --- a/hwbench/bench/engine.py +++ b/hwbench/bench/engine.py @@ -3,6 +3,7 @@ from typing import Optional from ..utils.external import External +from ..utils.helpers import fatal from .parameters import BenchmarkParameters @@ -47,6 +48,12 @@ def __init__( self.engine_name = name self.binary = binary self.modules = modules + # FIXME: If the import is done at the file level, the mocking is lost here + # So I'm importing is_binary_available just before the call :/ + from ..utils.helpers import is_binary_available + + if not is_binary_available(self.binary): + fatal(f"Engine {name} requires '{binary}' binary, please install it.") def get_binary(self) -> str: return self.binary diff --git a/hwbench/bench/test_benchmarks_common.py b/hwbench/bench/test_benchmarks_common.py index 35818ab..ac4ae86 100644 --- a/hwbench/bench/test_benchmarks_common.py +++ b/hwbench/bench/test_benchmarks_common.py @@ -60,10 +60,12 @@ def get_benches(self): def parse_jobs_config(self, validate_parameters=True): # We need to mock turbostat when parsing config with monitoring # We mock the run() command to get a constant output - with patch("hwbench.environment.turbostat.Turbostat.run") as ts: - with open("tests/parsing/turbostat/run", "r") as f: - ts.return_value = ast.literal_eval(f.read()) - return self.benches.parse_jobs_config(validate_parameters) + with patch("hwbench.utils.helpers.is_binary_available") as iba: + iba.return_value = True + with 
patch("hwbench.environment.turbostat.Turbostat.run") as ts: + with open("tests/parsing/turbostat/run", "r") as f: + ts.return_value = ast.literal_eval(f.read()) + return self.benches.parse_jobs_config(validate_parameters) def get_jobs_config(self) -> config.Config: return self.jobs_config diff --git a/hwbench/config/test_parse.py b/hwbench/config/test_parse.py index 0afc0e7..7690e5c 100644 --- a/hwbench/config/test_parse.py +++ b/hwbench/config/test_parse.py @@ -47,7 +47,9 @@ def test_keywords(self): .read_bytes() .split(b":", 1) ) - self.get_jobs_config().validate_sections() + with patch("hwbench.utils.helpers.is_binary_available") as iba: + iba.return_value = True + self.get_jobs_config().validate_sections() except Exception as exc: assert False, f"'validate_sections' detected a syntax error {exc}" diff --git a/hwbench/engines/test_parse.py b/hwbench/engines/test_parse.py index 7e33d5a..ce7d315 100644 --- a/hwbench/engines/test_parse.py +++ b/hwbench/engines/test_parse.py @@ -22,13 +22,17 @@ def mock_engine(version: str) -> StressNG: # We need to patch list_module_parameters() function # to avoid considering the local stress-ng binary - with patch("hwbench.engines.stressng.EngineModuleCpu.list_module_parameters") as p: - p.return_value = ( - pathlib.Path(f"./tests/parsing/stressngmethods/{version}/stdout") - .read_bytes() - .split(b":", 1) - ) - return StressNG() + with patch("hwbench.utils.helpers.is_binary_available") as iba: + iba.return_value = True + with patch( + "hwbench.engines.stressng.EngineModuleCpu.list_module_parameters" + ) as p: + p.return_value = ( + pathlib.Path(f"./tests/parsing/stressngmethods/{version}/stdout") + .read_bytes() + .split(b":", 1) + ) + return StressNG() class TestParse(unittest.TestCase): diff --git a/hwbench/utils/helpers.py b/hwbench/utils/helpers.py index 31b1e0a..b9dacd5 100644 --- a/hwbench/utils/helpers.py +++ b/hwbench/utils/helpers.py @@ -2,6 +2,7 @@ import logging import sys from datetime import timedelta +from shutil 
import which from typing import NoReturn @@ -24,3 +25,8 @@ def time_to_next_sync(safe_start=True): # Let's bump to the next minute o'clock next_sync += timedelta(seconds=60 - next_sync.second) return (next_sync - now).total_seconds(), next_sync + + +def is_binary_available(binary_name: str) -> bool: + """A function to check if a binary is available""" + return which(binary_name) is not None From 327bc15a9a14b994ce3542f6eb352ad66ee524ec Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Wed, 29 May 2024 16:14:50 +0200 Subject: [PATCH 4/9] hwbench/monitoring: Removing test on ip 0.0.0.0 Testing if the BMC IP is set to 0.0.0.0 is useless since: - Some vendors use a dedicated channel interface, like CHIF on HPE - If a network connection is required (like redfish), the connection is already established or has generated a fault. So this commit is removing this code that is useless Signed-off-by: Erwan Velu --- hwbench/bench/monitoring.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hwbench/bench/monitoring.py b/hwbench/bench/monitoring.py index bdc4c02..605e3b8 100644 --- a/hwbench/bench/monitoring.py +++ b/hwbench/bench/monitoring.py @@ -59,10 +59,6 @@ def prepare(self): """Preparing the monitoring""" v = self.vendor bmc = self.vendor.get_bmc() - # Let's be sure the monitoring is functional by - # - checking the BMC is actually connected to the network - if bmc.get_ip() == "0.0.0.0": - h.fatal("BMC has no IP, monitoring will not be possible") print( f"Starting monitoring for {v.name()} vendor with {bmc.get_driver_name()} driver @ {bmc.get_ip()}" ) From 0d957614aa97f2523f8ab6d67fbec817bcfc96b7 Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Wed, 29 May 2024 16:52:11 +0200 Subject: [PATCH 5/9] hwbench/monitoring: Update monitoring output This simple commit is updating the monitoring text at start time. 
A typical output looks like the following: Monitoring/turbostat: initialize Monitoring/turbostat: Freq metrics:64xCPU Monitoring/BMC: initialize DELL vendor with IDRAC driver @ 10.168.97.148 Monitoring/BMC: Thermal metrics:1xCPU, 1xIntake Monitoring/BMC: Fans metrics:10xFan Monitoring/BMC: PowerConsumption metrics:65xCPU, 4xBMC Monitoring/BMC: PowerSupplies metrics:2xBMC Signed-off-by: Erwan Velu --- hwbench/bench/monitoring.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/hwbench/bench/monitoring.py b/hwbench/bench/monitoring.py index 605e3b8..ee27d68 100644 --- a/hwbench/bench/monitoring.py +++ b/hwbench/bench/monitoring.py @@ -59,17 +59,14 @@ def prepare(self): """Preparing the monitoring""" v = self.vendor bmc = self.vendor.get_bmc() - print( - f"Starting monitoring for {v.name()} vendor with {bmc.get_driver_name()} driver @ {bmc.get_ip()}" - ) - def check_monitoring(metric: Metrics): + def check_monitoring(source: str, metric: Metrics): data = self.get_metric(metric) if not len(data): h.fatal(f"Cannot detect {str(metric)} metrics") print( - f"Monitoring {str(metric)} metrics:" + f"Monitoring/{source}: {str(metric)} metrics:" + ", ".join( [f"{len(data[pc])}x{pc}" for pc in data if len(data[pc]) > 0] ) @@ -77,30 +74,35 @@ def check_monitoring(metric: Metrics): # - checking if the CPU monitoring works if self.hardware.cpu.get_arch() == "x86_64": + print("Monitoring/turbostat: initialize") self.turbostat = Turbostat( self.hardware, self.get_metric(Metrics.FREQ), self.get_metric(Metrics.POWER_CONSUMPTION), ) - check_monitoring(Metrics.FREQ) + check_monitoring("turbostat", Metrics.FREQ) + + print( + f"Monitoring/BMC: initialize {v.name()} vendor with {bmc.get_driver_name()} driver @ {bmc.get_ip()}" + ) # - checking if the bmc monitoring works # These calls will also initialize the datastructures out of the monitoring loop self.vendor.get_bmc().read_thermals(self.get_metric(Metrics.THERMAL)) - 
check_monitoring(Metrics.THERMAL) + check_monitoring("BMC", Metrics.THERMAL) self.vendor.get_bmc().read_fans(self.get_metric(Metrics.FANS)) - check_monitoring(Metrics.FANS) + check_monitoring("BMC", Metrics.FANS) self.vendor.get_bmc().read_power_consumption( self.get_metric(Metrics.POWER_CONSUMPTION) ) - check_monitoring(Metrics.POWER_CONSUMPTION) + check_monitoring("BMC", Metrics.POWER_CONSUMPTION) self.vendor.get_bmc().read_power_supplies( self.get_metric(Metrics.POWER_SUPPLIES) ) - check_monitoring(Metrics.POWER_SUPPLIES) + check_monitoring("BMC", Metrics.POWER_SUPPLIES) def __monitor_bmc(self): """Monitor the bmc metrics""" From 44f8358b6dcde401f19663e35120df6560976cd6 Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Thu, 30 May 2024 11:58:43 +0200 Subject: [PATCH 6/9] hwbench: Report error message when a binary is missing When External class is used, if the pointed binary is not installed, a FileNotFoundError exception is triggered. Instead of this crash, let's have a custom fatal message to indicate what binary is missing. 
Signed-off-by: Erwan Velu --- hwbench/utils/external.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/hwbench/utils/external.py b/hwbench/utils/external.py index 19682f0..41f5bab 100644 --- a/hwbench/utils/external.py +++ b/hwbench/utils/external.py @@ -2,6 +2,7 @@ import pathlib import subprocess from abc import abstractmethod, ABC +from .helpers import fatal class External(ABC): @@ -39,24 +40,27 @@ def run(self): """Returns the output of parse_cmd (a json-able type)""" english_env = os.environ.copy() english_env["LC_ALL"] = "C" - if self.run_cmd_version(): - ver = subprocess.run( - self.run_cmd_version(), + try: + if self.run_cmd_version(): + ver = subprocess.run( + self.run_cmd_version(), + capture_output=True, + cwd=self.out_dir, + env=english_env, + stdin=subprocess.DEVNULL, + ) + self._write_output("version-stdout", ver.stdout) + self._write_output("version-stderr", ver.stderr) + self.parse_version(ver.stdout, ver.stderr) + out = subprocess.run( + self.run_cmd(), capture_output=True, cwd=self.out_dir, env=english_env, stdin=subprocess.DEVNULL, ) - self._write_output("version-stdout", ver.stdout) - self._write_output("version-stderr", ver.stderr) - self.parse_version(ver.stdout, ver.stderr) - out = subprocess.run( - self.run_cmd(), - capture_output=True, - cwd=self.out_dir, - env=english_env, - stdin=subprocess.DEVNULL, - ) + except FileNotFoundError as e: + fatal(f"Missing {e.filename} binary, please install it.") # save outputs self._write_output("stdout", out.stdout) From 938cbde536a99395dacc9d7038b0b646dc878b48 Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Thu, 30 May 2024 12:55:29 +0200 Subject: [PATCH 7/9] hwbench/turbostat: Implement version checking hwbench requires at least turbostat 2022.04.16 (from Kernel 5.19); otherwise filtering the C1% field would not be possible. 
This commit is: - update the requirement in the documentation - implements a simple test when Turbostat() is instantiated to guarantee the minimal release is present. - If no suitable release is found, hwbench will stop with a fatal message. A typical example looks like the following : Monitoring/turbostat: Detected release 19.8.31 ERROR:root:Monitoring/turbostat: minimal expected release is 2022.4.16 Signed-off-by: Erwan Velu --- README.md | 4 +-- hwbench/bench/test_benchmarks_common.py | 12 +++++---- hwbench/environment/turbostat.py | 36 +++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1b66e2f..2656029 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ Running the **simple.conf** job: ## Mandatory - python >= 3.9 - [python dependencies](./requirements/base.in) -- turbostat >= 2022.07.28 +- turbostat >= 2022.04.16 - numactl - dmidecode - util-linux >= 2.32 @@ -75,4 +75,4 @@ Running the **simple.conf** job: ## Optional - ipmitool - ilorest (for HPE servers) -- stress-ng >= 0.17.04 \ No newline at end of file +- stress-ng >= 0.17.04 diff --git a/hwbench/bench/test_benchmarks_common.py b/hwbench/bench/test_benchmarks_common.py index ac4ae86..14cadd6 100644 --- a/hwbench/bench/test_benchmarks_common.py +++ b/hwbench/bench/test_benchmarks_common.py @@ -59,13 +59,15 @@ def get_benches(self): def parse_jobs_config(self, validate_parameters=True): # We need to mock turbostat when parsing config with monitoring - # We mock the run() command to get a constant output with patch("hwbench.utils.helpers.is_binary_available") as iba: iba.return_value = True - with patch("hwbench.environment.turbostat.Turbostat.run") as ts: - with open("tests/parsing/turbostat/run", "r") as f: - ts.return_value = ast.literal_eval(f.read()) - return self.benches.parse_jobs_config(validate_parameters) + # We mock the run() and check_version() command to get a constant output + with 
patch("hwbench.environment.turbostat.Turbostat.check_version") as cv: + cv.return_value = True + with patch("hwbench.environment.turbostat.Turbostat.run") as ts: + with open("tests/parsing/turbostat/run", "r") as f: + ts.return_value = ast.literal_eval(f.read()) + return self.benches.parse_jobs_config(validate_parameters) def get_jobs_config(self) -> config.Config: return self.jobs_config diff --git a/hwbench/environment/turbostat.py b/hwbench/environment/turbostat.py index c7af416..185834c 100644 --- a/hwbench/environment/turbostat.py +++ b/hwbench/environment/turbostat.py @@ -1,8 +1,11 @@ import os +import re import subprocess from enum import Enum +from packaging.version import Version from ..environment.hardware import BaseHardware from ..bench.monitoring_structs import MonitorMetric, CPUContext, PowerContext +from ..utils.helpers import is_binary_available, fatal CORE = "core" PACKAGE = "package" @@ -48,6 +51,7 @@ def __init__( CPUSTATS.CORE_WATTS, CPUSTATS.PACKAGE_WATTS, } + self.min_release = Version("2022.04.16") self.header = "" self.freq_metrics = freq_metrics self.power_metrics = power_metrics @@ -55,9 +59,41 @@ def __init__( self.process: subprocess.Popen[bytes] = None # type: ignore[assignment] self.freq_metrics[str(CPUContext.CPU)] = {} # type: ignore[no-redef] self.power_metrics[str(PowerContext.CPU)] = {} # type: ignore[no-redef] + # Let's make a first quick run to detect system + self.check_version() self.pre_run() + def check_version(self): + english_env = os.environ.copy() + english_env["LC_ALL"] = "C" + + if not is_binary_available("turbostat"): + fatal("Missing turbostat binary, please install it.") + + self.process = subprocess.Popen( + ["turbostat", "-v"], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + env=english_env, + stdin=subprocess.DEVNULL, + ) + # turbostat version 2022.04.16 - Len Brown + match = re.search( + r"turbostat version (?P[0-9]+\.[0-9]+\.[0-9]+).*", + str(self.get_process_output()), + ) + + current_version = 
Version(match.group("version")) + if not match: + fatal("Monitoring/turbostat: Cannot detect turbostat version") + + print(f"Monitoring/turbostat: Detected release {current_version}") + if current_version < self.min_release: + fatal( + f"Monitoring/turbostat: minimal expected release is {self.min_release}" + ) + def reset_metrics(self, power_metrics=None): if power_metrics is not None: self.power_metrics = power_metrics From 78e19969a241e99986d7739c1cda5b7e4baaa292 Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Thu, 30 May 2024 13:32:55 +0200 Subject: [PATCH 8/9] hwbench/turbostat: Do not crash if cores does not have corewatt Some processors like Intel(R) Core(TM) i7-9750H, report the Corewatt only for Core0. This commit is about to just ignore cores that do not report corewatt even if the header mention it. A typical turbostat output of such processor: Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz IPC IRQ SMI POLL C1 C1E C3 C6 C7s C8 C9 C10 POLL% C1% C1E% C3% C6% C7s% C8% C9% C10% CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp CoreThr PkgTmp Totl%C0 Any%C0 GFX%C0 CPUGFX% Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 Pkg%pc8 Pkg%pc9 Pk%pc10 CPU%LPI SYS%LPI PkgWatt CorWatt GFXWatt RAMWatt PKG_% RAM_% UncMHz - - 3 0.33 800 2592 0.50 1620 0 1 3 10 16 206 0 214 1 1342 0.00 0.00 0.00 0.00 0.26 0.00 0.36 0.02 99.05 0.64 0.00 0.47 98.57 40 2592 40 4.90 4.24 0.00 0.00 9.55 85.04 0.00 0.00 0.00 0.00 0.00 0.00 0.00 11.38 0.25 0.00 1.17 0.00 0.00 800 0 0 1 0.09 800 2592 0.35 20 0 0 0 0 0 2 0 4 0 113 0.00 0.00 0.00 0.00 0.02 0.00 0.07 0.00 99.82 1.13 0.00 0.08 98.69 37 2592 40 4.90 4.24 0.00 0.00 9.55 85.04 0.00 0.00 0.00 0.00 0.00 0.00 0.00 11.38 0.25 0.00 1.17 0.00 0.00 800 0 6 6 0.69 800 2592 0.31 341 0 0 0 0 0 7 0 3 0 311 0.00 0.00 0.00 0.00 0.08 0.00 0.06 0.00 99.20 0.53 1 1 6 0.70 800 2592 0.51 260 0 1 3 3 3 15 0 23 0 187 0.00 0.00 0.01 0.01 0.20 0.00 0.47 0.00 98.64 0.62 0.01 0.32 98.35 40 1352 1 7 2 0.31 800 2592 1.57 67 0 0 0 1 0 11 0 10 0 36 0.00 0.00 0.00 0.00 0.16 0.00 0.21 0.00 99.33 1.00 2 2 5 
0.57 800 2592 0.33 66 0 0 0 1 3 11 0 9 0 145 0.00 0.00 0.00 0.00 0.17 0.00 0.19 0.00 99.08 0.46 0.00 0.52 98.44 38 1255 2 8 1 0.17 800 2592 0.38 108 0 0 0 1 2 24 0 21 0 66 0.00 0.00 0.00 0.01 0.42 0.00 0.41 0.00 99.01 0.86 3 3 4 0.44 800 2592 0.32 230 0 0 0 1 0 9 0 15 0 203 0.00 0.00 0.00 0.00 0.11 0.00 0.30 0.00 99.17 0.70 0.00 0.75 98.11 37 1078 3 9 2 0.29 800 2592 0.54 151 0 0 0 0 0 48 0 50 1 62 0.00 0.00 0.00 0.00 0.73 0.00 1.00 0.21 97.79 0.85 4 4 3 0.39 800 2592 0.30 264 0 0 0 2 7 34 0 57 0 158 0.00 0.00 0.00 0.01 0.52 0.00 1.13 0.00 97.98 0.38 0.00 0.50 98.73 37 237 4 10 1 0.08 800 2592 0.58 18 0 0 0 0 0 5 0 6 0 17 0.00 0.00 0.00 0.00 0.08 0.00 0.12 0.00 99.72 0.68 5 5 0 0.05 800 2592 0.47 25 0 0 0 1 0 7 0 1 0 22 0.00 0.00 0.00 0.00 0.10 0.00 0.02 0.00 99.84 0.26 0.01 0.62 99.07 36 0 5 11 1 0.14 800 2592 0.90 70 0 0 0 0 1 33 0 15 0 22 0.00 0.00 0.00 0.01 0.58 0.00 0.30 0.00 98.98 0.17 Signed-off-by: Erwan Velu --- hwbench/environment/turbostat.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/hwbench/environment/turbostat.py b/hwbench/environment/turbostat.py index 185834c..33df7af 100644 --- a/hwbench/environment/turbostat.py +++ b/hwbench/environment/turbostat.py @@ -225,9 +225,17 @@ def parse(self): items = line.split() core_nb = items[int(self.__get_field_position(CPUSTATS.CPU))] if self.has(CPUSTATS.CORE_WATTS): - self.power_metrics[str(PowerContext.CPU)][f"Core_{core_nb}"].add( - float(items[int(self.__get_field_position(CPUSTATS.CORE_WATTS))]) - ) + try: + self.power_metrics[str(PowerContext.CPU)][f"Core_{core_nb}"].add( + float( + items[int(self.__get_field_position(CPUSTATS.CORE_WATTS))] + ) + ) + except IndexError: + # Some processors reports the corewatt in the header but not for all cores ... 
+ # So let's ignore if the metrics does not exist for this core + pass + self.freq_metrics[str(CPUContext.CPU)][f"Core_{core_nb}"].add( float(items[int(self.__get_field_position(CPUSTATS.BUSY_MHZ))]) ) From da3e8ad4e25a04bd82a33b1a8ac79d26c6b4503b Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Thu, 30 May 2024 15:06:36 +0200 Subject: [PATCH 9/9] hwbench/turbostat: Using long name options Starting with Kernel 6.9, the -n option became ambiguous, which prevents turbostat from running, with the following message: turbostat: option '-n' is ambiguous; possibilities: '-num_iterations' '-no-msr' '-no-perf' This commit is removing all short name options and replacing them with long names to avoid this case. This patch got tested successfully from Kernel 5.19 (2022.4.16) up to the incoming 6.10 (2024.5.10). Signed-off-by: Erwan Velu --- hwbench/environment/turbostat.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hwbench/environment/turbostat.py b/hwbench/environment/turbostat.py index 33df7af..8a48793 100644 --- a/hwbench/environment/turbostat.py +++ b/hwbench/environment/turbostat.py @@ -72,7 +72,7 @@ def check_version(self): fatal("Missing turbostat binary, please install it.") self.process = subprocess.Popen( - ["turbostat", "-v"], + ["turbostat", "--version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=english_env, @@ -151,14 +151,14 @@ def run(self, interval: float = 1, wait=False): "-c", f"{self.hardware.get_cpu().get_logical_cores_count()-1}", "turbostat", - "-c", + "--cpu", "core", - "-q", + "--quiet", "--interval", str(interval), - "-n", + "--num_iterations", "1", - "-s", + "--show", ] sensors = "" for sensor in CPUSTATS: