From ffc59dd1c0606930c6aff29dee55b36cb64e13dc Mon Sep 17 00:00:00 2001
From: Erwan Velu <e.velu@criteo.com>
Date: Tue, 28 May 2024 14:47:56 +0200
Subject: [PATCH 1/9] hwbench: Report BMC driver in use

When starting hwbench or when reading a result file, there is no mention
of the BMC driver used. This could be useful to understand some metrics
or even for hwgraph to take some decision.

This commit is :
- adding BMC.get_driver_name() to report the class name as the driver
  name

- adding a BMC.dump() so the driver name can be added in the result
  file. The hardware data structure looks like the following :

  "hardware": {
    "dmi": {
      "vendor": "Dell Inc.",
      "product": "PowerEdge C6615",
      "serial": "XXXXXX",
      "bios": {
        "version": "1.2.3",
        "release": "1.2"
      },
      "chassis": {
        "product": "PowerEdge C6600",
        "serial": "XXXXXX"
      },
      "sysconf_threads": 128
    },
    "cpu": {
      "vendor": "AuthenticAMD",
      "model": "AMD EPYC 8534P 64-Core Processor",
      "logical_cores": 128,
      "physical_cores": 64,
      "numa_domains": 8,
      "sockets": 1
    },
    "bmc": {
      "driver": "IDRAC"
    }

- updating the startup message to indicate which driver is used, a
  typical output looks like :

  python3 -m hwbench.hwbench -j configs/mini.conf -m monitoring.cfg
  Starting monitoring for DELL vendor with IDRAC driver @ 10.168.97.148
  ...

Signed-off-by: Erwan Velu <e.velu@criteo.com>
---
 hwbench/bench/monitoring.py           | 6 ++++--
 hwbench/environment/hardware.py       | 1 +
 hwbench/environment/vendors/vendor.py | 8 ++++++++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/hwbench/bench/monitoring.py b/hwbench/bench/monitoring.py
index 65c7957..bdc4c02 100644
--- a/hwbench/bench/monitoring.py
+++ b/hwbench/bench/monitoring.py
@@ -57,12 +57,14 @@ def __set_metric(self, metric: Metrics, value: dict[str, dict[str, MonitorMetric
 
     def prepare(self):
         """Preparing the monitoring"""
+        v = self.vendor
+        bmc = self.vendor.get_bmc()
         # Let's be sure the monitoring is functional by
         # - checking the BMC is actually connected to the network
-        if self.vendor.get_bmc().get_ip() == "0.0.0.0":
+        if bmc.get_ip() == "0.0.0.0":
             h.fatal("BMC has no IP, monitoring will not be possible")
         print(
-            f"Starting monitoring for {self.vendor.name()} vendor with {self.vendor.get_bmc().get_ip()}"
+            f"Starting monitoring for {v.name()} vendor with {bmc.get_driver_name()} driver @ {bmc.get_ip()}"
         )
 
         def check_monitoring(metric: Metrics):
diff --git a/hwbench/environment/hardware.py b/hwbench/environment/hardware.py
index 122abe6..7520905 100644
--- a/hwbench/environment/hardware.py
+++ b/hwbench/environment/hardware.py
@@ -52,6 +52,7 @@ def dump(self) -> dict[str, Optional[str | int] | dict]:
         return {
             "dmi": self.dmi.dump(),
             "cpu": self.cpu.dump(),
+            "bmc": self.vendor.get_bmc().dump(),
         }
 
     def cpu_flags(self) -> list[str]:
diff --git a/hwbench/environment/vendors/vendor.py b/hwbench/environment/vendors/vendor.py
index 344bfe2..087773b 100644
--- a/hwbench/environment/vendors/vendor.py
+++ b/hwbench/environment/vendors/vendor.py
@@ -64,6 +64,14 @@ def get_ip(self) -> str:
 
         return ip
 
+    def get_driver_name(self) -> str:
+        """Return the BMC driver name, i.e. the class name of this instance"""
+        return type(self).__name__
+
+    def dump(self) -> dict[str, str]:
+        """Return a dict describing the BMC: its driver name under the 'driver' key"""
+        return {"driver": self.get_driver_name()}
+
     def connect_redfish(self):
         """Connect to the BMC using Redfish."""
         if not self.vendor.get_monitoring_config_filename():

From 6ae594b9e85b3d4733f27f07a8783ba878aaedf9 Mon Sep 17 00:00:00 2001
From: Erwan Velu <e.velu@criteo.com>
Date: Tue, 28 May 2024 16:31:03 +0200
Subject: [PATCH 2/9] hwbench: Do not crash if block device has no scheduler

Some block devices, like zram, do not have any scheduler.
This case made hwbench crash at startup.

This commit simply ignores block devices with no scheduler.

Signed-off-by: Erwan Velu <e.velu@criteo.com>
---
 hwbench/tuning/scheduler.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hwbench/tuning/scheduler.py b/hwbench/tuning/scheduler.py
index 69f7217..6d5ab23 100644
--- a/hwbench/tuning/scheduler.py
+++ b/hwbench/tuning/scheduler.py
@@ -15,6 +15,9 @@ def run(self):
             for dirname in dirnames:
                 diskdir = pathlib.Path(rootpath) / dirname
                 file = diskdir / "queue/scheduler"
+                # Some block devices like zram do not have a scheduler
+                if not os.path.isfile(file):
+                    continue
                 previous = file.read_text(encoding="utf-8").rstrip()
                 # see https://docs.kernel.org/block/switching-sched.html
                 # for deeper explanation

From bafe4cf528abdf20ceea82fdfe57d2aa1e76a71a Mon Sep 17 00:00:00 2001
From: Erwan Velu <e.velu@criteo.com>
Date: Tue, 28 May 2024 17:41:21 +0200
Subject: [PATCH 3/9] hwbench: Adding helper to check if a binary is installed

When an engine is using a 3rd-party binary, it's mandatory to test its
presence, otherwise the code will crash.

This commit is :
- adding a new helper (is_binary_available) to check if a binary is
  available
- Add a generic check for engines

Signed-off-by: Erwan Velu <e.velu@criteo.com>
---
 hwbench/bench/engine.py                 |  7 +++++++
 hwbench/bench/test_benchmarks_common.py | 10 ++++++----
 hwbench/config/test_parse.py            |  4 +++-
 hwbench/engines/test_parse.py           | 18 +++++++++++-------
 hwbench/utils/helpers.py                |  6 ++++++
 5 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/hwbench/bench/engine.py b/hwbench/bench/engine.py
index bd44efa..110e030 100644
--- a/hwbench/bench/engine.py
+++ b/hwbench/bench/engine.py
@@ -3,6 +3,7 @@
 from typing import Optional
 
 from ..utils.external import External
+from ..utils.helpers import fatal
 from .parameters import BenchmarkParameters
 
 
@@ -47,6 +48,12 @@ def __init__(
         self.engine_name = name
         self.binary = binary
         self.modules = modules
+        # FIXME: If the import is done at the file level, the mocking is lost here
+        # So I'm importing is_binary_available just before the call :/
+        from ..utils.helpers import is_binary_available
+
+        if not is_binary_available(self.binary):
+            fatal(f"Engine {name} requires '{binary}' binary, please install it.")
 
     def get_binary(self) -> str:
         return self.binary
diff --git a/hwbench/bench/test_benchmarks_common.py b/hwbench/bench/test_benchmarks_common.py
index 35818ab..ac4ae86 100644
--- a/hwbench/bench/test_benchmarks_common.py
+++ b/hwbench/bench/test_benchmarks_common.py
@@ -60,10 +60,12 @@ def get_benches(self):
     def parse_jobs_config(self, validate_parameters=True):
         # We need to mock turbostat when parsing config with monitoring
         # We mock the run() command to get a constant output
-        with patch("hwbench.environment.turbostat.Turbostat.run") as ts:
-            with open("tests/parsing/turbostat/run", "r") as f:
-                ts.return_value = ast.literal_eval(f.read())
-                return self.benches.parse_jobs_config(validate_parameters)
+        with patch("hwbench.utils.helpers.is_binary_available") as iba:
+            iba.return_value = True
+            with patch("hwbench.environment.turbostat.Turbostat.run") as ts:
+                with open("tests/parsing/turbostat/run", "r") as f:
+                    ts.return_value = ast.literal_eval(f.read())
+                    return self.benches.parse_jobs_config(validate_parameters)
 
     def get_jobs_config(self) -> config.Config:
         return self.jobs_config
diff --git a/hwbench/config/test_parse.py b/hwbench/config/test_parse.py
index 0afc0e7..7690e5c 100644
--- a/hwbench/config/test_parse.py
+++ b/hwbench/config/test_parse.py
@@ -47,7 +47,9 @@ def test_keywords(self):
                     .read_bytes()
                     .split(b":", 1)
                 )
-                self.get_jobs_config().validate_sections()
+                with patch("hwbench.utils.helpers.is_binary_available") as iba:
+                    iba.return_value = True
+                    self.get_jobs_config().validate_sections()
         except Exception as exc:
             assert False, f"'validate_sections' detected a syntax error {exc}"
 
diff --git a/hwbench/engines/test_parse.py b/hwbench/engines/test_parse.py
index 7e33d5a..ce7d315 100644
--- a/hwbench/engines/test_parse.py
+++ b/hwbench/engines/test_parse.py
@@ -22,13 +22,17 @@
 def mock_engine(version: str) -> StressNG:
     # We need to patch list_module_parameters() function
     # to avoid considering the local stress-ng binary
-    with patch("hwbench.engines.stressng.EngineModuleCpu.list_module_parameters") as p:
-        p.return_value = (
-            pathlib.Path(f"./tests/parsing/stressngmethods/{version}/stdout")
-            .read_bytes()
-            .split(b":", 1)
-        )
-        return StressNG()
+    with patch("hwbench.utils.helpers.is_binary_available") as iba:
+        iba.return_value = True
+        with patch(
+            "hwbench.engines.stressng.EngineModuleCpu.list_module_parameters"
+        ) as p:
+            p.return_value = (
+                pathlib.Path(f"./tests/parsing/stressngmethods/{version}/stdout")
+                .read_bytes()
+                .split(b":", 1)
+            )
+            return StressNG()
 
 
 class TestParse(unittest.TestCase):
diff --git a/hwbench/utils/helpers.py b/hwbench/utils/helpers.py
index 31b1e0a..b9dacd5 100644
--- a/hwbench/utils/helpers.py
+++ b/hwbench/utils/helpers.py
@@ -2,6 +2,7 @@
 import logging
 import sys
 from datetime import timedelta
+from shutil import which
 from typing import NoReturn
 
 
@@ -24,3 +25,8 @@ def time_to_next_sync(safe_start=True):
     # Let's bump to the next minute o'clock
     next_sync += timedelta(seconds=60 - next_sync.second)
     return (next_sync - now).total_seconds(), next_sync
+
+
+def is_binary_available(binary_name: str) -> bool:
+    """Return True if shutil.which() can locate binary_name, False otherwise"""
+    return which(binary_name) is not None

From 327bc15a9a14b994ce3542f6eb352ad66ee524ec Mon Sep 17 00:00:00 2001
From: Erwan Velu <e.velu@criteo.com>
Date: Wed, 29 May 2024 16:14:50 +0200
Subject: [PATCH 4/9] hwbench/monitoring: Removing test on ip 0.0.0.0

Testing if the BMC IP is set to 0.0.0.0 is useless since:
- Some vendors use a dedicated channel interface, like CHIF on HPE
- If a network connection is required (like redfish), the connection is
  already established or generates a fault.

So this commit removes this useless code.

Signed-off-by: Erwan Velu <e.velu@criteo.com>
---
 hwbench/bench/monitoring.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/hwbench/bench/monitoring.py b/hwbench/bench/monitoring.py
index bdc4c02..605e3b8 100644
--- a/hwbench/bench/monitoring.py
+++ b/hwbench/bench/monitoring.py
@@ -59,10 +59,6 @@ def prepare(self):
         """Preparing the monitoring"""
         v = self.vendor
         bmc = self.vendor.get_bmc()
-        # Let's be sure the monitoring is functional by
-        # - checking the BMC is actually connected to the network
-        if bmc.get_ip() == "0.0.0.0":
-            h.fatal("BMC has no IP, monitoring will not be possible")
         print(
             f"Starting monitoring for {v.name()} vendor with {bmc.get_driver_name()} driver @ {bmc.get_ip()}"
         )

From 0d957614aa97f2523f8ab6d67fbec817bcfc96b7 Mon Sep 17 00:00:00 2001
From: Erwan Velu <e.velu@criteo.com>
Date: Wed, 29 May 2024 16:52:11 +0200
Subject: [PATCH 5/9] hwbench/monitoring: Update monitoring output

This simple commit is updating the monitoring text at start time.

A typical output looks like the following:

	Monitoring/turbostat: initialize
	Monitoring/turbostat: Freq metrics:64xCPU
	Monitoring/BMC: initialize DELL vendor with IDRAC driver @ 10.168.97.148
	Monitoring/BMC: Thermal metrics:1xCPU, 1xIntake
	Monitoring/BMC: Fans metrics:10xFan
	Monitoring/BMC: PowerConsumption metrics:65xCPU, 4xBMC
	Monitoring/BMC: PowerSupplies metrics:2xBMC

Signed-off-by: Erwan Velu <e.velu@criteo.com>
---
 hwbench/bench/monitoring.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/hwbench/bench/monitoring.py b/hwbench/bench/monitoring.py
index 605e3b8..ee27d68 100644
--- a/hwbench/bench/monitoring.py
+++ b/hwbench/bench/monitoring.py
@@ -59,17 +59,14 @@ def prepare(self):
         """Preparing the monitoring"""
         v = self.vendor
         bmc = self.vendor.get_bmc()
-        print(
-            f"Starting monitoring for {v.name()} vendor with {bmc.get_driver_name()} driver @ {bmc.get_ip()}"
-        )
 
-        def check_monitoring(metric: Metrics):
+        def check_monitoring(source: str, metric: Metrics):
             data = self.get_metric(metric)
             if not len(data):
                 h.fatal(f"Cannot detect {str(metric)} metrics")
 
             print(
-                f"Monitoring {str(metric)} metrics:"
+                f"Monitoring/{source}: {str(metric)} metrics:"
                 + ", ".join(
                     [f"{len(data[pc])}x{pc}" for pc in data if len(data[pc]) > 0]
                 )
@@ -77,30 +74,35 @@ def check_monitoring(metric: Metrics):
 
         # - checking if the CPU monitoring works
         if self.hardware.cpu.get_arch() == "x86_64":
+            print("Monitoring/turbostat: initialize")
             self.turbostat = Turbostat(
                 self.hardware,
                 self.get_metric(Metrics.FREQ),
                 self.get_metric(Metrics.POWER_CONSUMPTION),
             )
-            check_monitoring(Metrics.FREQ)
+            check_monitoring("turbostat", Metrics.FREQ)
+
+        print(
+            f"Monitoring/BMC: initialize {v.name()} vendor with {bmc.get_driver_name()} driver @ {bmc.get_ip()}"
+        )
 
         # - checking if the bmc monitoring works
         # These calls will also initialize the datastructures out of the monitoring loop
         self.vendor.get_bmc().read_thermals(self.get_metric(Metrics.THERMAL))
-        check_monitoring(Metrics.THERMAL)
+        check_monitoring("BMC", Metrics.THERMAL)
 
         self.vendor.get_bmc().read_fans(self.get_metric(Metrics.FANS))
-        check_monitoring(Metrics.FANS)
+        check_monitoring("BMC", Metrics.FANS)
 
         self.vendor.get_bmc().read_power_consumption(
             self.get_metric(Metrics.POWER_CONSUMPTION)
         )
-        check_monitoring(Metrics.POWER_CONSUMPTION)
+        check_monitoring("BMC", Metrics.POWER_CONSUMPTION)
 
         self.vendor.get_bmc().read_power_supplies(
             self.get_metric(Metrics.POWER_SUPPLIES)
         )
-        check_monitoring(Metrics.POWER_SUPPLIES)
+        check_monitoring("BMC", Metrics.POWER_SUPPLIES)
 
     def __monitor_bmc(self):
         """Monitor the bmc metrics"""

From 44f8358b6dcde401f19663e35120df6560976cd6 Mon Sep 17 00:00:00 2001
From: Erwan Velu <e.velu@criteo.com>
Date: Thu, 30 May 2024 11:58:43 +0200
Subject: [PATCH 6/9] hwbench: Report error message when a binary is missing

When External class is used, if the pointed binary is not installed, a
FileNotFoundError exception is triggered.

Instead of this crash, let's have a custom fatal message to indicate
what binary is missing.

Signed-off-by: Erwan Velu <e.velu@criteo.com>
---
 hwbench/utils/external.py | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/hwbench/utils/external.py b/hwbench/utils/external.py
index 19682f0..41f5bab 100644
--- a/hwbench/utils/external.py
+++ b/hwbench/utils/external.py
@@ -2,6 +2,7 @@
 import pathlib
 import subprocess
 from abc import abstractmethod, ABC
+from .helpers import fatal
 
 
 class External(ABC):
@@ -39,24 +40,27 @@ def run(self):
         """Returns the output of parse_cmd (a json-able type)"""
         english_env = os.environ.copy()
         english_env["LC_ALL"] = "C"
-        if self.run_cmd_version():
-            ver = subprocess.run(
-                self.run_cmd_version(),
+        try:
+            if self.run_cmd_version():
+                ver = subprocess.run(
+                    self.run_cmd_version(),
+                    capture_output=True,
+                    cwd=self.out_dir,
+                    env=english_env,
+                    stdin=subprocess.DEVNULL,
+                )
+                self._write_output("version-stdout", ver.stdout)
+                self._write_output("version-stderr", ver.stderr)
+                self.parse_version(ver.stdout, ver.stderr)
+            out = subprocess.run(
+                self.run_cmd(),
                 capture_output=True,
                 cwd=self.out_dir,
                 env=english_env,
                 stdin=subprocess.DEVNULL,
             )
-            self._write_output("version-stdout", ver.stdout)
-            self._write_output("version-stderr", ver.stderr)
-            self.parse_version(ver.stdout, ver.stderr)
-        out = subprocess.run(
-            self.run_cmd(),
-            capture_output=True,
-            cwd=self.out_dir,
-            env=english_env,
-            stdin=subprocess.DEVNULL,
-        )
+        except FileNotFoundError as e:
+            fatal(f"Missing {e.filename} binary, please install it.")
         # save outputs
 
         self._write_output("stdout", out.stdout)

From 938cbde536a99395dacc9d7038b0b646dc878b48 Mon Sep 17 00:00:00 2001
From: Erwan Velu <e.velu@criteo.com>
Date: Thu, 30 May 2024 12:55:29 +0200
Subject: [PATCH 7/9] hwbench/turbostat: Implement version checking

hwbench requires at least turbostat 2022.04.16 (from Kernel 5.19), otherwise
filtering the C1% field would not be possible.

This commit is:
- update the requirement in the documentation

- implements a simple test when Turbostat() is instantiated to guarantee
  the minimal release is present.

- If no suitable release is found, hwbench will stop with a fatal
  message. A typical example looks like the following :

	Monitoring/turbostat: Detected release 19.8.31
	ERROR:root:Monitoring/turbostat: minimal expected release is 2022.4.16

Signed-off-by: Erwan Velu <e.velu@criteo.com>
---
 README.md                               |  4 +--
 hwbench/bench/test_benchmarks_common.py | 12 +++++----
 hwbench/environment/turbostat.py        | 36 +++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 1b66e2f..2656029 100644
--- a/README.md
+++ b/README.md
@@ -65,7 +65,7 @@ Running the **simple.conf** job:
 ## Mandatory
 - python >= 3.9
 - [python dependencies](./requirements/base.in)
-- turbostat >= 2022.07.28
+- turbostat >= 2022.04.16
 - numactl
 - dmidecode
 - util-linux >= 2.32
@@ -75,4 +75,4 @@ Running the **simple.conf** job:
 ## Optional
 - ipmitool
 - ilorest (for HPE servers)
-- stress-ng >= 0.17.04
\ No newline at end of file
+- stress-ng >= 0.17.04
diff --git a/hwbench/bench/test_benchmarks_common.py b/hwbench/bench/test_benchmarks_common.py
index ac4ae86..14cadd6 100644
--- a/hwbench/bench/test_benchmarks_common.py
+++ b/hwbench/bench/test_benchmarks_common.py
@@ -59,13 +59,15 @@ def get_benches(self):
 
     def parse_jobs_config(self, validate_parameters=True):
         # We need to mock turbostat when parsing config with monitoring
-        # We mock the run() command to get a constant output
         with patch("hwbench.utils.helpers.is_binary_available") as iba:
             iba.return_value = True
-            with patch("hwbench.environment.turbostat.Turbostat.run") as ts:
-                with open("tests/parsing/turbostat/run", "r") as f:
-                    ts.return_value = ast.literal_eval(f.read())
-                    return self.benches.parse_jobs_config(validate_parameters)
+            # We mock the run() and check_version() command to get a constant output
+            with patch("hwbench.environment.turbostat.Turbostat.check_version") as cv:
+                cv.return_value = True
+                with patch("hwbench.environment.turbostat.Turbostat.run") as ts:
+                    with open("tests/parsing/turbostat/run", "r") as f:
+                        ts.return_value = ast.literal_eval(f.read())
+                        return self.benches.parse_jobs_config(validate_parameters)
 
     def get_jobs_config(self) -> config.Config:
         return self.jobs_config
diff --git a/hwbench/environment/turbostat.py b/hwbench/environment/turbostat.py
index c7af416..185834c 100644
--- a/hwbench/environment/turbostat.py
+++ b/hwbench/environment/turbostat.py
@@ -1,8 +1,11 @@
 import os
+import re
 import subprocess
 from enum import Enum
+from packaging.version import Version
 from ..environment.hardware import BaseHardware
 from ..bench.monitoring_structs import MonitorMetric, CPUContext, PowerContext
+from ..utils.helpers import is_binary_available, fatal
 
 CORE = "core"
 PACKAGE = "package"
@@ -48,6 +51,7 @@ def __init__(
             CPUSTATS.CORE_WATTS,
             CPUSTATS.PACKAGE_WATTS,
         }
+        self.min_release = Version("2022.04.16")
         self.header = ""
         self.freq_metrics = freq_metrics
         self.power_metrics = power_metrics
@@ -55,9 +59,41 @@ def __init__(
         self.process: subprocess.Popen[bytes] = None  # type: ignore[assignment]
         self.freq_metrics[str(CPUContext.CPU)] = {}  # type: ignore[no-redef]
         self.power_metrics[str(PowerContext.CPU)] = {}  # type: ignore[no-redef]
+
         # Let's make a first quick run to detect system
+        self.check_version()
         self.pre_run()
 
+    def check_version(self):
+        """Call fatal() unless turbostat is installed and >= self.min_release."""
+        english_env = os.environ.copy()
+        english_env["LC_ALL"] = "C"
+        if not is_binary_available("turbostat"):
+            fatal("Missing turbostat binary, please install it.")
+
+        self.process = subprocess.Popen(
+            ["turbostat", "-v"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            env=english_env,
+            stdin=subprocess.DEVNULL,
+        )
+        # turbostat version 2022.04.16 - Len Brown <lenb@kernel.org>
+        match = re.search(
+            r"turbostat version (?P<version>[0-9]+\.[0-9]+\.[0-9]+).*",
+            str(self.get_process_output()),
+        )
+        # Check the match before using it: re.search() returns None on no match
+        if not match:
+            fatal("Monitoring/turbostat: Cannot detect turbostat version")
+        current_version = Version(match.group("version"))
+
+        print(f"Monitoring/turbostat: Detected release {current_version}")
+        if current_version < self.min_release:
+            fatal(
+                f"Monitoring/turbostat: minimal expected release is {self.min_release}"
+            )
+
     def reset_metrics(self, power_metrics=None):
         if power_metrics is not None:
             self.power_metrics = power_metrics

From 78e19969a241e99986d7739c1cda5b7e4baaa292 Mon Sep 17 00:00:00 2001
From: Erwan Velu <e.velu@criteo.com>
Date: Thu, 30 May 2024 13:32:55 +0200
Subject: [PATCH 8/9] hwbench/turbostat: Do not crash if cores does not have
 corewatt

Some processors, like the Intel(R) Core(TM) i7-9750H, report the Corewatt only for Core0.

This commit just ignores cores that do not report corewatt
even if the header mentions it.

A typical turbostat output of such processor:

Core	CPU	Avg_MHz	Busy%	Bzy_MHz	TSC_MHz	IPC	IRQ	SMI	POLL	C1	C1E	C3	C6	C7s	C8	C9	C10	POLL%	C1%	C1E%	C3%	C6%	C7s%	C8%	C9%	C10%	CPU%c1	CPU%c3	CPU%c6	CPU%c7	CoreTmp	CoreThr	PkgTmp	Totl%C0	Any%C0	GFX%C0	CPUGFX%	Pkg%pc2	Pkg%pc3	Pkg%pc6	Pkg%pc7	Pkg%pc8	Pkg%pc9	Pk%pc10	CPU%LPI	SYS%LPI	PkgWatt	CorWatt	GFXWatt	RAMWatt	PKG_%	RAM_%	UncMHz
-	-	3	0.33	800	2592	0.50	1620	0	1	3	10	16	206	0	214	1	1342	0.00	0.00	0.00	0.00	0.26	0.00	0.36	0.02	99.05	0.64	0.00	0.47	98.57	40	2592	40	4.90	4.24	0.00	0.00	9.55	85.04	0.00	0.00	0.00	0.00	0.00	0.00	0.00	11.38	0.25	0.00	1.17	0.00	0.00	800
0	0	1	0.09	800	2592	0.35	20	0	0	0	0	0	2	0	4	0	113	0.00	0.00	0.00	0.00	0.02	0.00	0.07	0.00	99.82	1.13	0.00	0.08	98.69	37	2592	40	4.90	4.24	0.00	0.00	9.55	85.04	0.00	0.00	0.00	0.00	0.00	0.00	0.00	11.38	0.25	0.00	1.17	0.00	0.00	800
0	6	6	0.69	800	2592	0.31	341	0	0	0	0	0	7	0	3	0	311	0.00	0.00	0.00	0.00	0.08	0.00	0.06	0.00	99.20	0.53
1	1	6	0.70	800	2592	0.51	260	0	1	3	3	3	15	0	23	0	187	0.00	0.00	0.01	0.01	0.20	0.00	0.47	0.00	98.64	0.62	0.01	0.32	98.35	40	1352
1	7	2	0.31	800	2592	1.57	67	0	0	0	1	0	11	0	10	0	36	0.00	0.00	0.00	0.00	0.16	0.00	0.21	0.00	99.33	1.00
2	2	5	0.57	800	2592	0.33	66	0	0	0	1	3	11	0	9	0	145	0.00	0.00	0.00	0.00	0.17	0.00	0.19	0.00	99.08	0.46	0.00	0.52	98.44	38	1255
2	8	1	0.17	800	2592	0.38	108	0	0	0	1	2	24	0	21	0	66	0.00	0.00	0.00	0.01	0.42	0.00	0.41	0.00	99.01	0.86
3	3	4	0.44	800	2592	0.32	230	0	0	0	1	0	9	0	15	0	203	0.00	0.00	0.00	0.00	0.11	0.00	0.30	0.00	99.17	0.70	0.00	0.75	98.11	37	1078
3	9	2	0.29	800	2592	0.54	151	0	0	0	0	0	48	0	50	1	62	0.00	0.00	0.00	0.00	0.73	0.00	1.00	0.21	97.79	0.85
4	4	3	0.39	800	2592	0.30	264	0	0	0	2	7	34	0	57	0	158	0.00	0.00	0.00	0.01	0.52	0.00	1.13	0.00	97.98	0.38	0.00	0.50	98.73	37	237
4	10	1	0.08	800	2592	0.58	18	0	0	0	0	0	5	0	6	0	17	0.00	0.00	0.00	0.00	0.08	0.00	0.12	0.00	99.72	0.68
5	5	0	0.05	800	2592	0.47	25	0	0	0	1	0	7	0	1	0	22	0.00	0.00	0.00	0.00	0.10	0.00	0.02	0.00	99.84	0.26	0.01	0.62	99.07	36	0
5	11	1	0.14	800	2592	0.90	70	0	0	0	0	1	33	0	15	0	22	0.00	0.00	0.00	0.01	0.58	0.00	0.30	0.00	98.98	0.17

Signed-off-by: Erwan Velu <e.velu@criteo.com>
---
 hwbench/environment/turbostat.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/hwbench/environment/turbostat.py b/hwbench/environment/turbostat.py
index 185834c..33df7af 100644
--- a/hwbench/environment/turbostat.py
+++ b/hwbench/environment/turbostat.py
@@ -225,9 +225,17 @@ def parse(self):
             items = line.split()
             core_nb = items[int(self.__get_field_position(CPUSTATS.CPU))]
             if self.has(CPUSTATS.CORE_WATTS):
-                self.power_metrics[str(PowerContext.CPU)][f"Core_{core_nb}"].add(
-                    float(items[int(self.__get_field_position(CPUSTATS.CORE_WATTS))])
-                )
+                try:
+                    self.power_metrics[str(PowerContext.CPU)][f"Core_{core_nb}"].add(
+                        float(
+                            items[int(self.__get_field_position(CPUSTATS.CORE_WATTS))]
+                        )
+                    )
+                except IndexError:
+                    # Some processors report the corewatt in the header but not for all cores ...
+                    # So let's ignore the metric if it does not exist for this core
+                    pass
+
             self.freq_metrics[str(CPUContext.CPU)][f"Core_{core_nb}"].add(
                 float(items[int(self.__get_field_position(CPUSTATS.BUSY_MHZ))])
             )

From da3e8ad4e25a04bd82a33b1a8ac79d26c6b4503b Mon Sep 17 00:00:00 2001
From: Erwan Velu <e.velu@criteo.com>
Date: Thu, 30 May 2024 15:06:36 +0200
Subject: [PATCH 9/9] hwbench/turbostat: Using long name options

Starting with Kernel 6.9, the -n option became ambiguous, which prevents
turbostat from running, with the following message:

	turbostat: option '-n' is ambiguous; possibilities: '-num_iterations' '-no-msr' '-no-perf'

This commit removes all short name options and replaces them with
long names to avoid this case.

This patch got tested successfully from Kernel 5.19 (2022.4.16) up
to the incoming 6.10 (2024.5.10).

Signed-off-by: Erwan Velu <e.velu@criteo.com>
---
 hwbench/environment/turbostat.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/hwbench/environment/turbostat.py b/hwbench/environment/turbostat.py
index 33df7af..8a48793 100644
--- a/hwbench/environment/turbostat.py
+++ b/hwbench/environment/turbostat.py
@@ -72,7 +72,7 @@ def check_version(self):
             fatal("Missing turbostat binary, please install it.")
 
         self.process = subprocess.Popen(
-            ["turbostat", "-v"],
+            ["turbostat", "--version"],
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT,
             env=english_env,
@@ -151,14 +151,14 @@ def run(self, interval: float = 1, wait=False):
             "-c",
             f"{self.hardware.get_cpu().get_logical_cores_count()-1}",
             "turbostat",
-            "-c",
+            "--cpu",
             "core",
-            "-q",
+            "--quiet",
             "--interval",
             str(interval),
-            "-n",
+            "--num_iterations",
             "1",
-            "-s",
+            "--show",
         ]
         sensors = ""
         for sensor in CPUSTATS: