Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add OC Event Counter #597

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 54 additions & 1 deletion jtop/core/power.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from .common import cat, check_file
import glob
import os
# Logging
import logging
Expand Down Expand Up @@ -105,6 +106,46 @@ def find_all_i2c_power_monitor(i2c_path):
return power_sensor


def find_all_oc_event_counters():
    """
    Locate the overcurrent (OC) event counter files exposed under hwmon.

    Returns a dictionary keyed by counter file path whose values are the counts
    read at discovery time. When no counter file matches, a warning is logged
    and an empty dictionary is returned.
    """
    counter_paths = glob.glob('/sys/class/hwmon/hwmon*/oc*_event_cnt')
    if not counter_paths:
        logger.warning("No OC event counters found")
        return {}

    # Seed every counter with -1 so that the first read below stores any
    # non-negative value found in the file.
    counters = dict.fromkeys(counter_paths, -1)
    update_oc_event_counts(counters)
    return counters


def update_oc_event_counts(event_counts):
    """
    Refresh the overcurrent (OC) event counts stored in ``event_counts``.

    Each key of ``event_counts`` is the path of a counter file and each value is
    the last count read from it. Every file is read again; when the new value is
    greater than the stored one, the entry is updated in place.

    A counter that cannot be read or parsed is logged and keeps its previous
    value. The remaining counters are still processed, so a single failing
    file does not hide an increase on another counter.

    :param event_counts: dictionary mapping counter file path to last known count
    :return: True if at least one count has increased, False otherwise
    """
    # We can report more granular information about the throttling events if we really want to, but there
    # is no direct mapping from oc*_event_cnt to which power rail/system is being measured, we
    # would need to hard code a mapping from board type to oc*_event_cnt->power rail mappings,
    # this is fragile, and most users will probably only care about throttling or not throttling,
    # and can use the existing power panel to see currents and current limits if they want to dig deeper.
    # https://docs.nvidia.com/jetson/archives/r36.4/DeveloperGuide/SD/PlatformPowerAndPerformance/JetsonOrinNanoSeriesJetsonOrinNxSeriesAndJetsonAgxOrinSeries.html#jetson-agx-orin-series
    throttling = False
    for filename in event_counts:
        # Keep the try body limited to the operations that can actually fail
        try:
            with open(filename, 'r') as f:
                count = int(f.read())
        except (IOError, OSError, ValueError) as e:
            logger.error("Error reading OC event counter from {filename}: {e}".format(filename=filename, e=e))
            # Skip only this counter; the others must still be refreshed
            continue
        if count > event_counts[filename]:
            event_counts[filename] = count
            throttling = True
    return throttling


def read_power_status(data):
values = {}
power_type = data['type']
Expand Down Expand Up @@ -237,6 +278,7 @@ class PowerService(object):
def __init__(self):
self._power_sensor = {}
self._power_avg = {}
self._oc_event_counts = {}
# Find all I2C sensors on board
i2c_path = "/sys/bus/i2c/devices"
system_monitor = "/sys/class/power_supply"
Expand All @@ -248,6 +290,7 @@ def __init__(self):
# Load all power sensors
self._power_sensor = find_all_i2c_power_monitor(i2c_path)
self._power_sensor.update(find_all_system_monitor(system_monitor))
self._oc_event_counts = find_all_oc_event_counters()
if not self._power_sensor:
logger.warning("Power sensors not found!")
# Sort all power sensors
Expand Down Expand Up @@ -287,5 +330,15 @@ def get_status(self):
rails[name] = values
# Measure total power
total, rails = total_power(rails)
return {'rail': rails, 'tot': total}

# If there are OC counters, update those as well
oc_events = {}
if self._oc_event_counts:
oc_events['is_throttling'] = update_oc_event_counts(self._oc_event_counts)
oc_events['count'] = 0
# Sum up all the events:
for filename, count in self._oc_event_counts.items():
oc_events['count'] += count

return {'rail': rails, 'tot': total, 'oc_events': oc_events}
Comment on lines +334 to +343
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Consider only including 'oc_events' in the return dict when OC counters are present

This would make the API more consistent and spare clients from having to check both for the key's existence and for an empty dict.

Suggested change
# If there are OC counters, update those as well
oc_events = {}
if self._oc_event_counts:
oc_events['is_throttling'] = update_oc_event_counts(self._oc_event_counts)
oc_events['count'] = 0
# Sum up all the events:
for filename, count in self._oc_event_counts.items():
oc_events['count'] += count
return {'rail': rails, 'tot': total, 'oc_events': oc_events}
# Build return dictionary
ret_dict = {'rail': rails, 'tot': total}
# Only include OC events if counters exist
if self._oc_event_counts:
oc_events = {
'is_throttling': update_oc_event_counts(self._oc_event_counts),
'count': sum(self._oc_event_counts.values())
}
ret_dict['oc_events'] = oc_events
return ret_dict

# EOF
30 changes: 29 additions & 1 deletion jtop/gui/pcontrol.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,23 @@ def compact_power(stdscr, pos_y, pos_x, width, height, jetson):
if width > LIMIT:
unit_avg = unit_to_string(total['avg'], 'm', 'W')
stdscr.addstr(pos_y + len_power + 1, center_x + column_power - 3, unit_avg, curses.A_BOLD)
return len(power) + 1

# If there is no more space, return
if len_power + 3 >= height:
return len(power) + 1

# if there are no OC events, return
if not jetson.power['oc_events']:
return len(power) + 1

# Plot OC_EVENT_CNT
oc_event_cnt = jetson.power['oc_events']['count']
is_throttling = jetson.power['oc_events']['is_throttling']
# Plot OC_EVENT_CNT with color based on throttling status
color = NColors.red() if is_throttling else (NColors.yellow() if oc_event_cnt > 0 else NColors.green())
stdscr.addstr(pos_y + len_power + 3, center_x - column_power - 5, "OC EVENT COUNT: ", curses.A_BOLD)
stdscr.addstr(pos_y + len_power + 3, center_x + 2, str(oc_event_cnt), curses.A_BOLD | color)
Comment on lines +121 to +123
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Consider extracting duplicated OC event display logic into a helper function

This logic appears in both compact_power and control_power. A helper would reduce duplication and make future updates easier.

Suggested implementation:

    # If there are no OC events or no space, return
    if not jetson.power['oc_events'] or len_power + 3 >= height:
        return len(power) + 1

    return display_oc_events(stdscr, jetson.power['oc_events'], pos_y, len_power, center_x, column_power)

def display_oc_events(stdscr, oc_events, pos_y, len_power, center_x, column_power):
    """
    Draw the OC event count line at row ``pos_y + len_power + 3``.

    The count is drawn in red while ``oc_events['is_throttling']`` is true,
    in yellow when the count is above zero without throttling, and in green
    otherwise. Returns ``len_power + 3``.
    """
    total_events = oc_events['count']

    # Pick the color from the throttling status first, then from the count
    if oc_events['is_throttling']:
        color = NColors.red()
    elif total_events > 0:
        color = NColors.yellow()
    else:
        color = NColors.green()

    # The label and the value share the same row
    row = pos_y + len_power + 3
    stdscr.addstr(row, center_x - column_power - 5, "OC EVENT COUNT: ", curses.A_BOLD)
    stdscr.addstr(row, center_x + 2, str(total_events), curses.A_BOLD | color)

    return len_power + 3

You'll need to:

  1. Make sure the NColors and curses imports are available in the scope where the helper function is defined
  2. Update any other locations in the codebase that display OC events to use this new helper function
  3. Consider adding this helper function to a utilities module if it might be useful in other parts of the application

return len(power) + 3


class CTRL(Page):
Expand Down Expand Up @@ -374,6 +390,18 @@ def control_power(self, pos_y, pos_x, key, mouse):
except curses.error:
pass

# if there are no OC events, return
if not self.jetson.power['oc_events']:
return

# Plot OC_EVENT_CNT
oc_event_cnt = self.jetson.power['oc_events']['count']
is_throttling = self.jetson.power['oc_events']['is_throttling']
# Plot OC_EVENT_CNT with color based on throttling status
color = NColors.red() if is_throttling else (NColors.yellow() if oc_event_cnt > 0 else NColors.green())
self.stdscr.addstr(pos_y_table + len_power + 2, pos_x, "OC EVENT COUNT: ", curses.A_BOLD)
self.stdscr.addstr(pos_y_table + len_power + 2, pos_x + 16, str(oc_event_cnt), curses.A_BOLD | color)

def draw(self, key, mouse):
# Screen size
height, width, first = self.size_page()
Expand Down
Loading