Skip to content

Commit

Permalink
(fix) Embodied Carbon was not reported due to missing SCI values.
Browse files Browse the repository at this point in the history
  • Loading branch information
ArneTR committed Oct 7, 2024
1 parent d03ad47 commit cf05574
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 27 deletions.
8 changes: 3 additions & 5 deletions lib/phase_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,15 +158,13 @@ def build_and_store_phase_stats(run_id, sci=None):
else:
network_io_co2_in_ug = decimal.Decimal(0)


if sci.get('EL', None) is not None and sci.get('TE', None) is not None and sci.get('RS', None) is not None:
duration_in_years = duration_in_s * 60 * 60 * 24 * 365
embodied_carbon_share_g = (duration_in_years / sci.get('EL', None) ) * sci.get('TE', None) * sci.get('RS', None)
duration_in_years = duration_in_s / (60 * 60 * 24 * 365)
embodied_carbon_share_g = (duration_in_years / sci['EL'] ) * sci['TE'] * sci['RS']
embodied_carbon_share_ug = decimal.Decimal(embodied_carbon_share_g * 1_000_000)
csv_buffer.write(generate_csv_line(run_id, 'embodied_carbon_share_machine', '[SYSTEM]', f"{idx:03}_{phase['name']}", embodied_carbon_share_ug, 'TOTAL', None, None, 'ug'))

if phase['name'] == '[RUNTIME]' and machine_co2_in_ug is not None and sci is not None \
and sci.get('R', None) is not None and sci['R'] != 0:
if phase['name'] == '[RUNTIME]' and machine_co2_in_ug is not None and sci is not None and sci.get('R', 0) != 0:
csv_buffer.write(generate_csv_line(run_id, 'software_carbon_intensity_global', '[SYSTEM]', f"{idx:03}_{phase['name']}", (machine_co2_in_ug + embodied_carbon_share_ug + network_io_co2_in_ug) / sci['R'], 'TOTAL', None, None, f"ugCO2e/{sci['R_d']}"))

if machine_power_idle and cpu_utilization_machine and cpu_utilization_containers:
Expand Down
5 changes: 4 additions & 1 deletion runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ def __init__(self,
self._tmp_folder = Path('/tmp/green-metrics-tool').resolve() # since linux has /tmp and macos /private/tmp
self._usage_scenario = {}
self._architecture = utils.get_architecture()

self._sci = {'R_d': None, 'R': 0}
self._sci |= GlobalConfig().config.get('sci', None) # merge in data from machine config like I, TE etc.

self._job_id = job_id
self._arguments = locals()
self._repo_folder = f"{self._tmp_folder}/repo" # default if not changed in checkout_repository
Expand Down Expand Up @@ -461,7 +464,7 @@ def update_and_insert_specs(self):

measurement_config = {}
measurement_config['providers'] = utils.get_metric_providers(config)
measurement_config['sci'] = config.get('sci', None)
measurement_config['sci'] = self._sci

# Insert auxiliary info for the run. Not critical.
DB().query("""
Expand Down
6 changes: 3 additions & 3 deletions test-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,10 @@ measurement:
Hardware_Availability_Year: 2011

sci:
EL: 3.5
EL: 4
RS: 1
TE: 194000
I: 475
TE: 181000
I: 436

optimization:
ignore:
Expand Down
40 changes: 31 additions & 9 deletions tests/metric_providers/test_metric_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,26 +142,27 @@ def test_network_providers():

assert seen_network_total_procfs_system is True

def test_cpu_memory_providers():
if utils.get_architecture() == 'macos':
return
def test_cpu_memory_carbon_providers():

assert(run_id is not None and run_id != '')

query = """
SELECT metric, detail_name, value, unit, max_value
FROM phase_stats
WHERE run_id = %s and phase = '006_VM Stress'
ORDER BY metric DESC -- this will assure that the phase_time metric will come first and can be saved
"""

data = DB().fetch_all(query, (run_id,), fetch_mode='dict')
assert(data is not None and data != [])

## get the current used disk
seen_phase_time_syscall_system = False
seen_cpu_utilization_procfs_system = False
seen_cpu_utilization = False
seen_memory_used_procfs_system = False
seen_embodied_carbon_share_machine = False
MICROSECONDS = 1_000_000
phase_time = None

for metric_provider in data:
metric = metric_provider['metric']
Expand All @@ -172,16 +173,37 @@ def test_cpu_memory_providers():
assert 9000 < val <= 10000 , f"cpu_utilization_procfs_system is not between 90_00 and 100_00 but {metric_provider['value']} {metric_provider['unit']}"
assert 9500 < max_value <= 10500 , f"cpu_utilization_procfs_system max is not between 95_00 and 105_00 but {metric_provider['value']} {metric_provider['unit']}"

seen_cpu_utilization_procfs_system = True
seen_cpu_utilization = True
elif metric == 'cpu_utilization_mach_system': # macOS values do not get as high due to the VM.
assert 5500 < val <= 10000 , f"cpu_utilization_mach_system is not between 90_00 and 100_00 but {metric_provider['value']} {metric_provider['unit']}"
assert 8000 < max_value <= 10500 , f"cpu_utilization_mach_system max is not between 95_00 and 105_00 but {metric_provider['value']} {metric_provider['unit']}"

seen_cpu_utilization = True

elif metric == 'memory_used_procfs_system':
if not os.getenv("GITHUB_ACTIONS") == "true": # skip test for GitHub Actions VM. Memory seems weirdly assigned here
if not os.getenv("GITHUB_ACTIONS") == "true" and utils.get_architecture() != 'macos': # skip test for GitHub Actions VM. Memory seems weirdly assigned here. Also skip macos
assert psutil.virtual_memory().total*0.55 <= val <= psutil.virtual_memory().total * 0.65 , f"memory_used_procfs_system avg is not between 55% and 65% of total memory but {metric_provider['value']} {metric_provider['unit']}"

seen_memory_used_procfs_system = True
elif metric == 'phase_time_syscall_system':
assert 5*MICROSECONDS < val < 5.5*MICROSECONDS , f"phase_time_syscall_system is not between 5 and 5.5 s but {metric_provider['value']} {metric_provider['unit']}"
seen_phase_time_syscall_system = True
phase_time = val

elif metric == 'embodied_carbon_share_machine':
# we have the phase time value as we sort by metric DESC
phase_time_in_years = phase_time / (MICROSECONDS * 60 * 60 * 24 * 365)
sci = {"EL": 4, "TE": 181000, "RS": 1}
embodied_carbon_expected = int((phase_time_in_years / sci['EL']) * sci['TE'] * sci['RS'] * 1_000_000)
# Make a range because of rounding errors
assert embodied_carbon_expected*0.99 < val < embodied_carbon_expected*1.01 , f"embodied_carbon_share_machine is not {embodied_carbon_expected} but {metric_provider['value']} {metric_provider['unit']}\n. This might be also because the values in the test are hardcoded. Check reporter but also if test-config.yml configuration is still accurate"
seen_embodied_carbon_share_machine = True

assert seen_phase_time_syscall_system is True, "Did not see seen_phase_time_syscall_system metric"
assert seen_cpu_utilization is True, "Did not see seen_cpu_utilization metric"
assert seen_embodied_carbon_share_machine is True, "Did not see seen_embodied_carbon_share_machine metric"

if utils.get_architecture() == 'macos': # skip following test for macos as we do not have that provider there
return

assert seen_phase_time_syscall_system is True
assert seen_cpu_utilization_procfs_system is True
assert seen_memory_used_procfs_system is True
assert seen_memory_used_procfs_system is True, "Did not see seen_memory_used_procfs_system metric"
18 changes: 9 additions & 9 deletions tools/rebuild_phase_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,23 @@
from lib.db import DB

if __name__ == '__main__':
print('This will remove ALL phase_stats and completely rebuild them. No data will get lost, but it will take some time. Continue? (y/N)')
print('This will remove ALL phase_stats and completely rebuild them. No data will get lost, but it will take some time. Continue? (y/N)')
answer = sys.stdin.readline()
if answer.strip().lower() == 'y':
print('Deleting old phase_stats ...')
DB().query('DELETE FROM phase_stats')
print('Fetching runs ...')
query = '''
SELECT id
SELECT id, measurement_config
FROM runs
WHERE
end_measurement IS NOT NULL AND phases IS NOT NULL
WHERE end_measurement IS NOT NULL AND phases IS NOT NULL
'''
runs = DB().fetch_all(query)
runs = DB().fetch_all(query, fetch_mode='dict')

print(f"Fetched {len(runs)} runs. Commencing ...")
for idx, run_id in enumerate(runs):

print(f"Rebuilding phase_stats for run #{idx} {run_id[0]}")
build_and_store_phase_stats(run_id[0])
print(f"Fetched {len(runs)} runs. Commencing ...")
for idx, data in enumerate(runs):
print(f"Rebuilding phase_stats for run #{idx} {data['id']}")
build_and_store_phase_stats(data['id'], data['measurement_config']['sci'])
print('Done')

0 comments on commit cf05574

Please sign in to comment.