Skip to content

Commit 5cba687

Browse files
committed
carbon emission plots
1 parent f8f8f16 commit 5cba687

File tree

8 files changed

+730
-99
lines changed

8 files changed

+730
-99
lines changed
Lines changed: 192 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1,79 +1,224 @@
11
import pandas as pd
22
import numpy as np
3+
import matplotlib.pyplot as plt
4+
import matplotlib.dates as mdates
35
from datetime import datetime, timedelta
46

57
from .carbon_intensity import compute_ci
68

79
def compute_ce(
    server: dict,
    start_time: datetime,
    runtime_minutes: int,
) -> tuple[float, pd.DataFrame]:
    """
    Estimate the carbon emissions of a single job running on ``server``.

    The runtime is snapped to a whole number of hours, the carbon-intensity
    time series for ``server['country']`` is fetched with ``compute_ci`` for
    the window ``[start_time, start_time + rounded runtime]``, and that series
    is handed to ``compute_ce_from_energy`` together with ``server``.

    :param server: Dictionary describing the server. ``country`` is read here;
        the whole dictionary is forwarded to ``compute_ce_from_energy``, which
        reads ``number_core`` and ``memory_gb`` and applies defaults for
        ``power_draw_core``, ``usage_factor_core``, ``power_draw_mem`` and
        ``power_usage_efficiency`` when they are missing.
    :param start_time: Start time of the job (datetime).
    :param runtime_minutes: Total running time of the job in minutes.

    :return: The tuple produced by ``compute_ce_from_energy``:
        - the total carbon emissions of the job
          (NOTE(review): the code comment in ``compute_ce_from_energy`` says
          grams CO2 equivalent, the previous docstring here said kilograms;
          confirm the unit),
        - a pandas DataFrame holding the time series of carbon emissions.
    """
    # Base values taken from http://calculator.green-algorithms.org/

    # Snap the runtime to whole hours, expressed in minutes.
    # NOTE(review): built-in round() rounds halves to the even integer, so
    # 30 minutes becomes 0 hours while 90 minutes becomes 2 hours; confirm
    # this is the intended "nearest hour" behaviour.
    whole_hour_minutes = 60 * round(runtime_minutes / 60)
    window_end = start_time + timedelta(minutes=whole_hour_minutes)

    intensity_series = compute_ci(server["country"], start_time, window_end)
    total_emissions, emissions_frame = compute_ce_from_energy(server, intensity_series)
    return total_emissions, emissions_frame
4049

41-
def compute_energy_used(runtime_minutes, number_core, power_draw_core, usage_factor_core, mem_size_gb, power_draw_mem, PUE):
50+
def _compute_energy_used(runtime_minutes, number_core, power_draw_core, usage_factor_core, mem_size_gb, power_draw_mem, PUE):
4251
return round((runtime_minutes/60)*(number_core * power_draw_core * usage_factor_core + mem_size_gb * power_draw_mem) * PUE * 0.001, 2)
4352

4453
def compute_savings_same_device(country_code,start_time_request,start_time_predicted,runtime,cpu_cores,cpu_memory):
    """
    Compute the emissions saved by moving a job to a different start time on the same device.

    :param country_code: Country code of the device (used to fetch energy data).
    :param start_time_request: The originally requested start time (datetime).
    :param start_time_predicted: The suggested alternative start time (datetime).
    :param runtime: Running time of the job in minutes.
    :param cpu_cores: Number of CPU cores.
    :param cpu_memory: Memory size in Gigabytes.

    :return: Emissions at the requested time minus emissions at the predicted
        time. A positive value means the predicted start time is cleaner; a
        negative value means the predicted start time would emit MORE.
    """
    # compute_ce takes a server dictionary; the power-draw / PUE keys are left
    # out on purpose so that compute_ce_from_energy applies its defaults.
    server = {
        "country": country_code,
        "number_core": cpu_cores,
        "memory_gb": cpu_memory,
    }
    ce_requested, _ = compute_ce(server, start_time_request, runtime)
    ce_predicted, _ = compute_ce(server, start_time_predicted, runtime)
    return ce_requested - ce_predicted
4857

58+
def compare_carbon_emissions(server1,server2,start_time1,start_time2,runtime_minutes):
    """
    Compares the carbon emissions of running a job with the same duration on two different servers.

    :param server1: A dictionary describing the first server. It is passed
        unchanged to ``compute_ce``; expected keys:

        - `country` (str): The country code for the server's location (used for energy data).
        - `number_core` (int): The number of CPU cores.
        - `memory_gb` (float): The memory available in Gigabytes.
        - `power_draw_core` (float): Power draw of each computing core in Watts.
        - `usage_factor_core` (float): The core usage factor, a value between 0 and 1.
        - `power_draw_mem` (float): Power draw of memory in Watts.
        - `power_usage_efficiency` (float): Efficiency coefficient of the data center.

    :param server2: A dictionary describing the second server, with the same keys as `server1`.
    :param start_time1: The start time of the job on `server1` (datetime).
    :param start_time2: The start time of the job on `server2` (datetime).
    :param runtime_minutes: The total running time of the job in minutes (int).

    :return: A 4-tuple ``(emissions_server1, emissions_server2, absolute_difference, higher_emission_server)``:

        - `emissions_server1` (float): Total carbon emissions for `server1` as returned by ``compute_ce``.
        - `emissions_server2` (float): Total carbon emissions for `server2` as returned by ``compute_ce``.
        - `absolute_difference` (float): The non-negative difference between the two totals.
        - `higher_emission_server` (str): "server1", "server2", or "equal" when both totals match.
    """
    ce1, _ = compute_ce(server1, start_time1, runtime_minutes)
    ce2, _ = compute_ce(server2, start_time2, runtime_minutes)

    # The direction is reported separately through higher_emission_server, so
    # the difference itself is returned as a magnitude, as documented.
    abs_difference = abs(ce2 - ce1)

    if ce1 > ce2:
        higher_emission_server = "server1"
    elif ce2 > ce1:
        higher_emission_server = "server2"
    else:
        higher_emission_server = "equal"

    return ce1, ce2, abs_difference, higher_emission_server
49105

50106
def compute_ce_from_energy(
    server,
    ci_data: pd.DataFrame
):
    """
    Calculates the carbon emissions of a job from a carbon-intensity time series.

    The runtime of the job is NOT passed in: it is derived from the first and
    last `startTimeUTC` values of `ci_data`. The energy used over that runtime
    is computed with ``_compute_energy_used`` and spread evenly over the rows.

    :param server: A dictionary containing details about the server. Keys read here:

        - `number_core` (int): The number of CPU cores (required).
        - `memory_gb` (float): The size of memory available in Gigabytes (required).
        - `power_draw_core` (float): Power draw of a computing core in Watts (default 15.8).
        - `usage_factor_core` (float): The core usage factor, between 0 and 1 (default 1).
        - `power_draw_mem` (float): Power draw of memory in Watts (default 0.3725).
        - `power_usage_efficiency` (float): Efficiency coefficient of the data center (default 1.6).

    :param ci_data: A pandas DataFrame with the columns:

        - `startTimeUTC`: Start time of each row (converted with ``pd.to_datetime`` if needed).
        - `ci_default` (float): Carbon intensity value of each row.

        The caller's DataFrame is not modified; a copy is returned.

    :return: A tuple containing:
        - (float): The total carbon emissions, rounded to 4 decimals
          (grams CO2 equivalent according to the original code comment;
          NOTE(review): the previous docstring said kilograms, confirm the unit).
        - (pandas.DataFrame): A copy of `ci_data` with an added `carbon_emission` column.
          When `ci_data` is empty or spans zero time, the total is 0.0 and the column is all zeros.
    """
    server_defaults = {
        "power_draw_core": 15.8,
        "usage_factor_core": 1,
        "power_draw_mem": 0.3725,
        "power_usage_efficiency": 1.6,
    }
    server = server_defaults | server  # caller-supplied values win over the defaults

    # Work on a copy: callers frequently pass a filtered slice of a larger
    # frame, and writing columns into it would mutate (or warn about) their data.
    ci_data = ci_data.copy()

    # to make sure startTimeUTC is in date format
    if not pd.api.types.is_datetime64_any_dtype(ci_data['startTimeUTC']):
        ci_data['startTimeUTC'] = pd.to_datetime(ci_data['startTimeUTC'])

    # note that the run time is calculated based on the energy data frame provided
    if ci_data.empty:
        runtime_minutes = 0
    else:
        end = ci_data['startTimeUTC'].iloc[-1]
        start = ci_data['startTimeUTC'].iloc[0]
        runtime_minutes = (end - start).total_seconds() / 60

    # A zero-length job uses no energy; returning early also avoids the
    # division by zero below.
    if runtime_minutes == 0:
        ci_data["carbon_emission"] = 0.0
        return 0.0, ci_data

    energy_consumed = _compute_energy_used(
        runtime_minutes,
        server["number_core"],
        server["power_draw_core"],
        server["usage_factor_core"],
        server["memory_gb"],
        server["power_draw_mem"],
        server["power_usage_efficiency"],
    )

    # assuming equal energy usage throughout the computation
    # NOTE(review): runtime_minutes * 60 is the runtime in seconds, so this is
    # energy per second although the name says "hour"; left unchanged, confirm
    # against the documented methodology.
    e_hour = energy_consumed / (runtime_minutes * 60)
    ci_data["carbon_emission"] = ci_data["ci_default"] * e_hour
    ce = round(sum(ci_data["carbon_emission"]), 4)  # grams CO2 equivalent
    return ce, ci_data
168+
169+
170+
def _compute_ce_bulk(server,jobs):
    """
    Compute the carbon emissions of several jobs that run on the same server.

    The carbon-intensity data is fetched once with ``compute_ci`` for the
    window covering all jobs, then sliced per job.

    :param server: Server dictionary; `country` is read here and the whole
        dictionary is forwarded to ``compute_ce_from_energy``.
    :param jobs: Non-empty list of dictionaries, each with `start_time`
        (datetime) and `runtime_minutes`. Every job dictionary is updated in
        place with `end_time` and `emissions`.
        NOTE(review): `start_time` is compared against `startTimeUTC`, so both
        presumably need the same timezone awareness; confirm against compute_ci.

    :return: A tuple ``(energy_data, jobs, min_start_date, max_end_date)``.
    :raises ValueError: if `jobs` is empty (raised by ``min``).
    """
    for job in jobs:
        # jobs are dictionaries, so the end time must be stored as a key
        job["end_time"] = job["start_time"] + timedelta(minutes=job["runtime_minutes"])

    min_start_date = min(job["start_time"] for job in jobs)
    max_end_date = max(job["end_time"] for job in jobs)

    energy_data = compute_ci(server["country"], min_start_date, max_end_date)
    energy_data["startTimeUTC"] = pd.to_datetime(energy_data["startTimeUTC"])

    for job in jobs:
        in_job_window = (
            (energy_data["startTimeUTC"] >= job["start_time"])
            & (energy_data["startTimeUTC"] <= job["end_time"])
        )
        # Copy the slice so downstream column writes never touch energy_data.
        filtered_energy = energy_data[in_job_window].copy()
        job["emissions"], _ = compute_ce_from_energy(server, filtered_energy)

    return energy_data, jobs, min_start_date, max_end_date
184+
185+
def plot_ce_jobs(server,jobs):
    """
    Plot the share of renewable energy over time together with the given jobs.

    The emissions of each job are computed with ``_compute_ce_bulk``. The left
    y-axis shows the `percentRenewable` column of the energy data; on a second
    y-axis every job is drawn as a horizontal bar from its start to its end
    time, labelled with its computed emissions. The figure is displayed with
    ``plt.show()``; nothing is returned.

    :param server: Server dictionary, forwarded to ``_compute_ce_bulk``.
    :param jobs: List of job dictionaries, forwarded to ``_compute_ce_bulk``
        (which adds `end_time` and `emissions` to each of them).
    """
    energy_data, jobs, min_start_date, max_end_date = _compute_ce_bulk(server, jobs)
    palette = {
        "red": "#D6A99A",
        "green": "#99D19C",
        "blue": "#3DA5D9",
        "yellow": "#E2C044",
        "black": "#0F1A20",
    }
    fig, ax1 = plt.subplots(figsize=(10, 6))
    plt.title("Green Energy and Jobs")
    ax1.plot(energy_data['startTimeUTC'], energy_data['percentRenewable'], color=palette['green'], label='Percentage of Renewable Energy')
    ax1.set_xlabel('Time')
    ax1.set_ylabel('% Renewable energy')
    ax1.tick_params(axis='y')

    # Set x-axis to show dates properly
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m %H:%M'))
    plt.xticks(rotation=45)

    # Second y-axis: one row per job
    ax2 = ax1.twinx()

    for idx, job in enumerate(jobs):
        row = idx + 1  # y position of this job (1 for the first job, 2 for the second, ...)
        lbl = str(job["emissions"])
        ax2.plot([job['start_time'], job['end_time']], [row, row], marker='o', linewidth=25, label=lbl, color=palette["blue"])
        # Place the emissions label at the midpoint of the bar
        labelpoint = job['start_time'] + (job['end_time'] - job['start_time']) / 2
        ax2.text(labelpoint, row, lbl, color='black', ha='center', va='center', fontsize=12)

    # Adjust y-axis ticks to match the number of jobs
    ax2.set_yticks(range(1, len(jobs) + 1))

    fig.tight_layout()
    plt.show()

docs/plot.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -130,12 +130,13 @@ def plot_multiple_percentage_clean(dfs, labels,save_fig_path=None):
130130

131131
def show_clean_energy(country,start,end,save_fig_path=None):
    """
    Plot the clean-energy percentage for one country.

    Fetches the result of ``energy(country, start, end)``, takes its ``"data"``
    entry and passes it to ``plot_percentage_clean``.

    Note that these plots are based on actual energy production and not the forecasts.
    """
    production = energy(country, start, end)["data"]
    plot_percentage_clean(production, country, save_fig_path)
135136

136137

137138
def show_clean_energy_multiple(countries,start,end,save_fig_path=None):
    """
    Plot the clean-energy percentage for several countries.

    For every entry of ``countries`` the ``"data"`` entry of
    ``energy(country, start, end)`` is collected, in order, and the resulting
    list is passed to ``plot_multiple_percentage_clean`` with ``countries`` as
    the labels.
    """
    frames = [energy(code, start, end)["data"] for code in countries]
    plot_multiple_percentage_clean(frames, countries, save_fig_path)

0 commit comments

Comments
 (0)