1
1
import pandas as pd
2
2
import numpy as np
3
+ import matplotlib .pyplot as plt
4
+ import matplotlib .dates as mdates
3
5
from datetime import datetime , timedelta
4
6
5
7
from .carbon_intensity import compute_ci
6
8
7
9
def compute_ce(
    server: dict,
    start_time: datetime,
    runtime_minutes: int,
) -> tuple[float, pd.DataFrame]:
    """
    Calculates the carbon footprint of a job, given its hardware configuration, time, and location.
    This method returns the total emission together with a time series of the carbon emissions.

    The methodology is defined in the documentation.

    :param server: A dictionary containing the details about the server, including its hardware specifications.
                   The dictionary should include the following keys:

                   - `country` (str): The country code where the job was performed (required to fetch energy data).
                   - `number_core` (int): The number of CPU cores.
                   - `memory_gb` (float): The size of memory available in Gigabytes.
                   - `power_draw_core` (float): Power draw of a computing core in Watts.
                   - `usage_factor_core` (float): The core usage factor, a value between 0 and 1.
                   - `power_draw_mem` (float): Power draw of memory in Watts.
                   - `power_usage_efficiency` (float): Efficiency coefficient of the data center.

    :param start_time: The start time of the job (datetime).
    :param runtime_minutes: Total running time of the job in minutes (int).
                            The value is rounded to whole hours before the carbon
                            intensity window is requested.

    :return: A tuple containing:
             - (float): The total carbon footprint of the job.
               NOTE(review): earlier documentation said kilograms of CO2 equivalent, while the
               emission computation is commented as grams CO2 equivalent -- confirm the unit.
             - (pandas.DataFrame): A DataFrame containing the time series of carbon emissions.
    """
    # Snap the runtime to whole hours (expressed in minutes).
    # NOTE: Python's round() rounds exact halves to the nearest even integer, so
    # 30 minutes becomes 0 hours while 90 minutes becomes 2 hours; runtimes below
    # half an hour therefore collapse to a zero-length window.
    rounded_runtime_minutes = round(runtime_minutes / 60) * 60
    end_time = start_time + timedelta(minutes=rounded_runtime_minutes)

    # The key is "country" (no surrounding whitespace), matching the documented schema.
    ci_ts = compute_ci(server["country"], start_time, end_time)
    ce_total, ce_df = compute_ce_from_energy(server, ci_ts)
    return ce_total, ce_df
40
49
41
- def compute_energy_used (runtime_minutes , number_core , power_draw_core , usage_factor_core , mem_size_gb , power_draw_mem , PUE ):
50
+ def _compute_energy_used (runtime_minutes , number_core , power_draw_core , usage_factor_core , mem_size_gb , power_draw_mem , PUE ):
42
51
return round ((runtime_minutes / 60 )* (number_core * power_draw_core * usage_factor_core + mem_size_gb * power_draw_mem ) * PUE * 0.001 , 2 )
43
52
44
53
def compute_savings_same_device(country_code, start_time_request, start_time_predicted, runtime, cpu_cores, cpu_memory):
    """
    Computes the emission saved by running a job at the predicted start time
    instead of the requested start time, on the same device.

    :param country_code: The country code of the device location (used to fetch energy data).
    :param start_time_request: The originally requested start time (datetime).
    :param start_time_predicted: The suggested start time (datetime).
    :param runtime: Running time of the job in minutes.
    :param cpu_cores: The number of CPU cores.
    :param cpu_memory: The size of memory available in Gigabytes.
    :return: Emission at the requested time minus emission at the predicted time.
             A positive value means the predicted start time emits less; a negative
             value means the predicted start time would emit more than the requested one.
    """
    # compute_ce expects a server dictionary; the power-draw and PUE keys are
    # omitted here and are presumably filled with defaults downstream -- verify.
    server = {
        "country": country_code,
        "number_core": cpu_cores,
        "memory_gb": cpu_memory,
    }
    ce_requested, _ = compute_ce(server, start_time_request, runtime)
    ce_predicted, _ = compute_ce(server, start_time_predicted, runtime)
    return ce_requested - ce_predicted
48
57
58
def compare_carbon_emissions(server1, server2, start_time1, start_time2, runtime_minutes):
    """
    Compares the carbon emissions of running a job with the same duration on two different servers.

    :param server1: A dictionary containing the details of the first server's hardware and location specifications.
                    Required keys include:

                    - `country` (str): The country code for the server's location (used for energy data).
                    - `number_core` (int): The number of CPU cores.
                    - `memory_gb` (float): The memory available in Gigabytes.
                    - `power_draw_core` (float): Power draw of each computing core in Watts.
                    - `usage_factor_core` (float): The core usage factor, a value between 0 and 1.
                    - `power_draw_mem` (float): Power draw of memory in Watts.
                    - `power_usage_efficiency` (float): Efficiency coefficient of the data center.

    :param server2: A dictionary containing the details of the second server's hardware and location
                    specifications. Required keys are identical to those in `server1`.

    :param start_time1: The start time of the job on `server1` (datetime).
    :param start_time2: The start time of the job on `server2` (datetime).
    :param runtime_minutes: The total running time of the job in minutes (int).

    :return: A tuple `(emissions_server1, emissions_server2, absolute_difference, higher_emission_server)`:
             - `emissions_server1` (float): Total carbon emissions for `server1`.
             - `emissions_server2` (float): Total carbon emissions for `server2`.
             - `absolute_difference` (float): The non-negative difference between the two totals.
             - `higher_emission_server` (str): "server1", "server2", or "equal" when both totals match.
    """
    ce1, _ = compute_ce(server1, start_time1, runtime_minutes)
    ce2, _ = compute_ce(server2, start_time2, runtime_minutes)

    # The direction of the difference is reported by `higher_emission_server`,
    # so the magnitude itself is always non-negative.
    abs_difference = abs(ce2 - ce1)

    if ce1 > ce2:
        higher_emission_server = "server1"
    elif ce2 > ce1:
        higher_emission_server = "server2"
    else:
        higher_emission_server = "equal"

    return ce1, ce2, abs_difference, higher_emission_server
49
105
50
106
def compute_ce_from_energy(
    server: dict,
    ci_data: pd.DataFrame,
) -> tuple[float, pd.DataFrame]:
    """
    Calculates the carbon footprint for energy consumption over a time series.
    This method returns the total emission and the per-row carbon emissions.

    The methodology is defined in the documentation. Note that the start and end
    times for the computation are derived from the first and last rows of the
    `ci_data` DataFrame.

    :param server: A dictionary containing details about the server, including its hardware specifications.
                   The dictionary should include:

                   - `number_core` (int): The number of CPU cores.
                   - `memory_gb` (float): The size of memory available in Gigabytes.
                   - `power_draw_core` (float, default 15.8): Power draw of a computing core in Watts.
                   - `usage_factor_core` (float, default 1): The core usage factor, a value between 0 and 1.
                   - `power_draw_mem` (float, default 0.3725): Power draw of memory in Watts.
                   - `power_usage_efficiency` (float, default 1.6): Efficiency coefficient of the data center.

    :param ci_data: A pandas DataFrame with carbon intensity over time. It is not modified;
                    the computation works on a copy. Required columns:

                    - `startTimeUTC` (datetime or parseable to datetime): The start time of each row.
                    - `ci_default` (float): Carbon intensity value for the row.

    :return: A tuple containing:
             - (float): The total carbon footprint, rounded to 4 decimals (grams CO2 equivalent).
             - (pandas.DataFrame): A copy of `ci_data` with an added `carbon_emission` column.
    """
    # Default values taken from http://calculator.green-algorithms.org/
    server_defaults = {
        "power_draw_core": 15.8,
        "usage_factor_core": 1,
        "power_draw_mem": 0.3725,
        "power_usage_efficiency": 1.6,
    }
    server = server_defaults | server  # values supplied by the caller win over defaults

    # Work on a copy so the caller's frame (possibly a slice of a larger one) is left untouched.
    ci_data = ci_data.copy()

    # Make sure startTimeUTC holds datetimes before doing arithmetic on it.
    if not pd.api.types.is_datetime64_any_dtype(ci_data["startTimeUTC"]):
        ci_data["startTimeUTC"] = pd.to_datetime(ci_data["startTimeUTC"])

    end = ci_data["startTimeUTC"].iloc[-1]
    start = ci_data["startTimeUTC"].iloc[0]

    # The runtime is derived from the time span covered by the provided data.
    runtime_minutes = (end - start).total_seconds() / 60

    energy_consumed = _compute_energy_used(
        runtime_minutes,
        server["number_core"],
        server["power_draw_core"],
        server["usage_factor_core"],
        server["memory_gb"],
        server["power_draw_mem"],
        server["power_usage_efficiency"],
    )

    if runtime_minutes == 0:
        # A zero-length window consumes no energy; avoid dividing by zero.
        e_hour = 0.0
    else:
        # Assumes equal energy usage throughout the computation.
        # NOTE(review): the divisor is the runtime in seconds (minutes * 60) although
        # the factor is applied to every row of the series -- confirm the intended unit.
        e_hour = energy_consumed / (runtime_minutes * 60)

    ci_data["carbon_emission"] = ci_data["ci_default"] * e_hour
    ce = round(sum(ci_data["carbon_emission"]), 4)  # grams CO2 equivalent
    return ce, ci_data
168
+
169
+
170
def _compute_ce_bulk(server, jobs):
    """
    Computes the carbon emissions of several jobs on one server with a single
    carbon intensity request covering all of them.

    :param server: A dictionary describing the server; `country` is used to fetch the
                   carbon intensity data and the whole dictionary is forwarded to the
                   emission computation.
    :param jobs: A list of job dictionaries, each with `start_time` (datetime) and
                 `runtime_minutes`. Every job is updated in place with `end_time`
                 and `emissions`.
    :return: A tuple `(energy_data, jobs, min_start_date, max_end_date)` where
             `energy_data` is the carbon intensity DataFrame for the whole window.
    """
    # Jobs are dictionaries, so the end time is stored under a key.
    for job in jobs:
        job["end_time"] = job["start_time"] + timedelta(minutes=job["runtime_minutes"])

    min_start_date = min(job["start_time"] for job in jobs)
    max_end_date = max(job["end_time"] for job in jobs)

    # One request spanning the earliest start to the latest end.
    energy_data = compute_ci(server["country"], min_start_date, max_end_date)
    energy_data["startTimeUTC"] = pd.to_datetime(energy_data["startTimeUTC"])

    for job in jobs:
        in_job_window = (energy_data["startTimeUTC"] >= job["start_time"]) & (
            energy_data["startTimeUTC"] <= job["end_time"]
        )
        # Pass an independent copy so per-job columns never leak into energy_data.
        job_energy = energy_data[in_job_window].copy()
        job["emissions"], _ = compute_ce_from_energy(server, job_energy)

    return energy_data, jobs, min_start_date, max_end_date
184
+
185
def plot_ce_jobs(server, jobs):
    """
    Plots the share of renewable energy over time together with the jobs and
    their computed emissions, then shows the figure.

    :param server: A dictionary describing the server (forwarded to the bulk emission computation).
    :param jobs: A list of job dictionaries with `start_time` and `runtime_minutes`;
                 `end_time` and `emissions` are read after the bulk computation.
    :return: None. The figure is displayed with `plt.show()`.
    """
    energy_data, jobs, min_start_date, max_end_date = _compute_ce_bulk(server, jobs)
    palette = {
        "red": "#D6A99A",
        "green": "#99D19C",
        "blue": "#3DA5D9",
        "yellow": "#E2C044",
        "black": "#0F1A20",
    }
    fig, ax1 = plt.subplots(figsize=(10, 6))
    plt.title("Green Energy and Jobs")

    # Left axis: percentage of renewable energy over the whole window.
    ax1.plot(
        energy_data['startTimeUTC'],
        energy_data['percentRenewable'],
        color=palette['green'],
        label='Percentage of Renewable Energy',
    )
    ax1.set_xlabel('Time')
    ax1.set_ylabel('% Renewable energy')
    ax1.tick_params(axis='y')

    # Set x-axis to show dates properly
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m %H:%M'))
    plt.xticks(rotation=45)

    # Right axis: one horizontal bar per job, placed at y = 1, 2, ...
    ax2 = ax1.twinx()
    for idx, job in enumerate(jobs):
        lbl = str(job["emissions"])
        ax2.plot(
            [job['start_time'], job['end_time']],
            [idx + 1, idx + 1],
            marker='o',
            linewidth=25,
            label=lbl,
            color=palette["blue"],
        )
        # Put the emission value at the midpoint of the job's bar.
        labelpoint = job['start_time'] + (job['end_time'] - job['start_time']) / 2
        ax2.text(labelpoint, idx + 1, lbl, color='black', ha='center', va='center', fontsize=12)

    # Adjust y-axis ticks to match the number of jobs
    ax2.set_yticks(range(1, len(jobs) + 1))

    fig.tight_layout()
    plt.show()
0 commit comments