Skip to content

Commit a76287e

Browse files
committed
caching and offline with cron jobs
1 parent f44090e commit a76287e

File tree

7 files changed

+187
-85
lines changed

7 files changed

+187
-85
lines changed

codegreen_core/data/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,11 @@ def energy(country, start_time, end_time, type="generation") -> dict:
8686
offline_data = off.get_offline_data(country,start_time,end_time)
8787
if offline_data["available"] is True and offline_data["partial"] is False and offline_data["data"] is not None:
8888
# TODO: when offline data is only partially available, fetch just the missing range and merge it instead of re-fetching the complete data
89-
return {"data":offline_data["data"],"data_available":True,"error":"None","time_interval":60,"message":"Data from offline source"}
89+
return {"data":offline_data["data"],"data_available":True,"error":"None","time_interval":60,"source":offline_data["source"]}
9090
else:
9191
energy_data = et.get_actual_production_percentage(country, start_time, end_time, interval60=True)
9292
energy_data["data"] = energy_data["data"]
93+
energy_data["source"] = "public_data"
9394
return energy_data
9495
elif type == "forecast":
9596
energy_data = et.get_forecast_percent_renewable(country, start_time, end_time)

codegreen_core/data/offline.py

Lines changed: 44 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -11,29 +11,16 @@
1111
from ..utilities.log import log_stuff
1212

1313

14-
def get_redis_client(redis_url):
15-
"""
16-
Returns a Redis client instance using a Redis URL.
17-
18-
:param redis_url: Redis connection URL (e.g., "redis://localhost:6379/0")
19-
:return: Redis client instance
20-
"""
14+
def _get_redis_client(redis_url):
2115
try:
2216
return redis.from_url(redis_url, decode_responses=True)
2317
except redis.RedisError as e:
2418
print(f"Redis connection error: {e}")
2519
return None
2620

2721

28-
def get_key_from_redis(redis_url, key):
29-
"""
30-
Retrieves a key's value from Redis. Returns None if the key does not exist.
31-
32-
:param redis_url: Redis connection URL
33-
:param key: Key to retrieve from Redis
34-
:return: Value of the key or None if key does not exist
35-
"""
36-
client = get_redis_client(redis_url)
22+
def _get_key_from_redis(redis_url, key):
23+
client = _get_redis_client(redis_url)
3724
if client:
3825
try:
3926
return client.get(key) # Returns None if key does not exist
@@ -42,59 +29,39 @@ def get_key_from_redis(redis_url, key):
4229
return None
4330

4431

45-
def set_key_in_redis(redis_url, key, value, expiry=None):
46-
"""
47-
Sets a key-value pair in Redis with an optional expiry time.
48-
49-
:param redis_url: Redis connection URL
50-
:param key: Key to store in Redis
51-
:param value: Value to store in Redis
52-
:param expiry: Expiry time in seconds (optional)
53-
"""
54-
client = get_redis_client(redis_url)
32+
def _set_key_in_redis(redis_url, key, value, expiry=None):
33+
client = _get_redis_client(redis_url)
5534
if client:
5635
try:
5736
if expiry:
5837
client.set(key, value, ex=expiry) # Set key with expiry
5938
else:
6039
client.set(key, value) # Set key without expiry
61-
# print(f"Key '{key}' set successfully in Redis.")
6240
except redis.RedisError as e:
6341
print(f"Redis error: {e}")
6442

6543

66-
6744
def _get_country_key(country_code):
6845
"""Returns the key name for the given country to be stored in redis cache"""
6946
return "codegreen_generation_public_data_"+ country_code
7047

71-
def round_to_nearest_hour(dt):
72-
"""
73-
Rounds a given datetime to the nearest hour.
74-
"""
48+
def _round_to_nearest_hour(dt):
49+
"""Truncates a given datetime down to the start of its hour (minute, second and microsecond set to 0)."""
7550
return dt.replace(minute=0, second=0, microsecond=0)
7651

77-
def get_time_range(nHours):
78-
"""
79-
Returns a tuple (start_date, end_date) where:
80-
- start_date is current datetime minus nHours
81-
- end_date is the current datetime
82-
"""
83-
end_date = round_to_nearest_hour(datetime.now().replace(microsecond=0))
52+
def _get_time_range(nHours):
53+
"""Returns a tuple (start_date, end_date) where end_date is the current datetime truncated to the hour and start_date is end_date minus nHours."""
54+
end_date = _round_to_nearest_hour(datetime.now().replace(microsecond=0))
8455
start_date = end_date - timedelta(hours=nHours)
8556
return start_date, end_date
8657

87-
def gather_energy_data(country, start_time, end_time):
88-
"""
89-
Gets energy data form public energy sources (online)
90-
"""
58+
def _gather_energy_data(country, start_time, end_time):
59+
"""Gets energy data from public energy sources (online)."""
9160
energy_data = et.get_actual_production_percentage(country, start_time, end_time,interval60=True)["data"]
9261
return energy_data
9362

94-
def get_filtered_data(dataframe, start_time, end_time):
95-
"""
96-
Function that returns a tuple (partial: True/False, data: DataFrame/None)
97-
indicating if the data is partially available and the corresponding data.
63+
def _get_filtered_data(dataframe, start_time, end_time):
64+
"""Function that returns a tuple (partial: True/False, data: DataFrame/None) indicating if the data is partially available and the corresponding data.
9865
"""
9966
if dataframe.empty:
10067
return (False, None)
@@ -137,12 +104,12 @@ def _sync_offline_file(country):
137104

138105
current_time = datetime.now()
139106
# Only store data up to 5 hours before the current hour.
140-
end_time = round_to_nearest_hour(current_time) - timedelta(hours=5)
107+
end_time = _round_to_nearest_hour(current_time) - timedelta(hours=5)
141108

142109
if not (os.path.exists(json_file_path) and os.path.exists(csv_file_path)):
143110
print("Files do not exist. Gathering new data.")
144111
try:
145-
data = gather_energy_data(country, start_time, end_time)
112+
data = _gather_energy_data(country, start_time, end_time)
146113

147114
data.to_csv(csv_file_path, index=False)
148115
metadata = {
@@ -153,8 +120,8 @@ def _sync_offline_file(country):
153120
"updated_on": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
154121
}
155122
with open(json_file_path, "w") as f:
156-
print(metadata)
157123
json.dump(metadata, f, indent=4)
124+
log_stuff("Successfully created new offline file for "+country)
158125
return data
159126
except Exception as e:
160127
print(e)
@@ -172,12 +139,12 @@ def _sync_offline_file(country):
172139
update_required = False
173140
if start_diff.total_seconds() > 0:
174141
print("Gathering missing data before current start time.")
175-
new_data = gather_energy_data(country, start_time, current_start_time )
142+
new_data = _gather_energy_data(country, start_time, current_start_time )
176143
df = pd.concat([new_data, df], ignore_index=True)
177144
update_required = True
178145
if end_diff.total_seconds() > 0:
179146
print("Gathering missing data after current end time.")
180-
new_data = gather_energy_data(country, current_end_time, end_time)
147+
new_data = _gather_energy_data(country, current_end_time, end_time)
181148
#print(new_data)
182149
df = pd.concat([df, new_data], ignore_index=True)
183150
update_required = True
@@ -190,39 +157,39 @@ def _sync_offline_file(country):
190157
metadata["updated_on"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
191158
with open(json_file_path, "w") as f:
192159
json.dump(metadata, f, indent=4)
160+
log_stuff("Successfully synced offline file for "+country)
193161
else:
194162
print("No update required")
195163
#last_72_hours = end_time - timedelta(hours=72)
196164
#recent_data = df[pd.to_datetime(df["timestamp"]) >= last_72_hours]
197-
log_stuff("Successfully synced offline file for "+country)
165+
198166

199167
def _sync_offline_cache(country):
200168
# print("syncs offline cache for the given country")
201169
if not Config.get("enable_energy_caching"):
202170
raise Exception("This method cannot be used to get data since enable_energy_caching option is not enabled")
203171

204172
c_key = _get_country_key(country)
205-
206-
data = get_key_from_redis(Config.get("energy_redis_path"),c_key)
173+
hour_count = int(Config.get("generation_cache_hour"))
174+
quarter_time = hour_count/4
175+
data = _get_key_from_redis(Config.get("energy_redis_path"),c_key)
176+
update_required = False
177+
s,e = _get_time_range(hour_count)
207178
if data is not None:
208-
print("check if updated to the latest")
209179
metadata = json.loads(data)
210-
# print(metadata)
211180
dataframe = pd.DataFrame.from_dict(metadata["dataframe"])
212-
dataframe["startTime"] = pd.to_datetime(dataframe["startTime"]) # Converts to pandas.Timestamp
213-
print(dataframe)
214-
s,e = get_time_range(72)
181+
dataframe["startTime"] = pd.to_datetime(dataframe["startTime"])
215182
last_start_time = pd.to_datetime(dataframe.iloc[-1]["startTime"])
216-
217183
# Calculate the difference in hours
218184
time_difference = abs((e - last_start_time).total_seconds()) / 3600
219-
print(last_start_time)
220-
print(e)
221-
print(time_difference)
185+
if quarter_time <= time_difference :
186+
update_required = True
222187
else:
223-
print("new_data_to_add")
224-
s,e = get_time_range(72)
225-
dataframe = gather_energy_data(country,s,e)
188+
update_required = True
189+
190+
if update_required :
191+
# todo : see if offline data have the required data
192+
dataframe = _gather_energy_data(country,s,e)
226193
dataframe["startTime"] = pd.to_datetime(dataframe["startTime"])
227194
dataframe["startTime"] = dataframe["startTime"].dt.tz_localize(None)
228195
metadata = {
@@ -233,8 +200,7 @@ def _sync_offline_cache(country):
233200
"updated_on": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
234201
"dataframe":dataframe.to_dict()
235202
}
236-
set_key_in_redis(Config.get("energy_redis_path"),c_key,json.dumps(metadata, default=str))
237-
203+
_set_key_in_redis(Config.get("energy_redis_path"),c_key,json.dumps(metadata, default=str))
238204

239205

240206
def _get_offline_file_data(country,start_time, end_time):
@@ -258,25 +224,24 @@ def _get_offline_file_data(country,start_time, end_time):
258224
return (False, None)
259225

260226
local_data = pd.read_csv(csv_file_path)
261-
return get_filtered_data(local_data, start_time, end_time)
227+
return _get_filtered_data(local_data, start_time, end_time)
262228

263229

264230
def _get_offline_cache_data(country,start,end):
265231
print("offline cache data")
266232
if not Config.get("enable_energy_caching"):
267233
raise Exception("This method cannot be used to get data since enable_energy_caching option is not enabled")
268-
data = get_key_from_redis(Config.get("energy_redis_path"),_get_country_key(country))
234+
data = _get_key_from_redis(Config.get("energy_redis_path"),_get_country_key(country))
269235
# print(data)
270236
if data is not None:
271237
metadata = json.loads(data)
272238
# print(metadata)
273239
dataframe = pd.DataFrame.from_dict(metadata["dataframe"])
274240
dataframe["startTime"] = pd.to_datetime(dataframe["startTime"]) # Converts to pandas.Timestamp
275-
return get_filtered_data(dataframe, start, end)
241+
return _get_filtered_data(dataframe, start, end)
276242
else:
277243
return False,None
278244

279-
280245

281246
def get_offline_data(country,start,end,sync_first=False):
282247
"""
@@ -288,7 +253,7 @@ def get_offline_data(country,start,end,sync_first=False):
288253
returns {available:True/False, data:dataframe, partial:True/False, source:"cache"/"offline_file"/""}
289254
Note that this method assumes that syncing of the sources is being handled separately
290255
"""
291-
output = {"available":False,"data":None, "partial":False}
256+
output = {"available":False,"data":None, "partial":False,"source":""}
292257
offline = Config.get("enable_offline_energy_generation")
293258
cache = Config.get("enable_energy_caching")
294259

@@ -306,6 +271,7 @@ def get_offline_data(country,start,end,sync_first=False):
306271
output["partial"] = partial
307272
output["data"] = data
308273
output["available"] = True
274+
output["source"] = "cache"
309275
print("data from cache")
310276
return output
311277

@@ -318,34 +284,31 @@ def get_offline_data(country,start,end,sync_first=False):
318284
output["partial"] = partial
319285
output["data"] = data
320286
output["available"] = True
287+
output["source"] = "offline_file"
321288
print("just got the data from offline file")
322289

323290
return output
324291

325292

326-
def sync_offline_data():
293+
def sync_offline_data(file=False,cache=False):
327294
"""
328295
Syncs offline data for each offline source (file and/or cache) that is enabled in the configuration and requested via the ``file`` / ``cache`` arguments.
329296
Data is synced for all available countries
330297
You need to run this before getting offline data. You can also set up a cron job to call this method at regular intervals.
331298
"""
332299
c_keys = meta.get_country_metadata()
333-
if Config.get("enable_offline_energy_generation"):
300+
if Config.get("enable_offline_energy_generation") == True and file == True:
334301
for key in c_keys:
335302
try:
336303
_sync_offline_file(key)
337304
except Exception as e:
338305
# print(e)
339306
log_stuff("Error in syncing offline file for "+key+". Message"+ str(e))
340-
if Config.get("enable_energy_caching"):
307+
if Config.get("enable_energy_caching") == True and cache == True :
341308
for key in c_keys:
342309
try:
343310
_sync_offline_cache(key)
344311
except Exception as e:
345312
# print(e)
346313
log_stuff("Error in syncing offline file for "+key+". Message: "+ str(e))
347-
348-
349-
350-
351314

codegreen_core/utilities/config.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,10 @@
22
import configparser
33
import redis
44

5-
65
class ConfigError(Exception):
76
"""Custom exception for configuration errors."""
87
pass
98

10-
119
class Config:
1210
config_data = None
1311
section_name = "codegreen"
@@ -66,6 +64,26 @@ class Config:
6664
"boolean":False,
6765
"use":"The start date for offline energy generation download,YYYY-mm-dd format"
6866
},
67+
{
68+
"name":"generation_cache_hour",
69+
"default":"72",
70+
"boolean":False,
71+
"use":"Indicate the number of hours in the past the data will be stored in the cache "
72+
},
73+
74+
{
75+
"name":"cron_refresh_offline_files_hour",
76+
"default":"6",
77+
"boolean":False,
78+
"use":"time to setup cron for updating offline energy files"
79+
},
80+
{
81+
"name":"cron_refresh_cache_hour",
82+
"default":"6",
83+
"boolean":False,
84+
"use":"time to setup CRON job to update the energy generation cache"
85+
},
86+
6987
{
7088
"name":"enable_logging",
7189
"default":"False",

0 commit comments

Comments
 (0)