Skip to content

0.0.5 #18

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/workflow.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Publish Python 🐍 distribution 📦 to PyPI
name: Publish to PyPI

on:
push:
Expand All @@ -7,7 +7,7 @@ on:

jobs:
build:
name: Build distribution 📦
name: Build distribution
runs-on: ubuntu-latest

steps:
Expand All @@ -30,7 +30,7 @@ jobs:
path: dist/

publish-to-pypi:
name: Publish Python 🐍 distribution 📦 to PyPI
name: Publish to PyPI
needs:
- build
runs-on: ubuntu-latest
Expand Down
4 changes: 3 additions & 1 deletion codegreen_core/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from .main import *
from .offline import *
# from . import main

__all__ = ["energy"]
__all__ = ["info","energy","sync_offline_data",'get_offline_data']
34 changes: 34 additions & 0 deletions codegreen_core/data/entsoe.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
"Biomass": ["Biomass"],
}



# helper methods


Expand Down Expand Up @@ -287,6 +289,7 @@ def get_actual_production_percentage(country, start, end, interval60=False) -> d
- `data_available`: A boolean indicating if data was successfully retrieved.
- `data`: A pandas DataFrame containing the energy data if available, empty DataFrame if not.
- `time_interval` : the time interval of the DataFrame
- `columns` : a dict with column description
:rtype: dict
"""
try:
Expand Down Expand Up @@ -371,6 +374,7 @@ def get_actual_production_percentage(country, start, end, interval60=False) -> d
"data": _format_energy_data(table),
"data_available": True,
"time_interval": duration,
"columns":gen_cols_from_data(table)
}
except Exception as e:
# print(e)
Expand All @@ -380,8 +384,38 @@ def get_actual_production_percentage(country, start, end, interval60=False) -> d
"data_available": False,
"error": e,
"time_interval": 0,
"columns":None
}

def gen_cols_from_data(df):
    """Describe the columns of an energy generation DataFrame.

    Only ``df.columns`` is read; the DataFrame itself is not modified.

    :param pandas.DataFrame df: the energy generation data.
    :return: A dictionary with three keys:

        - ``renewable``: columns of ``df`` that appear in ``renewableSources``
        - ``nonRenewable``: columns of ``df`` that appear in ``nonRenewableSources``
        - ``percentage``: the fixed list of ``<source>_per`` field names for all
          aggregated energy categories (independent of the columns of ``df``)

    :rtype: dict
    """
    percentage_keys = [
        "Wind",
        "Solar",
        "Nuclear",
        "Hydroelectricity",
        "Geothermal",
        "Natural Gas",
        "Petroleum",
        "Coal",
        "Biomass",
    ]

    # De-duplicate while keeping the DataFrame's own column order. A plain
    # set intersection would return the names in an arbitrary order that can
    # differ between interpreter runs.
    unique_cols = list(dict.fromkeys(df.columns.tolist()))
    renewable_names = set(renewableSources)
    non_renewable_names = set(nonRenewableSources)

    return {
        "renewable": [col for col in unique_cols if col in renewable_names],
        "nonRenewable": [col for col in unique_cols if col in non_renewable_names],
        "percentage": [f"{key}_per" for key in percentage_keys],
    }


def get_forecast_percent_renewable(
country: str, start: datetime, end: datetime
Expand Down
152 changes: 94 additions & 58 deletions codegreen_core/data/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,54 +10,76 @@

def energy(country, start_time, end_time, type="generation") -> dict:
"""
Returns hourly time series of energy production mix for a specified country and time range.

This method fetches the energy data for the specified country between the specified duration.
It checks if a valid energy data source is available. If not, None is returned. Otherwise, the
energy data is returned as a pandas DataFrame. The structure of data depends on the energy source.

For example, if the source is ENTSOE, the data contains:

========================== ========== ================================================================
Column type Description
========================== ========== ================================================================
startTimeUTC object Start date in UTC (format YYYYMMDDhhmm)
startTime datetime Start time in local timezone
Biomass float64
Fossil Hard coal float64
Geothermal float64
....more energy sources float64
**renewableTotal** float64 The total based on all renewable sources
renewableTotalWS float64 The total production using only Wind and Solar energy sources
nonRenewableTotal float64
total float64 Total using all energy sources
percentRenewable int64
percentRenewableWS int64 Percentage of energy produced using only wind and solar energy
Wind_per int64 Percentages of individual energy sources
Solar_per int64
Nuclear_per int64
Hydroelectricity_per int64
Geothermal_per int64
Natural Gas_per int64
Petroleum_per int64
Coal_per int64
Biomass_per int64
========================== ========== ================================================================

Note : fields marked bold are calculated based on the data fetched.

:param str country: The 2 alphabet country code.
:param datetime start_time: The start date for data retrieval. A Datetime object. Note that this date will be rounded to the nearest hour.
:param datetime end_time: The end date for data retrieval. A datetime object. This date is also rounded to the nearest hour.
:param str type: The type of data to retrieve; either 'generation' or 'forecast'. Defaults to 'generation'.
:param boolean interval60: To fix the time interval of data to 60 minutes. True by default. Only applicable for generation data

:return: A dictionary containing:
- `error`: A string with an error message, empty if no errors.
- `data_available`: A boolean indicating if data was successfully retrieved.
- `data`: A pandas DataFrame containing the energy data if available, empty DataFrame if not.
- `time_interval` : the time interval of the DataFrame
Returns an hourly time series of the energy production mix for a specified country and time range,
if a valid energy data source is available.

The data is returned as a pandas DataFrame along with additional metadata.
The columns vary depending on the data source. For example, if the source is ENTSOE,
the data includes fields such as "Biomass", "Geothermal", "Hydro Pumped Storage",
"Hydro Run-of-river and Poundage", "Hydro Water Reservoir", etc.

However, some fields remain consistent across data sources:

========================= ========== ================================================================
Column Type Description
========================= ========== ================================================================
startTimeUTC object Start time in UTC (format: YYYYMMDDhhmm)
startTime datetime Start time in local timezone
renewableTotal float64 The total production from all renewable sources
renewableTotalWS float64 Total production using only Wind and Solar energy sources
nonRenewableTotal float64 Total production from non-renewable sources
total float64 Total energy production from all sources
percentRenewable int64 Percentage of total energy from renewable sources
percentRenewableWS int64 Percentage of energy from Wind and Solar only
Wind_per int64 Percentage contribution from Wind energy
Solar_per int64 Percentage contribution from Solar energy
Nuclear_per int64 Percentage contribution from Nuclear energy
Hydroelectricity_per int64 Percentage contribution from Hydroelectricity
Geothermal_per int64 Percentage contribution from Geothermal energy
Natural Gas_per int64 Percentage contribution from Natural Gas
Petroleum_per int64 Percentage contribution from Petroleum
Coal_per int64 Percentage contribution from Coal
Biomass_per int64 Percentage contribution from Biomass
========================= ========== ================================================================

:param str country:
The 2-letter country code (e.g., "DE" for Germany, "FR" for France, etc.).
:param datetime start_time:
The start date for data retrieval (rounded to the nearest hour).
:param datetime end_time:
The end date for data retrieval (rounded to the nearest hour).
:param str type:
The type of data to retrieve; either 'generation' or 'forecast'. Defaults to 'generation'.

:return: A dictionary containing the following keys:

- **error** (*str*): An error message, empty if no errors occurred.
- **data_available** (*bool*): Indicates whether data was successfully retrieved.
- **data** (*pandas.DataFrame*): The retrieved energy data if available; an empty DataFrame otherwise.
- **time_interval** (*int*): The time interval of the DataFrame (constant value: ``60``).
- **source** (*str*): Specifies the origin of the retrieved data. Defaults to ``'public_data'``, indicating it was fetched from an external source. If the offline storage feature is enabled, this value may change if the data is available locally.
- **columns** (*dict*): For generation data, the column names grouped under the keys ``renewable``, ``nonRenewable`` and ``percentage``; ``None`` if the data could not be retrieved.

:rtype: dict

**Example Usage:**

Get generation data for Germany

.. code-block:: python

from datetime import datetime
from codegreen_core.data import energy
result = energy(country="DE", start_time=datetime(2025, 1, 1), end_time=datetime(2025, 1, 2), type="generation")

Get forecast data for Norway

.. code-block:: python

from datetime import datetime
from codegreen_core.data import energy
result = energy(country="NO", start_time=datetime(2025, 1, 1), end_time=datetime(2025, 1, 2), type="forecast")

"""
if not isinstance(country, str):
raise ValueError("Invalid country")
Expand All @@ -75,27 +97,41 @@ def energy(country, start_time, end_time, type="generation") -> dict:
e_source = meta.get_country_energy_source(country)
if e_source == "ENTSOE":
if type == "generation":
"""
let local_found= false
see if caching is enabled, if yes, first check in the cache
if not,
check if offline data is enabled
if yes, check is data is available locally
if no, go online
"""
offline_data = off.get_offline_data(country,start_time,end_time)
if offline_data["available"] is True and offline_data["partial"] is False and offline_data["data"] is not None:
# todo fix this if partial get remaining data and merge instead of fetching the complete data
return {"data":offline_data["data"],"data_available":True,"error":"None","time_interval":60,"source":offline_data["source"]}
return {"data":offline_data["data"],"data_available":True,"error":"None","time_interval":60,"source":offline_data["source"],"columns":et.gen_cols_from_data(offline_data["data"])}
else:
energy_data = et.get_actual_production_percentage(country, start_time, end_time, interval60=True)
energy_data["data"] = energy_data["data"]
#energy_data["data"] = energy_data["data"]
energy_data["source"] = "public_data"
#energy_data["columns"] =
return energy_data
elif type == "forecast":
energy_data = et.get_forecast_percent_renewable(country, start_time, end_time)
energy_data["data"] = energy_data["data"]
# energy_data["data"] = energy_data["data"]
return energy_data
else:
raise CodegreenDataError(Message.NO_ENERGY_SOURCE)
return None

def info() -> list:
    """
    Returns a list of countries (in two-letter codes) and energy sources for which data can be fetched using the package.

    :return: A list of dictionaries, one per country, each containing:

      - name of the country
      - `energy_source` : the publicly available energy data source
      - `carbon_intensity_method` : the methodology used to calculate carbon intensity
      - `code` : the 2 letter country code

    :rtype: list
    """
    # Build a new dict per country instead of adding "code" to the entries
    # returned by meta.get_country_metadata(), so the shared metadata is not
    # modified as a side effect of calling info().
    return [
        {**details, "code": code}
        for code, details in meta.get_country_metadata().items()
    ]
47 changes: 30 additions & 17 deletions codegreen_core/data/offline.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,16 +243,27 @@ def _get_offline_cache_data(country,start,end):
return False,None


def get_offline_data(country,start,end,sync_first=False):
def get_offline_data(country,start,end,sync_first=False)->dict:
"""
This method returns locally stored energy data.
Data is stored in 2 sources : one. Redis cache and second : csv files.
Redis cache contains data only for the last 72 hours from when it was last synced
Offline data files can contain data for longer durations.
Both these options can be configured in the config file
returns {available:True/False, data:dataframe}
Note that this method assumes that syncing of the sources is being handled separately
This method returns locally stored energy data.

Data is stored in two sources:

1. **Redis cache**: Contains data for a limited number of hours from the last sync.
2. **CSV files**: Contain data for longer durations.

Both storage options can be configured in the configuration file.

**Note**: Unless you specify the ``sync_first`` flag, the method assumes that syncing of the data sources is handled separately. If ``sync_first`` is set to ``True`` and data files are not initialized in advance, the method may take longer to complete

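:return: A dictionary with the following keys:

- **available** (*bool*): Indicates if the data is available.
- **data** (*pandas.DataFrame*): The energy data if it was found locally; ``None`` otherwise.
- **partial** (*bool*): ``True`` if the local data covers only part of the requested time range.
- **source** (*str*): Where the data came from (for example ``"offline_file"``); an empty string if no local data was found.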

:rtype: dict

"""

output = {"available":False,"data":None, "partial":False,"source":""}
offline = Config.get("enable_offline_energy_generation")
cache = Config.get("enable_energy_caching")
Expand All @@ -264,7 +275,7 @@ def get_offline_data(country,start,end,sync_first=False):
if cache :
# first look in the cache
if(sync_first):
print("will first sync the cache to get the latest data")
#print("will first sync the cache to get the latest data")
_sync_offline_cache(country)
partial,data = _get_offline_cache_data(country,start,end)
if data is not None and partial is False:
Expand All @@ -278,37 +289,39 @@ def get_offline_data(country,start,end,sync_first=False):
if offline:
# first look if data files are available, if yes, return data
if(sync_first):
print("will first sync the offline files to get the latest data")
#print("will first sync the offline files to get the latest data")
_sync_offline_file(country)
partial,data = _get_offline_file_data(country,start,end)
output["partial"] = partial
output["data"] = data
output["available"] = True
output["source"] = "offline_file"
print("just got the data from offline file")
#print("just got the data from offline file")

return output


def sync_offline_data(file=False, cache=False):
    """
    This method syncs offline data for offline sources enabled in the configuration file. The data is synced for all available countries.

    You need to run this method before retrieving offline data. It is also possible to set up a CRON job to call this method at regular intervals to keep data synchronized.

    The sync operation can take some time, depending on the data size and the selected sync options (file, cache, or both).

    A failure for one country is logged and does not stop the sync of the remaining countries.

    :param bool file: If ``True``, sync data in offline files. Defaults to ``False``.
    :param bool cache: If ``True``, sync data in the cache. Defaults to ``False``.
    """
    c_keys = meta.get_country_metadata()

    # NOTE: the explicit ``== True`` comparisons are kept on purpose so that
    # only values equal to True enable a sync, exactly as before.
    if Config.get("enable_offline_energy_generation") == True and file == True:
        for key in c_keys:
            try:
                _sync_offline_file(key)
            except Exception as e:
                # Best effort: record the failure and continue with the next country.
                log_stuff(f"Error in syncing offline file for {key}. Message: {e}")

    if Config.get("enable_energy_caching") == True and cache == True:
        for key in c_keys:
            try:
                _sync_offline_cache(key)
            except Exception as e:
                # Best effort: record the failure and continue with the next country.
                log_stuff(f"Error in syncing offline cache for {key}. Message: {e}")

25 changes: 25 additions & 0 deletions codegreen_core/tools/loadshift_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,31 @@ def predict_now(
:type criteria: str
:return: Tuple[timestamp, message, average_percent_renewable]
:rtype: tuple

**Example usage**:

.. code-block:: python

from datetime import datetime,timedelta
from codegreen_core.tools.loadshift_time import predict_now

country_code = "DK"
est_runtime_hour = 10
est_runtime_min = 0
now = datetime.now()
hard_finish_date = now + timedelta(days=1)
criteria = "percent_renewable"
per_renewable = 50

time = predict_now(country_code,
est_runtime_hour,
est_runtime_min,
hard_finish_date,
criteria,
per_renewable)
# (1728640800.0, <Message.OPTIMAL_TIME: 'OPTIMAL_TIME'>, 76.9090909090909)


"""
if criteria == "percent_renewable":
try:
Expand Down
Binary file removed docs/_static/modules.png
Binary file not shown.
Loading