Skip to content

Commit 66250b2

Browse files
committed
Fixes #55 + some minor changes
1 parent 183fb1f commit 66250b2

File tree

1 file changed

+26
-20
lines changed

1 file changed

+26
-20
lines changed

samples/weather2file.py

Lines changed: 26 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,7 @@ def save_to_file(self):
147147
logging.debug(f"{full_file_path} saved")
148148

149149
def append(self, df):
150-
self.df = self.df.append(df).reset_index(drop=True)
150+
self.df = pd.concat([self.df, df], ignore_index=True)
151151

152152
def get_newest_timestamp(self,module_mac):
153153
if ('module_mac' in self.df.columns):
@@ -233,18 +233,17 @@ def _write_file(self, file_path):
233233
class SQLHandler(DataFrameHandler):
234234
def __init__(self, file_name, output_path):
235235
raise NotImplementedError("sql details not setup")
236-
from sqlalchemy import create_engine
236+
#from sqlalchemy import create_engine
237237

238-
super().__init__(df, file_name, output_path, file_format="sql", kwargs={"con": self.engine})
239-
self.engine = create_engine("sqlite://", echo=False)
238+
#super().__init__(file_name, output_path, file_format="sql", kwargs={"con": self.engine})
239+
#self.engine = create_engine("sqlite://", echo=False)
240240

241241
def _read_file(self, file_path):
242242
return pd.read_sql(file_path, **self.kwargs)
243243

244244
def _write_file(self, file_path):
245245
raise NotImplementedError("sql details not setup")
246-
engine = create_engine("sqlite://", echo=False)
247-
df.to_sql(file_path, index=False, **self.kwargs)
246+
self.df.to_sql(file_path, index=False, **self.kwargs)
248247

249248

250249
class FeatherHandler(DataFrameHandler):
@@ -393,9 +392,9 @@ def _get_field_dict(self, station_id,module_id,data_type,start_date,end_date):
393392
"""Returns a dict to be used when requesting data through the Netatmo API"""
394393

395394
return {'device_id':station_id,
396-
'module_id':module_id,
397395
'scale':'max',
398396
'mtype':','.join(data_type),
397+
'module_id':module_id,
399398
'date_begin':start_date,
400399
'date_end':end_date}
401400

@@ -445,14 +444,12 @@ def get_module_df(self, newest_utctime, station_name, station_mac, module_data_o
445444
# Start with the oldest timestamp
446445
module_start_date_timestamp = module_data_overview['last_setup']
447446

448-
# Create an empty DataFrame to fill with new values
449-
df_module = pd.DataFrame([])
450-
447+
# Fill array with data
448+
data = []
451449

452450
if(newest_utctime):
453451
# Found newer data! Change start time according to the newest value
454452

455-
456453
if(newest_utctime > module_start_date_timestamp):
457454
module_start_date_timestamp = newest_utctime + 1
458455
logging.info(f'Newer data found for {module_name}. Setting new start date to {self._get_date_from_timestamp(module_start_date_timestamp, tz=time_z)}')
@@ -483,16 +480,17 @@ def get_module_df(self, newest_utctime, station_name, station_mac, module_data_o
483480
try:
484481
# Was there any data?
485482
if(retreived_module_data['body']):
486-
# Yes! Append it with df_module
487-
df_module = df_module.append(self._to_dataframe(retreived_module_data['body'],
483+
new_df = self._to_dataframe(retreived_module_data['body'],
488484
module_data_overview,
489485
station_name,
490486
station_mac,
491487
dtype,
492-
time_z))
493-
logging.debug(f'{len(retreived_module_data["body"])} samples found for {module_data_overview["module_name"]}. {df_module.shape[0]} new samples collected so far.')
488+
time_z)
489+
data.append(new_df)
490+
new_df['utc_time'].min()
491+
logging.debug(f'{len(retreived_module_data["body"])} samples found for {module_data_overview["module_name"]}. {new_df["timestamp"].iloc[0]} - {new_df["timestamp"].iloc[-1]}')
494492
# Now change the start_time
495-
module_start_date_timestamp = df_module['utc_time'].max() + 1
493+
module_start_date_timestamp = new_df['utc_time'].max() + 1
496494

497495
else:
498496
keep_collecting_module_data = False
@@ -503,8 +501,15 @@ def get_module_df(self, newest_utctime, station_name, station_mac, module_data_o
503501
keep_collecting_module_data = False
504502
logging.error(f'Something fishy is going on... Aborting collection for module {module_name}')
505503

504+
505+
if data:
506+
df_module = pd.concat(data,ignore_index=True)
507+
else:
508+
df_module = pd.DataFrame([])
509+
510+
506511
logging.info(f'Collected data from {module_name} contains {df_module.shape[0]} samples.')
507-
return df_module.reset_index(drop=True)
512+
return df_module
508513

509514
def main():
510515

@@ -602,17 +607,18 @@ def main():
602607
nr_previous_requests=args.previous_requests)
603608

604609

605-
for station_mac, station_data_overview in rate_limit_handler.get_stations():
610+
for station_name, station_data_overview in rate_limit_handler.get_stations():
606611

607-
station_name = station_data_overview['station_name']
612+
station_mac = station_data_overview['_id']
608613

609614
station_timezone = timezone(station_data_overview['place']['timezone'])
610615
logging.info(f'Timezone {station_timezone} extracted from data.')
611616

612617
end_datetime_timestamp = np.floor(datetime.timestamp(station_timezone.localize(args.end_datetime)))
618+
newest_utc = df_handler.get_newest_timestamp(station_data_overview['_id'])
613619
df_handler.append(
614620
rate_limit_handler.get_module_df(
615-
df_handler.get_newest_timestamp(station_data_overview['_id']),
621+
newest_utc,
616622
station_name,
617623
station_mac,
618624
station_data_overview,

0 commit comments

Comments
 (0)