1
1
import json
2
2
import logging
3
- import random
4
3
import time
5
4
from datetime import datetime , timedelta , timezone
6
5
from functools import wraps
7
- from typing import Optional , Union
6
+ from typing import Callable , Optional , Union
8
7
9
8
from influxdb_client_3 import InfluxDBClient3
10
9
@@ -92,9 +91,12 @@ def __init__(
92
91
token : str ,
93
92
organization_id : str ,
94
93
database : str ,
94
+ key_setter : Optional [Callable [[object ], object ]] = None ,
95
+ timestamp_setter : Optional [Callable [[object ], int ]] = None ,
95
96
start_date : datetime = datetime .now (tz = timezone .utc ),
96
97
end_date : Optional [datetime ] = None ,
97
98
measurements : Optional [Union [str , list [str ]]] = None ,
99
+ measurement_column_name : str = "_measurement_name" ,
98
100
sql_query : Optional [str ] = None ,
99
101
time_delta : str = "5m" ,
100
102
delay : float = 0 ,
@@ -109,9 +111,15 @@ def __init__(
109
111
:param token: Authentication token for InfluxDB.
110
112
:param organization_id: Organization name in InfluxDB.
111
113
:param database: Database name in InfluxDB.
114
+ :param key_setter: sets the Kafka message key for a measurement record.
115
+ By default, will set the key to the measurement's name.
116
+ :param timestamp_setter: sets the Kafka message timestamp for a measurement record.
117
+ By default, the timestamp will be the Kafka default (Kafka produce time).
112
118
:param start_date: The start datetime for querying InfluxDB. Uses current time by default.
113
- :param end_date: The end datetime for querying InfluxDB. If none provided, runs indefinitely for a single measurement.
119
+ :param end_date: The end datetime for querying InfluxDB.
120
+ If none provided, runs indefinitely for a single measurement.
114
121
:param measurements: The measurements to query. If None, all measurements will be processed.
122
+ :param measurement_column_name: The column name used for appending the measurement name to the record.
115
123
:param sql_query: Custom SQL query for retrieving data.
116
124
Query expects a `{start_time}`, `{end_time}`, and `{measurement_name}` for later formatting.
117
125
If provided, it overrides the default window-query logic.
@@ -145,6 +153,9 @@ def __init__(
145
153
"org" : organization_id ,
146
154
"database" : database ,
147
155
}
156
+ self ._measurement_column_name = measurement_column_name
157
+ self ._key_setter = key_setter or self ._default_key_setter
158
+ self ._timestamp_setter = timestamp_setter
148
159
self ._measurements = measurements
149
160
self ._sql_query = _set_sql_query (sql_query or "" )
150
161
self ._start_date = start_date
@@ -167,6 +178,9 @@ def _close_client(self):
167
178
self ._client .close ()
168
179
self ._client = None
169
180
181
def _default_key_setter(self, record: dict):
    """
    Default message-key setter: use the record's measurement name as the key.

    :param record: a measurement record holding the measurement name under
        the column configured in ``self._measurement_column_name``.
    :return: the value stored under that column.
    :raises KeyError: if the record does not contain the measurement column.
    """
    column = self._measurement_column_name
    return record[column]
183
+
170
184
@property
171
185
def _measurement_names (self ) -> list [str ]:
172
186
if not self ._measurements :
@@ -189,14 +203,16 @@ def _get_measurements(self) -> list[str]:
189
203
return result ["name" ].tolist ()
190
204
191
205
@with_retry
def _produce_records(self, records: list[dict]):
    """
    Serialize and produce every record, then flush the producer.

    The message key comes from ``self._key_setter``. The message timestamp
    comes from ``self._timestamp_setter`` when one is configured; otherwise
    ``None`` is passed to ``serialize``.

    NOTE(review): the whole method is wrapped by ``with_retry`` (defined
    elsewhere); if it re-invokes this method on failure, records produced
    before the failure may be produced again - confirm.

    :param records: the measurement records to produce.
    """
    for row in records:
        # Key setter runs before the timestamp setter, matching the order in
        # which the callbacks were originally evaluated.
        message_key = self._key_setter(row)
        if self._timestamp_setter:
            message_timestamp = self._timestamp_setter(row)
        else:
            message_timestamp = None

        serialized = self.serialize(
            key=message_key,
            value=row,
            timestamp_ms=message_timestamp,
        )
        self.produce(
            value=serialized.value,
            key=serialized.key,
            timestamp=serialized.timestamp,
        )

    # Flush once after the loop rather than per record.
    self.producer.flush()
201
217
202
218
@with_retry
@@ -249,10 +265,9 @@ def _process_measurement(self, measurement_name):
249
265
if data is not None and not data .empty :
250
266
if "iox::measurement" in data .columns :
251
267
data = data .drop (columns = ["iox::measurement" ])
252
- data ["measurement_name" ] = measurement_name
268
+ data [self . _measurement_column_name ] = measurement_name
253
269
self ._produce_records (
254
270
json .loads (data .to_json (orient = "records" , date_format = "iso" )),
255
- data ,
256
271
)
257
272
258
273
start_time = end_time
0 commit comments