@@ -32,9 +32,7 @@ def __init__(
32
32
):
33
33
super ().__init__ (target , stream_name , schema , key_properties )
34
34
self .client = self ._authenticated_client ()
35
- self .index_schema_fields = self .config .get ("index_schema_fields" , {}).get (
36
- self .stream_name , {}
37
- )
35
+ self .index_schema_fields = self .config .get ("index_schema_fields" , {}).get (self .stream_name , {})
38
36
self .metadata_fields = self .config .get ("metadata_fields" , {}).get (self .stream_name , {})
39
37
self .index_mappings = self .config .get ("index_mappings" , {}).get (self .stream_name , {})
40
38
self .index_name = None
@@ -50,6 +48,26 @@ def setup(self) -> None:
50
48
self .index_name = self ._template_index ()
51
49
self .create_index (self .index_name )
52
50
51
+ def preprocess_record (self , record : dict , context : dict ) -> dict : # noqa: PLR6301, ARG002
52
+ """Process incoming record and return a modified result.
53
+
54
+ Args:
55
+ record: Individual record in the stream.
56
+ context: Stream partition or context dictionary.
57
+
58
+ Returns:
59
+ A new, processed record.
60
+ """
61
+ for field , mapping in self .index_mappings .items ():
62
+ type = mapping .get ("type" )
63
+ if type == "keyword" :
64
+ value = record .get (field )
65
+ if value :
66
+ record [field ] = (
67
+ [item .strip () for item in record [field ].split ("," )] if isinstance (value , str ) else value
68
+ )
69
+ return record
70
+
53
71
def _template_index (self , schemas : dict = {}) -> str :
54
72
"""Template the input index config for Elasticsearch indexing.
55
73
@@ -99,9 +117,7 @@ def _build_fields(
99
117
for k , v in mapping .items ():
100
118
match = jsonpath_ng .parse (v ).find (record )
101
119
if len (match ) == 0 :
102
- self .logger .warning (
103
- f"schema key { k } with json path { v } could not be found in record: { record } "
104
- )
120
+ self .logger .warning (f"schema key { k } with json path { v } could not be found in record: { record } " )
105
121
schemas [k ] = v
106
122
else :
107
123
if len (match ) > 1 :
@@ -157,13 +173,9 @@ def create_index(self, index: str) -> None:
157
173
index = index , fields = list (self .index_mappings .keys ())
158
174
)[index ]["mappings" ].items ()
159
175
}
160
- if not all (
161
- self .index_mappings [key ]["type" ] == value for key , value in mappings .items ()
162
- ):
176
+ if not all (self .index_mappings [key ]["type" ] == value for key , value in mappings .items ()):
163
177
try :
164
- self .client .indices .put_mapping (
165
- index = index , body = {"properties" : self .index_mappings }
166
- )
178
+ self .client .indices .put_mapping (index = index , body = {"properties" : self .index_mappings })
167
179
except elasticsearch .exceptions .BadRequestError as e :
168
180
if e .message == "illegal_argument_exception" :
169
181
self .logger .warning (
@@ -217,9 +229,7 @@ def process_batch(self, context: dict[str, Any]) -> None:
217
229
Args:
218
230
context: Dictionary containing batch processing context including records.
219
231
"""
220
- updated_records , distinct_indices = self .build_request_body_and_distinct_indices (
221
- context ["records" ]
222
- )
232
+ updated_records , distinct_indices = self .build_request_body_and_distinct_indices (context ["records" ])
223
233
for index in distinct_indices :
224
234
self .create_index (index )
225
235
try :
0 commit comments