|
1 | 1 | #!/usr/bin/env python
|
2 | 2 | # -*- coding: utf-8; -*-
|
| 3 | +import json |
3 | 4 |
|
4 | 5 | # Copyright (c) 2023 Oracle and/or its affiliates.
|
5 | 6 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
|
8 | 9 | import pandas as pd
|
9 | 10 |
|
10 | 11 | from ads.common.decorator.runtime_dependency import OptionalDependency
|
11 |
| -from ads.feature_store.common.utils.utility import get_features |
| 12 | +from ads.feature_store.common.utils.utility import ( |
| 13 | + get_features, |
| 14 | + show_ingestion_summary, |
| 15 | + show_validation_summary, |
| 16 | +) |
12 | 17 | from ads.feature_store.execution_strategy.engine.spark_engine import SparkEngine
|
13 | 18 |
|
14 | 19 | try:
|
|
25 | 30 | FeatureStoreJobType,
|
26 | 31 | LifecycleState,
|
27 | 32 | EntityType,
|
| 33 | + ExpectationType, |
28 | 34 | )
|
29 | 35 | from ads.feature_store.common.spark_session_singleton import SparkSessionSingleton
|
30 | 36 | from ads.feature_store.common.utils.transformation_utils import TransformationUtils
|
@@ -145,6 +151,36 @@ def delete_dataset(self, dataset, dataset_job: DatasetJob):
|
145 | 151 | output_details=output_details,
|
146 | 152 | )
|
147 | 153 |
|
| 154 | + @staticmethod |
| 155 | + def _validate_expectation(expectation_type, validation_output): |
| 156 | + """ |
| 157 | + Validates the expectation based on the given expectation type and the validation output. |
| 158 | +
|
| 159 | + Args: |
| 160 | + expectation_type (str): The type of expectation to validate (e.g., "STRICT", "LENIENT"). |
| 161 | + validation_output (dict): The output of the validation containing success status and statistics. |
| 162 | +
|
| 163 | + Raises: |
| 164 | + Exception: If the expectation fails in strict mode, raises an exception with an error message. |
| 165 | +
|
| 166 | + Warnings: |
| 167 | + If the expectation fails in lenient mode, logs a warning message. |
| 168 | +
|
| 169 | + """ |
| 170 | + |
| 171 | + error_message = None |
| 172 | + ingestion_status = "Ingestion in progress" |
| 173 | + |
| 174 | + if not validation_output["success"]: |
| 175 | + if expectation_type == ExpectationType.STRICT.value: |
| 176 | + error_message = f"Expectation failed with Insufficient Success Rate, Aborting ingestion" |
| 177 | + ingestion_status = "Insufficient Success Rate, Aborting ingestion" |
| 178 | + |
| 179 | + show_validation_summary(ingestion_status, validation_output, expectation_type) |
| 180 | + |
| 181 | + if error_message: |
| 182 | + raise Exception(error_message) |
| 183 | + |
148 | 184 | def _save_offline_dataframe(
|
149 | 185 | self, data_frame, feature_group, feature_group_job: FeatureGroupJob
|
150 | 186 | ):
|
@@ -182,12 +218,22 @@ def _save_offline_dataframe(
|
182 | 218 |
|
183 | 219 | # TODO: Get event timestamp column and apply filtering basis from and to timestamp
|
184 | 220 |
|
185 |
| - # Apply validations |
186 |
| - validation_output = ExpectationService.apply_validations( |
187 |
| - expectation_details=feature_group.expectation_details, |
188 |
| - expectation_suite_name=feature_group.name, |
189 |
| - dataframe=data_frame, |
190 |
| - ) |
| 221 | + if feature_group.expectation_details: |
| 222 | + expectation_type = feature_group.expectation_details["expectationType"] |
| 223 | + logger.info(f"Validation expectation type: {expectation_type}") |
| 224 | + |
| 225 | + # Apply validations |
| 226 | + validation_output = ExpectationService.apply_validations( |
| 227 | + expectation_details=feature_group.expectation_details, |
| 228 | + expectation_suite_name=feature_group.name, |
| 229 | + dataframe=data_frame, |
| 230 | + ) |
| 231 | + |
| 232 | + if validation_output: |
| 233 | + self._validate_expectation( |
| 234 | + expectation_type=expectation_type, |
| 235 | + validation_output=validation_output, |
| 236 | + ) |
191 | 237 |
|
192 | 238 | # Apply the transformation
|
193 | 239 | if feature_group.transformation_id:
|
@@ -238,9 +284,15 @@ def _save_offline_dataframe(
|
238 | 284 | f"FeatureGroup Materialization Failed with : {type(ex)} with error message: {ex}"
|
239 | 285 | )
|
240 | 286 |
|
| 287 | + show_ingestion_summary( |
| 288 | + entity_id=feature_group.id, |
| 289 | + entity_type=EntityType.FEATURE_GROUP, |
| 290 | + error_details=error_details, |
| 291 | + ) |
| 292 | + |
241 | 293 | output_details = {
|
242 | 294 | "error_details": error_details,
|
243 |
| - "validation_output": validation_output, |
| 295 | + "validation_output": str(validation_output), |
244 | 296 | "commit_id": "commit_id",
|
245 | 297 | "feature_statistics": feature_statistics,
|
246 | 298 | }
|
@@ -323,12 +375,22 @@ def _save_dataset_input(self, dataset, dataset_job: DatasetJob):
|
323 | 375 | # Execute the SQL query on the spark and load the dataframe.
|
324 | 376 | dataset_dataframe = self.spark_engine.sql(dataset.query)
|
325 | 377 |
|
326 |
| - # Apply validations |
327 |
| - validation_output = ExpectationService.apply_validations( |
328 |
| - expectation_details=dataset.expectation_details, |
329 |
| - expectation_suite_name=dataset.name, |
330 |
| - dataframe=dataset_dataframe, |
331 |
| - ) |
| 378 | + if dataset.expectation_details: |
| 379 | + expectation_type = dataset.expectation_details["expectationType"] |
| 380 | + logger.info(f"Validation expectation type: {expectation_type}") |
| 381 | + |
| 382 | + # Apply validations |
| 383 | + validation_output = ExpectationService.apply_validations( |
| 384 | + expectation_details=dataset.expectation_details, |
| 385 | + expectation_suite_name=dataset.name, |
| 386 | + dataframe=dataset_dataframe, |
| 387 | + ) |
| 388 | + |
| 389 | + if validation_output: |
| 390 | + self._validate_expectation( |
| 391 | + expectation_type=expectation_type, |
| 392 | + validation_output=validation_output, |
| 393 | + ) |
332 | 394 |
|
333 | 395 | self.delta_lake_service.save_delta_dataframe(
|
334 | 396 | dataset_dataframe,
|
@@ -357,9 +419,15 @@ def _save_dataset_input(self, dataset, dataset_job: DatasetJob):
|
357 | 419 | f"Dataset Materialization Failed with : {type(ex)} with error message: {ex}"
|
358 | 420 | )
|
359 | 421 |
|
| 422 | + show_ingestion_summary( |
| 423 | + entity_id=dataset.id, |
| 424 | + entity_type=EntityType.DATASET, |
| 425 | + error_details=error_details, |
| 426 | + ) |
| 427 | + |
360 | 428 | output_details = {
|
361 | 429 | "error_details": error_details,
|
362 |
| - "validation_output": validation_output, |
| 430 | + "validation_output": str(validation_output), |
363 | 431 | "commit_id": "commit_id",
|
364 | 432 | "feature_statistics": feature_statistics,
|
365 | 433 | }
|
|
0 commit comments