|
1 | 1 | #!/usr/bin/env python
|
2 | 2 | # -*- coding: utf-8; -*-
|
| 3 | +import json |
3 | 4 |
|
4 | 5 | # Copyright (c) 2023 Oracle and/or its affiliates.
|
5 | 6 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
|
8 | 9 | import pandas as pd
|
9 | 10 |
|
10 | 11 | from ads.common.decorator.runtime_dependency import OptionalDependency
|
11 |
| -from ads.feature_store.common.utils.utility import get_features |
| 12 | +from ads.feature_store.common.utils.utility import ( |
| 13 | + get_features, |
| 14 | + show_ingestion_summary, |
| 15 | + show_validation_summary, |
| 16 | +) |
12 | 17 | from ads.feature_store.execution_strategy.engine.spark_engine import SparkEngine
|
13 | 18 |
|
14 | 19 | try:
|
|
25 | 30 | FeatureStoreJobType,
|
26 | 31 | LifecycleState,
|
27 | 32 | EntityType,
|
| 33 | + ExpectationType, |
28 | 34 | )
|
29 | 35 | from ads.feature_store.common.spark_session_singleton import SparkSessionSingleton
|
30 | 36 | from ads.feature_store.common.utils.feature_schema_mapper import (
|
@@ -147,6 +153,37 @@ def delete_dataset(self, dataset, dataset_job: DatasetJob):
|
147 | 153 | output_details=output_details,
|
148 | 154 | )
|
149 | 155 |
|
@staticmethod
def _validate_expectation(expectation_type, validation_output):
    """Validate an expectation run and abort ingestion on strict failure.

    Args:
        expectation_type (str): The configured expectation mode, e.g.
            ``ExpectationType.STRICT.value`` or ``ExpectationType.LENIENT.value``.
        validation_output (dict): Validation result containing at least a
            boolean ``"success"`` key and, on failure, a ``"statistics"`` dict.

    Raises:
        Exception: When validation failed and the expectation type is STRICT;
            ingestion is aborted with an explanatory message.

    Notes:
        In non-STRICT (lenient) mode a failed validation only surfaces a
        warning via ``show_validation_summary`` and ingestion proceeds.
    """
    error_message = None
    ingestion_status = "Ingestion in progress"

    if not validation_output["success"]:
        # `statistics` is only needed (and only guaranteed present) when
        # validation failed, so it is read inside the failure branch.
        statistics = validation_output["statistics"]
        if expectation_type == ExpectationType.STRICT.value:
            # No placeholders needed here — plain literal, not an f-string.
            error_message = (
                "Expectation failed with Insufficient Success Rate, Aborting ingestion"
            )
            ingestion_status = "Insufficient Success Rate, Aborting ingestion"

        show_validation_summary(ingestion_status, statistics, expectation_type)

    if error_message:
        raise Exception(error_message)
150 | 187 | def _save_offline_dataframe(
|
151 | 188 | self, data_frame, feature_group, feature_group_job: FeatureGroupJob
|
152 | 189 | ):
|
@@ -185,12 +222,22 @@ def _save_offline_dataframe(
|
185 | 222 |
|
186 | 223 | # TODO: Get event timestamp column and apply filtering basis from and to timestamp
|
187 | 224 |
|
188 |
| - # Apply validations |
189 |
| - validation_output = ExpectationService.apply_validations( |
190 |
| - expectation_details=feature_group.expectation_details, |
191 |
| - expectation_suite_name=feature_group.name, |
192 |
| - dataframe=data_frame, |
193 |
| - ) |
| 225 | + if feature_group.expectation_details: |
| 226 | + expectation_type = feature_group.expectation_details["expectationType"] |
| 227 | + logger.info(f"Validation expectation type: {expectation_type}") |
| 228 | + |
| 229 | + # Apply validations |
| 230 | + validation_output = ExpectationService.apply_validations( |
| 231 | + expectation_details=feature_group.expectation_details, |
| 232 | + expectation_suite_name=feature_group.name, |
| 233 | + dataframe=data_frame, |
| 234 | + ) |
| 235 | + |
| 236 | + if validation_output: |
| 237 | + self._validate_expectation( |
| 238 | + expectation_type=expectation_type, |
| 239 | + validation_output=validation_output, |
| 240 | + ) |
194 | 241 |
|
195 | 242 | # Apply the transformation
|
196 | 243 | if feature_group.transformation_id:
|
@@ -241,9 +288,13 @@ def _save_offline_dataframe(
|
241 | 288 | f"FeatureGroup Materialization Failed with : {type(ex)} with error message: {ex}"
|
242 | 289 | )
|
243 | 290 |
|
| 291 | + show_ingestion_summary( |
| 292 | + entity_type=EntityType.FEATURE_GROUP, error_details=error_details |
| 293 | + ) |
| 294 | + |
244 | 295 | output_details = {
|
245 | 296 | "error_details": error_details,
|
246 |
| - "validation_output": validation_output, |
| 297 | + "validation_output": str(validation_output), |
247 | 298 | "commit_id": "commit_id",
|
248 | 299 | "feature_statistics": feature_statistics,
|
249 | 300 | }
|
@@ -326,12 +377,22 @@ def _save_dataset_input(self, dataset, dataset_job: DatasetJob):
|
326 | 377 | # Execute the SQL query on the spark and load the dataframe.
|
327 | 378 | dataset_dataframe = self.spark_engine.sql(dataset.query)
|
328 | 379 |
|
329 |
| - # Apply validations |
330 |
| - validation_output = ExpectationService.apply_validations( |
331 |
| - expectation_details=dataset.expectation_details, |
332 |
| - expectation_suite_name=dataset.name, |
333 |
| - dataframe=dataset_dataframe, |
334 |
| - ) |
| 380 | + if dataset.expectation_details: |
| 381 | + expectation_type = dataset.expectation_details["expectationType"] |
| 382 | + logger.info(f"Validation expectation type: {expectation_type}") |
| 383 | + |
| 384 | + # Apply validations |
| 385 | + validation_output = ExpectationService.apply_validations( |
| 386 | + expectation_details=dataset.expectation_details, |
| 387 | + expectation_suite_name=dataset.name, |
| 388 | + dataframe=dataset_dataframe, |
| 389 | + ) |
| 390 | + |
| 391 | + if validation_output: |
| 392 | + self._validate_expectation( |
| 393 | + expectation_type=expectation_type, |
| 394 | + validation_output=validation_output, |
| 395 | + ) |
335 | 396 |
|
336 | 397 | self.delta_lake_service.save_delta_dataframe(
|
337 | 398 | dataset_dataframe,
|
@@ -360,9 +421,13 @@ def _save_dataset_input(self, dataset, dataset_job: DatasetJob):
|
360 | 421 | f"Dataset Materialization Failed with : {type(ex)} with error message: {ex}"
|
361 | 422 | )
|
362 | 423 |
|
| 424 | + show_ingestion_summary( |
| 425 | + entity_type=EntityType.DATASET, error_details=error_details |
| 426 | + ) |
| 427 | + |
363 | 428 | output_details = {
|
364 | 429 | "error_details": error_details,
|
365 |
| - "validation_output": validation_output, |
| 430 | + "validation_output": str(validation_output), |
366 | 431 | "commit_id": "commit_id",
|
367 | 432 | "feature_statistics": feature_statistics,
|
368 | 433 | }
|
|
0 commit comments