8
8
9
9
from typing import Union , List
10
10
11
+ import oci .regions
11
12
from great_expectations .core import ExpectationSuite
12
13
13
14
from ads .common .decorator .runtime_dependency import OptionalDependency
15
+ from ads .common .oci_resource import OCIResource , SEARCH_TYPE
14
16
from ads .feature_store .common .utils .feature_schema_mapper import (
15
17
map_spark_type_to_feature_type ,
16
18
map_feature_type_to_pandas ,
19
21
from ads .feature_store .feature_group_expectation import Rule , Expectation
20
22
from ads .feature_store .input_feature_detail import FeatureDetail
21
23
from ads .feature_store .common .spark_session_singleton import SparkSessionSingleton
22
- import re
24
+
23
25
try :
24
26
from pyspark .pandas import DataFrame
25
27
except ModuleNotFoundError :
47
49
48
50
49
51
def get_execution_engine_type (
50
- data_frame : Union [DataFrame , pd .DataFrame ]
52
+ data_frame : Union [DataFrame , pd .DataFrame ]
51
53
) -> ExecutionEngine :
52
54
"""
53
55
Determines the execution engine type for a given DataFrame.
@@ -87,7 +89,7 @@ def get_metastore_id(feature_store_id: str):
87
89
88
90
89
91
def validate_delta_format_parameters (
90
- timestamp : datetime = None , version_number : int = None , is_restore : bool = False
92
+ timestamp : datetime = None , version_number : int = None , is_restore : bool = False
91
93
):
92
94
"""
93
95
Validate the user input provided as part of preview, restore APIs for ingested data, Ingested data is
@@ -121,9 +123,9 @@ def validate_delta_format_parameters(
121
123
122
124
123
125
def show_ingestion_summary (
124
- entity_id : str ,
125
- entity_type : EntityType = EntityType .FEATURE_GROUP ,
126
- error_details : str = None ,
126
+ entity_id : str ,
127
+ entity_type : EntityType = EntityType .FEATURE_GROUP ,
128
+ error_details : str = None ,
127
129
):
128
130
"""
129
131
Displays a ingestion summary table with the given entity type and error details.
@@ -163,7 +165,7 @@ def show_validation_summary(ingestion_status: str, validation_output, expectatio
163
165
statistics = validation_output ["statistics" ]
164
166
165
167
table_headers = (
166
- ["expectation_type" ] + list (statistics .keys ()) + ["ingestion_status" ]
168
+ ["expectation_type" ] + list (statistics .keys ()) + ["ingestion_status" ]
167
169
)
168
170
169
171
table_values = [expectation_type ] + list (statistics .values ()) + [ingestion_status ]
@@ -207,9 +209,9 @@ def show_validation_summary(ingestion_status: str, validation_output, expectatio
207
209
208
210
209
211
def get_features (
210
- output_columns : List [dict ],
211
- parent_id : str ,
212
- entity_type : EntityType = EntityType .FEATURE_GROUP ,
212
+ output_columns : List [dict ],
213
+ parent_id : str ,
214
+ entity_type : EntityType = EntityType .FEATURE_GROUP ,
213
215
) -> List [Feature ]:
214
216
"""
215
217
Returns a list of features, given a list of output_columns and a feature_group_id.
@@ -266,7 +268,7 @@ def get_schema_from_spark_df(df: DataFrame):
266
268
267
269
268
270
def get_schema_from_df (
269
- data_frame : Union [DataFrame , pd .DataFrame ], feature_store_id : str
271
+ data_frame : Union [DataFrame , pd .DataFrame ], feature_store_id : str
270
272
) -> List [dict ]:
271
273
"""
272
274
Given a DataFrame, returns a list of dictionaries that describe its schema.
@@ -280,7 +282,7 @@ def get_schema_from_df(
280
282
281
283
282
284
def get_input_features_from_df (
283
- data_frame : Union [DataFrame , pd .DataFrame ], feature_store_id : str
285
+ data_frame : Union [DataFrame , pd .DataFrame ], feature_store_id : str
284
286
) -> List [FeatureDetail ]:
285
287
"""
286
288
Given a DataFrame, returns a list of FeatureDetail objects that represent its input features.
@@ -297,7 +299,7 @@ def get_input_features_from_df(
297
299
298
300
299
301
def convert_expectation_suite_to_expectation (
300
- expectation_suite : ExpectationSuite , expectation_type : ExpectationType
302
+ expectation_suite : ExpectationSuite , expectation_type : ExpectationType
301
303
):
302
304
"""
303
305
Convert an ExpectationSuite object to an Expectation object with detailed rule information.
@@ -356,7 +358,7 @@ def largest_matching_subset_of_primary_keys(left_feature_group, right_feature_gr
356
358
357
359
358
360
def convert_pandas_datatype_with_schema (
359
- raw_feature_details : List [dict ], input_df : pd .DataFrame
361
+ raw_feature_details : List [dict ], input_df : pd .DataFrame
360
362
) -> pd .DataFrame :
361
363
feature_detail_map = {}
362
364
columns_to_remove = []
@@ -381,7 +383,7 @@ def convert_pandas_datatype_with_schema(
381
383
382
384
383
385
def convert_spark_dataframe_with_schema (
384
- raw_feature_details : List [dict ], input_df : DataFrame
386
+ raw_feature_details : List [dict ], input_df : DataFrame
385
387
) -> DataFrame :
386
388
feature_detail_map = {}
387
389
columns_to_remove = []
@@ -403,10 +405,35 @@ def validate_input_feature_details(input_feature_details, data_frame):
403
405
return convert_spark_dataframe_with_schema (input_feature_details , data_frame )
404
406
405
407
406
- def validate_model_ocid (model_ocid ):
407
- pattern = r'^ocid1\.datasciencemodel\.oc(?P<realm>[0-17]+)\.(?P<region>[A-Za-z0-9]+)?\.?(?P<future_use>[A-Za-z0-9]+)?\.(?P<unique_id>[A-Za-z0-9]+)$'
408
- match = re .match (pattern , model_ocid )
409
- if match :
410
- # groups = match.groupdict()
411
- return True
412
- return False
408
def validate_model_ocid_format(model_ocid):
    """Validate the realm/region segments of a model OCID.

    An OCID is dot-separated as
    ``ocid1.<resource-type>.<realm>.<region>[.<future-use>].<unique-id>``;
    only the realm (index 2) and region (index 3) segments are checked here,
    not the resource type or unique id.

    Parameters
    ----------
    model_ocid: str
        The model OCID string to validate.

    Returns
    -------
    bool
        ``True`` when the region segment is a region (short or long name)
        known to the OCI SDK and the realm segment is that region's realm;
        ``False`` otherwise (including malformed OCIDs with too few
        segments).
    """
    split_words = model_ocid.split(".")
    # Guard malformed input: indexing below needs at least 4 segments.
    if len(split_words) < 4:
        return False
    region = split_words[3]
    realm = split_words[2]
    # region = auth.get("signer").region will not work for config
    # TODO: try to get current region if possible??
    if region in oci.regions.REGIONS_SHORT_NAMES:
        # Normalize a short region code (e.g. "phx") to its full name.
        region = oci.regions.REGIONS_SHORT_NAMES[region]
    elif region not in oci.regions.REGIONS:
        return False
    # REGION_REALMS maps region -> realm string; compare for equality rather
    # than substring containment (the original `in` test would accept any
    # prefix/substring of the realm, e.g. "oc" for "oc1").
    if oci.regions.REGION_REALMS[region] != realm:
        return False
    return True
422
+
423
+
424
def search_model_ocids(model_ids: list) -> list:
    """Return the subset of ``model_ids`` found by OCI resource search.

    Builds one structured resource-search query OR-ing an ``identifier``
    clause per requested model OCID, runs it through
    ``OCIResource.search``, and logs a warning for every requested id the
    search did not return.

    Parameters
    ----------
    model_ids: list
        Model OCID strings to look up.

    Returns
    -------
    list
        Identifiers of the models that actually exist, as reported by the
        search service.
    """
    if not model_ids:
        # An empty id list would otherwise truncate the query into a
        # malformed "...where" statement; there is nothing to search for.
        return []
    # OR together one identifier clause per model id; join() avoids the
    # trailing "||" the original trimmed with query[:-2].
    clauses = "||".join(f"identifier='{item}'" for item in model_ids)
    query = "query datasciencemodel resources where " + clauses
    list_models = OCIResource.search(
        query,
        type=SEARCH_TYPE.STRUCTURED,
    )
    list_models_ids = [model.identifier for model in list_models]
    # Set lookup keeps the missing-id scan O(n) instead of O(n*m).
    found = set(list_models_ids)
    for model_id in model_ids:
        if model_id not in found:
            # Lazy %s formatting per logging best practice; message typo
            # ("doesnt") fixed.
            logger.warning("%s does not exist", model_id)
    return list_models_ids
0 commit comments