add unit test

ahosler · ahosler · commit 3838d2ab3d07 · 2025-01-16T13:37:39.000Z
diff --git a/ads/opctl/operator/lowcode/common/transformations.py b/ads/opctl/operator/lowcode/common/transformations.py
@@ -1,10 +1,11 @@
 #!/usr/bin/env python
 
-# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 from abc import ABC
 
+import numpy as np
 import pandas as pd
 
 from ads.opctl import logger
@@ -215,12 +216,17 @@ def _outlier_treatment(self, df):
             .transform(lambda x: (x - x.mean()) / x.std())
         )
         outliers_mask = df["z_score"].abs() > 3
+
+        # Integer targets are cast to float so a fractional median fits the column.
+        if pd.api.types.is_integer_dtype(df[self.target_column_name]):
+            df[self.target_column_name] = df[self.target_column_name].astype(float)
         df.loc[outliers_mask, self.target_column_name] = (
             df[self.target_column_name]
             .groupby(DataColumns.Series)
-            .transform(lambda x: x.mean())
+            .transform(lambda x: np.median(x))
         )
-        return df.drop("z_score", axis=1)
+        df_ret = df.drop("z_score", axis=1)
+        return df_ret
 
     def _check_historical_dataset(self, df):
         expected_names = [self.target_column_name, self.dt_column_name] + (
diff --git a/ads/opctl/operator/lowcode/common/utils.py b/ads/opctl/operator/lowcode/common/utils.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# Copyright (c) 2024 Oracle and/or its affiliates.
+# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import logging
@@ -52,7 +52,7 @@ def load_data(data_spec, storage_options=None, **kwargs):
         default_signer() if ObjectStorageDetails.is_oci_path(filename) else {}
     )
     if vault_secret_id is not None and connect_args is None:
-        connect_args = dict()
+        connect_args = {}
 
     if data is not None:
         if format == "spark":
@@ -102,7 +102,7 @@ def load_data(data_spec, storage_options=None, **kwargs):
                 except Exception as e:
                     raise Exception(
                         f"Could not retrieve database credentials from vault {vault_secret_id}: {e}"
-                    )
+                    ) from e
 
             con = oracledb.connect(**connect_args)
             if table_name is not None:
@@ -126,6 +126,7 @@ def load_data(data_spec, storage_options=None, **kwargs):
 
 
 def write_data(data, filename, format, storage_options, index=False, **kwargs):
+    disable_print()
     if not format:
         _, format = os.path.splitext(filename)
         format = format[1:]
@@ -134,7 +135,8 @@ def write_data(data, filename, format, storage_options, index=False, **kwargs):
         return call_pandas_fsspec(
             write_fn, filename, index=index, storage_options=storage_options, **kwargs
         )
-    raise OperatorYamlContentError(
+    enable_print()
+    raise InvalidParameterError(
         f"The format {format} is not currently supported for writing data. Please change the format parameter for the data output: {filename} ."
     )
 
diff --git a/tests/operators/forecast/test_datasets.py b/tests/operators/forecast/test_datasets.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 import os
 import yaml
@@ -159,6 +159,39 @@ def test_load_datasets(model, data_details):
             print(train_metrics)
 
 
+@pytest.mark.parametrize("model", MODELS[:-1])
+def test_pandas_df_historical(model):
+    """Run the operator with historical data passed as an in-memory DataFrame."""
+    historical_df = pd.read_csv(f"{DATASET_PREFIX}dataset1.csv")
+    yaml_i = deepcopy(TEMPLATE_YAML)
+    spec = yaml_i["spec"]
+    spec["model"] = model
+    spec["historical_data"].pop("url")  # drop the URL source; data is given in memory
+    spec["historical_data"]["data"] = historical_df
+    spec.update(target_column="Y", horizon=5)
+    spec["datetime_column"]["name"] = DATETIME_COL
+    run(yaml_i, backend="operator.local", debug=False)
+    subprocess.run(f"ls -a {output_data_path}", shell=True)
+
+
+@pytest.mark.parametrize("model", MODELS[:-1])
+def test_pandas_historical_test(model):
+    """Run the operator with in-memory train data and the last row as test data."""
+    full_df = pd.read_csv(f"{DATASET_PREFIX}dataset4.csv")
+    train_df, holdout_df = full_df[:-1], full_df[-1:]
+    yaml_i = deepcopy(TEMPLATE_YAML)
+    spec = yaml_i["spec"]
+    spec["model"] = model
+    spec["historical_data"].pop("url")  # drop the URL source; data is given in memory
+    spec["historical_data"]["data"] = train_df
+    spec["test_data"]["data"] = holdout_df
+    spec["target_column"] = "Y"
+    spec["datetime_column"]["name"] = DATETIME_COL
+    spec["horizon"] = 5
+    run(yaml_i, backend="operator.local", debug=False)
+    subprocess.run(f"ls -a {output_data_path}", shell=True)
+
+
 def run_operator(
     historical_data_path,
     additional_data_path,