
Commit 0be39b9

Merge branch 'main' of github.com:oracle/accelerated-data-science into ODSC-45153-support-partioning-feature-store
2 parents 720551f + 3ef30e6

File tree

66 files changed: +2302, -742 lines changed


.github/workflows/publish-to-readthedocs.yml

Lines changed: 5 additions & 1 deletion
@@ -1,7 +1,11 @@
 name: "Publish Docs"

 # To run this workflow manually from the Actions tab
-on: workflow_dispatch
+on:
+  # Auto-trigger this workflow on tag creation
+  push:
+    tags:
+      - 'v*.*.*'

 env:
   RTDS_ADS_PROJECT: https://readthedocs.org/api/v3/projects/accelerated-data-science

.github/workflows/run-unittests-default_setup.yml

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 name: tests/unitary/default_setup/**

 on:
+  workflow_dispatch:
   pull_request:
     branches:
       - main

.github/workflows/run-unittests.yml

Lines changed: 3 additions & 3 deletions
@@ -1,6 +1,7 @@
 name: tests/unitary/**

 on:
+  workflow_dispatch:
   pull_request:
     branches:
       - main
@@ -41,7 +42,7 @@ jobs:
         test-path: ["tests/unitary", "tests/unitary/with_extras/model"]
         include:
           - test-path: "tests/unitary"
-            ignore-path: "tests/unitary/with_extras/model"
+            ignore-path: "--ignore tests/unitary/with_extras/model --ignore tests/unitary/with_extras/feature_store"
             name: "unitary"
           - test-path: "tests/unitary/with_extras/model"
             name: "model"
@@ -115,8 +116,7 @@ jobs:
           # Run tests
           python -m pytest -v -p no:warnings --durations=5 \
             -n auto --dist loadfile ${{ matrix.cov-reports }} \
-            ${{ matrix.test-path }} \
-            --ignore "${{ matrix.ignore-path }}"
+            ${{ matrix.test-path }} ${{ matrix.ignore-path }}

       - name: "Save coverage files"
         uses: actions/upload-artifact@v3

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@ repos:
        args: ['--allow-multiple-documents']
      - id: detect-private-key
      - id: end-of-file-fixer
+       exclude: '\.ipynb?$'
      - id: pretty-format-json
        args: ['--autofix']
      - id: trailing-whitespace
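
pre-commit evaluates `exclude` as a Python regular expression searched against each file path, so the new pattern skips notebook files for the end-of-file-fixer hook. A quick sketch of what `'\.ipynb?$'` matches (the file names below are illustrative, not from the repository):

```python
import re

# pre-commit applies `exclude` with a regex search against the file path
# (my understanding of its behavior; paths are made up for illustration).
pattern = re.compile(r"\.ipynb?$")

print(bool(pattern.search("notebooks/demo.ipynb")))    # True  -> skipped by end-of-file-fixer
print(bool(pattern.search("ads/dataset/dataset.py")))  # False -> still fixed
# Note: the optional trailing "b" means a name ending in ".ipyn" would also match.
```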

README-development.md

Lines changed: 57 additions & 2 deletions
@@ -30,10 +30,10 @@ for development and testing purposes.
 Install Anaconda from `https://repo.continuum.io/miniconda/` for the operating system you are using.

 In the terminal client, enter the following where <yourenvname> is the name you want to call your environment,
-and set the Python version you want to use. ADS SDK requires Python >=3.7.
+and set the Python version you want to use. ADS SDK requires Python >=3.8.

 ```bash
-conda create -n <yourenvname> python=3.7 anaconda
+conda create -n <yourenvname> python=3.8 anaconda
 ```

 This installs the Python version and all the associated anaconda packaged libraries at `path_to_your_anaconda_location/anaconda/envs/<yourenvname>`
@@ -79,6 +79,61 @@ Use `ads_version.json` for versioning. The ADS SDK is packaged as a wheel. To ge

 This wheel can then be installed using `pip`.

+## Running tests
+
+The SDK uses pytest as its test framework.
+
+### Running default setup tests
+
+Default setup tests for testing ADS SDK without extra dependencies, specified in setup.py.
+
+```bash
+# Update your environment with tests dependencies
+pip install -r test-requirements.txt
+# Run default setup tests
+python3 -m pytest tests/unitary/default_setup
+```
+
+### Running all unit tests
+
+To run all unit test install extra dependencies to test all modules of ADS ASD.
+
+```bash
+# Update your environment with tests dependencies
+pip install -r dev-requirements.txt
+# Run all unit tests
+python3 -m pytest tests/unitary
+```
+
+### Running integration tests
+
+ADS opctl integration tests can't be run together with all other integration tests, they require special setup.
+To run all but opctl integration tests, you can run:
+
+```bash
+# Update your environment with tests dependencies
+pip install -r dev-requirements.txt
+# Run integration tests
+python3 -m pytest tests/integration --ignore=tests/integration/opctl
+```
+
+### Running opctl integration tests
+
+ADS opctl integration tests utilize cpu, gpu jobs images and need dataexpl_p37_cpu_v2 and pyspark30_p37_cpu_v3 Data Science Environments be installed, see the [About Conda Environments](https://docs.oracle.com/en-us/iaas/data-science/using/conda_understand_environments.htm).
+To build development container, see the [Build Development Container Image](https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/opctl/localdev/jobs_container_image.html).
+
+```bash
+# Update your environment with tests dependencies
+pip install -r test-requirements.txt
+pip install -e ".[opctl]"
+pip install oci oci-cli
+# Build cpu and gpu jobs images
+ads opctl build-image -d job-local
+ads opctl build-image -g -d job-local
+# Run opclt integration tests
+python3 -m pytest tests/integration/opctl
+```
+
 ## Security

 Consult the [security guide](./SECURITY.md) for our responsible security

ads/ads_version.json

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 {
-    "version": "2.8.7"
+    "version": "2.8.8"
 }

ads/dataset/dataset.py

Lines changed: 54 additions & 28 deletions
@@ -31,6 +31,8 @@
     DatasetDefaults,
     deprecate_default_value,
     deprecate_variable,
+    get_dataset,
+    infer_target_type,
 )
 from ads.dataset.label_encoder import DataFrameLabelEncoder
 from ads.dataset.pipeline import TransformerPipeline
@@ -223,7 +225,8 @@ def _head(self, n=5):

         Examples
         --------
-        >>> ds = DatasetFactory.open("classfication_data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("classfication_data.csv"))
         >>> ds.head()
         * displays the first 5 rows of the dataset, just as the traditional head() function would *
         """
@@ -298,7 +301,8 @@ def call(self, func, *args, sample_size=None, **kwargs):

         Examples
         --------
-        >>> ds = DatasetFactory.open("classfication_data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("classfication_data.csv"))
         >>> def f1(df):
         ... return(sum(df), axis=0)
         >>> sum_ds = ds.call(f1)
@@ -340,20 +344,19 @@ def set_target(self, target, type_discovery=True, target_type=None):

         Examples
         --------
-        >>> ds = DatasetFactory.open("classfication_data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("classfication_data.csv"))
         >>> ds_with_target= ds.set_target("target_class")
         """
-        from ads.dataset.factory import DatasetFactory
-
         if target_type:
             target_series = self.sampled_df[target].astype(target_type)
         else:
             target_series = self.sampled_df[target]
-        return DatasetFactory._get_dataset(
+        return get_dataset(
             self.df,
             self.sampled_df,
             target,
-            DatasetFactory.infer_target_type(target, target_series, type_discovery),
+            infer_target_type(target, target_series, type_discovery),
             self.shape,
             **self.init_kwargs,
         )
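
The hunks above (and the docstring updates in the hunks that follow) replace `DatasetFactory.open(...)` with the `ADSDataset.from_dataframe(...)` pattern and route target selection through the module-level `get_dataset()` / `infer_target_type()` helpers. A minimal sketch of the updated usage, assuming a local `classfication_data.csv` with a `target_class` column as in the docstring examples:

```python
import pandas as pd

from ads.dataset.dataset import ADSDataset

# Build an ADSDataset directly from a pandas DataFrame instead of DatasetFactory.open();
# the CSV name and target column are illustrative, taken from the docstring examples above.
df = pd.read_csv("classfication_data.csv")
ds = ADSDataset.from_dataframe(df)

# set_target() now delegates to the module-level get_dataset()/infer_target_type()
# helpers rather than DatasetFactory._get_dataset().
ds_with_target = ds.set_target("target_class")
```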
@@ -396,7 +399,8 @@ def to_pandas(self, filter=None, frac=None, include_transformer_pipeline=False):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> ds_as_df = ds.to_pandas()

         Notes
@@ -462,7 +466,8 @@ def to_dask(

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> ds_dask = ds.to_dask()

         Notes
@@ -521,7 +526,8 @@ def to_h2o(self, filter=None, frac=None, include_transformer_pipeline=False):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> ds_as_h2o = ds.to_h2o()

         Notes
@@ -578,7 +584,8 @@ def to_xgb(self, filter=None, frac=None, include_transformer_pipeline=False):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> xgb_dmat = ds.to_xgb()

         Notes
@@ -617,7 +624,8 @@ def sample(self, frac=None, random_state=utils.random_state):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> ds_sample = ds.sample()
         """
         df = self.df.sample(frac=frac, random_state=random_state)
@@ -644,7 +652,8 @@ def drop_columns(self, columns):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> ds_smaller = ds.drop_columns(['col1', 'col2'])
         """
         self._validate_feature(columns)
@@ -671,7 +680,8 @@ def assign_column(self, column, arg):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> ds_same_size = ds.assign_column('target',lambda x: x>15 if x not None)
         >>> ds_bigger = ds.assign_column('new_col', np.arange(ds.shape[0]))
         """
@@ -746,7 +756,8 @@ def rename_columns(self, columns):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> ds_renamed = ds.rename_columns({'col1': 'target'})
         """
         if isinstance(columns, list):
@@ -770,7 +781,8 @@ def set_name(self, name):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data1.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data1.csv"))
         >>> ds_renamed = ds.set_name("dataset1")
         """
         self.name = name
@@ -788,7 +800,8 @@ def set_description(self, description):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data1.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data1.csv"))
         >>> ds_renamed = ds.set_description("dataset1 is from "data1.csv"")
         """
         self.description = description
@@ -821,7 +834,8 @@ def snapshot(self, snapshot_dir=None, name="", storage_options=None):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> ds_uri = ds.snapshot()
         """
         if snapshot_dir is None:
@@ -873,7 +887,8 @@ def to_csv(self, path, storage_options=None, **kwargs):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> [ds_link] = ds.to_csv("my/path.csv")
         """
         if storage_options is None:
@@ -900,7 +915,8 @@ def to_parquet(self, path, storage_options=None, **kwargs):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> ds.to_parquet("my/path")
         """
         if storage_options is None:
@@ -927,7 +943,8 @@ def to_json(self, path, storage_options=None, **kwargs):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> ds.to_json("my/path.json")
         """
         if storage_options is None:
@@ -962,7 +979,8 @@ def to_hdf(

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> ds.to_hdf(path="my/path.h5", key="df")
         """
         if storage_options is None:
@@ -1035,7 +1053,13 @@ def to_avro(self, path, schema=None, storage_options=None, **kwargs):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.avro")
+        >>> import pandas
+        >>> import fastavro
+        >>> with open("data.avro", "rb") as fp:
+        >>> reader = fastavro.reader(fp)
+        >>> records = [r for r in reader]
+        >>> df = pandas.DataFrame.from_records(records)
+        >>> ds = ADSDataset.from_dataframe(df)
         >>> ds.to_avro("my/path.avro")
         """
         # Get the row by row formatting
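
The new `to_avro` docstring builds the dataset from Avro records via fastavro. Written out as a runnable snippet (the reads belong inside the `with` block; file paths are illustrative), the round trip might look like:

```python
import fastavro
import pandas as pd

from ads.dataset.dataset import ADSDataset

# Read Avro records into a DataFrame, mirroring the docstring example above.
with open("data.avro", "rb") as fp:
    records = [record for record in fastavro.reader(fp)]
df = pd.DataFrame.from_records(records)

# Wrap the DataFrame and write it back out as Avro.
ds = ADSDataset.from_dataframe(df)
ds.to_avro("my/path.avro")
```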
@@ -1101,7 +1125,8 @@ def astype(self, types):

         Examples
         --------
-        >>> ds = DatasetFactory.open("data.csv")
+        >>> import pandas as pd
+        >>> ds = ADSDataset.from_dataframe(pd.read_csv("data.csv"))
         >>> ds_reformatted = ds.astype({"target": "categorical"})
         """
         return self.__getattr__("astype")(helper.map_types(types))
@@ -1119,8 +1144,10 @@ def merge(self, data, **kwargs):

         Examples
         --------
-        >>> ds1 = DatasetFactory.open("data1.csv")
-        >>> ds2 = DatasetFactory.open("data2.csv")
+        >>> import pandas as pd
+        >>> df1 = pd.read_csv("data1.csv")
+        >>> df2 = pd.read_csv("data2.csv")
+        >>> ds = ADSDataset.from_dataframe(df1.merge(df2))
         >>> ds_12 = ds1.merge(ds2)
         """
         assert isinstance(data, pd.DataFrame) or isinstance(
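
The rewritten `merge` example builds `ds` from `df1.merge(df2)` but then still calls `ds1.merge(ds2)`, so both readings are sketched below. This is a hedged illustration: the truncated `assert isinstance(...)` in the method suggests `merge()` accepts a DataFrame or another dataset object, but that is an assumption here.

```python
import pandas as pd

from ads.dataset.dataset import ADSDataset

df1 = pd.read_csv("data1.csv")
df2 = pd.read_csv("data2.csv")

# Reading 1: merge in pandas first, then wrap the result (first half of the docstring).
ds = ADSDataset.from_dataframe(df1.merge(df2))

# Reading 2: wrap both frames and merge through the dataset API (second half of the
# docstring); assumes merge() accepts a DataFrame or another ADSDataset.
ds1 = ADSDataset.from_dataframe(df1)
ds2 = ADSDataset.from_dataframe(df2)
ds_12 = ds1.merge(ds2)
```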
@@ -1275,9 +1302,8 @@ def _build_new_dataset(
         if progress:
             progress.update("Building new dataset")
         target_type = self.target.type if target_type is None else target_type
-        from ads.dataset.factory import DatasetFactory

-        new_ds = DatasetFactory._get_dataset(
+        new_ds = get_dataset(
             df,
             sampled_df,
             target,

0 commit comments
