Skip to content

Commit 32ae6e9

Browse files
authored
Merge pull request #9 from oracle/2.6.x
Release version 2.6.2
2 parents 91cf264 + 09dd92d commit 32ae6e9

File tree

256 files changed

+8402
-7647
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

256 files changed

+8402
-7647
lines changed

Makefile

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,35 @@
1+
RELEASE_BRANCH := release/ads
2+
DOCS_RELEASE_BRANCH := release
3+
CLONE_DIR := /tmp/advanced-ds
4+
DOCS_CLONE_DIR := /tmp/ads-docs
5+
COPY_INVENTORY := setup.py CONTRIBUTING.md LICENSE.txt MANIFEST.in README-development.md README.md SECURITY.md THIRD_PARTY_LICENSES.txt
6+
7+
prepare-release-branch: clean
8+
@git checkout master
9+
@git clean -xdf
10+
@git pull
11+
git checkout -b release/$(RELEASE_VERSION)
12+
13+
prepare-ads:
14+
@echo "Started advanced-ds clone at $$(date)"
15+
@git clone ssh://git@bitbucket.oci.oraclecorp.com:7999/odsc/advanced-ds.git --branch $(RELEASE_BRANCH) --depth 1 $(CLONE_DIR)
16+
@echo "Finished cloning at $$(date)"
17+
cp -r $(CLONE_DIR)/ads .
18+
$(foreach var,$(COPY_INVENTORY),cp $(CLONE_DIR)/$(var) .;)
19+
20+
prepare-docs:
21+
@echo "Started ads_docs clone at $$(date)"
22+
@git clone ssh://git@bitbucket.oci.oraclecorp.com:7999/odsc/ads_docs.git --branch $(DOCS_RELEASE_BRANCH) --depth 1 $(DOCS_CLONE_DIR)
23+
@echo "Finished cloning at $$(date)"
24+
cp -r $(DOCS_CLONE_DIR)/source docs/ && cp $(DOCS_CLONE_DIR)/requirements.txt docs
25+
26+
prepare: prepare-release-branch prepare-ads prepare-docs
27+
28+
push: clean
29+
@bash -c 'if [[ $$(git branch | grep \*) == "* release/$(RELEASE_VERSION)" ]];then echo "Version matching current branch"; else echo "Set proper value to RELEASE_VERSION";exit 1 ; fi'
30+
@git add .
31+
@git commit -m "Release version: $(RELEASE_VERSION)"
32+
@git push --set-upstream origin release/$(RELEASE_VERSION)
133

234
dist: clean
335
@python3 setup.py sdist bdist_wheel
@@ -10,3 +42,5 @@ clean:
1042
@find ./ -name '*.pyc' -exec rm -f {} \;
1143
@find ./ -name 'Thumbs.db' -exec rm -f {} \;
1244
@find ./ -name '*~' -exec rm -f {} \;
45+
@rm -rf $(CLONE_DIR)
46+
@rm -rf $(DOCS_CLONE_DIR)

README.md

Lines changed: 89 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,19 @@
22

33
[![PyPI](https://img.shields.io/pypi/v/oracle-ads.svg)](https://pypi.org/project/oracle-ads/)
44

5-
The [Oracle Accelerated Data Science (ADS) SDK](https://docs.oracle.com/en-us/iaas/tools/ads-sdk/latest/index.html) is maintained by the [Oracle Cloud Infrastructure Data Science service](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm) team. It speeds up common data science activities by providing tools that automate and/or simplify common data science tasks, along with providing a data scientist friendly pythonic interface to Oracle Cloud Infrastructure (OCI) services, most notably OCI Data Science, Data Flow, Object storage, and the Autonomous Database. ADS gives you an interface to manage the lifecycle of machine learning models, from data acquisition to model evaluation, interpretation, and model deployment.
5+
The [Oracle Accelerated Data Science (ADS) SDK](https://docs.oracle.com/en-us/iaas/tools/ads-sdk/latest/index.html) is maintained by the Oracle Cloud Infrastructure (OCI) [Data Science service](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm) team. It speeds up common data science activities by providing tools that automate and simplify common data science tasks. Additionally, it provides data scientists with a friendly Pythonic interface to OCI services. Some of the more notable services are OCI Data Science, Model Catalog, Model Deployment, Jobs, Data Flow, Object Storage, Vault, Big Data Service, Data Catalog, and the Autonomous Database. ADS gives you an interface to manage the life cycle of machine learning models, from data acquisition to model evaluation, interpretation, and model deployment.
66

77
With ADS you can:
88

99
- Read datasets from Oracle Object Storage, Oracle RDBMS (ATP/ADW/On-prem), AWS S3 and other sources into `Pandas dataframes`.
10-
- Easily compute summary statistics on your dataframes and perform data profiling.
11-
- Tune models using hyperparameter optimization with the `ADSTuner` tool.
12-
- Generate detailed evaluation reports of your model candidates with the `ADSEvaluator` module.
10+
- Use feature types to characterize your data, create meaningful summary statistics and plots. Use the warning and validation system to test the quality of your data.
11+
- Tune models using hyperparameter optimization with the `ADSTuner` tool.
12+
- Generate detailed evaluation reports of your model candidates with the `ADSEvaluator` module.
1313
- Save machine learning models to the [OCI Data Science Model Catalog](https://docs.oracle.com/en-us/iaas/data-science/using/models-about.htm).
14-
- Deploy those models as HTTP endpoints with [Model Deployment](https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-about.htm).
14+
- Deploy models as HTTP endpoints with [Model Deployment](https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-about.htm).
1515
- Launch distributed ETL, data processing, and model training jobs in Spark with [OCI Data Flow](https://docs.oracle.com/en-us/iaas/data-flow/using/home.htm).
16-
- Train machine learning models in OCI Data Science [Jobs](https://docs.oracle.com/en-us/iaas/data-science/using/jobs-about.htm).
17-
- Manage the lifecycle of conda environments through the `ads conda` command line interface (CLI).
16+
- Train machine learning models in OCI Data Science [Jobs](https://docs.oracle.com/en-us/iaas/data-science/using/jobs-about.htm).
17+
- Manage the life cycle of conda environments through the `ads conda` command line interface (CLI).
1818

1919
## Installation
2020

@@ -28,44 +28,98 @@ You have various options when installing ADS.
2828

2929
### Installing extra libraries
3030

31-
To use ADS within a [Notebook Session](https://docs.oracle.com/en-us/iaas/data-science/using/manage-notebook-sessions.htm) of the OCI Data Science service:
31+
The `all-optional` module will install all optional dependencies.
3232

3333
```bash
34-
$ python3 -m pip install oracle-ads[notebook]
34+
$ python3 -m pip install oracle-ads[all-optional]
3535
```
3636

37-
For machine learning tasks install
37+
To work with gradient boosting models, install the `boosted` module. This module includes XGBoost and LightGBM model classes.
3838

3939
```bash
4040
$ python3 -m pip install oracle-ads[boosted]
4141
```
4242

43-
To work on text related tasks run
43+
For big data use cases using Oracle Big Data Service (BDS), install the `bds` module. It includes the following libraries, `ibis-framework[impala]`, `hdfs[kerberos]` and `sqlalchemy`.
4444

4545
```bash
46-
$ python3 -m pip install oracle-ads[text]
46+
$ python3 -m pip install oracle-ads[bds]
4747
```
4848

49-
For access to a broad set of data formats (for example, Excel, Avro, etc.) run
49+
To work with a broad set of data formats (for example, Excel, Avro, etc.) install the `data` module. It includes the `fastavro`, `openpyxl`, `pandavro`, `asteval`, `datefinder`, `htmllistparse`, and `sqlalchemy` libraries.
5050

5151
```bash
5252
$ python3 -m pip install oracle-ads[data]
5353
```
5454

55+
To work with geospatial data, install the `geo` module. It includes `geopandas` and libraries from the `viz` module.
56+
57+
```bash
58+
$ python3 -m pip install oracle-ads[geo]
59+
```
60+
61+
Install the `notebook` module to use ADS within an OCI Data Science service [notebook session](https://docs.oracle.com/en-us/iaas/data-science/using/manage-notebook-sessions.htm). This module installs the `ipywidgets` and `ipython` libraries.
62+
63+
```bash
64+
$ python3 -m pip install oracle-ads[notebook]
65+
```
66+
67+
To work with ONNX-compatible runtimes and libraries designed to maximize performance and model portability, install the `onnx` module. It includes the following libraries: `onnx`, `onnxruntime`, `onnxmltools`, `skl2onnx`, `xgboost`, `lightgbm`, and libraries from the `viz` module.
68+
69+
```bash
70+
$ python3 -m pip install oracle-ads[onnx]
71+
```
72+
73+
For infrastructure tasks, install the `opctl` module. It includes the following libraries, `oci-cli`, `docker`, `conda-pack`, `nbconvert`, `nbformat`, and `inflection`.
74+
75+
```bash
76+
$ python3 -m pip install oracle-ads[opctl]
77+
```
78+
79+
For hyperparameter optimization tasks, install the `optuna` module. It includes `optuna` and libraries from the `viz` module.
80+
81+
```bash
82+
$ python3 -m pip install oracle-ads[optuna]
83+
```
84+
85+
Install the `tensorflow` module to include `tensorflow` and libraries from the `viz` module.
86+
87+
```bash
88+
$ python3 -m pip install oracle-ads[tensorflow]
89+
```
90+
91+
For text-related tasks, install the `text` module. This will include the `wordcloud` and `spacy` libraries.
92+
93+
```bash
94+
$ python3 -m pip install oracle-ads[text]
95+
```
96+
97+
Install the `torch` module to include `pytorch` and libraries from the `viz` module.
98+
99+
```bash
100+
$ python3 -m pip install oracle-ads[torch]
101+
```
102+
103+
Install the `viz` module to include libraries for visualization tasks. Some of the key packages are `bokeh`, `folium`, `seaborn` and related packages.
104+
105+
```bash
106+
$ python3 -m pip install oracle-ads[viz]
107+
```
108+
55109
**Note**
56110

57111
Multiple extra dependencies can be installed together. For example:
58112

59113
```bash
60-
$ python3 -m pip install oracle-ads[notebook,boosted,text]
114+
$ python3 -m pip install oracle-ads[notebook,viz,text]
61115
```
62116

63117
## Documentation
64118

65119
- [Oracle Accelerated Data Science SDK (ADS) Documentation](https://docs.oracle.com/en-us/iaas/tools/ads-sdk/latest/index.html)
66-
- [Oracle Cloud Infrastructure Data Science and AI services Examples](https://github.com/oracle/oci-data-science-ai-samples)
120+
- [OCI Data Science and AI services Examples](https://github.com/oracle/oci-data-science-ai-samples)
67121
- [Oracle AI & Data Science Blog](https://blogs.oracle.com/ai-and-datascience/)
68-
- [Oracle Cloud Infrastructure Documentation](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm)
122+
- [OCI Documentation](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm)
69123

70124
## Examples
71125

@@ -75,48 +129,37 @@ Multiple extra dependencies can be installed together. For example:
75129
import ads
76130
from ads.common.auth import default_signer
77131
import oci
132+
import pandas as pd
78133

79134
ads.set_auth(auth="api_key", oci_config_location=oci.config.DEFAULT_LOCATION, profile="DEFAULT")
80-
bucket_name = <bucket-name>
81-
file_name = <file-name>
135+
bucket_name = <bucket_name>
136+
key = <key>
82137
namespace = <namespace>
83-
df = pd.read_csv(f"oci://{bucket_name}@{namespace}/{file_name}", storage_options=default_signer())
138+
df = pd.read_csv(f"oci://{bucket_name}@{namespace}/{key}", storage_options=default_signer())
84139
```
85140

86-
### Load data from ADB (simple)
141+
### Load data from ADB
142+
143+
This example uses SQL-injection-safe bind variables.
87144

88145
```python
146+
import ads
147+
import pandas as pd
148+
89149
connection_parameters = {
90-
"user_name": "<username>",
150+
"user_name": "<user_name>",
91151
"password": "<password>",
92-
"service_name": "<service_name_{high|med|low}>",
93-
"wallet_location": "/full/path/to/my_wallet.zip",
152+
"service_name": "<tns_name>",
153+
"wallet_location": "<file_path>",
94154
}
95-
import pandas as pd
96-
import ads
97155

98-
# simple read of a SQL query into a dataframe with no bind variables
99-
df = pd.DataFrame.ads.read_sql(
100-
"SELECT * FROM SH.SALES",
101-
connection_parameters=connection_parameters,
102-
)
103-
```
104-
105-
### Load data from ADB (using sql-injection-safe bind variables)
106-
107-
```python
108156
df = pd.DataFrame.ads.read_sql(
109157
"""
110-
SELECT
111-
*
112-
FROM
113-
SH.SALES
114-
WHERE
115-
ROWNUM <= :max_rows
158+
SELECT *
159+
FROM SH.SALES
160+
WHERE ROWNUM <= :max_rows
116161
""",
117-
bind_variables={
118-
max_rows : 100
119-
},
162+
bind_variables={ max_rows : 100 },
120163
connection_parameters=connection_parameters,
121164
)
122165
```
@@ -129,8 +172,8 @@ Find Getting Started instructions for developers in [README-development.md](http
129172

130173
## Security
131174

132-
Please consult the security guide [SECURITY.md](https://github.com/oracle/accelerated-data-science/blob/main/SECURITY.md) for our responsible security vulnerability disclosure process.
175+
Consult the security guide [SECURITY.md](https://github.com/oracle/accelerated-data-science/blob/main/SECURITY.md) for our responsible security vulnerability disclosure process.
133176

134177
## License
135178

136-
Copyright (c) 2020, 2022 Oracle and/or its affiliates. Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
179+
Copyright (c) 2020, 2022 Oracle and/or its affiliates. Licensed under the [Universal Permissive License v1.0](https://oss.oracle.com/licenses/upl/)

ads/__init__.py

Lines changed: 13 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,18 @@
99
import logging
1010
import sys
1111

12-
import IPython
13-
from IPython import get_ipython
14-
from IPython.core.error import UsageError
12+
import oci
13+
1514
import matplotlib.font_manager # causes matplotlib to regenerate its fonts
1615
import json
1716

1817
import ocifs
19-
import oci
2018
from ads.common.decorator.deprecate import deprecated
19+
from ads.common.ipython import configure_plotting, _log_traceback
2120
from ads.feature_engineering.accessor.series_accessor import ADSSeriesAccessor
2221
from ads.feature_engineering.accessor.dataframe_accessor import ADSDataFrameAccessor
2322

23+
2424
os.environ["GIT_PYTHON_REFRESH"] = "quiet"
2525

2626
__version__ = ""
@@ -35,9 +35,12 @@
3535
oci_key_profile = "DEFAULT"
3636
test_mode = os.environ.get("TEST_MODE", False)
3737
resource_principal_mode = bool(os.environ.get("RESOURCE_PRINCIPAL_MODE", False))
38+
orig_ipython_traceback = None
3839

3940

40-
def set_auth(auth="api_key", oci_config_location=oci.config.DEFAULT_LOCATION, profile="DEFAULT"):
41+
def set_auth(
42+
auth="api_key", oci_config_location=oci.config.DEFAULT_LOCATION, profile="DEFAULT"
43+
):
4144
"""
4245
Enable/disable resource principal identity or keypair identity in a notebook session.
4346
@@ -92,7 +95,11 @@ def set_debug_mode(mode=True):
9295
"""
9396
global debug_mode
9497
debug_mode = mode
98+
import IPython
99+
95100
if debug_mode:
101+
from ads.common.ipython import orig_ipython_traceback
102+
96103
IPython.core.interactiveshell.InteractiveShell.showtraceback = (
97104
orig_ipython_traceback
98105
)
@@ -169,37 +176,4 @@ def hello():
169176
)
170177

171178

172-
def _log_traceback(self, exc_tuple=None, **kwargs):
173-
try:
174-
etype, value, tb = self._get_exc_info(exc_tuple)
175-
except ValueError:
176-
print("No traceback available to show.", file=sys.stderr)
177-
return
178-
msg = etype.__name__, str(value)
179-
logger.error("ADS Exception", exc_info=(etype, value, tb))
180-
sys.stderr.write("{0}: {1}".format(*msg))
181-
182-
183-
if IPython.core.interactiveshell.InteractiveShell.showtraceback != _log_traceback:
184-
orig_ipython_traceback = (
185-
IPython.core.interactiveshell.InteractiveShell.showtraceback
186-
)
187-
188-
# Override the default showtraceback behavior of ipython, to show only the error message and log the stacktrace
189-
IPython.core.interactiveshell.InteractiveShell.showtraceback = _log_traceback
190-
191-
ipy = get_ipython()
192-
if ipy is not None:
193-
try:
194-
# show matplotlib plots inline
195-
ipy.run_line_magic("matplotlib", "inline")
196-
except UsageError:
197-
# ignore error and use the default matplotlib mode
198-
pass
199-
else:
200-
import matplotlib as mpl
201-
202-
mpl.rcParams["backend"] = "agg"
203-
import matplotlib.pyplot as plt
204-
205-
plt.switch_backend("agg")
179+
configure_plotting()

ads/ads_version.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
{
2-
"version": "2.6.1"
2+
"version": "2.6.2"
33
}

0 commit comments

Comments
 (0)