Skip to content

Commit 854b819

Browse files
committed
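Fix the Data Flow bug: read the notebook shape from `notebook_session_config_details`, fall back to the configured `archive_uri` when no archive option is given, require `archive_bucket` when the archive is a local path, and downgrade the missing-config log from error to warning.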
1 parent cd2c8f3 commit 854b819

File tree

3 files changed

+19
-16
lines changed

3 files changed

+19
-16
lines changed

ads/jobs/builders/infrastructure/dataflow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ def _load_default_properties() -> dict:
436436
nb_session = dsc_client.get_notebook_session(
437437
os.environ["NB_SESSION_OCID"]
438438
).data
439-
nb_config = nb_session.notebook_session_configuration_details
439+
nb_config = nb_session.notebook_session_config_details
440440
defaults["driver_shape"] = nb_config.shape
441441
logger.debug(f"Set driver shape to {nb_config.shape}")
442442
defaults["executor_shape"] = nb_config.shape

ads/jobs/extension.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,23 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8; -*-
33

4-
# Copyright (c) 2022 Oracle and/or its affiliates.
4+
# Copyright (c) 2022, 2023 Oracle and/or its affiliates.
55
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
66

77

8-
import os
9-
import tempfile
108
import json
9+
import os
1110
import shlex
11+
import tempfile
12+
import warnings
1213
from getopt import gnu_getopt
1314

14-
from ads.opctl.constants import (
15-
ADS_DATAFLOW_CONFIG_FILE_NAME,
16-
DEFAULT_ADS_CONFIG_FOLDER,
17-
)
18-
from ads.jobs import Job, DataFlow, DataFlowRuntime, DataFlowRun
15+
from ads.common.decorator.runtime_dependency import (OptionalDependency,
16+
runtime_dependency)
17+
from ads.jobs import DataFlow, DataFlowRun, DataFlowRuntime, Job
1918
from ads.jobs.utils import get_dataflow_config
20-
from ads.common.decorator.runtime_dependency import (
21-
runtime_dependency,
22-
OptionalDependency,
23-
)
19+
from ads.opctl.constants import (ADS_DATAFLOW_CONFIG_FILE_NAME,
20+
DEFAULT_ADS_CONFIG_FOLDER)
2421

2522

2623
def dataflow(line, cell=None):
@@ -91,6 +88,8 @@ def dataflow_run(options, args, cell):
9188
archive_name = options["-a"]
9289
elif "--archive" in options:
9390
archive_name = options["--archive"]
91+
elif hasattr(dataflow_config, "archive_uri") and dataflow_config.archive_uri:
92+
archive_name = dataflow_config.archive_uri
9493
else:
9594
archive_name = None
9695
with tempfile.TemporaryDirectory() as td:
@@ -105,7 +104,11 @@ def dataflow_run(options, args, cell):
105104
rt_spec["args"] = args[1:]
106105
if archive_name:
107106
rt_spec["archiveUri"] = archive_name
108-
rt_spec["archiveBucket"] = dataflow_config.pop("archive_bucket")
107+
rt_spec["archiveBucket"] = dataflow_config.pop("archive_bucket", None)
108+
if not archive_name.startswith("oci://") and not rt_spec["archiveBucket"]:
109+
raise ValueError(
110+
"`archiveBucket` has to be set in the config if `archive` is a local path."
111+
)
109112
rt = DataFlowRuntime(rt_spec)
110113
infra = DataFlow(spec=dataflow_config)
111114
if "-o" in options or "--overwrite" in options:

ads/jobs/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8; -*-
33

4-
# Copyright (c) 2022 Oracle and/or its affiliates.
4+
# Copyright (c) 2022, 2023 Oracle and/or its affiliates.
55
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
66

77

@@ -43,7 +43,7 @@ def get_dataflow_config(path=None, oci_profile=None):
4343
)
4444
return config
4545
else:
46-
logger.error(f"{dataflow_config_file_path} not found.")
46+
logger.warning(f"{dataflow_config_file_path} not found. Follow this link https://accelerated-data-science.readthedocs.io/en/latest/user_guide/apachespark/dataflow.html to set up the config.")
4747
return {}
4848

4949

0 commit comments

Comments
 (0)