Skip to content

Commit c9d6d74

Browse files
authored
[FSTORE-1633] Fix engine choice in case of connection to serverless (4.1) (#427)
* [FSTORE-1633] Fix engine choice in case of connection to serverless
* Bump version to 4.1.3
1 parent 2515818 commit c9d6d74

File tree

8 files changed: 30 additions and 26 deletions.

java/beam/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<parent>
66
<artifactId>hsfs-parent</artifactId>
77
<groupId>com.logicalclocks</groupId>
8-
<version>4.1.2</version>
8+
<version>4.1.3</version>
99
</parent>
1010
<modelVersion>4.0.0</modelVersion>
1111

java/flink/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<parent>
66
<artifactId>hsfs-parent</artifactId>
77
<groupId>com.logicalclocks</groupId>
8-
<version>4.1.2</version>
8+
<version>4.1.3</version>
99
</parent>
1010
<modelVersion>4.0.0</modelVersion>
1111

java/hsfs/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<parent>
66
<artifactId>hsfs-parent</artifactId>
77
<groupId>com.logicalclocks</groupId>
8-
<version>4.1.2</version>
8+
<version>4.1.3</version>
99
</parent>
1010
<modelVersion>4.0.0</modelVersion>
1111

java/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
<groupId>com.logicalclocks</groupId>
88
<artifactId>hsfs-parent</artifactId>
99
<packaging>pom</packaging>
10-
<version>4.1.2</version>
10+
<version>4.1.3</version>
1111
<modules>
1212
<module>hsfs</module>
1313
<module>spark</module>

java/spark/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
<parent>
2323
<artifactId>hsfs-parent</artifactId>
2424
<groupId>com.logicalclocks</groupId>
25-
<version>4.1.2</version>
25+
<version>4.1.3</version>
2626
</parent>
2727
<modelVersion>4.0.0</modelVersion>
2828

python/hopsworks_common/connection.py

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
import weakref
2525
from typing import Any, Optional
2626

27-
from hopsworks_common import client, usage, util, version
27+
from hopsworks_common import client, constants, usage, util, version
2828
from hopsworks_common.core import (
2929
hosts_api,
3030
project_api,
@@ -98,13 +98,12 @@ class Connection:
9898
project: The name of the project to connect to. When running on Hopsworks, this
9999
defaults to the project from where the client is run from.
100100
Defaults to `None`.
101-
engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`,
102-
which initializes the engine to Spark if the environment provides Spark, for
103-
example on Hopsworks and Databricks, or falls back to Python if Spark is not
104-
available, e.g. on local Python environments or AWS SageMaker. This option
105-
allows you to override this behaviour. `"training"` engine is useful when only
106-
feature store metadata is needed, for example training dataset location and label
107-
information when Hopsworks training experiment is conducted.
101+
engine: Specifies the engine to use. Possible options are "spark", "python", "training", "spark-no-metastore", or "spark-delta". The default value is None, which automatically selects the engine based on the environment:
102+
"spark": Used if Spark is available and the connection is not to serverless Hopsworks, such as in Hopsworks or Databricks environments.
103+
"python": Used in local Python environments or AWS SageMaker when Spark is not available or the connection is done to serverless Hopsworks.
104+
"training": Used when only feature store metadata is needed, such as for obtaining training dataset locations and label information during Hopsworks training experiments.
105+
"spark-no-metastore": Functions like "spark" but does not rely on the Hive metastore.
106+
"spark-delta": Minimizes dependencies further by avoiding both Hive metastore and HopsFS.
108107
hostname_verification: Whether or not to verify Hopsworks' certificate, defaults
109108
to `True`.
110109
trust_store_path: Path on the file system containing the Hopsworks certificates,
@@ -338,30 +337,35 @@ def connect(self) -> None:
338337
self._connected = True
339338
finalizer = weakref.finalize(self, self.close)
340339
try:
340+
external = client.base.Client.REST_ENDPOINT not in os.environ
341+
serverless = self._host == constants.HOSTS.APP_HOST
341342
# determine engine, needed to init client
342-
if (self._engine is not None and self._engine.lower() == "spark") or (
343-
self._engine is None and importlib.util.find_spec("pyspark")
343+
if (
344+
self._engine is None
345+
and importlib.util.find_spec("pyspark")
346+
and (not external or not serverless)
344347
):
345348
self._engine = "spark"
346-
elif (self._engine is not None and self._engine.lower() == "python") or (
347-
self._engine is None and not importlib.util.find_spec("pyspark")
348-
):
349+
elif self._engine is None:
350+
self._engine = "python"
351+
elif self._engine.lower() == "spark":
352+
self._engine = "spark"
353+
elif self._engine.lower() == "python":
349354
self._engine = "python"
350-
elif self._engine is not None and self._engine.lower() == "training":
355+
elif self._engine.lower() == "training":
351356
self._engine = "training"
352-
elif (
353-
self._engine is not None
354-
and self._engine.lower() == "spark-no-metastore"
355-
):
357+
elif self._engine.lower() == "spark-no-metastore":
356358
self._engine = "spark-no-metastore"
359+
elif self._engine.lower() == "spark-delta":
360+
self._engine = "spark-delta"
357361
else:
358362
raise ConnectionError(
359363
"Engine you are trying to initialize is unknown. "
360364
"Supported engines are `'spark'`, `'python'` and `'training'`."
361365
)
362366

363367
# init client
364-
if client.base.Client.REST_ENDPOINT not in os.environ:
368+
if external:
365369
client.init(
366370
"external",
367371
self._host,

python/hopsworks_common/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,4 @@
1414
# limitations under the License.
1515
#
1616

17-
__version__ = "4.1.2"
17+
__version__ = "4.1.3"

utils/java/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
<groupId>com.logicalclocks</groupId>
77
<artifactId>hsfs-utils</artifactId>
8-
<version>4.1.2</version>
8+
<version>4.1.3</version>
99

1010
<properties>
1111
<hops.version>3.2.0.0-SNAPSHOT</hops.version>

0 commit comments

Comments
 (0)