From de87abc4f838b33f2b9e9a7423cec36add1de310 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florentin=20D=C3=B6rre?= Date: Thu, 18 Sep 2025 17:55:47 +0200 Subject: [PATCH] Introduce aura_instance_id parameter to allow passing it explicitly. This is important if the original Aura instance id is not part of the URI, such as for multi-db support. ref GDSA-283 --- changelog.md | 2 +- .../pages/graph-analytics-serverless.adoc | 2 ++ .../session/dbms_connection_info.py | 9 +++++++++ .../session/dedicated_sessions.py | 16 +++++++++++---- graphdatascience/session/gds_sessions.py | 5 ++++- .../unit/session/test_dbms_connection_info.py | 7 +++++++ .../tests/unit/test_dedicated_sessions.py | 20 +++++++++++++++++++ 7 files changed, 55 insertions(+), 6 deletions(-) diff --git a/changelog.md b/changelog.md index 1765c2b1a..baae6c6db 100644 --- a/changelog.md +++ b/changelog.md @@ -13,7 +13,7 @@ ## Improvements - Added retries to verifying connection and authentication of DB connection provided to `GdsSessions.getOrCreate` - +- `GdsSessions.get_or_create` now allows specifying the `aura_instance_id`. This is required if the instance id could not be derived from the provided database connection URI. 
## Other changes diff --git a/doc/modules/ROOT/pages/graph-analytics-serverless.adoc b/doc/modules/ROOT/pages/graph-analytics-serverless.adoc index 4fd2c2557..ec54c53df 100644 --- a/doc/modules/ROOT/pages/graph-analytics-serverless.adoc +++ b/doc/modules/ROOT/pages/graph-analytics-serverless.adoc @@ -85,6 +85,7 @@ sessions.get_or_create( db_connection: Optional[DbmsConnectionInfo] = None, ttl: Optional[timedelta] = None, cloud_location: Optional[CloudLocation] = None, + aura_instance_id: Optional[str] = None, timeout: Optional[int] = None, neo4j_driver_options: Optional[dict[str, Any]] = None, arrow_client_options: Optional[dict[str, Any]] = None, @@ -100,6 +101,7 @@ sessions.get_or_create( | db_connection | https://neo4j.com/docs/graph-data-science-client/{docs-version}/api/sessions/DbmsConnectionInfo[DbmsConnectionInfo] | yes | None | Bolt server URL, username, and password to a Neo4j DBMS. Required for the Attached and Self-managed types. Alternatively to username and password, you can provide a `neo4j.Auth` https://neo4j.com/docs/python-manual/current/connect-advanced/#authentication-methods[object]. | ttl | datetime.timedelta | yes | 1h | Time-to-live for the session. | cloud_location | https://neo4j.com/docs/graph-data-science-client/{docs-version}/api/sessions/cloud_location[CloudLocation] | yes | None | Aura-supported cloud provider and region where the GDS Session will run. Required for the Self-managed and Standalone types. +| aura_instance_id | str | yes | None | Aura instance ID for the session. Required for the Attached type, if the id could not be derived from the DB connection. | timeout | int | yes | None | Seconds to wait for the session to enter Ready state. If the time is exceeded, an error will be returned. | neo4j_driver_options | dict[str, any] | yes | None | Additional options passed to the Neo4j driver to the Neo4j DBMS. Only relevant if `db_connection` is specified. 
| arrow_client_options | dict[str, any] | yes | None | Additional options passed to the Arrow Flight Client used to connect to the Session. diff --git a/graphdatascience/session/dbms_connection_info.py b/graphdatascience/session/dbms_connection_info.py index 756400627..3f1500a91 100644 --- a/graphdatascience/session/dbms_connection_info.py +++ b/graphdatascience/session/dbms_connection_info.py @@ -2,6 +2,7 @@ import os from dataclasses import dataclass +from urllib.parse import urlparse from neo4j import Auth, basic_auth @@ -38,6 +39,14 @@ def get_auth(self) -> Auth | None: auth = basic_auth(self.username, self.password) return auth + def hosted_in_aura(self) -> bool: + """ + Returns: + True if the DBMS is hosted in Aura, False otherwise. + """ + host = urlparse(self.uri).hostname + return host is not None and host.endswith("databases.neo4j.io") + @staticmethod def from_env() -> DbmsConnectionInfo: """ diff --git a/graphdatascience/session/dedicated_sessions.py b/graphdatascience/session/dedicated_sessions.py index b4296398e..87dce4ed0 100644 --- a/graphdatascience/session/dedicated_sessions.py +++ b/graphdatascience/session/dedicated_sessions.py @@ -61,6 +61,7 @@ def get_or_create( db_connection: DbmsConnectionInfo | None = None, ttl: timedelta | None = None, cloud_location: CloudLocation | None = None, + aura_instance_id: str | None = None, timeout: int | None = None, neo4j_driver_options: dict[str, Any] | None = None, arrow_client_options: dict[str, Any] | None = None, @@ -74,12 +75,17 @@ def get_or_create( else: db_runner = self._create_db_runner(db_connection, neo4j_driver_options) - dbid = AuraApi.extract_id(db_connection.uri) - aura_db_instance = self._aura_api.list_instance(dbid) + aura_instance_id = AuraApi.extract_id(db_connection.uri) if not aura_instance_id else aura_instance_id + aura_db_instance = self._aura_api.list_instance(aura_instance_id) if aura_db_instance is None: if not cloud_location: - raise ValueError("cloud_location must be provided for 
sessions against a self-managed DB.") + if db_connection.hosted_in_aura(): + raise ValueError( + f"Could not derive Aura instance id from the URI `{db_connection.uri}`. Please provide the instance id via the `aura_instance_id` argument, or specify a cloud location if the DBMS is self-managed." + ) + else: + raise ValueError("cloud_location must be provided for sessions against a self-managed DB.") session_details = self._get_or_create_self_managed_session( session_name, memory.value, cloud_location, ttl @@ -88,7 +94,9 @@ def get_or_create( if cloud_location is not None: raise ValueError("cloud_location cannot be provided for sessions against an AuraDB.") - session_details = self._get_or_create_attached_session(session_name, memory.value, dbid, ttl) + session_details = self._get_or_create_attached_session( + session_name, memory.value, aura_instance_id, ttl + ) self._await_session_running(session_details, timeout) diff --git a/graphdatascience/session/gds_sessions.py b/graphdatascience/session/gds_sessions.py index 5daecf1db..b72c94a6d 100644 --- a/graphdatascience/session/gds_sessions.py +++ b/graphdatascience/session/gds_sessions.py @@ -107,6 +107,7 @@ def get_or_create( timeout: int | None = None, neo4j_driver_config: dict[str, Any] | None = None, arrow_client_options: dict[str, Any] | None = None, + aura_instance_id: str | None = None, ) -> AuraGraphDataScience: """ Retrieves an existing session with the given session name and database connection, @@ -122,8 +123,9 @@ def get_or_create( ttl: (timedelta | None): The sessions time to live after inactivity in seconds. cloud_location (CloudLocation | None): The cloud location. Required if the GDS session is for a self-managed database. timeout (int | None): Optional timeout (in seconds) when waiting for session to become ready. If unset the method will wait forever. If set and session does not become ready an exception will be raised. It is user responsibility to ensure resource gets cleaned up in this situation. 
- neo4j_driver_config (dict[str, Any] | None): Optional configuration for the Neo4j driver to the Neo4j DBMS. Only relevant if `db_connection` is specified.. + neo4j_driver_config (dict[str, Any] | None): Optional configuration for the Neo4j driver to the Neo4j DBMS. Only relevant if `db_connection` is specified.. arrow_client_options (dict[str, Any] | None): Optional configuration for the Arrow Flight client. + aura_instance_id (str | None): The Aura instance id. Required if the database is in Aura but its instance id cannot be inferred from the connection information. Returns: AuraGraphDataScience: The session. """ @@ -132,6 +134,7 @@ def get_or_create( memory, db_connection=db_connection, ttl=ttl, + aura_instance_id=aura_instance_id, cloud_location=cloud_location, timeout=timeout, neo4j_driver_options=neo4j_driver_config, diff --git a/graphdatascience/tests/unit/session/test_dbms_connection_info.py b/graphdatascience/tests/unit/session/test_dbms_connection_info.py index df06bb8f9..2901205eb 100644 --- a/graphdatascience/tests/unit/session/test_dbms_connection_info.py +++ b/graphdatascience/tests/unit/session/test_dbms_connection_info.py @@ -39,3 +39,10 @@ def test_dbms_connection_info_fail_on_auth_and_username() -> None: ) else: assert False, "Expected ValueError was not raised" + + +def test_dbms_connection_info_hosted_in_aura() -> None: + assert DbmsConnectionInfo(uri="bolt://something.databases.neo4j.io").hosted_in_aura() + assert DbmsConnectionInfo(uri="bolt://something.databases.neo4j.io:7474").hosted_in_aura() + + assert not DbmsConnectionInfo(uri="bolt://something.neo4j.com").hosted_in_aura() diff --git a/graphdatascience/tests/unit/test_dedicated_sessions.py b/graphdatascience/tests/unit/test_dedicated_sessions.py index 161ed7b3b..bafaf8b86 100644 --- a/graphdatascience/tests/unit/test_dedicated_sessions.py +++ b/graphdatascience/tests/unit/test_dedicated_sessions.py @@ -614,6 +614,26 @@ def test_get_or_create_for_without_cloud_location(mocker: 
MockerFixture, aura_ap ) +def test_get_or_create_for_non_derivable_aura_instance_id(mocker: MockerFixture, aura_api: AuraApi) -> None: + sessions = DedicatedSessions(aura_api) + patch_neo4j_query_runner(mocker) + + with pytest.raises( + ValueError, + match=re.escape( + "Could not derive Aura instance id from the URI `neo4j+s://06cba79f.databases.neo4j.io`. Please provide the instance id via the `aura_instance_id` argument, or specify a cloud location if the DBMS is self-managed." + ), + ): + sessions.get_or_create( + "my-session", + SessionMemory.m_8GB, + DbmsConnectionInfo( + "neo4j+s://06cba79f.databases.neo4j.io", "dbuser", "db_pw" + ), # not part of list instances result + cloud_location=None, + ) + + def test_get_or_create_failed_session(mocker: MockerFixture, aura_api: AuraApi) -> None: db = _setup_db_instance(aura_api)