From 52cb8d86f06c504f8f7c09a2611bee682669870e Mon Sep 17 00:00:00 2001 From: Kirill Safonov Date: Thu, 27 Feb 2025 14:08:52 +0100 Subject: [PATCH] Onboard new Files API client to integration tests --- tests/integration/conftest.py | 14 ++++++++- tests/integration/test_auth.py | 4 +-- tests/integration/test_files.py | 56 ++++++++++++++++----------------- 3 files changed, 43 insertions(+), 31 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 95dd10fd1..efcc78e95 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -7,7 +7,7 @@ import pytest -from databricks.sdk import AccountClient, WorkspaceClient +from databricks.sdk import AccountClient, FilesAPI, FilesExt, WorkspaceClient from databricks.sdk.service.catalog import VolumeType @@ -125,6 +125,18 @@ def volume(ucws, schema): ucws.volumes.delete(volume.full_name) +@pytest.fixture(scope="session", params=[False, True]) +def files_api(request, ucws) -> FilesAPI: + if request.param: + # ensure new Files API client is used for files of any size + ucws.config.multipart_upload_min_stream_size = 0 + # enable new Files API client + return FilesExt(ucws.api_client, ucws.config) + else: + # use the default client + return ucws.files + + @pytest.fixture() def workspace_dir(w, random): directory = f"/Users/{w.current_user.me().user_name}/dir-{random(12)}" diff --git a/tests/integration/test_auth.py b/tests/integration/test_auth.py index c6904599f..b50c54f1b 100644 --- a/tests/integration/test_auth.py +++ b/tests/integration/test_auth.py @@ -108,12 +108,12 @@ def _get_lts_versions(w) -> typing.List[SparkVersion]: return lts_runtimes -def test_runtime_auth_from_jobs_volumes(ucws, fresh_wheel_file, env_or_skip, random, volume): +def test_runtime_auth_from_jobs_volumes(ucws, files_api, fresh_wheel_file, env_or_skip, random, volume): dbr_versions = [v for v in _get_lts_versions(ucws) if int(v.key.split(".")[0]) >= 15] volume_wheel = f"{volume}/tmp/wheels/{random(10)}/{fresh_wheel_file.name}" with fresh_wheel_file.open("rb") as f: - ucws.files.upload(volume_wheel, f) + files_api.upload(volume_wheel, f) lib = Library(whl=volume_wheel) return _test_runtime_auth_from_jobs_inner(ucws, env_or_skip, random, dbr_versions, lib) diff --git a/tests/integration/test_files.py b/tests/integration/test_files.py index 932f85550..348f88b05 100644 --- a/tests/integration/test_files.py +++ b/tests/integration/test_files.py @@ -218,7 +218,7 @@ def create_volume(w, catalog, schema, volume): return ResourceWithCleanup(lambda: w.volumes.delete(res.full_name)) -def test_files_api_upload_download(ucws, random): +def test_files_api_upload_download(ucws, files_api, random): w = ucws schema = "filesit-" + random() volume = "filesit-" + random() @@ -226,12 +226,12 @@ def test_files_api_upload_download(ucws, random): with ResourceWithCleanup.create_volume(w, "main", schema, volume): f = io.BytesIO(b"some text data") target_file = f"/Volumes/main/{schema}/{volume}/filesit-with-?-and-#-{random()}.txt" - w.files.upload(target_file, f) - with w.files.download(target_file).contents as f: + files_api.upload(target_file, f) + with files_api.download(target_file).contents as f: assert f.read() == b"some text data" -def test_files_api_read_twice_from_one_download(ucws, random): +def test_files_api_read_twice_from_one_download(ucws, files_api, random): w = ucws schema = "filesit-" + random() volume = "filesit-" + random() @@ -239,9 +239,9 @@ def test_files_api_read_twice_from_one_download(ucws, random): with ResourceWithCleanup.create_volume(w, "main", schema, volume): f = io.BytesIO(b"some text data") target_file = f"/Volumes/main/{schema}/{volume}/filesit-{random()}.txt" - w.files.upload(target_file, f) + files_api.upload(target_file, f) - res = w.files.download(target_file).contents + res = files_api.download(target_file).contents with res: assert res.read() == b"some text data" @@ -251,7 +251,7 @@ def test_files_api_read_twice_from_one_download(ucws, random): res.read() -def test_files_api_delete_file(ucws, random): +def test_files_api_delete_file(ucws, files_api, random): w = ucws schema = "filesit-" + random() volume = "filesit-" + random() @@ -259,11 +259,11 @@ def test_files_api_delete_file(ucws, random): with ResourceWithCleanup.create_volume(w, "main", schema, volume): f = io.BytesIO(b"some text data") target_file = f"/Volumes/main/{schema}/{volume}/filesit-{random()}.txt" - w.files.upload(target_file, f) - w.files.delete(target_file) + files_api.upload(target_file, f) + files_api.delete(target_file) -def test_files_api_get_metadata(ucws, random): +def test_files_api_get_metadata(ucws, files_api, random): w = ucws schema = "filesit-" + random() volume = "filesit-" + random() @@ -271,62 +271,62 @@ def test_files_api_get_metadata(ucws, random): with ResourceWithCleanup.create_volume(w, "main", schema, volume): f = io.BytesIO(b"some text data") target_file = f"/Volumes/main/{schema}/{volume}/filesit-{random()}.txt" - w.files.upload(target_file, f) - m = w.files.get_metadata(target_file) + files_api.upload(target_file, f) + m = files_api.get_metadata(target_file) assert m.content_type == "application/octet-stream" assert m.content_length == 14 assert m.last_modified is not None -def test_files_api_create_directory(ucws, random): +def test_files_api_create_directory(ucws, files_api, random): w = ucws schema = "filesit-" + random() volume = "filesit-" + random() with ResourceWithCleanup.create_schema(w, "main", schema): with ResourceWithCleanup.create_volume(w, "main", schema, volume): target_directory = f"/Volumes/main/{schema}/{volume}/filesit-{random()}/" - w.files.create_directory(target_directory) + files_api.create_directory(target_directory) -def test_files_api_list_directory_contents(ucws, random): +def test_files_api_list_directory_contents(ucws, files_api, random): w = ucws schema = "filesit-" + random() volume = "filesit-" + random() with ResourceWithCleanup.create_schema(w, "main", schema): with ResourceWithCleanup.create_volume(w, "main", schema, volume): target_directory = f"/Volumes/main/{schema}/{volume}/filesit-{random()}" - w.files.upload(target_directory + "/file1.txt", io.BytesIO(b"some text data")) - w.files.upload(target_directory + "/file2.txt", io.BytesIO(b"some text data")) - w.files.upload(target_directory + "/file3.txt", io.BytesIO(b"some text data")) + files_api.upload(target_directory + "/file1.txt", io.BytesIO(b"some text data")) + files_api.upload(target_directory + "/file2.txt", io.BytesIO(b"some text data")) + files_api.upload(target_directory + "/file3.txt", io.BytesIO(b"some text data")) - result = list(w.files.list_directory_contents(target_directory)) + result = list(files_api.list_directory_contents(target_directory)) assert len(result) == 3 -def test_files_api_delete_directory(ucws, random): +def test_files_api_delete_directory(ucws, files_api, random): w = ucws schema = "filesit-" + random() volume = "filesit-" + random() with ResourceWithCleanup.create_schema(w, "main", schema): with ResourceWithCleanup.create_volume(w, "main", schema, volume): target_directory = f"/Volumes/main/{schema}/{volume}/filesit-{random()}/" - w.files.create_directory(target_directory) - w.files.delete_directory(target_directory) + files_api.create_directory(target_directory) + files_api.delete_directory(target_directory) -def test_files_api_get_directory_metadata(ucws, random): +def test_files_api_get_directory_metadata(ucws, files_api, random): w = ucws schema = "filesit-" + random() volume = "filesit-" + random() with ResourceWithCleanup.create_schema(w, "main", schema): with ResourceWithCleanup.create_volume(w, "main", schema, volume): target_directory = f"/Volumes/main/{schema}/{volume}/filesit-{random()}/" - w.files.create_directory(target_directory) - w.files.get_directory_metadata(target_directory) + files_api.create_directory(target_directory) + files_api.get_directory_metadata(target_directory) @pytest.mark.benchmark -def test_files_api_download_benchmark(ucws, random): +def test_files_api_download_benchmark(ucws, files_api, random): w = ucws schema = "filesit-" + random() volume = "filesit-" + random() @@ -335,7 +335,7 @@ def test_files_api_download_benchmark(ucws, random): # Create a 50 MB file f = io.BytesIO(bytes(range(256)) * 200000) target_file = f"/Volumes/main/{schema}/{volume}/filesit-benchmark-{random()}.txt" - w.files.upload(target_file, f) + files_api.upload(target_file, f) totals = {} for chunk_size_kb in [ @@ -357,7 +357,7 @@ def test_files_api_download_benchmark(ucws, random): count = 10 for i in range(count): start = time.time() - f = w.files.download(target_file).contents + f = files_api.download(target_file).contents f.set_chunk_size(chunk_size) with f as vf: vf.read()