[Internal] Onboard new Files API client to integration tests #905 (Merged)

tests/integration/conftest.py (14 changes: 13 additions & 1 deletion)
@@ -7,7 +7,7 @@

 import pytest

-from databricks.sdk import AccountClient, WorkspaceClient
+from databricks.sdk import AccountClient, FilesAPI, FilesExt, WorkspaceClient
 from databricks.sdk.service.catalog import VolumeType


@@ -125,6 +125,18 @@ def volume(ucws, schema):
     ucws.volumes.delete(volume.full_name)


+@pytest.fixture(scope="session", params=[False, True])
+def files_api(request, ucws) -> FilesAPI:
+    if request.param:
+        # ensure new Files API client is used for files of any size
+        ucws.config.multipart_upload_min_stream_size = 0
+        # enable new Files API client
+        return FilesExt(ucws.api_client, ucws.config)
+    else:
+        # use the default client
+        return ucws.files
+
+
 @pytest.fixture()
 def workspace_dir(w, random):
     directory = f"/Users/{w.current_user.me().user_name}/dir-{random(12)}"
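
For context: the new files_api fixture is session-scoped and parametrized, so every test that requests it runs twice, once with the default w.files client and once with the new FilesExt client. A minimal sketch of the same toggle outside pytest, assuming credentials are configured in the environment and using a hypothetical volume path:

    import io

    from databricks.sdk import FilesExt, WorkspaceClient

    w = WorkspaceClient()  # assumes auth comes from the environment

    # Default client (what the fixture returns when request.param is False):
    files = w.files

    # New client (request.param is True); a threshold of 0 routes files of
    # any size through the new multipart upload code path.
    w.config.multipart_upload_min_stream_size = 0
    files = FilesExt(w.api_client, w.config)

    # Hypothetical path, for illustration only:
    files.upload("/Volumes/main/demo/vol/hello.txt", io.BytesIO(b"hello"))
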
tests/integration/test_auth.py (4 changes: 2 additions & 2 deletions)
@@ -108,12 +108,12 @@ def _get_lts_versions(w) -> typing.List[SparkVersion]:
     return lts_runtimes


-def test_runtime_auth_from_jobs_volumes(ucws, fresh_wheel_file, env_or_skip, random, volume):
+def test_runtime_auth_from_jobs_volumes(ucws, files_api, fresh_wheel_file, env_or_skip, random, volume):
     dbr_versions = [v for v in _get_lts_versions(ucws) if int(v.key.split(".")[0]) >= 15]

     volume_wheel = f"{volume}/tmp/wheels/{random(10)}/{fresh_wheel_file.name}"
     with fresh_wheel_file.open("rb") as f:
-        ucws.files.upload(volume_wheel, f)
+        files_api.upload(volume_wheel, f)

     lib = Library(whl=volume_wheel)
     return _test_runtime_auth_from_jobs_inner(ucws, env_or_skip, random, dbr_versions, lib)
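
The test mirrors the usual wheel-distribution flow: upload the wheel to a UC volume via whichever client the fixture supplied, then reference that path as a job library. A hedged sketch with hypothetical paths (Library is imported from databricks.sdk.service.compute):

    from databricks.sdk.service.compute import Library

    # Hypothetical wheel and volume locations, for illustration only.
    volume_wheel = "/Volumes/main/demo/vol/tmp/wheels/abc123/mypkg-0.1-py3-none-any.whl"
    with open("dist/mypkg-0.1-py3-none-any.whl", "rb") as f:
        files_api.upload(volume_wheel, f)  # streams straight from the open file handle

    lib = Library(whl=volume_wheel)  # job library pointing at the uploaded wheel
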
tests/integration/test_files.py (56 changes: 28 additions & 28 deletions)
@@ -218,30 +218,30 @@ def create_volume(w, catalog, schema, volume):
     return ResourceWithCleanup(lambda: w.volumes.delete(res.full_name))


-def test_files_api_upload_download(ucws, random):
+def test_files_api_upload_download(ucws, files_api, random):
     w = ucws
     schema = "filesit-" + random()
     volume = "filesit-" + random()
     with ResourceWithCleanup.create_schema(w, "main", schema):
         with ResourceWithCleanup.create_volume(w, "main", schema, volume):
             f = io.BytesIO(b"some text data")
             target_file = f"/Volumes/main/{schema}/{volume}/filesit-with-?-and-#-{random()}.txt"
-            w.files.upload(target_file, f)
-            with w.files.download(target_file).contents as f:
+            files_api.upload(target_file, f)
+            with files_api.download(target_file).contents as f:
                 assert f.read() == b"some text data"


-def test_files_api_read_twice_from_one_download(ucws, random):
+def test_files_api_read_twice_from_one_download(ucws, files_api, random):
     w = ucws
     schema = "filesit-" + random()
     volume = "filesit-" + random()
     with ResourceWithCleanup.create_schema(w, "main", schema):
         with ResourceWithCleanup.create_volume(w, "main", schema, volume):
             f = io.BytesIO(b"some text data")
             target_file = f"/Volumes/main/{schema}/{volume}/filesit-{random()}.txt"
-            w.files.upload(target_file, f)
+            files_api.upload(target_file, f)

-            res = w.files.download(target_file).contents
+            res = files_api.download(target_file).contents

             with res:
                 assert res.read() == b"some text data"
@@ -251,82 +251,82 @@ def test_files_api_read_twice_from_one_download(ucws, random):
                 res.read()


-def test_files_api_delete_file(ucws, random):
+def test_files_api_delete_file(ucws, files_api, random):
     w = ucws
     schema = "filesit-" + random()
     volume = "filesit-" + random()
     with ResourceWithCleanup.create_schema(w, "main", schema):
         with ResourceWithCleanup.create_volume(w, "main", schema, volume):
             f = io.BytesIO(b"some text data")
             target_file = f"/Volumes/main/{schema}/{volume}/filesit-{random()}.txt"
-            w.files.upload(target_file, f)
-            w.files.delete(target_file)
+            files_api.upload(target_file, f)
+            files_api.delete(target_file)


-def test_files_api_get_metadata(ucws, random):
+def test_files_api_get_metadata(ucws, files_api, random):
     w = ucws
     schema = "filesit-" + random()
     volume = "filesit-" + random()
     with ResourceWithCleanup.create_schema(w, "main", schema):
         with ResourceWithCleanup.create_volume(w, "main", schema, volume):
             f = io.BytesIO(b"some text data")
             target_file = f"/Volumes/main/{schema}/{volume}/filesit-{random()}.txt"
-            w.files.upload(target_file, f)
-            m = w.files.get_metadata(target_file)
+            files_api.upload(target_file, f)
+            m = files_api.get_metadata(target_file)
             assert m.content_type == "application/octet-stream"
             assert m.content_length == 14
             assert m.last_modified is not None


-def test_files_api_create_directory(ucws, random):
+def test_files_api_create_directory(ucws, files_api, random):
     w = ucws
     schema = "filesit-" + random()
     volume = "filesit-" + random()
     with ResourceWithCleanup.create_schema(w, "main", schema):
         with ResourceWithCleanup.create_volume(w, "main", schema, volume):
             target_directory = f"/Volumes/main/{schema}/{volume}/filesit-{random()}/"
-            w.files.create_directory(target_directory)
+            files_api.create_directory(target_directory)


-def test_files_api_list_directory_contents(ucws, random):
+def test_files_api_list_directory_contents(ucws, files_api, random):
     w = ucws
     schema = "filesit-" + random()
     volume = "filesit-" + random()
     with ResourceWithCleanup.create_schema(w, "main", schema):
         with ResourceWithCleanup.create_volume(w, "main", schema, volume):
             target_directory = f"/Volumes/main/{schema}/{volume}/filesit-{random()}"
-            w.files.upload(target_directory + "/file1.txt", io.BytesIO(b"some text data"))
-            w.files.upload(target_directory + "/file2.txt", io.BytesIO(b"some text data"))
-            w.files.upload(target_directory + "/file3.txt", io.BytesIO(b"some text data"))
+            files_api.upload(target_directory + "/file1.txt", io.BytesIO(b"some text data"))
+            files_api.upload(target_directory + "/file2.txt", io.BytesIO(b"some text data"))
+            files_api.upload(target_directory + "/file3.txt", io.BytesIO(b"some text data"))

-            result = list(w.files.list_directory_contents(target_directory))
+            result = list(files_api.list_directory_contents(target_directory))
             assert len(result) == 3


-def test_files_api_delete_directory(ucws, random):
+def test_files_api_delete_directory(ucws, files_api, random):
     w = ucws
     schema = "filesit-" + random()
     volume = "filesit-" + random()
     with ResourceWithCleanup.create_schema(w, "main", schema):
         with ResourceWithCleanup.create_volume(w, "main", schema, volume):
             target_directory = f"/Volumes/main/{schema}/{volume}/filesit-{random()}/"
-            w.files.create_directory(target_directory)
-            w.files.delete_directory(target_directory)
+            files_api.create_directory(target_directory)
+            files_api.delete_directory(target_directory)


-def test_files_api_get_directory_metadata(ucws, random):
+def test_files_api_get_directory_metadata(ucws, files_api, random):
     w = ucws
     schema = "filesit-" + random()
     volume = "filesit-" + random()
     with ResourceWithCleanup.create_schema(w, "main", schema):
         with ResourceWithCleanup.create_volume(w, "main", schema, volume):
             target_directory = f"/Volumes/main/{schema}/{volume}/filesit-{random()}/"
-            w.files.create_directory(target_directory)
-            w.files.get_directory_metadata(target_directory)
+            files_api.create_directory(target_directory)
+            files_api.get_directory_metadata(target_directory)


 @pytest.mark.benchmark
-def test_files_api_download_benchmark(ucws, random):
+def test_files_api_download_benchmark(ucws, files_api, random):
     w = ucws
     schema = "filesit-" + random()
     volume = "filesit-" + random()
@@ -335,7 +335,7 @@ def test_files_api_download_benchmark(ucws, random):
             # Create a 50 MB file
             f = io.BytesIO(bytes(range(256)) * 200000)
             target_file = f"/Volumes/main/{schema}/{volume}/filesit-benchmark-{random()}.txt"
-            w.files.upload(target_file, f)
+            files_api.upload(target_file, f)

             totals = {}
             for chunk_size_kb in [
@@ -357,7 +357,7 @@ def test_files_api_download_benchmark(ucws, random):
                 count = 10
                 for i in range(count):
                     start = time.time()
-                    f = w.files.download(target_file).contents
+                    f = files_api.download(target_file).contents
                     f.set_chunk_size(chunk_size)
                     with f as vf:
                         vf.read()
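
For orientation, the benchmark times count full streaming reads of the 50 MB file at each chunk size. The aggregation below is an illustrative sketch, not the hidden diff lines; time_download is a hypothetical helper:

    import time

    def time_download(files_api, target_file: str, chunk_size: int, count: int = 10) -> float:
        """Average wall-clock seconds to stream the whole file at one chunk size."""
        total = 0.0
        for _ in range(count):
            start = time.time()
            f = files_api.download(target_file).contents
            f.set_chunk_size(chunk_size)  # as in the diff: read granularity in bytes
            with f as vf:
                vf.read()  # drain the stream to completion
            total += time.time() - start
        return total / count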