Skip to content

GH-38914: [Python] Add EncryptionConfiguration.uniform_encryption #46347

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions python/pyarrow/_parquet_encryption.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ cdef class EncryptionConfiguration(_Weakrefable):
__slots__ = ()

def __init__(self, footer_key, *, column_keys=None,
uniform_encryption=None,
encryption_algorithm=None,
plaintext_footer=None, double_wrapping=None,
cache_lifetime=None, internal_key_material=None,
Expand All @@ -60,6 +61,8 @@ cdef class EncryptionConfiguration(_Weakrefable):
new CEncryptionConfiguration(tobytes(footer_key)))
if column_keys is not None:
self.column_keys = column_keys
if uniform_encryption is not None:
self.uniform_encryption = uniform_encryption
if encryption_algorithm is not None:
self.encryption_algorithm = encryption_algorithm
if plaintext_footer is not None:
Expand Down Expand Up @@ -103,6 +106,18 @@ cdef class EncryptionConfiguration(_Weakrefable):
[f"{k}: {', '.join(v)}" for k, v in value.items()])
self.configuration.get().column_keys = tobytes(column_keys)

@property
def uniform_encryption(self):
"""Whether to encrypt footer and all columns with the same encryption key.

This cannot be used together with column_keys. Writing with both
options set fails with an OSError ("Cannot set both column_keys and
uniform_encryption").
"""
return self.configuration.get().uniform_encryption

@uniform_encryption.setter
def uniform_encryption(self, value):
# Assigned straight to the c_bool field of the wrapped C++ configuration;
# no validation against column_keys happens here.
self.configuration.get().uniform_encryption = value

@property
def encryption_algorithm(self):
"""Parquet encryption algorithm.
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/includes/libparquet_encryption.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ cdef extern from "parquet/encryption/crypto_factory.h" \
CEncryptionConfiguration(const c_string& footer_key) except +
c_string footer_key
c_string column_keys
c_bool uniform_encryption
ParquetCipher encryption_algorithm
c_bool plaintext_footer
c_bool double_wrapping
Expand Down
48 changes: 48 additions & 0 deletions python/pyarrow/tests/parquet/test_encryption.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def test_encrypted_parquet_write_read(tempdir, data_table):
encryption_algorithm="AES_GCM_V1",
cache_lifetime=timedelta(minutes=5.0),
data_key_length_bits=256)
assert encryption_config.uniform_encryption is False

kms_connection_config, crypto_factory = write_encrypted_file(
path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, COL_KEY,
Expand All @@ -133,6 +134,33 @@ def test_encrypted_parquet_write_read(tempdir, data_table):
assert data_table.equals(result_table)


def test_uniform_encrypted_parquet_write_read(tempdir, data_table):
    """Round-trip a table through a uniformly encrypted Parquet file.

    The file is written with ``uniform_encryption=True``, checked to be
    encrypted on disk, then read back and compared with the input table.
    """
    target = tempdir / PARQUET_NAME

    # Uniform encryption: only a footer key is configured, no column_keys.
    config = pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME,
        uniform_encryption=True,
        encryption_algorithm="AES_GCM_V1",
        cache_lifetime=timedelta(minutes=5.0),
        data_key_length_bits=256)
    assert config.uniform_encryption is True

    # An empty byte string is passed in the position where the
    # column-key tests pass COL_KEY.
    kms_config, factory = write_encrypted_file(
        target, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY,
        b"", config)

    verify_file_encrypted(target)

    # Decrypt and make sure the contents survived the round trip.
    round_tripped = read_encrypted_parquet(
        target,
        pe.DecryptionConfiguration(cache_lifetime=timedelta(minutes=5.0)),
        kms_config,
        factory)
    assert data_table.equals(round_tripped)


def write_encrypted_parquet(path, table, encryption_config,
kms_connection_config, crypto_factory):
file_encryption_properties = crypto_factory.file_encryption_properties(
Expand Down Expand Up @@ -241,6 +269,26 @@ def test_encrypted_parquet_write_no_col_key(tempdir, data_table):
FOOTER_KEY, b"", encryption_config)


def test_encrypted_parquet_write_col_key_and_uniform_encryption(tempdir, data_table):
    """Writing must fail when both column_keys and uniform_encryption
    are set, because the two options are mutually exclusive."""
    path = tempdir / 'encrypted_table_col_key_and_uniform_encryption.in_mem.parquet'

    # Deliberately conflicting configuration: per-column keys *and*
    # uniform encryption with the footer key.
    encryption_config = pe.EncryptionConfiguration(
        footer_key=FOOTER_KEY_NAME,
        column_keys={
            COL_KEY_NAME: ["a", "b"],
        },
        uniform_encryption=True)

    # The configuration object itself is accepted; the conflict is only
    # reported once the file is written with it.
    with pytest.raises(OSError,
                       match=r"Cannot set both column_keys and uniform_encryption"):
        # Write with encryption properties
        write_encrypted_file(path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME,
                             FOOTER_KEY, b"", encryption_config)


def test_encrypted_parquet_write_kms_error(tempdir, data_table,
basic_encryption_config):
"""Write an encrypted parquet, but raise KeyError in KmsClient."""
Expand Down
Loading