diff --git a/python/pyarrow/_parquet_encryption.pyx b/python/pyarrow/_parquet_encryption.pyx index 8e79a56cb460d..95e167cc53fa3 100644 --- a/python/pyarrow/_parquet_encryption.pyx +++ b/python/pyarrow/_parquet_encryption.pyx @@ -52,6 +52,7 @@ cdef class EncryptionConfiguration(_Weakrefable): __slots__ = () def __init__(self, footer_key, *, column_keys=None, + uniform_encryption=None, encryption_algorithm=None, plaintext_footer=None, double_wrapping=None, cache_lifetime=None, internal_key_material=None, @@ -60,6 +61,8 @@ cdef class EncryptionConfiguration(_Weakrefable): new CEncryptionConfiguration(tobytes(footer_key))) if column_keys is not None: self.column_keys = column_keys + if uniform_encryption is not None: + self.uniform_encryption = uniform_encryption if encryption_algorithm is not None: self.encryption_algorithm = encryption_algorithm if plaintext_footer is not None: @@ -103,6 +106,18 @@ cdef class EncryptionConfiguration(_Weakrefable): [f"{k}: {', '.join(v)}" for k, v in value.items()]) self.configuration.get().column_keys = tobytes(column_keys) + @property + def uniform_encryption(self): + """Whether to encrypt footer and all columns with the same encryption key. + + This cannot be used together with column_keys. + """ + return self.configuration.get().uniform_encryption + + @uniform_encryption.setter + def uniform_encryption(self, value): + self.configuration.get().uniform_encryption = value + @property def encryption_algorithm(self): """Parquet encryption algorithm. 
diff --git a/python/pyarrow/includes/libparquet_encryption.pxd b/python/pyarrow/includes/libparquet_encryption.pxd index 2b40414ce5383..4041bf53aaccf 100644 --- a/python/pyarrow/includes/libparquet_encryption.pxd +++ b/python/pyarrow/includes/libparquet_encryption.pxd @@ -72,6 +72,7 @@ cdef extern from "parquet/encryption/crypto_factory.h" \ CEncryptionConfiguration(const c_string& footer_key) except + c_string footer_key c_string column_keys + c_bool uniform_encryption ParquetCipher encryption_algorithm c_bool plaintext_footer c_bool double_wrapping diff --git a/python/pyarrow/tests/parquet/test_encryption.py b/python/pyarrow/tests/parquet/test_encryption.py index ff388ef506997..a11a4935a1c68 100644 --- a/python/pyarrow/tests/parquet/test_encryption.py +++ b/python/pyarrow/tests/parquet/test_encryption.py @@ -118,6 +118,7 @@ def test_encrypted_parquet_write_read(tempdir, data_table): encryption_algorithm="AES_GCM_V1", cache_lifetime=timedelta(minutes=5.0), data_key_length_bits=256) + assert encryption_config.uniform_encryption is False kms_connection_config, crypto_factory = write_encrypted_file( path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, COL_KEY, @@ -133,6 +134,33 @@ def test_encrypted_parquet_write_read(tempdir, data_table): assert data_table.equals(result_table) +def test_uniform_encrypted_parquet_write_read(tempdir, data_table): + """Write an encrypted parquet, verify it's encrypted, and then read it.""" + path = tempdir / PARQUET_NAME + + # Encrypt the footer and all columns with the footer key. + encryption_config = pe.EncryptionConfiguration( + footer_key=FOOTER_KEY_NAME, + uniform_encryption=True, + encryption_algorithm="AES_GCM_V1", + cache_lifetime=timedelta(minutes=5.0), + data_key_length_bits=256) + assert encryption_config.uniform_encryption is True + + kms_connection_config, crypto_factory = write_encrypted_file( + path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, b"", + encryption_config) + + verify_file_encrypted(path)
+ + # Read with decryption properties + decryption_config = pe.DecryptionConfiguration( + cache_lifetime=timedelta(minutes=5.0)) + result_table = read_encrypted_parquet( + path, decryption_config, kms_connection_config, crypto_factory) + assert data_table.equals(result_table) + + def write_encrypted_parquet(path, table, encryption_config, kms_connection_config, crypto_factory): file_encryption_properties = crypto_factory.file_encryption_properties( @@ -241,6 +269,26 @@ def test_encrypted_parquet_write_no_col_key(tempdir, data_table): FOOTER_KEY, b"", encryption_config) +def test_encrypted_parquet_write_col_key_and_uniform_encryption(tempdir, data_table): + """Write an encrypted parquet, but set both column_keys + and uniform_encryption, which must raise an error.""" + path = tempdir / 'encrypted_table_col_key_and_uniform_encryption.in_mem.parquet' + + # Encrypt the footer with the footer key + encryption_config = pe.EncryptionConfiguration( + footer_key=FOOTER_KEY_NAME, + column_keys={ + COL_KEY_NAME: ["a", "b"], + }, + uniform_encryption=True) + + with pytest.raises(OSError, + match=r"Cannot set both column_keys and uniform_encryption"): + # Write with encryption properties + write_encrypted_file(path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, + FOOTER_KEY, b"", encryption_config) + + def test_encrypted_parquet_write_kms_error(tempdir, data_table, basic_encryption_config): """Write an encrypted parquet, but raise KeyError in KmsClient."""