Skip to content

Commit 266781f

Browse files
committed
GH-38914: [Python] Add EncryptionConfiguration.uniform_encryption
- expose missing EncryptionConfiguration parameter in pyarrow
1 parent ef3b0ef commit 266781f

File tree

3 files changed

+61
-0
lines changed

3 files changed

+61
-0
lines changed

python/pyarrow/_parquet_encryption.pyx

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ cdef class EncryptionConfiguration(_Weakrefable):
5252
__slots__ = ()
5353

5454
def __init__(self, footer_key, *, column_keys=None,
55+
uniform_encryption=None,
5556
encryption_algorithm=None,
5657
plaintext_footer=None, double_wrapping=None,
5758
cache_lifetime=None, internal_key_material=None,
@@ -60,6 +61,8 @@ cdef class EncryptionConfiguration(_Weakrefable):
6061
new CEncryptionConfiguration(tobytes(footer_key)))
6162
if column_keys is not None:
6263
self.column_keys = column_keys
64+
if uniform_encryption is not None:
65+
self.uniform_encryption = uniform_encryption
6366
if encryption_algorithm is not None:
6467
self.encryption_algorithm = encryption_algorithm
6568
if plaintext_footer is not None:
@@ -103,6 +106,18 @@ cdef class EncryptionConfiguration(_Weakrefable):
103106
["{}: {}".format(k, ", ".join(v)) for k, v in value.items()])
104107
self.configuration.get().column_keys = tobytes(column_keys)
105108

109+
@property
110+
def uniform_encryption(self):
111+
"""Encrypt footer and all columns with the same encryption key.
112+
113+
This cannot be used together with column_keys.
114+
"""
115+
return self.configuration.get().uniform_encryption
116+
117+
@uniform_encryption.setter
118+
def uniform_encryption(self, value):
119+
self.configuration.get().uniform_encryption = value
120+
106121
@property
107122
def encryption_algorithm(self):
108123
"""Parquet encryption algorithm.

python/pyarrow/includes/libparquet_encryption.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ cdef extern from "parquet/encryption/crypto_factory.h" \
7272
CEncryptionConfiguration(const c_string& footer_key) except +
7373
c_string footer_key
7474
c_string column_keys
75+
c_bool uniform_encryption
7576
ParquetCipher encryption_algorithm
7677
c_bool plaintext_footer
7778
c_bool double_wrapping

python/pyarrow/tests/parquet/test_encryption.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,32 @@ def test_encrypted_parquet_write_read(tempdir, data_table):
133133
assert data_table.equals(result_table)
134134

135135

136+
def test_uniform_encrypted_parquet_write_read(tempdir, data_table):
137+
"""Write a uniformly encrypted parquet, verify it's encrypted, and read it."""
138+
path = tempdir / PARQUET_NAME
139+
140+
# Encrypt the footer and all columns with the footer key.
141+
encryption_config = pe.EncryptionConfiguration(
142+
footer_key=FOOTER_KEY_NAME,
143+
uniform_encryption=True,
144+
encryption_algorithm="AES_GCM_V1",
145+
cache_lifetime=timedelta(minutes=5.0),
146+
data_key_length_bits=256)
147+
148+
kms_connection_config, crypto_factory = write_encrypted_file(
149+
path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, b"",
150+
encryption_config)
151+
152+
verify_file_encrypted(path)
153+
154+
# Read with decryption properties
155+
decryption_config = pe.DecryptionConfiguration(
156+
cache_lifetime=timedelta(minutes=5.0))
157+
result_table = read_encrypted_parquet(
158+
path, decryption_config, kms_connection_config, crypto_factory)
159+
assert data_table.equals(result_table)
160+
161+
136162
def write_encrypted_parquet(path, table, encryption_config,
137163
kms_connection_config, crypto_factory):
138164
file_encryption_properties = crypto_factory.file_encryption_properties(
@@ -241,6 +267,25 @@ def test_encrypted_parquet_write_no_col_key(tempdir, data_table):
241267
FOOTER_KEY, b"", encryption_config)
242268

243269

270+
def test_encrypted_parquet_write_col_key_and_uniform_encryption(tempdir, data_table):
271+
"""Write an encrypted parquet, but give both column keys and
272+
uniform encryption, which cannot be used together."""
273+
path = tempdir / 'encrypted_table_col_key_and_uniform_encryption.in_mem.parquet'
274+
275+
# Set both column_keys and uniform_encryption (an invalid combination)
276+
encryption_config = pe.EncryptionConfiguration(
277+
footer_key=FOOTER_KEY_NAME,
278+
column_keys={
279+
COL_KEY_NAME: ["a", "b"],
280+
},
281+
uniform_encryption=True)
282+
283+
with pytest.raises(OSError, match="Cannot set both column_keys and uniform_encryption"):
284+
# Write with encryption properties
285+
write_encrypted_file(path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME,
286+
FOOTER_KEY, b"", encryption_config)
287+
288+
244289
def test_encrypted_parquet_write_kms_error(tempdir, data_table,
245290
basic_encryption_config):
246291
"""Write an encrypted parquet, but raise KeyError in KmsClient."""

0 commit comments

Comments
 (0)