Skip to content

Commit 6ab37dd

Browse files
authored
GH-38914: [Python] Add EncryptionConfiguration.uniform_encryption (#46347)
### Rationale for this change

Support uniform encryption when writing Parquet files.

### What changes are included in this PR?

Exposes `EncryptionConfiguration.uniform_encryption` in pyarrow.

### Are these changes tested?

Yes, see the included tests.

### Are there any user-facing changes?

Adds a `uniform_encryption` parameter to `EncryptionConfiguration`.

* Fixes #38914
* Supersedes #39216
* GitHub Issue: #38914

Authored-by: Martin Nowak <code@dawg.eu>
Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent c8fec38 commit 6ab37dd

File tree

3 files changed

+64
-0
lines changed

3 files changed

+64
-0
lines changed

python/pyarrow/_parquet_encryption.pyx

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ cdef class EncryptionConfiguration(_Weakrefable):
5252
__slots__ = ()
5353

5454
def __init__(self, footer_key, *, column_keys=None,
55+
uniform_encryption=None,
5556
encryption_algorithm=None,
5657
plaintext_footer=None, double_wrapping=None,
5758
cache_lifetime=None, internal_key_material=None,
@@ -60,6 +61,8 @@ cdef class EncryptionConfiguration(_Weakrefable):
6061
new CEncryptionConfiguration(tobytes(footer_key)))
6162
if column_keys is not None:
6263
self.column_keys = column_keys
64+
if uniform_encryption is not None:
65+
self.uniform_encryption = uniform_encryption
6366
if encryption_algorithm is not None:
6467
self.encryption_algorithm = encryption_algorithm
6568
if plaintext_footer is not None:
@@ -103,6 +106,18 @@ cdef class EncryptionConfiguration(_Weakrefable):
103106
[f"{k}: {', '.join(v)}" for k, v in value.items()])
104107
self.configuration.get().column_keys = tobytes(column_keys)
105108

109+
@property
110+
def uniform_encryption(self):
111+
"""Whether to encrypt footer and all columns with the same encryption key.
112+
113+
This cannot be used together with column_keys.
114+
"""
115+
return self.configuration.get().uniform_encryption
116+
117+
@uniform_encryption.setter
118+
def uniform_encryption(self, value):
119+
self.configuration.get().uniform_encryption = value
120+
106121
@property
107122
def encryption_algorithm(self):
108123
"""Parquet encryption algorithm.

python/pyarrow/includes/libparquet_encryption.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ cdef extern from "parquet/encryption/crypto_factory.h" \
7272
CEncryptionConfiguration(const c_string& footer_key) except +
7373
c_string footer_key
7474
c_string column_keys
75+
c_bool uniform_encryption
7576
ParquetCipher encryption_algorithm
7677
c_bool plaintext_footer
7778
c_bool double_wrapping

python/pyarrow/tests/parquet/test_encryption.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ def test_encrypted_parquet_write_read(tempdir, data_table):
118118
encryption_algorithm="AES_GCM_V1",
119119
cache_lifetime=timedelta(minutes=5.0),
120120
data_key_length_bits=256)
121+
assert encryption_config.uniform_encryption is False
121122

122123
kms_connection_config, crypto_factory = write_encrypted_file(
123124
path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, COL_KEY,
@@ -133,6 +134,33 @@ def test_encrypted_parquet_write_read(tempdir, data_table):
133134
assert data_table.equals(result_table)
134135

135136

137+
def test_uniform_encrypted_parquet_write_read(tempdir, data_table):
138+
"""Write a uniformly encrypted parquet, verify it's encrypted, and then read it."""
139+
path = tempdir / PARQUET_NAME
140+
141+
# Encrypt the footer and all columns with the footer key.
142+
encryption_config = pe.EncryptionConfiguration(
143+
footer_key=FOOTER_KEY_NAME,
144+
uniform_encryption=True,
145+
encryption_algorithm="AES_GCM_V1",
146+
cache_lifetime=timedelta(minutes=5.0),
147+
data_key_length_bits=256)
148+
assert encryption_config.uniform_encryption is True
149+
150+
kms_connection_config, crypto_factory = write_encrypted_file(
151+
path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, b"",
152+
encryption_config)
153+
154+
verify_file_encrypted(path)
155+
156+
# Read with decryption properties
157+
decryption_config = pe.DecryptionConfiguration(
158+
cache_lifetime=timedelta(minutes=5.0))
159+
result_table = read_encrypted_parquet(
160+
path, decryption_config, kms_connection_config, crypto_factory)
161+
assert data_table.equals(result_table)
162+
163+
136164
def write_encrypted_parquet(path, table, encryption_config,
137165
kms_connection_config, crypto_factory):
138166
file_encryption_properties = crypto_factory.file_encryption_properties(
@@ -241,6 +269,26 @@ def test_encrypted_parquet_write_no_col_key(tempdir, data_table):
241269
FOOTER_KEY, b"", encryption_config)
242270

243271

272+
def test_encrypted_parquet_write_col_key_and_uniform_encryption(tempdir, data_table):
273+
"""Write an encrypted parquet with both column_keys and
274+
uniform_encryption set, which must raise an error."""
275+
path = tempdir / 'encrypted_table_col_key_and_uniform_encryption.in_mem.parquet'
276+
277+
# Set both column_keys and uniform_encryption (an invalid combination)
278+
encryption_config = pe.EncryptionConfiguration(
279+
footer_key=FOOTER_KEY_NAME,
280+
column_keys={
281+
COL_KEY_NAME: ["a", "b"],
282+
},
283+
uniform_encryption=True)
284+
285+
with pytest.raises(OSError,
286+
match=r"Cannot set both column_keys and uniform_encryption"):
287+
# Write with encryption properties
288+
write_encrypted_file(path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME,
289+
FOOTER_KEY, b"", encryption_config)
290+
291+
244292
def test_encrypted_parquet_write_kms_error(tempdir, data_table,
245293
basic_encryption_config):
246294
"""Write an encrypted parquet, but raise KeyError in KmsClient."""

0 commit comments

Comments
 (0)