Skip to content

Commit f619f30

Browse files
fix(connection, cursor): raise DataError when bind parameter limit is exceeded (#165) (#187)
* raise DataError in Connection execute
* rethrow error in Cursor insert_data_bulk
* add integration and unit test for cursor
* rectify integration test

Co-authored-by: Soksamnang Lim <soklim@amazon.com>
1 parent af2c9b1 commit f619f30

File tree

4 files changed

+190
-3
lines changed

4 files changed

+190
-3
lines changed

redshift_connector/core.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
ArrayContentNotHomogenousError,
3737
ArrayContentNotSupportedError,
3838
DatabaseError,
39+
DataError,
3940
Error,
4041
IntegrityError,
4142
InterfaceError,
@@ -1710,6 +1711,9 @@ def execute(self: "Connection", cursor: Cursor, operation: str, vals) -> None:
17101711
# Int32 - The OID of the parameter data type.
17111712
val: typing.Union[bytes, bytearray] = bytearray(statement_name_bin)
17121713
typing.cast(bytearray, val).extend(statement.encode(_client_encoding) + NULL_BYTE)
1714+
if len(params) > 32767:
1715+
raise DataError("Prepared statement exceeds bind parameter limit 32767. {} bind parameters were "
1716+
"provided. Please retry with fewer bind parameters.".format(len(params)))
17131717
typing.cast(bytearray, val).extend(h_pack(len(params)))
17141718
for oid, fc, send_func in params: # type: ignore
17151719
# Parse message doesn't seem to handle the -1 type_oid for NULL

redshift_connector/cursor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ def insert_data_bulk(
340340
self.execute(insert_stmt, values_list)
341341

342342
except Exception as e:
343-
raise InterfaceError(e)
343+
raise e
344344
finally:
345345
# reset paramstyle to its original value
346346
self.paramstyle = orig_paramstyle

test/integration/test_cursor.py

Lines changed: 128 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pytest # type: ignore
55

66
import redshift_connector
7-
from redshift_connector import InterfaceError
7+
from redshift_connector import InterfaceError, DataError
88

99

1010
@pytest.mark.parametrize("col_name", (("apples", "apples"), ("author‎ ", "author\u200e")))
@@ -66,3 +66,130 @@ def test_insert_data_invalid_column_raises(mocked_csv, db_kwargs):
6666
delimiter=",",
6767
batch_size=3,
6868
)
69+
70+
71+
# max binding parameters for a prepared statement
72+
max_params = 32767
73+
74+
def test_insert_data_raises_too_many_params(db_kwargs):
    """Check that ``execute`` raises DataError when given ``max_params + 1`` bind parameters.

    Builds a single INSERT statement with ``max_params + 1`` ``(%s)``
    placeholders and the same number of values, then asserts that executing
    it raises ``DataError`` carrying the bind-parameter-limit message.

    :param db_kwargs: pytest fixture supplying the keyword arguments passed
        to ``redshift_connector.connect``.
    """
    import re

    num_params = max_params + 1
    # One "(%s)" placeholder per bind parameter.
    prepared_stmt = "INSERT INTO githubissue165 (col1) VALUES " + ", ".join(["(%s)"] * num_params) + ";"
    params = [1] * num_params

    expected_msg = (
        "Prepared statement exceeds bind parameter limit 32767. "
        "{} bind parameters were provided.".format(num_params)
    )

    with redshift_connector.connect(**db_kwargs) as conn:
        with conn.cursor() as cursor:
            cursor.execute("create temporary table githubissue165 (col1 int)")

            # ``match`` is a regular expression; escape the message so that
            # "." only matches a literal period.
            with pytest.raises(DataError, match=re.escape(expected_msg)):
                cursor.execute(prepared_stmt, params)
90+
91+
92+
def test_insert_data_raises_no_exception(db_kwargs):
    """Check that ``execute`` accepts ``max_params`` and ``max_params - 1`` bind parameters.

    For each parameter count an INSERT statement with that many ``(%s)``
    placeholders is executed against a temporary table; any exception fails
    the test with a message naming the parameter count.

    :param db_kwargs: pytest fixture supplying the keyword arguments passed
        to ``redshift_connector.connect``.
    """
    with redshift_connector.connect(**db_kwargs) as conn:
        with conn.cursor() as cursor:
            cursor.execute("create temporary table githubissue165 (col1 int)")

            # Exactly at the limit first, then one below it.
            for num_params in (max_params, max_params - 1):
                prepared_stmt = (
                    "INSERT INTO githubissue165 (col1) VALUES " + ", ".join(["(%s)"] * num_params) + ";"
                )
                params = [1] * num_params
                try:
                    cursor.execute(prepared_stmt, params)
                except Exception as e:
                    # pytest.fail instead of ``assert False``: assert
                    # statements are removed when Python runs with -O.
                    pytest.fail(f"'execute' with {num_params} bind parameters raised an exception {e}")
122+
123+
124+
indices, names = (
125+
[0],
126+
["col1"],
127+
)
128+
129+
130+
@patch("builtins.open", new_callable=mock_open)
def test_insert_data_bulk_raises_too_many_params(mocked_csv, db_kwargs):
    """Check that ``insert_data_bulk`` raises DataError for a batch of ``max_params + 1`` values.

    ``builtins.open`` is patched so the call reads an in-memory CSV instead
    of a real file.

    :param mocked_csv: the ``mock_open`` mock injected by ``@patch``.
    :param db_kwargs: pytest fixture supplying the keyword arguments passed
        to ``redshift_connector.connect``.
    """
    # "\\col1" is the same text the original "\col1" literal produced; "\c"
    # is not a valid escape sequence, so the backslash is written explicitly.
    # First line (presumably treated as the header -- confirm against
    # insert_data_bulk) followed by 32768 lines containing "1".
    csv_str = "\\col1\n" + "1\n" * max_params + "1"  # 32768 rows
    mocked_csv.side_effect = [StringIO(csv_str)]

    with redshift_connector.connect(**db_kwargs) as conn:
        with conn.cursor() as cursor:
            cursor.execute("create temporary table githubissue165 (col1 int)")
            with pytest.raises(
                DataError,
                match="Prepared statement exceeds bind parameter limit 32767.",
            ):
                cursor.insert_data_bulk(
                    filename="mocked_csv",
                    table_name="githubissue165",
                    parameter_indices=indices,
                    column_names=["col1"],
                    delimiter=",",
                    batch_size=max_params + 1,
                )
150+
151+
152+
@patch("builtins.open", new_callable=mock_open)
def test_insert_data_bulk_raises_no_exception_32766(mocked_csv_32766, db_kwargs):
    """Check that ``insert_data_bulk`` succeeds with a batch of ``max_params - 1`` values.

    ``builtins.open`` is patched so the call reads an in-memory CSV instead
    of a real file. Any exception fails the test.

    :param mocked_csv_32766: the ``mock_open`` mock injected by ``@patch``.
    :param db_kwargs: pytest fixture supplying the keyword arguments passed
        to ``redshift_connector.connect``.
    """
    # "\\col1" is the same text the original "\col1" literal produced; "\c"
    # is not a valid escape sequence, so the backslash is written explicitly.
    csv_str_32766 = "\\col1\n" + "1\n" * (max_params - 2) + "1"
    mocked_csv_32766.side_effect = [StringIO(csv_str_32766)]

    with redshift_connector.connect(**db_kwargs) as conn:
        with conn.cursor() as cursor:
            cursor.execute("create temporary table githubissue165 (col1 int)")
            try:
                cursor.insert_data_bulk(
                    filename="mocked_csv_32766",
                    table_name="githubissue165",
                    parameter_indices=indices,
                    column_names=["col1"],
                    delimiter=",",
                    batch_size=max_params - 1,
                )
            except Exception as e:
                # pytest.fail instead of ``assert False``: assert statements
                # are removed when Python runs with -O.
                pytest.fail(f"'insert_data_bulk' with {max_params - 1} bind parameters raised an exception {e}")
173+
174+
175+
@patch("builtins.open", new_callable=mock_open)
def test_insert_data_bulk_raises_no_exception_32767(mocked_csv_32767, db_kwargs):
    """Check that ``insert_data_bulk`` succeeds with a batch of exactly ``max_params`` values.

    ``builtins.open`` is patched so the call reads an in-memory CSV instead
    of a real file. Any exception fails the test.

    :param mocked_csv_32767: the ``mock_open`` mock injected by ``@patch``.
    :param db_kwargs: pytest fixture supplying the keyword arguments passed
        to ``redshift_connector.connect``.
    """
    # "\\col1" is the same text the original "\col1" literal produced; "\c"
    # is not a valid escape sequence, so the backslash is written explicitly.
    csv_str_32767 = "\\col1\n" + "1\n" * (max_params - 1) + "1"
    mocked_csv_32767.side_effect = [StringIO(csv_str_32767)]

    with redshift_connector.connect(**db_kwargs) as conn:
        with conn.cursor() as cursor:
            cursor.execute("create temporary table githubissue165 (col1 int)")
            try:
                cursor.insert_data_bulk(
                    filename="mocked_csv_32767",
                    table_name="githubissue165",
                    parameter_indices=indices,
                    column_names=["col1"],
                    delimiter=",",
                    batch_size=max_params,
                )
            except Exception as e:
                # pytest.fail instead of ``assert False``: assert statements
                # are removed when Python runs with -O.
                pytest.fail(f"'insert_data_bulk' with {max_params} bind parameters raised an exception {e}")

test/unit/test_cursor.py

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import pytest # type: ignore
88

9-
from redshift_connector import Connection, Cursor, InterfaceError
9+
from redshift_connector import Connection, Cursor, InterfaceError, DataError
1010

1111
IS_SINGLE_DATABASE_METADATA_TOGGLE: typing.List[bool] = [True, False]
1212

@@ -406,3 +406,59 @@ def test_insert_data_uses_batch_size(mocked_csv, batch_size, mocker):
406406
actual_insert_stmts_executed += 1
407407

408408
assert actual_insert_stmts_executed == ceil(3 / batch_size)
409+
410+
max_params = 32767
411+
412+
@patch("builtins.open", new_callable=mock_open)
def test_insert_data_bulk_raises_too_many_parameters(mocked_csv, mocker):
    """Check that ``insert_data_bulk`` lets a DataError from the connection propagate.

    The cursor is created without running ``__init__`` and its connection is
    a ``Mock`` whose ``execute`` raises ``DataError``; the test asserts that
    the same exception type and message reach the caller.

    :param mocked_csv: the ``mock_open`` mock injected by ``@patch``.
    :param mocker: pytest-mock fixture used to patch ``Cursor.fetchone`` and
        the mocked connection's ``execute``.
    """
    # mock fetchone to return "True" to ensure the table_name and column_name
    # validation steps pass
    mocker.patch("redshift_connector.Cursor.fetchone", return_value=[1])

    mock_cursor: Cursor = Cursor.__new__(Cursor)

    # mock out the connection to raise DataError.
    mock_cursor._c = Mock()
    mocker.patch.object(
        mock_cursor._c,
        "execute",
        side_effect=DataError("Prepared statement exceeds bind parameter limit 32767."),
    )
    mock_cursor.paramstyle = "mocked"

    indexes = [0]

    # "\\col1" is the same text the original "\col1" literal produced; "\c"
    # is not a valid escape sequence, so the backslash is written explicitly.
    csv_str = "\\col1\n" + "1\n" * max_params + "1"  # 32768 rows
    mocked_csv.side_effect = [StringIO(csv_str)]

    with pytest.raises(DataError, match="Prepared statement exceeds bind parameter limit 32767."):
        mock_cursor.insert_data_bulk(
            filename="mocked_csv",
            table_name="githubissue165",
            parameter_indices=indexes,
            column_names=["col1"],
            delimiter=",",
            batch_size=max_params + 1,
        )
445+
446+
447+
@patch("builtins.open", new_callable=mock_open)
def test_insert_data_raises_too_many_parameters(mocked_csv, mocker):
    """Check that ``Cursor.execute`` lets a DataError from the connection propagate.

    The cursor is created without running ``__init__`` and its connection is
    a ``Mock`` whose ``execute`` raises ``DataError``.

    :param mocked_csv: the ``mock_open`` mock injected by ``@patch``. It is
        declared explicitly because ``@patch`` passes its mock as the first
        positional argument; with ``mocker`` as the only parameter, that
        name was bound to the mock rather than the fixture.
    :param mocker: pytest-mock fixture (not used by this test).
    """
    mock_cursor: Cursor = Cursor.__new__(Cursor)

    # mock out the connection to raise DataError.
    mock_cursor._c = Mock()
    mock_cursor._c.execute.side_effect = DataError("Prepared statement exceeds bind parameter limit 32767.")
    mock_cursor.paramstyle = "mocked"

    # max_params + 1 "(%s)" placeholders and the same number of values.
    prepared_stmt = "INSERT INTO githubissue165 (col1) VALUES " + "(%s), " * max_params + "(%s);"
    params = [1 for _ in range(max_params + 1)]

    with pytest.raises(DataError, match="Prepared statement exceeds bind parameter limit 32767."):
        mock_cursor.execute(prepared_stmt, params)

0 commit comments

Comments
 (0)