def test_sea_multi_chunk_with_cloud_fetch(requested_row_count=5000):
    """
    Test executing a query that generates multiple chunks using cloud fetch.

    Args:
        requested_row_count: Number of rows to request in the query

    Returns:
        bool: True if the test passed, False otherwise
    """
    server_hostname = os.environ.get("DATABRICKS_SERVER_HOSTNAME")
    http_path = os.environ.get("DATABRICKS_HTTP_PATH")
    access_token = os.environ.get("DATABRICKS_TOKEN")
    catalog = os.environ.get("DATABRICKS_CATALOG")

    # Create output directory for test results
    output_dir = Path("test_results")
    output_dir.mkdir(exist_ok=True)

    # Files to store results
    rows_file = output_dir / "cloud_fetch_rows.csv"
    stats_file = output_dir / "cloud_fetch_stats.json"
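    # The CSV records one line per fetched row; the JSON summarizes the
    # integrity checks computed below.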
@@ -50,9 +50,7 @@ def test_sea_multi_chunk_with_cloud_fetch(requested_row_count=5000):

    try:
        # Create connection with cloud fetch enabled
        logger.info("Creating connection for query execution with cloud fetch enabled")
        connection = Connection(
            server_hostname=server_hostname,
            http_path=http_path,
@@ -76,46 +74,50 @@ def test_sea_multi_chunk_with_cloud_fetch(requested_row_count=5000):
            concat('value_', repeat('a', 10000)) as test_value
        FROM range(1, {requested_row_count} + 1) AS t(id)
        """

        logger.info(
            f"Executing query with cloud fetch to generate {requested_row_count} rows"
        )
        start_time = time.time()
        cursor.execute(query)

        # Fetch all rows
        rows = cursor.fetchall()
        actual_row_count = len(rows)
        end_time = time.time()
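        # The elapsed time covers both execute() and fetching every chunk.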
        execution_time = end_time - start_time

        logger.info(f"Query executed in {execution_time:.2f} seconds")
        logger.info(
            f"Requested {requested_row_count} rows, received {actual_row_count} rows"
        )

        # Write rows to CSV file for inspection
        logger.info(f"Writing rows to {rows_file}")
        with open(rows_file, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["id", "value_length"])  # Header

            # Extract IDs to check for duplicates and missing values
            row_ids = []
            for row in rows:
                row_id = row[0]
                value_length = len(row[1])
                writer.writerow([row_id, value_length])
                row_ids.append(row_id)

        # Verify row count
        success = actual_row_count == requested_row_count
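        # The flag is tightened below: duplicate, missing, or extra row IDs
        # also mark the test as failed.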

        # Check for duplicate IDs
        unique_ids = set(row_ids)
        duplicate_count = len(row_ids) - len(unique_ids)

        # Check for missing IDs
        expected_ids = set(range(1, requested_row_count + 1))
        missing_ids = expected_ids - unique_ids
        extra_ids = unique_ids - expected_ids

        # Write statistics to JSON file
        stats = {
            "requested_row_count": requested_row_count,
@@ -124,56 +126,64 @@ def test_sea_multi_chunk_with_cloud_fetch(requested_row_count=5000):
            "duplicate_count": duplicate_count,
            "missing_ids_count": len(missing_ids),
            "extra_ids_count": len(extra_ids),
            "missing_ids": list(missing_ids)[:100]
            if missing_ids
            else [],  # Limit to first 100 for readability
            "extra_ids": list(extra_ids)[:100]
            if extra_ids
            else [],  # Limit to first 100 for readability
            "success": success
            and duplicate_count == 0
            and len(missing_ids) == 0
            and len(extra_ids) == 0,
        }

        with open(stats_file, "w") as f:
            json.dump(stats, f, indent=2)

        # Log detailed results
        if duplicate_count > 0:
            logger.error(f"❌ FAILED: Found {duplicate_count} duplicate row IDs")
            success = False
        else:
            logger.info("✅ PASSED: No duplicate row IDs found")

        if missing_ids:
            logger.error(f"❌ FAILED: Missing {len(missing_ids)} expected row IDs")
            if len(missing_ids) <= 10:
                logger.error(f"Missing IDs: {sorted(list(missing_ids))}")
            success = False
        else:
            logger.info("✅ PASSED: All expected row IDs present")

        if extra_ids:
            logger.error(f"❌ FAILED: Found {len(extra_ids)} unexpected row IDs")
            if len(extra_ids) <= 10:
                logger.error(f"Extra IDs: {sorted(list(extra_ids))}")
            success = False
        else:
            logger.info("✅ PASSED: No unexpected row IDs found")

        if actual_row_count == requested_row_count:
            logger.info("✅ PASSED: Row count matches requested count")
        else:
            logger.error(
                f"❌ FAILED: Row count mismatch. Expected {requested_row_count}, got {actual_row_count}"
            )
            success = False

        # Close resources
        cursor.close()
        connection.close()
        logger.info("Successfully closed SEA session")

        logger.info(f"Test results written to {rows_file} and {stats_file}")
        return success

    except Exception as e:
        logger.error(f"Error during SEA multi-chunk test with cloud fetch: {str(e)}")
        import traceback

        logger.error(traceback.format_exc())
        return False
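
# A minimal sketch of how the row-ID integrity checks above could be factored
# out. The helper is hypothetical (not called by the test); it assumes row_ids
# is the list of IDs collected while writing the CSV.
def _check_row_id_integrity(row_ids, requested_row_count):
    """Return (duplicate_count, missing_ids, extra_ids) for the fetched IDs."""
    unique_ids = set(row_ids)
    # Duplicates are the gap between total and unique counts
    duplicate_count = len(row_ids) - len(unique_ids)
    # Set differences yield IDs that are absent or unexpected
    expected_ids = set(range(1, requested_row_count + 1))
    return duplicate_count, expected_ids - unique_ids, unique_ids - expected_ids


# Usage: duplicates, missing, extra = _check_row_id_integrity(row_ids, 5000)
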
@@ -193,31 +203,33 @@ def main():
        )
        logger.error("Please set these variables before running the tests.")
        sys.exit(1)

    # Get row count from command line or use default
    requested_row_count = 10000

    if len(sys.argv) > 1:
        try:
            requested_row_count = int(sys.argv[1])
        except ValueError:
            logger.error(f"Invalid row count: {sys.argv[1]}")
            logger.error("Please provide a valid integer for row count.")
            sys.exit(1)

    logger.info(f"Testing with {requested_row_count} rows")

    # Run the multi-chunk test with cloud fetch
    success = test_sea_multi_chunk_with_cloud_fetch(requested_row_count)

    # Report results
    if success:
        logger.info(
            "✅ TEST PASSED: Multi-chunk cloud fetch test completed successfully"
        )
        sys.exit(0)
    else:
        logger.error("❌ TEST FAILED: Multi-chunk cloud fetch test encountered errors")
        sys.exit(1)


if __name__ == "__main__":
    main()
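
# Example invocation (script name is illustrative):
#   python test_sea_multi_chunk.py 5000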