Skip to content

Commit 613f1df

Browse files
add support for multi-array UDFs
1 parent f24f347 commit 613f1df

File tree

4 files changed

+470
-44
lines changed

4 files changed

+470
-44
lines changed

src/main/java/io/tiledb/cloud/CustomCode.txt

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,135 @@ public String submitUDF(String namespace, String array, MultiArrayUDF udf, Strin
113113
return localVarResp.getData();
114114
}
115115

116+
/**
117+
*
118+
* send a UDF to run against a specified array/URI registered to a group/project
119+
* @param namespace namespace array is in (an organization name or user's username) (required)
120+
* @param array name/uri of array that is url-encoded (required)
121+
* @param udf UDF to run (required)
122+
* @param xPayer Name of organization or user who should be charged for this request (optional)
123+
* @param acceptEncoding Encoding to use (optional)
124+
* @param v2 flag to indicate if v2 array UDFs should be used, currently in beta testing. Setting any value will enable v2 array UDFs. (optional)
125+
* @return byte[]
126+
* @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body
127+
* @http.response.details
128+
<table summary="Response Details" border="1">
129+
<tr><td> Status Code </td><td> Description </td><td> Response Headers </td></tr>
130+
<tr><td> 200 </td><td> UDF completed and the UDF-type specific result is returned </td><td> * X-TILEDB-CLOUD-TASK-ID - Task ID for just completed request <br> </td></tr>
131+
<tr><td> 0 </td><td> error response </td><td> * X-TILEDB-CLOUD-TASK-ID - Task ID for just completed request <br> </td></tr>
132+
</table>
133+
*/
134+
public byte[] submitUDFBytes(String namespace, String array, MultiArrayUDF udf, String xPayer, String acceptEncoding, String v2) throws ApiException {
135+
ApiResponse<byte[]> localVarResp = submitUDFWithHttpInfoBytes(namespace, array, udf, xPayer, acceptEncoding, v2);
136+
return localVarResp.getData();
137+
}
138+
139+
/**
140+
*
141+
* send a UDF to run against a specified array/URI registered to a group/project
142+
* @param namespace namespace array is in (an organization name or user&#39;s username) (required)
143+
* @param array name/uri of array that is url-encoded (required)
144+
* @param udf UDF to run (required)
145+
* @param xPayer Name of organization or user who should be charged for this request (optional)
146+
* @param acceptEncoding Encoding to use (optional)
147+
* @param v2 flag to indicate if v2 array UDFs should be used, currently in beta testing. Setting any value will enable v2 array UDFs. (optional)
148+
* @return ApiResponse&lt;File&gt;
149+
* @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body
150+
* @http.response.details
151+
<table summary="Response Details" border="1">
152+
<tr><td> Status Code </td><td> Description </td><td> Response Headers </td></tr>
153+
<tr><td> 200 </td><td> UDF completed and the UDF-type specific result is returned </td><td> * X-TILEDB-CLOUD-TASK-ID - Task ID for just completed request <br> </td></tr>
154+
<tr><td> 0 </td><td> error response </td><td> * X-TILEDB-CLOUD-TASK-ID - Task ID for just completed request <br> </td></tr>
155+
</table>
156+
*/
157+
public ApiResponse<byte[]> submitUDFWithHttpInfoBytes(String namespace, String array, MultiArrayUDF udf, String xPayer, String acceptEncoding, String v2) throws ApiException {
158+
okhttp3.Call localVarCall = submitUDFValidateBeforeCall(namespace, array, udf, xPayer, acceptEncoding, v2, null);
159+
Type localVarReturnType = new TypeToken<byte[]>(){}.getType();
160+
return localVarApiClient.execute(localVarCall, localVarReturnType);
161+
}
162+
163+
/**
164+
*
165+
* submit a multi-array UDF in the given namespace
166+
* @param namespace namespace array is in (an organization name or user&#39;s username) (required)
167+
* @param udf UDF to run (required)
168+
* @param acceptEncoding Encoding to use (optional)
169+
* @return String
170+
* @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body
171+
* @http.response.details
172+
<table summary="Response Details" border="1">
173+
<tr><td> Status Code </td><td> Description </td><td> Response Headers </td></tr>
174+
<tr><td> 200 </td><td> UDF completed and the UDF-type specific result is returned </td><td> * X-TILEDB-CLOUD-TASK-ID - Task ID for just completed request <br> </td></tr>
175+
<tr><td> 0 </td><td> error response </td><td> * X-TILEDB-CLOUD-TASK-ID - Task ID for just completed request <br> </td></tr>
176+
</table>
177+
*/
178+
public String submitMultiArrayUDFString(String namespace, MultiArrayUDF udf, String acceptEncoding) throws ApiException {
179+
ApiResponse<String> localVarResp = submitMultiArrayUDFWithHttpInfoString(namespace, udf, acceptEncoding);
180+
return localVarResp.getData();
181+
}
182+
183+
/**
184+
*
185+
* submit a multi-array UDF in the given namespace
186+
* @param namespace namespace array is in (an organization name or user&#39;s username) (required)
187+
* @param udf UDF to run (required)
188+
* @param acceptEncoding Encoding to use (optional)
189+
* @return ApiResponse&lt;File&gt;
190+
* @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body
191+
* @http.response.details
192+
<table summary="Response Details" border="1">
193+
<tr><td> Status Code </td><td> Description </td><td> Response Headers </td></tr>
194+
<tr><td> 200 </td><td> UDF completed and the UDF-type specific result is returned </td><td> * X-TILEDB-CLOUD-TASK-ID - Task ID for just completed request <br> </td></tr>
195+
<tr><td> 0 </td><td> error response </td><td> * X-TILEDB-CLOUD-TASK-ID - Task ID for just completed request <br> </td></tr>
196+
</table>
197+
*/
198+
public ApiResponse<String> submitMultiArrayUDFWithHttpInfoString(String namespace, MultiArrayUDF udf, String acceptEncoding) throws ApiException {
199+
okhttp3.Call localVarCall = submitMultiArrayUDFValidateBeforeCall(namespace, udf, acceptEncoding, null);
200+
Type localVarReturnType = new TypeToken<String>(){}.getType();
201+
return localVarApiClient.execute(localVarCall, localVarReturnType);
202+
}
203+
204+
/**
205+
*
206+
* submit a multi-array UDF in the given namespace
207+
* @param namespace namespace array is in (an organization name or user&#39;s username) (required)
208+
* @param udf UDF to run (required)
209+
* @param acceptEncoding Encoding to use (optional)
210+
* @return byte[]
211+
* @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body
212+
* @http.response.details
213+
<table summary="Response Details" border="1">
214+
<tr><td> Status Code </td><td> Description </td><td> Response Headers </td></tr>
215+
<tr><td> 200 </td><td> UDF completed and the UDF-type specific result is returned </td><td> * X-TILEDB-CLOUD-TASK-ID - Task ID for just completed request <br> </td></tr>
216+
<tr><td> 0 </td><td> error response </td><td> * X-TILEDB-CLOUD-TASK-ID - Task ID for just completed request <br> </td></tr>
217+
</table>
218+
*/
219+
public byte[] submitMultiArrayUDFBytes(String namespace, MultiArrayUDF udf, String acceptEncoding) throws ApiException {
220+
ApiResponse<byte[]> localVarResp = submitMultiArrayUDFWithHttpInfoBytes(namespace, udf, acceptEncoding);
221+
return localVarResp.getData();
222+
}
223+
224+
/**
225+
*
226+
* submit a multi-array UDF in the given namespace
227+
* @param namespace namespace array is in (an organization name or user&#39;s username) (required)
228+
* @param udf UDF to run (required)
229+
* @param acceptEncoding Encoding to use (optional)
230+
* @return ApiResponse&lt;File&gt;
231+
* @throws ApiException If fail to call the API, e.g. server error or cannot deserialize the response body
232+
* @http.response.details
233+
<table summary="Response Details" border="1">
234+
<tr><td> Status Code </td><td> Description </td><td> Response Headers </td></tr>
235+
<tr><td> 200 </td><td> UDF completed and the UDF-type specific result is returned </td><td> * X-TILEDB-CLOUD-TASK-ID - Task ID for just completed request <br> </td></tr>
236+
<tr><td> 0 </td><td> error response </td><td> * X-TILEDB-CLOUD-TASK-ID - Task ID for just completed request <br> </td></tr>
237+
</table>
238+
*/
239+
public ApiResponse<byte[]> submitMultiArrayUDFWithHttpInfoBytes(String namespace, MultiArrayUDF udf, String acceptEncoding) throws ApiException {
240+
okhttp3.Call localVarCall = submitMultiArrayUDFValidateBeforeCall(namespace, udf, acceptEncoding, null);
241+
Type localVarReturnType = new TypeToken<byte[]>(){}.getType();
242+
return localVarApiClient.execute(localVarCall, localVarReturnType);
243+
}
244+
116245

117246

118247

src/main/java/io/tiledb/cloud/TileDBUDF.java

Lines changed: 162 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import io.tiledb.cloud.rest_api.model.GenericUDF;
77
import io.tiledb.cloud.rest_api.model.MultiArrayUDF;
88
import io.tiledb.cloud.rest_api.model.ResultFormat;
9+
import io.tiledb.java.api.Pair;
910
import org.apache.arrow.compression.CommonsCompressionFactory;
1011
import org.apache.arrow.memory.RootAllocator;
1112
import org.apache.arrow.memory.UnsafeAllocationManager;
@@ -14,6 +15,7 @@
1415
import org.apache.arrow.vector.VectorSchemaRoot;
1516
import org.apache.arrow.vector.ipc.ArrowStreamReader;
1617
import org.apache.arrow.vector.util.TransferPair;
18+
import org.json.JSONArray;
1719
import org.json.JSONObject;
1820

1921
import java.io.ByteArrayInputStream;
@@ -60,23 +62,25 @@ public String executeGeneric(GenericUDF genericUDF, HashMap<String, Object> argu
6062
*
6163
* @param genericUDF The generic UDF definition
6264
* @param arguments The UDF arguments
63-
* @return The result in JSON format
65+
* @return The result as a JSON object
6466
*/
6567
public JSONObject executeGenericJSON(GenericUDF genericUDF, HashMap<String, Object> arguments){
66-
String serializedArgs = serializeArgs(arguments);
67-
genericUDF.setArgument(serializedArgs);
6868
genericUDF.setResultFormat(ResultFormat.JSON);
69-
try {
70-
String jsonString = apiInstance.submitGenericUDFString(namespace, genericUDF, "none");
71-
return new JSONObject(jsonString);
72-
} catch (ApiException e) {
73-
System.err.println("Exception when calling UdfApi#submitGenericUDF");
74-
System.err.println("Status code: " + e.getCode());
75-
System.err.println("Reason: " + e.getResponseBody());
76-
System.err.println("Response headers: " + e.getResponseHeaders());
77-
e.printStackTrace();
78-
}
79-
return null;
69+
String jsonString = this.executeGeneric(genericUDF, arguments);
70+
return new JSONObject(jsonString);
71+
}
72+
73+
/**
74+
* Executes a generic-UDF. A generic-UDF is a UDF that is not using a TIleDB array.
75+
*
76+
* @param genericUDF The generic UDF definition
77+
* @param arguments The UDF arguments
78+
* @return The result as a JSON array object
79+
*/
80+
public JSONArray executeGenericJSONArray(GenericUDF genericUDF, HashMap<String, Object> arguments){
81+
genericUDF.setResultFormat(ResultFormat.JSON);
82+
String jsonString = this.executeGeneric(genericUDF, arguments);
83+
return new JSONArray(jsonString);
8084
}
8185

8286
/**
@@ -92,29 +96,7 @@ public io.tiledb.java.api.Pair<ArrayList<ValueVector>, Integer> executeGenericAr
9296
genericUDF.setResultFormat(ResultFormat.ARROW);
9397
try {
9498
byte[] bytes = apiInstance.submitGenericUDFBytes(namespace, genericUDF, "none");
95-
ArrayList<ValueVector> valueVectors = null;
96-
int readBatchesCount = 0;
97-
98-
RootAllocator allocator = new RootAllocator(RootAllocator.configBuilder().allocationManagerFactory(UnsafeAllocationManager.FACTORY).build());
99-
ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayInputStream(bytes), allocator, CommonsCompressionFactory.INSTANCE);
100-
101-
VectorSchemaRoot root = reader.getVectorSchemaRoot();
102-
103-
while(reader.loadNextBatch()) {
104-
readBatchesCount++;
105-
valueVectors = new ArrayList<>();
106-
for (FieldVector f : root.getFieldVectors()) {
107-
// transfer will not copy data but transfer ownership of memory
108-
// from ArrowStreamReader to TileDBSQL. This is necessary because
109-
// otherwise we are not able to close the reader and retain the
110-
// data.
111-
TransferPair t = f.getTransferPair(allocator);
112-
t.transfer();
113-
valueVectors.add(t.getTo());
114-
}
115-
}
116-
reader.close();
117-
return new io.tiledb.java.api.Pair<>(valueVectors, readBatchesCount);
99+
return TileDBUtils.createValueVectors(bytes);
118100
} catch (IOException | ApiException e) {
119101
e.printStackTrace();
120102
}
@@ -128,17 +110,14 @@ public io.tiledb.java.api.Pair<ArrayList<ValueVector>, Integer> executeGenericAr
128110
* @param arguments The UDF arguments
129111
* @param arrayURI The array URI
130112
* @param xPayer Name of organization or user who should be charged for this request
131-
* @return
113+
* @return The results as a String
132114
*/
133115
public String executeSingleArray(MultiArrayUDF multiArrayUDF, HashMap<String, Object> arguments, String arrayURI, String xPayer){
134116
String serializedArgs = serializeArgs(arguments);
135117
multiArrayUDF.setArgument(serializedArgs);
136-
//split uri to get namespace and array name
137-
arrayURI = arrayURI.replaceAll("tiledb://", ""); //remove tiledb prefix
138-
String[] split = arrayURI.split("/");
139-
if (split.length != 2)
140-
throw new RuntimeException(
141-
"TileDB URI is in the wrong format. The format should be: tiledb://namespace/array_name");
118+
119+
String[] split = breakdownFullURI(arrayURI);
120+
142121
try {
143122
return apiInstance.submitUDFString(split[0], split[1], multiArrayUDF, xPayer, "none", "");
144123
} catch (ApiException e) {
@@ -151,6 +130,130 @@ public String executeSingleArray(MultiArrayUDF multiArrayUDF, HashMap<String, Ob
151130
return null;
152131
}
153132

133+
/**
134+
* Executes an array-UDF. An array-UDF is a UDF applied to a TileDB array
135+
*
136+
* @param multiArrayUDF The array-UDF. Can reference one arrays
137+
* @param arguments The UDF arguments
138+
* @param arrayURI The array URI
139+
* @param xPayer Name of organization or user who should be charged for this request
140+
* @return The results as a JSON Object
141+
*/
142+
public JSONObject executeSingleArrayJSON(MultiArrayUDF multiArrayUDF, HashMap<String, Object> arguments, String arrayURI, String xPayer){
143+
multiArrayUDF.setResultFormat(ResultFormat.JSON);
144+
String jsonString = this.executeSingleArray(multiArrayUDF, arguments, arrayURI, xPayer);
145+
return new JSONObject(jsonString);
146+
}
147+
148+
/**
149+
* Executes an array-UDF. An array-UDF is a UDF applied to a TileDB array
150+
*
151+
* @param multiArrayUDF The array-UDF. Can reference one arrays
152+
* @param arguments The UDF arguments
153+
* @param arrayURI The array URI
154+
* @param xPayer Name of organization or user who should be charged for this request
155+
* @return The results as a JSON array
156+
*/
157+
public JSONArray executeSingleArrayJSONArray(MultiArrayUDF multiArrayUDF, HashMap<String, Object> arguments, String arrayURI, String xPayer){
158+
multiArrayUDF.setResultFormat(ResultFormat.JSON);
159+
String jsonString = this.executeSingleArray(multiArrayUDF, arguments, arrayURI, xPayer);
160+
return new JSONArray(jsonString);
161+
}
162+
163+
/**
164+
* Executes an array-UDF. An array-UDF is a UDF applied to a TileDB array
165+
*
166+
* @param multiArrayUDF The array-UDF. Can reference one arrays
167+
* @param arguments The UDF arguments
168+
* @param arrayURI The array URI
169+
* @param xPayer Name of organization or user who should be charged for this request
170+
* @return The results in arrow format
171+
*/
172+
public Pair<ArrayList<ValueVector>, Integer> executeSingleArrayArrow(MultiArrayUDF multiArrayUDF, HashMap<String, Object> arguments, String arrayURI, String xPayer){
173+
String serializedArgs = serializeArgs(arguments);
174+
multiArrayUDF.setArgument(serializedArgs);
175+
multiArrayUDF.setResultFormat(ResultFormat.ARROW);
176+
177+
String[] split = breakdownFullURI(arrayURI);
178+
179+
try {
180+
byte[] bytes = apiInstance.submitUDFBytes(split[0], split[1], multiArrayUDF, xPayer, "none", "");
181+
return TileDBUtils.createValueVectors(bytes);
182+
} catch (IOException | ApiException e) {
183+
e.printStackTrace();
184+
}
185+
return null;
186+
}
187+
188+
/**
189+
* Executes a multi-array-UDF. A multi- array-UDF is a UDF applied to multiple TileDB arrays
190+
*
191+
* @param multiArrayUDF The multiArrayUDF input object
192+
* @param arguments The arguments
193+
* @return The results as a String
194+
*/
195+
public String executeMultiArray(MultiArrayUDF multiArrayUDF, HashMap<String, Object> arguments){
196+
String serializedArgs = serializeArgs(arguments);
197+
multiArrayUDF.setArgument(serializedArgs);
198+
try {
199+
return apiInstance.submitMultiArrayUDFString(this.namespace, multiArrayUDF, "none");
200+
} catch (ApiException e) {
201+
System.err.println("Exception when calling UdfApi#submitMultiArrayUDFString");
202+
System.err.println("Status code: " + e.getCode());
203+
System.err.println("Reason: " + e.getResponseBody());
204+
System.err.println("Response headers: " + e.getResponseHeaders());
205+
e.printStackTrace();
206+
}
207+
return null;
208+
}
209+
210+
/**
211+
* Executes a multi-array-UDF. A multi- array-UDF is a UDF applied to multiple TileDB arrays
212+
*
213+
* @param multiArrayUDF The multiArrayUDF input object
214+
* @param arguments The arguments
215+
* @return The results as JSON object
216+
*/
217+
public JSONObject executeMultiArrayJSON(MultiArrayUDF multiArrayUDF, HashMap<String, Object> arguments){
218+
multiArrayUDF.setResultFormat(ResultFormat.JSON);
219+
String jsonString = this.executeMultiArray(multiArrayUDF, arguments);
220+
return new JSONObject(jsonString);
221+
}
222+
223+
/**
224+
* Executes a multi-array-UDF. A multi- array-UDF is a UDF applied to multiple TileDB arrays
225+
*
226+
* @param multiArrayUDF The multiArrayUDF input object
227+
* @param arguments The arguments
228+
* @return The results as JSON Array
229+
*/
230+
public JSONArray executeMultiArrayJSONArray(MultiArrayUDF multiArrayUDF, HashMap<String, Object> arguments){
231+
multiArrayUDF.setResultFormat(ResultFormat.JSON);
232+
String jsonString = this.executeMultiArray(multiArrayUDF, arguments);
233+
return new JSONArray(jsonString);
234+
}
235+
236+
/**
237+
* Executes a multi-array-UDF. A multi- array-UDF is a UDF applied to multiple TileDB arrays
238+
*
239+
* @param multiArrayUDF The multiArrayUDF input object
240+
* @param arguments The arguments
241+
* @return The results in arrow format
242+
*/
243+
public Pair<ArrayList<ValueVector>, Integer> executeMultiArrayArrow(MultiArrayUDF multiArrayUDF, HashMap<String, Object> arguments){
244+
String serializedArgs = serializeArgs(arguments);
245+
multiArrayUDF.setArgument(serializedArgs);
246+
multiArrayUDF.setResultFormat(ResultFormat.ARROW);
247+
248+
try {
249+
byte[] bytes = apiInstance.submitMultiArrayUDFBytes(this.namespace, multiArrayUDF, "none");
250+
return TileDBUtils.createValueVectors(bytes);
251+
} catch (IOException | ApiException e) {
252+
e.printStackTrace();
253+
}
254+
return null;
255+
}
256+
154257
/**
155258
* Serializes the arguments to a String
156259
*
@@ -162,4 +265,19 @@ private String serializeArgs(HashMap<String, Object> arguments) {
162265
Gson gson = new Gson();
163266
return gson.toJson(arguments);
164267
}
268+
269+
/**
270+
* Breaks down a full uri to its components
271+
* @param arrayURI The input full uri
272+
* @return An array of strings
273+
*/
274+
private String[] breakdownFullURI(String arrayURI) {
275+
arrayURI = arrayURI.replaceAll("tiledb://", ""); //remove tiledb prefix
276+
//split uri to get namespace and array name
277+
String[] split = arrayURI.split("/");
278+
if (split.length != 2)
279+
throw new RuntimeException(
280+
"TileDB URI is in the wrong format. The format should be: tiledb://namespace/array_name");
281+
return split;
282+
}
165283
}

0 commit comments

Comments
 (0)