2
2
3
3
import io .tiledb .cloud .rest_api .ApiException ;
4
4
import io .tiledb .cloud .rest_api .api .SqlApi ;
5
- import io .tiledb .cloud .rest_api .model .ResultFormat ;
6
5
import io .tiledb .cloud .rest_api .model .SQLParameters ;
7
6
import org .apache .arrow .memory .RootAllocator ;
7
+ import org .apache .arrow .vector .FieldVector ;
8
+ import org .apache .arrow .vector .ValueVector ;
8
9
import org .apache .arrow .vector .VectorSchemaRoot ;
9
10
import org .apache .arrow .vector .ipc .ArrowStreamReader ;
10
- import org .apache .arrow .vector .types . pojo . Schema ;
11
+ import org .apache .arrow .vector .util . TransferPair ;
11
12
12
13
import java .io .ByteArrayInputStream ;
13
14
import java .io .IOException ;
14
- import java .util .ArrayList ;
15
- import java .util .Arrays ;
16
- import java .util .List ;
17
- import java .util .Objects ;
15
+ import java .util .*;
16
+ import org .apache .arrow .compression .CommonsCompressionFactory ;
18
17
19
- public class TileDBSQL {
20
- String namespace ;
18
+ public class TileDBSQL implements AutoCloseable {
19
+ private String namespace ;
21
20
22
- SQLParameters sql ;
21
+ private SQLParameters sql ;
23
22
24
- TileDBClient tileDBClient ;
23
+ private TileDBClient tileDBClient ;
25
24
26
- SqlApi apiInstance ;
25
+ private SqlApi apiInstance ;
27
26
28
- ArrayList <VectorSchemaRoot > readBatches ;
27
+ private ArrayList <VectorSchemaRoot > readBatches ;
29
28
30
- List <Object > results ;
29
+ private List <Object > results ;
30
+
31
+ private ArrowStreamReader reader ;
31
32
32
33
/**
33
34
*
@@ -48,29 +49,40 @@ public TileDBSQL(TileDBClient tileDBClient, String namespace, SQLParameters sql)
48
49
49
50
/**
50
51
* Exec an SQL query and get results in arrow format.
52
+ *
53
+ * @return A pair that consists of an ArrayList of all valueVectors and the
54
+ * number of batches read.
51
55
*/
52
- public void execArrow (){
56
+ public io . tiledb . java . api . Pair < ArrayList < ValueVector >, Integer > execArrow (){
53
57
try {
54
58
assert sql .getResultFormat () != null ;
55
- byte [] bytes = apiInstance .runSQLBytes (namespace , sql , sql .getResultFormat ().toString ());
56
- System .out .println (Arrays .toString (bytes ));
59
+ byte [] bytes = apiInstance .runSQLBytes (namespace , sql , "none" );
60
+ ArrayList <ValueVector > valueVectors = null ;
61
+ int readBatchesCount = 0 ;
57
62
58
63
RootAllocator allocator = new RootAllocator (Long .MAX_VALUE );
59
- try (ArrowStreamReader reader = new ArrowStreamReader (new ByteArrayInputStream (bytes ), allocator )) {
60
- while (reader .loadNextBatch ()) {
61
- // This will be loaded with new values on every call to loadNextBatch
62
- VectorSchemaRoot readBatch = reader .getVectorSchemaRoot ();
63
- readBatches .add (readBatch );
64
+ ArrowStreamReader reader = new ArrowStreamReader (new ByteArrayInputStream (bytes ), allocator , CommonsCompressionFactory .INSTANCE );
65
+
66
+ VectorSchemaRoot root = reader .getVectorSchemaRoot ();
67
+
68
+ while (reader .loadNextBatch ()) {
69
+ readBatchesCount ++;
70
+ valueVectors = new ArrayList <>();
71
+ for (FieldVector f : root .getFieldVectors ()) {
72
+ // transfer will not copy data but transfer ownership of memory
73
+ // from ArrowStreamReader to TileDBSQL. This is necessary because
74
+ // otherwise we are not able to close the reader and retain the
75
+ // data.
76
+ TransferPair t = f .getTransferPair (allocator );
77
+ t .transfer ();
78
+ valueVectors .add (t .getTo ());
64
79
}
65
- } catch (IOException e ) {
66
- throw new RuntimeException (e );
67
80
}
68
- } catch (ApiException e ) {
69
- System .err .println ("Exception when calling SqlApi#runSQL/runSQLBytes" );
70
- System .err .println ("Status code: " + e .getCode ());
71
- System .err .println ("Reason: " + e .getResponseBody ());
72
- System .err .println ("Response headers: " + e .getResponseHeaders ());
73
- e .printStackTrace ();
81
+ reader .close ();
82
+ return new io .tiledb .java .api .Pair <>(valueVectors , readBatchesCount );
83
+
84
+ } catch (IOException | ApiException e ) {
85
+ throw new RuntimeException (e );
74
86
}
75
87
}
76
88
@@ -79,43 +91,28 @@ public void execArrow(){
79
91
*
80
92
* @return
81
93
*/
82
- public void execStandard (){
94
+ public List < Object > exec (){
83
95
try {
84
96
assert sql .getResultFormat () != null ;
85
- results = apiInstance .runSQL (namespace , sql , sql .getResultFormat ().toString ());
97
+ return apiInstance .runSQL (namespace , sql , sql .getResultFormat ().toString ());
86
98
} catch (ApiException e ) {
87
99
System .err .println ("Exception when calling SqlApi#runSQL/runSQLBytes" );
88
100
System .err .println ("Status code: " + e .getCode ());
89
101
System .err .println ("Reason: " + e .getResponseBody ());
90
102
System .err .println ("Response headers: " + e .getResponseHeaders ());
91
103
e .printStackTrace ();
92
104
}
105
+ return null ;
93
106
}
94
107
95
108
/**
96
- * Exec an SQL query
109
+ *
97
110
*/
98
- public void exec (){
99
- if ( this . sql . getResultFormat () == ResultFormat . ARROW ) {
100
- execArrow ();
101
- }else {
102
- execStandard ( );
111
+ public void close (){
112
+ try {
113
+ reader . close ();
114
+ } catch ( IOException e ) {
115
+ throw new RuntimeException ( e );
103
116
}
104
117
}
105
-
106
- /**
107
- * Get the results in Arrow format
108
- * @return
109
- */
110
- public ArrayList <VectorSchemaRoot > getReadBatches () {
111
- return readBatches ;
112
- }
113
-
114
- /**
115
- * Get the results as lists of Objects
116
- * @return
117
- */
118
- public List <Object > getResults () {
119
- return results ;
120
- }
121
118
}
0 commit comments