Skip to content

Commit 32d1dd1

Browse files
aditanaseadragomir
authored and committed
[HSTACK] - fix array_has returning NULL instead of false on empty array
1 parent ac06ef4 commit 32d1dd1

File tree

2 files changed

+57
-20
lines changed

2 files changed

+57
-20
lines changed

datafusion/functions-nested/src/array_has.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@ fn array_has_dispatch_for_scalar<O: OffsetSizeTrait>(
269269
let values = haystack.values();
270270
let is_nested = values.data_type().is_nested();
271271
let offsets = haystack.value_offsets();
272+
let nulls = haystack.nulls();
273+
272274
// If first argument is empty list (second argument is non-null), return false
273275
// i.e. array_has([], non-null element) -> false
274276
if values.is_empty() {
@@ -283,9 +285,15 @@ fn array_has_dispatch_for_scalar<O: OffsetSizeTrait>(
283285
let start = offset[0].to_usize().unwrap();
284286
let end = offset[1].to_usize().unwrap();
285287
let length = end - start;
286-
// For non-nested list, length is 0 for null
288+
// For non-nested list, check null vs empty
289+
// otherwise array_has on [] returns null instead of false
287290
if length == 0 {
288-
continue;
291+
if let Some(nulls) = nulls {
292+
if nulls.is_null(i) {
293+
continue;
294+
}
295+
}
296+
final_contained[i] = Some(false);
289297
}
290298
let sliced_array = eq_array.slice(start, length);
291299
final_contained[i] = Some(sliced_array.true_count() > 0);

datafusion/sqllogictest/test_files/array.slt

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ AS VALUES
6363
(make_array(make_array(5, 6),make_array(7, 8)), make_array(7.7, 8.8, 9.9), make_array('d', NULL, 'l', 'o', 'r')),
6464
(make_array(make_array(7, NULL),make_array(9, 10)), make_array(10.1, NULL, 12.2), make_array('s', 'i', 't')),
6565
(NULL, make_array(13.3, 14.4, 15.5), make_array('a', 'm', 'e', 't')),
66+
(NULL, make_array(), make_array('a', 'm', 'e', 't')),
6667
(make_array(make_array(11, 12),make_array(13, 14)), NULL, make_array(',')),
6768
(make_array(make_array(15, 16),make_array(NULL, 18)), make_array(16.6, 17.7, 18.8), NULL)
6869
;
@@ -709,6 +710,7 @@ List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int6
709710
List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })
710711
List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })
711712
List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })
713+
List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })
712714

713715
# arrays table
714716
query ???
@@ -719,6 +721,7 @@ select column1, column2, column3 from arrays;
719721
[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r]
720722
[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t]
721723
NULL [13.3, 14.4, 15.5] [a, m, e, t]
724+
NULL [] [a, m, e, t]
722725
[[11, 12], [13, 14]] NULL [,]
723726
[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] NULL
724727

@@ -844,6 +847,7 @@ select column1[2], column2[3], column3[1] from arrays;
844847
[7, 8] 9.9 d
845848
[9, 10] 12.2 s
846849
NULL 15.5 a
850+
NULL NULL a
847851
[13, 14] NULL ,
848852
[NULL, 18] 18.8 NULL
849853

@@ -858,6 +862,7 @@ NULL NULL NULL
858862
NULL NULL NULL
859863
NULL NULL NULL
860864
NULL NULL NULL
865+
NULL NULL NULL
861866

862867
# single index with columns #3 (negative index)
863868
query ?RT
@@ -868,6 +873,7 @@ select column1[-2], column2[-3], column3[-1] from arrays;
868873
[5, 6] 7.7 r
869874
[7, NULL] 10.1 t
870875
NULL 13.3 t
876+
NULL NULL t
871877
[11, 12] NULL ,
872878
[15, 16] 16.6 NULL
873879

@@ -880,6 +886,7 @@ select column1[9 - 7], column2[2 * 0], column3[1 - 3] from arrays;
880886
[7, 8] NULL o
881887
[9, 10] NULL i
882888
NULL NULL e
889+
NULL NULL e
883890
[13, 14] NULL NULL
884891
[NULL, 18] NULL NULL
885892

@@ -951,6 +958,7 @@ select column1[2:4], column2[1:4], column3[3:4] from arrays;
951958
[[7, 8]] [7.7, 8.8, 9.9] [l, o]
952959
[[9, 10]] [10.1, NULL, 12.2] [t]
953960
NULL [13.3, 14.4, 15.5] [e, t]
961+
NULL [] [e, t]
954962
[[13, 14]] NULL []
955963
[[NULL, 18]] [16.6, 17.7, 18.8] NULL
956964

@@ -963,6 +971,7 @@ select column1[0:5], column2[0:3], column3[0:9] from arrays;
963971
[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r]
964972
[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t]
965973
NULL [13.3, 14.4, 15.5] [a, m, e, t]
974+
NULL [] [a, m, e, t]
966975
[[11, 12], [13, 14]] NULL [,]
967976
[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] NULL
968977

@@ -1027,6 +1036,7 @@ select column1[2:4:2], column2[1:4:2], column3[3:4:2] from arrays;
10271036
[[7, 8]] [7.7, 9.9] [l]
10281037
[[9, 10]] [10.1, 12.2] [t]
10291038
NULL [13.3, 15.5] [e]
1039+
NULL [] [e]
10301040
[[13, 14]] NULL []
10311041
[[NULL, 18]] [16.6, 18.8] NULL
10321042

@@ -1039,6 +1049,7 @@ select column1[0:5:2], column2[0:3:2], column3[0:9:2] from arrays;
10391049
[[5, 6]] [7.7, 9.9] [d, l, r]
10401050
[[7, NULL]] [10.1, 12.2] [s, t]
10411051
NULL [13.3, 15.5] [a, e]
1052+
NULL [] [a, e]
10421053
[[11, 12]] NULL [,]
10431054
[[15, 16]] [16.6, 18.8] NULL
10441055

@@ -2645,6 +2656,7 @@ select array_append(column2, 100.1), array_append(column3, '.') from arrays;
26452656
[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .]
26462657
[10.1, NULL, 12.2, 100.1] [s, i, t, .]
26472658
[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .]
2659+
[100.1] [a, m, e, t, .]
26482660
[100.1] [,, .]
26492661
[16.6, 17.7, 18.8, 100.1] [.]
26502662

@@ -2656,6 +2668,7 @@ select array_append(column2, 100.1), array_append(column3, '.') from large_array
26562668
[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .]
26572669
[10.1, NULL, 12.2, 100.1] [s, i, t, .]
26582670
[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .]
2671+
[100.1] [a, m, e, t, .]
26592672
[100.1] [,, .]
26602673
[16.6, 17.7, 18.8, 100.1] [.]
26612674

@@ -2903,6 +2916,7 @@ select array_prepend(100.1, column2), array_prepend('.', column3) from arrays;
29032916
[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r]
29042917
[100.1, 10.1, NULL, 12.2] [., s, i, t]
29052918
[100.1, 13.3, 14.4, 15.5] [., a, m, e, t]
2919+
[100.1] [., a, m, e, t]
29062920
[100.1] [., ,]
29072921
[100.1, 16.6, 17.7, 18.8] [.]
29082922

@@ -2914,6 +2928,7 @@ select array_prepend(100.1, column2), array_prepend('.', column3) from large_arr
29142928
[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r]
29152929
[100.1, 10.1, NULL, 12.2] [., s, i, t]
29162930
[100.1, 13.3, 14.4, 15.5] [., a, m, e, t]
2931+
[100.1] [., a, m, e, t]
29172932
[100.1] [., ,]
29182933
[100.1, 16.6, 17.7, 18.8] [.]
29192934

@@ -3275,6 +3290,7 @@ select array_concat(column1, column1), array_concat(column2, column2), array_con
32753290
[[5, 6], [7, 8], [5, 6], [7, 8]] [7.7, 8.8, 9.9, 7.7, 8.8, 9.9] [d, NULL, l, o, r, d, NULL, l, o, r]
32763291
[[7, NULL], [9, 10], [7, NULL], [9, 10]] [10.1, NULL, 12.2, 10.1, NULL, 12.2] [s, i, t, s, i, t]
32773292
NULL [13.3, 14.4, 15.5, 13.3, 14.4, 15.5] [a, m, e, t, a, m, e, t]
3293+
NULL [] [a, m, e, t, a, m, e, t]
32783294
[[11, 12], [13, 14], [11, 12], [13, 14]] NULL [,, ,]
32793295
[[15, 16], [NULL, 18], [15, 16], [NULL, 18]] [16.6, 17.7, 18.8, 16.6, 17.7, 18.8] NULL
32803296

@@ -3287,6 +3303,7 @@ select array_concat(column1, make_array(make_array(1, 2), make_array(3, 4))), ar
32873303
[[5, 6], [7, 8], [1, 2], [3, 4]] [7.7, 8.8, 9.9, 1.1, 2.2, 3.3]
32883304
[[7, NULL], [9, 10], [1, 2], [3, 4]] [10.1, NULL, 12.2, 1.1, 2.2, 3.3]
32893305
[[1, 2], [3, 4]] [13.3, 14.4, 15.5, 1.1, 2.2, 3.3]
3306+
[[1, 2], [3, 4]] [1.1, 2.2, 3.3]
32903307
[[11, 12], [13, 14], [1, 2], [3, 4]] [1.1, 2.2, 3.3]
32913308
[[15, 16], [NULL, 18], [1, 2], [3, 4]] [16.6, 17.7, 18.8, 1.1, 2.2, 3.3]
32923309

@@ -3299,6 +3316,7 @@ select array_concat(column3, make_array('.', '.', '.')) from arrays;
32993316
[d, NULL, l, o, r, ., ., .]
33003317
[s, i, t, ., ., .]
33013318
[a, m, e, t, ., ., .]
3319+
[a, m, e, t, ., ., .]
33023320
[,, ., ., .]
33033321
[., ., .]
33043322

@@ -4704,6 +4722,7 @@ NULL 0
47044722
#NULL 0
47054723

47064724
# cardinality with columns
4725+
# FIXME cardinality on empty array should be NULL or zero?
47074726
query III
47084727
select cardinality(column1), cardinality(column2), cardinality(column3) from arrays;
47094728
----
@@ -4712,6 +4731,7 @@ select cardinality(column1), cardinality(column2), cardinality(column3) from arr
47124731
4 3 5
47134732
4 3 3
47144733
NULL 3 4
4734+
NULL NULL 4
47154735
4 NULL 1
47164736
4 3 NULL
47174737

@@ -4723,6 +4743,7 @@ select cardinality(column1), cardinality(column2), cardinality(column3) from lar
47234743
4 3 5
47244744
4 3 3
47254745
NULL 3 4
4746+
NULL NULL 4
47264747
4 NULL 1
47274748
4 3 NULL
47284749

@@ -5406,6 +5427,7 @@ select array_dims(column1), array_dims(column2), array_dims(column3) from arrays
54065427
[2, 2] [3] [5]
54075428
[2, 2] [3] [3]
54085429
NULL [3] [4]
5430+
NULL NULL [4]
54095431
[2, 2] NULL [1]
54105432
[2, 2] [3] NULL
54115433

@@ -5417,6 +5439,7 @@ select array_dims(column1), array_dims(column2), array_dims(column3) from large_
54175439
[2, 2] [3] [5]
54185440
[2, 2] [3] [3]
54195441
NULL [3] [4]
5442+
NULL NULL [4]
54205443
[2, 2] NULL [1]
54215444
[2, 2] [3] NULL
54225445

@@ -5575,6 +5598,7 @@ select array_ndims(column1), array_ndims(column2), array_ndims(column3) from arr
55755598
2 1 1
55765599
2 1 1
55775600
NULL 1 1
5601+
NULL 1 1
55785602
2 NULL 1
55795603
2 1 NULL
55805604

@@ -5586,6 +5610,7 @@ select array_ndims(column1), array_ndims(column2), array_ndims(column3) from lar
55865610
2 1 1
55875611
2 1 1
55885612
NULL 1 1
5613+
NULL 1 1
55895614
2 NULL 1
55905615
2 1 NULL
55915616

@@ -5907,6 +5932,7 @@ true false true false
59075932
true false false true
59085933
false true false false
59095934
NULL NULL false false
5935+
NULL NULL false false
59105936
false false NULL false
59115937
false false false NULL
59125938

@@ -5922,6 +5948,7 @@ true false true false
59225948
true false false true
59235949
false true false false
59245950
NULL NULL false false
5951+
NULL NULL false false
59255952
false false NULL false
59265953
false false false NULL
59275954

@@ -7442,27 +7469,29 @@ false
74427469
#NULL
74437470

74447471
# empty scalar function #5
7445-
query B
7446-
select empty(column1) from arrays;
7472+
query BB
7473+
select empty(column1), empty(column2) from arrays;
74477474
----
7448-
false
7449-
false
7450-
false
7451-
false
7452-
NULL
7453-
false
7454-
false
7475+
false false
7476+
false false
7477+
false false
7478+
false false
7479+
NULL false
7480+
NULL true
7481+
false NULL
7482+
false false
74557483

7456-
query B
7457-
select empty(arrow_cast(column1, 'LargeList(List(Int64))')) from arrays;
7484+
query BB
7485+
select empty(arrow_cast(column1, 'LargeList(List(Int64))')), empty(arrow_cast(column2, 'LargeList(Int64)')) from arrays;
74587486
----
7459-
false
7460-
false
7461-
false
7462-
false
7463-
NULL
7464-
false
7465-
false
7487+
false false
7488+
false false
7489+
false false
7490+
false false
7491+
NULL false
7492+
NULL true
7493+
false NULL
7494+
false false
74667495

74677496
query B
74687497
select empty(column1) from fixed_size_arrays;

0 commit comments

Comments
 (0)