Skip to content

Commit 98bc7f4

Browse files
aditanaseadragomir
authored and committed
[HSTACK] - fix array_has returning NULL instead of false on empty array
1 parent d4e9b09 commit 98bc7f4

File tree

2 files changed

+57
-20
lines changed

2 files changed

+57
-20
lines changed

datafusion/functions-nested/src/array_has.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@ fn array_has_dispatch_for_scalar<O: OffsetSizeTrait>(
269269
let values = haystack.values();
270270
let is_nested = values.data_type().is_nested();
271271
let offsets = haystack.value_offsets();
272+
let nulls = haystack.nulls();
273+
272274
// If first argument is empty list (second argument is non-null), return false
273275
// i.e. array_has([], non-null element) -> false
274276
if values.is_empty() {
@@ -283,9 +285,15 @@ fn array_has_dispatch_for_scalar<O: OffsetSizeTrait>(
283285
let start = offset[0].to_usize().unwrap();
284286
let end = offset[1].to_usize().unwrap();
285287
let length = end - start;
286-
// For non-nested list, length is 0 for null
288+
// For non-nested list, a zero-length slot is either a NULL list or an empty list:
289+
// skip NULL entries, but an empty (non-null) list must yield false rather than NULL
287290
if length == 0 {
288-
continue;
291+
if let Some(nulls) = nulls {
292+
if nulls.is_null(i) {
293+
continue;
294+
}
295+
}
296+
final_contained[i] = Some(false);
289297
}
290298
let sliced_array = eq_array.slice(start, length);
291299
final_contained[i] = Some(sliced_array.true_count() > 0);

datafusion/sqllogictest/test_files/array.slt

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ AS VALUES
6363
(make_array(make_array(5, 6),make_array(7, 8)), make_array(7.7, 8.8, 9.9), make_array('d', NULL, 'l', 'o', 'r')),
6464
(make_array(make_array(7, NULL),make_array(9, 10)), make_array(10.1, NULL, 12.2), make_array('s', 'i', 't')),
6565
(NULL, make_array(13.3, 14.4, 15.5), make_array('a', 'm', 'e', 't')),
66+
(NULL, make_array(), make_array('a', 'm', 'e', 't')),
6667
(make_array(make_array(11, 12),make_array(13, 14)), NULL, make_array(',')),
6768
(make_array(make_array(15, 16),make_array(NULL, 18)), make_array(16.6, 17.7, 18.8), NULL)
6869
;
@@ -709,6 +710,7 @@ List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int6
709710
List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })
710711
List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })
711712
List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })
713+
List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} })
712714

713715
# arrays table
714716
query ???
@@ -719,6 +721,7 @@ select column1, column2, column3 from arrays;
719721
[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r]
720722
[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t]
721723
NULL [13.3, 14.4, 15.5] [a, m, e, t]
724+
NULL [] [a, m, e, t]
722725
[[11, 12], [13, 14]] NULL [,]
723726
[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] NULL
724727

@@ -844,6 +847,7 @@ select column1[2], column2[3], column3[1] from arrays;
844847
[7, 8] 9.9 d
845848
[9, 10] 12.2 s
846849
NULL 15.5 a
850+
NULL NULL a
847851
[13, 14] NULL ,
848852
[NULL, 18] 18.8 NULL
849853

@@ -858,6 +862,7 @@ NULL NULL NULL
858862
NULL NULL NULL
859863
NULL NULL NULL
860864
NULL NULL NULL
865+
NULL NULL NULL
861866

862867
# single index with columns #3 (negative index)
863868
query ?RT
@@ -868,6 +873,7 @@ select column1[-2], column2[-3], column3[-1] from arrays;
868873
[5, 6] 7.7 r
869874
[7, NULL] 10.1 t
870875
NULL 13.3 t
876+
NULL NULL t
871877
[11, 12] NULL ,
872878
[15, 16] 16.6 NULL
873879

@@ -880,6 +886,7 @@ select column1[9 - 7], column2[2 * 0], column3[1 - 3] from arrays;
880886
[7, 8] NULL o
881887
[9, 10] NULL i
882888
NULL NULL e
889+
NULL NULL e
883890
[13, 14] NULL NULL
884891
[NULL, 18] NULL NULL
885892

@@ -951,6 +958,7 @@ select column1[2:4], column2[1:4], column3[3:4] from arrays;
951958
[[7, 8]] [7.7, 8.8, 9.9] [l, o]
952959
[[9, 10]] [10.1, NULL, 12.2] [t]
953960
NULL [13.3, 14.4, 15.5] [e, t]
961+
NULL [] [e, t]
954962
[[13, 14]] NULL []
955963
[[NULL, 18]] [16.6, 17.7, 18.8] NULL
956964

@@ -963,6 +971,7 @@ select column1[0:5], column2[0:3], column3[0:9] from arrays;
963971
[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, NULL, l, o, r]
964972
[[7, NULL], [9, 10]] [10.1, NULL, 12.2] [s, i, t]
965973
NULL [13.3, 14.4, 15.5] [a, m, e, t]
974+
NULL [] [a, m, e, t]
966975
[[11, 12], [13, 14]] NULL [,]
967976
[[15, 16], [NULL, 18]] [16.6, 17.7, 18.8] NULL
968977

@@ -1027,6 +1036,7 @@ select column1[2:4:2], column2[1:4:2], column3[3:4:2] from arrays;
10271036
[[7, 8]] [7.7, 9.9] [l]
10281037
[[9, 10]] [10.1, 12.2] [t]
10291038
NULL [13.3, 15.5] [e]
1039+
NULL [] [e]
10301040
[[13, 14]] NULL []
10311041
[[NULL, 18]] [16.6, 18.8] NULL
10321042

@@ -1039,6 +1049,7 @@ select column1[0:5:2], column2[0:3:2], column3[0:9:2] from arrays;
10391049
[[5, 6]] [7.7, 9.9] [d, l, r]
10401050
[[7, NULL]] [10.1, 12.2] [s, t]
10411051
NULL [13.3, 15.5] [a, e]
1052+
NULL [] [a, e]
10421053
[[11, 12]] NULL [,]
10431054
[[15, 16]] [16.6, 18.8] NULL
10441055

@@ -2630,6 +2641,7 @@ select array_append(column2, 100.1), array_append(column3, '.') from arrays;
26302641
[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .]
26312642
[10.1, NULL, 12.2, 100.1] [s, i, t, .]
26322643
[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .]
2644+
[100.1] [a, m, e, t, .]
26332645
[100.1] [,, .]
26342646
[16.6, 17.7, 18.8, 100.1] [.]
26352647

@@ -2641,6 +2653,7 @@ select array_append(column2, 100.1), array_append(column3, '.') from large_array
26412653
[7.7, 8.8, 9.9, 100.1] [d, NULL, l, o, r, .]
26422654
[10.1, NULL, 12.2, 100.1] [s, i, t, .]
26432655
[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .]
2656+
[100.1] [a, m, e, t, .]
26442657
[100.1] [,, .]
26452658
[16.6, 17.7, 18.8, 100.1] [.]
26462659

@@ -2886,6 +2899,7 @@ select array_prepend(100.1, column2), array_prepend('.', column3) from arrays;
28862899
[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r]
28872900
[100.1, 10.1, NULL, 12.2] [., s, i, t]
28882901
[100.1, 13.3, 14.4, 15.5] [., a, m, e, t]
2902+
[100.1] [., a, m, e, t]
28892903
[100.1] [., ,]
28902904
[100.1, 16.6, 17.7, 18.8] [.]
28912905

@@ -2897,6 +2911,7 @@ select array_prepend(100.1, column2), array_prepend('.', column3) from large_arr
28972911
[100.1, 7.7, 8.8, 9.9] [., d, NULL, l, o, r]
28982912
[100.1, 10.1, NULL, 12.2] [., s, i, t]
28992913
[100.1, 13.3, 14.4, 15.5] [., a, m, e, t]
2914+
[100.1] [., a, m, e, t]
29002915
[100.1] [., ,]
29012916
[100.1, 16.6, 17.7, 18.8] [.]
29022917

@@ -3255,6 +3270,7 @@ select array_concat(column1, column1), array_concat(column2, column2), array_con
32553270
[[5, 6], [7, 8], [5, 6], [7, 8]] [7.7, 8.8, 9.9, 7.7, 8.8, 9.9] [d, NULL, l, o, r, d, NULL, l, o, r]
32563271
[[7, NULL], [9, 10], [7, NULL], [9, 10]] [10.1, NULL, 12.2, 10.1, NULL, 12.2] [s, i, t, s, i, t]
32573272
NULL [13.3, 14.4, 15.5, 13.3, 14.4, 15.5] [a, m, e, t, a, m, e, t]
3273+
NULL [] [a, m, e, t, a, m, e, t]
32583274
[[11, 12], [13, 14], [11, 12], [13, 14]] NULL [,, ,]
32593275
[[15, 16], [NULL, 18], [15, 16], [NULL, 18]] [16.6, 17.7, 18.8, 16.6, 17.7, 18.8] NULL
32603276

@@ -3267,6 +3283,7 @@ select array_concat(column1, make_array(make_array(1, 2), make_array(3, 4))), ar
32673283
[[5, 6], [7, 8], [1, 2], [3, 4]] [7.7, 8.8, 9.9, 1.1, 2.2, 3.3]
32683284
[[7, NULL], [9, 10], [1, 2], [3, 4]] [10.1, NULL, 12.2, 1.1, 2.2, 3.3]
32693285
[[1, 2], [3, 4]] [13.3, 14.4, 15.5, 1.1, 2.2, 3.3]
3286+
[[1, 2], [3, 4]] [1.1, 2.2, 3.3]
32703287
[[11, 12], [13, 14], [1, 2], [3, 4]] [1.1, 2.2, 3.3]
32713288
[[15, 16], [NULL, 18], [1, 2], [3, 4]] [16.6, 17.7, 18.8, 1.1, 2.2, 3.3]
32723289

@@ -3279,6 +3296,7 @@ select array_concat(column3, make_array('.', '.', '.')) from arrays;
32793296
[d, NULL, l, o, r, ., ., .]
32803297
[s, i, t, ., ., .]
32813298
[a, m, e, t, ., ., .]
3299+
[a, m, e, t, ., ., .]
32823300
[,, ., ., .]
32833301
[., ., .]
32843302

@@ -4687,6 +4705,7 @@ NULL 0
46874705
#NULL 0
46884706

46894707
# cardinality with columns
4708+
# FIXME: should cardinality of an empty array be NULL or zero?
46904709
query III
46914710
select cardinality(column1), cardinality(column2), cardinality(column3) from arrays;
46924711
----
@@ -4695,6 +4714,7 @@ select cardinality(column1), cardinality(column2), cardinality(column3) from arr
46954714
4 3 5
46964715
4 3 3
46974716
NULL 3 4
4717+
NULL NULL 4
46984718
4 NULL 1
46994719
4 3 NULL
47004720

@@ -4706,6 +4726,7 @@ select cardinality(column1), cardinality(column2), cardinality(column3) from lar
47064726
4 3 5
47074727
4 3 3
47084728
NULL 3 4
4729+
NULL NULL 4
47094730
4 NULL 1
47104731
4 3 NULL
47114732

@@ -5376,6 +5397,7 @@ select array_dims(column1), array_dims(column2), array_dims(column3) from arrays
53765397
[2, 2] [3] [5]
53775398
[2, 2] [3] [3]
53785399
NULL [3] [4]
5400+
NULL NULL [4]
53795401
[2, 2] NULL [1]
53805402
[2, 2] [3] NULL
53815403

@@ -5387,6 +5409,7 @@ select array_dims(column1), array_dims(column2), array_dims(column3) from large_
53875409
[2, 2] [3] [5]
53885410
[2, 2] [3] [3]
53895411
NULL [3] [4]
5412+
NULL NULL [4]
53905413
[2, 2] NULL [1]
53915414
[2, 2] [3] NULL
53925415

@@ -5545,6 +5568,7 @@ select array_ndims(column1), array_ndims(column2), array_ndims(column3) from arr
55455568
2 1 1
55465569
2 1 1
55475570
NULL 1 1
5571+
NULL 1 1
55485572
2 NULL 1
55495573
2 1 NULL
55505574

@@ -5556,6 +5580,7 @@ select array_ndims(column1), array_ndims(column2), array_ndims(column3) from lar
55565580
2 1 1
55575581
2 1 1
55585582
NULL 1 1
5583+
NULL 1 1
55595584
2 NULL 1
55605585
2 1 NULL
55615586

@@ -5877,6 +5902,7 @@ true false true false
58775902
true false false true
58785903
false true false false
58795904
NULL NULL false false
5905+
NULL NULL false false
58805906
false false NULL false
58815907
false false false NULL
58825908

@@ -5892,6 +5918,7 @@ true false true false
58925918
true false false true
58935919
false true false false
58945920
NULL NULL false false
5921+
NULL NULL false false
58955922
false false NULL false
58965923
false false false NULL
58975924

@@ -7403,27 +7430,29 @@ false
74037430
#NULL
74047431

74057432
# empty scalar function #5
7406-
query B
7407-
select empty(column1) from arrays;
7433+
query BB
7434+
select empty(column1), empty(column2) from arrays;
74087435
----
7409-
false
7410-
false
7411-
false
7412-
false
7413-
NULL
7414-
false
7415-
false
7436+
false false
7437+
false false
7438+
false false
7439+
false false
7440+
NULL false
7441+
NULL true
7442+
false NULL
7443+
false false
74167444

7417-
query B
7418-
select empty(arrow_cast(column1, 'LargeList(List(Int64))')) from arrays;
7445+
query BB
7446+
select empty(arrow_cast(column1, 'LargeList(List(Int64))')), empty(arrow_cast(column2, 'LargeList(Int64)')) from arrays;
74197447
----
7420-
false
7421-
false
7422-
false
7423-
false
7424-
NULL
7425-
false
7426-
false
7448+
false false
7449+
false false
7450+
false false
7451+
false false
7452+
NULL false
7453+
NULL true
7454+
false NULL
7455+
false false
74277456

74287457
query B
74297458
select empty(column1) from fixed_size_arrays;

0 commit comments

Comments
 (0)