Skip to content

Commit

Permalink
Fix NULL values in FixedSizeList creation (#9141)
Browse files: browse the repository at this point in the history
* Fix NULL values in FixedSizeList creation

* Refactor array capacity calculation in ScalarValue

* chore
  • Loading branch information
Weijun-H authored Feb 7, 2024
1 parent d442656 commit 55720a4
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 9 deletions.
27 changes: 23 additions & 4 deletions datafusion/common/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1385,24 +1385,43 @@ impl ScalarValue {
.map(|s| s.to_array())
.collect::<Result<Vec<_>>>()?;

let capacity = Capacities::Array(arrays.iter().map(|arr| arr.len()).sum());
let capacity = Capacities::Array(
arrays
.iter()
.filter_map(|arr| {
if !arr.is_null(0) {
Some(arr.len())
} else {
None
}
})
.sum(),
);

// ScalarValue::List contains a single element ListArray.
let nulls = arrays
.iter()
.map(|arr| arr.is_null(0))
.collect::<Vec<bool>>();
let arrays_data = arrays.iter().map(|arr| arr.to_data()).collect::<Vec<_>>();
let arrays_data = arrays
.iter()
.filter(|arr| !arr.is_null(0))
.map(|arr| arr.to_data())
.collect::<Vec<_>>();

let arrays_ref = arrays_data.iter().collect::<Vec<_>>();
let mut mutable =
MutableArrayData::with_capacities(arrays_ref, true, capacity);

// ScalarValue::List contains a single element ListArray.
for (index, is_null) in (0..arrays.len()).zip(nulls.into_iter()) {
let mut index = 0;
for is_null in nulls.into_iter() {
if is_null {
mutable.extend_nulls(1)
mutable.extend_nulls(1);
} else {
// mutable array contains non-null elements
mutable.extend(index, 0, 1);
index += 1;
}
}
let data = mutable.freeze();
Expand Down
8 changes: 3 additions & 5 deletions datafusion/sqllogictest/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,16 @@ AS
FROM arrays
;

#TODO: create FixedSizeList with NULL column
statement ok
CREATE TABLE fixed_size_arrays
AS VALUES
(arrow_cast(make_array(make_array(NULL, 2),make_array(3, NULL)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(1.1, 2.2, 3.3), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('L', 'o', 'r', 'e', 'm'), 'FixedSizeList(5, Utf8)')),
(arrow_cast(make_array(make_array(3, 4),make_array(5, 6)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(NULL, 5.5, 6.6), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('i', 'p', NULL, 'u', 'm'), 'FixedSizeList(5, Utf8)')),
(arrow_cast(make_array(make_array(5, 6),make_array(7, 8)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(7.7, 8.8, 9.9), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('d', NULL, 'l', 'o', 'r'), 'FixedSizeList(5, Utf8)')),
(arrow_cast(make_array(make_array(7, NULL),make_array(9, 10)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(10.1, NULL, 12.2), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('s', 'i', 't', 'a', 'b'), 'FixedSizeList(5, Utf8)')),
(arrow_cast(make_array(make_array(7, NULL),make_array(9, 10)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(13.3, 14.4, 15.5), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('a', 'm', 'e', 't', 'x'), 'FixedSizeList(5, Utf8)')),
(arrow_cast(make_array(make_array(11, 12),make_array(13, 14)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(13.3, 14.4, 15.5), 'FixedSizeList(3, Float64)'), arrow_cast(make_array(',','a','b','c','d'), 'FixedSizeList(5, Utf8)')),
(arrow_cast(make_array(make_array(15, 16),make_array(NULL, 18)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(16.6, 17.7, 18.8), 'FixedSizeList(3, Float64)'), arrow_cast(make_array(',','a','b','c','d'), 'FixedSizeList(5, Utf8)'))
(NULL, arrow_cast(make_array(13.3, 14.4, 15.5), 'FixedSizeList(3, Float64)'), arrow_cast(make_array('a', 'm', 'e', 't', 'x'), 'FixedSizeList(5, Utf8)')),
(arrow_cast(make_array(make_array(11, 12),make_array(13, 14)), 'FixedSizeList(2, List(Int64))'), NULL, arrow_cast(make_array(',','a','b','c','d'), 'FixedSizeList(5, Utf8)')),
(arrow_cast(make_array(make_array(15, 16),make_array(NULL, 18)), 'FixedSizeList(2, List(Int64))'), arrow_cast(make_array(16.6, 17.7, 18.8), 'FixedSizeList(3, Float64)'), NULL)
;

statement ok
Expand Down Expand Up @@ -4707,7 +4706,6 @@ false false false true
true false true false
true false false true
false true false false
false true false false
false false false false
false false false false

Expand Down

0 comments on commit 55720a4

Please sign in to comment.