Skip to content

Commit 3c514e5

Browse files
committed
Add test for with_struct_encoding and encode data_type and language
1 parent 6d40bbd commit 3c514e5

File tree

5 files changed: +210 / -45 lines changed

lib/arrow-rdf/src/encoded/encoding/with_struct_encoding.rs

Lines changed: 61 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -90,15 +90,17 @@ fn into_struct_enc<'data>(
9090
TermRef::NumericLiteral(v) => {
9191
builder.append_numeric(v.into(), v.to_be_bytes().as_ref())
9292
}
93-
TermRef::SimpleLiteral(v) => builder.append_string(v.value),
94-
TermRef::LanguageStringLiteral(v) => builder.append_string(v.value),
93+
TermRef::SimpleLiteral(v) => builder.append_string(v.value, None),
94+
TermRef::LanguageStringLiteral(v) => {
95+
builder.append_string(v.value, Some(v.language))
96+
}
9597
TermRef::DateTimeLiteral(v) => builder.append_date_time(v),
9698
TermRef::TimeLiteral(v) => builder.append_time(v),
9799
TermRef::DateLiteral(v) => builder.append_date(v),
98100
TermRef::DurationLiteral(v) => builder.append_duration(v),
99101
TermRef::YearMonthDurationLiteral(v) => builder.append_year_month_duration(v),
100102
TermRef::DayTimeDurationLiteral(v) => builder.append_day_time_duration(v),
101-
TermRef::TypedLiteral(v) => builder.append_literal(v.value),
103+
TermRef::TypedLiteral(v) => builder.append_literal(v.value, v.literal_type),
102104
}
103105
} else {
104106
builder.append_null()
@@ -107,3 +109,59 @@ fn into_struct_enc<'data>(
107109

108110
Ok(builder.finish())
109111
}
112+
113+
// Unit tests for the UDF defined in this file (`super::EncWithSortableEncoding`).
#[cfg(test)]
114+
mod tests {
115+
use crate::encoded::{EncRdfTermBuilder, FromEncodedTerm};
116+
use crate::sortable::FromSortableTerm;
117+
use crate::{as_enc_term_array, DFResult};
118+
use datafusion::arrow::array::{Array, AsArray};
119+
use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl};
120+
use datamodel::{Date, DayTimeDuration, TermRef, Timestamp, YearMonthDuration};
121+
use oxrdf::vocab::xsd;
122+
use std::sync::Arc;
123+
124+
// Round-trip check: each row written with `EncRdfTermBuilder` must decode to the same
// `TermRef` after the array has been passed through the UDF under test.
#[test]
125+
fn test_with_struct_encoding_results_in_same_terms() -> DFResult<()> {
126+
// Build the input array: one row per `append_*` call below.
let mut test_data_builder = EncRdfTermBuilder::new();
127+
test_data_builder.append_named_node("http://www.example.org/instance#a")?;
128+
test_data_builder.append_blank_node("blank1")?;
129+
test_data_builder.append_boolean(true)?;
130+
test_data_builder.append_int(1.into())?;
131+
test_data_builder.append_integer(2.into())?;
132+
test_data_builder.append_float(3u16.into())?;
133+
test_data_builder.append_double(4.into())?;
134+
test_data_builder.append_decimal(5.into())?;
135+
test_data_builder.append_date(Date::new(Timestamp::new(0.into(), None)))?;
136+
// Durations are appended three ways: year-month part only, day-time part only, and both.
test_data_builder.append_duration(Some(YearMonthDuration::new(12).into()), None)?;
137+
test_data_builder.append_duration(None, Some(DayTimeDuration::new(30)))?;
138+
test_data_builder.append_duration(
139+
Some(YearMonthDuration::new(12).into()),
140+
Some(DayTimeDuration::new(30)),
141+
)?;
142+
// Strings are appended both without and with a language argument.
test_data_builder.append_string("simple string", None)?;
143+
test_data_builder.append_string("language string", Some("en"))?;
144+
// A typed literal carries its datatype IRI (here `xsd:short`) as the second argument.
test_data_builder.append_typed_literal("10", xsd::SHORT.as_str())?;
145+
// The last row is a null.
test_data_builder.append_null()?;
146+
let test_array = test_data_builder.finish()?;
147+
148+
// Invoke the UDF once over the whole input array and materialize the result as an array.
let number_of_rows = test_array.len();
149+
let udf = super::EncWithSortableEncoding::new();
150+
let result = udf
151+
.invoke_batch(
152+
&[ColumnarValue::Array(Arc::new(test_array.clone()))],
153+
number_of_rows,
154+
)?
155+
.to_array(number_of_rows)?;
156+
157+
// Decode row `i` from the input (`from_enc_array`) and from the UDF output
// (`from_sortable_array`), and require the two decoded terms to be equal.
let expected_array = as_enc_term_array(&test_array)?;
158+
let result = result.as_struct();
159+
for i in 0..number_of_rows {
160+
let expected = TermRef::from_enc_array(expected_array, i);
161+
let actual = TermRef::from_sortable_array(result, i);
162+
assert_eq!(expected, actual);
163+
}
164+
165+
Ok(())
166+
}
167+
}

lib/arrow-rdf/src/sortable/builder.rs

Lines changed: 30 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ impl SortableTermBuilder {
2222
}
2323

2424
pub fn append_null(&mut self) {
25-
self.append(SortableTermType::Null, EncTermField::Null, None, &[])
25+
self.append(SortableTermType::Null, EncTermField::Null, None, &[], None)
2626
}
2727

2828
pub fn append_boolean(&mut self, value: Boolean) {
@@ -31,6 +31,7 @@ impl SortableTermBuilder {
3131
EncTermField::Boolean,
3232
Some(value.into()),
3333
&value.to_be_bytes(),
34+
None,
3435
)
3536
}
3637

@@ -48,6 +49,7 @@ impl SortableTermBuilder {
4849
field,
4950
Some(value),
5051
original_be_bytes,
52+
None,
5153
)
5254
}
5355

@@ -57,6 +59,7 @@ impl SortableTermBuilder {
5759
EncTermField::BlankNode,
5860
None,
5961
value.as_str().as_bytes(),
62+
None,
6063
)
6164
}
6265

@@ -66,15 +69,17 @@ impl SortableTermBuilder {
6669
EncTermField::NamedNode,
6770
None,
6871
value.as_str().as_bytes(),
72+
None,
6973
)
7074
}
7175

72-
pub fn append_string(&mut self, value: &str) {
76+
pub fn append_string(&mut self, value: &str, language: Option<&str>) {
7377
self.append(
7478
SortableTermType::String,
7579
EncTermField::String,
7680
None,
7781
value.as_bytes(),
82+
language.map(|l| l.as_bytes()),
7883
)
7984
}
8085

@@ -84,6 +89,7 @@ impl SortableTermBuilder {
8489
EncTermField::DateTime,
8590
Some(value.timestamp().value().into()),
8691
&value.to_be_bytes(),
92+
None,
8793
)
8894
}
8995

@@ -93,6 +99,7 @@ impl SortableTermBuilder {
9399
EncTermField::Time,
94100
Some(value.timestamp().value().into()),
95101
&value.to_be_bytes(),
102+
None,
96103
)
97104
}
98105

@@ -102,15 +109,17 @@ impl SortableTermBuilder {
102109
EncTermField::Date,
103110
Some(value.timestamp().value().into()),
104111
&value.to_be_bytes(),
112+
None,
105113
)
106114
}
107115

108116
pub(crate) fn append_duration(&mut self, value: Duration) {
109117
self.append(
110118
SortableTermType::Duration,
111119
EncTermField::Duration,
112-
Some(Integer::from(value.all_months()).into()),
113-
&value.seconds().to_string().as_bytes(),
120+
None, // No numeric sort key: durations are sorted by the big-endian bytes passed below
121+
&value.to_be_bytes(),
122+
None,
114123
)
115124
}
116125

@@ -119,7 +128,8 @@ impl SortableTermBuilder {
119128
SortableTermType::YearMonthDuration,
120129
EncTermField::Duration,
121130
Some(Integer::from(value.as_i64()).into()),
122-
&value.to_be_bytes(),
131+
Duration::from(value).to_be_bytes().as_slice(),
132+
None,
123133
)
124134
}
125135

@@ -128,16 +138,18 @@ impl SortableTermBuilder {
128138
SortableTermType::DayTimeDuration,
129139
EncTermField::Duration,
130140
Some(value.as_seconds().into()),
131-
&value.to_be_bytes(),
141+
Duration::from(value).to_be_bytes().as_slice(),
142+
None,
132143
)
133144
}
134145

135-
pub fn append_literal(&mut self, value: &str) {
146+
pub fn append_literal(&mut self, value: &str, literal_type: &str) {
136147
self.append(
137148
SortableTermType::UnsupportedLiteral,
138149
EncTermField::TypedLiteral,
139150
None,
140151
value.as_bytes(),
152+
Some(literal_type.as_bytes()),
141153
)
142154
}
143155

@@ -147,6 +159,7 @@ impl SortableTermBuilder {
147159
enc_type: EncTermField,
148160
numeric: Option<Double>,
149161
bytes: &[u8],
162+
additional_bytes: Option<&[u8]>,
150163
) {
151164
self.builder
152165
.field_builder::<UInt8Builder>(SortableTermField::Type.index())
@@ -172,6 +185,16 @@ impl SortableTermBuilder {
172185
.unwrap();
173186
bytes_builder.append_value(bytes);
174187

188+
let additional_bytes_builder = self
189+
.builder
190+
.field_builder::<BinaryBuilder>(SortableTermField::AdditionalBytes.index())
191+
.unwrap();
192+
if let Some(additional_bytes) = additional_bytes {
193+
additional_bytes_builder.append_value(additional_bytes);
194+
} else {
195+
additional_bytes_builder.append_null();
196+
}
197+
175198
self.builder.append(true)
176199
}
177200

0 commit comments

Comments
 (0)