Skip to content

Commit d809f19

Browse files
authored
[Variant] Add documentation, tests and cleaner api for Variant::get_path (#7942)
# Which issue does this PR close? We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. - Follow on to #7919 # Rationale for this change While reviewing #7919 from @Samyak2 I found I wanted to write some additional tests directly for `Variant::get_path`. When I started doing that I found it was somewhat awkward to write examples, so I added some new conversion routines to make it easier. # What changes are included in this PR? 1. Add doc examples (and thus tests) of `VariantGet` and `VariantPath` 2. Add more documentation # Are these changes tested? Yes, by doc examples which run in CI # Are there any user-facing changes? If there are user-facing changes then we may require documentation to be updated before approving the PR. If there are any breaking changes to public APIs, please call them out.
1 parent 03a837e commit d809f19

File tree

4 files changed

+160
-32
lines changed

4 files changed

+160
-32
lines changed

parquet-variant-compute/src/variant_get.rs

Lines changed: 9 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use arrow::{
2222
error::Result,
2323
};
2424
use arrow_schema::{ArrowError, Field};
25-
use parquet_variant::path::VariantPath;
25+
use parquet_variant::VariantPath;
2626

2727
use crate::{VariantArray, VariantArrayBuilder};
2828

@@ -41,8 +41,7 @@ pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result<ArrayRef> {
4141

4242
if let Some(as_type) = options.as_type {
4343
return Err(ArrowError::NotYetImplemented(format!(
44-
"getting a {} from a VariantArray is not implemented yet",
45-
as_type
44+
"getting a {as_type} from a VariantArray is not implemented yet",
4645
)));
4746
}
4847

@@ -91,7 +90,7 @@ mod test {
9190
use std::sync::Arc;
9291

9392
use arrow::array::{Array, ArrayRef, StringArray};
94-
use parquet_variant::path::{VariantPath, VariantPathElement};
93+
use parquet_variant::VariantPath;
9594

9695
use crate::batch_json_string_to_variant;
9796
use crate::VariantArray;
@@ -133,29 +132,21 @@ mod test {
133132
fn get_primitive_variant_field() {
134133
single_variant_get_test(
135134
r#"{"some_field": 1234}"#,
136-
vec![VariantPathElement::field("some_field".into())].into(),
135+
VariantPath::from("some_field"),
137136
"1234",
138137
);
139138
}
140139

141140
#[test]
142141
fn get_primitive_variant_list_index() {
143-
single_variant_get_test(
144-
"[1234, 5678]",
145-
vec![VariantPathElement::index(0)].into(),
146-
"1234",
147-
);
142+
single_variant_get_test("[1234, 5678]", VariantPath::from(0), "1234");
148143
}
149144

150145
#[test]
151146
fn get_primitive_variant_inside_object_of_object() {
152147
single_variant_get_test(
153148
r#"{"top_level_field": {"inner_field": 1234}}"#,
154-
vec![
155-
VariantPathElement::field("top_level_field".into()),
156-
VariantPathElement::field("inner_field".into()),
157-
]
158-
.into(),
149+
VariantPath::from("top_level_field").join("inner_field"),
159150
"1234",
160151
);
161152
}
@@ -164,11 +155,7 @@ mod test {
164155
fn get_primitive_variant_inside_list_of_object() {
165156
single_variant_get_test(
166157
r#"[{"some_field": 1234}]"#,
167-
vec![
168-
VariantPathElement::index(0),
169-
VariantPathElement::field("some_field".into()),
170-
]
171-
.into(),
158+
VariantPath::from(0).join("some_field"),
172159
"1234",
173160
);
174161
}
@@ -177,11 +164,7 @@ mod test {
177164
fn get_primitive_variant_inside_object_of_list() {
178165
single_variant_get_test(
179166
r#"{"some_field": [1234]}"#,
180-
vec![
181-
VariantPathElement::field("some_field".into()),
182-
VariantPathElement::index(0),
183-
]
184-
.into(),
167+
VariantPath::from("some_field").join(0),
185168
"1234",
186169
);
187170
}
@@ -190,7 +173,7 @@ mod test {
190173
fn get_complex_variant() {
191174
single_variant_get_test(
192175
r#"{"top_level_field": {"inner_field": 1234}}"#,
193-
vec![VariantPathElement::field("top_level_field".into())].into(),
176+
VariantPath::from("top_level_field"),
194177
r#"{"inner_field": 1234}"#,
195178
);
196179
}

parquet-variant/src/lib.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@
2020
//! [Variant Binary Encoding]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
2121
//! [Apache Parquet]: https://parquet.apache.org/
2222
//!
23+
//! ## Main APIs
24+
//! - [`Variant`]: Represents a variant value, which can be an object, list, or primitive.
25+
//! - [`VariantBuilder`]: For building `Variant` values.
26+
//!
2327
//! ## 🚧 Work In Progress
2428
//!
2529
//! This crate is under active development and is not yet ready for production use.
@@ -29,9 +33,10 @@
2933
3034
mod builder;
3135
mod decoder;
32-
pub mod path;
36+
mod path;
3337
mod utils;
3438
mod variant;
3539

3640
pub use builder::*;
41+
pub use path::{VariantPath, VariantPathElement};
3742
pub use variant::*;

parquet-variant/src/path.rs

Lines changed: 112 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,77 @@
1616
// under the License.
1717
use std::{borrow::Cow, ops::Deref};
1818

19-
/// Represents a qualified path to a potential subfield or index of a variant value.
20-
#[derive(Debug, Clone)]
19+
/// Represents a qualified path to a potential subfield or index of a variant
20+
/// value.
21+
///
22+
/// Can be used with [`Variant::get_path`] to retrieve a specific subfield of
23+
/// a variant value.
24+
///
25+
/// [`Variant::get_path`]: crate::Variant::get_path
26+
///
27+
/// Create a [`VariantPath`] from a vector of [`VariantPathElement`], or
28+
/// from a single field name or index.
29+
///
30+
/// # Example: Simple paths
31+
/// ```rust
32+
/// # use parquet_variant::{VariantPath, VariantPathElement};
33+
/// // access the field "foo" in a variant object value
34+
/// let path = VariantPath::from("foo");
35+
/// // access the first element in a variant list vale
36+
/// let path = VariantPath::from(0);
37+
/// ```
38+
///
39+
/// # Example: Compound paths
40+
/// ```
41+
/// # use parquet_variant::{VariantPath, VariantPathElement};
42+
/// /// You can also create a path by joining elements together:
43+
/// // access the field "foo" and then the first element in a variant list value
44+
/// let path = VariantPath::from("foo").join(0);
45+
/// // this is the same as the previous one
46+
/// let path2 = VariantPath::new(vec!["foo".into(), 0.into()]);
47+
/// assert_eq!(path, path2);
48+
/// // you can also create a path from a vector of `VariantPathElement` directly
49+
/// let path3 = VariantPath::new(vec![
50+
/// VariantPathElement::field("foo"),
51+
/// VariantPathElement::index(0)
52+
/// ]);
53+
/// assert_eq!(path, path3);
54+
/// ```
55+
///
56+
/// # Example: Accessing Compound paths
57+
/// ```
58+
/// # use parquet_variant::{VariantPath, VariantPathElement};
59+
/// /// You can access the paths using slices
60+
/// // access the field "foo" and then the first element in a variant list value
61+
/// let path = VariantPath::from("foo")
62+
/// .join("bar")
63+
/// .join("baz");
64+
/// assert_eq!(path[1], VariantPathElement::field("bar"));
65+
/// ```
66+
#[derive(Debug, Clone, PartialEq)]
2167
pub struct VariantPath<'a>(Vec<VariantPathElement<'a>>);
2268

2369
impl<'a> VariantPath<'a> {
70+
/// Create a new `VariantPath` from a vector of `VariantPathElement`.
2471
pub fn new(path: Vec<VariantPathElement<'a>>) -> Self {
2572
Self(path)
2673
}
2774

75+
/// Return the inner path elements.
2876
pub fn path(&self) -> &Vec<VariantPathElement> {
2977
&self.0
3078
}
79+
80+
/// Return a new `VariantPath` with element appended
81+
pub fn join(mut self, element: impl Into<VariantPathElement<'a>>) -> Self {
82+
self.push(element);
83+
self
84+
}
85+
86+
/// Append a new element to the path
87+
pub fn push(&mut self, element: impl Into<VariantPathElement<'a>>) {
88+
self.0.push(element.into());
89+
}
3190
}
3291

3392
impl<'a> From<Vec<VariantPathElement<'a>>> for VariantPath<'a> {
@@ -36,6 +95,20 @@ impl<'a> From<Vec<VariantPathElement<'a>>> for VariantPath<'a> {
3695
}
3796
}
3897

98+
/// Create from &str
99+
impl<'a> From<&'a str> for VariantPath<'a> {
100+
fn from(path: &'a str) -> Self {
101+
VariantPath::new(vec![path.into()])
102+
}
103+
}
104+
105+
/// Create from usize
106+
impl<'a> From<usize> for VariantPath<'a> {
107+
fn from(index: usize) -> Self {
108+
VariantPath::new(vec![VariantPathElement::index(index)])
109+
}
110+
}
111+
39112
impl<'a> Deref for VariantPath<'a> {
40113
type Target = [VariantPathElement<'a>];
41114

@@ -44,8 +117,10 @@ impl<'a> Deref for VariantPath<'a> {
44117
}
45118
}
46119

47-
/// Element of a path
48-
#[derive(Debug, Clone)]
120+
/// Element of a [`VariantPath`] that can be a field name or an index.
121+
///
122+
/// See [`VariantPath`] for more details and examples.
123+
#[derive(Debug, Clone, PartialEq)]
49124
pub enum VariantPathElement<'a> {
50125
/// Access field with name `name`
51126
Field { name: Cow<'a, str> },
@@ -54,11 +129,43 @@ pub enum VariantPathElement<'a> {
54129
}
55130

56131
impl<'a> VariantPathElement<'a> {
57-
pub fn field(name: Cow<'a, str>) -> VariantPathElement<'a> {
132+
pub fn field(name: impl Into<Cow<'a, str>>) -> VariantPathElement<'a> {
133+
let name = name.into();
58134
VariantPathElement::Field { name }
59135
}
60136

61137
pub fn index(index: usize) -> VariantPathElement<'a> {
62138
VariantPathElement::Index { index }
63139
}
64140
}
141+
142+
// Conversion utilities for `VariantPathElement` from string types
143+
impl<'a> From<Cow<'a, str>> for VariantPathElement<'a> {
144+
fn from(name: Cow<'a, str>) -> Self {
145+
VariantPathElement::field(name)
146+
}
147+
}
148+
149+
impl<'a> From<&'a str> for VariantPathElement<'a> {
150+
fn from(name: &'a str) -> Self {
151+
VariantPathElement::field(Cow::Borrowed(name))
152+
}
153+
}
154+
155+
impl<'a> From<String> for VariantPathElement<'a> {
156+
fn from(name: String) -> Self {
157+
VariantPathElement::field(Cow::Owned(name))
158+
}
159+
}
160+
161+
impl<'a> From<&'a String> for VariantPathElement<'a> {
162+
fn from(name: &'a String) -> Self {
163+
VariantPathElement::field(Cow::Borrowed(name.as_str()))
164+
}
165+
}
166+
167+
impl<'a> From<usize> for VariantPathElement<'a> {
168+
fn from(index: usize) -> Self {
169+
VariantPathElement::index(index)
170+
}
171+
}

parquet-variant/src/variant.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -942,6 +942,8 @@ impl<'m, 'v> Variant<'m, 'v> {
942942
/// Returns `Some(&VariantObject)` for object variants,
943943
/// `None` for non-object variants.
944944
///
945+
/// See [`Self::get_path`] to dynamically traverse objects
946+
///
945947
/// # Examples
946948
/// ```
947949
/// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
@@ -999,6 +1001,8 @@ impl<'m, 'v> Variant<'m, 'v> {
9991001
/// Returns `Some(&VariantList)` for list variants,
10001002
/// `None` for non-list variants.
10011003
///
1004+
/// See [`Self::get_path`] to dynamically traverse lists
1005+
///
10021006
/// # Examples
10031007
/// ```
10041008
/// # use parquet_variant::{Variant, VariantBuilder, VariantList};
@@ -1068,6 +1072,35 @@ impl<'m, 'v> Variant<'m, 'v> {
10681072
/// Return a new Variant with the path followed.
10691073
///
10701074
/// If the path is not found, `None` is returned.
1075+
///
1076+
/// # Example
1077+
/// ```
1078+
/// # use parquet_variant::{Variant, VariantBuilder, VariantObject, VariantPath};
1079+
/// # let mut builder = VariantBuilder::new();
1080+
/// # let mut obj = builder.new_object();
1081+
/// # let mut list = obj.new_list("foo");
1082+
/// # list.append_value("bar");
1083+
/// # list.append_value("baz");
1084+
/// # list.finish();
1085+
/// # obj.finish().unwrap();
1086+
/// # let (metadata, value) = builder.finish();
1087+
/// // given a variant like `{"foo": ["bar", "baz"]}`
1088+
/// let variant = Variant::new(&metadata, &value);
1089+
/// // Accessing a non existent path returns None
1090+
/// assert_eq!(variant.get_path(&VariantPath::from("non_existent")), None);
1091+
/// // Access obj["foo"]
1092+
/// let path = VariantPath::from("foo");
1093+
/// let foo = variant.get_path(&path).expect("field `foo` should exist");
1094+
/// assert!(foo.as_list().is_some(), "field `foo` should be a list");
1095+
/// // Access foo[0]
1096+
/// let path = VariantPath::from(0);
1097+
/// let bar = foo.get_path(&path).expect("element 0 should exist");
1098+
/// // bar is a string
1099+
/// assert_eq!(bar.as_string(), Some("bar"));
1100+
/// // You can also access nested paths
1101+
/// let path = VariantPath::from("foo").join(0);
1102+
/// assert_eq!(variant.get_path(&path).unwrap(), bar);
1103+
/// ```
10711104
pub fn get_path(&self, path: &VariantPath) -> Option<Variant> {
10721105
path.iter()
10731106
.try_fold(self.clone(), |output, element| match element {

0 commit comments

Comments
 (0)