Skip to content
This repository was archived by the owner on Dec 29, 2021. It is now read-only.

Commit d8c11da

Browse files
committed
remove JsonType and directly use Arrow's DataType
1 parent e6651b1 commit d8c11da

File tree

1 file changed

+13
-42
lines changed

1 file changed

+13
-42
lines changed

src/io/json.rs

Lines changed: 13 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -10,47 +10,18 @@ use arrow::error::ArrowError;
1010
use arrow::record_batch::RecordBatch;
1111
use serde_json::Value;
1212

13-
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
14-
pub enum JsonType {
15-
Bool,
16-
Int,
17-
Float,
18-
Str,
19-
BoolList,
20-
IntList,
21-
FloatList,
22-
StrList,
23-
Struct,
24-
}
25-
26-
fn json_to_datatype(dtype: &JsonType) -> DataType {
27-
use JsonType::*;
28-
match dtype {
29-
Bool => DataType::Boolean,
30-
Int => DataType::Int64,
31-
Float => DataType::Float64,
32-
Str => DataType::Utf8,
33-
// BoolList | IntList | FloatList | StrList => DataType::List(_),
34-
// Struct => DataType::Struct(_),
35-
_ => {
36-
// lists and structs are not supported yet; this arm panics via `unimplemented!` rather than returning an error
37-
unimplemented!("Lists and structs not yet supported")
38-
}
39-
}
40-
}
4113

42-
fn generate_schema(spec: HashMap<String, HashSet<JsonType>>) -> Arc<Schema> {
14+
fn generate_schema(spec: HashMap<String, HashSet<DataType>>) -> Arc<Schema> {
4315
let fields = spec
4416
.iter()
4517
.map(|(k, hs)| {
46-
let v: Vec<&JsonType> = hs.iter().collect();
18+
let v: Vec<&DataType> = hs.iter().collect();
4719
match v.len() {
4820
1 => {
49-
let dtype = json_to_datatype(v[0]);
50-
Field::new(k, dtype, true)
21+
Field::new(k, v[0].clone(), true)
5122
}
5223
2 => {
53-
if v.contains(&&JsonType::Float) && v.contains(&&JsonType::Int) {
24+
if v.contains(&&DataType::Float64) && v.contains(&&DataType::Int64) {
5425
Field::new(k, DataType::Float64, true)
5526
// } else if v.contains(JsonType::Bool) || v.contains(JsonType::Str) {
5627
// Field::new(k, DataType::Utf8, true)
@@ -77,7 +48,7 @@ fn infer_json_schema(
7748
file: File,
7849
max_read_records: Option<usize>,
7950
) -> Result<Arc<Schema>, ArrowError> {
80-
let mut values: HashMap<String, HashSet<JsonType>> = HashMap::new();
51+
let mut values: HashMap<String, HashSet<DataType>> = HashMap::new();
8152
let mut reader = BufReader::new(file.try_clone()?);
8253

8354
let mut line = String::new();
@@ -100,11 +71,11 @@ fn infer_json_schema(
10071
Value::Bool(b) => {
10172
if values.contains_key(k) {
10273
let x = values.get_mut(k).unwrap();
103-
x.insert(JsonType::Bool);
74+
x.insert(DataType::Boolean);
10475
} else {
10576
// create hashset and add value type
10677
let mut hs = HashSet::new();
107-
hs.insert(JsonType::Bool);
78+
hs.insert(DataType::Boolean);
10879
values.insert(k.to_string(), hs);
10980
}
11081
}
@@ -115,34 +86,34 @@ fn infer_json_schema(
11586
if n.is_f64() {
11687
if values.contains_key(k) {
11788
let x = values.get_mut(k).unwrap();
118-
x.insert(JsonType::Float);
89+
x.insert(DataType::Float64);
11990
} else {
12091
// create hashset and add value type
12192
let mut hs = HashSet::new();
122-
hs.insert(JsonType::Float);
93+
hs.insert(DataType::Float64);
12394
values.insert(k.to_string(), hs);
12495
}
12596
} else {
12697
// default to i64
12798
if values.contains_key(k) {
12899
let x = values.get_mut(k).unwrap();
129-
x.insert(JsonType::Int);
100+
x.insert(DataType::Int64);
130101
} else {
131102
// create hashset and add value type
132103
let mut hs = HashSet::new();
133-
hs.insert(JsonType::Int);
104+
hs.insert(DataType::Int64);
134105
values.insert(k.to_string(), hs);
135106
}
136107
}
137108
}
138109
Value::String(_) => {
139110
if values.contains_key(k) {
140111
let x = values.get_mut(k).unwrap();
141-
x.insert(JsonType::Str);
112+
x.insert(DataType::Utf8);
142113
} else {
143114
// create hashset and add value type
144115
let mut hs = HashSet::new();
145-
hs.insert(JsonType::Str);
116+
hs.insert(DataType::Utf8);
146117
values.insert(k.to_string(), hs);
147118
}
148119
}

0 commit comments

Comments
 (0)