@@ -10,47 +10,18 @@ use arrow::error::ArrowError;
10
10
use arrow:: record_batch:: RecordBatch ;
11
11
use serde_json:: Value ;
12
12
13
/// Kind of JSON value recorded for a field while inferring a schema.
///
/// The enum is fieldless, so it derives `Copy` alongside the `Eq`/`Hash`
/// traits it needs to be stored in a `HashSet`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum JsonType {
    /// Boolean value.
    Bool,
    /// Integer number.
    Int,
    /// Floating-point number.
    Float,
    /// String value.
    Str,
    /// List of booleans.
    BoolList,
    /// List of integers.
    IntList,
    /// List of floating-point numbers.
    FloatList,
    /// List of strings.
    StrList,
    /// Nested structure (presumably a JSON object; confirm against the reader).
    Struct,
}
25
-
26
- fn json_to_datatype ( dtype : & JsonType ) -> DataType {
27
- use JsonType :: * ;
28
- match dtype {
29
- Bool => DataType :: Boolean ,
30
- Int => DataType :: Int64 ,
31
- Float => DataType :: Float64 ,
32
- Str => DataType :: Utf8 ,
33
- // BoolList | IntList | FloatList | StrList => DataType::List(_),
34
- // Struct => DataType::Struct(_),
35
- _ => {
36
- // lists and structs, return an error
37
- unimplemented ! ( "Lists and structs not yet supported" )
38
- }
39
- }
40
- }
41
13
42
- fn generate_schema ( spec : HashMap < String , HashSet < JsonType > > ) -> Arc < Schema > {
14
+ fn generate_schema ( spec : HashMap < String , HashSet < DataType > > ) -> Arc < Schema > {
43
15
let fields = spec
44
16
. iter ( )
45
17
. map ( |( k, hs) | {
46
- let v: Vec < & JsonType > = hs. iter ( ) . collect ( ) ;
18
+ let v: Vec < & DataType > = hs. iter ( ) . collect ( ) ;
47
19
match v. len ( ) {
48
20
1 => {
49
- let dtype = json_to_datatype ( v[ 0 ] ) ;
50
- Field :: new ( k, dtype, true )
21
+ Field :: new ( k, v[ 0 ] . clone ( ) , true )
51
22
}
52
23
2 => {
53
- if v. contains ( & & JsonType :: Float ) && v. contains ( & & JsonType :: Int ) {
24
+ if v. contains ( & & DataType :: Float64 ) && v. contains ( & & DataType :: Int64 ) {
54
25
Field :: new ( k, DataType :: Float64 , true )
55
26
// } else if v.contains(JsonType::Bool) || v.contains(JsonType::Str) {
56
27
// Field::new(k, DataType::Utf8, true)
@@ -77,7 +48,7 @@ fn infer_json_schema(
77
48
file : File ,
78
49
max_read_records : Option < usize > ,
79
50
) -> Result < Arc < Schema > , ArrowError > {
80
- let mut values: HashMap < String , HashSet < JsonType > > = HashMap :: new ( ) ;
51
+ let mut values: HashMap < String , HashSet < DataType > > = HashMap :: new ( ) ;
81
52
let mut reader = BufReader :: new ( file. try_clone ( ) ?) ;
82
53
83
54
let mut line = String :: new ( ) ;
@@ -100,11 +71,11 @@ fn infer_json_schema(
100
71
Value :: Bool ( b) => {
101
72
if values. contains_key ( k) {
102
73
let x = values. get_mut ( k) . unwrap ( ) ;
103
- x. insert ( JsonType :: Bool ) ;
74
+ x. insert ( DataType :: Boolean ) ;
104
75
} else {
105
76
// create hashset and add value type
106
77
let mut hs = HashSet :: new ( ) ;
107
- hs. insert ( JsonType :: Bool ) ;
78
+ hs. insert ( DataType :: Boolean ) ;
108
79
values. insert ( k. to_string ( ) , hs) ;
109
80
}
110
81
}
@@ -115,34 +86,34 @@ fn infer_json_schema(
115
86
if n. is_f64 ( ) {
116
87
if values. contains_key ( k) {
117
88
let x = values. get_mut ( k) . unwrap ( ) ;
118
- x. insert ( JsonType :: Float ) ;
89
+ x. insert ( DataType :: Float64 ) ;
119
90
} else {
120
91
// create hashset and add value type
121
92
let mut hs = HashSet :: new ( ) ;
122
- hs. insert ( JsonType :: Float ) ;
93
+ hs. insert ( DataType :: Float64 ) ;
123
94
values. insert ( k. to_string ( ) , hs) ;
124
95
}
125
96
} else {
126
97
// default to i64
127
98
if values. contains_key ( k) {
128
99
let x = values. get_mut ( k) . unwrap ( ) ;
129
- x. insert ( JsonType :: Int ) ;
100
+ x. insert ( DataType :: Int64 ) ;
130
101
} else {
131
102
// create hashset and add value type
132
103
let mut hs = HashSet :: new ( ) ;
133
- hs. insert ( JsonType :: Int ) ;
104
+ hs. insert ( DataType :: Int64 ) ;
134
105
values. insert ( k. to_string ( ) , hs) ;
135
106
}
136
107
}
137
108
}
138
109
Value :: String ( _) => {
139
110
if values. contains_key ( k) {
140
111
let x = values. get_mut ( k) . unwrap ( ) ;
141
- x. insert ( JsonType :: Str ) ;
112
+ x. insert ( DataType :: Utf8 ) ;
142
113
} else {
143
114
// create hashset and add value type
144
115
let mut hs = HashSet :: new ( ) ;
145
- hs. insert ( JsonType :: Str ) ;
116
+ hs. insert ( DataType :: Utf8 ) ;
146
117
values. insert ( k. to_string ( ) , hs) ;
147
118
}
148
119
}
0 commit comments