1
1
package csv
2
2
3
+ // TODO: error, throw error always? no decode value with gap etc? -d csv from_csv
4
+ // TODO: report an explicit error when a row's field count differs from the header row; the csv reader performs this check at the moment (FieldsPerRecord = -1 would disable it)
5
+ // TODO: row object keys mismatch writer
6
+ // TODO: lazy quotes?
7
+ // TODO: comment in writer? string elements?
8
+ // TODO: to_csv objects
9
+ // TODO: to_csv opts help
10
+ // TODO: Go map iteration order is random, so to_csv currently sorts the header names
11
+ // TODO: option aliases?
12
+ // TODO: snake_case option?
13
+
3
14
import (
4
15
"bytes"
5
16
"embed"
6
- "encoding/csv"
7
17
"errors"
8
18
"fmt"
9
19
"io"
20
+ "sort"
10
21
11
22
"github.com/wader/fq/format"
23
+ "github.com/wader/fq/internal/csvex"
12
24
"github.com/wader/fq/internal/gojqex"
13
25
"github.com/wader/fq/pkg/bitio"
14
26
"github.com/wader/fq/pkg/decode"
@@ -27,8 +39,11 @@ func init() {
27
39
ProbeOrder : format .ProbeOrderTextFuzzy ,
28
40
DecodeFn : decodeCSV ,
29
41
DecodeInArg : format.CSVLIn {
30
- Comma : "," ,
31
- Comment : "#" ,
42
+ Delimiter : "," ,
43
+ Comment : "" ,
44
+ QuoteChar : `"` ,
45
+ Header : true ,
46
+ SkipInitialSpace : false ,
32
47
},
33
48
Functions : []string {"_todisplay" },
34
49
})
@@ -39,29 +54,59 @@ func init() {
39
54
func decodeCSV (d * decode.D , in any ) any {
40
55
ci , _ := in .(format.CSVLIn )
41
56
42
- var rvs []any
43
57
br := d .RawLen (d .Len ())
44
- r := csv .NewReader (bitio .NewIOReader (br ))
45
- r .TrimLeadingSpace = true
58
+ r := csvex .NewReader (bitio .NewIOReader (br ))
46
59
r .LazyQuotes = true
47
- if ci .Comma != "" {
60
+ if ci .Delimiter != "" {
61
+ r .Comma = rune (ci .Delimiter [0 ])
62
+ } else if ci .Comma != "" {
48
63
r .Comma = rune (ci .Comma [0 ])
49
64
}
50
65
if ci .Comment != "" {
51
66
r .Comment = rune (ci .Comment [0 ])
67
+ } else {
68
+ r .Comment = 0
69
+ }
70
+ if ci .QuoteChar != "" {
71
+ r .Quote = rune (ci .QuoteChar [0 ])
72
+ } else {
73
+ r .Quote = '"'
52
74
}
75
+ r .TrimLeadingSpace = ci .SkipInitialSpace
76
+
77
+ row := 1
78
+ var rvs []any
79
+
80
+ var headers []string
53
81
for {
54
82
r , err := r .Read ()
55
83
if errors .Is (err , io .EOF ) {
56
84
break
57
85
} else if err != nil {
58
86
return err
59
87
}
60
- var vs []any
61
- for _ , s := range r {
62
- vs = append (vs , s )
88
+
89
+ if ci .Header {
90
+ if headers == nil {
91
+ // TODO: duplicate headers?
92
+ headers = append (headers , r ... )
93
+ } else {
94
+ obj := map [string ]any {}
95
+ for i , s := range r {
96
+ h := headers [i ]
97
+ obj [h ] = s
98
+ }
99
+ rvs = append (rvs , obj )
100
+ }
101
+ } else {
102
+ var vs []any
103
+ for _ , s := range r {
104
+ vs = append (vs , s )
105
+ }
106
+ rvs = append (rvs , vs )
63
107
}
64
- rvs = append (rvs , vs )
108
+
109
+ row ++
65
110
}
66
111
67
112
d .Value .V = & scalar.Any {Actual : rvs }
@@ -71,35 +116,108 @@ func decodeCSV(d *decode.D, in any) any {
71
116
}
72
117
73
118
// ToCSVOpts holds the options accepted by toCSV when encoding rows as CSV.
type ToCSVOpts struct {
	// Comma is an alias for Delimiter; it is only consulted when Delimiter is
	// empty.
	Comma string
	// Delimiter is the field separator. Only its first byte is used; when both
	// Delimiter and Comma are empty the writer's own default is kept.
	Delimiter string
	// QuoteChar is the quote character. Only its first byte is used; an empty
	// value selects `"`.
	QuoteChar string
	// Header presumably controls whether a header row is emitted.
	// NOTE(review): the visible part of toCSV never reads this field — confirm.
	Header bool
}
76
124
77
125
func toCSV (_ * interp.Interp , c []any , opts ToCSVOpts ) any {
78
126
b := & bytes.Buffer {}
79
- w := csv .NewWriter (b )
80
- if opts .Comma != "" {
127
+ w := csvex .NewWriter (b )
128
+ if opts .Delimiter != "" {
129
+ w .Comma = rune (opts .Delimiter [0 ])
130
+ } else if opts .Comma != "" {
81
131
w .Comma = rune (opts .Comma [0 ])
82
132
}
133
+ if opts .QuoteChar != "" {
134
+ w .Quote = rune (opts .QuoteChar [0 ])
135
+ } else {
136
+ w .Quote = '"'
137
+ }
138
+
139
+ seenObject := 0
140
+ seenArrays := 0
141
+ var headers []string
142
+
83
143
for _ , row := range c {
84
- rs , ok := gojqex.Cast [[]any ](row )
85
- if ! ok {
86
- return fmt .Errorf ("expected row to be an array, got %s" , gojqex .TypeErrorPreview (row ))
87
- }
88
- vs , ok := gojqex .NormalizeToStrings (rs ).([]any )
89
- if ! ok {
90
- panic ("not array" )
91
- }
92
- var ss []string
93
- for _ , v := range vs {
94
- s , ok := v .(string )
144
+ switch row .(type ) {
145
+ case []any :
146
+ if seenObject > 0 {
147
+ return fmt .Errorf ("mixed row types, expected row to be an object, got %s" , gojqex .TypeErrorPreview (row ))
148
+ }
149
+
150
+ rs , ok := gojqex.Cast [[]any ](row )
95
151
if ! ok {
96
- return fmt .Errorf ("expected row record to be scalars , got %s" , gojqex .TypeErrorPreview (v ))
152
+ return fmt .Errorf ("expected row to be an array , got %s" , gojqex .TypeErrorPreview (row ))
97
153
}
98
- ss = append (ss , s )
99
- }
100
- if err := w .Write (ss ); err != nil {
101
- return err
154
+ vs , ok := gojqex .NormalizeToStrings (rs ).([]any )
155
+ if ! ok {
156
+ panic ("not array" )
157
+ }
158
+ var ss []string
159
+ for _ , v := range vs {
160
+ s , ok := v .(string )
161
+ if ! ok {
162
+ return fmt .Errorf ("expected row record to be scalars, got %s" , gojqex .TypeErrorPreview (v ))
163
+ }
164
+ ss = append (ss , s )
165
+ }
166
+ if err := w .Write (ss ); err != nil {
167
+ return err
168
+ }
169
+
170
+ seenArrays ++
171
+ case map [string ]any :
172
+ if seenArrays > 0 {
173
+ return fmt .Errorf ("mixed row types, expected row to be an array, got %s" , gojqex .TypeErrorPreview (row ))
174
+ }
175
+
176
+ rm , ok := gojqex.Cast [map [string ]any ](row )
177
+ if ! ok {
178
+ return fmt .Errorf ("expected row to be an object, got %s" , gojqex .TypeErrorPreview (row ))
179
+ }
180
+ vm , ok := gojqex .NormalizeToStrings (rm ).(map [string ]any )
181
+ if ! ok {
182
+ panic ("not object" )
183
+ }
184
+
185
+ if headers == nil {
186
+ // TODO: maps are random order in go
187
+ for k := range vm {
188
+ headers = append (headers , k )
189
+ }
190
+ sort .Strings (headers )
191
+
192
+ if err := w .Write (headers ); err != nil {
193
+ return err
194
+ }
195
+ }
196
+
197
+ var ss []string
198
+ keysFound := 0
199
+ for _ , k := range headers {
200
+ s , ok := vm [k ].(string )
201
+ if ! ok {
202
+ return fmt .Errorf ("expected row object to have a %q key, %s" , k , gojqex .TypeErrorPreview (row ))
203
+ }
204
+ ss = append (ss , s )
205
+ keysFound ++
206
+ }
207
+ // TODO: what keys are extra/missing
208
+ if keysFound < len (headers ) {
209
+ return fmt .Errorf ("expected row object has missing keys %s" , gojqex .TypeErrorPreview (row ))
210
+ } else if keysFound > len (headers ) {
211
+ return fmt .Errorf ("expected row object has extra keys %s" , gojqex .TypeErrorPreview (row ))
212
+ }
213
+
214
+ if err := w .Write (ss ); err != nil {
215
+ return err
216
+ }
217
+
218
+ seenObject ++
102
219
}
220
+
103
221
}
104
222
w .Flush ()
105
223
0 commit comments