1
1
package csv
2
2
3
+ // TODO: error, throw error always? no decode value with gap etc? -d csv from_csv
4
+ // TODO: header row field count mismatch error; the csv reader takes care of the check at the moment. Could set FieldsPerRecord to -1
5
+ // TODO: row object keys mismatch writer
6
+ // TODO: lazy quotes?
7
+ // TODO: comment in writer? string elements?
8
+ // TODO: to_csv objects
9
+ // TODO: to_csv opts help
10
+ // TODO: Go maps have random iteration order, so headers are currently sorted
11
+ // TODO: option aliases?
12
+ // TODO: snake_case option?
13
+
3
14
import (
4
15
"bytes"
5
16
"embed"
6
- "encoding/csv"
7
17
"errors"
8
18
"fmt"
9
19
"io"
20
+ "sort"
10
21
11
22
"github.com/wader/fq/format"
23
+ "github.com/wader/fq/internal/csvex"
12
24
"github.com/wader/fq/internal/gojqex"
13
25
"github.com/wader/fq/pkg/bitio"
14
26
"github.com/wader/fq/pkg/decode"
@@ -27,8 +39,11 @@ func init() {
27
39
ProbeOrder : format .ProbeOrderTextFuzzy ,
28
40
DecodeFn : decodeCSV ,
29
41
DefaultInArg : format.CSVLIn {
30
- Comma : "," ,
31
- Comment : "#" ,
42
+ Delimiter : "," ,
43
+ Comment : "" ,
44
+ QuoteChar : `"` ,
45
+ Header : true ,
46
+ SkipInitialSpace : false ,
32
47
},
33
48
Functions : []string {"_todisplay" },
34
49
})
@@ -40,29 +55,59 @@ func decodeCSV(d *decode.D) any {
40
55
var ci format.CSVLIn
41
56
d .ArgAs (& ci )
42
57
43
- var rvs []any
44
58
br := d .RawLen (d .Len ())
45
- r := csv .NewReader (bitio .NewIOReader (br ))
46
- r .TrimLeadingSpace = true
59
+ r := csvex .NewReader (bitio .NewIOReader (br ))
47
60
r .LazyQuotes = true
48
- if ci .Comma != "" {
61
+ if ci .Delimiter != "" {
62
+ r .Comma = rune (ci .Delimiter [0 ])
63
+ } else if ci .Comma != "" {
49
64
r .Comma = rune (ci .Comma [0 ])
50
65
}
51
66
if ci .Comment != "" {
52
67
r .Comment = rune (ci .Comment [0 ])
68
+ } else {
69
+ r .Comment = 0
70
+ }
71
+ if ci .QuoteChar != "" {
72
+ r .Quote = rune (ci .QuoteChar [0 ])
73
+ } else {
74
+ r .Quote = '"'
53
75
}
76
+ r .TrimLeadingSpace = ci .SkipInitialSpace
77
+
78
+ row := 1
79
+ var rvs []any
80
+
81
+ var headers []string
54
82
for {
55
83
r , err := r .Read ()
56
84
if errors .Is (err , io .EOF ) {
57
85
break
58
86
} else if err != nil {
59
87
return err
60
88
}
61
- var vs []any
62
- for _ , s := range r {
63
- vs = append (vs , s )
89
+
90
+ if ci .Header {
91
+ if headers == nil {
92
+ // TODO: duplicate headers?
93
+ headers = append (headers , r ... )
94
+ } else {
95
+ obj := map [string ]any {}
96
+ for i , s := range r {
97
+ h := headers [i ]
98
+ obj [h ] = s
99
+ }
100
+ rvs = append (rvs , obj )
101
+ }
102
+ } else {
103
+ var vs []any
104
+ for _ , s := range r {
105
+ vs = append (vs , s )
106
+ }
107
+ rvs = append (rvs , vs )
64
108
}
65
- rvs = append (rvs , vs )
109
+
110
+ row ++
66
111
}
67
112
68
113
d .Value .V = & scalar.Any {Actual : rvs }
@@ -72,35 +117,108 @@ func decodeCSV(d *decode.D) any {
72
117
}
73
118
74
119
// ToCSVOpts holds the options accepted by toCSV.
type ToCSVOpts struct {
75
- Comma string
120
+ Comma string // alias for Delimiter; only consulted when Delimiter is empty
121
+ Delimiter string // field separator; toCSV uses only its first byte
122
+ QuoteChar string // quote character; toCSV uses only its first byte and falls back to `"` when empty
123
+ Header bool // NOTE(review): not read in the visible part of toCSV; confirm where it takes effect
76
124
}
77
125
78
126
func toCSV (_ * interp.Interp , c []any , opts ToCSVOpts ) any {
79
127
b := & bytes.Buffer {}
80
- w := csv .NewWriter (b )
81
- if opts .Comma != "" {
128
+ w := csvex .NewWriter (b )
129
+ if opts .Delimiter != "" {
130
+ w .Comma = rune (opts .Delimiter [0 ])
131
+ } else if opts .Comma != "" {
82
132
w .Comma = rune (opts .Comma [0 ])
83
133
}
134
+ if opts .QuoteChar != "" {
135
+ w .Quote = rune (opts .QuoteChar [0 ])
136
+ } else {
137
+ w .Quote = '"'
138
+ }
139
+
140
+ seenObject := 0
141
+ seenArrays := 0
142
+ var headers []string
143
+
84
144
for _ , row := range c {
85
- rs , ok := gojqex.Cast [[]any ](row )
86
- if ! ok {
87
- return fmt .Errorf ("expected row to be an array, got %s" , gojqex .TypeErrorPreview (row ))
88
- }
89
- vs , ok := gojqex .NormalizeToStrings (rs ).([]any )
90
- if ! ok {
91
- panic ("not array" )
92
- }
93
- var ss []string
94
- for _ , v := range vs {
95
- s , ok := v .(string )
145
+ switch row .(type ) {
146
+ case []any :
147
+ if seenObject > 0 {
148
+ return fmt .Errorf ("mixed row types, expected row to be an object, got %s" , gojqex .TypeErrorPreview (row ))
149
+ }
150
+
151
+ rs , ok := gojqex.Cast [[]any ](row )
96
152
if ! ok {
97
- return fmt .Errorf ("expected row record to be scalars , got %s" , gojqex .TypeErrorPreview (v ))
153
+ return fmt .Errorf ("expected row to be an array , got %s" , gojqex .TypeErrorPreview (row ))
98
154
}
99
- ss = append (ss , s )
100
- }
101
- if err := w .Write (ss ); err != nil {
102
- return err
155
+ vs , ok := gojqex .NormalizeToStrings (rs ).([]any )
156
+ if ! ok {
157
+ panic ("not array" )
158
+ }
159
+ var ss []string
160
+ for _ , v := range vs {
161
+ s , ok := v .(string )
162
+ if ! ok {
163
+ return fmt .Errorf ("expected row record to be scalars, got %s" , gojqex .TypeErrorPreview (v ))
164
+ }
165
+ ss = append (ss , s )
166
+ }
167
+ if err := w .Write (ss ); err != nil {
168
+ return err
169
+ }
170
+
171
+ seenArrays ++
172
+ case map [string ]any :
173
+ if seenArrays > 0 {
174
+ return fmt .Errorf ("mixed row types, expected row to be an array, got %s" , gojqex .TypeErrorPreview (row ))
175
+ }
176
+
177
+ rm , ok := gojqex.Cast [map [string ]any ](row )
178
+ if ! ok {
179
+ return fmt .Errorf ("expected row to be an object, got %s" , gojqex .TypeErrorPreview (row ))
180
+ }
181
+ vm , ok := gojqex .NormalizeToStrings (rm ).(map [string ]any )
182
+ if ! ok {
183
+ panic ("not object" )
184
+ }
185
+
186
+ if headers == nil {
187
+ // TODO: maps are random order in go
188
+ for k := range vm {
189
+ headers = append (headers , k )
190
+ }
191
+ sort .Strings (headers )
192
+
193
+ if err := w .Write (headers ); err != nil {
194
+ return err
195
+ }
196
+ }
197
+
198
+ var ss []string
199
+ keysFound := 0
200
+ for _ , k := range headers {
201
+ s , ok := vm [k ].(string )
202
+ if ! ok {
203
+ return fmt .Errorf ("expected row object to have a %q key, %s" , k , gojqex .TypeErrorPreview (row ))
204
+ }
205
+ ss = append (ss , s )
206
+ keysFound ++
207
+ }
208
+ // TODO: what keys are extra/missing
209
+ if keysFound < len (headers ) {
210
+ return fmt .Errorf ("expected row object has missing keys %s" , gojqex .TypeErrorPreview (row ))
211
+ } else if keysFound > len (headers ) {
212
+ return fmt .Errorf ("expected row object has extra keys %s" , gojqex .TypeErrorPreview (row ))
213
+ }
214
+
215
+ if err := w .Write (ss ); err != nil {
216
+ return err
217
+ }
218
+
219
+ seenObject ++
103
220
}
221
+
104
222
}
105
223
w .Flush ()
106
224
0 commit comments