Skip to content

Commit f82af49

Browse files
committed
csv: Add more from/to options
Add header option, on by default. Disable comments by default. Rename comma to delimiter. Add quote_char option. Add skip initial space option. Uses a forked version of std csv to support a custom quote character. See top of csv.go for TODOs.
1 parent 23d980d commit f82af49

File tree

7 files changed

+1608
-32
lines changed

7 files changed

+1608
-32
lines changed

format/csv/csv.go

Lines changed: 148 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,26 @@
11
package csv
22

3+
// TODO: error, throw error always? no decode value with gap etc? -d csv from_csv
4+
// TODO: header row field count mismatch error, csv reader takes care of check atm. can use FieldsPerRecord -1
5+
// TODO: row object keys mismatch writer
6+
// TODO: lazy quotes?
7+
// TODO: comment in writer? string elements?
8+
// TODO: to_csv objects
9+
// TODO: to_csv opts help
10+
// TODO: go maps are random order, now sorts headers
11+
// TODO: option aliases?
12+
// TODO: snake_case option?
13+
314
import (
415
"bytes"
516
"embed"
6-
"encoding/csv"
717
"errors"
818
"fmt"
919
"io"
20+
"sort"
1021

1122
"github.com/wader/fq/format"
23+
"github.com/wader/fq/internal/csvex"
1224
"github.com/wader/fq/internal/gojqex"
1325
"github.com/wader/fq/pkg/bitio"
1426
"github.com/wader/fq/pkg/decode"
@@ -27,8 +39,11 @@ func init() {
2739
ProbeOrder: format.ProbeOrderTextFuzzy,
2840
DecodeFn: decodeCSV,
2941
DefaultInArg: format.CSVLIn{
30-
Comma: ",",
31-
Comment: "#",
42+
Delimiter: ",",
43+
Comment: "",
44+
QuoteChar: `"`,
45+
Header: true,
46+
SkipInitialSpace: false,
3247
},
3348
Functions: []string{"_todisplay"},
3449
})
@@ -40,29 +55,59 @@ func decodeCSV(d *decode.D) any {
4055
var ci format.CSVLIn
4156
d.ArgAs(&ci)
4257

43-
var rvs []any
4458
br := d.RawLen(d.Len())
45-
r := csv.NewReader(bitio.NewIOReader(br))
46-
r.TrimLeadingSpace = true
59+
r := csvex.NewReader(bitio.NewIOReader(br))
4760
r.LazyQuotes = true
48-
if ci.Comma != "" {
61+
if ci.Delimiter != "" {
62+
r.Comma = rune(ci.Delimiter[0])
63+
} else if ci.Comma != "" {
4964
r.Comma = rune(ci.Comma[0])
5065
}
5166
if ci.Comment != "" {
5267
r.Comment = rune(ci.Comment[0])
68+
} else {
69+
r.Comment = 0
70+
}
71+
if ci.QuoteChar != "" {
72+
r.Quote = rune(ci.QuoteChar[0])
73+
} else {
74+
r.Quote = '"'
5375
}
76+
r.TrimLeadingSpace = ci.SkipInitialSpace
77+
78+
row := 1
79+
var rvs []any
80+
81+
var headers []string
5482
for {
5583
r, err := r.Read()
5684
if errors.Is(err, io.EOF) {
5785
break
5886
} else if err != nil {
5987
return err
6088
}
61-
var vs []any
62-
for _, s := range r {
63-
vs = append(vs, s)
89+
90+
if ci.Header {
91+
if headers == nil {
92+
// TODO: duplicate headers?
93+
headers = append(headers, r...)
94+
} else {
95+
obj := map[string]any{}
96+
for i, s := range r {
97+
h := headers[i]
98+
obj[h] = s
99+
}
100+
rvs = append(rvs, obj)
101+
}
102+
} else {
103+
var vs []any
104+
for _, s := range r {
105+
vs = append(vs, s)
106+
}
107+
rvs = append(rvs, vs)
64108
}
65-
rvs = append(rvs, vs)
109+
110+
row++
66111
}
67112

68113
d.Value.V = &scalar.Any{Actual: rvs}
@@ -72,35 +117,108 @@ func decodeCSV(d *decode.D) any {
72117
}
73118

74119
type ToCSVOpts struct {
75-
Comma string
120+
Comma string // alias for Delimiter
121+
Delimiter string
122+
QuoteChar string
123+
Header bool
76124
}
77125

78126
func toCSV(_ *interp.Interp, c []any, opts ToCSVOpts) any {
79127
b := &bytes.Buffer{}
80-
w := csv.NewWriter(b)
81-
if opts.Comma != "" {
128+
w := csvex.NewWriter(b)
129+
if opts.Delimiter != "" {
130+
w.Comma = rune(opts.Delimiter[0])
131+
} else if opts.Comma != "" {
82132
w.Comma = rune(opts.Comma[0])
83133
}
134+
if opts.QuoteChar != "" {
135+
w.Quote = rune(opts.QuoteChar[0])
136+
} else {
137+
w.Quote = '"'
138+
}
139+
140+
seenObject := 0
141+
seenArrays := 0
142+
var headers []string
143+
84144
for _, row := range c {
85-
rs, ok := gojqex.Cast[[]any](row)
86-
if !ok {
87-
return fmt.Errorf("expected row to be an array, got %s", gojqex.TypeErrorPreview(row))
88-
}
89-
vs, ok := gojqex.NormalizeToStrings(rs).([]any)
90-
if !ok {
91-
panic("not array")
92-
}
93-
var ss []string
94-
for _, v := range vs {
95-
s, ok := v.(string)
145+
switch row.(type) {
146+
case []any:
147+
if seenObject > 0 {
148+
return fmt.Errorf("mixed row types, expected row to be an object, got %s", gojqex.TypeErrorPreview(row))
149+
}
150+
151+
rs, ok := gojqex.Cast[[]any](row)
96152
if !ok {
97-
return fmt.Errorf("expected row record to be scalars, got %s", gojqex.TypeErrorPreview(v))
153+
return fmt.Errorf("expected row to be an array, got %s", gojqex.TypeErrorPreview(row))
98154
}
99-
ss = append(ss, s)
100-
}
101-
if err := w.Write(ss); err != nil {
102-
return err
155+
vs, ok := gojqex.NormalizeToStrings(rs).([]any)
156+
if !ok {
157+
panic("not array")
158+
}
159+
var ss []string
160+
for _, v := range vs {
161+
s, ok := v.(string)
162+
if !ok {
163+
return fmt.Errorf("expected row record to be scalars, got %s", gojqex.TypeErrorPreview(v))
164+
}
165+
ss = append(ss, s)
166+
}
167+
if err := w.Write(ss); err != nil {
168+
return err
169+
}
170+
171+
seenArrays++
172+
case map[string]any:
173+
if seenArrays > 0 {
174+
return fmt.Errorf("mixed row types, expected row to be an array, got %s", gojqex.TypeErrorPreview(row))
175+
}
176+
177+
rm, ok := gojqex.Cast[map[string]any](row)
178+
if !ok {
179+
return fmt.Errorf("expected row to be an object, got %s", gojqex.TypeErrorPreview(row))
180+
}
181+
vm, ok := gojqex.NormalizeToStrings(rm).(map[string]any)
182+
if !ok {
183+
panic("not object")
184+
}
185+
186+
if headers == nil {
187+
// TODO: maps are random order in go
188+
for k := range vm {
189+
headers = append(headers, k)
190+
}
191+
sort.Strings(headers)
192+
193+
if err := w.Write(headers); err != nil {
194+
return err
195+
}
196+
}
197+
198+
var ss []string
199+
keysFound := 0
200+
for _, k := range headers {
201+
s, ok := vm[k].(string)
202+
if !ok {
203+
return fmt.Errorf("expected row object to have a %q key, %s", k, gojqex.TypeErrorPreview(row))
204+
}
205+
ss = append(ss, s)
206+
keysFound++
207+
}
208+
// TODO: what keys are extra/missing
209+
if keysFound < len(headers) {
210+
return fmt.Errorf("expected row object has missing keys %s", gojqex.TypeErrorPreview(row))
211+
} else if keysFound > len(headers) {
212+
return fmt.Errorf("expected row object has extra keys %s", gojqex.TypeErrorPreview(row))
213+
}
214+
215+
if err := w.Write(ss); err != nil {
216+
return err
217+
}
218+
219+
seenObject++
103220
}
221+
104222
}
105223
w.Flush()
106224

format/format.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -330,8 +330,12 @@ type HTMLIn struct {
330330
}
331331

332332
type CSVLIn struct {
333-
Comma string `doc:"Separator character"`
334-
Comment string `doc:"Comment line character"`
333+
Comma string `doc:"Alias for Delimiter"`
334+
Delimiter string `doc:"Field delimiter character"`
335+
Comment string `doc:"Comment line character"`
336+
QuoteChar string `doc:"Quote character"`
337+
Header bool `doc:"Convert to objects based on header"`
338+
SkipInitialSpace bool `doc:"Don't include leading whitespace"`
335339
}
336340

337341
type BitCoinBlockIn struct {

internal/csvex/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Modified version of the Go standard library encoding/csv Reader/Writer to support a different quote character

0 commit comments

Comments
 (0)