Skip to content

Commit e3e2581

Browse files
committed
csv: Add more from/to options
Add header option, on by default. Disable comments by default. Rename comma to delimiter. Add quote_char option. Add skip initial space option. Uses a forked version of std csv to support a custom quote character. See top of csv.go for TODOs.
1 parent 63b5828 commit e3e2581

File tree

7 files changed

+1608
-32
lines changed

7 files changed

+1608
-32
lines changed

format/csv/csv.go

Lines changed: 148 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,26 @@
11
package csv
22

3+
// TODO: error, throw error always? no decode value with gap etc? -d csv from_csv
4+
// TODO: header row field count mismatch error, csv reader takes care of check atm. can use FieldsPerRecord -1
5+
// TODO: row object keys mismatch writer
6+
// TODO: lazy quotes?
7+
// TODO: comment in writer? string elements?
8+
// TODO: to_csv objects
9+
// TODO: to_csv opts help
10+
// TODO: go maps are random order, now sorts headers
11+
// TODO: option aliases?
12+
// TODO: snake_case option?
13+
314
import (
415
"bytes"
516
"embed"
6-
"encoding/csv"
717
"errors"
818
"fmt"
919
"io"
20+
"sort"
1021

1122
"github.com/wader/fq/format"
23+
"github.com/wader/fq/internal/csvex"
1224
"github.com/wader/fq/internal/gojqex"
1325
"github.com/wader/fq/pkg/bitio"
1426
"github.com/wader/fq/pkg/decode"
@@ -27,8 +39,11 @@ func init() {
2739
ProbeOrder: format.ProbeOrderTextFuzzy,
2840
DecodeFn: decodeCSV,
2941
DecodeInArg: format.CSVLIn{
30-
Comma: ",",
31-
Comment: "#",
42+
Delimiter: ",",
43+
Comment: "",
44+
QuoteChar: `"`,
45+
Header: true,
46+
SkipInitialSpace: false,
3247
},
3348
Functions: []string{"_todisplay"},
3449
})
@@ -39,29 +54,59 @@ func init() {
3954
func decodeCSV(d *decode.D, in any) any {
4055
ci, _ := in.(format.CSVLIn)
4156

42-
var rvs []any
4357
br := d.RawLen(d.Len())
44-
r := csv.NewReader(bitio.NewIOReader(br))
45-
r.TrimLeadingSpace = true
58+
r := csvex.NewReader(bitio.NewIOReader(br))
4659
r.LazyQuotes = true
47-
if ci.Comma != "" {
60+
if ci.Delimiter != "" {
61+
r.Comma = rune(ci.Delimiter[0])
62+
} else if ci.Comma != "" {
4863
r.Comma = rune(ci.Comma[0])
4964
}
5065
if ci.Comment != "" {
5166
r.Comment = rune(ci.Comment[0])
67+
} else {
68+
r.Comment = 0
69+
}
70+
if ci.QuoteChar != "" {
71+
r.Quote = rune(ci.QuoteChar[0])
72+
} else {
73+
r.Quote = '"'
5274
}
75+
r.TrimLeadingSpace = ci.SkipInitialSpace
76+
77+
row := 1
78+
var rvs []any
79+
80+
var headers []string
5381
for {
5482
r, err := r.Read()
5583
if errors.Is(err, io.EOF) {
5684
break
5785
} else if err != nil {
5886
return err
5987
}
60-
var vs []any
61-
for _, s := range r {
62-
vs = append(vs, s)
88+
89+
if ci.Header {
90+
if headers == nil {
91+
// TODO: duplicate headers?
92+
headers = append(headers, r...)
93+
} else {
94+
obj := map[string]any{}
95+
for i, s := range r {
96+
h := headers[i]
97+
obj[h] = s
98+
}
99+
rvs = append(rvs, obj)
100+
}
101+
} else {
102+
var vs []any
103+
for _, s := range r {
104+
vs = append(vs, s)
105+
}
106+
rvs = append(rvs, vs)
63107
}
64-
rvs = append(rvs, vs)
108+
109+
row++
65110
}
66111

67112
d.Value.V = &scalar.Any{Actual: rvs}
@@ -71,35 +116,108 @@ func decodeCSV(d *decode.D, in any) any {
71116
}
72117

73118
type ToCSVOpts struct {
74-
Comma string
119+
Comma string // alias for Delimiter
120+
Delimiter string
121+
QuoteChar string
122+
Header bool
75123
}
76124

77125
func toCSV(_ *interp.Interp, c []any, opts ToCSVOpts) any {
78126
b := &bytes.Buffer{}
79-
w := csv.NewWriter(b)
80-
if opts.Comma != "" {
127+
w := csvex.NewWriter(b)
128+
if opts.Delimiter != "" {
129+
w.Comma = rune(opts.Delimiter[0])
130+
} else if opts.Comma != "" {
81131
w.Comma = rune(opts.Comma[0])
82132
}
133+
if opts.QuoteChar != "" {
134+
w.Quote = rune(opts.QuoteChar[0])
135+
} else {
136+
w.Quote = '"'
137+
}
138+
139+
seenObject := 0
140+
seenArrays := 0
141+
var headers []string
142+
83143
for _, row := range c {
84-
rs, ok := gojqex.Cast[[]any](row)
85-
if !ok {
86-
return fmt.Errorf("expected row to be an array, got %s", gojqex.TypeErrorPreview(row))
87-
}
88-
vs, ok := gojqex.NormalizeToStrings(rs).([]any)
89-
if !ok {
90-
panic("not array")
91-
}
92-
var ss []string
93-
for _, v := range vs {
94-
s, ok := v.(string)
144+
switch row.(type) {
145+
case []any:
146+
if seenObject > 0 {
147+
return fmt.Errorf("mixed row types, expected row to be an object, got %s", gojqex.TypeErrorPreview(row))
148+
}
149+
150+
rs, ok := gojqex.Cast[[]any](row)
95151
if !ok {
96-
return fmt.Errorf("expected row record to be scalars, got %s", gojqex.TypeErrorPreview(v))
152+
return fmt.Errorf("expected row to be an array, got %s", gojqex.TypeErrorPreview(row))
97153
}
98-
ss = append(ss, s)
99-
}
100-
if err := w.Write(ss); err != nil {
101-
return err
154+
vs, ok := gojqex.NormalizeToStrings(rs).([]any)
155+
if !ok {
156+
panic("not array")
157+
}
158+
var ss []string
159+
for _, v := range vs {
160+
s, ok := v.(string)
161+
if !ok {
162+
return fmt.Errorf("expected row record to be scalars, got %s", gojqex.TypeErrorPreview(v))
163+
}
164+
ss = append(ss, s)
165+
}
166+
if err := w.Write(ss); err != nil {
167+
return err
168+
}
169+
170+
seenArrays++
171+
case map[string]any:
172+
if seenArrays > 0 {
173+
return fmt.Errorf("mixed row types, expected row to be an array, got %s", gojqex.TypeErrorPreview(row))
174+
}
175+
176+
rm, ok := gojqex.Cast[map[string]any](row)
177+
if !ok {
178+
return fmt.Errorf("expected row to be an object, got %s", gojqex.TypeErrorPreview(row))
179+
}
180+
vm, ok := gojqex.NormalizeToStrings(rm).(map[string]any)
181+
if !ok {
182+
panic("not object")
183+
}
184+
185+
if headers == nil {
186+
// TODO: maps are random order in go
187+
for k := range vm {
188+
headers = append(headers, k)
189+
}
190+
sort.Strings(headers)
191+
192+
if err := w.Write(headers); err != nil {
193+
return err
194+
}
195+
}
196+
197+
var ss []string
198+
keysFound := 0
199+
for _, k := range headers {
200+
s, ok := vm[k].(string)
201+
if !ok {
202+
return fmt.Errorf("expected row object to have a %q key, %s", k, gojqex.TypeErrorPreview(row))
203+
}
204+
ss = append(ss, s)
205+
keysFound++
206+
}
207+
// TODO: what keys are extra/missing
208+
if keysFound < len(headers) {
209+
return fmt.Errorf("expected row object has missing keys %s", gojqex.TypeErrorPreview(row))
210+
} else if keysFound > len(headers) {
211+
return fmt.Errorf("expected row object has extra keys %s", gojqex.TypeErrorPreview(row))
212+
}
213+
214+
if err := w.Write(ss); err != nil {
215+
return err
216+
}
217+
218+
seenObject++
102219
}
220+
103221
}
104222
w.Flush()
105223

format/format.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -320,8 +320,12 @@ type HTMLIn struct {
320320
}
321321

322322
type CSVLIn struct {
323-
Comma string `doc:"Separator character"`
324-
Comment string `doc:"Comment line character"`
323+
Comma string `doc:"Alias for Delimiter"`
324+
Delimiter string `doc:"Field delimiter character"`
325+
Comment string `doc:"Comment line character"`
326+
QuoteChar string `doc:"Quote character"`
327+
Header bool `doc:"Convert to objects based on header"`
328+
SkipInitialSpace bool `doc:"Don't include leading whitespace"`
325329
}
326330

327331
type BitCoinBlockIn struct {

internal/csvex/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Modified version of the Go standard library encoding/csv Reader/Writer to support a different quote character

0 commit comments

Comments
 (0)