Skip to content

Commit 76b2462

Browse files
committed
Limit jstream parse depth
Add bcicen/jstream#15 by vendoring the package. Sets JSON depth limit to 100 entries in S3 Select.
1 parent 05a6c17 commit 76b2462

File tree

19 files changed

+1479
-16
lines changed

19 files changed

+1479
-16
lines changed

cmd/postpolicyform.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ import (
2929
"strings"
3030
"time"
3131

32-
"github.com/bcicen/jstream"
3332
"github.com/minio/minio-go/v7/pkg/encrypt"
3433
"github.com/minio/minio-go/v7/pkg/set"
3534
xhttp "github.com/minio/minio/internal/http"
35+
"github.com/minio/minio/internal/s3select/jstream"
3636
)
3737

3838
// startWithConds - map which indicates if a given condition supports starts-with policy operator
@@ -140,7 +140,7 @@ type PostPolicyForm struct {
140140
func sanitizePolicy(r io.Reader) (io.Reader, error) {
141141
var buf bytes.Buffer
142142
e := json.NewEncoder(&buf)
143-
d := jstream.NewDecoder(r, 0).ObjectAsKVS()
143+
d := jstream.NewDecoder(r, 0).ObjectAsKVS().MaxDepth(10)
144144
sset := set.NewStringSet()
145145
for mv := range d.Stream() {
146146
var kvs jstream.KVS

internal/s3select/csv/record.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ import (
2525
"strconv"
2626
"strings"
2727

28-
"github.com/bcicen/jstream"
2928
csv "github.com/minio/csvparser"
29+
"github.com/minio/minio/internal/s3select/jstream"
3030
"github.com/minio/minio/internal/s3select/sql"
3131
)
3232

internal/s3select/json/preader.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import (
2424
"runtime"
2525
"sync"
2626

27-
"github.com/bcicen/jstream"
27+
"github.com/minio/minio/internal/s3select/jstream"
2828
"github.com/minio/minio/internal/s3select/sql"
2929
)
3030

@@ -185,7 +185,7 @@ func (r *PReader) startReaders() {
185185
dst = make([]jstream.KVS, 0, 1000)
186186
}
187187

188-
d := jstream.NewDecoder(bytes.NewBuffer(in.input), 0).ObjectAsKVS()
188+
d := jstream.NewDecoder(bytes.NewBuffer(in.input), 0).ObjectAsKVS().MaxDepth(100)
189189
stream := d.Stream()
190190
all := dst[:0]
191191
for mv := range stream {

internal/s3select/json/reader.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,8 @@ import (
2121
"io"
2222
"sync"
2323

24+
"github.com/minio/minio/internal/s3select/jstream"
2425
"github.com/minio/minio/internal/s3select/sql"
25-
26-
"github.com/bcicen/jstream"
2726
)
2827

2928
// Limit single document size to 10MiB, 10x the AWS limit:
@@ -84,7 +83,7 @@ func (r *Reader) Close() error {
8483
// NewReader - creates new JSON reader using readCloser.
8584
func NewReader(readCloser io.ReadCloser, args *ReaderArgs) *Reader {
8685
readCloser = &syncReadCloser{rc: readCloser}
87-
d := jstream.NewDecoder(io.LimitReader(readCloser, maxDocumentSize), 0).ObjectAsKVS()
86+
d := jstream.NewDecoder(io.LimitReader(readCloser, maxDocumentSize), 0).ObjectAsKVS().MaxDepth(100)
8887
return &Reader{
8988
args: args,
9089
decoder: d,

internal/s3select/json/record.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ import (
2626
"strconv"
2727
"strings"
2828

29-
"github.com/bcicen/jstream"
3029
csv "github.com/minio/csvparser"
30+
"github.com/minio/minio/internal/s3select/jstream"
3131
"github.com/minio/minio/internal/s3select/sql"
3232
)
3333

internal/s3select/jstream/LICENSE

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
The MIT License (MIT)
2+
3+
Copyright (c) 2018 Bradley Cicenas
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.
22+
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
<p align="center"><img width="350px" src="jstream.png" alt="jstream"/></p>
2+
3+
#
4+
5+
[![GoDoc](https://godoc.org/github.com/bcicen/jstream?status.svg)](https://godoc.org/github.com/bcicen/jstream)
6+
7+
8+
`jstream` is a streaming JSON parser and value extraction library for Go.
9+
10+
Unlike most JSON parsers, `jstream` is document position- and depth-aware -- this enables the extraction of values at a specified depth, eliminating the overhead of allocating encompassing arrays or objects; e.g.:
11+
12+
Using the below example document:
13+
<img width="85%" src="https://bradley.codes/static/img/jstream-levels.gif" alt="jstream"/>
14+
15+
we can choose to extract and act only on the objects within the top-level array:
16+
```go
17+
f, _ := os.Open("input.json")
18+
decoder := jstream.NewDecoder(f, 1) // extract JSON values at a depth level of 1
19+
for mv := range decoder.Stream() {
20+
fmt.Printf("%v\n ", mv.Value)
21+
}
22+
```
23+
24+
output:
25+
```
26+
map[desc:RGB colors:[red green blue]]
27+
map[desc:CMYK colors:[cyan magenta yellow black]]
28+
```
29+
30+
likewise, increasing depth level to `3` yields:
31+
```
32+
red
33+
green
34+
blue
35+
cyan
36+
magenta
37+
yellow
38+
black
39+
```
40+
41+
optionally, key:value pairs can be emitted as individual structs:
42+
```go
43+
decoder := jstream.NewDecoder(f, 2).EmitKV() // enable KV streaming at a depth level of 2
44+
```
45+
46+
```
47+
jstream.KV{desc RGB}
48+
jstream.KV{colors [red green blue]}
49+
jstream.KV{desc CMYK}
50+
jstream.KV{colors [cyan magenta yellow black]}
51+
```
52+
53+
## Installing
54+
55+
```bash
56+
go get github.com/bcicen/jstream
57+
```
58+
59+
## Commandline
60+
61+
`jstream` comes with a cli tool for quick viewing of parsed values from JSON input:
62+
63+
```bash
64+
jstream -d 1 < input.json
65+
```
66+
67+
```json
68+
{"colors":["red","green","blue"],"desc":"RGB"}
69+
{"colors":["cyan","magenta","yellow","black"],"desc":"CMYK"}
70+
```
71+
72+
detailed output with `-v` option:
73+
```bash
74+
cat input.json | jstream -v -d -1
75+
76+
depth start end type | value
77+
2 018 023 string | "RGB"
78+
3 041 046 string | "red"
79+
3 048 055 string | "green"
80+
3 057 063 string | "blue"
81+
2 039 065 array | ["red","green","blue"]
82+
1 004 069 object | {"colors":["red","green","blue"],"desc":"RGB"}
83+
2 087 093 string | "CMYK"
84+
3 111 117 string | "cyan"
85+
3 119 128 string | "magenta"
86+
3 130 138 string | "yellow"
87+
3 140 147 string | "black"
88+
2 109 149 array | ["cyan","magenta","yellow","black"]
89+
1 073 153 object | {"colors":["cyan","magenta","yellow","black"],"desc":"CMYK"}
90+
0 000 155 array | [{"colors":["red","green","blue"],"desc":"RGB"},{"colors":["cyan","magenta","yellow","black"],"desc":"CMYK"}]
91+
```
92+
93+
### Options
94+
95+
Opt | Description
96+
--- | ---
97+
-d \<n\> | emit values at depth n. if n < 0, all values will be emitted
98+
-kv | output inner key value pairs as newly formed objects
99+
-v | output depth and offset details for each value
100+
-h | display help dialog
101+
102+
## Benchmarks
103+
104+
Obligatory benchmarks performed on files with arrays of objects, where the decoded objects are to be extracted.
105+
106+
Two file sizes are used -- regular (1.6MB, 1000 objects) and large (128MB, 100000 objects)
107+
108+
input size | lib | MB/s | Allocated
109+
--- | --- | --- | ---
110+
regular | standard | 97 | 3.6MB
111+
regular | jstream | 175 | 2.1MB
112+
large | standard | 92 | 305MB
113+
large | jstream | 404 | 69MB
114+
115+
In a real world scenario, including initialization and reader overhead from varying blob sizes, performance can be expected as below:
116+
<img src="https://bradley.codes/static/img/bench.svg" alt="jstream"/>

0 commit comments

Comments
 (0)