@@ -4,21 +4,21 @@ use std::collections::{HashMap, HashSet};
4
4
use rand:: { prelude:: StdRng , Rng , SeedableRng } ;
5
5
use tree_sitter:: { Language , Node , Tree } ;
6
6
7
- use tree_sitter_edit:: { Editor , NodeId } ;
7
+ use tree_sitter_edit:: Editor ;
8
8
9
9
use crate :: node_types:: NodeTypes ;
10
10
11
11
/// Pending replacements, keyed by tree-sitter node id.
///
/// Each value is the byte string that is substituted for the node with that id when the
/// edits are rendered.
#[derive(Debug, Default)]
struct Edits(HashMap<usize, Vec<u8>>);
13
13
14
- impl < ' a > Editor for Edits < ' a > {
14
+ impl Editor for Edits {
15
15
fn has_edit ( & self , _tree : & Tree , node : & Node ) -> bool {
16
16
self . 0 . get ( & node. id ( ) ) . is_some ( )
17
17
}
18
18
19
19
fn edit ( & self , _source : & [ u8 ] , tree : & Tree , node : & Node ) -> Vec < u8 > {
20
20
debug_assert ! ( self . has_edit( tree, node) ) ;
21
- Vec :: from ( * self . 0 . get ( & node. id ( ) ) . unwrap ( ) )
21
+ self . 0 . get ( & node. id ( ) ) . unwrap ( ) . clone ( )
22
22
}
23
23
}
24
24
@@ -71,15 +71,32 @@ fn parse(language: Language, code: &str) -> tree_sitter::Tree {
71
71
parser. parse ( code, None ) . expect ( "Failed to parse code" )
72
72
}
73
73
74
+ /// Splicing configuration
74
75
#[ derive( Debug ) ]
75
76
pub struct Config {
77
+ /// Percent chance to perform chaotic mutation
78
+ ///
79
+ /// Chaotic mutations may result in invalid syntax.
76
80
pub chaos : u8 ,
81
+ /// Percent chance to perform a deletion.
82
+ ///
83
+ /// By default, deletes optional nodes. Chaotic deletions delete any node.
77
84
pub deletions : u8 ,
78
85
pub language : Language ,
79
86
// pub intra_splices: usize,
87
+ /// Perform anywhere from zero to this many inter-file splices per test.
80
88
pub inter_splices : usize ,
89
+ /// Approximate maximum file size to produce (bytes)
90
+ ///
91
+ /// Some of the input tests should be below this size.
92
+ pub max_size : usize ,
81
93
pub node_types : NodeTypes ,
94
+ /// Re-parse the file after this many mutations.
95
+ ///
96
+ /// When this is more than `inter_splices`, never re-parse.
97
+ pub reparse : usize ,
82
98
pub seed : u64 ,
99
+ /// How many tests to generate
83
100
pub tests : usize ,
84
101
}
85
102
@@ -91,13 +108,21 @@ struct Splicer<'a> {
91
108
kinds : Vec < & ' static str > ,
92
109
// intra_splices: usize,
93
110
inter_splices : usize ,
111
+ max_size : usize ,
94
112
node_types : NodeTypes ,
95
113
trees : Vec < ( & ' a [ u8 ] , & ' a Tree ) > ,
96
114
remaining : usize ,
115
+ reparse : usize ,
97
116
rng : StdRng ,
98
117
}
99
118
100
119
impl < ' a > Splicer < ' a > {
120
+ fn delta ( node : Node < ' _ > , replace : & [ u8 ] ) -> isize {
121
+ let range = node. byte_range ( ) ;
122
+ isize:: try_from ( replace. len ( ) ) . unwrap_or_default ( )
123
+ - isize:: try_from ( range. end - range. start ) . unwrap_or_default ( )
124
+ }
125
+
101
126
fn pick_usize ( & mut self , n : usize ) -> usize {
102
127
self . rng . gen_range ( 0 ..n)
103
128
}
@@ -136,23 +161,25 @@ impl<'a> Splicer<'a> {
136
161
* nodes. get ( self . pick_idx ( & nodes) ) . unwrap ( )
137
162
}
138
163
139
- fn delete_node ( & mut self , _text : & [ u8 ] , tree : & Tree ) -> ( usize , Vec < u8 > ) {
164
+ fn delete_node ( & mut self , _text : & [ u8 ] , tree : & Tree ) -> ( usize , Vec < u8 > , isize ) {
140
165
let chaotic = self . rng . gen_range ( 0 ..100 ) < self . chaos ;
141
166
if chaotic {
142
- return ( self . pick_node ( tree) . id ( ) , Vec :: new ( ) ) ;
167
+ let node = self . pick_node ( tree) ;
168
+ return ( node. id ( ) , Vec :: new ( ) , Self :: delta ( node, & [ ] ) ) ;
143
169
}
144
170
let nodes = self . all_nodes ( tree) ;
145
171
if nodes. iter ( ) . all ( |n| !self . node_types . optional_node ( n) ) {
146
- return ( self . pick_node ( tree) . id ( ) , Vec :: new ( ) ) ;
172
+ let node = self . pick_node ( tree) ;
173
+ return ( node. id ( ) , Vec :: new ( ) , Self :: delta ( node, & [ ] ) ) ;
147
174
}
148
175
let mut node = nodes. get ( self . pick_idx ( & nodes) ) . unwrap ( ) ;
149
176
while !self . node_types . optional_node ( node) {
150
177
node = nodes. get ( self . pick_idx ( & nodes) ) . unwrap ( ) ;
151
178
}
152
- ( node. id ( ) , Vec :: new ( ) )
179
+ ( node. id ( ) , Vec :: new ( ) , Self :: delta ( * node , & [ ] ) )
153
180
}
154
181
155
- fn splice_node ( & mut self , text : & [ u8 ] , tree : & Tree ) -> ( usize , Vec < u8 > ) {
182
+ fn splice_node ( & mut self , text : & [ u8 ] , tree : & Tree ) -> ( usize , Vec < u8 > , isize ) {
156
183
let chaotic = self . rng . gen_range ( 0 ..100 ) < self . chaos ;
157
184
158
185
let mut node = tree. root_node ( ) ;
@@ -188,30 +215,38 @@ impl<'a> Splicer<'a> {
188
215
// std::str::from_utf8(&text[node.byte_range()]).unwrap(),
189
216
// std::str::from_utf8(candidate).unwrap(),
190
217
// );
191
- ( node. id ( ) , Vec :: from ( * candidate) )
218
+ let replace = Vec :: from ( * candidate) ;
219
+ let delta = Self :: delta ( node, replace. as_slice ( ) ) ;
220
+ ( node. id ( ) , replace, delta)
192
221
}
193
222
194
223
fn splice_tree ( & mut self , text0 : & [ u8 ] , mut tree : Tree ) -> Option < Vec < u8 > > {
195
- let splices = self . rng . gen_range ( 0 ..self . inter_splices ) ;
224
+ let mut edits = Edits :: default ( ) ;
225
+ if self . inter_splices == 0 {
226
+ return None ;
227
+ }
228
+ let splices = self . rng . gen_range ( 1 ..self . inter_splices ) ;
196
229
let mut text = Vec :: from ( text0) ;
197
- for _ in 0 ..splices {
198
- let ( id, bytes) = if self . rng . gen_range ( 0 ..100 ) < self . deletions {
230
+ let mut sz = isize:: try_from ( text. len ( ) ) . unwrap_or_default ( ) ;
231
+ for i in 0 ..splices {
232
+ let ( id, bytes, delta) = if self . rng . gen_range ( 0 ..100 ) < self . deletions {
199
233
self . delete_node ( text. as_slice ( ) , & tree)
200
234
} else {
201
235
self . splice_node ( text. as_slice ( ) , & tree)
202
236
} ;
203
- let id = NodeId { id } ;
204
- let bytes = bytes. to_vec ( ) ;
205
- let mut result = Vec :: with_capacity ( text. len ( ) / 4 ) ; // low guesstimate
206
- tree_sitter_edit:: render (
207
- & mut result,
208
- & tree,
209
- text. as_slice ( ) ,
210
- & tree_sitter_edit:: Replace { id, bytes } ,
211
- )
212
- . ok ( ) ?;
213
- text = result. clone ( ) ;
214
- tree = parse ( self . language , & String :: from_utf8_lossy ( text. as_slice ( ) ) ) ;
237
+ sz += delta;
238
+ let sized_out = usize:: try_from ( sz) . unwrap_or_default ( ) >= self . max_size ;
239
+ edits. 0 . insert ( id, bytes) ;
240
+ if i % self . reparse == 0 || i + 1 == splices || sized_out {
241
+ let mut result = Vec :: with_capacity ( usize:: try_from ( sz) . unwrap_or_default ( ) ) ;
242
+ tree_sitter_edit:: render ( & mut result, & tree, text. as_slice ( ) , & edits) . ok ( ) ?;
243
+ text = result. clone ( ) ;
244
+ tree = parse ( self . language , & String :: from_utf8_lossy ( text. as_slice ( ) ) ) ;
245
+ edits = Edits :: default ( ) ;
246
+ }
247
+ if sized_out {
248
+ break ;
249
+ }
215
250
}
216
251
Some ( text)
217
252
}
@@ -226,8 +261,12 @@ impl<'a> Iterator for Splicer<'a> {
226
261
}
227
262
self . remaining -= 1 ;
228
263
229
- let tree_idx: usize = self . pick_usize ( self . trees . len ( ) ) ;
230
- let ( text, tree) = * self . trees . get ( tree_idx) . unwrap ( ) ;
264
+ let mut tree_idx: usize = self . pick_usize ( self . trees . len ( ) ) ;
265
+ let ( mut text, mut tree) = * self . trees . get ( tree_idx) . unwrap ( ) ;
266
+ while text. len ( ) > self . max_size {
267
+ tree_idx = self . pick_usize ( self . trees . len ( ) ) ;
268
+ ( text, tree) = * self . trees . get ( tree_idx) . unwrap ( ) ;
269
+ }
231
270
self . splice_tree ( text, tree. clone ( ) )
232
271
}
233
272
}
@@ -261,8 +300,10 @@ pub fn splice<'a>(
261
300
kinds,
262
301
// intra_splices: config.intra_splices,
263
302
inter_splices : config. inter_splices ,
303
+ max_size : config. max_size ,
264
304
node_types : config. node_types ,
265
305
remaining : std:: cmp:: min ( config. tests , possible) ,
306
+ reparse : config. reparse ,
266
307
rng,
267
308
trees,
268
309
}
0 commit comments