Skip to content

Commit a596844

Browse files
Deletions
1 parent dcda962 commit a596844

File tree

9 files changed

+237
-15
lines changed

9 files changed

+237
-15
lines changed

Cargo.lock

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
use anyhow::Result;
22

33
fn main() -> Result<()> {
4-
tree_splicer::cli::main(tree_sitter_javascript::language())
4+
tree_splicer::cli::main(
5+
tree_sitter_javascript::language(),
6+
tree_sitter_javascript::NODE_TYPES,
7+
)
58
}

crates/tree-splicer-rust/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use anyhow::Result;
22

33
fn main() -> Result<()> {
4-
tree_splicer::cli::main(tree_sitter_rust::language())
4+
tree_splicer::cli::main(tree_sitter_rust::language(), tree_sitter_rust::NODE_TYPES)
55
}
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
use anyhow::Result;
22

33
fn main() -> Result<()> {
4-
tree_splicer::cli::main(tree_sitter_typescript::language_typescript())
4+
tree_splicer::cli::main(
5+
tree_sitter_typescript::language_typescript(),
6+
tree_sitter_typescript::TYPESCRIPT_NODE_TYPES,
7+
)
58
}

crates/tree-splicer/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ log = "0.4"
1818
nu-ansi-term = { version = "0.46", optional = true }
1919
num_cpus = { version = "1", optional = true }
2020
rand = "0.8"
21+
serde = { version = "1", features = ["derive"] }
22+
serde_json = "1"
2123
tracing = "0.1"
2224
tracing-subscriber = { version = "0.3", features = ["json"], optional = true }
2325
tree-sitter = "0.20"

crates/tree-splicer/src/cli.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ pub struct Args {
6565
#[arg(short, long, default_value_t = 5)]
6666
pub chaos: u8,
6767

68+
/// Percent of deletion mutations - the rest are splices
69+
#[arg(short, long, default_value_t = 5)]
70+
pub deletions: u8,
71+
6872
/// Behavior on parse errors
6973
#[arg(long, default_value_t = OnParseError::Warn, value_name = "CHOICE")]
7074
on_parse_error: OnParseError,
@@ -138,7 +142,7 @@ fn init_tracing(args: &Args) {
138142
builder.event_format(formatter::TerseFormatter).init();
139143
}
140144

141-
pub fn main(language: tree_sitter::Language) -> Result<()> {
145+
pub fn main(language: tree_sitter::Language, node_types_json_str: &'static str) -> Result<()> {
142146
let args = Args::parse();
143147

144148
init_tracing(&args);
@@ -160,15 +164,19 @@ pub fn main(language: tree_sitter::Language) -> Result<()> {
160164
}
161165
}
162166

167+
let node_types = crate::node_types::NodeTypes::new(node_types_json_str)?;
163168
let config = Config {
169+
chaos: args.chaos,
170+
deletions: args.deletions,
164171
language,
165172
// intra_splices: 10,
166173
inter_splices: args.mutations,
174+
node_types,
167175
seed: args.seed,
168176
tests: args.tests,
169177
};
170178
std::fs::create_dir_all(&args.output).context("Couldn't create output directory")?;
171-
for (i, out) in splice::splice(config, &files, args.chaos).enumerate() {
179+
for (i, out) in splice::splice(config, &files).enumerate() {
172180
std::fs::write(args.output.join(i.to_string()), out)
173181
.context("Couldn't save generated test case")?;
174182
}

crates/tree-splicer/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
#[cfg(feature = "cli")]
22
pub mod cli;
3+
pub mod node_types;
34
pub mod splice;

crates/tree-splicer/src/node_types.rs

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
//! Read tree-sitter's `node-types.json`.
2+
//
3+
// Copied in part from [treeedbgen] and treereduce.
4+
//
5+
// [treeedbgen]: https://github.com/langston-barrett/treeedb/blob/1a2fae3509c76cd5a8e1004f808ea800d49d1a19/treeedbgen/src/lib.rs
6+
7+
use std::collections::HashMap;
8+
9+
use serde::{Deserialize, Serialize};
10+
11+
/// One entry of tree-sitter's `node-types.json`.
12+
#[derive(Clone, Eq, PartialEq, Serialize, Deserialize, Debug)]
13+
struct Node {
14+
#[serde(rename(deserialize = "type", serialize = "type"))]
15+
ty: String,
16+
named: bool,
17+
#[serde(default)] // empty
18+
children: Children,
19+
#[serde(default)] // empty
20+
fields: HashMap<String, Field>,
21+
#[serde(default)] // empty
22+
subtypes: Vec<Subtype>,
23+
}
24+
25+
#[derive(Default, Clone, Eq, PartialEq, Serialize, Deserialize, Debug)]
26+
struct Children {
27+
multiple: bool,
28+
required: bool,
29+
types: Vec<Subtype>,
30+
}
31+
32+
#[derive(Clone, Eq, PartialEq, Serialize, Deserialize, Debug)]
33+
struct Field {
34+
multiple: bool,
35+
required: bool,
36+
types: Vec<Subtype>,
37+
}
38+
39+
#[derive(Clone, Eq, PartialEq, Serialize, Deserialize, Debug)]
40+
struct Subtype {
41+
#[serde(rename(deserialize = "type", serialize = "type"))]
42+
ty: String,
43+
named: bool,
44+
}
45+
46+
#[derive(Clone, Debug)]
47+
pub struct FieldInfo {
48+
parent_ty: String,
49+
multiple: bool,
50+
required: bool,
51+
}
52+
53+
#[derive(Clone, Debug)]
54+
pub struct NodeTypes {
55+
children: HashMap<String, Children>,
56+
subtypes: HashMap<String, Vec<String>>,
57+
reverse_fields: HashMap<String, Vec<FieldInfo>>,
58+
}
59+
60+
fn subtypes(name: &str, nodes: &Vec<Node>) -> Vec<String> {
61+
let mut r = vec![name.to_string()];
62+
for n in nodes {
63+
if n.ty == name {
64+
for subty in &n.subtypes {
65+
r.push(subty.ty.clone());
66+
r.extend(subtypes(&subty.ty, nodes));
67+
}
68+
}
69+
}
70+
r
71+
}
72+
73+
impl NodeTypes {
74+
pub fn new(node_types_json_str: &str) -> Result<Self, serde_json::Error> {
75+
let nodes: Vec<Node> = serde_json::from_str(node_types_json_str)?;
76+
let subtypes: HashMap<_, _> = nodes
77+
.iter()
78+
.map(|n| (n.ty.clone(), subtypes(&n.ty, &nodes)))
79+
.collect();
80+
let mut reverse_fields = HashMap::new();
81+
82+
// For each type of node...
83+
for node in &nodes {
84+
// Loop through its fields...
85+
for (_field_name, field) in node.fields.iter() {
86+
// And save the name of all types that the field could be.
87+
for subtype in &field.types {
88+
for subsubty in subtypes.get(&subtype.ty).unwrap_or(&Vec::new()) {
89+
let entry = reverse_fields.entry(subsubty.clone());
90+
entry
91+
.and_modify(|v: &mut Vec<FieldInfo>| {
92+
v.push(FieldInfo {
93+
parent_ty: node.ty.clone(),
94+
multiple: field.multiple,
95+
required: field.required,
96+
});
97+
})
98+
.or_insert_with(|| {
99+
vec![FieldInfo {
100+
parent_ty: node.ty.clone(),
101+
multiple: field.multiple,
102+
required: field.required,
103+
}]
104+
});
105+
}
106+
}
107+
}
108+
}
109+
Ok(NodeTypes {
110+
children: nodes
111+
.iter()
112+
.map(|n| (n.ty.clone(), n.children.clone()))
113+
.collect(),
114+
subtypes,
115+
reverse_fields,
116+
})
117+
}
118+
119+
/// Defaults to `true` if the real answer can't be determined.
120+
fn optional(&self, node_kind: &str, parent_kind: &str) -> bool {
121+
if let Some(flds) = self.reverse_fields.get(node_kind) {
122+
for fi in flds {
123+
if parent_kind == fi.parent_ty && (!fi.multiple || fi.required) {
124+
return false;
125+
}
126+
}
127+
}
128+
true
129+
}
130+
131+
/// Defaults to `true` if the real answer can't be determined.
132+
pub fn optional_node(&self, node: &tree_sitter::Node) -> bool {
133+
if let Some(p) = node.parent() {
134+
self.optional(node.kind(), p.kind())
135+
} else {
136+
true
137+
}
138+
}
139+
140+
// TODO(#21): Also include fields, include multiple and not required
141+
pub fn list_types(&self, node: &tree_sitter::Node) -> Vec<String> {
142+
let mut kinds = Vec::new();
143+
if let Some(children) = self.children.get(node.kind()) {
144+
if children.multiple && !children.required {
145+
for child in &children.types {
146+
kinds.push(child.ty.clone());
147+
}
148+
}
149+
}
150+
kinds
151+
}
152+
153+
pub fn subtypes(&self, kind: &String) -> &[String] {
154+
self.subtypes.get(kind).expect("Invalid node kind")
155+
}
156+
}

crates/tree-splicer/src/splice.rs

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ use tree_sitter::{Language, Node, Tree};
66

77
use tree_sitter_edit::{Editor, NodeId};
88

9+
use crate::node_types::NodeTypes;
10+
911
#[derive(Debug, Default)]
1012
pub struct Edits<'a>(HashMap<usize, &'a [u8]>);
1113

@@ -71,9 +73,12 @@ fn parse(language: Language, code: &str) -> tree_sitter::Tree {
7173

7274
#[derive(Debug)]
7375
pub struct Config {
76+
pub chaos: u8,
77+
pub deletions: u8,
7478
pub language: Language,
7579
// pub intra_splices: usize,
7680
pub inter_splices: usize,
81+
pub node_types: NodeTypes,
7782
pub seed: u64,
7883
pub tests: usize,
7984
}
@@ -82,9 +87,11 @@ struct Splicer<'a> {
8287
language: Language,
8388
branches: Branches<'a>,
8489
chaos: u8,
90+
deletions: u8,
8591
kinds: Vec<&'static str>,
8692
// intra_splices: usize,
8793
inter_splices: usize,
94+
node_types: NodeTypes,
8895
trees: Vec<(&'a [u8], &'a Tree)>,
8996
remaining: usize,
9097
rng: StdRng,
@@ -99,19 +106,16 @@ impl<'a> Splicer<'a> {
99106
self.pick_usize(v.len())
100107
}
101108

102-
fn pick_node<'b>(&mut self, tree: &'b Tree) -> Node<'b> {
103-
let mut all_nodes = Vec::with_capacity(16); // min
109+
fn all_nodes<'b>(&self, tree: &'b Tree) -> Vec<Node<'b>> {
110+
let mut all = Vec::with_capacity(16); // min
104111
let root = tree.root_node();
105112
let mut cursor = tree.walk();
106113
let mut nodes: HashSet<_> = root.children(&mut cursor).collect();
107-
if nodes.is_empty() {
108-
return root;
109-
}
110114
while !nodes.is_empty() {
111115
let mut next = HashSet::new();
112116
for node in nodes {
113117
debug_assert!(!next.contains(&node));
114-
all_nodes.push(node);
118+
all.push(node);
115119
let mut child_cursor = tree.walk();
116120
for child in node.children(&mut child_cursor) {
117121
debug_assert!(child.id() != node.id());
@@ -121,7 +125,31 @@ impl<'a> Splicer<'a> {
121125
}
122126
nodes = next;
123127
}
124-
*all_nodes.get(self.pick_idx(&all_nodes)).unwrap()
128+
all
129+
}
130+
131+
fn pick_node<'b>(&mut self, tree: &'b Tree) -> Node<'b> {
132+
let nodes = self.all_nodes(tree);
133+
if nodes.is_empty() {
134+
return tree.root_node();
135+
}
136+
*nodes.get(self.pick_idx(&nodes)).unwrap()
137+
}
138+
139+
fn delete_node(&mut self, _text: &[u8], tree: &Tree) -> (usize, Vec<u8>) {
140+
let chaotic = self.rng.gen_range(0..100) < self.chaos;
141+
if chaotic {
142+
return (self.pick_node(tree).id(), Vec::new());
143+
}
144+
let nodes = self.all_nodes(tree);
145+
if nodes.iter().all(|n| !self.node_types.optional_node(n)) {
146+
return (self.pick_node(tree).id(), Vec::new());
147+
}
148+
let mut node = nodes.get(self.pick_idx(&nodes)).unwrap();
149+
while !self.node_types.optional_node(node) {
150+
node = nodes.get(self.pick_idx(&nodes)).unwrap();
151+
}
152+
(node.id(), Vec::new())
125153
}
126154

127155
fn splice_node(&mut self, text: &[u8], tree: &Tree) -> (usize, Vec<u8>) {
@@ -163,7 +191,11 @@ impl<'a> Splicer<'a> {
163191
let splices = self.rng.gen_range(0..self.inter_splices);
164192
let mut text = Vec::from(text0);
165193
for _ in 0..splices {
166-
let (id, bytes) = self.splice_node(text.as_slice(), &tree);
194+
let (id, bytes) = if self.rng.gen_range(0..100) < self.deletions {
195+
self.delete_node(text.as_slice(), &tree)
196+
} else {
197+
self.splice_node(text.as_slice(), &tree)
198+
};
167199
let id = NodeId { id };
168200
let bytes = bytes.to_vec();
169201
let mut result = Vec::with_capacity(text.len() / 4); // low guesstimate
@@ -200,7 +232,6 @@ impl<'a> Iterator for Splicer<'a> {
200232
pub fn splice<'a>(
201233
config: Config,
202234
files: &'a HashMap<String, (Vec<u8>, Tree)>,
203-
chaos: u8,
204235
) -> impl Iterator<Item = Vec<u8>> + 'a {
205236
let trees: Vec<_> = files
206237
.iter()
@@ -219,12 +250,14 @@ pub fn splice<'a>(
219250
let rng = rand::rngs::StdRng::seed_from_u64(config.seed);
220251
let kinds = branches.0.keys().copied().collect();
221252
Splicer {
253+
chaos: config.chaos,
254+
deletions: config.deletions,
222255
language: config.language,
223256
branches,
224-
chaos,
225257
kinds,
226258
// intra_splices: config.intra_splices,
227259
inter_splices: config.inter_splices,
260+
node_types: config.node_types,
228261
remaining: std::cmp::min(config.tests, possible),
229262
rng,
230263
trees,

0 commit comments

Comments
 (0)