Skip to content

Commit 437ac65

Browse files
authored
Expose max_recursion_depth in build_regex_from_schema (#181)
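Currently, the `max_recursion_depth` parameter is not exposed in the Python bindings, so the default value of 3 is always used. This PR exposes it as an argument of `build_regex_from_schema` so that more complex JSON schemas, which need a deeper recursion limit, can be handled.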
1 parent ad8c7b7 commit 437ac65

File tree

4 files changed

+140
-13
lines changed

4 files changed

+140
-13
lines changed

src/bin/convert-json-schema.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ fn main() {
1818
}
1919

2020
let schema = std::io::read_to_string(std::io::stdin()).unwrap();
21-
let regex = json_schema::regex_from_str(&schema, None).unwrap();
21+
let regex = json_schema::regex_from_str(&schema, None, None).unwrap();
2222
println!("Regex: {}", regex);
2323
println!("Regex len: {}", regex.len());
2424
}

src/json_schema/mod.rs

Lines changed: 135 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,13 @@ use crate::Result;
138138
/// # Ok(())
139139
/// }
140140
/// ```
141-
pub fn regex_from_str(json: &str, whitespace_pattern: Option<&str>) -> Result<String> {
141+
pub fn regex_from_str(
142+
json: &str,
143+
whitespace_pattern: Option<&str>,
144+
max_recursion_depth: Option<usize>,
145+
) -> Result<String> {
142146
let json_value: Value = serde_json::from_str(json)?;
143-
regex_from_value(&json_value, whitespace_pattern)
147+
regex_from_value(&json_value, whitespace_pattern, max_recursion_depth)
144148
}
145149

146150
/// Generates a regular expression string from `serde_json::Value` type of JSON schema.
@@ -178,11 +182,18 @@ pub fn regex_from_str(json: &str, whitespace_pattern: Option<&str>) -> Result<St
178182
/// # Ok(())
179183
/// }
180184
/// ```
181-
pub fn regex_from_value(json: &Value, whitespace_pattern: Option<&str>) -> Result<String> {
185+
pub fn regex_from_value(
186+
json: &Value,
187+
whitespace_pattern: Option<&str>,
188+
max_recursion_depth: Option<usize>,
189+
) -> Result<String> {
182190
let mut parser = parsing::Parser::new(json);
183191
if let Some(pattern) = whitespace_pattern {
184192
parser = parser.with_whitespace_pattern(pattern)
185193
}
194+
if let Some(depth) = max_recursion_depth {
195+
parser = parser.with_max_recursion_depth(depth)
196+
}
186197
parser.to_regex(json)
187198
}
188199

@@ -1213,7 +1224,7 @@ mod tests {
12131224
],
12141225
),
12151226
] {
1216-
let result = regex_from_str(schema, None).expect("To regex failed");
1227+
let result = regex_from_str(schema, None, None).expect("To regex failed");
12171228
assert_eq!(result, regex, "JSON Schema {} didn't match", schema);
12181229

12191230
let re = Regex::new(&result).expect("Regex failed");
@@ -1269,7 +1280,7 @@ mod tests {
12691280
],
12701281
),
12711282
] {
1272-
let regex = regex_from_str(schema, None).expect("To regex failed");
1283+
let regex = regex_from_str(schema, None, None).expect("To regex failed");
12731284
let re = Regex::new(&regex).expect("Regex failed");
12741285
for m in a_match {
12751286
should_match(&re, m);
@@ -1322,7 +1333,7 @@ mod tests {
13221333
vec![r#"{SPACE"date"SPACE:SPACE"2018-11-13"SPACE}"#],
13231334
),
13241335
] {
1325-
let regex = regex_from_str(schema, whitespace_pattern).expect("To regex failed");
1336+
let regex = regex_from_str(schema, whitespace_pattern, None).expect("To regex failed");
13261337
assert_eq!(regex, expected_regex);
13271338

13281339
let re = Regex::new(&regex).expect("Regex failed");
@@ -1346,7 +1357,7 @@ mod tests {
13461357
}
13471358
}"##;
13481359

1349-
let regex = regex_from_str(schema, None);
1360+
let regex = regex_from_str(schema, None, None);
13501361
assert!(regex.is_ok(), "{:?}", regex);
13511362

13521363
// Confirm the depth of 3 recursion levels by default, recursion level starts
@@ -1479,7 +1490,123 @@ mod tests {
14791490
"$ref": "#/definitions/typeA"
14801491
}"##;
14811492

1482-
let regex = regex_from_str(schema, None);
1493+
let regex = regex_from_str(schema, None, None);
1494+
assert!(regex.is_ok(), "{:?}", regex);
1495+
}
1496+
1497+
#[test]
1498+
fn quadruple_recursion_doesnt_include_leaf() {
1499+
let schema = r##"
1500+
{
1501+
"definitions": {
1502+
"typeA": {
1503+
"type": "object",
1504+
"properties": {
1505+
"data": { "type": "string" },
1506+
"typeB": { "$ref": "#/definitions/typeB" }
1507+
},
1508+
"required": ["data", "typeB"]
1509+
},
1510+
"typeB": {
1511+
"type": "object",
1512+
"properties": {
1513+
"data": { "type": "string" },
1514+
"typeC": { "$ref": "#/definitions/typeC" }
1515+
},
1516+
"required": ["data", "typeC"]
1517+
},
1518+
"typeC": {
1519+
"type": "object",
1520+
"properties": {
1521+
"data": { "type": "string" },
1522+
"typeD": { "$ref": "#/definitions/typeD" }
1523+
},
1524+
"required": ["data", "typeD"]
1525+
},
1526+
"typeD": {
1527+
"type": "object",
1528+
"properties": {
1529+
"data": { "type": "string" },
1530+
"typeE": { "$ref": "#/definitions/typeE" }
1531+
},
1532+
"required": ["data", "typeE"]
1533+
},
1534+
"typeE": {
1535+
"type": "object",
1536+
"properties": {
1537+
"data": { "type": "string" },
1538+
"typeA": { "$ref": "#/definitions/typeA" }
1539+
},
1540+
"required": ["data", "typeA"]
1541+
}
1542+
},
1543+
"$ref": "#/definitions/typeA"
1544+
}"##;
1545+
1546+
let regex = regex_from_str(schema, None, None);
14831547
assert!(regex.is_ok(), "{:?}", regex);
1548+
let regex_str = regex.unwrap();
1549+
assert!(
1550+
!regex_str.contains("typeE"),
1551+
"Regex should not contain typeE when max_recursion_depth is not specified"
1552+
);
1553+
}
1554+
1555+
#[test]
1556+
fn quadruple_recursion_includes_leaf_when_max_recursion_depth_is_specified() {
1557+
let schema = r##"
1558+
{
1559+
"definitions": {
1560+
"typeA": {
1561+
"type": "object",
1562+
"properties": {
1563+
"data": { "type": "string" },
1564+
"typeB": { "$ref": "#/definitions/typeB" }
1565+
},
1566+
"required": ["data", "typeB"]
1567+
},
1568+
"typeB": {
1569+
"type": "object",
1570+
"properties": {
1571+
"data": { "type": "string" },
1572+
"typeC": { "$ref": "#/definitions/typeC" }
1573+
},
1574+
"required": ["data", "typeC"]
1575+
},
1576+
"typeC": {
1577+
"type": "object",
1578+
"properties": {
1579+
"data": { "type": "string" },
1580+
"typeD": { "$ref": "#/definitions/typeD" }
1581+
},
1582+
"required": ["data", "typeD"]
1583+
},
1584+
"typeD": {
1585+
"type": "object",
1586+
"properties": {
1587+
"data": { "type": "string" },
1588+
"typeE": { "$ref": "#/definitions/typeE" }
1589+
},
1590+
"required": ["data", "typeE"]
1591+
},
1592+
"typeE": {
1593+
"type": "object",
1594+
"properties": {
1595+
"data": { "type": "string" },
1596+
"typeA": { "$ref": "#/definitions/typeA" }
1597+
},
1598+
"required": ["data", "typeA"]
1599+
}
1600+
},
1601+
"$ref": "#/definitions/typeA"
1602+
}"##;
1603+
1604+
let regex = regex_from_str(schema, None, Some(4));
1605+
assert!(regex.is_ok(), "{:?}", regex);
1606+
let regex_str = regex.unwrap();
1607+
assert!(
1608+
regex_str.contains("typeE"),
1609+
"Regex should contain typeE when max_recursion_depth is specified"
1610+
);
14841611
}
14851612
}

src/json_schema/parsing.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ pub(crate) struct Parser<'a> {
1616
}
1717

1818
impl<'a> Parser<'a> {
19-
// Max recursion depth is defined at level 3.
19+
// Max recursion depth defaults to level 3.
2020
// Defining recursion depth higher than that should be done cautiously, since
2121
// each +1 step on the depth blows up regex's size exponentially.
2222
//
@@ -39,7 +39,6 @@ impl<'a> Parser<'a> {
3939
}
4040
}
4141

42-
#[allow(dead_code)]
4342
pub fn with_max_recursion_depth(self, max_recursion_depth: usize) -> Self {
4443
Self {
4544
max_recursion_depth,

src/python_bindings/mod.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -473,15 +473,16 @@ impl PyVocabulary {
473473

474474
/// Creates regex string from JSON schema with optional whitespace pattern and max recursion depth (defaults to 3).
475475
#[pyfunction(name = "build_regex_from_schema")]
476-
#[pyo3(signature = (json_schema, whitespace_pattern=None))]
476+
#[pyo3(signature = (json_schema, whitespace_pattern=None, max_recursion_depth=3))]
477477
pub fn build_regex_from_schema_py(
478478
json_schema: String,
479479
whitespace_pattern: Option<&str>,
480+
max_recursion_depth: usize,
480481
) -> PyResult<String> {
481482
let value = serde_json::from_str(&json_schema).map_err(|_| {
482483
PyErr::new::<pyo3::exceptions::PyTypeError, _>("Expected a valid JSON string.")
483484
})?;
484-
json_schema::regex_from_value(&value, whitespace_pattern)
485+
json_schema::regex_from_value(&value, whitespace_pattern, Some(max_recursion_depth))
485486
.map_err(|e| PyValueError::new_err(e.to_string()))
486487
}
487488

0 commit comments

Comments
 (0)