Skip to content

Commit 3c54664

Browse files
authored
Fix: Error when creating index from schema with Uri (#206)
Fixes #182. The regexes for URIs and emails had two problems: (1) they included `^` and `$`, but anchoring to the start and end of a line makes no sense when a URI or email can appear in the middle of a JSON document; (2) they were missing the opening and closing quotes, which are required because these values are stored as strings within JSON.
1 parent 3cce99e commit 3c54664

File tree

2 files changed

+50
-24
lines changed

2 files changed

+50
-24
lines changed

src/json_schema/mod.rs

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,40 +1048,66 @@ mod tests {
10481048
r#"{"title": "Foo", "type": "string", "format": "uri"}"#,
10491049
URI,
10501050
vec![
1051-
"http://example.com",
1052-
"https://example.com/path?query=param#fragment",
1053-
"ftp://ftp.example.com/resource",
1054-
"urn:isbn:0451450523",
1051+
r#""http://example.com""#,
1052+
r#""https://example.com/path?query=param#fragment""#,
1053+
r#""ftp://ftp.example.com/resource""#,
1054+
r#""urn:isbn:0451450523""#,
10551055
],
10561056
vec![
1057-
"http:/example.com", // missing slash
1058-
"htp://example.com", // invalid scheme
1059-
"http://", // missing host
1060-
"example.com", // missing scheme
1057+
r#""http:/example.com""#, // missing slash
1058+
r#""htp://example.com""#, // invalid scheme
1059+
r#""http://""#, // missing host
1060+
r#""example.com""#, // missing scheme
10611061
]
10621062
),
10631063
(
10641064
r#"{"title": "Bar", "type": "string", "format": "email"}"#,
10651065
EMAIL,
10661066
vec![
10671067
// Valid emails
1068-
"user@example.com", // valid
1069-
"user.name+tag+sorting@example.com", // valid
1070-
"user_name@example.co.uk", // valid
1071-
"user-name@sub.example.com", // valid
1068+
r#""user@example.com""#, // valid
1069+
r#""user.name+tag+sorting@example.com""#, // valid
1070+
r#""user_name@example.co.uk""#, // valid
1071+
r#""user-name@sub.example.com""#, // valid
10721072
],
10731073
vec![
10741074
// Invalid emails
1075-
"plainaddress", // missing '@' and domain
1076-
"@missingusername.com", // missing username
1077-
"username@.com", // leading dot in domain
1078-
"username@com", // TLD must have at least 2 characters
1079-
"username@example,com", // invalid character in domain
1080-
"username@.example.com", // leading dot in domain
1081-
"username@-example.com", // domain cannot start with a hyphen
1082-
"username@example-.com", // domain cannot end with a hyphen
1083-
"username@example..com", // double dot in domain name
1084-
"username@.example..com", // multiple errors in domain
1075+
r#""plainaddress""#, // missing '@' and domain
1076+
r#""@missingusername.com""#, // missing username
1077+
r#""username@.com""#, // leading dot in domain
1078+
r#""username@com""#, // TLD must have at least 2 characters
1079+
r#""username@example,com""#, // invalid character in domain
1080+
r#""username@.example.com""#, // leading dot in domain
1081+
r#""username@-example.com""#, // domain cannot start with a hyphen
1082+
r#""username@example-.com""#, // domain cannot end with a hyphen
1083+
r#""username@example..com""#, // double dot in domain name
1084+
r#""username@.example..com""#, // multiple errors in domain
1085+
]
1086+
),
1087+
// Nested URI and email
1088+
(
1089+
r#"{
1090+
"title": "Test Schema",
1091+
"type": "object",
1092+
"properties": {
1093+
"test_str": {"title": "Test string", "type": "string"},
1094+
"test_uri": {"title": "Test URI", "type": "string", "format": "uri"},
1095+
"test_email": {"title": "Test email", "type": "string", "format": "email"}
1096+
},
1097+
"required": ["test_str", "test_uri", "test_email"]
1098+
}"#,
1099+
format!(
1100+
r#"\{{{0}"test_str"{0}:{0}{STRING}{0},{0}"test_uri"{0}:{0}{URI}{0},{0}"test_email"{0}:{0}{EMAIL}{0}\}}"#,
1101+
WHITESPACE
1102+
).as_str(),
1103+
vec![
1104+
r#"{ "test_str": "cat", "test_uri": "http://example.com", "test_email": "user@example.com" }"#,
1105+
],
1106+
vec![
1107+
// Invalid URI
1108+
r#"{ "test_str": "cat", "test_uri": "http:/example.com", "test_email": "user@example.com" }"#,
1109+
// Invalid email
1110+
r#"{ "test_str": "cat", "test_uri": "http://example.com", "test_email": "username@.com" }"#,
10851111
]
10861112
),
10871113

src/json_schema/types.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,9 @@ pub static TIME: &str = r#""(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\\.[0-9
4545
// https://datatracker.ietf.org/doc/html/rfc9562 and https://stackoverflow.com/questions/136505/searching-for-uuids-in-text-with-regex
4646
pub static UUID: &str = r#""[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}""#;
4747
// https://datatracker.ietf.org/doc/html/rfc3986#appendix-B
48-
pub static URI: &str = r#"^(https?|ftp):\/\/([^\s:@]+(:[^\s:@]*)?@)?([a-zA-Z\d.-]+\.[a-zA-Z]{2,}|localhost)(:\d+)?(\/[^\s?#]*)?(\?[^\s#]*)?(#[^\s]*)?$|^urn:[a-zA-Z\d][a-zA-Z\d\-]{0,31}:[^\s]+$"#;
48+
// Regex source for a JSON string value holding a URI.
// The pattern is wrapped in literal double quotes and carries no `^`/`$`
// anchors, so it matches the quoted value wherever it sits inside a larger
// JSON text or a larger composed pattern.
// Two alternatives are accepted between the quotes:
//   * an `http`, `https` or `ftp` URL: optional `user[:password]@`, then a
//     dotted host ending in at least two letters (or `localhost`), then an
//     optional `:port`, path, `?query` and `#fragment`;
//   * a `urn:` whose namespace identifier is 1-32 characters (alphanumeric
//     first character, then alphanumerics or `-`), followed by `:` and a
//     non-empty run of non-whitespace characters.
// NOTE(review): `[^\s]` classes also admit a `"` character — confirm that is
// acceptable for values embedded in JSON.
pub static URI: &str = r#""(?:(https?|ftp):\/\/([^\s:@]+(:[^\s:@]*)?@)?([a-zA-Z\d.-]+\.[a-zA-Z]{2,}|localhost)(:\d+)?(\/[^\s?#]*)?(\?[^\s#]*)?(#[^\s]*)?|urn:[a-zA-Z\d][a-zA-Z\d\-]{0,31}:[^\s]+)""#;
4949
// https://www.rfc-editor.org/rfc/rfc5322 and https://stackoverflow.com/questions/13992403/regex-validation-of-email-addresses-according-to-rfc5321-rfc5322
50-
pub static EMAIL: &str = r#"^(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])$"#;
50+
// Regex source for a JSON string value holding an email address.
// The pattern is wrapped in literal double quotes and carries no `^`/`$`
// anchors, so it matches the quoted value wherever it sits inside a larger
// JSON text or a larger composed pattern.
// Between the quotes:
//   * local part: either dot-separated runs of atom characters, or a
//     double-quoted string with backslash escapes;
//   * `@`;
//   * domain: either dot-separated labels (each starting and ending with an
//     alphanumeric, hyphens allowed inside), or a bracketed literal
//     (`[...]`) holding a dotted-quad address or a tagged general literal.
// The character classes list lowercase `a-z` only; whether uppercase input
// matches depends on how the pattern is compiled, which is not visible here.
// NOTE(review): the class `\x21-\x5a\x53-\x7f` in the bracketed-literal branch
// overlaps itself (`\x53`-`\x5a`) and therefore admits `[`, `\` and `]`;
// this may be a typo inherited from the referenced source — TODO confirm.
pub static EMAIL: &str = r#""(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])""#;
5151

5252
/// Supported format type of the `JsonType::String`.
5353
#[derive(Debug, PartialEq)]

0 commit comments

Comments
 (0)