Skip to content

Detect invalid escape for ecmascript #1049

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions src/main/java/com/networknt/schema/format/RegexFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,22 @@
*/
package com.networknt.schema.format;

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import com.networknt.schema.ExecutionContext;
import com.networknt.schema.Format;
import com.networknt.schema.ValidationContext;
import com.networknt.schema.regex.RegularExpression;

/**
* Format for regex.
*/
public class RegexFormat implements Format {
@Override
public boolean matches(ExecutionContext executionContext, String value) {
public boolean matches(ExecutionContext executionContext, ValidationContext validationContext, String value) {
if (null == value) return true;
try {
Pattern.compile(value);
RegularExpression.compile(value, validationContext);
return true;

} catch (PatternSyntaxException e) {
} catch (RuntimeException e) {
return false;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import java.util.regex.Pattern;

/**
* JDK {@link RegularExpression}.
*/
class JDKRegularExpression implements RegularExpression {
private final Pattern pattern;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,28 @@
package com.networknt.schema.regex;

import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;

import org.jcodings.specific.UTF8Encoding;
import org.joni.Option;
import org.joni.Regex;
import org.joni.Syntax;
import org.joni.exception.SyntaxException;

/**
* ECMAScript {@link RegularExpression}.
*/
class JoniRegularExpression implements RegularExpression {
private final Regex pattern;
/**
 * Matches a regular expression source that contains an escape this class
 * rejects. The alternatives after the backslash are:
 * <ul>
 * <li>one of the listed letters, or a digit 1-9</li>
 * <li>{@code c} at the end of the input</li>
 * <li>{@code p} or {@code P} not followed by an opening brace</li>
 * <li>{@code u} not followed by an opening brace or a hexadecimal digit</li>
 * <li>{@code x} not followed by two hexadecimal digits</li>
 * </ul>
 * Hexadecimal letters are accepted after {@code u} so that a four-hex-digit
 * escape whose first digit is a letter (for example u followed by FEFF) is not
 * reported as invalid.
 * <p>
 * Compiled once and shared by all instances; {@link Pattern} is immutable and
 * safe for concurrent use.
 * <p>
 * NOTE(review): the leading {@code .*} can consume a preceding backslash, so an
 * escaped backslash followed by one of the listed characters also matches.
 */
private static final Pattern INVALID_ESCAPE_PATTERN = Pattern.compile(
        ".*\\\\([aeg-moqyzACE-OQ-RT-VX-Z1-9]|c$|[pP]([^{]|$)|u([^{0-9a-fA-F]|$)|x([0-9a-fA-F][^0-9a-fA-F]|[^0-9a-fA-F][0-9a-fA-F]|[^0-9a-fA-F][^0-9a-fA-F]|.?$)).*");

/**
* Creates the expression by delegating to the two-argument constructor with
* {@link Syntax#ECMAScript}.
*
* @param regex the regular expression source
*/
JoniRegularExpression(String regex) {
this(regex, Syntax.ECMAScript);
}

JoniRegularExpression(String regex, Syntax syntax) {
validate(regex);
// Joni is too liberal on some constructs
String s = regex
.replace("\\d", "[0-9]")
Expand All @@ -21,7 +33,19 @@ class JoniRegularExpression implements RegularExpression {
.replace("\\S", "[^ \\f\\n\\r\\t\\v\\u00a0\\u1680\\u2000-\\u200a\\u2028\\u2029\\u202f\\u205f\\u3000\\ufeff]");

byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
this.pattern = new Regex(bytes, 0, bytes.length, Option.SINGLELINE, UTF8Encoding.INSTANCE, Syntax.ECMAScript);
this.pattern = new Regex(bytes, 0, bytes.length, Option.SINGLELINE, UTF8Encoding.INSTANCE, syntax);
}

/**
 * Rejects a regular expression source that contains an escape matched by
 * {@code INVALID_ESCAPE_PATTERN}.
 *
 * @param regex the regular expression source to check
 * @throws SyntaxException with the message {@code Invalid escape} when such an
 *                         escape is found
 */
protected void validate(String regex) {
    // Joni is not strict with escapes.
    // find() is used instead of matches(): '.' does not match line terminators,
    // so a whole-input match would miss an invalid escape in an expression that
    // contains a newline. Anything matches() accepted is still found by find().
    if (INVALID_ESCAPE_PATTERN.matcher(regex).find()) {
        /*
         * One option considered was a custom Encoding implementation that rejects
         * certain code points but it is unable to distinguish \a vs \cG for instance as
         * both translate to BEL
         */
        throw new SyntaxException("Invalid escape");
    }
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import com.networknt.schema.ValidationContext;

/**
* Regular expression.
*/
@FunctionalInterface
public interface RegularExpression {
boolean matches(String value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ private static String toForwardSlashPath(Path file) {

private static void executeTest(JsonSchema schema, TestSpec testSpec) {
Set<ValidationMessage> errors = schema.validate(testSpec.getData(), OutputFormat.DEFAULT, (executionContext, validationContext) -> {
if (testSpec.getTestCase().getSource().getPath().getParent().toString().endsWith("format")) {
if (testSpec.getTestCase().getSource().getPath().getParent().toString().endsWith("format")
|| "ecmascript-regex.json"
.equals(testSpec.getTestCase().getSource().getPath().getFileName().toString())) {
executionContext.getExecutionConfig().setFormatAssertionsEnabled(true);
}
});
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* Copyright (c) 2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.networknt.schema.regex;

import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import org.joni.exception.SyntaxException;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;

/**
* Tests for JoniRegularExpression.
*/
class JoniRegularExpressionTest {

    /**
     * Regular expression sources whose construction is expected to fail with
     * an "Invalid escape" syntax error.
     */
    enum InvalidEscapeInput {
        A("\\a"),
        HELLOA("hello\\a"),
        C("\\c"),
        E("\\e"),
        G("\\g"),
        H("\\h"),
        I("\\i"),
        J("\\j"),
        K("\\k"),
        L("\\l"),
        M("\\m"),
        O("\\o"),
        Q("\\q"),
        U("\\u"),
        X("\\x"),
        X1("\\x1"),
        XGG("\\xgg"),
        X1G("\\x1g"),
        Y("\\y"),
        Z("\\z"),
        _1("\\1"),
        _2("\\2"),
        _3("\\3"),
        _4("\\4"),
        _5("\\5"),
        _6("\\6"),
        _7("\\7"),
        _8("\\8"),
        _9("\\9");

        /** The regular expression source under test; fixed per constant. */
        final String value;

        InvalidEscapeInput(String value) {
            this.value = value;
        }
    }

    /**
     * Every {@link InvalidEscapeInput} must be rejected by the constructor with
     * a {@link SyntaxException} carrying the message "Invalid escape".
     */
    @ParameterizedTest
    @EnumSource(InvalidEscapeInput.class)
    void invalidEscape(InvalidEscapeInput input) {
        SyntaxException e = assertThrows(SyntaxException.class, () -> new JoniRegularExpression(input.value));
        assertEquals("Invalid escape", e.getMessage());
    }

    /**
     * Regular expression sources whose construction is expected to succeed.
     */
    enum ValidEscapeInput {
        B("\\b"),
        D("\\d"),
        CAP_D("\\D"),
        W("\\w"),
        CAP_W("\\W"),
        S("\\s"),
        CAP_S("\\S"),
        T("\\t"),
        U1234("\\u1234"),
        R("\\r"),
        N("\\n"),
        V("\\v"),
        F("\\f"),
        X12("\\x12"),
        X1F("\\x1f"),
        X1234("\\x1234"),
        P("\\p{Letter}cole"), // unicode property
        CAP_P("\\P{Letter}cole"), // unicode property
        _0("\\0"),
        CA("\\cA"), // control
        CB("\\cB"), // control
        CC("\\cC"), // control
        CG("\\cG"); // control

        /** The regular expression source under test; fixed per constant. */
        final String value;

        ValidEscapeInput(String value) {
            this.value = value;
        }
    }

    /**
     * Every {@link ValidEscapeInput} must be accepted by the constructor
     * without any exception being thrown.
     */
    @ParameterizedTest
    @EnumSource(ValidEscapeInput.class)
    void validEscape(ValidEscapeInput input) {
        assertDoesNotThrow(() -> new JoniRegularExpression(input.value));
    }

    /**
     * A property escape that is not followed by a braced name must be rejected
     * with a {@link SyntaxException}; the message is not checked here.
     */
    @Test
    void invalidPropertyName() {
        assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\p"));
        assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\P"));
        assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\pa"));
        assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\Pa"));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -409,12 +409,12 @@
"description": "\\a is not an ECMA 262 control escape",
"schema": {
"$schema": "https://json-schema.org/draft/next/schema",
"$ref": "https://json-schema.org/draft/next/schema"
"format": "regex"
},
"tests": [
{
"description": "when used as a pattern",
"data": { "pattern": "\\a" },
"data": "\\a",
"valid": false
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -409,15 +409,13 @@
"description": "\\a is not an ECMA 262 control escape",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$ref": "https://json-schema.org/draft/2020-12/schema"
"format": "regex"
},
"tests": [
{
"description": "when used as a pattern",
"data": { "pattern": "\\a" },
"valid": false,
"disabled": true,
"reason": "TODO: RegexFormat does not support ECMA 262 regular expressions"
"data": "\\a",
"valid": false
}
]
},
Expand Down