Skip to content

Commit 446a1b2

Browse files
pyek-bot and Hailong-am authored
[Backport 2.x] Manual backport for Adding schema validation and placeholders for index mappings (#3342)
* [Enhancement] Add schema validation and placeholders to index mappings (#3240) * feat(index mappings): fetch mappings and version from json file instead of string constants Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: changing exception being thrown Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * chore: remove unused file Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * chore: fix typo in comment Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * chore: adding new line at the end of files Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * feat: add test cases Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * fix: remove test code Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * fix(test): in main the versions were not updated appropriately Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: move mapping templates under common module Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: ensure that conversationindexconstants reference mlindex enums rather than use their own mappings Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: update comment Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * feat: add enhancements to validate index schema and allow using placeholders Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: modifying comment Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * test: adding testcase for MLIndex to catch failures before runtime Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: rename dir from mappings to index-mappings Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * fix: add null checks Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * fix: modify mappin paths for placeholders Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * fix: adding dependencies for testing Signed-off-by: Pavan Yekbote 
<mail2pavanyekbote@gmail.com> * fix(test): compare json object rather than strings to avoid eol character issue Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: combine if statements into single check Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactoring: null handling + clean code Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * spotless apply Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * tests: adding more UT Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * fix: dependencies to handle jarhell Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * spotless apply Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: add header and use single instance of mapper Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * fixed: doc syntax Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: renamed files, efficient loading of resources, better exception handling Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * refactor: cleaner comment Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> --------- Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> * fix version mismatch with opensearch core (#2800) Signed-off-by: Hailong Cui <ihailong@amazon.com> * fix: providing explicit version for http5 package Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> --------- Signed-off-by: Pavan Yekbote <mail2pavanyekbote@gmail.com> Signed-off-by: Hailong Cui <ihailong@amazon.com> Co-authored-by: Hailong Cui <ihailong@amazon.com>
1 parent 76f0f3b commit 446a1b2

File tree

25 files changed

+326
-131
lines changed

25 files changed

+326
-131
lines changed

build.gradle

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import org.opensearch.gradle.VersionProperties
2+
13
/*
24
* Copyright OpenSearch Contributors
35
* SPDX-License-Identifier: Apache-2.0
@@ -63,6 +65,9 @@ allprojects {
6365
plugins.withId('java') {
6466
sourceCompatibility = targetCompatibility = JavaVersion.VERSION_11
6567
}
68+
69+
project.getExtensions().getExtraProperties().set("versions", VersionProperties.getVersions());
70+
6671
}
6772

6873
subprojects {

common/build.gradle

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ dependencies {
3636
exclude group: 'com.google.guava', module: 'listenablefuture'
3737
}
3838
compileOnly 'com.jayway.jsonpath:json-path:2.9.0'
39+
compileOnly("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}")
40+
compileOnly("com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}")
41+
compileOnly group: 'com.networknt' , name: 'json-schema-validator', version: '1.4.0'
3942
}
4043

4144
lombok {

common/src/main/java/org/opensearch/ml/common/CommonValue.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,15 @@ public class CommonValue {
4848
public static final String TOOL_PARAMETERS_PREFIX = "tools.parameters.";
4949

5050
// Index mapping paths
51-
public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "index-mappings/ml-model-group.json";
52-
public static final String ML_MODEL_INDEX_MAPPING_PATH = "index-mappings/ml-model.json";
53-
public static final String ML_TASK_INDEX_MAPPING_PATH = "index-mappings/ml-task.json";
54-
public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "index-mappings/ml-connector.json";
55-
public static final String ML_CONFIG_INDEX_MAPPING_PATH = "index-mappings/ml-config.json";
56-
public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "index-mappings/ml-controller.json";
57-
public static final String ML_AGENT_INDEX_MAPPING_PATH = "index-mappings/ml-agent.json";
58-
public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "index-mappings/ml-memory-meta.json";
59-
public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "index-mappings/ml-memory-message.json";
51+
public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "index-mappings/ml_model_group.json";
52+
public static final String ML_MODEL_INDEX_MAPPING_PATH = "index-mappings/ml_model.json";
53+
public static final String ML_TASK_INDEX_MAPPING_PATH = "index-mappings/ml_task.json";
54+
public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "index-mappings/ml_connector.json";
55+
public static final String ML_CONFIG_INDEX_MAPPING_PATH = "index-mappings/ml_config.json";
56+
public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "index-mappings/ml_controller.json";
57+
public static final String ML_AGENT_INDEX_MAPPING_PATH = "index-mappings/ml_agent.json";
58+
public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "index-mappings/ml_memory_meta.json";
59+
public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "index-mappings/ml_memory_message.json";
6060

6161
// Calculate Versions independently of OpenSearch core version
6262
public static final Version VERSION_2_11_0 = Version.fromString("2.11.0");

common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@
55

66
package org.opensearch.ml.common.utils;
77

8+
import static org.opensearch.ml.common.utils.StringUtils.validateSchema;
9+
810
import java.io.IOException;
911
import java.net.URL;
12+
import java.util.HashMap;
1013
import java.util.Map;
1114

1215
import com.google.common.base.Charsets;
@@ -40,20 +43,99 @@ public class IndexUtils {
4043
public static final Map<String, Object> UPDATED_DEFAULT_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-1");
4144
public static final Map<String, Object> UPDATED_ALL_NODES_REPLICA_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-all");
4245

46+
// Schema that validates system index mappings
47+
public static final String MAPPING_SCHEMA_PATH = "index-mappings/schema.json";
48+
49+
// Placeholders to use within the json mapping files
50+
private static final String USER_PLACEHOLDER = "USER_MAPPING_PLACEHOLDER";
51+
private static final String CONNECTOR_PLACEHOLDER = "CONNECTOR_MAPPING_PLACEHOLDER";
52+
public static final Map<String, String> MAPPING_PLACEHOLDERS = Map
53+
.of(USER_PLACEHOLDER, "index-mappings/placeholders/user.json", CONNECTOR_PLACEHOLDER, "index-mappings/placeholders/connector.json");
54+
4355
public static String getMappingFromFile(String path) throws IOException {
4456
URL url = IndexUtils.class.getClassLoader().getResource(path);
4557
if (url == null) {
4658
throw new IOException("Resource not found: " + path);
4759
}
4860

4961
String mapping = Resources.toString(url, Charsets.UTF_8).trim();
50-
if (mapping.isEmpty() || !StringUtils.isJson(mapping)) {
51-
throw new IllegalArgumentException("Invalid or non-JSON mapping at: " + path);
62+
if (mapping.isEmpty()) {
63+
throw new IllegalArgumentException("Empty mapping found at: " + path);
5264
}
5365

66+
mapping = replacePlaceholders(mapping);
67+
validateMapping(mapping);
68+
5469
return mapping;
5570
}
5671

72+
public static String replacePlaceholders(String mapping) throws IOException {
73+
if (mapping == null || mapping.isBlank()) {
74+
throw new IllegalArgumentException("Mapping cannot be null or empty");
75+
}
76+
77+
// Preload resources into memory to avoid redundant I/O
78+
Map<String, String> loadedPlaceholders = new HashMap<>();
79+
for (Map.Entry<String, String> placeholder : MAPPING_PLACEHOLDERS.entrySet()) {
80+
URL url = IndexUtils.class.getClassLoader().getResource(placeholder.getValue());
81+
if (url == null) {
82+
throw new IOException("Resource not found: " + placeholder.getValue());
83+
}
84+
85+
loadedPlaceholders.put(placeholder.getKey(), Resources.toString(url, Charsets.UTF_8));
86+
}
87+
88+
StringBuilder result = new StringBuilder(mapping);
89+
for (Map.Entry<String, String> entry : loadedPlaceholders.entrySet()) {
90+
String placeholder = entry.getKey();
91+
String replacement = entry.getValue();
92+
93+
// Replace all occurrences of the placeholder
94+
int index;
95+
while ((index = result.indexOf(placeholder)) != -1) {
96+
result.replace(index, index + placeholder.length(), replacement);
97+
}
98+
}
99+
100+
return result.toString();
101+
}
102+
103+
/**
104+
* Checks if the provided mapping is a valid JSON and validates it against a schema.
105+
*
106+
* <p>The schema is located at <code>mappings/schema.json</code> and enforces the following validations:</p>
107+
*
108+
* <ul>
109+
* <li>Mandatory fields:
110+
* <ul>
111+
* <li><code>_meta</code></li>
112+
* <li><code>_meta.schema_version</code></li>
113+
* <li><code>properties</code></li>
114+
* </ul>
115+
* </li>
116+
* <li>No additional fields are allowed at the root level.</li>
117+
* <li>No additional fields are allowed in the <code>_meta</code> object.</li>
118+
* <li><code>properties</code> must be an object type.</li>
119+
* <li><code>_meta</code> must be an object type.</li>
120+
* <li><code>_meta.schema_version</code> must be an integer.</li>
121+
* </ul>
122+
*
123+
* <p><strong>Note:</strong> Validation can be made stricter if a specific schema is defined for each index.</p>
124+
*/
125+
public static void validateMapping(String mapping) throws IOException {
126+
if (mapping.isBlank() || !StringUtils.isJson(mapping)) {
127+
throw new IllegalArgumentException("Invalid or non-JSON mapping found: " + mapping);
128+
}
129+
130+
URL url = IndexUtils.class.getClassLoader().getResource(MAPPING_SCHEMA_PATH);
131+
if (url == null) {
132+
throw new IOException("Resource not found: " + MAPPING_SCHEMA_PATH);
133+
}
134+
135+
String schema = Resources.toString(url, Charsets.UTF_8);
136+
validateSchema(schema, mapping);
137+
}
138+
57139
public static Integer getVersionFromMapping(String mapping) {
58140
if (mapping == null || mapping.isBlank()) {
59141
throw new IllegalArgumentException("Mapping cannot be null or empty");

common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,27 @@
1818
import java.util.Set;
1919
import java.util.regex.Matcher;
2020
import java.util.regex.Pattern;
21+
import java.util.stream.Collectors;
2122

2223
import org.apache.commons.lang3.BooleanUtils;
2324
import org.json.JSONArray;
2425
import org.json.JSONException;
2526
import org.json.JSONObject;
27+
import org.opensearch.OpenSearchParseException;
2628

29+
import com.fasterxml.jackson.core.JsonProcessingException;
30+
import com.fasterxml.jackson.databind.JsonNode;
31+
import com.fasterxml.jackson.databind.ObjectMapper;
2732
import com.google.gson.Gson;
2833
import com.google.gson.JsonElement;
2934
import com.google.gson.JsonObject;
3035
import com.google.gson.JsonParser;
3136
import com.google.gson.JsonSyntaxException;
3237
import com.jayway.jsonpath.JsonPath;
38+
import com.networknt.schema.JsonSchema;
39+
import com.networknt.schema.JsonSchemaFactory;
40+
import com.networknt.schema.SpecVersion;
41+
import com.networknt.schema.ValidationMessage;
3342

3443
import lombok.extern.log4j.Log4j2;
3544

@@ -54,6 +63,8 @@ public class StringUtils {
5463
}
5564
public static final String TO_STRING_FUNCTION_NAME = ".toString()";
5665

66+
private static final ObjectMapper MAPPER = new ObjectMapper();
67+
5768
public static boolean isValidJsonString(String json) {
5869
if (json == null || json.isBlank()) {
5970
return false;
@@ -336,4 +347,28 @@ public static JsonObject getJsonObjectFromString(String jsonString) {
336347
return JsonParser.parseString(jsonString).getAsJsonObject();
337348
}
338349

350+
public static void validateSchema(String schemaString, String instanceString) {
351+
try {
352+
// parse the schema JSON as string
353+
JsonNode schemaNode = MAPPER.readTree(schemaString);
354+
JsonSchema schema = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V202012).getSchema(schemaNode);
355+
356+
// JSON data to validate
357+
JsonNode jsonNode = MAPPER.readTree(instanceString);
358+
359+
// Validate JSON node against the schema
360+
Set<ValidationMessage> errors = schema.validate(jsonNode);
361+
if (!errors.isEmpty()) {
362+
String errorMessage = errors.stream().map(ValidationMessage::getMessage).collect(Collectors.joining(", "));
363+
364+
throw new OpenSearchParseException(
365+
"Validation failed: " + errorMessage + " for instance: " + instanceString + " with schema: " + schemaString
366+
);
367+
}
368+
} catch (JsonProcessingException e) {
369+
throw new IllegalArgumentException("Invalid JSON format: " + e.getMessage(), e);
370+
} catch (Exception e) {
371+
throw new OpenSearchParseException("Schema validation failed: " + e.getMessage(), e);
372+
}
373+
}
339374
}

0 commit comments

Comments
 (0)