@@ -127,23 +127,131 @@ public abstract class Options {
     // Example - "spark.marklogic.write.json.ignoreNullFields=false.
     public static final String WRITE_JSON_SERIALIZATION_OPTION_PREFIX = "spark.marklogic.write.json.";
 
-    // Add @since annotations before we release.
+    /**
+     * Enables the splitter feature by defining an XPath expression for selecting text to split in a document.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_XPATH = "spark.marklogic.write.splitter.xpath";
+
+    /**
+     * Enables the splitter feature by defining one or more newline-delimited JSON Pointer expressions for selecting
+     * text to split in a document.
+     *
+     * @since 2.5.0
+     */
+    public static final String WRITE_SPLITTER_JSON_POINTERS = "spark.marklogic.write.splitter.jsonPointers";
+
+    /**
+     * Enables the splitter feature by declaring that all the text in a document should be split. This is typically for
+     * text documents, but could be used for JSON and XML as well.
+     *
+     * @since 2.5.0
+     */
+    public static final String WRITE_SPLITTER_TEXT = "spark.marklogic.writer.splitter.text";
+
+    /**
+     * Defines the maximum chunk size in characters. Defaults to 1000.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_MAX_CHUNK_SIZE = "spark.marklogic.write.splitter.maxChunkSize";
+
+    /**
+     * Defines the maximum overlap size in characters between two chunks. Defaults to 0.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_MAX_OVERLAP_SIZE = "spark.marklogic.write.splitter.maxOverlapSize";
-    public static final String WRITE_SPLITTER_TEXT = "spark.marklogic.writer.splitter.text";
+
+    /**
+     * Defines a regex for splitting text into chunks. The default strategy is langchain4j's "recursive" strategy that
+     * splits on paragraphs, sentences, lines, and words.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_REGEX = "spark.marklogic.write.splitter.regex";
+
+    /**
+     * Defines a delimiter for use with the splitter regex option. The delimiter joins together two or more chunks
+     * identified via the regex to produce a chunk that is as close as possible to the maximum chunk size.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_JOIN_DELIMITER = "spark.marklogic.splitter.joinDelimiter";
-    public static final String WRITE_SPLITTER_JSON_POINTERS = "spark.marklogic.write.splitter.jsonPointers";
+
+    /**
+     * Defines the class name of an implementation of langchain4j's {@code dev.langchain4j.data.document.DocumentSplitter}
+     * interface to be used for splitting the selected text into chunks.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_CUSTOM_CLASS = "spark.marklogic.write.splitter.customClass";
+
+    /**
+     * Defines one or more options to pass in a {@code Map<String, String>} to the constructor of the custom splitter
+     * class.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_CUSTOM_CLASS_OPTION_PREFIX = "spark.marklogic.write.splitter.customClass.option.";
+
+    /**
+     * Configures the connector to write chunks to separate "sidecar" documents instead of to the source document (the
+     * default behavior). Defines the maximum number of chunks to write to a sidecar document.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_SIDECAR_MAX_CHUNKS = "spark.marklogic.write.splitter.sidecar.maxChunks";
+
+    /**
+     * Defines the type - either JSON or XML - of each chunk document. Defaults to the type of the source document.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_SIDECAR_DOCUMENT_TYPE = "spark.marklogic.write.splitter.sidecar.documentType";
+
+    /**
+     * Comma-delimited list of collections to assign to each chunk document.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_SIDECAR_COLLECTIONS = "spark.marklogic.write.splitter.sidecar.collections";
+
+    /**
+     * Comma-delimited list of roles and capabilities to assign to each chunk document. If not defined, chunk documents
+     * will inherit the permissions defined by {@code WRITE_PERMISSIONS}.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_SIDECAR_PERMISSIONS = "spark.marklogic.write.splitter.sidecar.permissions";
+
+    /**
+     * Root name for a JSON or XML sidecar chunk document.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_SIDECAR_ROOT_NAME = "spark.marklogic.write.splitter.sidecar.rootName";
+
+    /**
+     * URI prefix for each sidecar chunk document. If defined, will be followed by a UUID.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_SIDECAR_URI_PREFIX = "spark.marklogic.write.splitter.sidecar.uriPrefix";
+
+    /**
+     * URI suffix for each sidecar chunk document. If defined, will be preceded by a UUID.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_SIDECAR_URI_SUFFIX = "spark.marklogic.write.splitter.sidecar.uriSuffix";
+
+    /**
+     * Namespace for XML sidecar chunk documents.
+     *
+     * @since 2.5.0
+     */
     public static final String WRITE_SPLITTER_SIDECAR_XML_NAMESPACE = "spark.marklogic.write.splitter.sidecar.xmlNamespace";
 
     // For writing RDF
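
For context, the splitter options added above are passed as ordinary Spark write options. The sketch below is not part of this commit; it only illustrates, under stated assumptions, how a write might combine them. The connection URI, the local input path, the XML structure implied by the XPath expression, and the permission roles are all assumptions; only the splitter and sidecar option keys come from the constants defined here.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class SplitterOptionsSketch {

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .appName("splitter-options-sketch")
            .master("local[*]")
            .getOrCreate();

        // Assumed input: local XML files, each with text under /doc/text, read via Spark's
        // built-in "binaryFile" data source.
        Dataset<Row> files = spark.read().format("binaryFile").load("data/articles");

        files.write()
            .format("marklogic")
            // Assumed connection details and permissions for a local MarkLogic instance.
            .option("spark.marklogic.client.uri", "spark-user:password@localhost:8000")
            .option("spark.marklogic.write.permissions", "rest-reader,read,rest-writer,update")
            // WRITE_SPLITTER_XPATH: split the text selected by this XPath expression.
            .option("spark.marklogic.write.splitter.xpath", "/doc/text")
            // WRITE_SPLITTER_MAX_CHUNK_SIZE / WRITE_SPLITTER_MAX_OVERLAP_SIZE: cap chunks at
            // 500 characters with a 50-character overlap (defaults are 1000 and 0).
            .option("spark.marklogic.write.splitter.maxChunkSize", "500")
            .option("spark.marklogic.write.splitter.maxOverlapSize", "50")
            // Sidecar options: write at most 10 chunks per sidecar document and assign each
            // sidecar document to the "chunks" collection.
            .option("spark.marklogic.write.splitter.sidecar.maxChunks", "10")
            .option("spark.marklogic.write.splitter.sidecar.collections", "chunks")
            .mode(SaveMode.Append)
            .save();

        spark.stop();
    }
}

Dropping the two sidecar options would keep the chunks in the source documents, which the WRITE_SPLITTER_SIDECAR_MAX_CHUNKS Javadoc describes as the default behavior.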