Skip to content

Commit 016dbdf

Browse files
author
lobo
committed
Merge branch 'main' into add-huggingface-text-to-image
2 parents 131155e + 6fc76b7 commit 016dbdf

File tree

662 files changed

+32953
-5860
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

662 files changed

+32953
-5860
lines changed

.github/workflows/continuous-integration.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
SPRING_AI_OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
3737
ARTIFACTORY_USERNAME: ${{ secrets.ARTIFACTORY_USERNAME }}
3838
ARTIFACTORY_PASSWORD: ${{ secrets.ARTIFACTORY_PASSWORD }}
39-
run: mvn -s settings.xml -Pintegration-tests -Dfailsafe.rerunFailingTestsCount=3 --batch-mode --update-snapshots deploy
39+
run: mvn -s settings.xml -Pintegration-tests -Pjavadoc -Dfailsafe.rerunFailingTestsCount=3 --batch-mode --update-snapshots deploy
4040

4141
- name: Generate Java docs
4242
run: mvn javadoc:aggregate
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
5+
<modelVersion>4.0.0</modelVersion>
6+
<parent>
7+
<groupId>org.springframework.ai</groupId>
8+
<artifactId>spring-ai</artifactId>
9+
<version>1.0.0-SNAPSHOT</version>
10+
<relativePath>../../pom.xml</relativePath>
11+
</parent>
12+
<artifactId>spring-ai-markdown-document-reader</artifactId>
13+
<packaging>jar</packaging>
14+
<name>Spring AI Document Reader - Markdown</name>
15+
<description>Spring AI Markdown document reader</description>
16+
<url>https://github.com/spring-projects/spring-ai</url>
17+
18+
<scm>
19+
<url>https://github.com/spring-projects/spring-ai</url>
20+
<connection>git://github.com/spring-projects/spring-ai.git</connection>
21+
<developerConnection>git@github.com:spring-projects/spring-ai.git</developerConnection>
22+
</scm>
23+
24+
<dependencies>
25+
<dependency>
	<groupId>org.springframework.ai</groupId>
	<artifactId>spring-ai-core</artifactId>
	<!-- ${parent.version} is a deprecated expression; Maven asks for the project.-prefixed form. -->
	<version>${project.parent.version}</version>
</dependency>
30+
31+
<dependency>
32+
<groupId>org.commonmark</groupId>
33+
<artifactId>commonmark</artifactId>
34+
<version>${commonmark.version}</version>
35+
</dependency>
36+
37+
<!-- TESTING -->
38+
<dependency>
39+
<groupId>org.springframework.boot</groupId>
40+
<artifactId>spring-boot-starter-test</artifactId>
41+
<scope>test</scope>
42+
</dependency>
43+
44+
</dependencies>
45+
46+
</project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
package org.springframework.ai.reader.markdown;
2+
3+
import org.commonmark.node.*;
4+
import org.commonmark.parser.Parser;
5+
import org.springframework.ai.document.Document;
6+
import org.springframework.ai.document.DocumentReader;
7+
import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig;
8+
import org.springframework.core.io.DefaultResourceLoader;
9+
import org.springframework.core.io.Resource;
10+
11+
import java.io.IOException;
12+
import java.io.InputStreamReader;
13+
import java.util.ArrayList;
14+
import java.util.List;
15+
16+
/**
17+
* Reads the given Markdown resource and groups headers, paragraphs, or text divided by
18+
* horizontal lines (depending on the
19+
* {@link MarkdownDocumentReaderConfig#horizontalRuleCreateDocument} configuration) into
20+
* {@link Document}s.
21+
*
22+
* @author Piotr Olaszewski
23+
*/
24+
public class MarkdownDocumentReader implements DocumentReader {
25+
26+
/**
27+
* The resource points to the Markdown document.
28+
*/
29+
private final Resource markdownResource;
30+
31+
/**
32+
* Configuration to a parsing process.
33+
*/
34+
private final MarkdownDocumentReaderConfig config;
35+
36+
/**
37+
* Markdown parser.
38+
*/
39+
private final Parser parser;
40+
41+
public MarkdownDocumentReader(String markdownResource) {
42+
this(new DefaultResourceLoader().getResource(markdownResource), MarkdownDocumentReaderConfig.defaultConfig());
43+
}
44+
45+
public MarkdownDocumentReader(String markdownResource, MarkdownDocumentReaderConfig config) {
46+
this(new DefaultResourceLoader().getResource(markdownResource), config);
47+
}
48+
49+
public MarkdownDocumentReader(Resource markdownResource, MarkdownDocumentReaderConfig config) {
50+
this.markdownResource = markdownResource;
51+
this.config = config;
52+
this.parser = Parser.builder().build();
53+
}
54+
55+
/**
56+
* Extracts and returns a list of documents from the resource.
57+
* @return List of extracted {@link Document}
58+
*/
59+
@Override
60+
public List<Document> get() {
61+
try (var input = markdownResource.getInputStream()) {
62+
Node node = parser.parseReader(new InputStreamReader(input));
63+
64+
DocumentVisitor documentVisitor = new DocumentVisitor(config);
65+
node.accept(documentVisitor);
66+
67+
return documentVisitor.getDocuments();
68+
}
69+
catch (IOException e) {
70+
throw new RuntimeException(e);
71+
}
72+
}
73+
74+
/**
75+
* A convenient class for visiting handled nodes in the Markdown document.
76+
*/
77+
static class DocumentVisitor extends AbstractVisitor {
78+
79+
private final List<Document> documents = new ArrayList<>();
80+
81+
private final List<String> currentParagraphs = new ArrayList<>();
82+
83+
private final MarkdownDocumentReaderConfig config;
84+
85+
private Document.Builder currentDocumentBuilder;
86+
87+
public DocumentVisitor(MarkdownDocumentReaderConfig config) {
88+
this.config = config;
89+
}
90+
91+
@Override
92+
public void visit(org.commonmark.node.Document document) {
93+
currentDocumentBuilder = Document.builder();
94+
super.visit(document);
95+
}
96+
97+
@Override
98+
public void visit(Heading heading) {
99+
buildAndFlush();
100+
super.visit(heading);
101+
}
102+
103+
@Override
104+
public void visit(ThematicBreak thematicBreak) {
105+
if (config.horizontalRuleCreateDocument) {
106+
buildAndFlush();
107+
}
108+
super.visit(thematicBreak);
109+
}
110+
111+
@Override
112+
public void visit(SoftLineBreak softLineBreak) {
113+
translateLineBreakToSpace();
114+
super.visit(softLineBreak);
115+
}
116+
117+
@Override
118+
public void visit(HardLineBreak hardLineBreak) {
119+
translateLineBreakToSpace();
120+
super.visit(hardLineBreak);
121+
}
122+
123+
@Override
124+
public void visit(ListItem listItem) {
125+
translateLineBreakToSpace();
126+
super.visit(listItem);
127+
}
128+
129+
@Override
130+
public void visit(BlockQuote blockQuote) {
131+
if (!config.includeBlockquote) {
132+
buildAndFlush();
133+
}
134+
135+
translateLineBreakToSpace();
136+
currentDocumentBuilder.withMetadata("category", "blockquote");
137+
super.visit(blockQuote);
138+
}
139+
140+
@Override
141+
public void visit(Code code) {
142+
currentParagraphs.add(code.getLiteral());
143+
currentDocumentBuilder.withMetadata("category", "code_inline");
144+
super.visit(code);
145+
}
146+
147+
@Override
148+
public void visit(FencedCodeBlock fencedCodeBlock) {
149+
if (!config.includeCodeBlock) {
150+
buildAndFlush();
151+
}
152+
153+
translateLineBreakToSpace();
154+
currentParagraphs.add(fencedCodeBlock.getLiteral());
155+
currentDocumentBuilder.withMetadata("category", "code_block");
156+
currentDocumentBuilder.withMetadata("lang", fencedCodeBlock.getInfo());
157+
158+
buildAndFlush();
159+
160+
super.visit(fencedCodeBlock);
161+
}
162+
163+
@Override
164+
public void visit(Text text) {
165+
if (text.getParent() instanceof Heading heading) {
166+
currentDocumentBuilder.withMetadata("category", "header_%d".formatted(heading.getLevel()))
167+
.withMetadata("title", text.getLiteral());
168+
}
169+
else {
170+
currentParagraphs.add(text.getLiteral());
171+
}
172+
173+
super.visit(text);
174+
}
175+
176+
public List<Document> getDocuments() {
177+
buildAndFlush();
178+
179+
return documents;
180+
}
181+
182+
private void buildAndFlush() {
183+
if (!currentParagraphs.isEmpty()) {
184+
String content = String.join("", currentParagraphs);
185+
186+
Document.Builder builder = currentDocumentBuilder.withContent(content);
187+
188+
config.additionalMetadata.forEach(builder::withMetadata);
189+
190+
Document document = builder.build();
191+
192+
documents.add(document);
193+
194+
currentParagraphs.clear();
195+
}
196+
currentDocumentBuilder = Document.builder();
197+
}
198+
199+
private void translateLineBreakToSpace() {
200+
if (!currentParagraphs.isEmpty()) {
201+
currentParagraphs.add(" ");
202+
}
203+
}
204+
205+
}
206+
207+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
package org.springframework.ai.reader.markdown.config;
2+
3+
import org.springframework.ai.document.Document;
4+
import org.springframework.ai.reader.markdown.MarkdownDocumentReader;
5+
import org.springframework.util.Assert;
6+
7+
import java.util.HashMap;
8+
import java.util.Map;
9+
10+
/**
11+
* Common configuration for the {@link MarkdownDocumentReader}.
12+
*
13+
* @author Piotr Olaszewski
14+
*/
15+
public class MarkdownDocumentReaderConfig {
16+
17+
public final boolean horizontalRuleCreateDocument;
18+
19+
public final boolean includeCodeBlock;
20+
21+
public final boolean includeBlockquote;
22+
23+
public final Map<String, Object> additionalMetadata;
24+
25+
public MarkdownDocumentReaderConfig(Builder builder) {
26+
horizontalRuleCreateDocument = builder.horizontalRuleCreateDocument;
27+
includeCodeBlock = builder.includeCodeBlock;
28+
includeBlockquote = builder.includeBlockquote;
29+
additionalMetadata = builder.additionalMetadata;
30+
}
31+
32+
/**
33+
* @return the default configuration
34+
*/
35+
public static MarkdownDocumentReaderConfig defaultConfig() {
36+
return builder().build();
37+
}
38+
39+
public static Builder builder() {
40+
return new Builder();
41+
}
42+
43+
public static class Builder {
44+
45+
private boolean horizontalRuleCreateDocument = false;
46+
47+
private boolean includeCodeBlock = false;
48+
49+
private boolean includeBlockquote = false;
50+
51+
private Map<String, Object> additionalMetadata = new HashMap<>();
52+
53+
private Builder() {
54+
}
55+
56+
/**
57+
* Text divided by horizontal lines will create new {@link Document}s. The default
58+
* is {@code false}, meaning text separated by horizontal lines won't create a new
59+
* document.
60+
* @param horizontalRuleCreateDocument flag to determine whether new documents are
61+
* created from text divided by horizontal line
62+
* @return this builder
63+
*/
64+
public Builder withHorizontalRuleCreateDocument(boolean horizontalRuleCreateDocument) {
65+
this.horizontalRuleCreateDocument = horizontalRuleCreateDocument;
66+
return this;
67+
}
68+
69+
/**
70+
* Whatever to include code blocks in {@link Document}s. The default is
71+
* {@code false}, which means all code blocks are in separate documents.
72+
* @param includeCodeBlock flag to include code block into paragraph document or
73+
* create new with code only
74+
* @return this builder
75+
*/
76+
public Builder withIncludeCodeBlock(boolean includeCodeBlock) {
77+
this.includeCodeBlock = includeCodeBlock;
78+
return this;
79+
}
80+
81+
/**
82+
* Whatever to include blockquotes in {@link Document}s. The default is
83+
* {@code false}, which means all blockquotes are in separate documents.
84+
* @param includeBlockquote flag to include blockquotes into paragraph document or
85+
* create new with blockquote only
86+
* @return this builder
87+
*/
88+
public Builder withIncludeBlockquote(boolean includeBlockquote) {
89+
this.includeBlockquote = includeBlockquote;
90+
return this;
91+
}
92+
93+
/**
94+
* Adds this additional metadata to the all built {@link Document}s.
95+
* @return this builder
96+
*/
97+
public Builder withAdditionalMetadata(String key, Object value) {
98+
Assert.notNull(key, "key must not be null");
99+
Assert.notNull(value, "value must not be null");
100+
this.additionalMetadata.put(key, value);
101+
return this;
102+
}
103+
104+
/**
105+
* Adds this additional metadata to the all built {@link Document}s.
106+
* @return this builder
107+
*/
108+
public Builder withAdditionalMetadata(Map<String, Object> additionalMetadata) {
109+
Assert.notNull(additionalMetadata, "additionalMetadata must not be null");
110+
this.additionalMetadata = additionalMetadata;
111+
return this;
112+
}
113+
114+
/**
115+
* @return the immutable configuration
116+
*/
117+
public MarkdownDocumentReaderConfig build() {
118+
return new MarkdownDocumentReaderConfig(this);
119+
}
120+
121+
}
122+
123+
}

0 commit comments

Comments
 (0)