diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-mistral-ai/src/main/java/org/springframework/ai/model/mistralai/autoconfigure/MistralAiOcrAutoConfiguration.java b/auto-configurations/models/spring-ai-autoconfigure-model-mistral-ai/src/main/java/org/springframework/ai/model/mistralai/autoconfigure/MistralAiOcrAutoConfiguration.java new file mode 100644 index 00000000000..e45d0c139dd --- /dev/null +++ b/auto-configurations/models/spring-ai-autoconfigure-model-mistral-ai/src/main/java/org/springframework/ai/model/mistralai/autoconfigure/MistralAiOcrAutoConfiguration.java @@ -0,0 +1,67 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.model.mistralai.autoconfigure; + +import org.springframework.ai.mistralai.ocr.MistralOcrApi; +import org.springframework.ai.model.SpringAIModels; +import org.springframework.ai.retry.autoconfigure.SpringAiRetryAutoConfiguration; +import org.springframework.beans.factory.ObjectProvider; +import org.springframework.boot.autoconfigure.AutoConfiguration; +import org.springframework.boot.autoconfigure.ImportAutoConfiguration; +import org.springframework.boot.autoconfigure.condition.ConditionalOnClass; +import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.boot.autoconfigure.web.client.RestClientAutoConfiguration; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.web.client.ResponseErrorHandler; +import org.springframework.web.client.RestClient; +import org.springframework.util.Assert; +import org.springframework.util.StringUtils; + +/** + * OCR {@link AutoConfiguration Auto-configuration} for Mistral AI OCR. 
+ * + * @author Alexandros Pappas + * @since 1.0.0 + */ +@AutoConfiguration(after = { RestClientAutoConfiguration.class, SpringAiRetryAutoConfiguration.class }) +@ConditionalOnClass(MistralOcrApi.class) +@ConditionalOnProperty(name = "spring.ai.model.ocr", havingValue = SpringAIModels.MISTRAL, matchIfMissing = true) +@EnableConfigurationProperties({ MistralAiCommonProperties.class, MistralAiOcrProperties.class }) +@ImportAutoConfiguration(classes = { SpringAiRetryAutoConfiguration.class, RestClientAutoConfiguration.class }) +public class MistralAiOcrAutoConfiguration { + + @Bean + @ConditionalOnMissingBean + public MistralOcrApi mistralOcrApi(MistralAiCommonProperties commonProperties, MistralAiOcrProperties ocrProperties, + ObjectProvider restClientBuilderProvider, ResponseErrorHandler responseErrorHandler) { + + var apiKey = ocrProperties.getApiKey(); + var baseUrl = ocrProperties.getBaseUrl(); + + var resolvedApiKey = StringUtils.hasText(apiKey) ? apiKey : commonProperties.getApiKey(); + var resolvedBaseUrl = StringUtils.hasText(baseUrl) ? 
baseUrl : commonProperties.getBaseUrl(); + + Assert.hasText(resolvedApiKey, "Mistral API key must be set"); + Assert.hasText(resolvedBaseUrl, "Mistral base URL must be set"); + + return new MistralOcrApi(resolvedBaseUrl, resolvedApiKey, + restClientBuilderProvider.getIfAvailable(RestClient::builder), responseErrorHandler); + } + +} diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-mistral-ai/src/main/java/org/springframework/ai/model/mistralai/autoconfigure/MistralAiOcrProperties.java b/auto-configurations/models/spring-ai-autoconfigure-model-mistral-ai/src/main/java/org/springframework/ai/model/mistralai/autoconfigure/MistralAiOcrProperties.java new file mode 100644 index 00000000000..01f497a6068 --- /dev/null +++ b/auto-configurations/models/spring-ai-autoconfigure-model-mistral-ai/src/main/java/org/springframework/ai/model/mistralai/autoconfigure/MistralAiOcrProperties.java @@ -0,0 +1,52 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.model.mistralai.autoconfigure; + +import org.springframework.ai.mistralai.ocr.MistralAiOcrOptions; +import org.springframework.ai.mistralai.ocr.MistralOcrApi; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.boot.context.properties.NestedConfigurationProperty; + +/** + * Configuration properties for Mistral AI OCR. 
+ * + * @author Alexandros Pappas + * @since 1.0.0 + */ +@ConfigurationProperties(MistralAiOcrProperties.CONFIG_PREFIX) +public class MistralAiOcrProperties extends MistralAiParentProperties { + + /** + * Property prefix under which these OCR properties are bound. + */ + public static final String CONFIG_PREFIX = "spring.ai.mistralai.ocr"; + + /** + * Model name used when no OCR model is configured: the value of + * {@code MistralOcrApi.OCRModel.MISTRAL_OCR_LATEST}. + */ + public static final String DEFAULT_OCR_MODEL = MistralOcrApi.OCRModel.MISTRAL_OCR_LATEST.getValue(); + + /** + * OCR request options, bound from the nested {@code options} properties. Initialised + * with {@link #DEFAULT_OCR_MODEL} as the model. + */ + @NestedConfigurationProperty + private MistralAiOcrOptions options = MistralAiOcrOptions.builder().model(DEFAULT_OCR_MODEL).build(); + + /** + * Creates the properties with the base URL preset to + * {@code MistralAiCommonProperties.DEFAULT_BASE_URL}. + */ + public MistralAiOcrProperties() { + super.setBaseUrl(MistralAiCommonProperties.DEFAULT_BASE_URL); + } + + public MistralAiOcrOptions getOptions() { + return this.options; + } + + public void setOptions(MistralAiOcrOptions options) { + this.options = options; + } + +} diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-mistral-ai/src/test/java/org/springframework/ai/model/mistralai/autoconfigure/MistralAiOcrAutoConfigurationTests.java b/auto-configurations/models/spring-ai-autoconfigure-model-mistral-ai/src/test/java/org/springframework/ai/model/mistralai/autoconfigure/MistralAiOcrAutoConfigurationTests.java new file mode 100644 index 00000000000..cdefaa18000 --- /dev/null +++ b/auto-configurations/models/spring-ai-autoconfigure-model-mistral-ai/src/test/java/org/springframework/ai/model/mistralai/autoconfigure/MistralAiOcrAutoConfigurationTests.java @@ -0,0 +1,80 @@ +/* + * Copyright 2025-2025 the original author or authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.model.mistralai.autoconfigure; + +import java.util.List; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import org.springframework.ai.mistralai.ocr.MistralOcrApi; +import org.springframework.boot.autoconfigure.AutoConfigurations; +import org.springframework.boot.test.context.runner.ApplicationContextRunner; +import org.springframework.http.ResponseEntity; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Integration Tests for {@link MistralAiOcrAutoConfiguration}. + * + *

+ * These tests require the {@code MISTRAL_AI_API_KEY} environment variable to be set. They + * verify that the {@link MistralOcrApi} bean is correctly configured and can interact + * with the Mistral AI OCR API. + *

+ * + * @author Alexandros Pappas + * @since 1.0.0 + */ +@EnabledIfEnvironmentVariable(named = MistralAiOcrAutoConfigurationTests.ENV_VAR_NAME, matches = ".*") +class MistralAiOcrAutoConfigurationTests { + + static final String ENV_VAR_NAME = "MISTRAL_AI_API_KEY"; + + private final ApplicationContextRunner contextRunner = new ApplicationContextRunner() + .withPropertyValues("spring.ai.mistralai.api-key=" + System.getenv(ENV_VAR_NAME)) + .withConfiguration(AutoConfigurations.of(MistralAiOcrAutoConfiguration.class)); + + @Test + void ocrExtractionWithPublicUrl() { + this.contextRunner.run(context -> { + + MistralOcrApi mistralOcrApi = context.getBean(MistralOcrApi.class); + assertThat(mistralOcrApi).isNotNull(); + + String documentUrl = "https://arxiv.org/pdf/2201.04234"; + MistralOcrApi.OCRRequest request = new MistralOcrApi.OCRRequest( + MistralOcrApi.OCRModel.MISTRAL_OCR_LATEST.getValue(), "test_id", + new MistralOcrApi.OCRRequest.DocumentURLChunk(documentUrl), List.of(0, 1), true, 2, 50); + + ResponseEntity response = mistralOcrApi.ocr(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + assertThat(response.getBody().pages()).isNotNull(); + assertThat(response.getBody().pages()).isNotEmpty(); + assertThat(response.getBody().pages().get(0).markdown()).isNotEmpty(); + + if (request.includeImageBase64() != null && request.includeImageBase64()) { + assertThat(response.getBody().pages().get(1).images()).isNotNull(); + assertThat(response.getBody().pages().get(1).images().get(0).imageBase64()).isNotNull(); + } + + }); + } + +} diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-mistral-ai/src/test/java/org/springframework/ai/model/mistralai/autoconfigure/MistralAiOcrPropertiesTests.java b/auto-configurations/models/spring-ai-autoconfigure-model-mistral-ai/src/test/java/org/springframework/ai/model/mistralai/autoconfigure/MistralAiOcrPropertiesTests.java new file mode 100644 index 00000000000..8e4fd0b5ddf --- 
/dev/null +++ b/auto-configurations/models/spring-ai-autoconfigure-model-mistral-ai/src/test/java/org/springframework/ai/model/mistralai/autoconfigure/MistralAiOcrPropertiesTests.java @@ -0,0 +1,169 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.model.mistralai.autoconfigure; + +import org.junit.jupiter.api.Test; + +import org.springframework.ai.mistralai.ocr.MistralOcrApi; +import org.springframework.ai.retry.autoconfigure.SpringAiRetryAutoConfiguration; +import org.springframework.boot.autoconfigure.AutoConfigurations; +import org.springframework.boot.autoconfigure.web.client.RestClientAutoConfiguration; +import org.springframework.boot.test.context.runner.ApplicationContextRunner; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Unit Tests for {@link MistralAiOcrProperties} interacting with + * {@link MistralAiCommonProperties}. 
+ * + * @author Alexandros Pappas + * @since 1.0.0 + */ +class MistralAiOcrPropertiesTests { + + // Define common configurations to load in tests + private final AutoConfigurations autoConfigurations = AutoConfigurations.of(SpringAiRetryAutoConfiguration.class, + RestClientAutoConfiguration.class, MistralAiOcrAutoConfiguration.class); + + @Test + void commonPropertiesAppliedToOcr() { + new ApplicationContextRunner() + .withPropertyValues("spring.ai.mistralai.base-url=COMMON_BASE_URL", + "spring.ai.mistralai.api-key=COMMON_API_KEY", + "spring.ai.mistralai.ocr.options.model=mistral-ocr-specific-model") + .withConfiguration(this.autoConfigurations) + .run(context -> { + assertThat(context).hasSingleBean(MistralAiCommonProperties.class); + assertThat(context).hasSingleBean(MistralAiOcrProperties.class); + + var commonProps = context.getBean(MistralAiCommonProperties.class); + var ocrProps = context.getBean(MistralAiOcrProperties.class); + + assertThat(commonProps.getBaseUrl()).isEqualTo("COMMON_BASE_URL"); + assertThat(commonProps.getApiKey()).isEqualTo("COMMON_API_KEY"); + + assertThat(ocrProps.getBaseUrl()).isEqualTo(MistralAiCommonProperties.DEFAULT_BASE_URL); + assertThat(ocrProps.getApiKey()).isNull(); + + assertThat(ocrProps.getOptions()).isNotNull(); + assertThat(ocrProps.getOptions().getModel()).isEqualTo("mistral-ocr-specific-model"); + + assertThat(context).hasSingleBean(MistralOcrApi.class); + }); + } + + @Test + void ocrSpecificPropertiesOverrideCommon() { + new ApplicationContextRunner() + .withPropertyValues("spring.ai.mistralai.base-url=COMMON_BASE_URL", + "spring.ai.mistralai.api-key=COMMON_API_KEY", "spring.ai.mistralai.ocr.base-url=OCR_BASE_URL", + "spring.ai.mistralai.ocr.api-key=OCR_API_KEY", + "spring.ai.mistralai.ocr.options.model=mistral-ocr-default") + .withConfiguration(this.autoConfigurations) + .run(context -> { + assertThat(context).hasSingleBean(MistralAiCommonProperties.class); + 
assertThat(context).hasSingleBean(MistralAiOcrProperties.class); + + var commonProps = context.getBean(MistralAiCommonProperties.class); + var ocrProps = context.getBean(MistralAiOcrProperties.class); + + assertThat(commonProps.getBaseUrl()).isEqualTo("COMMON_BASE_URL"); + assertThat(commonProps.getApiKey()).isEqualTo("COMMON_API_KEY"); + + assertThat(ocrProps.getBaseUrl()).isEqualTo("OCR_BASE_URL"); + assertThat(ocrProps.getApiKey()).isEqualTo("OCR_API_KEY"); + + assertThat(ocrProps.getOptions()).isNotNull(); + assertThat(ocrProps.getOptions().getModel()).isEqualTo("mistral-ocr-default"); + + assertThat(context).hasSingleBean(MistralOcrApi.class); + }); + } + + @Test + void ocrOptionsBinding() { + new ApplicationContextRunner().withPropertyValues("spring.ai.mistralai.api-key=API_KEY", + "spring.ai.mistralai.ocr.options.model=custom-ocr-model", + "spring.ai.mistralai.ocr.options.id=ocr-request-id-123", "spring.ai.mistralai.ocr.options.pages=0,1,5", + "spring.ai.mistralai.ocr.options.includeImageBase64=true", + "spring.ai.mistralai.ocr.options.imageLimit=25", "spring.ai.mistralai.ocr.options.imageMinSize=150") + .withConfiguration(this.autoConfigurations) + .run(context -> { + assertThat(context).hasSingleBean(MistralAiOcrProperties.class); + var ocrProps = context.getBean(MistralAiOcrProperties.class); + var options = ocrProps.getOptions(); + + assertThat(options).isNotNull(); + assertThat(options.getModel()).isEqualTo("custom-ocr-model"); + assertThat(options.getId()).isEqualTo("ocr-request-id-123"); + assertThat(options.getPages()).containsExactly(0, 1, 5); + assertThat(options.getIncludeImageBase64()).isTrue(); + assertThat(options.getImageLimit()).isEqualTo(25); + assertThat(options.getImageMinSize()).isEqualTo(150); + }); + } + + @Test + void ocrActivationViaModelProperty() { + // Scenario 1: OCR explicitly disabled + new ApplicationContextRunner().withConfiguration(this.autoConfigurations) + .withPropertyValues("spring.ai.mistralai.api-key=API_KEY", 
"spring.ai.model.ocr=none") + .run(context -> { + assertThat(context.getBeansOfType(MistralAiOcrProperties.class)).isEmpty(); + assertThat(context.getBeansOfType(MistralOcrApi.class)).isEmpty(); + // Should not have common properties either if only OCR config was loaded + // and then disabled + assertThat(context.getBeansOfType(MistralAiCommonProperties.class)).isEmpty(); + }); + + // Scenario 2: OCR explicitly enabled for 'mistral' + new ApplicationContextRunner().withConfiguration(this.autoConfigurations) + .withPropertyValues("spring.ai.mistralai.api-key=API_KEY", "spring.ai.model.ocr=mistral") + .run(context -> { + assertThat(context).hasSingleBean(MistralAiCommonProperties.class); // Enabled + // by + // MistralAiOcrAutoConfiguration + assertThat(context).hasSingleBean(MistralAiOcrProperties.class); + assertThat(context).hasSingleBean(MistralOcrApi.class); + }); + + // Scenario 3: OCR implicitly enabled (default behavior when property is absent) + new ApplicationContextRunner().withConfiguration(this.autoConfigurations) + .withPropertyValues("spring.ai.mistralai.api-key=API_KEY") + .run(context -> { + assertThat(context).hasSingleBean(MistralAiCommonProperties.class); // Enabled + // by + // MistralAiOcrAutoConfiguration + assertThat(context).hasSingleBean(MistralAiOcrProperties.class); + assertThat(context).hasSingleBean(MistralOcrApi.class); + }); + + // Scenario 4: OCR implicitly disabled when another provider is chosen + new ApplicationContextRunner().withConfiguration(this.autoConfigurations) + .withPropertyValues("spring.ai.mistralai.api-key=API_KEY", "spring.ai.model.ocr=some-other-provider") + .run(context -> { + assertThat(context.getBeansOfType(MistralAiOcrProperties.class)).isEmpty(); + assertThat(context.getBeansOfType(MistralOcrApi.class)).isEmpty(); + // Common properties might still be loaded if another Mistral AI config + // (like Chat) was active, + // but in this minimal test setup, they shouldn't be loaded if OCR is + // disabled. 
+ assertThat(context.getBeansOfType(MistralAiCommonProperties.class)).isEmpty(); + }); + } + +} diff --git a/models/spring-ai-mistral-ai/src/main/java/org/springframework/ai/mistralai/ocr/MistralAiOcrOptions.java b/models/spring-ai-mistral-ai/src/main/java/org/springframework/ai/mistralai/ocr/MistralAiOcrOptions.java new file mode 100644 index 00000000000..4fd3fb2f4c6 --- /dev/null +++ b/models/spring-ai-mistral-ai/src/main/java/org/springframework/ai/mistralai/ocr/MistralAiOcrOptions.java @@ -0,0 +1,187 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.mistralai.ocr; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonInclude.Include; +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.springframework.ai.model.ModelOptions; + +import java.util.List; +import java.util.Objects; + +/** + * Options for Mistral AI OCR requests. These options are used at runtime when making an + * OCR call. + * + * @author Alexandros Pappas + * @since 1.0.0 + */ +@JsonInclude(Include.NON_NULL) +public class MistralAiOcrOptions implements ModelOptions { + + /** + * The model to use for OCR. Defaults to mistral-ocr-latest. + */ + @JsonProperty("model") + private String model = MistralOcrApi.OCRModel.MISTRAL_OCR_LATEST.getValue(); + + /** + * An optional string identifier for the request. 
+ */ + @JsonProperty("id") + private String id; + + /** + * Specific pages to process in various formats: single number, range, or list of + * both. Starts from 0. + */ + @JsonProperty("pages") + private List<Integer> pages; + + /** + * Whether to include base64 encoded image data in the response. + */ + @JsonProperty("include_image_base64") + private Boolean includeImageBase64; + + /** + * Maximum number of images to extract per page. + */ + @JsonProperty("image_limit") + private Integer imageLimit; + + /** + * Minimum height and width (in pixels) of images to extract. + */ + @JsonProperty("image_min_size") + private Integer imageMinSize; + + public static Builder builder() { + return new Builder(); + } + + public String getModel() { + return model; + } + + public String getId() { + return id; + } + + public List<Integer> getPages() { + return pages; + } + + public Boolean getIncludeImageBase64() { + return includeImageBase64; + } + + public Integer getImageLimit() { + return imageLimit; + } + + public Integer getImageMinSize() { + return imageMinSize; + } + + public void setModel(String model) { + this.model = model; + } + + public void setId(String id) { + this.id = id; + } + + public void setPages(List<Integer> pages) { + this.pages = pages; + } + + public void setIncludeImageBase64(Boolean includeImageBase64) { + this.includeImageBase64 = includeImageBase64; + } + + public void setImageLimit(Integer imageLimit) { + this.imageLimit = imageLimit; + } + + public void setImageMinSize(Integer imageMinSize) { + this.imageMinSize = imageMinSize; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + MistralAiOcrOptions that = (MistralAiOcrOptions) o; + return Objects.equals(model, that.model) && Objects.equals(id, that.id) && Objects.equals(pages, that.pages) + && Objects.equals(includeImageBase64, that.includeImageBase64) + && Objects.equals(imageLimit, that.imageLimit) && 
Objects.equals(imageMinSize, that.imageMinSize); + } + + @Override + public int hashCode() { + return Objects.hash(model, id, pages, includeImageBase64, imageLimit, imageMinSize); + } + + /** + * Fluent builder for {@link MistralAiOcrOptions}. Every setter writes straight into a + * single options instance, which {@link #build()} returns. + */ + public static final class Builder { + + private final MistralAiOcrOptions options = new MistralAiOcrOptions(); + + private Builder() { + } + + public Builder model(String model) { + this.options.setModel(model); + return this; + } + + public Builder id(String id) { + this.options.setId(id); + return this; + } + + public Builder pages(List<Integer> pages) { + this.options.setPages(pages); + return this; + } + + public Builder includeImageBase64(Boolean includeImageBase64) { + this.options.setIncludeImageBase64(includeImageBase64); + return this; + } + + public Builder imageLimit(Integer imageLimit) { + this.options.setImageLimit(imageLimit); + return this; + } + + public Builder imageMinSize(Integer imageMinSize) { + this.options.setImageMinSize(imageMinSize); + return this; + } + + public MistralAiOcrOptions build() { + return this.options; + } + + } + +} diff --git a/models/spring-ai-mistral-ai/src/main/java/org/springframework/ai/mistralai/ocr/MistralOcrApi.java b/models/spring-ai-mistral-ai/src/main/java/org/springframework/ai/mistralai/ocr/MistralOcrApi.java new file mode 100644 index 00000000000..7dbdaa5e7a3 --- /dev/null +++ b/models/spring-ai-mistral-ai/src/main/java/org/springframework/ai/mistralai/ocr/MistralOcrApi.java @@ -0,0 +1,301 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.mistralai.ocr; + +import java.util.List; +import java.util.Objects; +import java.util.function.Consumer; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonInclude.Include; +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.springframework.ai.retry.RetryUtils; +import org.springframework.http.HttpHeaders; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.util.Assert; +import org.springframework.web.client.ResponseErrorHandler; +import org.springframework.web.client.RestClient; + +/** + * Java Client library for the Mistral AI OCR API. Provides access to the OCR + * functionality. + *

+ * The API processes a document and returns a markdown string representation of the text, + * along with information about extracted images. + * + * @author Alexandros Pappas + * @since 1.0.0 + */ +public class MistralOcrApi { + + private static final String DEFAULT_BASE_URL = "https://api.mistral.ai"; + + private final RestClient restClient; + + /** + * Create a new MistralOcrApi instance. + * @param mistralAiApiKey Mistral AI API key. + */ + public MistralOcrApi(String mistralAiApiKey) { + this(DEFAULT_BASE_URL, mistralAiApiKey); + } + + /** + * Create a new MistralOcrApi instance. + * @param baseUrl API base URL. + * @param mistralAiApiKey Mistral AI API key. + */ + public MistralOcrApi(String baseUrl, String mistralAiApiKey) { + this(baseUrl, mistralAiApiKey, RestClient.builder()); + } + + /** + * Create a new MistralOcrApi instance. + * @param baseUrl API base URL. + * @param mistralAiApiKey Mistral AI API key. + * @param restClientBuilder RestClient builder. + */ + public MistralOcrApi(String baseUrl, String mistralAiApiKey, RestClient.Builder restClientBuilder) { + this(baseUrl, mistralAiApiKey, restClientBuilder, RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER); + } + + /** + * Create a new MistralOcrApi instance. + * @param baseUrl API base URL. + * @param mistralAiApiKey Mistral AI API key. + * @param restClientBuilder RestClient builder. + * @param responseErrorHandler Response error handler. + */ + public MistralOcrApi(String baseUrl, String mistralAiApiKey, RestClient.Builder restClientBuilder, + ResponseErrorHandler responseErrorHandler) { + + Consumer jsonContentHeaders = headers -> { + headers.setBearerAuth(mistralAiApiKey); + headers.setContentType(MediaType.APPLICATION_JSON); + }; + + this.restClient = restClientBuilder.baseUrl(baseUrl) + .defaultHeaders(jsonContentHeaders) + .defaultStatusHandler(responseErrorHandler) + .build(); + } + + /** + * Performs OCR on a document and returns the extracted information. 
+ * @param ocrRequest The OCR request containing document details and processing + * options. + * @return ResponseEntity containing the OCR response with markdown text and image + * data. + */ + public ResponseEntity ocr(OCRRequest ocrRequest) { + + Assert.notNull(ocrRequest, "The request body can not be null."); + Assert.notNull(ocrRequest.model(), "The model can not be null."); + Assert.notNull(ocrRequest.document(), "The document can not be null."); + + return this.restClient.post().uri("/v1/ocr").body(ocrRequest).retrieve().toEntity(OCRResponse.class); + } + + /** + * List of well-known Mistral OCR models. + */ + public enum OCRModel { + + MISTRAL_OCR_LATEST("mistral-ocr-latest"); + + private final String value; + + OCRModel(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + + } + + /** + * Represents the request for the OCR API. + * + * @param model Model to use for OCR. Can be 'mistral-ocr-latest' + * @param id An optional string identifier. + * @param document Document to run OCR on. Can be either a {@link DocumentURLChunk} or + * an {@link ImageURLChunk}. + * @param pages Specific pages to process in various formats: single number, range, or + * list of both. Starts from 0. + * @param includeImageBase64 Whether to include image URLs in the response. + * @param imageLimit Maximum number of images to extract. + * @param imageMinSize Minimum height and width of image to extract. + */ + @JsonInclude(Include.NON_NULL) + public record OCRRequest(@JsonProperty("model") String model, @JsonProperty("id") String id, + @JsonProperty("document") Document document, @JsonProperty("pages") List pages, + @JsonProperty("include_image_base64") Boolean includeImageBase64, + @JsonProperty("image_limit") Integer imageLimit, @JsonProperty("image_min_size") Integer imageMinSize) { + + /** + * Create an OCRRequest. + * @param model The model to use for OCR. + * @param document Document to run OCR on. 
+ */ + public OCRRequest(String model, Document document) { + this(model, null, document, null, null, null, null); + } + + /** + * Represents the document to be processed, which can be either a document URL or + * an image URL. Only one of the fields should be set. + */ + @JsonInclude(Include.NON_NULL) + public sealed interface Document permits DocumentURLChunk, ImageURLChunk { + + } + + /** + * Represents a document URL chunk. + * + * @param type Must be 'document_url'. + * @param documentUrl URL of the document. + * @param documentName Optional name of the document. + */ + @JsonInclude(Include.NON_NULL) + public record DocumentURLChunk( + + @JsonProperty("type") String type, @JsonProperty("document_url") String documentUrl, + @JsonProperty("document_name") String documentName) implements Document { + + /** + * Create a DocumentURLChunk. + * @param documentUrl URL of the document. + */ + public DocumentURLChunk(String documentUrl) { + this("document_url", documentUrl, null); + } + } + + /** + * Represents an image URL chunk. + * + * @param type Must be 'image_url'. + * @param imageUrl URL of the image. + * @param imageName Optional name of the image. + */ + @JsonInclude(Include.NON_NULL) + public record ImageURLChunk( + + @JsonProperty("type") String type, @JsonProperty("image_url") String imageUrl, + @JsonProperty("image_name") String imageName) implements Document { + + /** + * Create an ImageURLChunk. + * @param imageUrl URL of the image. + */ + public ImageURLChunk(String imageUrl) { + this("image_url", imageUrl, null); + } + } + } + + /** + * Represents the response from the OCR API. + * + * @param pages List of OCR info for pages. + * @param model The model used to generate the OCR. + * @param usageInfo Usage info for the OCR request. + * @param pagesProcessed Number of pages processed. + * @param docSizeBytes Document size in bytes. 
+ */ + @JsonInclude(Include.NON_NULL) + public record OCRResponse(@JsonProperty("pages") List pages, @JsonProperty("model") String model, + @JsonProperty("usage_info") OCRUsageInfo usageInfo, @JsonProperty("pages_processed") Integer pagesProcessed, + @JsonProperty("doc_size_bytes") Integer docSizeBytes) { + + } + + /** + * Represents OCR information for a single page. + * + * @param index The page index in a PDF document starting from 0. + * @param markdown The markdown string response of the page. + * @param images List of all extracted images in the page. + * @param dimensions The dimensions of the PDF Page's screenshot image. + */ + @JsonInclude(Include.NON_NULL) + public record OCRPage(@JsonProperty("index") Integer index, @JsonProperty("markdown") String markdown, + @JsonProperty("images") List images, + @JsonProperty("dimensions") OCRPageDimensions dimensions) { + } + + /** + * Represents an extracted image from a page. + * + * @param id Image ID for the extracted image in a page. + * @param topLeftX X coordinate of the top-left corner of the extracted image. + * @param topLeftY Y coordinate of the top-left corner of the extracted image. + * @param bottomRightX X coordinate of the bottom-right corner of the extracted image. + * @param bottomRightY Y coordinate of the bottom-right corner of the extracted image. + * @param imageBase64 Base64 string of the extracted image. 
+ */ + @JsonInclude(Include.NON_NULL) + public record ExtractedImage(@JsonProperty("id") String id, @JsonProperty("top_left_x") Integer topLeftX, + @JsonProperty("top_left_y") Integer topLeftY, @JsonProperty("bottom_right_x") Integer bottomRightX, + @JsonProperty("bottom_right_y") Integer bottomRightY, @JsonProperty("image_base64") String imageBase64) { + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof ExtractedImage that)) + return false; + return Objects.equals(id, that.id) && Objects.equals(topLeftX, that.topLeftX) + && Objects.equals(topLeftY, that.topLeftY) && Objects.equals(bottomRightX, that.bottomRightX) + && Objects.equals(bottomRightY, that.bottomRightY) && Objects.equals(imageBase64, that.imageBase64); + } + + @Override + public int hashCode() { + return Objects.hash(id, topLeftX, topLeftY, bottomRightX, bottomRightY, imageBase64); + } + } + + /** + * Represents the dimensions of a PDF page's screenshot image. + * + * @param dpi Dots per inch of the page-image. + * @param height Height of the image in pixels. + * @param width Width of the image in pixels. + */ + @JsonInclude(Include.NON_NULL) + public record OCRPageDimensions(@JsonProperty("dpi") Integer dpi, @JsonProperty("height") Integer height, + @JsonProperty("width") Integer width) { + } + + /** + * Represents usage information for the OCR request. + * + * @param pagesProcessed Number of pages processed. + * @param docSizeBytes Document size in bytes. 
+	 */
+	@JsonInclude(Include.NON_NULL)
+	public record OCRUsageInfo(@JsonProperty("pages_processed") Integer pagesProcessed,
+			@JsonProperty("doc_size_bytes") Integer docSizeBytes) {
+	}
+
+}
diff --git a/models/spring-ai-mistral-ai/src/test/java/org/springframework/ai/mistralai/ocr/MistralAiOcrOptionsTests.java b/models/spring-ai-mistral-ai/src/test/java/org/springframework/ai/mistralai/ocr/MistralAiOcrOptionsTests.java
new file mode 100644
index 00000000000..30b0319df20
--- /dev/null
+++ b/models/spring-ai-mistral-ai/src/test/java/org/springframework/ai/mistralai/ocr/MistralAiOcrOptionsTests.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2025-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.mistralai.ocr;
+
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Tests for {@link MistralAiOcrOptions}.
+ *
+ * @author Alexandros Pappas
+ * @since 1.0.0
+ */
+class MistralAiOcrOptionsTests {
+
+	@Test
+	void testBuilderWithAllFields() {
+		MistralAiOcrOptions populated = customOptions();
+
+		// Getter references instead of property-name strings: a renamed accessor breaks the build, not the run.
+		assertThat(populated)
+			.extracting(MistralAiOcrOptions::getModel, MistralAiOcrOptions::getId, MistralAiOcrOptions::getPages,
+					MistralAiOcrOptions::getIncludeImageBase64, MistralAiOcrOptions::getImageLimit,
+					MistralAiOcrOptions::getImageMinSize)
+			.containsExactly("custom-model", "test-id", List.of(0, 1, 2), true, 5, 100);
+	}
+
+	@Test
+	void testEqualsAndHashCode() {
+		MistralAiOcrOptions first = customOptions();
+		MistralAiOcrOptions second = customOptions();
+
+		// Distinct instances with identical values must be equal and share a hash code.
+		assertThat(first).isNotSameAs(second).isEqualTo(second).hasSameHashCodeAs(second);
+	}
+
+	@Test
+	void testDefaultValues() {
+		MistralAiOcrOptions defaults = new MistralAiOcrOptions();
+
+		// Only the model is expected to carry a default; every other option stays unset.
+		assertThat(defaults.getModel()).isEqualTo("mistral-ocr-latest");
+		assertThat(defaults)
+			.extracting(MistralAiOcrOptions::getId, MistralAiOcrOptions::getPages,
+					MistralAiOcrOptions::getIncludeImageBase64, MistralAiOcrOptions::getImageLimit,
+					MistralAiOcrOptions::getImageMinSize)
+			.containsOnlyNulls();
+	}
+
+	@Test
+	void testGetters() {
+		// Values deliberately differ from customOptions() so each getter is checked against its own input.
+		MistralAiOcrOptions options = MistralAiOcrOptions.builder()
+			.model("my-model")
+			.id("id-123")
+			.pages(List.of(3, 4))
+			.includeImageBase64(false)
+			.imageLimit(2)
+			.imageMinSize(50)
+			.build();
+
+		assertThat(options.getModel()).isEqualTo("my-model");
+		assertThat(options.getId()).isEqualTo("id-123");
+		assertThat(options.getPages()).isEqualTo(List.of(3, 4));
+		assertThat(options.getIncludeImageBase64()).isFalse();
+		assertThat(options.getImageLimit()).isEqualTo(2);
+		assertThat(options.getImageMinSize()).isEqualTo(50);
+	}
+
+	/**
+	 * Builds a new options instance with every builder property set to a fixed,
+	 * non-default value. Each call returns a separate object.
+	 */
+	private static MistralAiOcrOptions customOptions() {
+		return MistralAiOcrOptions.builder()
+			.model("custom-model")
+			.id("test-id")
+			.pages(List.of(0, 1, 2))
+			.includeImageBase64(true)
+			.imageLimit(5)
+			.imageMinSize(100)
+			.build();
+	}
+
+}
diff --git a/models/spring-ai-mistral-ai/src/test/java/org/springframework/ai/mistralai/ocr/MistralOcrApiIT.java b/models/spring-ai-mistral-ai/src/test/java/org/springframework/ai/mistralai/ocr/MistralOcrApiIT.java
new file mode 100644
index 00000000000..195150570cb
--- /dev/null
+++ b/models/spring-ai-mistral-ai/src/test/java/org/springframework/ai/mistralai/ocr/MistralOcrApiIT.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2025-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.mistralai.ocr;
+
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
+
+import org.springframework.http.ResponseEntity;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Tests for the Mistral OCR API.
+ *
+ * @author Alexandros Pappas
+ * @since 1.0.0
+ */
+@EnabledIfEnvironmentVariable(named = "MISTRAL_AI_API_KEY", matches = ".+")
+class MistralOcrApiIT {
+
+	private final MistralOcrApi mistralOcr = new MistralOcrApi(System.getenv("MISTRAL_AI_API_KEY"));
+
+	@Test
+	void ocrTest() {
+		String documentUrl = "https://arxiv.org/pdf/2201.04234";
+		MistralOcrApi.OCRRequest request = new MistralOcrApi.OCRRequest(
+				MistralOcrApi.OCRModel.MISTRAL_OCR_LATEST.getValue(), "test_id",
+				new MistralOcrApi.OCRRequest.DocumentURLChunk(documentUrl), List.of(0, 1, 2), true, 5, 50);
+
+		ResponseEntity<MistralOcrApi.OCRResponse> response = this.mistralOcr.ocr(request);
+
+		assertThat(response).isNotNull();
+		MistralOcrApi.OCRResponse body = response.getBody();
+		assertThat(body).isNotNull();
+		assertThat(body.pages()).isNotEmpty();
+		assertThat(body.pages().get(0).markdown()).isNotEmpty();
+		// Image payloads are only checked when the request asked for base64 data; isNotEmpty() guards get(0).
+		if (Boolean.TRUE.equals(request.includeImageBase64())) {
+			assertThat(body.pages().get(1).images()).isNotEmpty();
+			assertThat(body.pages().get(1).images().get(0).imageBase64()).isNotNull();
+		}
+	}
+
+}
diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/mistralai-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/mistralai-chat.adoc
index 088df589c69..8f0e01b989f 100644
--- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/mistralai-chat.adoc
+++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/mistralai-chat.adoc
@@ -342,3 +342,58 @@ Follow the https://github.com/spring-projects/spring-ai/blob/main/models/spring-
 * The link:https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-mistral-ai/src/test/java/org/springframework/ai/mistralai/api/tool/PaymentStatusFunctionCallingIT.java[PaymentStatusFunctionCallingIT.java] tests show how to use the low-level API to call tool functions.
 Based on the link:https://docs.mistral.ai/guides/function-calling/[Mistral AI Function Calling] tutorial.
+
+== Mistral AI OCR
+
+Spring AI supports Optical Character Recognition (OCR) with Mistral AI. This allows you to extract text and image data from documents.
+
+=== Prerequisites
+
+You will need to create an API key with Mistral AI to access the Mistral AI OCR API.
+Create an account at https://auth.mistral.ai/ui/registration[Mistral AI registration page] and generate the token on the https://console.mistral.ai/api-keys/[API Keys page].
+
+
+=== Add Dependencies
+
+To use the Mistral AI OCR API, you will need to add the `spring-ai-mistral-ai` dependency to your project's Maven `pom.xml` file:
+
+[source, xml]
+----
+<dependency>
+    <groupId>org.springframework.ai</groupId>
+    <artifactId>spring-ai-mistral-ai</artifactId>
+</dependency>
+----
+
+or to your Gradle `build.gradle` build file.
+
+[source,groovy]
+----
+dependencies {
+    implementation 'org.springframework.ai:spring-ai-mistral-ai'
+}
+----
+
+=== Low-level MistralOcrApi Client
+
+The link:https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-mistral-ai/src/main/java/org/springframework/ai/mistralai/ocr/MistralOcrApi.java[MistralOcrApi] provides a lightweight Java client for link:https://docs.mistral.ai/api/#tag/OCR[Mistral AI OCR API].
+
+Here is a simple snippet showing how to use the API programmatically:
+
+[source,java]
+----
+MistralOcrApi mistralAiApi = new MistralOcrApi(System.getenv("MISTRAL_AI_API_KEY"));
+
+String documentUrl = "https://arxiv.org/pdf/2201.04234";
+MistralOcrApi.OCRRequest request = new MistralOcrApi.OCRRequest(
+        MistralOcrApi.OCRModel.MISTRAL_OCR_LATEST.getValue(), "test_id",
+        new MistralOcrApi.OCRRequest.DocumentURLChunk(documentUrl), List.of(0, 1, 2), true, 5, 50);
+
+ResponseEntity<MistralOcrApi.OCRResponse> response = mistralAiApi.ocr(request);
+----
+
+Follow the https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-mistral-ai/src/main/java/org/springframework/ai/mistralai/ocr/MistralOcrApi.java[MistralOcrApi.java]'s JavaDoc for further information.
+
+==== MistralOcrApi Sample
+
+* The link:https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-mistral-ai/src/test/java/org/springframework/ai/mistralai/ocr/MistralOcrApiIT.java[MistralOcrApiIT.java] tests provide some general examples of how to use the lightweight library.
\ No newline at end of file