Skip to content

feat: Add Mistral AI OCR API integration #2404

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.springframework.ai.model.mistralai.autoconfigure;

import org.springframework.ai.mistralai.ocr.MistralOcrApi;
import org.springframework.ai.model.SpringAIModels;
import org.springframework.ai.retry.autoconfigure.SpringAiRetryAutoConfiguration;
import org.springframework.beans.factory.ObjectProvider;
import org.springframework.boot.autoconfigure.AutoConfiguration;
import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.autoconfigure.web.client.RestClientAutoConfiguration;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.web.client.ResponseErrorHandler;
import org.springframework.web.client.RestClient;
import org.springframework.util.Assert;
import org.springframework.util.StringUtils;

/**
 * {@link AutoConfiguration Auto-configuration} that contributes a {@link MistralOcrApi}
 * bean for Mistral AI OCR.
 *
 * <p>
 * The configuration is active when {@link MistralOcrApi} is on the classpath and the
 * {@code spring.ai.model.ocr} property is either absent or equal to
 * {@link SpringAIModels#MISTRAL}.
 * </p>
 *
 * @author Alexandros Pappas
 * @since 1.0.0
 */
@AutoConfiguration(after = { RestClientAutoConfiguration.class, SpringAiRetryAutoConfiguration.class })
@ConditionalOnClass(MistralOcrApi.class)
@ConditionalOnProperty(name = "spring.ai.model.ocr", havingValue = SpringAIModels.MISTRAL, matchIfMissing = true)
@EnableConfigurationProperties({ MistralAiCommonProperties.class, MistralAiOcrProperties.class })
@ImportAutoConfiguration(classes = { SpringAiRetryAutoConfiguration.class, RestClientAutoConfiguration.class })
public class MistralAiOcrAutoConfiguration {

	/**
	 * Creates the {@link MistralOcrApi} client unless the application already defines
	 * one.
	 * <p>
	 * For both the API key and the base URL, the OCR-specific property is used when it
	 * contains text; otherwise the value from the common Mistral AI properties is used.
	 * Both resolved values must contain text.
	 * @param commonProperties the shared Mistral AI connection properties
	 * @param ocrProperties the OCR-specific properties, taking precedence when set
	 * @param restClientBuilderProvider provider of an optional {@link RestClient.Builder};
	 * a fresh builder is used when none is available
	 * @param responseErrorHandler the error handler passed to the API client
	 * @return the configured OCR API client
	 * @throws IllegalArgumentException if no API key or no base URL can be resolved
	 */
	@Bean
	@ConditionalOnMissingBean
	public MistralOcrApi mistralOcrApi(MistralAiCommonProperties commonProperties, MistralAiOcrProperties ocrProperties,
			ObjectProvider<RestClient.Builder> restClientBuilderProvider, ResponseErrorHandler responseErrorHandler) {

		// Start from the OCR-specific values and fall back to the common ones when blank.
		String effectiveApiKey = ocrProperties.getApiKey();
		if (!StringUtils.hasText(effectiveApiKey)) {
			effectiveApiKey = commonProperties.getApiKey();
		}

		String effectiveBaseUrl = ocrProperties.getBaseUrl();
		if (!StringUtils.hasText(effectiveBaseUrl)) {
			effectiveBaseUrl = commonProperties.getBaseUrl();
		}

		Assert.hasText(effectiveApiKey, "Mistral API key must be set");
		Assert.hasText(effectiveBaseUrl, "Mistral base URL must be set");

		RestClient.Builder restClientBuilder = restClientBuilderProvider.getIfAvailable(RestClient::builder);
		return new MistralOcrApi(effectiveBaseUrl, effectiveApiKey, restClientBuilder, responseErrorHandler);
	}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.springframework.ai.model.mistralai.autoconfigure;

import org.springframework.ai.mistralai.ocr.MistralAiOcrOptions;
import org.springframework.ai.mistralai.ocr.MistralOcrApi;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.boot.context.properties.NestedConfigurationProperty;

/**
 * Properties bound from the {@code spring.ai.mistralai.ocr} namespace.
 *
 * <p>
 * A freshly created instance has its base URL set to
 * {@link MistralAiCommonProperties#DEFAULT_BASE_URL} and carries OCR options whose model
 * is {@link #DEFAULT_OCR_MODEL}.
 * </p>
 *
 * @author Alexandros Pappas
 * @since 1.0.0
 */
@ConfigurationProperties(MistralAiOcrProperties.CONFIG_PREFIX)
public class MistralAiOcrProperties extends MistralAiParentProperties {

	/** Prefix under which the OCR properties are bound. */
	public static final String CONFIG_PREFIX = "spring.ai.mistralai.ocr";

	/** Model used when no OCR model is configured explicitly. */
	public static final String DEFAULT_OCR_MODEL = MistralOcrApi.OCRModel.MISTRAL_OCR_LATEST.getValue();

	/** Default OCR request options, bound from {@code spring.ai.mistralai.ocr.options}. */
	@NestedConfigurationProperty
	private MistralAiOcrOptions options;

	/**
	 * Creates the properties with the default base URL and default OCR options.
	 */
	public MistralAiOcrProperties() {
		super.setBaseUrl(MistralAiCommonProperties.DEFAULT_BASE_URL);
		this.options = MistralAiOcrOptions.builder().model(DEFAULT_OCR_MODEL).build();
	}

	/**
	 * Returns the OCR options currently held by these properties.
	 * @return the OCR options
	 */
	public MistralAiOcrOptions getOptions() {
		return this.options;
	}

	/**
	 * Replaces the OCR options held by these properties.
	 * @param options the OCR options to use
	 */
	public void setOptions(MistralAiOcrOptions options) {
		this.options = options;
	}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.springframework.ai.model.mistralai.autoconfigure;

import java.util.List;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;

import org.springframework.ai.mistralai.ocr.MistralOcrApi;
import org.springframework.boot.autoconfigure.AutoConfigurations;
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
import org.springframework.http.ResponseEntity;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Integration tests for {@link MistralAiOcrAutoConfiguration}.
 *
 * <p>
 * These tests require the {@code MISTRAL_AI_API_KEY} environment variable to be set to a
 * non-empty value. They verify that the {@link MistralOcrApi} bean is correctly
 * configured and can interact with the Mistral AI OCR API.
 * </p>
 *
 * @author Alexandros Pappas
 * @since 1.0.0
 */
// ".+" rather than ".*": an empty variable would enable the test and then fail context
// start-up on the "API key must be set" check instead of skipping the test.
@EnabledIfEnvironmentVariable(named = MistralAiOcrAutoConfigurationTests.ENV_VAR_NAME, matches = ".+")
class MistralAiOcrAutoConfigurationTests {

	static final String ENV_VAR_NAME = "MISTRAL_AI_API_KEY";

	private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
		.withPropertyValues("spring.ai.mistralai.api-key=" + System.getenv(ENV_VAR_NAME))
		.withConfiguration(AutoConfigurations.of(MistralAiOcrAutoConfiguration.class));

	/**
	 * Sends an OCR request for the first two pages of a publicly reachable PDF and checks
	 * that markdown and, since image data was requested, a base64 image are returned.
	 */
	@Test
	void ocrExtractionWithPublicUrl() {
		this.contextRunner.run(context -> {

			MistralOcrApi mistralOcrApi = context.getBean(MistralOcrApi.class);
			assertThat(mistralOcrApi).isNotNull();

			String documentUrl = "https://arxiv.org/pdf/2201.04234";
			MistralOcrApi.OCRRequest request = new MistralOcrApi.OCRRequest(
					MistralOcrApi.OCRModel.MISTRAL_OCR_LATEST.getValue(), "test_id",
					new MistralOcrApi.OCRRequest.DocumentURLChunk(documentUrl), List.of(0, 1), true, 2, 50);

			ResponseEntity<MistralOcrApi.OCRResponse> response = mistralOcrApi.ocr(request);

			assertThat(response).isNotNull();
			MistralOcrApi.OCRResponse body = response.getBody();
			assertThat(body).isNotNull();

			var pages = body.pages();
			assertThat(pages).isNotNull().isNotEmpty();
			assertThat(pages.get(0).markdown()).isNotEmpty();

			if (Boolean.TRUE.equals(request.includeImageBase64())) {
				// Assert sizes before indexing so that a short response is reported as an
				// assertion failure rather than an IndexOutOfBoundsException.
				assertThat(pages).hasSizeGreaterThanOrEqualTo(2);
				var images = pages.get(1).images();
				assertThat(images).isNotNull().isNotEmpty();
				assertThat(images.get(0).imageBase64()).isNotNull();
			}

		});
	}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.springframework.ai.model.mistralai.autoconfigure;

import org.junit.jupiter.api.Test;

import org.springframework.ai.mistralai.ocr.MistralOcrApi;
import org.springframework.ai.retry.autoconfigure.SpringAiRetryAutoConfiguration;
import org.springframework.boot.autoconfigure.AutoConfigurations;
import org.springframework.boot.autoconfigure.web.client.RestClientAutoConfiguration;
import org.springframework.boot.test.context.runner.ApplicationContextRunner;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Unit tests for the binding of {@link MistralAiOcrProperties} and its interplay with
 * {@link MistralAiCommonProperties} when {@link MistralAiOcrAutoConfiguration} is loaded.
 *
 * @author Alexandros Pappas
 * @since 1.0.0
 */
class MistralAiOcrPropertiesTests {

	// Auto-configurations registered in every context started by this class.
	private final AutoConfigurations autoConfigurations = AutoConfigurations.of(SpringAiRetryAutoConfiguration.class,
			RestClientAutoConfiguration.class, MistralAiOcrAutoConfiguration.class);

	/**
	 * With only common connection settings configured, the OCR properties keep their own
	 * defaults while the OCR API bean is still created.
	 */
	@Test
	void commonPropertiesAppliedToOcr() {
		runnerWith("spring.ai.mistralai.base-url=COMMON_BASE_URL", "spring.ai.mistralai.api-key=COMMON_API_KEY",
				"spring.ai.mistralai.ocr.options.model=mistral-ocr-specific-model")
			.run(context -> {
				assertThat(context).hasSingleBean(MistralAiCommonProperties.class);
				assertThat(context).hasSingleBean(MistralAiOcrProperties.class);

				MistralAiCommonProperties common = context.getBean(MistralAiCommonProperties.class);
				MistralAiOcrProperties ocr = context.getBean(MistralAiOcrProperties.class);

				assertThat(common.getBaseUrl()).isEqualTo("COMMON_BASE_URL");
				assertThat(common.getApiKey()).isEqualTo("COMMON_API_KEY");

				assertThat(ocr.getBaseUrl()).isEqualTo(MistralAiCommonProperties.DEFAULT_BASE_URL);
				assertThat(ocr.getApiKey()).isNull();

				assertThat(ocr.getOptions()).isNotNull();
				assertThat(ocr.getOptions().getModel()).isEqualTo("mistral-ocr-specific-model");

				assertThat(context).hasSingleBean(MistralOcrApi.class);
			});
	}

	/**
	 * OCR-specific connection settings are bound independently of the common ones.
	 */
	@Test
	void ocrSpecificPropertiesOverrideCommon() {
		runnerWith("spring.ai.mistralai.base-url=COMMON_BASE_URL", "spring.ai.mistralai.api-key=COMMON_API_KEY",
				"spring.ai.mistralai.ocr.base-url=OCR_BASE_URL", "spring.ai.mistralai.ocr.api-key=OCR_API_KEY",
				"spring.ai.mistralai.ocr.options.model=mistral-ocr-default")
			.run(context -> {
				assertThat(context).hasSingleBean(MistralAiCommonProperties.class);
				assertThat(context).hasSingleBean(MistralAiOcrProperties.class);

				MistralAiCommonProperties common = context.getBean(MistralAiCommonProperties.class);
				MistralAiOcrProperties ocr = context.getBean(MistralAiOcrProperties.class);

				assertThat(common.getBaseUrl()).isEqualTo("COMMON_BASE_URL");
				assertThat(common.getApiKey()).isEqualTo("COMMON_API_KEY");

				assertThat(ocr.getBaseUrl()).isEqualTo("OCR_BASE_URL");
				assertThat(ocr.getApiKey()).isEqualTo("OCR_API_KEY");

				assertThat(ocr.getOptions()).isNotNull();
				assertThat(ocr.getOptions().getModel()).isEqualTo("mistral-ocr-default");

				assertThat(context).hasSingleBean(MistralOcrApi.class);
			});
	}

	/**
	 * Every option under {@code spring.ai.mistralai.ocr.options} is bound to the options
	 * object.
	 */
	@Test
	void ocrOptionsBinding() {
		runnerWith("spring.ai.mistralai.api-key=API_KEY", "spring.ai.mistralai.ocr.options.model=custom-ocr-model",
				"spring.ai.mistralai.ocr.options.id=ocr-request-id-123", "spring.ai.mistralai.ocr.options.pages=0,1,5",
				"spring.ai.mistralai.ocr.options.includeImageBase64=true",
				"spring.ai.mistralai.ocr.options.imageLimit=25", "spring.ai.mistralai.ocr.options.imageMinSize=150")
			.run(context -> {
				assertThat(context).hasSingleBean(MistralAiOcrProperties.class);
				var options = context.getBean(MistralAiOcrProperties.class).getOptions();

				assertThat(options).isNotNull();
				assertThat(options.getModel()).isEqualTo("custom-ocr-model");
				assertThat(options.getId()).isEqualTo("ocr-request-id-123");
				assertThat(options.getPages()).containsExactly(0, 1, 5);
				assertThat(options.getIncludeImageBase64()).isTrue();
				assertThat(options.getImageLimit()).isEqualTo(25);
				assertThat(options.getImageMinSize()).isEqualTo(150);
			});
	}

	/**
	 * Walks through the four values of {@code spring.ai.model.ocr} that matter and checks
	 * whether the OCR auto-configuration contributes its beans.
	 */
	@Test
	void ocrActivationViaModelProperty() {
		// Scenario 1: OCR explicitly disabled.
		assertOcrInactive("spring.ai.mistralai.api-key=API_KEY", "spring.ai.model.ocr=none");

		// Scenario 2: OCR explicitly enabled for 'mistral'.
		assertOcrActive("spring.ai.mistralai.api-key=API_KEY", "spring.ai.model.ocr=mistral");

		// Scenario 3: OCR implicitly enabled because the selector property is absent.
		assertOcrActive("spring.ai.mistralai.api-key=API_KEY");

		// Scenario 4: OCR implicitly disabled because another provider is selected.
		assertOcrInactive("spring.ai.mistralai.api-key=API_KEY", "spring.ai.model.ocr=some-other-provider");
	}

	/**
	 * Builds a context runner holding the given properties and the shared
	 * auto-configurations.
	 */
	private ApplicationContextRunner runnerWith(String... propertyValues) {
		return new ApplicationContextRunner().withPropertyValues(propertyValues)
			.withConfiguration(this.autoConfigurations);
	}

	/**
	 * Asserts that the OCR auto-configuration registered the common properties, the OCR
	 * properties and the OCR API bean.
	 */
	private void assertOcrActive(String... propertyValues) {
		runnerWith(propertyValues).run(context -> {
			// The common properties are enabled by MistralAiOcrAutoConfiguration itself.
			assertThat(context).hasSingleBean(MistralAiCommonProperties.class);
			assertThat(context).hasSingleBean(MistralAiOcrProperties.class);
			assertThat(context).hasSingleBean(MistralOcrApi.class);
		});
	}

	/**
	 * Asserts that none of the beans contributed by the OCR auto-configuration exist.
	 */
	private void assertOcrInactive(String... propertyValues) {
		runnerWith(propertyValues).run(context -> {
			assertThat(context.getBeansOfType(MistralAiOcrProperties.class)).isEmpty();
			assertThat(context.getBeansOfType(MistralOcrApi.class)).isEmpty();
			// In this minimal setup only the OCR configuration enables the common
			// properties, so they must be absent too. Another active Mistral AI
			// configuration (such as Chat) could still register them.
			assertThat(context.getBeansOfType(MistralAiCommonProperties.class)).isEmpty();
		});
	}

}
Loading