diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index f2ee318e7..8c536432d 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -3,21 +3,29 @@ name: Pull Request on: pull_request: +permissions: + contents: read + pull-requests: read + jobs: static_analysis: - uses: mindee/mindee-api-java/.github/workflows/_static-analysis.yml@main + uses: ./.github/workflows/_static-analysis.yml build: - uses: mindee/mindee-api-java/.github/workflows/_build.yml@main + uses: ./.github/workflows/_build.yml needs: static_analysis secrets: inherit codeql: - uses: mindee/mindee-api-java/.github/workflows/_codeql.yml@main + uses: ./.github/workflows/_codeql.yml needs: build + permissions: + contents: read + actions: read + security-events: write test_integrations: - uses: mindee/mindee-api-java/.github/workflows/_test-integrations.yml@main + uses: ./.github/workflows/_test-integrations.yml needs: build secrets: inherit test_code_samples: - uses: mindee/mindee-api-java/.github/workflows/_test-code-samples.yml@main + uses: ./.github/workflows/_test-code-samples.yml needs: build secrets: inherit diff --git a/docs/code_samples/default_v2.txt b/docs/code_samples/default_v2.txt new file mode 100644 index 000000000..221430d2c --- /dev/null +++ b/docs/code_samples/default_v2.txt @@ -0,0 +1,45 @@ +import com.mindee.MindeeClientV2; +import com.mindee.InferenceOptions; +import com.mindee.input.LocalInputSource; +import com.mindee.parsing.v2.InferenceResponse; +import java.io.File; +import java.io.IOException; + +public class SimpleMindeeClient { + + public static void main(String[] args) throws IOException, InterruptedException { + String apiKey = "MY_API_KEY"; + String filePath = "/path/to/the/file.ext"; + + // Init a new client + MindeeClientV2 mindeeClient = new MindeeClientV2(apiKey); + + // Load a file from disk + LocalInputSource inputSource = new LocalInputSource(new File(filePath)); + + // Prepare the enqueueing 
options + // Note: modelId is mandatory. + InferenceOptions options = InferenceOptions.builder("MY_MODEL_ID").build(); + + // Parse the file + InferenceResponse response = mindeeClient.enqueueAndParse( + inputSource, + options + ); + + // Print a summary of the response + System.out.println(response.getInference().toString()); + + // Print only the extracted fields +// System.out.println(response.getInference().getResult().getFields().toString()); + } + +} diff --git a/src/main/java/com/mindee/CommonClient.java b/src/main/java/com/mindee/CommonClient.java new file mode 100644 index 000000000..b7d45f95e --- /dev/null +++ b/src/main/java/com/mindee/CommonClient.java @@ -0,0 +1,36 @@ +package com.mindee; + +import com.mindee.input.LocalInputSource; +import com.mindee.input.PageOptions; +import com.mindee.pdf.PdfOperation; +import com.mindee.pdf.SplitQuery; +import java.io.IOException; + +/** + * Common client for all Mindee API clients. + */ +public abstract class CommonClient { + protected PdfOperation pdfOperation; + + /** + * Retrieves the file after applying page operations to it. + * @param localInputSource Local input source to apply operations to. + * @param pageOptions Options to apply. + * @return A byte array of the file after applying page operations. + * @throws IOException Throws if the file can't be accessed. 
+ */ + protected byte[] getSplitFile( + LocalInputSource localInputSource, + PageOptions pageOptions + ) throws IOException { + byte[] splitFile; + if (pageOptions == null || !localInputSource.isPdf()) { + splitFile = localInputSource.getFile(); + } else { + splitFile = pdfOperation.split( + new SplitQuery(localInputSource.getFile(), pageOptions) + ).getFile(); + } + return splitFile; + } +} diff --git a/src/main/java/com/mindee/InferenceOptions.java b/src/main/java/com/mindee/InferenceOptions.java new file mode 100644 index 000000000..71307f479 --- /dev/null +++ b/src/main/java/com/mindee/InferenceOptions.java @@ -0,0 +1,122 @@ +package com.mindee; + +import com.mindee.input.PageOptions; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import lombok.Data; +import lombok.Getter; + +/** + * Options to pass when calling methods using the API V2. + * Instances are created through {@link #builder(String)}. + */ +@Getter +@Data +public final class InferenceOptions { + /** + * ID of the model (required). + */ + private final String modelId; + /** + * Whether to include full text data for async APIs. + * Performing a full OCR on the server increases response time and payload size. + */ + private final boolean fullText; + /** + * Enables Retrieval-Augmented Generation (optional, default: {@code false}). + */ + private final boolean rag; + /** + * Optional alias for the file. + */ + private final String alias; + /** + * IDs of webhooks to propagate the API response to (may be empty). + */ + private final List webhookIds; + /** + * Page options to apply to the document. + */ + private final PageOptions pageOptions; + /** + * Asynchronous polling options. + */ + private final AsyncPollingOptions pollingOptions; + + /** + * Create a new builder. + * + * @param modelId the mandatory model identifier + * @return a fresh {@link Builder} + */ + public static Builder builder(String modelId) { + return new Builder(modelId); + } + + /** + * Fluent builder for {@link InferenceOptions}. 
+ */ + public static final class Builder { + + private final String modelId; + private boolean fullText = false; + private boolean rag = false; + private String alias; + private List webhookIds = Collections.emptyList(); + private PageOptions pageOptions = null; + private AsyncPollingOptions pollingOptions = AsyncPollingOptions.builder().build(); + + private Builder(String modelId) { + this.modelId = Objects.requireNonNull(modelId, "modelId must not be null"); + } + + /** + * Toggle full-text OCR extraction. + */ + public Builder fullText(boolean fullText) { + this.fullText = fullText; + return this; + } + + /** Enable / disable Retrieval-Augmented Generation. */ + public Builder rag(boolean rag) { + this.rag = rag; + return this; + } + + /** Set an alias for the uploaded document. */ + public Builder alias(String alias) { + this.alias = alias; + return this; + } + + /** + * Provide IDs of webhooks to forward the API response to. + * A {@code null} list is treated as "no webhooks" so the built options never expose a null list. + */ + public Builder webhookIds(List webhookIds) { + this.webhookIds = webhookIds == null ? Collections.emptyList() : webhookIds; + return this; + } + + /** Set the page options to apply to the document; {@code null} means none. */ + public Builder pageOptions(PageOptions pageOptions) { + this.pageOptions = pageOptions; + return this; + } + + /** + * Set the asynchronous polling options. + * @throws NullPointerException if {@code pollingOptions} is {@code null}. + */ + public Builder pollingOptions(AsyncPollingOptions pollingOptions) { + this.pollingOptions = Objects.requireNonNull(pollingOptions, "pollingOptions must not be null"); + return this; + } + + /** Build an immutable {@link InferenceOptions} instance. 
*/ + public InferenceOptions build() { + return new InferenceOptions( + modelId, + fullText, + rag, + alias, + webhookIds, + pageOptions, + pollingOptions + ); + } + } +} diff --git a/src/main/java/com/mindee/MindeeClient.java b/src/main/java/com/mindee/MindeeClient.java index 272d2195b..2558bd416 100644 --- a/src/main/java/com/mindee/MindeeClient.java +++ b/src/main/java/com/mindee/MindeeClient.java @@ -16,7 +16,6 @@ import com.mindee.parsing.common.WorkflowResponse; import com.mindee.pdf.PdfBoxApi; import com.mindee.pdf.PdfOperation; -import com.mindee.pdf.SplitQuery; import com.mindee.product.custom.CustomV1; import com.mindee.product.generated.GeneratedV1; import java.io.IOException; @@ -25,10 +24,9 @@ /** * Main entrypoint for Mindee operations. */ -public class MindeeClient { +public class MindeeClient extends CommonClient { private final MindeeApi mindeeApi; - private final PdfOperation pdfOperation; /** * Create a default MindeeClient. @@ -1124,19 +1122,4 @@ public AsyncPredictResponse loadPrediction( return objectMapper.readValue(localResponse.getFile(), parametricType); } - private byte[] getSplitFile( - LocalInputSource localInputSource, - PageOptions pageOptions - ) throws IOException { - byte[] splitFile; - if (pageOptions == null || !localInputSource.isPdf()) { - splitFile = localInputSource.getFile(); - } else { - splitFile = pdfOperation.split( - new SplitQuery(localInputSource.getFile(), pageOptions) - ).getFile(); - } - return splitFile; - } - } diff --git a/src/main/java/com/mindee/MindeeClientV2.java b/src/main/java/com/mindee/MindeeClientV2.java new file mode 100644 index 000000000..c42cd35ca --- /dev/null +++ b/src/main/java/com/mindee/MindeeClientV2.java @@ -0,0 +1,130 @@ +package com.mindee; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.mindee.http.MindeeApiV2; +import com.mindee.http.MindeeHttpApiV2; +import com.mindee.input.LocalInputSource; +import com.mindee.input.LocalResponse; +import 
com.mindee.parsing.v2.CommonResponse; +import com.mindee.parsing.v2.InferenceResponse; +import com.mindee.parsing.v2.JobResponse; +import com.mindee.pdf.PdfBoxApi; +import com.mindee.pdf.PdfOperation; +import java.io.IOException; + +/** + * Entry point for the Mindee **V2** API features. + */ +public class MindeeClientV2 extends CommonClient { + private final MindeeApiV2 mindeeApi; + + /** Uses an API-key read from the environment variables. */ + public MindeeClientV2() { + this(new PdfBoxApi(), createDefaultApiV2("")); + } + + /** Uses the supplied API-key. */ + public MindeeClientV2(String apiKey) { + this(new PdfBoxApi(), createDefaultApiV2(apiKey)); + } + + /** Directly inject an already configured {@link MindeeApiV2}. */ + public MindeeClientV2(MindeeApiV2 mindeeApi) { + this(new PdfBoxApi(), mindeeApi); + } + + /** Inject both a PDF implementation and a HTTP implementation. */ + public MindeeClientV2(PdfOperation pdfOperation, MindeeApiV2 mindeeApi) { + this.pdfOperation = pdfOperation; + this.mindeeApi = mindeeApi; + } + + /** + * Enqueue a document for asynchronous inference. + * When page options are set, they are applied first and the resulting bytes are sent + * under the original file name. + * @param inputSource The input source to send. + * @param options The options to send along with the file. + * @return The job response returned by the API implementation. + * @throws IOException Throws if the file can't be accessed. + */ + public JobResponse enqueue( + LocalInputSource inputSource, + InferenceOptions options) throws IOException { + LocalInputSource finalInput; + if (options.getPageOptions() != null) { + finalInput = new LocalInputSource(getSplitFile(inputSource, options.getPageOptions()), inputSource.getFilename()); + } else { + finalInput = inputSource; + } + return mindeeApi.enqueuePost(finalInput, options); + } + + /** + * Retrieve results for a previously enqueued document. + * @param jobId ID of the job, as returned when enqueueing. + * @return The response returned by the API implementation. + * @throws IllegalArgumentException if {@code jobId} is null or blank. + */ + public CommonResponse parseQueued(String jobId) { + if (jobId == null || jobId.trim().isEmpty()) { + throw new IllegalArgumentException("jobId must not be null or blank."); + } + return mindeeApi.getInferenceFromQueue(jobId); + } + + /** + * Send a local file to an async queue, poll, and parse when complete. + * @param inputSource The input source to send. 
+ * @param options The options to send along with the file. + * @return an instance of {@link InferenceResponse}. + * @throws IOException Throws if the file can't be accessed. + * @throws InterruptedException Throws if the thread is interrupted. + * @throws RuntimeException if no inference is available once the maximum number of retries is reached. + */ + public InferenceResponse enqueueAndParse( + LocalInputSource inputSource, + InferenceOptions options) throws IOException, InterruptedException { + + validatePollingOptions(options.getPollingOptions()); + + JobResponse job = enqueue(inputSource, options); + + Thread.sleep((long) (options.getPollingOptions().getInitialDelaySec() * 1000)); + + int attempts = 0; + int max = options.getPollingOptions().getMaxRetries(); + while (attempts < max) { + Thread.sleep((long) (options.getPollingOptions().getIntervalSec() * 1000)); + CommonResponse resp = parseQueued(job.getJob().getId()); + /* A response that deserialized without an inference payload is treated as "not ready yet": the type check alone cannot tell a finished inference from an empty one. */ + if (resp instanceof InferenceResponse && ((InferenceResponse) resp).getInference() != null) { + return (InferenceResponse) resp; + } + attempts++; + } + throw new RuntimeException("Max retries exceeded (" + max + ")."); + } + + /** + * Deserialize a webhook payload (or any saved response) into + * {@link InferenceResponse}. + * @param localResponse The saved response to load. + * @return The deserialized response. + * @throws IOException Throws if the payload can't be read or parsed. + */ + public InferenceResponse loadInference(LocalResponse localResponse) throws IOException { + ObjectMapper mapper = new ObjectMapper().findAndRegisterModules(); + InferenceResponse model = + mapper.readValue(localResponse.getFile(), InferenceResponse.class); + model.setRawResponse(localResponse.toString()); + return model; + } + + private static MindeeApiV2 createDefaultApiV2(String apiKey) { + MindeeSettingsV2 settings = apiKey == null || apiKey.trim().isEmpty() + ? 
new MindeeSettingsV2() + : new MindeeSettingsV2(apiKey); + return MindeeHttpApiV2.builder() + .mindeeSettings(settings) + .build(); + } + + /** + * Rejects polling options that are below the supported minimums. + * @param p The polling options to check. + * @throws IllegalArgumentException if the initial delay or the interval is under 1 second, + * or if the maximum number of retries is under 2. + */ + private static void validatePollingOptions(AsyncPollingOptions p) { + if (p.getInitialDelaySec() < 1) { + throw new IllegalArgumentException("Initial delay must be ≥ 1 s"); + } + if (p.getIntervalSec() < 1) { + throw new IllegalArgumentException("Interval must be ≥ 1 s"); + } + if (p.getMaxRetries() < 2) { + throw new IllegalArgumentException("Max retries must be ≥ 2"); + } + } +} diff --git a/src/main/java/com/mindee/MindeeSettingsV2.java b/src/main/java/com/mindee/MindeeSettingsV2.java new file mode 100644 index 000000000..3d95ba56a --- /dev/null +++ b/src/main/java/com/mindee/MindeeSettingsV2.java @@ -0,0 +1,54 @@ +package com.mindee; + +import java.util.Optional; +import lombok.Builder; +import lombok.Getter; + +/** + * Mindee API V2 configuration. + */ +@Getter +@Builder +public class MindeeSettingsV2 { + + private static final String DEFAULT_MINDEE_V2_API_URL = "https://api-v2.mindee.net/v2"; + private final String apiKey; + private final String baseUrl; + + /** Resolves both the API key and the base URL from the environment (or the default URL). */ + public MindeeSettingsV2() { + this("", ""); + } + + /** + * @return the API key, or an empty {@link Optional} when none was supplied or found in the environment. + */ + public Optional getApiKey() { + return Optional.ofNullable(apiKey); + } + + /** + * Uses the given API key; the base URL is resolved from the environment or the default. + * @param apiKey The API key; when null or blank, {@code MINDEE_V2_API_KEY} is read instead. + */ + public MindeeSettingsV2(String apiKey) { + this(apiKey, ""); + } + + /** + * Explicit values win; null or blank values fall back to the environment. + * @param apiKey The API key; when null or blank, {@code MINDEE_V2_API_KEY} is read instead. + * @param baseUrl The base URL; when null or blank, {@code MINDEE_V2_API_URL} is read, then the default URL is used. + */ + public MindeeSettingsV2(String apiKey, String baseUrl) { + + if (apiKey == null || apiKey.trim().isEmpty()) { + String apiKeyFromEnv = System.getenv("MINDEE_V2_API_KEY"); + if (apiKeyFromEnv == null || apiKeyFromEnv.trim().isEmpty()) { + this.apiKey = null; + } else { + this.apiKey = apiKeyFromEnv; + } + } else { + this.apiKey = apiKey; + } + + if (baseUrl == null || baseUrl.trim().isEmpty()) { + String baseUrlFromEnv = System.getenv("MINDEE_V2_API_URL"); + if (baseUrlFromEnv != null && !baseUrlFromEnv.trim().isEmpty()) { + this.baseUrl = baseUrlFromEnv; + } else { + this.baseUrl = DEFAULT_MINDEE_V2_API_URL; + } + } else { + this.baseUrl = baseUrl; + } + } +} diff --git 
a/src/main/java/com/mindee/http/MindeeApi.java b/src/main/java/com/mindee/http/MindeeApi.java index 051d1ffc5..bc01d52d7 100644 --- a/src/main/java/com/mindee/http/MindeeApi.java +++ b/src/main/java/com/mindee/http/MindeeApi.java @@ -4,14 +4,12 @@ import com.mindee.parsing.common.Inference; import com.mindee.parsing.common.PredictResponse; import com.mindee.parsing.common.WorkflowResponse; -import java.io.ByteArrayOutputStream; import java.io.IOException; -import org.apache.hc.core5.http.HttpEntity; /** * Defines required methods for an API. */ -abstract public class MindeeApi { +abstract public class MindeeApi extends MindeeApiCommon { /** * Get a document from the predict queue. @@ -45,38 +43,4 @@ abstract public WorkflowResponse executeWorkflowP String workflowId, RequestParameters requestParameters ) throws IOException; - - protected String getUserAgent() { - String javaVersion = System.getProperty("java.version"); - String sdkVersion = getClass().getPackage().getImplementationVersion(); - String osName = System.getProperty("os.name").toLowerCase(); - - if (osName.contains("windows")) { - osName = "windows"; - } else if (osName.contains("darwin")) { - osName = "macos"; - } else if (osName.contains("mac")) { - osName = "macos"; - } else if (osName.contains("linux")) { - osName = "linux"; - } else if (osName.contains("bsd")) { - osName = "bsd"; - } else if (osName.contains("aix")) { - osName = "aix"; - } - return String.format("mindee-api-java@v%s java-v%s %s", sdkVersion, javaVersion, osName); - } - - protected boolean is2xxStatusCode(int statusCode) { - return statusCode >= 200 && statusCode <= 299; - } - - protected String readRawResponse(HttpEntity responseEntity) throws IOException { - ByteArrayOutputStream contentRead = new ByteArrayOutputStream(); - byte[] buffer = new byte[1024]; - for (int length; (length = responseEntity.getContent().read(buffer)) != -1; ) { - contentRead.write(buffer, 0, length); - } - return contentRead.toString("UTF-8"); - } } diff 
--git a/src/main/java/com/mindee/http/MindeeApiCommon.java b/src/main/java/com/mindee/http/MindeeApiCommon.java new file mode 100644 index 000000000..6d1bba5dd --- /dev/null +++ b/src/main/java/com/mindee/http/MindeeApiCommon.java @@ -0,0 +1,53 @@ +package com.mindee.http; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.apache.hc.core5.http.HttpEntity; + +/** + * Defines common methods for mindee APIs. + */ +public abstract class MindeeApiCommon { + /** + * Retrieves the user agent. + * The OS name is normalized with a locale-independent lower-casing so that detection + * does not depend on the JVM's default locale. + * @return the user agent. + */ + protected String getUserAgent() { + String javaVersion = System.getProperty("java.version"); + String sdkVersion = getClass().getPackage().getImplementationVersion(); + String osName = System.getProperty("os.name").toLowerCase(java.util.Locale.ROOT); + + if (osName.contains("windows")) { + osName = "windows"; + } else if (osName.contains("darwin") || osName.contains("mac")) { + osName = "macos"; + } else if (osName.contains("linux")) { + osName = "linux"; + } else if (osName.contains("bsd")) { + osName = "bsd"; + } else if (osName.contains("aix")) { + osName = "aix"; + } + return String.format("mindee-api-java@v%s java-v%s %s", sdkVersion, javaVersion, osName); + } + + /** + * Checks if the status code is in the 2xx range. + * @param statusCode the status code to check. + * @return {@code true} if the status code is in the 2xx range, false otherwise. 
+ */ + protected boolean is2xxStatusCode(int statusCode) { + return statusCode >= 200 && statusCode <= 299; + } + + /** + * Reads the whole response entity as a UTF-8 string. + * The content stream is obtained once and closed after reading. + * @param responseEntity the entity to read. + * @return the entity content decoded as UTF-8. + * @throws IOException if the content cannot be read. + */ + protected String readRawResponse(HttpEntity responseEntity) throws IOException { + ByteArrayOutputStream contentRead = new ByteArrayOutputStream(); + byte[] buffer = new byte[1024]; + try (java.io.InputStream content = responseEntity.getContent()) { + for (int length; (length = content.read(buffer)) != -1; ) { + contentRead.write(buffer, 0, length); + } + } + return contentRead.toString("UTF-8"); + } +} diff --git a/src/main/java/com/mindee/http/MindeeApiV2.java b/src/main/java/com/mindee/http/MindeeApiV2.java new file mode 100644 index 000000000..8cd09bee9 --- /dev/null +++ b/src/main/java/com/mindee/http/MindeeApiV2.java @@ -0,0 +1,28 @@ +package com.mindee.http; + +import com.mindee.InferenceOptions; +import com.mindee.input.LocalInputSource; +import com.mindee.parsing.v2.CommonResponse; +import com.mindee.parsing.v2.JobResponse; +import java.io.IOException; + +/** + * Defines required methods for an API. + */ +abstract public class MindeeApiV2 extends MindeeApiCommon { + /** + * Send a file to the prediction queue. + */ + abstract public JobResponse enqueuePost( + LocalInputSource inputSource, + InferenceOptions options + ) throws IOException; + + /** + * Get a document from the predict queue. 
+ */ + abstract public CommonResponse getInferenceFromQueue( + String jobId + ); + +} diff --git a/src/main/java/com/mindee/http/MindeeHttpApiV2.java b/src/main/java/com/mindee/http/MindeeHttpApiV2.java new file mode 100644 index 000000000..c1b0d0dbc --- /dev/null +++ b/src/main/java/com/mindee/http/MindeeHttpApiV2.java @@ -0,0 +1,236 @@ +package com.mindee.http; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.mindee.InferenceOptions; +import com.mindee.MindeeException; +import com.mindee.MindeeSettingsV2; +import com.mindee.input.LocalInputSource; +import com.mindee.parsing.v2.CommonResponse; +import com.mindee.parsing.v2.ErrorResponse; +import com.mindee.parsing.v2.InferenceResponse; +import com.mindee.parsing.v2.JobResponse; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import lombok.Builder; +import org.apache.hc.client5.http.classic.methods.HttpGet; +import org.apache.hc.client5.http.classic.methods.HttpPost; +import org.apache.hc.client5.http.entity.mime.HttpMultipartMode; +import org.apache.hc.client5.http.entity.mime.MultipartEntityBuilder; +import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; +import org.apache.hc.client5.http.impl.classic.HttpClientBuilder; +import org.apache.hc.core5.http.ClassicHttpResponse; +import org.apache.hc.core5.http.ContentType; +import org.apache.hc.core5.http.HttpEntity; +import org.apache.hc.core5.http.HttpHeaders; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.apache.hc.core5.net.URIBuilder; + +/** + * HTTP Client class for the V2 API. + */ +public final class MindeeHttpApiV2 extends MindeeApiV2 { + + /** + * Shared JSON mapper. Modules are discovered and registered once, at class initialization, + * so the mapper is never reconfigured while requests are using it. + */ + private static final ObjectMapper mapper = new ObjectMapper().findAndRegisterModules(); + + /** + * The MindeeSetting needed to make the api call. + */ + private final MindeeSettingsV2 mindeeSettings; + /** + * The HttpClientBuilder used to create HttpClient objects used to make api calls over http. 
+ * Defaults to HttpClientBuilder.create().useSystemProperties() + */ + private final HttpClientBuilder httpClientBuilder; + + + /** + * Creates a client using the default HTTP client builder. + * @param mindeeSettings The settings used for every call. + */ + public MindeeHttpApiV2(MindeeSettingsV2 mindeeSettings) { + this( + mindeeSettings, + null + ); + } + + @Builder + private MindeeHttpApiV2( + MindeeSettingsV2 mindeeSettings, + HttpClientBuilder httpClientBuilder + ) { + this.mindeeSettings = mindeeSettings; + + if (httpClientBuilder != null) { + this.httpClientBuilder = httpClientBuilder; + } else { + this.httpClientBuilder = HttpClientBuilder.create().useSystemProperties(); + } + } + + /** + * Enqueues a doc with the POST method. + * + * @param inputSource Input source to send. + * @param options Options to send the file along with. + * @return A job response. + */ + @Override + public JobResponse enqueuePost( + LocalInputSource inputSource, + InferenceOptions options + ) { + String url = this.mindeeSettings.getBaseUrl() + "/inferences/enqueue"; + HttpPost post = buildHttpPost(url, inputSource, options); + + try (CloseableHttpClient httpClient = httpClientBuilder.build()) { + return httpClient.execute( + post, response -> { + String raw = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); + return deserializeOrThrow(raw, JobResponse.class, response.getCode()); + } + ); + } catch (IOException err) { + throw new MindeeException(err.getMessage(), err); + } + } + + /** + * Fetches the inference for a job with the GET method. + * + * @param jobId ID of the job to fetch. + * @return The response deserialized as an {@link InferenceResponse}. + */ + @Override + public CommonResponse getInferenceFromQueue( + String jobId + ) { + + String url = this.mindeeSettings.getBaseUrl() + "/inferences/" + jobId; + HttpGet get = new HttpGet(url); + + if (this.mindeeSettings.getApiKey().isPresent()) { + get.setHeader(HttpHeaders.AUTHORIZATION, this.mindeeSettings.getApiKey().get()); + } + get.setHeader(HttpHeaders.USER_AGENT, getUserAgent()); + try (CloseableHttpClient httpClient = httpClientBuilder.build()) { + return httpClient.execute( + get, response -> { + HttpEntity responseEntity = response.getEntity(); + int statusCode 
= response.getCode(); + if (!is2xxStatusCode(statusCode)) { + throw getHttpError(response); + } + try { + String raw = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); + + return deserializeOrThrow(raw, InferenceResponse.class, response.getCode()); + } finally { + /* make sure the connection can be reused even if parsing fails */ + EntityUtils.consumeQuietly(responseEntity); + } + } + ); + } catch (IOException err) { + throw new MindeeException(err.getMessage(), err); + } + } + + private MindeeHttpExceptionV2 getHttpError(ClassicHttpResponse response) { + String rawBody; + try { + rawBody = response.getEntity() == null + ? "" + : EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); + + ErrorResponse err = mapper.readValue(rawBody, ErrorResponse.class); + + if (err.getDetail() == null) { + err = new ErrorResponse("Unknown error", response.getCode()); + } + return new MindeeHttpExceptionV2(err.getStatus(), err.getDetail()); + + } catch (Exception e) { + return new MindeeHttpExceptionV2(response.getCode(), "Unknown error"); + } + } + + + private HttpEntity buildHttpBody( + LocalInputSource inputSource, + InferenceOptions options + ) { + MultipartEntityBuilder builder = MultipartEntityBuilder.create(); + builder.setMode(HttpMultipartMode.EXTENDED); + builder.addBinaryBody( + "file", + inputSource.getFile(), + ContentType.DEFAULT_BINARY, + inputSource.getFilename() + ); + + if (options.getAlias() != null) { + builder.addTextBody( + "alias", + options.getAlias().toLowerCase() + ); + } + + builder.addTextBody("model_id", options.getModelId()); + if (options.isFullText()) { + builder.addTextBody("full_text_ocr", "true"); + } + if (options.isRag()) { + builder.addTextBody("rag", "true"); + } + if (options.getAlias() != null) { + builder.addTextBody("alias", options.getAlias()); + } + if (!options.getWebhookIds().isEmpty()) { + builder.addTextBody("webhook_ids", String.join(",", options.getWebhookIds())); + } + return builder.build(); + } + 
+ + private HttpPost buildHttpPost( + String url, + LocalInputSource inputSource, + InferenceOptions options + ) { + HttpPost post; + try { + URIBuilder uriBuilder = new URIBuilder(url); + post = new HttpPost(uriBuilder.build()); + } + // This exception will never happen because we are providing the URL internally. + // Do this to avoid declaring the exception in the method signature. + catch (URISyntaxException err) { + return new HttpPost("invalid URI"); + } + + if (this.mindeeSettings.getApiKey().isPresent()) { + post.setHeader(HttpHeaders.AUTHORIZATION, this.mindeeSettings.getApiKey().get()); + } + post.setHeader(HttpHeaders.USER_AGENT, getUserAgent()); + post.setEntity(buildHttpBody(inputSource, options)); + return post; + } + + + private R deserializeOrThrow( + String body, Class clazz, int httpStatus) throws MindeeHttpExceptionV2 { + + if (httpStatus >= 200 && httpStatus < 300) { + try { + R model = mapper.readerFor(clazz).readValue(body); + model.setRawResponse(body); + return model; + } catch (Exception exception) { + throw new MindeeException("Couldn't deserialize server response:\n" + exception.getMessage()); + } + } + + ErrorResponse err; + try { + err = mapper.readValue(body, ErrorResponse.class); + if (err.getDetail() == null) { + err = new ErrorResponse("Unknown error", httpStatus); + } + } catch (Exception ignored) { + err = new ErrorResponse("Unknown error", httpStatus); + } + throw new MindeeHttpExceptionV2(err.getStatus(), err.getDetail()); + } +} diff --git a/src/main/java/com/mindee/http/MindeeHttpExceptionV2.java b/src/main/java/com/mindee/http/MindeeHttpExceptionV2.java new file mode 100644 index 000000000..056a72747 --- /dev/null +++ b/src/main/java/com/mindee/http/MindeeHttpExceptionV2.java @@ -0,0 +1,29 @@ +package com.mindee.http; + +import com.mindee.MindeeException; +import lombok.Getter; + +/** + * Represent a Mindee exception. + */ +@Getter +public class MindeeHttpExceptionV2 extends MindeeException { + /** Standard HTTP status code. 
*/ + private final int status; + /** Error details. */ + private final String detail; + + public MindeeHttpExceptionV2(int status, String detail) { + super(detail); + this.status = status; + this.detail = detail; + } + + public String toString() { + String outStr = super.toString() + " - HTTP " + getStatus(); + if (!getDetail().isEmpty()) { + outStr += " - " + getDetail(); + } + return outStr; + } +} diff --git a/src/main/java/com/mindee/input/PageOptions.java b/src/main/java/com/mindee/input/PageOptions.java index de344640c..3de632cec 100644 --- a/src/main/java/com/mindee/input/PageOptions.java +++ b/src/main/java/com/mindee/input/PageOptions.java @@ -46,4 +46,31 @@ public PageOptions( this.operation = operation; this.onMinPages = onMinPages; } + + /** + * Builder for page options. + */ + public static final class Builder { + private List pageIndexes; + private PageOptionsOperation operation; + private Integer onMinPages; + + public Builder pageIndexes(List pageIndexes) { + this.pageIndexes = pageIndexes; + return this; + } + + public Builder operation(PageOptionsOperation operation) { + this.operation = operation; + return this; + } + public Builder onMinPages(Integer onMinPages) { + this.onMinPages = onMinPages; + return this; + } + + public PageOptions build() { + return new PageOptions(pageIndexes, operation, onMinPages); + } + } } diff --git a/src/main/java/com/mindee/parsing/common/LocalDateTameTimeDeserializer.java b/src/main/java/com/mindee/parsing/common/LocalDateTimeDeserializer.java similarity index 94% rename from src/main/java/com/mindee/parsing/common/LocalDateTameTimeDeserializer.java rename to src/main/java/com/mindee/parsing/common/LocalDateTimeDeserializer.java index e5f8de4bf..3aadcd05c 100644 --- a/src/main/java/com/mindee/parsing/common/LocalDateTameTimeDeserializer.java +++ b/src/main/java/com/mindee/parsing/common/LocalDateTimeDeserializer.java @@ -14,7 +14,7 @@ /** * Deserializer for LocalDateTime */ -class LocalDateTimeDeserializer extends 
JsonDeserializer { +public class LocalDateTimeDeserializer extends JsonDeserializer { @Override public LocalDateTime deserialize( JsonParser jsonParser, diff --git a/src/main/java/com/mindee/parsing/v2/CommonResponse.java b/src/main/java/com/mindee/parsing/v2/CommonResponse.java new file mode 100644 index 000000000..263b31c2d --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/CommonResponse.java @@ -0,0 +1,23 @@ +package com.mindee.parsing.v2; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.Data; +import lombok.EqualsAndHashCode; + +/** + * Common response information from Mindee API V2. + */ +@Data +@EqualsAndHashCode +@JsonIgnoreProperties(ignoreUnknown = true) +abstract public class CommonResponse { + /** + * The raw server response. + * This is not formatted in any way by the library and may contain newline and tab characters. + */ + private String rawResponse; + + /** + * Stores the raw server response. + * @param contents The raw response text. + */ + public void setRawResponse(String contents) { + rawResponse = contents; + } +} diff --git a/src/main/java/com/mindee/parsing/v2/ErrorResponse.java b/src/main/java/com/mindee/parsing/v2/ErrorResponse.java new file mode 100644 index 000000000..990b07f0b --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/ErrorResponse.java @@ -0,0 +1,36 @@ +package com.mindee.parsing.v2; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Error information from the API. + */ +@Getter +@EqualsAndHashCode +@JsonIgnoreProperties(ignoreUnknown = true) +@AllArgsConstructor +@NoArgsConstructor +public final class ErrorResponse { + /** + * Detail relevant to the error. + */ + @JsonProperty("detail") + private String detail; + + /** + * HTTP error code. + */ + @JsonProperty("status") + private int status; + + /** For prettier display. 
*/ + @Override + public String toString() { + return "HTTP Status: " + status + " - " + detail; + } +} diff --git a/src/main/java/com/mindee/parsing/v2/Inference.java b/src/main/java/com/mindee/parsing/v2/Inference.java new file mode 100644 index 000000000..3b50dd6fe --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/Inference.java @@ -0,0 +1,61 @@ +package com.mindee.parsing.v2; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.StringJoiner; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Inference object for the V2 API. + */ +@Getter +@EqualsAndHashCode +@JsonIgnoreProperties(ignoreUnknown = true) +@AllArgsConstructor +@NoArgsConstructor +public class Inference { + /** + * Inference ID. + */ + @JsonProperty("id") + private String id; + + /** + * Model info. + */ + @JsonProperty("model") + private InferenceResultModel model; + + /** + * File info. + */ + @JsonProperty("file") + private InferenceResultFile file; + + /** + * Model result values. + */ + @JsonProperty("result") + private InferenceResult result; + + /** + * Builds a text summary with the model ID, the file info and the result. + * Parts that are {@code null} are rendered as empty strings. + */ + @Override + public String toString() { + StringJoiner joiner = new StringJoiner("\n"); + joiner + .add("Inference") + .add("#########") + .add("Model") + .add("=====") + .add(":ID: " + (model != null ? model.getId() : "")) + .add("") + .add("File") + .add("====") + .add(file != null ? file.toString() : "") + .add("") + .add(result != null ? 
result.toString() : ""); + return joiner.toString().trim() + "\n"; + } +} diff --git a/src/main/java/com/mindee/parsing/v2/InferenceResponse.java b/src/main/java/com/mindee/parsing/v2/InferenceResponse.java new file mode 100644 index 000000000..7c5294c43 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/InferenceResponse.java @@ -0,0 +1,17 @@ +package com.mindee.parsing.v2; + +import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Getter; + +/** + * Represents an asynchronous inference response (V2). + */ +@Getter +public class InferenceResponse extends CommonResponse { + + /** + * Inference result. + */ + @JsonProperty("inference") + private Inference inference; +} diff --git a/src/main/java/com/mindee/parsing/v2/InferenceResult.java b/src/main/java/com/mindee/parsing/v2/InferenceResult.java new file mode 100644 index 000000000..d9e92ba74 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/InferenceResult.java @@ -0,0 +1,47 @@ +package com.mindee.parsing.v2; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.mindee.parsing.v2.field.InferenceFields; +import java.util.StringJoiner; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Generic result for any off-the-shelf Mindee V2 model. + */ +@Getter +@EqualsAndHashCode +@JsonIgnoreProperties(ignoreUnknown = true) +@AllArgsConstructor +@NoArgsConstructor +public final class InferenceResult { + + /** + * Model fields. + */ + @JsonProperty("fields") + private InferenceFields fields; + + /** + * Options. 
+ */ + @JsonProperty("options") + private InferenceResultOptions options; + + @Override + public String toString() { + StringJoiner joiner = new StringJoiner("\n"); + joiner.add("Fields") + .add("======"); + joiner.add(fields.toString()); + if (this.getOptions() != null) { + joiner.add("Options") + .add("=======") + .add(this.getOptions().toString()); + } + return joiner.toString(); + } +} diff --git a/src/main/java/com/mindee/parsing/v2/InferenceResultFile.java b/src/main/java/com/mindee/parsing/v2/InferenceResultFile.java new file mode 100644 index 000000000..5296ae5e9 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/InferenceResultFile.java @@ -0,0 +1,34 @@ +package com.mindee.parsing.v2; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * File info for V2 API. + */ +@Getter +@EqualsAndHashCode +@JsonIgnoreProperties(ignoreUnknown = true) +@AllArgsConstructor +@NoArgsConstructor +public class InferenceResultFile { + /** + * File name. + */ + @JsonProperty("name") + private String name; + + /** + * Optional file alias. + */ + @JsonProperty("alias") + private String alias; + + public String toString() { + return ":Name: " + name + "\n:Alias:" + (alias != null ? 
" " + alias : ""); + } +} diff --git a/src/main/java/com/mindee/parsing/v2/InferenceResultModel.java b/src/main/java/com/mindee/parsing/v2/InferenceResultModel.java new file mode 100644 index 000000000..70cf0e80d --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/InferenceResultModel.java @@ -0,0 +1,25 @@ +package com.mindee.parsing.v2; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Model information for a V2 API inference. + */ +@Getter +@EqualsAndHashCode +@JsonIgnoreProperties(ignoreUnknown = true) +@AllArgsConstructor +@NoArgsConstructor +public class InferenceResultModel { + + /** + * The ID of the model. + */ + @JsonProperty("id") + private String id; +} diff --git a/src/main/java/com/mindee/parsing/v2/InferenceResultOptions.java b/src/main/java/com/mindee/parsing/v2/InferenceResultOptions.java new file mode 100644 index 000000000..80d4cc349 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/InferenceResultOptions.java @@ -0,0 +1,15 @@ +package com.mindee.parsing.v2; + +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.List; +import lombok.Getter; + +/** + * Option response for V2 API inference. 
+ */ +@Getter +public final class InferenceResultOptions { + + @JsonProperty("raw_texts") + private List rawTexts; +} diff --git a/src/main/java/com/mindee/parsing/v2/Job.java b/src/main/java/com/mindee/parsing/v2/Job.java new file mode 100644 index 000000000..d08f05e23 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/Job.java @@ -0,0 +1,84 @@ +package com.mindee.parsing.v2; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.mindee.parsing.common.LocalDateTimeDeserializer; +import java.time.LocalDateTime; +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Defines an enqueued Job. + */ + +@Getter +@EqualsAndHashCode +@JsonIgnoreProperties(ignoreUnknown = true) +@AllArgsConstructor +@NoArgsConstructor +public final class Job { + /** + * Date and time the job was created at. + */ + @JsonProperty("created_at") + @JsonDeserialize(using = LocalDateTimeDeserializer.class) + private LocalDateTime createdAt; + + /** + * ID of the job. + */ + @JsonProperty("id") + private String id; + + /** + * Status of the job. + */ + @JsonProperty("status") + private String status; + + /** + * Error information for the job. + */ + @JsonProperty("error") + private ErrorResponse error; + + /** + * ID of the model. + */ + @JsonProperty("model_id") + private String modelId; + + /** + * Name of the file. + */ + @JsonProperty("file_name") + private String fileName; + + /** + * Optional alias of the file. + */ + @JsonProperty("file_alias") + private String fileAlias; + + /** + * Polling URL. + */ + @JsonProperty("polling_url") + private String pollingUrl; + + /** + * Result URL, when available. + */ + @JsonProperty("result_url") + private String resultUrl; + + /** + * Webhooks associated with the job.
+ */ + @JsonProperty("webhooks") + private List webhooks; +} diff --git a/src/main/java/com/mindee/parsing/v2/JobResponse.java b/src/main/java/com/mindee/parsing/v2/JobResponse.java new file mode 100644 index 000000000..dcad0c80a --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/JobResponse.java @@ -0,0 +1,21 @@ +package com.mindee.parsing.v2; + + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Data; +import lombok.EqualsAndHashCode; + +/** + * Represents an asynchronous polling response. + */ +@Data +@EqualsAndHashCode(callSuper = true) +@JsonIgnoreProperties(ignoreUnknown = true) +public final class JobResponse extends CommonResponse { + /** + * Representation of the Job. + */ + @JsonProperty("job") + Job job; +} diff --git a/src/main/java/com/mindee/parsing/v2/JobResponseWebhook.java b/src/main/java/com/mindee/parsing/v2/JobResponseWebhook.java new file mode 100644 index 000000000..09ca9f123 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/JobResponseWebhook.java @@ -0,0 +1,47 @@ +package com.mindee.parsing.v2; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.mindee.parsing.common.LocalDateTimeDeserializer; +import java.time.LocalDateTime; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * JobResponseWebhook info. + */ +@Getter +@EqualsAndHashCode +@JsonIgnoreProperties(ignoreUnknown = true) +@AllArgsConstructor +@NoArgsConstructor +public final class JobResponseWebhook { + + /** + * ID of the webhook. + */ + @JsonProperty("id") + private String id; + + /** + * An error encountered while processing the webhook. + */ + @JsonProperty("error") + private ErrorResponse error; + + /** + * Date and time the webhook was created at. 
+ */ + @JsonProperty("created_at") + @JsonDeserialize(using = LocalDateTimeDeserializer.class) + private LocalDateTime createdAt; + + /** + * Status of the webhook. + */ + @JsonProperty("status") + private String status; +} diff --git a/src/main/java/com/mindee/parsing/v2/RawText.java b/src/main/java/com/mindee/parsing/v2/RawText.java new file mode 100644 index 000000000..3fe0a5140 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/RawText.java @@ -0,0 +1,28 @@ +package com.mindee.parsing.v2; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Raw text as found in the document. + */ +@Getter +@JsonIgnoreProperties(ignoreUnknown = true) +@AllArgsConstructor +@NoArgsConstructor +public class RawText { + /** + * Page number the text was found on. + */ + @JsonProperty("page") + private Integer page; + + /** + * Content of the raw text. + */ + @JsonProperty("content") + private String content; +} diff --git a/src/main/java/com/mindee/parsing/v2/field/BaseField.java b/src/main/java/com/mindee/parsing/v2/field/BaseField.java new file mode 100644 index 000000000..f999d5632 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/field/BaseField.java @@ -0,0 +1,21 @@ +package com.mindee.parsing.v2.field; + +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.List; + +/** + * Base class for V2 fields. + */ +public abstract class BaseField { + /** + * Field's location. + */ + @JsonProperty("locations") + private List page; + + /** + * Confidence associated with the field.
+ */ + @JsonProperty("confidence") + private FieldConfidence confidence; +} diff --git a/src/main/java/com/mindee/parsing/v2/field/DynamicField.java b/src/main/java/com/mindee/parsing/v2/field/DynamicField.java new file mode 100644 index 000000000..a794332b9 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/field/DynamicField.java @@ -0,0 +1,70 @@ +package com.mindee.parsing.v2.field; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Dynamically-typed field (simple / object / list). + */ +@Getter +@EqualsAndHashCode +@JsonIgnoreProperties(ignoreUnknown = true) +@JsonDeserialize(using = DynamicFieldDeserializer.class) +@AllArgsConstructor +@NoArgsConstructor +public class DynamicField { + + /** + * Type of the wrapped field. + */ + @JsonProperty("type") + private FieldType type; + + /** + * Value as simple field. + */ + @JsonProperty("simple_field") + private SimpleField simpleField; + + /** + * Value as list field. + */ + @JsonProperty("list_field") + private ListField listField; + + /** + * Value as object field. 
+ */ + @JsonProperty("object_field") + private ObjectField objectField; + + public static DynamicField of(SimpleField value) { + return new DynamicField(FieldType.SIMPLE_FIELD, value, null, null); + } + + public static DynamicField of(ObjectField value) { + return new DynamicField(FieldType.OBJECT_FIELD, null, null, value); + } + + public static DynamicField of(ListField value) { + return new DynamicField(FieldType.LIST_FIELD, null, value, null); + } + + @Override + public String toString() { + if (simpleField != null) return simpleField.toString(); + if (listField != null) return listField.toString(); + if (objectField != null) return objectField.toString(); + return ""; + } + + /** + * Possible field kinds. + */ + public enum FieldType { SIMPLE_FIELD, OBJECT_FIELD, LIST_FIELD } +} diff --git a/src/main/java/com/mindee/parsing/v2/field/DynamicFieldDeserializer.java b/src/main/java/com/mindee/parsing/v2/field/DynamicFieldDeserializer.java new file mode 100644 index 000000000..4d933aabd --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/field/DynamicFieldDeserializer.java @@ -0,0 +1,43 @@ +package com.mindee.parsing.v2.field; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.ObjectCodec; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; + +/** + * Custom deserializer for {@link DynamicField}. 
+ */ +public final class DynamicFieldDeserializer extends JsonDeserializer { + + @Override + public DynamicField deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException { + ObjectCodec codec = jp.getCodec(); + JsonNode root = codec.readTree(jp); + + // -------- LIST FEATURE -------- + if (root.has("items") && root.get("items").isArray()) { + ListField list = new ListField(); + for (JsonNode itemNode : root.get("items")) { + list.getItems().add(codec.treeToValue(itemNode, DynamicField.class)); + } + return DynamicField.of(list); + } + + // -------- OBJECT WITH NESTED FIELDS -------- + if (root.has("fields") && root.get("fields").isObject()) { + ObjectField objectField = codec.treeToValue(root, ObjectField.class); + return DynamicField.of(objectField); + } + + // -------- SIMPLE OBJECT -------- + if (root.has("value")) { + SimpleField simple = codec.treeToValue(root, SimpleField.class); + return DynamicField.of(simple); + } + + return null; + } +} diff --git a/src/main/java/com/mindee/parsing/v2/field/FieldConfidence.java b/src/main/java/com/mindee/parsing/v2/field/FieldConfidence.java new file mode 100644 index 000000000..8651f02f6 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/field/FieldConfidence.java @@ -0,0 +1,38 @@ +package com.mindee.parsing.v2.field; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; + +/** + * Confidence level of a field as returned by the V2 API. 
+ */ +public enum FieldConfidence { + Certain("Certain"), + High("High"), + Medium("Medium"), + Low("Low"); + + private final String json; + + FieldConfidence(String json) { + this.json = json; + } + + @JsonValue + public String toJson() { + return json; + } + + @JsonCreator + public static FieldConfidence fromJson(String value) { + if (value == null) { + return null; + } + for (FieldConfidence level : values()) { + if (level.json.equalsIgnoreCase(value)) { + return level; + } + } + throw new IllegalArgumentException("Unknown confidence level '" + value + "'."); + } +} diff --git a/src/main/java/com/mindee/parsing/v2/field/FieldLocation.java b/src/main/java/com/mindee/parsing/v2/field/FieldLocation.java new file mode 100644 index 000000000..dea8b6783 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/field/FieldLocation.java @@ -0,0 +1,35 @@ +package com.mindee.parsing.v2.field; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.mindee.geometry.Polygon; +import com.mindee.geometry.PolygonDeserializer; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Location data for a field. + */ +@Getter +@EqualsAndHashCode +@JsonIgnoreProperties(ignoreUnknown = true) +@AllArgsConstructor +@NoArgsConstructor +public class FieldLocation { + + /** + * Free polygon made up of points. + */ + @JsonProperty("polygon") + @JsonDeserialize(using = PolygonDeserializer.class) + private Polygon polygon; + + /** + * Page ID. 
+ */ + @JsonProperty("page") + private int page; +} diff --git a/src/main/java/com/mindee/parsing/v2/field/InferenceFields.java b/src/main/java/com/mindee/parsing/v2/field/InferenceFields.java new file mode 100644 index 000000000..1b5b8e374 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/field/InferenceFields.java @@ -0,0 +1,47 @@ +package com.mindee.parsing.v2.field; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.mindee.parsing.SummaryHelper; +import java.util.LinkedHashMap; +import java.util.StringJoiner; +import lombok.EqualsAndHashCode; + +/** + * Inference fields map. + */ +@EqualsAndHashCode(callSuper = true) +@JsonIgnoreProperties(ignoreUnknown = true) +public final class InferenceFields extends LinkedHashMap { + public String toString(int indent) { + String padding = String.join("", java.util.Collections.nCopies(indent, " ")); + if (this.isEmpty()) { + return ""; + } + StringJoiner joiner = new StringJoiner("\n"); + + this.forEach((fieldKey, fieldValue) -> { + StringBuilder strBuilder = new StringBuilder(); + strBuilder.append(padding).append(":").append(fieldKey).append(": "); + + if (fieldValue.getListField() != null) { + ListField listField = fieldValue.getListField(); + if (listField.getItems() != null && !listField.getItems().isEmpty()) { + strBuilder.append(listField); + } + } else if (fieldValue.getObjectField() != null) { + strBuilder.append(fieldValue.getObjectField()); + } else if (fieldValue.getSimpleField() != null) { + strBuilder.append(fieldValue.getSimpleField().getValue() != null ? 
fieldValue.getSimpleField().getValue() : ""); + + } + joiner.add(strBuilder); + }); + + return SummaryHelper.cleanSummary(joiner.toString()); + } + + @Override + public String toString() { + return this.toString(0); + } +} diff --git a/src/main/java/com/mindee/parsing/v2/field/ListField.java b/src/main/java/com/mindee/parsing/v2/field/ListField.java new file mode 100644 index 000000000..1234e914e --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/field/ListField.java @@ -0,0 +1,47 @@ +package com.mindee.parsing.v2.field; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.ArrayList; +import java.util.List; +import java.util.StringJoiner; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Field holding a list of fields. + */ +@Getter +@EqualsAndHashCode(callSuper = true) +@JsonIgnoreProperties(ignoreUnknown = true) +@AllArgsConstructor +@NoArgsConstructor +public final class ListField extends BaseField { + + /** + * Items of the list. 
+ */ + @JsonProperty("items") + private List items = new ArrayList<>(); + + @Override + public String toString() { + if (items == null || items.isEmpty()) { + return "\n"; + } + StringJoiner joiner = new StringJoiner("\n * "); + joiner.add(""); + for (DynamicField item : items) { + if (item != null) { + if (item.getType() == DynamicField.FieldType.OBJECT_FIELD) { + joiner.add(item.getObjectField().toStringFromList()); + } else { + joiner.add(item.toString()); + } + } + } + return joiner.toString(); + } +} diff --git a/src/main/java/com/mindee/parsing/v2/field/ObjectField.java b/src/main/java/com/mindee/parsing/v2/field/ObjectField.java new file mode 100644 index 000000000..36991c12a --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/field/ObjectField.java @@ -0,0 +1,34 @@ +package com.mindee.parsing.v2.field; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Field holding a map of sub-fields. + */ +@Getter +@EqualsAndHashCode(callSuper = true) +@JsonIgnoreProperties(ignoreUnknown = true) +@AllArgsConstructor +@NoArgsConstructor +public class ObjectField extends BaseField { + + /** + * Sub-fields keyed by their name. + */ + @JsonProperty("fields") + private InferenceFields fields; + + @Override + public String toString() { + return "\n" + (fields != null ? fields.toString(1) : ""); + } + + public String toStringFromList(){ + return fields != null ? 
fields.toString(2).substring(4) : ""; + } +} diff --git a/src/main/java/com/mindee/parsing/v2/field/SimpleField.java b/src/main/java/com/mindee/parsing/v2/field/SimpleField.java new file mode 100644 index 000000000..20dd783a8 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/field/SimpleField.java @@ -0,0 +1,32 @@ +package com.mindee.parsing.v2.field; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Field holding a single scalar value. + */ +@Getter +@EqualsAndHashCode(callSuper = true) +@JsonIgnoreProperties(ignoreUnknown = true) +@JsonDeserialize(using = SimpleFieldDeserializer.class) +@AllArgsConstructor +@NoArgsConstructor +public final class SimpleField extends BaseField { + + /** + * Value (string, boolean, number … or {@code null}). + */ + @JsonProperty("value") + private Object value; + + @Override + public String toString() { + return value == null ? "" : value.toString(); + } +} diff --git a/src/main/java/com/mindee/parsing/v2/field/SimpleFieldDeserializer.java b/src/main/java/com/mindee/parsing/v2/field/SimpleFieldDeserializer.java new file mode 100644 index 000000000..56de1c508 --- /dev/null +++ b/src/main/java/com/mindee/parsing/v2/field/SimpleFieldDeserializer.java @@ -0,0 +1,34 @@ +package com.mindee.parsing.v2.field; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.ObjectCodec; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; + +/** + * Custom deserializer for {@link SimpleField}. 
+ */ +public final class SimpleFieldDeserializer extends JsonDeserializer { + + @Override + public SimpleField deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException { + ObjectCodec codec = jp.getCodec(); + JsonNode root = codec.readTree(jp); + + JsonNode valueNode = root.get("value"); + Object value = null; + + if (valueNode != null && !valueNode.isNull()) { + if (valueNode.isTextual()) { + value = valueNode.asText(); + } else if (valueNode.isNumber()) { + value = valueNode.doubleValue(); + } else if (valueNode.isBoolean()) { + value = valueNode.asBoolean(); + } + } + return new SimpleField(value); + } +} diff --git a/src/test/java/com/mindee/MindeeClientV2IT.java b/src/test/java/com/mindee/MindeeClientV2IT.java new file mode 100644 index 000000000..a3e42c0b3 --- /dev/null +++ b/src/test/java/com/mindee/MindeeClientV2IT.java @@ -0,0 +1,121 @@ +package com.mindee; + +import com.mindee.http.MindeeHttpExceptionV2; +import com.mindee.input.LocalInputSource; +import com.mindee.parsing.v2.InferenceResponse; +import java.io.File; +import java.io.IOException; +import org.junit.jupiter.api.*; +import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@Tag("integration") +@DisplayName("MindeeClientV2 – integration tests (V2)") +class MindeeClientV2IntegrationTest { + + private MindeeClientV2 mindeeClient; + private String modelId; + + @BeforeAll + void setUp() { + String apiKey = System.getenv("MINDEE_V2_API_KEY"); + modelId = System.getenv("MINDEE_V2_FINDOC_MODEL_ID"); + + assumeTrue( + apiKey != null && !apiKey.trim().isEmpty(), + "MINDEE_V2_API_KEY env var is missing – integration tests skipped" + ); + assumeTrue( + modelId != null && !modelId.trim().isEmpty(), + "MINDEE_V2_FINDOC_MODEL_ID env var is missing – integration tests skipped" + ); + + mindeeClient = new MindeeClientV2(apiKey); + } + + @Test + @DisplayName("Empty, multi-page PDF – enqueue 
& parse must succeed") + void parseFile_emptyMultiPage_mustSucceed() throws IOException, InterruptedException { + LocalInputSource source = new LocalInputSource( + new File("src/test/resources/file_types/pdf/multipage_cut-2.pdf")); + + InferenceOptions options = + InferenceOptions.builder(modelId).build(); + + InferenceResponse response = mindeeClient.enqueueAndParse(source, options); + + assertNotNull(response); + assertNotNull(response.getInference()); + + assertNotNull(response.getInference().getFile()); + assertEquals("multipage_cut-2.pdf", response.getInference().getFile().getName()); + + assertNotNull(response.getInference().getModel()); + assertEquals(modelId, response.getInference().getModel().getId()); + + assertNotNull(response.getInference().getResult()); + assertNull(response.getInference().getResult().getOptions()); + } + + @Test + @DisplayName("Filled, single-page image – enqueue & parse must succeed") + void parseFile_filledSinglePage_mustSucceed() throws IOException, InterruptedException { + LocalInputSource source = new LocalInputSource( + new File("src/test/resources/products/financial_document/default_sample.jpg")); + + InferenceOptions options = + InferenceOptions.builder(modelId).build(); + + InferenceResponse response = mindeeClient.enqueueAndParse(source, options); + + assertNotNull(response); + assertNotNull(response.getInference()); + + assertNotNull(response.getInference().getFile()); + assertEquals("default_sample.jpg", response.getInference().getFile().getName()); + + assertNotNull(response.getInference().getModel()); + assertEquals(modelId, response.getInference().getModel().getId()); + + assertNotNull(response.getInference().getResult()); + assertNotNull(response.getInference().getResult().getFields()); + assertNotNull(response.getInference().getResult().getFields().get("supplier_name")); + assertEquals( + "John Smith", + response.getInference() + .getResult() + .getFields() + .get("supplier_name") + .getSimpleField() + .getValue() + 
); + } + + @Test + @DisplayName("Invalid model ID – enqueue must raise 422") + void invalidModel_mustThrowError() throws IOException { + LocalInputSource source = new LocalInputSource( + new File("src/test/resources/file_types/pdf/multipage_cut-2.pdf")); + + InferenceOptions options = + InferenceOptions.builder("INVALID MODEL ID").build(); + + MindeeHttpExceptionV2 ex = assertThrows( + MindeeHttpExceptionV2.class, + () -> mindeeClient.enqueue(source, options) + ); + assertEquals(422, ex.getStatus()); + } + + @Test + @DisplayName("Invalid job ID – parseQueued must raise an error") + void invalidJob_mustThrowError() { + MindeeHttpExceptionV2 ex = assertThrows( + MindeeHttpExceptionV2.class, + () -> mindeeClient.parseQueued("not-a-valid-job-ID") + ); + assertEquals(404, ex.getStatus()); + assertNotNull(ex); + } +} diff --git a/src/test/java/com/mindee/MindeeClientV2Test.java b/src/test/java/com/mindee/MindeeClientV2Test.java new file mode 100644 index 000000000..91831b420 --- /dev/null +++ b/src/test/java/com/mindee/MindeeClientV2Test.java @@ -0,0 +1,104 @@ +package com.mindee; + +import com.mindee.http.MindeeApiV2; +import com.mindee.input.LocalInputSource; +import com.mindee.input.LocalResponse; +import com.mindee.parsing.v2.CommonResponse; +import com.mindee.parsing.v2.InferenceResponse; +import com.mindee.parsing.v2.JobResponse; +import java.io.File; +import java.io.IOException; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.*; +import static org.mockito.Mockito.*; + +@DisplayName("MindeeClientV2 – client / API interaction tests") +class MindeeClientV2Test { + /** + * Creates a fully mocked MindeeClientV2. 
+ */ + private static MindeeClientV2 makeClientWithMockedApi(MindeeApiV2 mockedApi) { + return new MindeeClientV2(mockedApi); + } + + @Nested + @DisplayName("enqueue()") + class Enqueue { + @Test + @DisplayName("sends exactly one HTTP call and yields a non-null response") + void enqueue_post_async() throws IOException { + MindeeApiV2 predictable = Mockito.mock(MindeeApiV2.class); + when(predictable.enqueuePost(any(LocalInputSource.class), any(InferenceOptions.class))) + .thenReturn(new JobResponse()); + + MindeeClientV2 mindeeClient = makeClientWithMockedApi(predictable); + + LocalInputSource input = + new LocalInputSource(new File("src/test/resources/file_types/pdf/blank_1.pdf")); + JobResponse response = mindeeClient.enqueue( + input, + InferenceOptions.builder("dummy-model-id").build() + ); + + assertNotNull(response, "enqueue() must return a response"); + verify(predictable, atMostOnce()) + .enqueuePost(any(LocalInputSource.class), any(InferenceOptions.class)); + } + } + + @Nested + @DisplayName("parseQueued()") + class ParseQueued { + @Test + @DisplayName("hits the HTTP endpoint once and returns a non-null response") + void document_getQueued_async() { + MindeeApiV2 predictable = Mockito.mock(MindeeApiV2.class); + when(predictable.getInferenceFromQueue(anyString())) + .thenReturn(new JobResponse()); + + MindeeClientV2 mindeeClient = makeClientWithMockedApi(predictable); + + CommonResponse response = mindeeClient.parseQueued("dummy-id"); + assertNotNull(response, "parseQueued() must return a response"); + verify(predictable, atMostOnce()).getInferenceFromQueue(anyString()); + } + } + + @Nested + @DisplayName("loadInference()") + class LoadInference { + + @Test + @DisplayName("parses local JSON and exposes correct field values") + void inference_loadsLocally() throws IOException { + MindeeClientV2 mindeeClient = new MindeeClientV2("dummy"); + File jsonFile = + new File("src/test/resources/v2/products/financial_document/complete.json"); + LocalResponse 
localResponse = new LocalResponse(jsonFile); + + InferenceResponse loaded = mindeeClient.loadInference(localResponse); + + assertNotNull(loaded, "Loaded InferenceResponse must not be null"); + assertEquals( + "12345678-1234-1234-1234-123456789abc", + loaded.getInference().getModel().getId(), + "Model Id mismatch" + ); + assertEquals( + "John Smith", + loaded.getInference() + .getResult() + .getFields() + .get("supplier_name") + .getSimpleField() + .getValue(), + "Supplier name mismatch" + ); + } + } +} diff --git a/src/test/java/com/mindee/parsing/v2/InferenceTest.java b/src/test/java/com/mindee/parsing/v2/InferenceTest.java new file mode 100644 index 000000000..6c9df015d --- /dev/null +++ b/src/test/java/com/mindee/parsing/v2/InferenceTest.java @@ -0,0 +1,265 @@ +package com.mindee.parsing.v2; + +import com.mindee.MindeeClientV2; +import com.mindee.input.LocalResponse; +import com.mindee.parsing.v2.field.*; +import com.mindee.parsing.v2.field.DynamicField.FieldType; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; + +@DisplayName("InferenceV2 – field integrity checks") +class InferenceTest { + + private InferenceResponse loadFromResource(String resourcePath) throws IOException { + MindeeClientV2 dummyClient = new MindeeClientV2("dummy"); + return dummyClient.loadInference(new LocalResponse(InferenceTest.class.getClassLoader().getResourceAsStream(resourcePath))); + } + + private String readFileAsString(String path) + throws IOException + { + byte[] encoded = IOUtils.toByteArray(Objects.requireNonNull(InferenceTest.class.getClassLoader().getResourceAsStream(path))); + return new String(encoded); + } + + + @Nested + @DisplayName("When the async prediction is blank") + class 
BlankPrediction { + + @Test + @DisplayName("all properties must be valid") + void asyncPredict_whenEmpty_mustHaveValidProperties() throws IOException { + InferenceResponse response = loadFromResource("v2/products/financial_document/blank.json"); + InferenceFields fields = response.getInference().getResult().getFields(); + + assertEquals(21, fields.size(), "Expected 21 fields"); + + DynamicField taxes = fields.get("taxes"); + assertNotNull(taxes, "'taxes' field must exist"); + ListField taxesList = taxes.getListField(); + assertNotNull(taxesList, "'taxes' must be a ListField"); + assertTrue(taxesList.getItems().isEmpty(), "'taxes' list must be empty"); + + DynamicField supplierAddress = fields.get("supplier_address"); + assertNotNull(supplierAddress, "'supplier_address' field must exist"); + ObjectField supplierObj = supplierAddress.getObjectField(); + assertNotNull(supplierObj, "'supplier_address' must be an ObjectField"); + + for (Map.Entry entry : fields.entrySet()) { + DynamicField value = entry.getValue(); + if (value == null) { + continue; + } + + FieldType type = value.getType(); + switch (type) { + case LIST_FIELD: + assertNotNull(value.getListField(), entry.getKey() + " – ListField expected"); + assertNull(value.getObjectField(), entry.getKey() + " – ObjectField must be null"); + assertNull(value.getSimpleField(), entry.getKey() + " – SimpleField must be null"); + break; + + case OBJECT_FIELD: + assertNotNull(value.getObjectField(), entry.getKey() + " – ObjectField expected"); + assertNull(value.getListField(), entry.getKey() + " – ListField must be null"); + assertNull(value.getSimpleField(), entry.getKey() + " – SimpleField must be null"); + break; + + case SIMPLE_FIELD: + default: + assertNotNull(value.getSimpleField(), entry.getKey() + " – SimpleField expected"); + assertNull(value.getListField(), entry.getKey() + " – ListField must be null"); + assertNull(value.getObjectField(), entry.getKey() + " – ObjectField must be null"); + break; + } + } + } + } 
+
+  @Nested
+  @DisplayName("When the async prediction is complete")
+  class CompletePrediction {
+
+    /**
+     * Loads the complete financial-document fixture and checks the field count, the single
+     * {@code taxes} item (an object with 3 sub-fields, {@code base} equal to 31.5) and the
+     * {@code supplier_address.country} value.
+     *
+     * @throws IOException declared because {@code loadFromResource} is invoked.
+     */
+    @Test
+    @DisplayName("all properties must be valid")
+    void asyncPredict_whenComplete_mustHaveValidProperties() throws IOException {
+      InferenceResponse response = loadFromResource("v2/products/financial_document/complete.json");
+      InferenceFields fields = response.getInference().getResult().getFields();
+
+      assertEquals(21, fields.size(), "Expected 21 fields");
+
+      DynamicField taxes = fields.get("taxes");
+      assertNotNull(taxes, "'taxes' field must exist");
+      ListField taxesList = taxes.getListField();
+      assertNotNull(taxesList, "'taxes' must be a ListField");
+      assertEquals(1, taxesList.getItems().size(), "'taxes' list must contain exactly one item");
+      assertNotNull(taxes.toString(), "'taxes' toString() must not be null");
+
+      ObjectField taxItemObj = taxesList.getItems().get(0).getObjectField();
+      assertNotNull(taxItemObj, "First item of 'taxes' must be an ObjectField");
+      assertEquals(3, taxItemObj.getFields().size(), "Tax ObjectField must contain 3 sub-fields");
+      assertEquals(
+          31.5,
+          taxItemObj.getFields().get("base").getSimpleField().getValue(),
+          "'taxes.base' value mismatch"
+      );
+
+      DynamicField supplierAddress = fields.get("supplier_address");
+      assertNotNull(supplierAddress, "'supplier_address' field must exist");
+
+      ObjectField supplierObj = supplierAddress.getObjectField();
+      assertNotNull(supplierObj, "'supplier_address' must be an ObjectField");
+
+      DynamicField country = supplierObj.getFields().get("country");
+      assertNotNull(country, "'supplier_address.country' must exist");
+      assertEquals("USA", country.getSimpleField().getValue());
+      assertEquals("USA", country.toString());
+
+      assertNotNull(supplierAddress.toString(), "'supplier_address'.toString() must not be null");
+    }
+  }
+
+  @Nested
+  @DisplayName("deep_nested_fields.json")
+  class DeepNestedFields {
+
+    /**
+     * Walks the nested fixture from the root fields down to a simple field inside the first
+     * item of a nested list, asserting the accessor type at each level.
+     *
+     * @throws IOException declared because {@code loadFromResource} is invoked.
+     */
+    @Test
+    @DisplayName("all nested structures must be typed correctly")
+    void deepNestedFields_mustExposeCorrectTypes() throws IOException {
+      InferenceResponse resp = loadFromResource("v2/inference/deep_nested_fields.json");
+      Inference inf = resp.getInference();
+      assertNotNull(inf);
+
+      InferenceFields root = inf.getResult().getFields();
+      assertNotNull(root.get("field_simple").getSimpleField());
+      assertNotNull(root.get("field_object").getObjectField());
+
+      ObjectField fieldObject = root.get("field_object").getObjectField();
+      InferenceFields lvl1 = fieldObject.getFields();
+      assertNotNull(lvl1.get("sub_object_list").getListField());
+      assertNotNull(lvl1.get("sub_object_object").getObjectField());
+
+      ObjectField subObjectObject = lvl1.get("sub_object_object").getObjectField();
+      InferenceFields lvl2 = subObjectObject.getFields();
+      assertNotNull(lvl2.get("sub_object_object_sub_object_list").getListField());
+
+      ListField nestedList = lvl2.get("sub_object_object_sub_object_list").getListField();
+      // Element type is required: items.get(0).getObjectField() does not compile on a raw List.
+      List<DynamicField> items = nestedList.getItems();
+      assertFalse(items.isEmpty());
+      assertNotNull(items.get(0).getObjectField());
+
+      ObjectField firstItem = items.get(0).getObjectField();
+      SimpleField deepSimple = firstItem.getFields()
+          .get("sub_object_object_sub_object_list_simple").getSimpleField();
+      assertEquals("value_9", deepSimple.getValue());
+    }
+  }
+
+  @Nested
+  @DisplayName("standard_field_types.json")
+  class StandardFieldTypes {
+
+    /**
+     * Asserts that the four root fields of the fixture expose the expected accessor
+     * (simple, object, and list for both list variants).
+     *
+     * @throws IOException declared because {@code loadFromResource} is invoked.
+     */
+    @Test
+    @DisplayName("simple / object / list variants must be recognised")
+    void standardFieldTypes_mustExposeCorrectTypes() throws IOException {
+      InferenceResponse resp = loadFromResource("v2/inference/standard_field_types.json");
+      Inference inf = resp.getInference();
+      assertNotNull(inf);
+
+      InferenceFields root = inf.getResult().getFields();
+      assertNotNull(root.get("field_simple").getSimpleField());
+      assertNotNull(root.get("field_object").getObjectField());
+      assertNotNull(root.get("field_simple_list").getListField());
+      assertNotNull(root.get("field_object_list").getListField());
+    }
+  }
+
+  @Nested
+  @DisplayName("raw_texts.json")
+  class RawTexts {
+
+    /**
+     * Asserts that the result options are present, that two raw texts are exposed, and that
+     * the first one carries page 0 and the expected content.
+     *
+     * @throws IOException declared because {@code loadFromResource} is invoked.
+     */
+    @Test
+    @DisplayName("raw texts option must be parsed and exposed")
+    void rawTexts_mustBeAccessible() throws IOException {
+      InferenceResponse resp = loadFromResource("v2/inference/raw_texts.json");
+      Inference inf = resp.getInference();
+      assertNotNull(inf);
+
+      InferenceResultOptions opts = inf.getResult().getOptions();
+      assertNotNull(opts, "Options should not be null");
+
+      // Element type is required: assigning rawTexts.get(0) to RawText fails on a raw List.
+      List<RawText> rawTexts = opts.getRawTexts();
+      assertEquals(2, rawTexts.size());
+
+      RawText first = rawTexts.get(0);
+      assertEquals(0, first.getPage());
+      assertEquals("This is the raw text of the first page...", first.getContent());
+    }
+  }
+
+  @Nested
+  @DisplayName("complete.json – full inference response")
+  class FullInference {
+    /**
+     * Checks the inference id, a sample of simple/list/object field values, the model id,
+     * the file name and alias, and that no result options are present.
+     *
+     * @throws IOException declared because {@code loadFromResource} is invoked.
+     */
+    @Test
+    @DisplayName("complete financial-document JSON must round-trip correctly")
+    void fullInferenceResponse_mustExposeEveryProperty() throws IOException {
+      InferenceResponse resp = loadFromResource("v2/products/financial_document/complete.json");
+
+      Inference inf = resp.getInference();
+      assertNotNull(inf);
+      assertEquals("12345678-1234-1234-1234-123456789abc", inf.getId());
+
+      InferenceFields f = inf.getResult().getFields();
+
+      SimpleField date = f.get("date").getSimpleField();
+      assertEquals("2019-11-02", date.getValue());
+
+      ListField taxes = f.get("taxes").getListField();
+      ObjectField firstTax = taxes.getItems().get(0).getObjectField();
+      SimpleField baseTax = firstTax.getFields().get("base").getSimpleField();
+      assertEquals(31.5, baseTax.getValue());
+
+      ObjectField customerAddr = f.get("customer_address").getObjectField();
+      SimpleField city = customerAddr.getFields().get("city").getSimpleField();
+      assertEquals("New York", city.getValue());
+
+      InferenceResultModel model = inf.getModel();
+      assertNotNull(model);
+      assertEquals("12345678-1234-1234-1234-123456789abc", model.getId());
+
+      InferenceResultFile file = inf.getFile();
+      assertNotNull(file);
+      assertEquals("complete.jpg", file.getName());
+      assertNull(file.getAlias());
+
+      assertNull(inf.getResult().getOptions());
+    }
+  }
+
+  @Nested
+  @DisplayName("rst display")
+  class RstDisplay {
+    /**
+     * Compares the inference's {@code toString()} output with the reference
+     * {@code standard_field_types.rst} file.
+     *
+     * <p>No file is written by this test: an earlier debug dump to {@code local_test/dump.txt}
+     * failed with {@code FileNotFoundException} whenever that directory was absent.
+     *
+     * @throws IOException declared because the resource helpers are invoked.
+     */
+    @Test
+    @DisplayName("rst display must be parsed and exposed")
+    void rstDisplay_mustBeAccessible() throws IOException {
+      InferenceResponse resp = loadFromResource("v2/inference/standard_field_types.json");
+      String rstRef = readFileAsString("v2/inference/standard_field_types.rst");
+      Inference inf = resp.getInference();
+      // Fail with a clear assertion before the inference is dereferenced.
+      assertNotNull(inf);
+      assertEquals(rstRef, inf.toString());
+    }
+  }
+}
diff --git a/src/test/resources b/src/test/resources
index f599a960e..f43634e5b 160000
--- a/src/test/resources
+++ b/src/test/resources
@@ -1 +1 @@
-Subproject commit f599a960e78f4a390984c6263f387aa8cdebe0f0
+Subproject commit f43634e5b7c7f773c9c3dbec461b143c21a8f6d3
diff --git a/tests/test_code_samples.sh b/tests/test_code_samples.sh
index 43e00b2bf..807b02adb 100755
--- a/tests/test_code_samples.sh
+++ b/tests/test_code_samples.sh
@@ -5,6 +5,8 @@ OUTPUT_FILE='SimpleMindeeClient.java'
 ACCOUNT=$1
 ENDPOINT=$2
 API_KEY=$3
+API_KEY_V2=$4
+MODEL_ID=$5
 
 if [ -z "${ACCOUNT}" ]; then echo "ACCOUNT is required"; exit 1; fi
 if [ -z "${ENDPOINT}" ]; then echo "ENDPOINT is required"; exit 1; fi
@@ -14,6 +16,13 @@ mvn dependency:copy-dependencies
 
 for f in $(find docs/code_samples -maxdepth 1 -name "*.txt" -not -name "workflow_execution.txt" | sort -h)
 do
+  if echo "${f}" | grep -q "default_v2.txt"; then
+    if [ -z "${API_KEY_V2}" ] || [ -z "${MODEL_ID}" ]; then
+      echo "Skipping ${f} (API_KEY_V2 or MODEL_ID not supplied)"
+      echo
+      continue
+    fi
+  fi
   echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
   echo "${f}"
   echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
@@ -41,6 +50,14 @@ do
     sed -i "s/my-version/1/" $OUTPUT_FILE
   fi
 
+  if echo "${f}" | grep -q "default_v2.txt"
+  then
+    sed -i "s/MY_API_KEY/$API_KEY_V2/" $OUTPUT_FILE
+    sed -i "s/MY_MODEL_ID/$MODEL_ID/" $OUTPUT_FILE
+  else
+    sed -i "s/my-api-key/$API_KEY/" $OUTPUT_FILE
+  fi
+
   sed -i "s/my-api-key/$API_KEY/" $OUTPUT_FILE
   sed -i "s/\/path\/to\/the\/file.ext/src\/test\/resources\/file_types\/pdf\/blank_1.pdf/" $OUTPUT_FILE