diff --git a/.github/workflows/_static-analysis.yml b/.github/workflows/_static-analysis.yml index 5cf904dc..4c630e59 100644 --- a/.github/workflows/_static-analysis.yml +++ b/.github/workflows/_static-analysis.yml @@ -5,6 +5,7 @@ name: Static Analysis on: workflow_call: + workflow_dispatch: jobs: linting: @@ -18,7 +19,7 @@ jobs: - name: set up Ruby uses: ruby/setup-ruby@v1 with: - ruby-version: "3.0" + ruby-version: "3.0.0" bundler-cache: true - name: Analyse the code with Rubocop diff --git a/.rubocop.yml b/.rubocop.yml index 1d5a8466..7b56d374 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -10,7 +10,7 @@ AllCops: - local_test/* - Steepfile - TargetRubyVersion: 3.0 + TargetRubyVersion: 3.0.0 SuggestExtensions: false Gemspec/DevelopmentDependencies: diff --git a/docs/code_samples/workflow_execution.txt b/docs/code_samples/workflow_execution.txt index ba0a27c4..86776a76 100644 --- a/docs/code_samples/workflow_execution.txt +++ b/docs/code_samples/workflow_execution.txt @@ -3,7 +3,7 @@ # gem install mindee # -require_relative 'mindee' +require 'mindee' workflow_id = 'workflow-id' diff --git a/docs/code_samples/workflow_polling.txt b/docs/code_samples/workflow_polling.txt new file mode 100644 index 00000000..3d0261ec --- /dev/null +++ b/docs/code_samples/workflow_polling.txt @@ -0,0 +1,36 @@ +# +# Install the Ruby client library by running: +# gem install mindee +# + +require 'mindee' + +workflow_id = 'workflow-id' + +# Init a new client +mindee_client = Mindee::Client.new + +# Load a file from disk +input_source = mindee_client.source_from_path('path/to/my/file.ext') + +# Initialize a custom endpoint for this product +custom_endpoint = mindee_client.create_endpoint( + account_name: 'my-account', + endpoint_name: 'my-endpoint', + version: 'my-version' +) + +# Parse the file +result = mindee_client.parse( + input_source, + Mindee::Product::Universal::Universal, + endpoint: custom_endpoint, + options: { + rag: true, + workflow_id: workflow_id + } +) + +# Print a full summary of the parsed data in RST format +puts result.document + diff --git a/lib/mindee/client.rb b/lib/mindee/client.rb index 8ab8f688..1ea903f5 100644 --- a/lib/mindee/client.rb +++ b/lib/mindee/client.rb @@ -54,8 +54,8 @@ def initialize(params: {}) # @!attribute delay_sec [Numeric] Delay between polling attempts. Defaults to 1.5. # @!attribute max_retries [Integer] Maximum number of retries. Defaults to 80. class ParseOptions - attr_accessor :all_words, :full_text, :close_file, :page_options, :cropper, - :initial_delay_sec, :delay_sec, :max_retries + attr_accessor :all_words, :full_text, :close_file, :page_options, :cropper, :rag, + :workflow_id, :initial_delay_sec, :delay_sec, :max_retries def initialize(params: {}) params = params.transform_keys(&:to_sym) @@ -66,6 +66,8 @@ def initialize(params: {}) raw_page_options = PageOptions.new(params: raw_page_options) unless raw_page_options.is_a?(PageOptions) @page_options = raw_page_options @cropper = params.fetch(:cropper, false) + @rag = params.fetch(:rag, false) + @workflow_id = params.fetch(:workflow_id, nil) @initial_delay_sec = params.fetch(:initial_delay_sec, 2) @delay_sec = params.fetch(:delay_sec, 1.5) @max_retries = params.fetch(:max_retries, 80) @@ -176,13 +178,10 @@ def parse_sync(input_source, product_class, endpoint, options) prediction, raw_http = endpoint.predict( input_source, - options.all_words, - options.full_text, - options.close_file, - options.cropper + options ) - Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http) + Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_s) end # Enqueue a document for async parsing @@ -207,6 +206,8 @@ def parse_sync(input_source, product_class, endpoint, options) # - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages. # * `:cropper` [bool] Whether to include cropper results for each page. # This performs a cropping operation on the server and will increase response time. + # * `:rag` [bool] Whether to enable Retrieval-Augmented Generation. Only works if a Workflow ID is provided. + # * `:workflow_id` [String, nil] ID of the workflow to use. # @param endpoint [Mindee::HTTP::Endpoint] Endpoint of the API. # @return [Mindee::Parsing::Common::ApiResponse] def enqueue(input_source, product_class, endpoint: nil, options: {}) @@ -216,12 +217,9 @@ def enqueue(input_source, product_class, endpoint: nil, options: {}) prediction, raw_http = endpoint.predict_async( input_source, - opts.all_words, - opts.full_text, - opts.close_file, - opts.cropper + opts ) - Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http) + Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_json) end # Parses a queued document @@ -236,7 +234,7 @@ def parse_queued(job_id, product_class, endpoint: nil) endpoint = initialize_endpoint(product_class) if endpoint.nil? logger.debug("Fetching queued document as '#{endpoint.url_root}'") prediction, raw_http = endpoint.parse_async(job_id) - Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http) + Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http.to_json) end # Enqueue a document for async parsing and automatically try to retrieve it @@ -261,6 +259,8 @@ def parse_queued(job_id, product_class, endpoint: nil) # - `:on_min_pages` [Integer] Apply the operation only if the document has at least this many pages. # * `:cropper` [bool, nil] Whether to include cropper results for each page. # This performs a cropping operation on the server and will increase response time. + # * `:rag` [bool] Whether to enable Retrieval-Augmented Generation. Only works if a Workflow ID is provided. + # * `:workflow_id` [String, nil] ID of the workflow to use. # * `:initial_delay_sec` [Numeric] Initial delay before polling. Defaults to 2. # * `:delay_sec` [Numeric] Delay between polling attempts. Defaults to 1.5. # * `:max_retries` [Integer] Maximum number of retries. Defaults to 80. diff --git a/lib/mindee/http/endpoint.rb b/lib/mindee/http/endpoint.rb index 3700abfc..53fc1766 100644 --- a/lib/mindee/http/endpoint.rb +++ b/lib/mindee/http/endpoint.rb @@ -34,6 +34,8 @@ class Endpoint attr_reader :request_timeout # @return [String] attr_reader :url_root + # @return [String] + attr_reader :base_url def initialize(owner, url_name, version, api_key: '') @owner = owner @@ -44,25 +46,19 @@ def initialize(owner, url_name, version, api_key: '') logger.debug('API key set from environment') end @api_key = api_key.nil? || api_key.empty? ? ENV.fetch(API_KEY_ENV_NAME, API_KEY_DEFAULT) : api_key - base_url = ENV.fetch(BASE_URL_ENV_NAME, BASE_URL_DEFAULT) - @url_root = "#{base_url.chomp('/')}/products/#{@owner}/#{@url_name}/v#{@version}" + @base_url = ENV.fetch(BASE_URL_ENV_NAME, BASE_URL_DEFAULT).chomp('/') + @url_root = "#{@base_url}/products/#{@owner}/#{@url_name}/v#{@version}" end # Call the prediction API. # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] - # @param all_words [bool] Whether the full word extraction needs to be performed - # @param full_text [bool] Whether to include the full OCR text response in compatible APIs - # @param close_file [bool] Whether the file will be closed after reading - # @param cropper [bool] Whether a cropping operation will be applied + # @param opts [ParseOptions] Parse options. # @return [Array] - def predict(input_source, all_words, full_text, close_file, cropper) + def predict(input_source, opts) check_api_key response = predict_req_post( input_source, - all_words: all_words, - full_text: full_text, - close_file: close_file, - cropper: cropper + opts ) if !response.nil? && response.respond_to?(:body) hashed_response = JSON.parse(response.body, object_class: Hash) @@ -76,14 +72,11 @@ def predict(input_source, all_words, full_text, close_file, cropper) # Call the prediction API. # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] - # @param all_words [bool] Whether the full word extraction needs to be performed - # @param full_text [bool] Whether to include the full OCR text response in compatible APIs. - # @param close_file [bool] Whether the file will be closed after reading - # @param cropper [bool] Whether a cropping operation will be applied + # @param opts [ParseOptions, Hash] Parse options. # @return [Array] - def predict_async(input_source, all_words, full_text, close_file, cropper) + def predict_async(input_source, opts) check_api_key - response = document_queue_req_get(input_source, all_words, full_text, close_file, cropper) + response = document_queue_req_post(input_source, opts) if !response.nil? && response.respond_to?(:body) hashed_response = JSON.parse(response.body, object_class: Hash) return [hashed_response, response.body] if ResponseValidation.valid_async_response?(response) @@ -100,7 +93,7 @@ def predict_async(input_source, all_words, full_text, close_file, cropper) # @return [Array] def parse_async(job_id) check_api_key - response = document_queue_req(job_id) + response = document_queue_req_get(job_id) hashed_response = JSON.parse(response.body, object_class: Hash) return [hashed_response, response.body] if ResponseValidation.valid_async_response?(response) @@ -112,17 +105,14 @@ def parse_async(job_id) private # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] - # @param all_words [bool] Whether the full word extraction needs to be performed - # @param full_text [bool] Whether to include the full OCR text response in compatible APIs. - # @param close_file [bool] Whether the file will be closed after reading - # @param cropper [bool] Whether a cropping operation will be applied + # @param opts [ParseOptions] Parse options. # @return [Net::HTTPResponse, nil] - def predict_req_post(input_source, all_words: false, full_text: false, close_file: true, cropper: false) + def predict_req_post(input_source, opts) uri = URI("#{@url_root}/predict") params = {} # : Hash[Symbol | String, untyped] - params[:cropper] = 'true' if cropper - params[:full_text_ocr] = 'true' if full_text + params[:cropper] = 'true' if opts.cropper + params[:full_text_ocr] = 'true' if opts.full_text uri.query = URI.encode_www_form(params) headers = { @@ -131,32 +121,33 @@ def predict_req_post(input_source, all_words: false, full_text: false, close_fil } req = Net::HTTP::Post.new(uri, headers) form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource) - [['document', input_source.url]] + [['document', input_source.url]] # : Array[untyped] else - [input_source.read_contents(close: close_file)] + [input_source.read_contents(close: opts.close_file)] # : Array[untyped] end - form_data.push ['include_mvision', 'true'] if all_words + form_data.push ['include_mvision', 'true'] if opts.all_words req.set_form(form_data, 'multipart/form-data') - response = nil Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http| - response = http.request(req) + return http.request(req) end - response + raise Mindee::Errors::MindeeError, 'Could not resolve server response.' end # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::URLInputSource] - # @param all_words [bool] Whether the full word extraction needs to be performed - # @param full_text [bool] Whether to include the full OCR text response in compatible APIs. - # @param close_file [bool] Whether the file will be closed after reading - # @param cropper [bool] Whether a cropping operation will be applied - # @return [Net::HTTPResponse, nil] - def document_queue_req_get(input_source, all_words, full_text, close_file, cropper) - uri = URI("#{@url_root}/predict_async") + # @param opts [ParseOptions] Parse options. + # @return [Net::HTTPResponse] + def document_queue_req_post(input_source, opts) + uri = if opts.workflow_id + URI("#{@base_url}/workflows/#{opts.workflow_id}/predict_async") + else + URI("#{@url_root}/predict_async") + end params = {} # : Hash[Symbol | String, untyped] - params[:cropper] = 'true' if cropper - params[:full_text_ocr] = 'true' if full_text + params[:cropper] = 'true' if opts.cropper + params[:full_text_ocr] = 'true' if opts.full_text + params[:rag] = 'true' if opts.rag uri.query = URI.encode_www_form(params) headers = { @@ -165,24 +156,23 @@ def document_queue_req_get(input_source, all_words, full_text, close_file, cropp } req = Net::HTTP::Post.new(uri, headers) form_data = if input_source.is_a?(Mindee::Input::Source::URLInputSource) - [['document', input_source.url]] + [['document', input_source.url]] # : Array[untyped] else - [input_source.read_contents(close: close_file)] + [input_source.read_contents(close: opts.close_file)] # : Array[untyped] end - form_data.push ['include_mvision', 'true'] if all_words + form_data.push ['include_mvision', 'true'] if opts.all_words req.set_form(form_data, 'multipart/form-data') - response = nil Net::HTTP.start(uri.hostname, uri.port, use_ssl: true, read_timeout: @request_timeout) do |http| - response = http.request(req) + return http.request(req) end - response + raise Mindee::Errors::MindeeError, 'Could not resolve server response.' end # @param job_id [String] # @return [Net::HTTPResponse, nil] - def document_queue_req(job_id) + def document_queue_req_get(job_id) uri = URI("#{@url_root}/documents/queue/#{job_id}") headers = { diff --git a/lib/mindee/http/response_validation.rb b/lib/mindee/http/response_validation.rb index 4e7004c3..1601e2b5 100644 --- a/lib/mindee/http/response_validation.rb +++ b/lib/mindee/http/response_validation.rb @@ -42,7 +42,7 @@ def self.valid_async_response?(response) # Checks and correct the response object depending on the possible kinds of returns. # @param response [Net::HTTPResponse] def self.clean_request!(response) - return response if (response.code.to_i < 200) || (response.code.to_i > 302) + return response if (response.code.to_i < 200) || (response.code.to_i > 302) # : Net::HTTPResponse return response if response.body.empty? diff --git a/lib/mindee/parsing/common/api_response.rb b/lib/mindee/parsing/common/api_response.rb index 9456badb..056d391b 100644 --- a/lib/mindee/parsing/common/api_response.rb +++ b/lib/mindee/parsing/common/api_response.rb @@ -35,7 +35,7 @@ class ApiResponse # @param product_class [Mindee::Inference] # @param http_response [Hash] - # @param raw_http [String] + # @param raw_http [Hash] def initialize(product_class, http_response, raw_http) logger.debug('Handling API response') @raw_http = raw_http.to_s diff --git a/lib/mindee/parsing/common/document.rb b/lib/mindee/parsing/common/document.rb index ad1d7c1c..bf7bfd5a 100644 --- a/lib/mindee/parsing/common/document.rb +++ b/lib/mindee/parsing/common/document.rb @@ -69,7 +69,7 @@ def inject_full_text_ocr(raw_prediction) full_text_ocr = String.new raw_prediction.dig('inference', 'pages').each do |page| - full_text_ocr << (page['extras']['full_text_ocr']['content']) + full_text_ocr << page['extras']['full_text_ocr']['content'] end artificial_text_obj = { 'content' => full_text_ocr } if @extras.nil? || @extras.empty? diff --git a/lib/mindee/parsing/common/extras.rb b/lib/mindee/parsing/common/extras.rb index a7d9ec75..626c726d 100644 --- a/lib/mindee/parsing/common/extras.rb +++ b/lib/mindee/parsing/common/extras.rb @@ -3,3 +3,4 @@ require_relative 'extras/extras' require_relative 'extras/cropper_extra' require_relative 'extras/full_text_ocr_extra' +require_relative 'extras/rag_extra' diff --git a/lib/mindee/parsing/common/extras/extras.rb b/lib/mindee/parsing/common/extras/extras.rb index 4298880c..b45a4855 100644 --- a/lib/mindee/parsing/common/extras/extras.rb +++ b/lib/mindee/parsing/common/extras/extras.rb @@ -13,6 +13,8 @@ class Extras attr_reader :cropper # @return [Mindee::Parsing::Common::Extras::FullTextOCRExtra, nil] attr_reader :full_text_ocr + # @return [RAGExtra, nil] + attr_reader :rag def initialize(raw_prediction) if raw_prediction['cropper'] @@ -21,9 +23,10 @@ def initialize(raw_prediction) if raw_prediction['full_text_ocr'] @full_text_ocr = Mindee::Parsing::Common::Extras::FullTextOCRExtra.new(raw_prediction['full_text_ocr']) end + @rag = Mindee::Parsing::Common::Extras::RAGExtra.new(raw_prediction['rag']) if raw_prediction['rag'] raw_prediction.each do |key, value| - instance_variable_set("@#{key}", value) unless ['cropper', 'full_text_ocr'].include?(key) + instance_variable_set("@#{key}", value) unless ['cropper', 'full_text_ocr', 'rag'].include?(key) end end diff --git a/lib/mindee/parsing/common/extras/rag_extra.rb b/lib/mindee/parsing/common/extras/rag_extra.rb new file mode 100644 index 00000000..08717cb3 --- /dev/null +++ b/lib/mindee/parsing/common/extras/rag_extra.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module Mindee + module Parsing + module Common + module Extras + # Retrieval-Augmented Generation extra. + class RAGExtra + # ID of the matching document + # @return [String, nil] + attr_reader :matching_document_id + + def initialize(raw_prediction) + @matching_document_id = raw_prediction['matching_document_id'] if raw_prediction['matching_document_id'] + end + + def to_s + @matching_document_id || '' + end + end + end + end + end +end diff --git a/lib/mindee/parsing/common/inference.rb b/lib/mindee/parsing/common/inference.rb index 7212a6d8..0a0fb1a8 100644 --- a/lib/mindee/parsing/common/inference.rb +++ b/lib/mindee/parsing/common/inference.rb @@ -29,6 +29,8 @@ class Inference # Whether this product has access to synchronous endpoint. # @return [bool] attr_reader :has_sync + # @return [Mindee::Parsing::Common::Extras::Extras] Potential Extras fields sent back along the prediction. + attr_reader :extras @endpoint_name = nil @endpoint_version = nil @@ -40,6 +42,7 @@ def initialize(raw_prediction) @is_rotation_applied = raw_prediction['is_rotation_applied'] @product = Product.new(raw_prediction['product']) @pages = [] # : Array[Page] + @extras = Extras::Extras.new(raw_prediction['extras']) end # @return [String] diff --git a/lib/mindee/parsing/standard/tax_field.rb b/lib/mindee/parsing/standard/tax_field.rb index 60b23d0e..a449f667 100644 --- a/lib/mindee/parsing/standard/tax_field.rb +++ b/lib/mindee/parsing/standard/tax_field.rb @@ -39,10 +39,10 @@ def print_float(value) def to_s printable = printable_values out_str = String.new - out_str << ("Base: #{printable[:base]}") - out_str << (", Code: #{printable[:code]}") - out_str << (", Rate (%): #{printable[:rate]}") - out_str << (", Amount: #{printable[:value]}") + out_str << "Base: #{printable[:base]}" + out_str << ", Code: #{printable[:code]}" + out_str << ", Rate (%): #{printable[:rate]}" + out_str << ", Amount: #{printable[:value]}" out_str.strip end @@ -60,10 +60,10 @@ def printable_values def to_table_line printable = printable_values out_str = String.new - out_str << ("| #{printable[:base].ljust(13, ' ')}") - out_str << (" | #{printable[:code].ljust(6, ' ')}") - out_str << (" | #{printable[:rate].ljust(8, ' ')}") - out_str << (" | #{printable[:value].ljust(13, ' ')} |") + out_str << "| #{printable[:base].ljust(13, ' ')}" + out_str << " | #{printable[:code].ljust(6, ' ')}" + out_str << " | #{printable[:rate].ljust(8, ' ')}" + out_str << " | #{printable[:value].ljust(13, ' ')} |" out_str.strip end end @@ -94,7 +94,7 @@ def to_s return '' if nil? || empty? out_str = String.new - out_str << ("\n#{line_separator('-')}") + out_str << "\n#{line_separator('-')}" out_str << "\n | Base | Code | Rate (%) | Amount |" out_str << "\n#{line_separator('=')}" each do |entry| diff --git a/lib/mindee/pdf/pdf_extractor.rb b/lib/mindee/pdf/pdf_extractor.rb index f36ad24b..2b78ebf5 100644 --- a/lib/mindee/pdf/pdf_extractor.rb +++ b/lib/mindee/pdf/pdf_extractor.rb @@ -58,7 +58,7 @@ def extract_sub_documents(page_indexes) end formatted_max_index = format('%03d', page_index_list[page_index_list.length - 1] + 1).to_s field_filename = "#{basename}_#{format('%03d', - (page_index_list[0] + 1))}-#{formatted_max_index}#{extension}" + page_index_list[0] + 1)}-#{formatted_max_index}#{extension}" extracted_pdf = Mindee::PDF::PDFExtractor::ExtractedPDF.new(cut_pages(page_index_list), field_filename) extracted_pdfs << extracted_pdf diff --git a/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_items.rb b/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_items.rb index 056e433a..9c5541af 100644 --- a/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_items.rb +++ b/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_items.rb @@ -46,14 +46,14 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Description ' out_str << ' | Gross Weight' out_str << ' | Measurement' out_str << ' | Measurement Unit' out_str << ' | Quantity' out_str << ' | Weight Unit' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/financial_document/financial_document_v1_line_items.rb b/lib/mindee/product/financial_document/financial_document_v1_line_items.rb index 4342fe32..c6e19c14 100644 --- a/lib/mindee/product/financial_document/financial_document_v1_line_items.rb +++ b/lib/mindee/product/financial_document/financial_document_v1_line_items.rb @@ -44,7 +44,7 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Description ' out_str << ' | Product code' out_str << ' | Quantity' @@ -53,7 +53,7 @@ def to_s out_str << ' | Total Amount' out_str << ' | Unit of measure' out_str << ' | Unit Price' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/fr/bank_statement/bank_statement_v2_transactions.rb b/lib/mindee/product/fr/bank_statement/bank_statement_v2_transactions.rb index b62ca3ff..b45980d8 100644 --- a/lib/mindee/product/fr/bank_statement/bank_statement_v2_transactions.rb +++ b/lib/mindee/product/fr/bank_statement/bank_statement_v2_transactions.rb @@ -40,11 +40,11 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Amount ' out_str << ' | Date ' out_str << ' | Description ' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usages.rb b/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usages.rb index 23b5a599..108b8640 100644 --- a/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usages.rb +++ b/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usages.rb @@ -50,7 +50,7 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Consumption' out_str << ' | Description ' out_str << ' | End Date ' @@ -59,7 +59,7 @@ def to_s out_str << ' | Total ' out_str << ' | Unit of Measure' out_str << ' | Unit Price' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/fr/energy_bill/energy_bill_v1_subscriptions.rb b/lib/mindee/product/fr/energy_bill/energy_bill_v1_subscriptions.rb index e5e062a1..0de21ea6 100644 --- a/lib/mindee/product/fr/energy_bill/energy_bill_v1_subscriptions.rb +++ b/lib/mindee/product/fr/energy_bill/energy_bill_v1_subscriptions.rb @@ -48,14 +48,14 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Description ' out_str << ' | End Date ' out_str << ' | Start Date' out_str << ' | Tax Rate' out_str << ' | Total ' out_str << ' | Unit Price' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contributions.rb b/lib/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contributions.rb index 4d21d279..f0202a69 100644 --- a/lib/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contributions.rb +++ b/lib/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contributions.rb @@ -48,14 +48,14 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Description ' out_str << ' | End Date ' out_str << ' | Start Date' out_str << ' | Tax Rate' out_str << ' | Total ' out_str << ' | Unit Price' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/fr/payslip/payslip_v2_salary_details.rb b/lib/mindee/product/fr/payslip/payslip_v2_salary_details.rb index cd6d0378..f5d5cd5f 100644 --- a/lib/mindee/product/fr/payslip/payslip_v2_salary_details.rb +++ b/lib/mindee/product/fr/payslip/payslip_v2_salary_details.rb @@ -48,12 +48,12 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Amount ' out_str << ' | Base ' out_str << ' | Description ' out_str << ' | Rate ' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/fr/payslip/payslip_v3_paid_time_offs.rb b/lib/mindee/product/fr/payslip/payslip_v3_paid_time_offs.rb index 7cf36b86..b1b280fa 100644 --- a/lib/mindee/product/fr/payslip/payslip_v3_paid_time_offs.rb +++ b/lib/mindee/product/fr/payslip/payslip_v3_paid_time_offs.rb @@ -49,13 +49,13 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Accrued ' out_str << ' | Period' out_str << ' | Type ' out_str << ' | Remaining' out_str << ' | Used ' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/fr/payslip/payslip_v3_salary_details.rb b/lib/mindee/product/fr/payslip/payslip_v3_salary_details.rb index c32d254d..9c1d542a 100644 --- a/lib/mindee/product/fr/payslip/payslip_v3_salary_details.rb +++ b/lib/mindee/product/fr/payslip/payslip_v3_salary_details.rb @@ -49,13 +49,13 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Amount ' out_str << ' | Base ' out_str << ' | Description ' out_str << ' | Number' out_str << ' | Rate ' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/invoice/invoice_v4_line_items.rb b/lib/mindee/product/invoice/invoice_v4_line_items.rb index 88777d35..1af5d772 100644 --- a/lib/mindee/product/invoice/invoice_v4_line_items.rb +++ b/lib/mindee/product/invoice/invoice_v4_line_items.rb @@ -44,7 +44,7 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Description ' out_str << ' | Product code' out_str << ' | Quantity' @@ -53,7 +53,7 @@ def to_s out_str << ' | Total Amount' out_str << ' | Unit of measure' out_str << ' | Unit Price' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb b/lib/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb index 7d009696..d098b954 100644 --- a/lib/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb +++ b/lib/mindee/product/invoice_splitter/invoice_splitter_v1_invoice_page_groups.rb @@ -37,9 +37,9 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Page Indexes ' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrients.rb b/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrients.rb index 5479e461..995a9648 100644 --- a/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrients.rb +++ b/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrients.rb @@ -53,13 +53,13 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Daily Value' out_str << ' | Name ' out_str << ' | Per 100g' out_str << ' | Per Serving' out_str << ' | Unit' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/receipt/receipt_v5_line_items.rb b/lib/mindee/product/receipt/receipt_v5_line_items.rb index 43af7a51..18f3729b 100644 --- a/lib/mindee/product/receipt/receipt_v5_line_items.rb +++ b/lib/mindee/product/receipt/receipt_v5_line_items.rb @@ -40,12 +40,12 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Description ' out_str << ' | Quantity' out_str << ' | Total Amount' out_str << ' | Unit Price' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/resume/resume_v1_certificates.rb b/lib/mindee/product/resume/resume_v1_certificates.rb index c11e7658..076a3032 100644 --- a/lib/mindee/product/resume/resume_v1_certificates.rb +++ b/lib/mindee/product/resume/resume_v1_certificates.rb @@ -44,12 +44,12 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Grade ' out_str << ' | Name ' out_str << ' | Provider ' out_str << ' | Year' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/resume/resume_v1_educations.rb b/lib/mindee/product/resume/resume_v1_educations.rb index a3530bb7..177789ea 100644 --- a/lib/mindee/product/resume/resume_v1_educations.rb +++ b/lib/mindee/product/resume/resume_v1_educations.rb @@ -47,7 +47,7 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Domain ' out_str << ' | Degree ' out_str << ' | End Month' @@ -55,7 +55,7 @@ def to_s out_str << ' | School ' out_str << ' | Start Month' out_str << ' | Start Year' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/resume/resume_v1_languages.rb b/lib/mindee/product/resume/resume_v1_languages.rb index 92c52591..cbb766f7 100644 --- a/lib/mindee/product/resume/resume_v1_languages.rb +++ b/lib/mindee/product/resume/resume_v1_languages.rb @@ -42,10 +42,10 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Language' out_str << ' | Level ' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/resume/resume_v1_professional_experiences.rb b/lib/mindee/product/resume/resume_v1_professional_experiences.rb index ac92f2bc..4e0bcab0 100644 --- a/lib/mindee/product/resume/resume_v1_professional_experiences.rb +++ b/lib/mindee/product/resume/resume_v1_professional_experiences.rb @@ -49,7 +49,7 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Contract Type ' out_str << ' | Department' out_str << ' | Description ' @@ -59,7 +59,7 @@ def to_s out_str << ' | Role ' out_str << ' | Start Month' out_str << ' | Start Year' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/resume/resume_v1_social_networks_urls.rb b/lib/mindee/product/resume/resume_v1_social_networks_urls.rb index 40acc2f0..3f8e1883 100644 --- a/lib/mindee/product/resume/resume_v1_social_networks_urls.rb +++ b/lib/mindee/product/resume/resume_v1_social_networks_urls.rb @@ -42,10 +42,10 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Name ' out_str << ' | URL ' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copays.rb b/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copays.rb index 7133420d..72f3824d 100644 --- a/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copays.rb +++ b/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copays.rb @@ -39,10 +39,10 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | Service Fees' out_str << ' | Service Name ' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/us/us_mail/us_mail_v2_recipient_addresses.rb b/lib/mindee/product/us/us_mail/us_mail_v2_recipient_addresses.rb index bb9a2105..d6ef30ea 100644 --- a/lib/mindee/product/us/us_mail/us_mail_v2_recipient_addresses.rb +++ b/lib/mindee/product/us/us_mail/us_mail_v2_recipient_addresses.rb @@ -45,7 +45,7 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | City ' out_str << ' | Complete Address ' out_str << ' | Is Address Change' @@ -53,7 +53,7 @@ def to_s out_str << ' | Private Mailbox Number' out_str << ' | State' out_str << ' | Street ' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/lib/mindee/product/us/us_mail/us_mail_v3_recipient_addresses.rb b/lib/mindee/product/us/us_mail/us_mail_v3_recipient_addresses.rb index 1a2e4e8f..2bd0cbd5 100644 --- a/lib/mindee/product/us/us_mail/us_mail_v3_recipient_addresses.rb +++ b/lib/mindee/product/us/us_mail/us_mail_v3_recipient_addresses.rb @@ -46,7 +46,7 @@ def to_s "\n #{entry.to_table_line}\n#{self.class.line_items_separator('-')}" end.join out_str = String.new - out_str << ("\n#{self.class.line_items_separator('-')}\n ") + out_str << "\n#{self.class.line_items_separator('-')}\n " out_str << ' | City ' out_str << ' | Complete Address ' out_str << ' | Is Address Change' @@ -55,7 +55,7 @@ def to_s out_str << ' | State' out_str << ' | Street ' out_str << ' | Unit ' - out_str << (" |\n#{self.class.line_items_separator('=')}") + out_str << " |\n#{self.class.line_items_separator('=')}" out_str + lines end end diff --git a/mindee.gemspec b/mindee.gemspec index cda12eb2..6aa822b5 100644 --- a/mindee.gemspec +++ b/mindee.gemspec @@ -37,7 +37,7 @@ Gem::Specification.new do |spec| spec.add_development_dependency 'rake', '~> 13.2' spec.add_development_dependency 'rbs', '~> 3.6' spec.add_development_dependency 'rspec', '~> 3.13' - spec.add_development_dependency 'rubocop', '~> 1.70' + spec.add_development_dependency 'rubocop', '~> 1.75' spec.add_development_dependency 'steep', '~> 1.7' spec.add_development_dependency 'yard', '~> 0.9' end diff --git a/sig/custom/net_http.rbs b/sig/custom/net_http.rbs index f6e4dc33..fa179ec6 100644 --- a/sig/custom/net_http.rbs +++ b/sig/custom/net_http.rbs @@ -29,6 +29,9 @@ module Net class HTTPResponse def self.body: -> untyped def body: -> untyped + def []: (untyped) -> untyped + def key?: (untyped) -> bool + def code: -> String end class HTTPRedirection diff --git a/sig/mindee/client.rbs b/sig/mindee/client.rbs index 5525684e..4d5fb271 100644 --- a/sig/mindee/client.rbs +++ b/sig/mindee/client.rbs @@ -17,6 +17,8 @@ module Mindee attr_accessor close_file: bool attr_accessor page_options: PageOptions attr_accessor cropper: bool + attr_accessor rag: bool + attr_accessor workflow_id: String | nil attr_accessor initial_delay_sec: Integer | Float attr_accessor delay_sec: Integer | Float attr_accessor max_retries: Integer @@ -36,7 +38,7 @@ module Mindee end class Client - def initialize: (api_key: String) -> void + def initialize: (?api_key: String) -> void def logger: () -> untyped def parse: (Input::Source::LocalInputSource | Input::Source::URLInputSource, untyped, ?endpoint: (HTTP::Endpoint?), options: ParseOptions | Hash[Symbol | String, untyped]) -> Parsing::Common::ApiResponse def parse_sync: (Input::Source::LocalInputSource | Input::Source::URLInputSource, untyped, HTTP::Endpoint, ParseOptions) -> Parsing::Common::ApiResponse @@ -57,7 +59,7 @@ module Mindee def process_pdf_if_required: (Input::Source::LocalInputSource, ParseOptions | WorkflowOptions) -> void def initialize_endpoint: (singleton(Parsing::Common::Inference), ?endpoint_name: String, ?account_name: String, ?version: String) -> HTTP::Endpoint def validate_async_params: (Integer | Float, Integer | Float, Integer) -> void - def fix_endpoint_name: (singleton(Parsing::Common::Inference), String?) -> String? + def fix_endpoint_name: (singleton(Parsing::Common::Inference), String?) -> String def fix_account_name: (String) -> String def fix_version: (singleton(Parsing::Common::Inference), String) -> String end diff --git a/sig/mindee/http/endpoint.rbs b/sig/mindee/http/endpoint.rbs index 39a57dc0..a6946d8c 100644 --- a/sig/mindee/http/endpoint.rbs +++ b/sig/mindee/http/endpoint.rbs @@ -13,14 +13,15 @@ module Mindee def api_key: -> String? def request_timeout: -> Integer def url_root: -> String - def initialize: (untyped, untyped, untyped, ?api_key: String) -> String - def predict: (untyped, untyped, untyped, untyped, untyped) -> [untyped, untyped] - def predict_async: (untyped, untyped, untyped, untyped, untyped) -> [untyped, untyped] - def parse_async: (untyped) -> [untyped, untyped] - def predict_req_post: (untyped, ?all_words: false, ?full_text: false, ?close_file: true, ?cropper: false) -> untyped - def document_queue_req_get: (untyped, untyped, untyped, untyped, untyped) -> untyped - def document_queue_req: (untyped) -> untyped - def check_api_key: -> nil + def base_url: -> String + def initialize: (String, String, String | nil, ?api_key: String) -> String + def predict: (Input::Source::LocalInputSource | Input::Source::URLInputSource, ParseOptions) -> [Net::HTTPResponse, Hash[Symbol, untyped]] + def predict_async: (Input::Source::LocalInputSource | Input::Source::URLInputSource, ParseOptions) -> [Net::HTTPResponse, Hash[Symbol, untyped]] + def parse_async: (String) -> [Net::HTTPResponse, Hash[Symbol, untyped]] + def predict_req_post: (Input::Source::LocalInputSource | Input::Source::URLInputSource, ParseOptions) -> Net::HTTPResponse + def document_queue_req_post: (Input::Source::LocalInputSource | Input::Source::URLInputSource, ParseOptions) -> Net::HTTPResponse + def document_queue_req_get: (untyped) -> Net::HTTPResponse + def check_api_key: -> void end end end diff --git a/sig/mindee/http/response_validation.rbs b/sig/mindee/http/response_validation.rbs index 91c43939..447a6aa9 100644 --- a/sig/mindee/http/response_validation.rbs +++ b/sig/mindee/http/response_validation.rbs @@ -2,9 +2,9 @@ module Mindee module HTTP module ResponseValidation - def self.valid_sync_response?: (untyped) -> bool - def self.valid_async_response?: (untyped) -> bool - def self.clean_request!: (untyped) -> nil + def self.valid_sync_response?: (Net::HTTPResponse) -> bool + def self.valid_async_response?: (Net::HTTPResponse) -> bool + def self.clean_request!: (Net::HTTPResponse) -> void end end end diff --git a/sig/mindee/parsing/common/api_response.rbs b/sig/mindee/parsing/common/api_response.rbs index 0e4c7198..8cd595fb 100644 --- a/sig/mindee/parsing/common/api_response.rbs +++ b/sig/mindee/parsing/common/api_response.rbs @@ -20,7 +20,7 @@ module Mindee def job: -> Parsing::Common::Job? def api_request: -> Parsing::Common::ApiRequest? def raw_http: -> String - def initialize: (singleton(Parsing::Common::Inference), Hash[Symbol | String, untyped], String) -> void + def initialize: (singleton(Parsing::Common::Inference), Hash[Symbol | String, untyped] | Net::HTTPResponse, String) -> void end end end diff --git a/sig/mindee/parsing/common/extras/full_text_ocr_extra.rbs b/sig/mindee/parsing/common/extras/full_text_ocr_extra.rbs index d78a0917..72f7f493 100644 --- a/sig/mindee/parsing/common/extras/full_text_ocr_extra.rbs +++ b/sig/mindee/parsing/common/extras/full_text_ocr_extra.rbs @@ -4,8 +4,8 @@ module Mindee module Common module Extras class FullTextOCRExtra - def contents: -> untyped - def language: -> untyped + def contents: -> String? + def language: -> String? def initialize: (untyped) -> nil def to_s: -> String end diff --git a/sig/mindee/parsing/common/extras/rag_extra.rbs b/sig/mindee/parsing/common/extras/rag_extra.rbs new file mode 100644 index 00000000..9ddf0eb1 --- /dev/null +++ b/sig/mindee/parsing/common/extras/rag_extra.rbs @@ -0,0 +1,15 @@ +# lib/mindee/parsing/common/extras/rag_extra.rb + +module Mindee + module Parsing + module Common + module Extras + class RAGExtra + def matching_document_id: -> String? + def initialize: (untyped) -> nil + def to_s: -> String + end + end + end + end +end \ No newline at end of file diff --git a/spec/api_response_spec.rb b/spec/api_response_spec.rb index cd466f12..4d9fc871 100644 --- a/spec/api_response_spec.rb +++ b/spec/api_response_spec.rb @@ -11,7 +11,7 @@ response = load_json(DIR_PRODUCTS, 'invoices/response_v4/complete.json') rst_response = read_file(DIR_PRODUCTS, 'invoices/response_v4/summary_full.rst') parsed_response = Mindee::Parsing::Common::ApiResponse.new(Mindee::Product::Invoice::InvoiceV4, - response, response) + response, response.to_s) expect(parsed_response.document.inference).to be_a Mindee::Product::Invoice::InvoiceV4 expect(parsed_response.document.inference.prediction).to be_a Mindee::Product::Invoice::InvoiceV4Document expect(parsed_response.raw_http).to eq(response.to_s) diff --git a/spec/async_rseponse_spec.rb b/spec/async_rseponse_spec.rb index cc77b223..92cc4d71 100644 --- a/spec/async_rseponse_spec.rb +++ b/spec/async_rseponse_spec.rb @@ -15,14 +15,14 @@ JSON.generate(response)) expect(Mindee::HTTP::ResponseValidation.valid_async_response?(fake_response)).to eq(true) parsed_response = Mindee::Parsing::Common::ApiResponse.new(Mindee::Product::InvoiceSplitter::InvoiceSplitterV1, - response, response) + response, response.to_json) expect(parsed_response.job.status).to eq(Mindee::Parsing::Common::JobStatus::WAITING) expect(parsed_response.job.id).to eq('76c90710-3a1b-4b91-8a39-31a6543e347c') expect(parsed_response.job.status).to_not respond_to(:available_at) expect(parsed_response.job.status).to_not respond_to(:millisecs_taken) expect(parsed_response.api_request.error).to eq({}) - expect(parsed_response.raw_http).to eq(response.to_s) + expect(parsed_response.raw_http).to eq(response.to_json) end it 'should not be able to be sent on incompatible endpoints' do @@ -31,7 +31,7 @@ JSON.generate(response)) expect(Mindee::HTTP::ResponseValidation.valid_async_response?(fake_response)).to eq(false) parsed_response = Mindee::Parsing::Common::ApiResponse.new(Mindee::Product::InvoiceSplitter::InvoiceSplitterV1, - response, response) + response, response.to_json) expect(parsed_response.job).to be(nil) end @@ -41,14 +41,14 @@ JSON.generate(response)) expect(Mindee::HTTP::ResponseValidation.valid_async_response?(fake_response)).to eq(true) parsed_response = Mindee::Parsing::Common::ApiResponse.new(Mindee::Product::InvoiceSplitter::InvoiceSplitterV1, - response, response) + response, response.to_json) expect(parsed_response.job.issued_at.strftime('%Y-%m-%dT%H:%M:%S.%6N')).to eq('2023-03-16T12:33:49.602947') expect(parsed_response.job.status).to eq(Mindee::Parsing::Common::JobStatus::PROCESSING) expect(parsed_response.job.id).to eq('76c90710-3a1b-4b91-8a39-31a6543e347c') expect(parsed_response.job.status).to_not respond_to(:available_at) expect(parsed_response.job.status).to_not respond_to(:millisecs_taken) expect(parsed_response.api_request.error['code']).to eq(nil) - expect(parsed_response.raw_http).to eq(response.to_s) + expect(parsed_response.raw_http).to eq(response.to_json) end it 'should be able to poll a completed queue' do @@ -57,7 +57,7 @@ JSON.generate(response)) expect(Mindee::HTTP::ResponseValidation.valid_async_response?(fake_response)).to eq(true) parsed_response = Mindee::Parsing::Common::ApiResponse.new(Mindee::Product::InvoiceSplitter::InvoiceSplitterV1, - response, response) + response, response.to_json) expect(parsed_response.job.issued_at.strftime('%Y-%m-%dT%H:%M:%S.%6N')).to eq('2023-03-21T13:52:56.326107') expect(parsed_response.job.status).to eq(Mindee::Parsing::Common::JobStatus::COMPLETED) expect(parsed_response.job.id).to eq('b6caf9e8-9bcc-4412-bcb7-f5b416678f0d') @@ -65,7 +65,7 @@ expect(parsed_response.job.millisecs_taken).to eq(4664) expect(parsed_response.document).to_not be(nil) expect(parsed_response.api_request.error['code']).to eq(nil) - expect(parsed_response.raw_http).to eq(response.to_s) + expect(parsed_response.raw_http).to eq(response.to_json) end it 'should retrieve a failed job' do @@ -74,7 +74,7 @@ JSON.generate(response)) expect(Mindee::HTTP::ResponseValidation.valid_async_response?(fake_response)).to eq(false) parsed_response = Mindee::Parsing::Common::ApiResponse.new(Mindee::Product::InvoiceSplitter::InvoiceSplitterV1, - response, response) + response, response.to_json) expect(parsed_response.job.issued_at.strftime('%Y-%m-%dT%H:%M:%S.%6N')).to eq('2024-02-20T10:31:06.878599') expect(parsed_response.job.available_at.strftime('%Y-%m-%dT%H:%M:%S.%6N')).to eq('2024-02-20T10:31:06.878599') expect(parsed_response.api_request.status).to eq(Mindee::Parsing::Common::RequestStatus::SUCCESS) diff --git a/spec/extras/full_text_ocr_spec.rb b/spec/extras/full_text_ocr_spec.rb index e2f62ee9..f2e6c668 100644 --- a/spec/extras/full_text_ocr_spec.rb +++ b/spec/extras/full_text_ocr_spec.rb @@ -9,7 +9,7 @@ Mindee::Parsing::Common::ApiResponse.new( Mindee::Product::InternationalId::InternationalIdV2, prediction_data, - prediction_data + prediction_data.to_json ).document.inference.pages end end @@ -20,7 +20,7 @@ Mindee::Parsing::Common::ApiResponse.new( Mindee::Product::InternationalId::InternationalIdV2, prediction_data, - prediction_data + prediction_data.to_json ).document end let(:load_invalid_document) do @@ -30,7 +30,7 @@ Mindee::Parsing::Common::ApiResponse.new( Mindee::Product::FR::BankStatement::BankStatementV2, prediction_data, - prediction_data + prediction_data.to_json ).document end end diff --git a/spec/test_code_samples.sh b/spec/test_code_samples.sh index 774c3e07..2860be1c 100755 --- a/spec/test_code_samples.sh +++ b/spec/test_code_samples.sh @@ -9,7 +9,7 @@ API_KEY=$3 if [ -z "${ACCOUNT}" ]; then echo "ACCOUNT is required"; exit 1; fi if [ -z "${ENDPOINT}" ]; then echo "ENDPOINT is required"; exit 1; fi -for f in $(find ./docs/code_samples -maxdepth 1 -name "*.txt" -not -name "workflow_execution.txt" | sort -h) +for f in $(find ./docs/code_samples -maxdepth 1 -name "*.txt" -not -name "workflow_*.txt" | sort -h) do echo echo "###############################################" diff --git a/spec/workflow/workflow_integration.rb b/spec/workflow/workflow_integration.rb index d23fb7f5..f494e7d6 100644 --- a/spec/workflow/workflow_integration.rb +++ b/spec/workflow/workflow_integration.rb @@ -7,19 +7,21 @@ describe Mindee::Client do describe 'execute_workflow call to API' do let(:product_data_dir) { File.join(DATA_DIR, 'products') } - it 'should return a valid response' do - client = Mindee::Client.new - invoice_splitter_input = Mindee::Input::Source::PathInputSource.new( - File.join(product_data_dir, 'invoice_splitter', 'default_sample.pdf') + let(:client) { Mindee::Client.new } + let(:sample_input) do + Mindee::Input::Source::PathInputSource.new( + File.join(product_data_dir, 'financial_document', 'default_sample.jpg') ) - + end + let(:workflow_id) { ENV.fetch('WORKFLOW_ID') } + it 'should return a valid response' do current_date_time = Time.now.strftime('%Y-%m-%d-%H:%M:%S') document_alias = "ruby-#{current_date_time}" priority = Mindee::Parsing::Common::ExecutionPriority::LOW response = client.execute_workflow( - invoice_splitter_input, - ENV.fetch('WORKFLOW_ID'), + sample_input, + workflow_id, options: { document_alias: document_alias, priority: priority, rag: true } ) @@ -27,5 +29,27 @@ expect(response.execution.file.alias).to eq(document_alias) expect(response.execution.priority).to eq(priority) end + + it 'should poll a workflow with RAG' do + options = { workflow_id: workflow_id, rag: true } + response = client.parse( + sample_input, + Mindee::Product::FinancialDocument::FinancialDocumentV1, + options: options + ) + expect(response.document.to_s).to_not be_empty + expect(response.document.inference.extras.rag.matching_document_id).to_not be_empty + end + + it 'should poll a workflow without RAG' do + options = { workflow_id: workflow_id } + response = client.parse( + sample_input, + Mindee::Product::FinancialDocument::FinancialDocumentV1, + options: options + ) + expect(response.document.to_s).to_not be_empty + expect(response.document.inference.extras.rag).to be_nil + end end end