Skip to content

Add complex flow sample program written in Go #95

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Go/Complex Flow Examples/ocr-with-extract-text/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module github.com/datalogics/pdf-rest-api-samples/go-ocr-extract

go 1.24.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
package main

import (
"bytes"
"encoding/json"
"fmt"
"io"
"mime/multipart"
"net/http"
"os"
)

// In this sample, we will show how to convert a scanned document into a PDF with
// searchable and extractable text using Optical Character Recognition (OCR), and then
// extract that text from the newly created document.
//
// First, we will upload a scanned PDF to the /pdf-with-ocr-text route and capture the
// output ID. Then, we will send the output ID to the /extracted-text route, which will
// return the newly added text.

// main runs the two-step flow described above: it uploads the input file to
// the /pdf-with-ocr-text route, reads the "outputId" from the JSON response,
// and sends that ID to the /extracted-text route, printing the "fullText"
// field of the second response.
//
// Transport, file and JSON decoding failures panic. A response containing an
// "error" field is reported on standard output as "ERR: <message>" and ends
// the program without sending any further request.
func main() {
	baseURL := "https://api.pdfrest.com/"

	// Replace the values below with your input file's location and name
	inputFilePath := "/path/to/input.pdf"
	inputFileName := "input.pdf"

	// Replace with your API key
	apiKey := "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"

	// Begin request to /pdf-with-ocr-text
	// Create a buffer and a multipart writer
	var ocrReqBody bytes.Buffer
	ocrReqWriter := multipart.NewWriter(&ocrReqBody)

	// Open the input file
	file, err := os.Open(inputFilePath)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	// Copy the file contents into the "file" form field
	filePart, err := ocrReqWriter.CreateFormFile("file", inputFileName)
	if err != nil {
		panic(err)
	}
	if _, err = io.Copy(filePart, file); err != nil {
		panic(err)
	}

	// Closing the writer appends the terminating multipart boundary; the
	// body is incomplete until this has been done.
	if err = ocrReqWriter.Close(); err != nil {
		panic(err)
	}

	// Create the HTTP request
	ocrReq, err := http.NewRequest("POST", baseURL+"pdf-with-ocr-text", &ocrReqBody)
	if err != nil {
		panic(err)
	}

	// Set the headers
	ocrReq.Header.Set("Content-Type", ocrReqWriter.FormDataContentType())
	ocrReq.Header.Set("Api-Key", apiKey)

	// Send the request; the same client is reused for the second call
	client := &http.Client{}
	ocrResp, err := client.Do(ocrReq)
	if err != nil {
		panic(err)
	}
	defer ocrResp.Body.Close()

	var ocrRespData map[string]any
	if err = json.NewDecoder(ocrResp.Body).Decode(&ocrRespData); err != nil {
		panic(err)
	}
	if errorMessage, hasErrorMessage := ocrRespData["error"]; hasErrorMessage {
		fmt.Println("ERR:", errorMessage)
		return
	}

	// Check the type assertion explicitly so that an unexpected response
	// produces a descriptive failure instead of an opaque
	// interface-conversion panic.
	outputID, ok := ocrRespData["outputId"].(string)
	if !ok {
		panic(fmt.Errorf("pdf-with-ocr-text response has no string \"outputId\": %v", ocrRespData))
	}

	// Begin request to /extracted-text
	var extractReqBody bytes.Buffer
	extractReqWriter := multipart.NewWriter(&extractReqBody)

	// Add the "id" form field
	if err = extractReqWriter.WriteField("id", outputID); err != nil {
		panic(err)
	}
	if err = extractReqWriter.Close(); err != nil {
		panic(err)
	}

	extractReq, err := http.NewRequest("POST", baseURL+"extracted-text", &extractReqBody)
	if err != nil {
		panic(err)
	}

	extractReq.Header.Set("Content-Type", extractReqWriter.FormDataContentType())
	extractReq.Header.Set("Api-Key", apiKey)

	extractResp, err := client.Do(extractReq)
	if err != nil {
		panic(err)
	}
	defer extractResp.Body.Close()

	var extractRespData map[string]any
	if err = json.NewDecoder(extractResp.Body).Decode(&extractRespData); err != nil {
		panic(err)
	}
	if errorMessage, hasErrorMessage := extractRespData["error"]; hasErrorMessage {
		fmt.Println("ERR:", errorMessage)
		return
	}

	fmt.Println(extractRespData["fullText"])
}