POST /ocr

Image OCR API

Extract text from screenshots, scanned images, receipts, labels, and photos.

Key Operations

extract_text

extract_text_with_layout

Use cases

Receipt parsing
Screenshot indexing
Document text extraction

Async Job Lifecycle

All ImageHQ processing endpoints are asynchronous. Upon a successful POST, you receive a 202 Accepted response with a job_id. Poll the status endpoint until the state reaches succeeded.

Request Example

import json

import requests

url = "https://api.imagehq.io/ocr"
payload = {
  "tool_slug": "image-to-text",
  "operation": "extract_text",
  "options": {
    "language": "eng",
    "detect_orientation": True,
    "output_format": "txt"
  }
}
files = [("files[]", open("image.png", "rb"))]
data = {"request": json.dumps(payload)}

response = requests.post(url, files=files, data=data)
print(response.json())

const form = new FormData();
form.append("files[]", file);
form.append("request", JSON.stringify({
  "tool_slug": "image-to-text",
  "operation": "extract_text",
  "options": {
    "language": "eng",
    "detect_orientation": true,
    "output_format": "txt"
  }
}));

const response = await fetch("https://api.imagehq.io/ocr", {
  method: "POST",
  headers: { "Idempotency-Key": crypto.randomUUID() },
  body: form
});

const data = await response.json();
console.log(data);

const form = new FormData();
form.append("files[]", file);
form.append("request", JSON.stringify({
  "tool_slug": "image-to-text",
  "operation": "extract_text",
  "options": {
    "language": "eng",
    "detect_orientation": true,
    "output_format": "txt"
  }
}));

const response = await fetch("https://api.imagehq.io/ocr", {
  method: "POST",
  headers: { "Idempotency-Key": crypto.randomUUID() },
  body: form
});

const data = await response.json();
console.log(data);

curl -X POST "https://api.imagehq.io/ocr" \
  -H "Idempotency-Key: $(uuidgen)" \
  -F "files[]=@image.png" \
  -F 'request={"tool_slug":"image-to-text","operation":"extract_text","options":{"language":"eng","detect_orientation":true,"output_format":"txt"}}'

$client = new GuzzleHttp\Client();
$response = $client->post("https://api.imagehq.io/ocr", [
  "multipart" => [
    ["name" => "files[]", "contents" => fopen("image.png", "r")],
    ["name" => "request", "contents" => '{"tool_slug":"image-to-text","operation":"extract_text","options":{"language":"eng","detect_orientation":true,"output_format":"txt"}}']
  ]
]);

require "faraday"

response = Faraday.post("https://api.imagehq.io/ocr") do |req|
  req.headers["Idempotency-Key"] = SecureRandom.uuid
  req.body = { "files[]" => Faraday::UploadIO.new("image.png", "image/png"), "request" => '{"tool_slug":"image-to-text","operation":"extract_text","options":{"language":"eng","detect_orientation":true,"output_format":"txt"}}' }
end

body := &bytes.Buffer{}
writer := multipart.NewWriter(body)
writer.WriteField("request", `{"tool_slug":"image-to-text","operation":"extract_text","options":{"language":"eng","detect_orientation":true,"output_format":"txt"}}`)
file, _ := writer.CreateFormFile("files[]", "image.png")
_ = file
writer.Close()
http.Post("https://api.imagehq.io/ocr", writer.FormDataContentType(), body)

HttpRequest request = HttpRequest.newBuilder()
  .uri(URI.create("https://api.imagehq.io/ocr"))
  .header("Idempotency-Key", UUID.randomUUID().toString())
  .POST(HttpRequest.BodyPublishers.ofString("multipart form data"))
  .build();

using var form = new MultipartFormDataContent();
form.Add(new StringContent("{\"tool_slug\":\"image-to-text\",\"operation\":\"extract_text\",\"options\":{\"language\":\"eng\",\"detect_orientation\":true,\"output_format\":\"txt\"}}"), "request");
form.Add(new StreamContent(File.OpenRead("image.png")), "files[]", "image.png");
await httpClient.PostAsync("https://api.imagehq.io/ocr", form);

var request = URLRequest(url: URL(string: "https://api.imagehq.io/ocr")!)
request.httpMethod = "POST"
request.setValue(UUID().uuidString, forHTTPHeaderField: "Idempotency-Key")
// Attach multipart files[] and request fields before sending.

Successful Response

{
  "queued": {
    "id": "job_123",
    "status": "queued",
    "operation": "ocr",
    "tool_slug": "image-to-text",
    "client_reference_id": "example-123",
    "progress": 0,
    "current_stage": "queued",
    "poll_url": "/jobs/job_123",
    "created_at": "2026-05-02T00:00:00Z",
    "expires_at": "2026-05-03T00:00:00Z"
  },
  "completed": {
    "id": "job_123",
    "status": "succeeded",
    "progress": 100,
    "inputs": [
      {
        "filename": "input.png",
        "format": "png",
        "mime_type": "image/png",
        "size_bytes": 420122
      }
    ],
    "outputs": [
      {
        "id": "0",
        "filename": "output.txt",
        "format": "txt",
        "mime_type": "text/plain",
        "size_bytes": 161002
      }
    ],
    "warnings": [],
    "stages": [
      {
        "name": "queued",
        "status": "succeeded",
        "progress": 100
      },
      {
        "name": "processing",
        "status": "succeeded",
        "progress": 100
      }
    ],
    "download_url": "/jobs/job_123/download",
    "retention_policy": {
      "ttl_hours": 24,
      "clamp": true
    },
    "expires_at": "2026-05-03T00:00:00Z",
    "result_json": {
      "text": "Extracted text...",
      "confidence": 0.91
    }
  }
}

Frequently Asked Questions

What languages are supported?

OCR language support depends on configured language packs in your deployment.

Does OCR return confidence values?

Yes. OCR responses include confidence metadata where available.

Can OCR run inside a pipeline?

Yes. OCR can be one step in a pipeline workflow.

Image OCR API

Key Operations

Use cases

Async Job Lifecycle

Successful Response

Frequently Asked Questions

What languages are supported?

Does OCR return confidence values?

Can OCR run inside a pipeline?

Related APIs

Image OCR API

Key Operations

Use cases

Async Job Lifecycle

Successful Response

Frequently Asked Questions

What languages are supported?

Does OCR return confidence values?

Can OCR run inside a pipeline?

Related APIs

Image OCR API

Key Operations

Use cases

Async Job Lifecycle

Successful Response

Frequently Asked Questions

What languages are supported?

Does OCR return confidence values?

Can OCR run inside a pipeline?

Related APIs

Image OCR API

Key Operations

Use cases

Async Job Lifecycle

Successful Response

Frequently Asked Questions

What languages are supported?

Does OCR return confidence values?

Can OCR run inside a pipeline?

Related APIs

Key Operations

Key Operations