POST /v1/ocr
Image OCR API
Extract text from screenshots, scanned images, receipts, labels, and photos.
Key Operations
extract_text
extract_text_with_layout
Use cases
Receipt parsing
Screenshot indexing
Document text extraction
Async Job Lifecycle
All ImageHQ processing endpoints are asynchronous. Upon a successful POST, you receive a 202 Accepted response with a job_id. Poll the status endpoint (the poll_url returned in the response) until the job's status reaches succeeded.
Request Example
import json
import requests
url = "https://api.imagehq.io/v1/ocr"
payload = {
"operation": "extract_text",
"options": {
"detect_orientation": True,
"language": "eng",
"output_format": "txt"
},
"tool_slug": "image-to-text"
}
files = [("files[]", open("image.png", "rb"))]
data = {"request": json.dumps(payload)}
response = requests.post(url, files=files, data=data)
print(response.json())

const form = new FormData();
form.append("files[]", file);
form.append("request", JSON.stringify({
"operation": "extract_text",
"options": {
"detect_orientation": true,
"language": "eng",
"output_format": "txt"
},
"tool_slug": "image-to-text"
}));
const response = await fetch("https://api.imagehq.io/v1/ocr", {
method: "POST",
headers: { "Idempotency-Key": crypto.randomUUID() },
body: form
});
const data = await response.json();
console.log(data);

const form = new FormData();
form.append("files[]", file);
form.append("request", JSON.stringify({
"operation": "extract_text",
"options": {
"detect_orientation": true,
"language": "eng",
"output_format": "txt"
},
"tool_slug": "image-to-text"
}));
const response = await fetch("https://api.imagehq.io/v1/ocr", {
method: "POST",
headers: { "Idempotency-Key": crypto.randomUUID() },
body: form
});
const data = await response.json();
console.log(data);

curl -X POST "https://api.imagehq.io/v1/ocr" \
-H "Idempotency-Key: $(uuidgen)" \
-F "files[]=@image.png" \
-F 'request={"operation":"extract_text","options":{"detect_orientation":true,"language":"eng","output_format":"txt"},"tool_slug":"image-to-text"}'

$client = new GuzzleHttp\Client();
$response = $client->post("https://api.imagehq.io/v1/ocr", [
"multipart" => [
["name" => "files[]", "contents" => fopen("image.png", "r")],
["name" => "request", "contents" => '{"operation":"extract_text","options":{"detect_orientation":true,"language":"eng","output_format":"txt"},"tool_slug":"image-to-text"}']
]
]);

require "faraday"
require "securerandom"
response = Faraday.post("https://api.imagehq.io/v1/ocr") do |req|
req.headers["Idempotency-Key"] = SecureRandom.uuid
req.body = { "files[]" => Faraday::UploadIO.new("image.png", "image/png"), "request" => '{"operation":"extract_text","options":{"detect_orientation":true,"language":"eng","output_format":"txt"},"tool_slug":"image-to-text"}' }
end

body := &bytes.Buffer{}
writer := multipart.NewWriter(body)
writer.WriteField("request", `{"operation":"extract_text","options":{"detect_orientation":true,"language":"eng","output_format":"txt"},"tool_slug":"image-to-text"}`)
file, _ := writer.CreateFormFile("files[]", "image.png")
src, _ := os.Open("image.png")
defer src.Close()
io.Copy(file, src)
writer.Close()
http.Post("https://api.imagehq.io/v1/ocr", writer.FormDataContentType(), body)

HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create("https://api.imagehq.io/v1/ocr"))
.header("Idempotency-Key", UUID.randomUUID().toString())
.POST(HttpRequest.BodyPublishers.ofString("multipart form data"))
.build();

using var form = new MultipartFormDataContent();
form.Add(new StringContent("{\"operation\":\"extract_text\",\"options\":{\"detect_orientation\":true,\"language\":\"eng\",\"output_format\":\"txt\"},\"tool_slug\":\"image-to-text\"}"), "request");
form.Add(new StreamContent(File.OpenRead("image.png")), "files[]", "image.png");
await httpClient.PostAsync("https://api.imagehq.io/v1/ocr", form);

var request = URLRequest(url: URL(string: "https://api.imagehq.io/v1/ocr")!)
request.httpMethod = "POST"
request.setValue(UUID().uuidString, forHTTPHeaderField: "Idempotency-Key")
// Attach multipart files[] and request fields before sending.
Successful Response
{
"completed": {
"download_url": "/v1/jobs/job_123/download",
"expires_at": "2026-05-03T00:00:00Z",
"id": "job_123",
"inputs": [
{
"filename": "input.png",
"format": "png",
"mime_type": "image/png",
"size_bytes": 420122
}
],
"outputs": [
{
"filename": "output.jpg",
"format": "jpg",
"id": "0",
"mime_type": "image/jpeg",
"size_bytes": 161002
}
],
"progress": 100,
"result_json": {
"confidence": 0.91,
"text": "Extracted text..."
},
"retention_policy": {
"clamp": true,
"ttl_hours": 24
},
"stages": [
{
"name": "queued",
"progress": 100,
"status": "succeeded"
},
{
"name": "processing",
"progress": 100,
"status": "succeeded"
}
],
"status": "succeeded",
"warnings": []
},
"queued": {
"client_reference_id": "example-123",
"created_at": "2026-05-02T00:00:00Z",
"current_stage": "queued",
"expires_at": "2026-05-03T00:00:00Z",
"id": "job_123",
"operation": "ocr",
"poll_url": "/v1/jobs/job_123",
"progress": 0,
"status": "queued",
"tool_slug": "png-to-jpg"
}
}

Frequently Asked Questions
What languages are supported?
OCR language support depends on configured language packs in your deployment.
Does OCR return confidence values?
Yes. OCR responses include confidence metadata where available.
Can OCR run inside a pipeline?
Yes. OCR can be one step in a pipeline workflow.