POST /v1/ocr
Image OCR API
Extract text from screenshots, scanned images, receipts, labels, and photos.
Opérations clés
extract_text
extract_text_with_layout
Cas d’utilisation
Receipt parsing
Screenshot indexing
Document text extraction
Cycle de vie de la tâche async
Tous les endpoints de traitement ImageHQ sont asynchrones. Après un POST réussi, vous recevez une réponse 202 Accepted avec un job_id. Interrogez l’endpoint de statut jusqu’à ce que l’état atteigne succeeded.
Exemple de requête
import json

import requests

# OCR endpoint that receives the multipart upload.
url = "https://api.imagehq.io/v1/ocr"

# Job description; it is serialized to JSON and sent in the "request" form field.
payload = {
    "operation": "extract_text",
    "options": {
        "detect_orientation": True,
        "language": "eng",
        "output_format": "txt",
    },
    "tool_slug": "image-to-text",
}
data = {"request": json.dumps(payload)}

# Open the image in a context manager so the handle is closed once the upload completes.
with open("image.png", "rb") as image:
    files = [("files[]", image)]
    response = requests.post(url, files=files, data=data)
print(response.json())
const form = new FormData();
form.append("files[]", file);
form.append("request", JSON.stringify({
"operation": "extract_text",
"options": {
"detect_orientation": true,
"language": "eng",
"output_format": "txt"
},
"tool_slug": "image-to-text"
}));
const response = await fetch("https://api.imagehq.io/v1/ocr", {
method: "POST",
headers: { "Idempotency-Key": crypto.randomUUID() },
body: form
});
const data = await response.json();
console.log(data);
const form = new FormData();
form.append("files[]", file);
form.append("request", JSON.stringify({
"operation": "extract_text",
"options": {
"detect_orientation": true,
"language": "eng",
"output_format": "txt"
},
"tool_slug": "image-to-text"
}));
const response = await fetch("https://api.imagehq.io/v1/ocr", {
method: "POST",
headers: { "Idempotency-Key": crypto.randomUUID() },
body: form
});
const data = await response.json();
console.log(data);
curl -X POST "https://api.imagehq.io/v1/ocr" \
-H "Idempotency-Key: $(uuidgen)" \
-F "files[]=@image.png" \
-F 'request={"operation":"extract_text","options":{"detect_orientation":true,"language":"eng","output_format":"txt"},"tool_slug":"image-to-text"}'
$client = new GuzzleHttp\Client();
$response = $client->post("https://api.imagehq.io/v1/ocr", [
"multipart" => [
["name" => "files[]", "contents" => fopen("image.png", "r")],
["name" => "request", "contents" => '{"operation":"extract_text","options":{"detect_orientation":true,"language":"eng","output_format":"txt"},"tool_slug":"image-to-text"}']
]
]);
require "faraday"
response = Faraday.post("https://api.imagehq.io/v1/ocr") do |req|
req.headers["Idempotency-Key"] = SecureRandom.uuid
req.body = { "files[]" => Faraday::UploadIO.new("image.png", "image/png"), "request" => '{"operation":"extract_text","options":{"detect_orientation":true,"language":"eng","output_format":"txt"},"tool_slug":"image-to-text"}' }
end
body := &bytes.Buffer{}
// Build the multipart form on top of the buffer that is later posted.
writer := multipart.NewWriter(body)
// The job description travels as a JSON string in the "request" field.
writer.WriteField("request", `{"operation":"extract_text","options":{"detect_orientation":true,"language":"eng","output_format":"txt"},"tool_slug":"image-to-text"}`)
// CreateFormFile starts the "files[]" part and returns a writer for its content; the error is ignored here.
file, _ := writer.CreateFormFile("files[]", "image.png")
// NOTE(review): nothing is ever written to `file`, so the "files[]" part is sent empty.
// Copy the image bytes into `file` before calling Close — TODO confirm and fix.
_ = file
// Close finishes the multipart message; it runs before the body is posted.
writer.Close()
http.Post("https://api.imagehq.io/v1/ocr", writer.FormDataContentType(), body)
HttpRequest request = HttpRequest.newBuilder()
.uri(URI.create("https://api.imagehq.io/v1/ocr"))
.header("Idempotency-Key", UUID.randomUUID().toString())
.POST(HttpRequest.BodyPublishers.ofString("multipart form data"))
.build();
using var form = new MultipartFormDataContent();
// The JSON job description is sent as the "request" form field.
// C# single quotes delimit a single char, so the payload must be an escaped string literal.
form.Add(new StringContent("{\"operation\":\"extract_text\",\"options\":{\"detect_orientation\":true,\"language\":\"eng\",\"output_format\":\"txt\"},\"tool_slug\":\"image-to-text\"}"), "request");
form.Add(new StreamContent(File.OpenRead("image.png")), "files[]", "image.png");
await httpClient.PostAsync("https://api.imagehq.io/v1/ocr", form);
var request = URLRequest(url: URL(string: "https://api.imagehq.io/v1/ocr")!)
request.httpMethod = "POST"
request.setValue(UUID().uuidString, forHTTPHeaderField: "Idempotency-Key")
// Attach multipart files[] and request fields before sending.
Réponse réussie
{
"completed": {
"download_url": "/v1/jobs/job_123/download",
"expires_at": "2026-05-03T00:00:00Z",
"id": "job_123",
"inputs": [
{
"filename": "input.png",
"format": "png",
"mime_type": "image/png",
"size_bytes": 420122
}
],
"outputs": [
{
"filename": "output.txt",
"format": "txt",
"id": "0",
"mime_type": "text/plain",
"size_bytes": 161002
}
],
"progress": 100,
"result_json": {
"confidence": 0.91,
"text": "Extracted text..."
},
"retention_policy": {
"clamp": true,
"ttl_hours": 24
},
"stages": [
{
"name": "queued",
"progress": 100,
"status": "succeeded"
},
{
"name": "processing",
"progress": 100,
"status": "succeeded"
}
],
"status": "succeeded",
"warnings": []
},
"queued": {
"client_reference_id": "example-123",
"created_at": "2026-05-02T00:00:00Z",
"current_stage": "queued",
"expires_at": "2026-05-03T00:00:00Z",
"id": "job_123",
"operation": "ocr",
"poll_url": "/v1/jobs/job_123",
"progress": 0,
"status": "queued",
"tool_slug": "image-to-text"
}
}
Questions fréquentes
What languages are supported?
OCR language support depends on configured language packs in your deployment.
Does OCR return confidence values?
Yes. OCR responses include confidence metadata where available.
Can OCR run inside a pipeline?
Yes. OCR can be one step in a pipeline workflow.