Documents
Extract
A route for extracting structured data from documents using LLMs. This endpoint processes document data and extracts information according to the provided JSON schema. It supports various document types and can return either a complete response or streamed chunks.
POST /v1/documents/extract
Copy
from retab.client import Retab
reclient = Retab()
response = reclient.documents.extract(
json_schema = "Invoice_schema.json",
document = "Invoice.pdf",
model="gpt-4.1-nano",
temperature=0
)
Copy
{
"content": {
"id": "chatcmpl-AoBs45TNWTB1VKGSXV7NAwCnxMaNN",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "{\"name\": \"Confirmation d'affr\\u00e9tement\", \"date\": \"2024-11-08\"}",
"refusal": null,
"role": "assistant",
"audio": null,
"function_call": null,
"tool_calls": [],
"parsed": {
"name": "Confirmation d'affr\u00e9tement",
"date": "2024-11-08"
}
}
}
],
"created": 1736525396,
"model": "gpt-4.1-nano",
"object": "chat.completion",
"service_tier": "default",
"system_fingerprint": "fp_f2cd28694a",
"usage": {
"completion_tokens": 20,
"prompt_tokens": 2760,
"total_tokens": 2780,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"likelihoods": {
"name": 0.7227993785831323,
"date": 0.7306298416895017
}
},
"error": null
}
Copy
from retab.client import Retab
reclient = Retab()
response = reclient.documents.extract(
json_schema = "Invoice_schema.json",
document = "Invoice.pdf",
model="gpt-4.1-nano",
temperature=0
)
Copy
{
"content": {
"id": "chatcmpl-AoBs45TNWTB1VKGSXV7NAwCnxMaNN",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "{\"name\": \"Confirmation d'affr\\u00e9tement\", \"date\": \"2024-11-08\"}",
"refusal": null,
"role": "assistant",
"audio": null,
"function_call": null,
"tool_calls": [],
"parsed": {
"name": "Confirmation d'affr\u00e9tement",
"date": "2024-11-08"
}
}
}
],
"created": 1736525396,
"model": "gpt-4.1-nano",
"object": "chat.completion",
"service_tier": "default",
"system_fingerprint": "fp_f2cd28694a",
"usage": {
"completion_tokens": 20,
"prompt_tokens": 2760,
"total_tokens": 2780,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"likelihoods": {
"name": 0.7227993785831323,
"date": 0.7306298416895017
}
},
"error": null
}
Authorizations
Body
application/json
Response
200
application/json
Successful Response
The response is of type `object`.
Copy
from retab.client import Retab
reclient = Retab()
response = reclient.documents.extract(
json_schema = "Invoice_schema.json",
document = "Invoice.pdf",
model="gpt-4.1-nano",
temperature=0
)
Copy
{
"content": {
"id": "chatcmpl-AoBs45TNWTB1VKGSXV7NAwCnxMaNN",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "{\"name\": \"Confirmation d'affr\\u00e9tement\", \"date\": \"2024-11-08\"}",
"refusal": null,
"role": "assistant",
"audio": null,
"function_call": null,
"tool_calls": [],
"parsed": {
"name": "Confirmation d'affr\u00e9tement",
"date": "2024-11-08"
}
}
}
],
"created": 1736525396,
"model": "gpt-4.1-nano",
"object": "chat.completion",
"service_tier": "default",
"system_fingerprint": "fp_f2cd28694a",
"usage": {
"completion_tokens": 20,
"prompt_tokens": 2760,
"total_tokens": 2780,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"likelihoods": {
"name": 0.7227993785831323,
"date": 0.7306298416895017
}
},
"error": null
}
Assistant
Responses are generated using AI and may contain mistakes.