Documents
Parse
Parse a document and extract text content from each page. This endpoint processes various document types and returns structured text content along with usage information. It supports different parsing modes and formats.
POST /v1/documents/parse
Copy
from retab.client import Retab
client = Retab()
result = client.documents.parse(
document="document.pdf",
model="gemini-2.5-flash",
table_parsing_format="html",
image_resolution_dpi=72,
browser_canvas="A4"
)
# Access parsed content
for i, page_content in enumerate(result.pages):
    print(f"Page {i + 1}: {page_content}")
print(f"Total pages: {result.usage.page_count}")
print(f"Credits used: {result.usage.credits}")
Copy
{
"document": {
"filename": "document.pdf",
"mime_type": "application/pdf",
"size": 15432
},
"usage": {
"page_count": 3,
"credits": 1.5
},
"pages": [
"<h1>Document Title</h1><p>First page content with formatted text and tables...</p>",
"<p>Second page content continues here...</p>",
"<p>Third and final page content...</p>"
],
"text": "<h1>Document Title</h1><p>First page content with formatted text and tables...</p><p>Second page content continues here...</p><p>Third and final page content...</p>"
}
Copy
from retab.client import Retab
client = Retab()
result = client.documents.parse(
document="document.pdf",
model="gemini-2.5-flash",
table_parsing_format="html",
image_resolution_dpi=72,
browser_canvas="A4"
)
# Access parsed content
for i, page_content in enumerate(result.pages):
    print(f"Page {i + 1}: {page_content}")
print(f"Total pages: {result.usage.page_count}")
print(f"Credits used: {result.usage.credits}")
Copy
{
"document": {
"filename": "document.pdf",
"mime_type": "application/pdf",
"size": 15432
},
"usage": {
"page_count": 3,
"credits": 1.5
},
"pages": [
"<h1>Document Title</h1><p>First page content with formatted text and tables...</p>",
"<p>Second page content continues here...</p>",
"<p>Third and final page content...</p>"
],
"text": "<h1>Document Title</h1><p>First page content with formatted text and tables...</p><p>Second page content continues here...</p><p>Third and final page content...</p>"
}
Authorizations
Body
application/json
Request model for document parsing.
Response
200
application/json
Successful Response
Result of document parsing.
Copy
from retab.client import Retab
client = Retab()
result = client.documents.parse(
document="document.pdf",
model="gemini-2.5-flash",
table_parsing_format="html",
image_resolution_dpi=72,
browser_canvas="A4"
)
# Access parsed content
for i, page_content in enumerate(result.pages):
    print(f"Page {i + 1}: {page_content}")
print(f"Total pages: {result.usage.page_count}")
print(f"Credits used: {result.usage.credits}")
Copy
{
"document": {
"filename": "document.pdf",
"mime_type": "application/pdf",
"size": 15432
},
"usage": {
"page_count": 3,
"credits": 1.5
},
"pages": [
"<h1>Document Title</h1><p>First page content with formatted text and tables...</p>",
"<p>Second page content continues here...</p>",
"<p>Third and final page content...</p>"
],
"text": "<h1>Document Title</h1><p>First page content with formatted text and tables...</p><p>Second page content continues here...</p><p>Third and final page content...</p>"
}
Assistant
Responses are generated using AI and may contain mistakes.