from retab import Retab

# Create the client (no arguments are passed to the constructor).
client = Retab()

# Submit the document for parsing.
result = client.documents.parse(
    document="document.pdf",
    # Hyphenated model ID. NOTE(review): confirm against the supported model list.
    model="gemini-2.5-flash",
    table_parsing_format="html",
    image_resolution_dpi=72,
    browser_canvas="A4",
)

# Access parsed content: iterate result.pages, numbering pages from 1.
for page_number, page_content in enumerate(result.pages, start=1):
    print(f"Page {page_number}: {page_content}")

# Usage information reported alongside the parsed content.
print(f"Total pages: {result.usage.page_count}")
print(f"Credits used: {result.usage.credits}")
{
"document": {
"filename": "document.pdf",
"mime_type": "application/pdf",
"size": 15432
},
"usage": {
"page_count": 3,
"credits": 1.5
},
"pages": [
"<h1>Document Title</h1><p>First page content with formatted text and tables...</p>",
"<p>Second page content continues here...</p>",
"<p>Third and final page content...</p>"
],
"text": "<h1>Document Title</h1><p>First page content with formatted text and tables...</p><p>Second page content continues here...</p><p>Third and final page content...</p>"
}
Parse a document and extract the text content of each page. This endpoint processes various document types and returns structured text content along with usage information. It supports different parsing modes and output formats.
from retab import Retab

# Create the client (no arguments are passed to the constructor).
client = Retab()

# Submit the document for parsing.
result = client.documents.parse(
    document="document.pdf",
    # Hyphenated model ID. NOTE(review): confirm against the supported model list.
    model="gemini-2.5-flash",
    table_parsing_format="html",
    image_resolution_dpi=72,
    browser_canvas="A4",
)

# Access parsed content: iterate result.pages, numbering pages from 1.
for page_number, page_content in enumerate(result.pages, start=1):
    print(f"Page {page_number}: {page_content}")

# Usage information reported alongside the parsed content.
print(f"Total pages: {result.usage.page_count}")
print(f"Credits used: {result.usage.credits}")
{
"document": {
"filename": "document.pdf",
"mime_type": "application/pdf",
"size": 15432
},
"usage": {
"page_count": 3,
"credits": 1.5
},
"pages": [
"<h1>Document Title</h1><p>First page content with formatted text and tables...</p>",
"<p>Second page content continues here...</p>",
"<p>Third and final page content...</p>"
],
"text": "<h1>Document Title</h1><p>First page content with formatted text and tables...</p><p>Second page content continues here...</p><p>Third and final page content...</p>"
}
from retab import Retab

# Create the client (no arguments are passed to the constructor).
client = Retab()

# Submit the document for parsing.
result = client.documents.parse(
    document="document.pdf",
    # Hyphenated model ID. NOTE(review): confirm against the supported model list.
    model="gemini-2.5-flash",
    table_parsing_format="html",
    image_resolution_dpi=72,
    browser_canvas="A4",
)

# Access parsed content: iterate result.pages, numbering pages from 1.
for page_number, page_content in enumerate(result.pages, start=1):
    print(f"Page {page_number}: {page_content}")

# Usage information reported alongside the parsed content.
print(f"Total pages: {result.usage.page_count}")
print(f"Credits used: {result.usage.credits}")
{
"document": {
"filename": "document.pdf",
"mime_type": "application/pdf",
"size": 15432
},
"usage": {
"page_count": 3,
"credits": 1.5
},
"pages": [
"<h1>Document Title</h1><p>First page content with formatted text and tables...</p>",
"<p>Second page content continues here...</p>",
"<p>Third and final page content...</p>"
],
"text": "<h1>Document Title</h1><p>First page content with formatted text and tables...</p><p>Second page content continues here...</p><p>Third and final page content...</p>"
}
Request model for document parsing.
Successful Response
Result of document parsing.