from retab import Retab
client = Retab()

# Candidate categories for the classification; each entry pairs a category
# name with a short description of the documents it covers.
categories = [
    {"name": "invoice", "description": "Invoice documents with billing information"},
    {"name": "receipt", "description": "Receipt documents for payments"},
    {"name": "contract", "description": "Legal contract documents"},
]

# Classify the document into one of the categories above.
# first_n_pages is optional: here only the first 3 pages are used for
# classification, which is useful for large documents.
result = client.documents.classify(
    document="invoice.pdf",
    model="retab-small",
    categories=categories,
    first_n_pages=3,
)

# The chosen category and the accompanying reasoning are read from result.result.
outcome = result.result
print(f"Classification: {outcome.classification}")
print(f"Reasoning: {outcome.reasoning}")
# Example output:
# Classification: invoice
# Reasoning: The document contains billing details including line items,
# unit prices, quantities, and a total amount due. It has a header with
# "INVOICE" and includes payment terms and a due date.