Skip to main content
POST /v1/documents/split
from retab import Retab

client = Retab()

# Categories the document's sections can be assigned to; each entry pairs a
# category name with a short description of what belongs in it.
split_categories = [
    {"name": "invoice", "description": "Invoice documents with billing information"},
    {"name": "receipt", "description": "Receipt documents for payments"},
    {"name": "contract", "description": "Legal contract documents"},
]

# Request a split of the multi-page document using the categories above.
result = client.documents.split(
    document="invoice_batch.pdf",
    model="gemini-2.5-flash",
    categories=split_categories,
)

# Print one line per returned section: its category name and page range.
for section in result.splits:
    print(f"{section.name}: pages {section.start_page}-{section.end_page}")

# Example output:
# invoice: pages 1-3
# receipt: pages 4-5
# invoice: pages 6-8
{
  "splits": [
    {
      "name": "invoice",
      "start_page": 1,
      "end_page": 3
    },
    {
      "name": "receipt",
      "start_page": 4,
      "end_page": 5
    },
    {
      "name": "invoice",
      "start_page": 6,
      "end_page": 8
    }
  ]
}
from retab import Retab

client = Retab()

# Categories the document's sections can be assigned to; each entry pairs a
# category name with a short description of what belongs in it.
split_categories = [
    {"name": "invoice", "description": "Invoice documents with billing information"},
    {"name": "receipt", "description": "Receipt documents for payments"},
    {"name": "contract", "description": "Legal contract documents"},
]

# Request a split of the multi-page document using the categories above.
result = client.documents.split(
    document="invoice_batch.pdf",
    model="gemini-2.5-flash",
    categories=split_categories,
)

# Print one line per returned section: its category name and page range.
for section in result.splits:
    print(f"{section.name}: pages {section.start_page}-{section.end_page}")

# Example output:
# invoice: pages 1-3
# receipt: pages 4-5
# invoice: pages 6-8
{
  "splits": [
    {
      "name": "invoice",
      "start_page": 1,
      "end_page": 3
    },
    {
      "name": "receipt",
      "start_page": 4,
      "end_page": 5
    },
    {
      "name": "invoice",
      "start_page": 6,
      "end_page": 8
    }
  ]
}

Authorizations

Api-Key
string
header
required

Query Parameters

access_token
string | null

Body

application/json
document
MIMEData · object
required

The document to split

categories
Category · object[]
required

The categories to split the document into

model
string
required

The model to use to split the document

Response

Successful Response

splits
SplitResult · object[]
required

The list of document splits with their page ranges