Skip to main content
POST /v1/documents/split
from retab import Retab

# Create the API client used for the request below.
client = Retab()

# Subdocument categories to look for in the batch. Adding a "partition_key"
# asks for individual items within that subdocument to be identified as well.
subdocuments = [
    {
        "name": "invoice",
        "description": "Invoice documents with billing information",
        "partition_key": "invoice number",
    },
    {"name": "receipt", "description": "Receipt documents for payments"},
    {"name": "contract", "description": "Legal contract documents"},
]

# Split the multi-page document into sections, one per subdocument above.
result = client.documents.split(
    document="invoice_batch.pdf",
    model="retab-small",
    subdocuments=subdocuments,
    context="Processing Q4 2024 vendor document batch",  # Optional: additional context
)

# Print the pages of every section, followed by the pages of each partition
# found inside it (partitions are listed when a partition_key was given).
for section in result.splits:
    print(f"{section.name}: pages {section.pages}")
    for item in section.partitions:
        print(f"  {item.key}: pages {item.pages}")

# Example output:
# invoice: pages [1, 2, 3]
#   INV-001: pages [1, 2]
#   INV-002: pages [3]
# receipt: pages [4, 5]
# contract: pages [6, 7, 8]
{
  "splits": [
    {
      "name": "invoice",
      "pages": [1, 2, 3],
      "partitions": [
        {
          "key": "INV-001",
          "pages": [1, 2],
          "first_page_y_start": 0.0,
          "last_page_y_end": 1.0
        },
        {
          "key": "INV-002",
          "pages": [3],
          "first_page_y_start": 0.0,
          "last_page_y_end": 1.0
        }
      ]
    },
    {
      "name": "receipt",
      "pages": [4, 5],
      "partitions": []
    },
    {
      "name": "contract",
      "pages": [6, 7, 8],
      "partitions": []
    }
  ]
}
from retab import Retab

# Create the API client used for the request below.
client = Retab()

# Subdocument categories to look for in the batch. Adding a "partition_key"
# asks for individual items within that subdocument to be identified as well.
subdocuments = [
    {
        "name": "invoice",
        "description": "Invoice documents with billing information",
        "partition_key": "invoice number",
    },
    {"name": "receipt", "description": "Receipt documents for payments"},
    {"name": "contract", "description": "Legal contract documents"},
]

# Split the multi-page document into sections, one per subdocument above.
result = client.documents.split(
    document="invoice_batch.pdf",
    model="retab-small",
    subdocuments=subdocuments,
    context="Processing Q4 2024 vendor document batch",  # Optional: additional context
)

# Print the pages of every section, followed by the pages of each partition
# found inside it (partitions are listed when a partition_key was given).
for section in result.splits:
    print(f"{section.name}: pages {section.pages}")
    for item in section.partitions:
        print(f"  {item.key}: pages {item.pages}")

# Example output:
# invoice: pages [1, 2, 3]
#   INV-001: pages [1, 2]
#   INV-002: pages [3]
# receipt: pages [4, 5]
# contract: pages [6, 7, 8]
{
  "splits": [
    {
      "name": "invoice",
      "pages": [1, 2, 3],
      "partitions": [
        {
          "key": "INV-001",
          "pages": [1, 2],
          "first_page_y_start": 0.0,
          "last_page_y_end": 1.0
        },
        {
          "key": "INV-002",
          "pages": [3],
          "first_page_y_start": 0.0,
          "last_page_y_end": 1.0
        }
      ]
    },
    {
      "name": "receipt",
      "pages": [4, 5],
      "partitions": []
    },
    {
      "name": "contract",
      "pages": [6, 7, 8],
      "partitions": []
    }
  ]
}

Authorizations

Api-Key
string
header
required

Query Parameters

access_token
string | null

Body

application/json
document
MIMEData · object
required

The document to split

subdocuments
Subdocument · object[]
required

The subdocuments to split the document into

model
string
default: retab-small

The model to use to split the document

context
string | null

Additional context for the split operation (e.g., iteration context from a loop)

Response

Successful Response

splits
SplitResult · object[]
required

The list of document splits with their page ranges