from retab import Retab

client = Retab()

# Split a multi-page document into sections based on subdocuments.
# Use partition_key to identify individual items within a subdocument.
# Use n_consensus when you want consensus-based confidence signals.
result = client.documents.split(
    document="invoice_batch.pdf",
    model="retab-small",
    subdocuments=[
        {
            "name": "invoice",
            "description": "Invoice documents with billing information",
            "partition_key": "invoice number",
        },
        {"name": "receipt", "description": "Receipt documents for payments"},
        {"name": "contract", "description": "Legal contract documents"},
    ],
    context="Processing Q4 2024 vendor document batch",  # Optional: additional context
    n_consensus=3,
)

# Access split results: one entry per detected section.
for split in result.splits:
    print(f"{split.name}: pages {split.pages}")
    print(f"likelihood={split.likelihood}")
    print(f"votes={len(split.votes)}")
    # Access partitions when partition_key is specified
    for partition in split.partitions:
        print(f" {partition.key}: pages {partition.pages}")

# Example output:
# invoice: pages [1, 2, 3]
# likelihood=0.98
# votes=3
#  INV-001: pages [1, 2]
#  INV-002: pages [3]
# receipt: pages [4, 5]
# likelihood=0.93
# votes=3
# contract: pages [6, 7, 8]
# likelihood=0.89
# votes=3
{
"splits": [
{
"name": "invoice",
"pages": [1, 2, 3],
"likelihood": 0.98,
"votes": [
{"pages": [1, 2, 3]},
{"pages": [1, 2, 3]},
{"pages": [1, 2, 3]}
],
"partitions": [
{
"key": "INV-001",
"pages": [1, 2],
"first_page_y_start": 0.0,
"last_page_y_end": 1.0
},
{
"key": "INV-002",
"pages": [3],
"first_page_y_start": 0.0,
"last_page_y_end": 1.0
}
]
},
{
"name": "receipt",
"pages": [4, 5],
"likelihood": 0.93,
"votes": [
{"pages": [4, 5]},
{"pages": [4, 5]},
{"pages": [4, 5]}
],
"partitions": []
},
{
"name": "contract",
"pages": [6, 7, 8],
"likelihood": 0.89,
"votes": [
{"pages": [6, 7, 8]},
{"pages": [6, 7, 8]},
{"pages": [6, 7, 8]}
],
"partitions": []
}
]
}
Split a document into sections based on provided subdocuments.
from retab import Retab

client = Retab()

# Split a multi-page document into sections based on subdocuments.
# Use partition_key to identify individual items within a subdocument.
# Use n_consensus when you want consensus-based confidence signals.
result = client.documents.split(
    document="invoice_batch.pdf",
    model="retab-small",
    subdocuments=[
        {
            "name": "invoice",
            "description": "Invoice documents with billing information",
            "partition_key": "invoice number",
        },
        {"name": "receipt", "description": "Receipt documents for payments"},
        {"name": "contract", "description": "Legal contract documents"},
    ],
    context="Processing Q4 2024 vendor document batch",  # Optional: additional context
    n_consensus=3,
)

# Access split results: one entry per detected section.
for split in result.splits:
    print(f"{split.name}: pages {split.pages}")
    print(f"likelihood={split.likelihood}")
    print(f"votes={len(split.votes)}")
    # Access partitions when partition_key is specified
    for partition in split.partitions:
        print(f" {partition.key}: pages {partition.pages}")

# Example output:
# invoice: pages [1, 2, 3]
# likelihood=0.98
# votes=3
#  INV-001: pages [1, 2]
#  INV-002: pages [3]
# receipt: pages [4, 5]
# likelihood=0.93
# votes=3
# contract: pages [6, 7, 8]
# likelihood=0.89
# votes=3
{
"splits": [
{
"name": "invoice",
"pages": [1, 2, 3],
"likelihood": 0.98,
"votes": [
{"pages": [1, 2, 3]},
{"pages": [1, 2, 3]},
{"pages": [1, 2, 3]}
],
"partitions": [
{
"key": "INV-001",
"pages": [1, 2],
"first_page_y_start": 0.0,
"last_page_y_end": 1.0
},
{
"key": "INV-002",
"pages": [3],
"first_page_y_start": 0.0,
"last_page_y_end": 1.0
}
]
},
{
"name": "receipt",
"pages": [4, 5],
"likelihood": 0.93,
"votes": [
{"pages": [4, 5]},
{"pages": [4, 5]},
{"pages": [4, 5]}
],
"partitions": []
},
{
"name": "contract",
"pages": [6, 7, 8],
"likelihood": 0.89,
"votes": [
{"pages": [6, 7, 8]},
{"pages": [6, 7, 8]},
{"pages": [6, 7, 8]}
],
"partitions": []
}
]
}
from retab import Retab

client = Retab()

# Split a multi-page document into sections based on subdocuments.
# Use partition_key to identify individual items within a subdocument.
# Use n_consensus when you want consensus-based confidence signals.
result = client.documents.split(
    document="invoice_batch.pdf",
    model="retab-small",
    subdocuments=[
        {
            "name": "invoice",
            "description": "Invoice documents with billing information",
            "partition_key": "invoice number",
        },
        {"name": "receipt", "description": "Receipt documents for payments"},
        {"name": "contract", "description": "Legal contract documents"},
    ],
    context="Processing Q4 2024 vendor document batch",  # Optional: additional context
    n_consensus=3,
)

# Access split results: one entry per detected section.
for split in result.splits:
    print(f"{split.name}: pages {split.pages}")
    print(f"likelihood={split.likelihood}")
    print(f"votes={len(split.votes)}")
    # Access partitions when partition_key is specified
    for partition in split.partitions:
        print(f" {partition.key}: pages {partition.pages}")

# Example output:
# invoice: pages [1, 2, 3]
# likelihood=0.98
# votes=3
#  INV-001: pages [1, 2]
#  INV-002: pages [3]
# receipt: pages [4, 5]
# likelihood=0.93
# votes=3
# contract: pages [6, 7, 8]
# likelihood=0.89
# votes=3
{
"splits": [
{
"name": "invoice",
"pages": [1, 2, 3],
"likelihood": 0.98,
"votes": [
{"pages": [1, 2, 3]},
{"pages": [1, 2, 3]},
{"pages": [1, 2, 3]}
],
"partitions": [
{
"key": "INV-001",
"pages": [1, 2],
"first_page_y_start": 0.0,
"last_page_y_end": 1.0
},
{
"key": "INV-002",
"pages": [3],
"first_page_y_start": 0.0,
"last_page_y_end": 1.0
}
]
},
{
"name": "receipt",
"pages": [4, 5],
"likelihood": 0.93,
"votes": [
{"pages": [4, 5]},
{"pages": [4, 5]},
{"pages": [4, 5]}
],
"partitions": []
},
{
"name": "contract",
"pages": [6, 7, 8],
"likelihood": 0.89,
"votes": [
{"pages": [6, 7, 8]},
{"pages": [6, 7, 8]},
{"pages": [6, 7, 8]}
],
"partitions": []
}
]
}
The document to split
Show child attributes
The subdocuments to split the document into
Show child attributes
The model to use to split the document
Additional context for the split operation (e.g., iteration context from a loop)
Number of consensus split runs to perform. Uses deterministic single-pass when set to 1.
1 <= x <= 8
Successful Response
The list of document splits with their page ranges
Show child attributes