Create Partitions - Retab Docs

from retab import MIMEData, Retab

# No credentials are passed to the constructor here.
# NOTE(review): presumably the SDK resolves the API key from the environment — confirm.
client = Retab()

# Reference the PDF to partition by filename and URL.
document = MIMEData(
    filename="invoice_batch.pdf",
    url="https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf",
)

# Partition the document by invoice number. `n_consensus` is the number of
# partitioning runs used for consensus voting, `allow_overlap` lets a page
# appear in more than one chunk, and `bust_cache=False` keeps the LLM cache.
response = client.partitions.create(
    document=document,
    key="invoice_number",
    instructions="Return one chunk per invoice number and keep all pages for the same invoice together.",
    model="retab-small",
    n_consensus=3,
    allow_overlap=True,
    bust_cache=False,
)

# Print each chunk's key together with the pages assigned to it.
for chunk in response.output:
    print(chunk.key, chunk.pages)

import { Retab } from "@retab/node";

// Build the client with the API key taken from the RETAB_API_KEY environment variable.
const client = new Retab({ apiKey: process.env.RETAB_API_KEY });

// Reference the PDF to partition by filename and URL.
const document = {
  filename: "invoice_batch.pdf",
  url: "https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf",
};

// Arguments are passed positionally.
// NOTE(review): judging by the values, the order is document, key, instructions,
// model, n_consensus, allow_overlap, bust_cache — confirm against the SDK signature.
const response = await client.partitions.create(document, "invoice_number", "Return one chunk per invoice number and keep all pages for the same invoice together.", "retab-small", 3, true, false);

// Log each chunk's key together with the pages assigned to it.
for (const chunk of response.output) {
  console.log(chunk.key, chunk.pages);
}

package main

import (
	"context"
	"fmt"
	"log"

	retab "github.com/retab-dev/retab/clients/go"
)

// ptr returns a pointer to a copy of v. It is used below to populate the
// pointer-typed request fields from literals.
func ptr[T any](v T) *T { return &v }

// main partitions invoice_batch.pdf by invoice number and prints each
// chunk's key and pages, exiting via log.Fatal on any error.
func main() {
	ctx := context.Background()

	// NOTE(review): an empty string is passed to NewClient; presumably the SDK
	// then resolves the API key from the environment — confirm.
	client, err := retab.NewClient("")
	if err != nil {
		log.Fatal(err)
	}

	// Reference the PDF to partition by filename and URL.
	document := retab.MIMEData{
		Filename: "invoice_batch.pdf",
		URL:      "https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf",
	}

	// NConsensus is the number of partitioning runs used for consensus voting;
	// AllowOverlap lets a page appear in more than one chunk; BustCache=false
	// keeps the LLM cache in use.
	response, err := client.Partitions.Create(ctx, &retab.PartitionsCreateParams{
		Document:     document,
		Key:          "invoice_number",
		Instructions: "Return one chunk per invoice number and keep all pages for the same invoice together.",
		Model:        ptr("retab-small"),
		NConsensus:   ptr(3),
		AllowOverlap: ptr(true),
		BustCache:    ptr(false),
	})
	if err != nil {
		log.Fatal(err)
	}

	// Print each chunk's key together with the pages assigned to it.
	for _, chunk := range response.Output {
		fmt.Println(chunk.Key, chunk.Pages)
	}
}

require 'retab'

# Build the client with the API key taken from the RETAB_API_KEY environment variable.
client = Retab::Client.new(api_key: ENV['RETAB_API_KEY'])

# Reference the PDF to partition by filename and URL.
document = {
  filename: 'invoice_batch.pdf',
  url: 'https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf',
}

# Partition the document by invoice number. n_consensus is the number of
# partitioning runs used for consensus voting, allow_overlap lets a page
# appear in more than one chunk, and bust_cache: false keeps the LLM cache.
response = client.partitions.create(
  document: document,
  key: 'invoice_number',
  instructions: 'Return one chunk per invoice number and keep all pages for the same invoice together.',
  model: 'retab-small',
  n_consensus: 3,
  allow_overlap: true,
  bust_cache: false,
)

# Print each chunk's key together with the pages assigned to it.
response.output.each do |chunk|
  puts "#{chunk.key} #{chunk.pages}"
end

use retab::resources::partitions::CreateParams;
use retab::{MimeData, Retab};

/// Partitions `invoice_batch.pdf` by invoice number and prints each chunk's
/// key and pages.
///
/// # Errors
///
/// Returns an error if `RETAB_API_KEY` cannot be read from the environment or
/// if the create-partition request fails.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = Retab::new(std::env::var("RETAB_API_KEY")?);
    let document = MimeData::new(
        "invoice_batch.pdf",
        "https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf",
    );

    // Required fields go through the constructor; the optional ones are set on
    // the request body afterwards.
    let mut params = CreateParams::new(
        document,
        "invoice_number",
        "Return one chunk per invoice number and keep all pages for the same invoice together.",
    );
    params.body.model = Some("retab-small".into());
    params.body.n_consensus = Some(3);
    params.body.allow_overlap = Some(true);
    params.body.bust_cache = Some(false);

    let response = client.partitions().create(params).await?;

    // `as_deref` borrows each optional `Vec` as a slice; a missing list is
    // treated as empty, so nothing is printed for it.
    for chunk in response.output.as_deref().unwrap_or_default() {
        println!("{} {:?}", chunk.key, chunk.pages.as_deref().unwrap_or_default());
    }
    Ok(())
}

<?php
require 'vendor/autoload.php';

use Retab\Client;

// Build the client with the API key taken from the RETAB_API_KEY environment variable.
$client = new Client(apiKey: getenv('RETAB_API_KEY'));

// Partition the batch by invoice number, keeping all pages of one invoice together.
$result = $client->partitions()->create(
    document: [
        'filename' => 'invoice_batch.pdf',
        'url' => 'https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf',
    ],
    key: 'invoice_number',
    instructions: 'Return one chunk per invoice number and keep all pages for the same invoice together.',
);
// Dump the returned partition result.
print_r($result);

using Retab;
using RetabClient = Retab.Retab;

// Read the API key from the RETAB_API_KEY environment variable and build the client.
var apiKey = Environment.GetEnvironmentVariable("RETAB_API_KEY")!;
var client = new RetabClient(apiKey);

// Reference the PDF to partition by its URL.
var document = MimeData.FromUrl(new Uri("https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf"));

// Partition the document by invoice number. NConsensus is the number of
// partitioning runs used for consensus voting, AllowOverlap lets a page
// appear in more than one chunk, and BustCache = false keeps the LLM cache.
var result = await client.Partitions.CreateAsync(new PartitionsCreateOptions
{
    Document = document,
    Key = "invoice_number",
    Instructions = "Return one chunk per invoice number and keep all pages for the same invoice together.",
    Model = "retab-small",
    NConsensus = 3,
    AllowOverlap = true,
    BustCache = false,
});
// Write the returned result to the console.
Console.WriteLine(result);

import com.retab.RetabClient;
import com.retab.models.MimeData;
import java.net.URI;

/** Example program: partitions invoice_batch.pdf by invoice number and prints the result. */
public final class Example {
  /**
   * Builds the client from the RETAB_API_KEY environment variable, submits the
   * create-partition request, and prints the returned result.
   */
  public static void main(String[] args) throws Exception {
    RetabClient client = new RetabClient(System.getenv("RETAB_API_KEY"));

    // Reference the PDF to partition by its URL.
    MimeData document = MimeData.fromUrl(
        URI.create("https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf"));

    // Arguments are positional: document, key, instructions, model, then 3L, true, false, null.
    // NOTE(review): the last four presumably map to n_consensus, allow_overlap,
    // bust_cache and background — confirm against the SDK signature.
    var result = client.partitions().create(
        document,
        "invoice_number",
        "Return one chunk per invoice number and keep all pages for the same invoice together.",
        "retab-small",
        3L,
        true,
        false,
        null);
    System.out.println(result);
  }
}

# Create a partition: POST the document reference, partition key, instructions
# and options as JSON, authenticating with a bearer token from $RETAB_API_KEY.
curl -X POST \
  'https://api.retab.com/v1/partitions' \
  -H "Authorization: Bearer $RETAB_API_KEY" \
  -H 'Content-Type: application/json' \
  -d '{
  "document": {
    "filename": "invoice_batch.pdf",
    "url": "https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf"
  },
  "key": "invoice_number",
  "instructions": "Return one chunk per invoice number and keep all pages for the same invoice together.",
  "model": "retab-small",
  "n_consensus": 3,
  "allow_overlap": true,
  "bust_cache": false
}'

{
  "output": [
    {
      "key": "INV-001",
      "pages": [1, 2]
    },
    {
      "key": "INV-002",
      "pages": [3, 4]
    }
  ],
  "consensus": {
    "likelihoods": [
      {
        "key": 0.99,
        "pages": [0.99, 0.98]
      },
      {
        "key": 0.96,
        "pages": [0.95, 0.95]
      }
    ],
    "choices": [
      [
        { "key": "INV-001", "pages": [1, 2] },
        { "key": "INV-002", "pages": [3, 4] }
      ],
      [
        { "key": "INV-001", "pages": [1, 2] },
        { "key": "INV-002", "pages": [3, 4] }
      ],
      [
        { "key": "INV-001", "pages": [1, 2] },
        { "key": "INV-002", "pages": [3, 4] }
      ]
    ]
  },
  "usage": {
    "credits": 3.0
  }
}

POST

partitions

from retab import MIMEData, Retab

# No credentials are passed to the constructor here.
# NOTE(review): presumably the SDK resolves the API key from the environment — confirm.
client = Retab()

# Reference the PDF to partition by filename and URL.
document = MIMEData(
    filename="invoice_batch.pdf",
    url="https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf",
)

# Partition the document by invoice number. `n_consensus` is the number of
# partitioning runs used for consensus voting, `allow_overlap` lets a page
# appear in more than one chunk, and `bust_cache=False` keeps the LLM cache.
response = client.partitions.create(
    document=document,
    key="invoice_number",
    instructions="Return one chunk per invoice number and keep all pages for the same invoice together.",
    model="retab-small",
    n_consensus=3,
    allow_overlap=True,
    bust_cache=False,
)

# Print each chunk's key together with the pages assigned to it.
for chunk in response.output:
    print(chunk.key, chunk.pages)

import { Retab } from "@retab/node";

// Build the client with the API key taken from the RETAB_API_KEY environment variable.
const client = new Retab({ apiKey: process.env.RETAB_API_KEY });

// Reference the PDF to partition by filename and URL.
const document = {
  filename: "invoice_batch.pdf",
  url: "https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf",
};

// Arguments are passed positionally.
// NOTE(review): judging by the values, the order is document, key, instructions,
// model, n_consensus, allow_overlap, bust_cache — confirm against the SDK signature.
const response = await client.partitions.create(document, "invoice_number", "Return one chunk per invoice number and keep all pages for the same invoice together.", "retab-small", 3, true, false);

// Log each chunk's key together with the pages assigned to it.
for (const chunk of response.output) {
  console.log(chunk.key, chunk.pages);
}

package main

import (
	"context"
	"fmt"
	"log"

	retab "github.com/retab-dev/retab/clients/go"
)

// ptr returns a pointer to a copy of v. It is used below to populate the
// pointer-typed request fields from literals.
func ptr[T any](v T) *T { return &v }

// main partitions invoice_batch.pdf by invoice number and prints each
// chunk's key and pages, exiting via log.Fatal on any error.
func main() {
	ctx := context.Background()

	// NOTE(review): an empty string is passed to NewClient; presumably the SDK
	// then resolves the API key from the environment — confirm.
	client, err := retab.NewClient("")
	if err != nil {
		log.Fatal(err)
	}

	// Reference the PDF to partition by filename and URL.
	document := retab.MIMEData{
		Filename: "invoice_batch.pdf",
		URL:      "https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf",
	}

	// NConsensus is the number of partitioning runs used for consensus voting;
	// AllowOverlap lets a page appear in more than one chunk; BustCache=false
	// keeps the LLM cache in use.
	response, err := client.Partitions.Create(ctx, &retab.PartitionsCreateParams{
		Document:     document,
		Key:          "invoice_number",
		Instructions: "Return one chunk per invoice number and keep all pages for the same invoice together.",
		Model:        ptr("retab-small"),
		NConsensus:   ptr(3),
		AllowOverlap: ptr(true),
		BustCache:    ptr(false),
	})
	if err != nil {
		log.Fatal(err)
	}

	// Print each chunk's key together with the pages assigned to it.
	for _, chunk := range response.Output {
		fmt.Println(chunk.Key, chunk.Pages)
	}
}

require 'retab'

# Build the client with the API key taken from the RETAB_API_KEY environment variable.
client = Retab::Client.new(api_key: ENV['RETAB_API_KEY'])

# Reference the PDF to partition by filename and URL.
document = {
  filename: 'invoice_batch.pdf',
  url: 'https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf',
}

# Partition the document by invoice number. n_consensus is the number of
# partitioning runs used for consensus voting, allow_overlap lets a page
# appear in more than one chunk, and bust_cache: false keeps the LLM cache.
response = client.partitions.create(
  document: document,
  key: 'invoice_number',
  instructions: 'Return one chunk per invoice number and keep all pages for the same invoice together.',
  model: 'retab-small',
  n_consensus: 3,
  allow_overlap: true,
  bust_cache: false,
)

# Print each chunk's key together with the pages assigned to it.
response.output.each do |chunk|
  puts "#{chunk.key} #{chunk.pages}"
end

use retab::resources::partitions::CreateParams;
use retab::{MimeData, Retab};

/// Partitions `invoice_batch.pdf` by invoice number and prints each chunk's
/// key and pages.
///
/// # Errors
///
/// Returns an error if `RETAB_API_KEY` cannot be read from the environment or
/// if the create-partition request fails.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = Retab::new(std::env::var("RETAB_API_KEY")?);
    let document = MimeData::new(
        "invoice_batch.pdf",
        "https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf",
    );

    // Required fields go through the constructor; the optional ones are set on
    // the request body afterwards.
    let mut params = CreateParams::new(
        document,
        "invoice_number",
        "Return one chunk per invoice number and keep all pages for the same invoice together.",
    );
    params.body.model = Some("retab-small".into());
    params.body.n_consensus = Some(3);
    params.body.allow_overlap = Some(true);
    params.body.bust_cache = Some(false);

    let response = client.partitions().create(params).await?;

    // `as_deref` borrows each optional `Vec` as a slice; a missing list is
    // treated as empty, so nothing is printed for it.
    for chunk in response.output.as_deref().unwrap_or_default() {
        println!("{} {:?}", chunk.key, chunk.pages.as_deref().unwrap_or_default());
    }
    Ok(())
}

<?php
require 'vendor/autoload.php';

use Retab\Client;

// Build the client with the API key taken from the RETAB_API_KEY environment variable.
$client = new Client(apiKey: getenv('RETAB_API_KEY'));

// Partition the batch by invoice number, keeping all pages of one invoice together.
$result = $client->partitions()->create(
    document: [
        'filename' => 'invoice_batch.pdf',
        'url' => 'https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf',
    ],
    key: 'invoice_number',
    instructions: 'Return one chunk per invoice number and keep all pages for the same invoice together.',
);
// Dump the returned partition result.
print_r($result);

using Retab;
using RetabClient = Retab.Retab;

// Read the API key from the RETAB_API_KEY environment variable and build the client.
var apiKey = Environment.GetEnvironmentVariable("RETAB_API_KEY")!;
var client = new RetabClient(apiKey);

// Reference the PDF to partition by its URL.
var document = MimeData.FromUrl(new Uri("https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf"));

// Partition the document by invoice number. NConsensus is the number of
// partitioning runs used for consensus voting, AllowOverlap lets a page
// appear in more than one chunk, and BustCache = false keeps the LLM cache.
var result = await client.Partitions.CreateAsync(new PartitionsCreateOptions
{
    Document = document,
    Key = "invoice_number",
    Instructions = "Return one chunk per invoice number and keep all pages for the same invoice together.",
    Model = "retab-small",
    NConsensus = 3,
    AllowOverlap = true,
    BustCache = false,
});
// Write the returned result to the console.
Console.WriteLine(result);

import com.retab.RetabClient;
import com.retab.models.MimeData;
import java.net.URI;

/** Example program: partitions invoice_batch.pdf by invoice number and prints the result. */
public final class Example {
  /**
   * Builds the client from the RETAB_API_KEY environment variable, submits the
   * create-partition request, and prints the returned result.
   */
  public static void main(String[] args) throws Exception {
    RetabClient client = new RetabClient(System.getenv("RETAB_API_KEY"));

    // Reference the PDF to partition by its URL.
    MimeData document = MimeData.fromUrl(
        URI.create("https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf"));

    // Arguments are positional: document, key, instructions, model, then 3L, true, false, null.
    // NOTE(review): the last four presumably map to n_consensus, allow_overlap,
    // bust_cache and background — confirm against the SDK signature.
    var result = client.partitions().create(
        document,
        "invoice_number",
        "Return one chunk per invoice number and keep all pages for the same invoice together.",
        "retab-small",
        3L,
        true,
        false,
        null);
    System.out.println(result);
  }
}

# Create a partition: POST the document reference, partition key, instructions
# and options as JSON, authenticating with a bearer token from $RETAB_API_KEY.
curl -X POST \
  'https://api.retab.com/v1/partitions' \
  -H "Authorization: Bearer $RETAB_API_KEY" \
  -H 'Content-Type: application/json' \
  -d '{
  "document": {
    "filename": "invoice_batch.pdf",
    "url": "https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf"
  },
  "key": "invoice_number",
  "instructions": "Return one chunk per invoice number and keep all pages for the same invoice together.",
  "model": "retab-small",
  "n_consensus": 3,
  "allow_overlap": true,
  "bust_cache": false
}'

{
  "output": [
    {
      "key": "INV-001",
      "pages": [1, 2]
    },
    {
      "key": "INV-002",
      "pages": [3, 4]
    }
  ],
  "consensus": {
    "likelihoods": [
      {
        "key": 0.99,
        "pages": [0.99, 0.98]
      },
      {
        "key": 0.96,
        "pages": [0.95, 0.95]
      }
    ],
    "choices": [
      [
        { "key": "INV-001", "pages": [1, 2] },
        { "key": "INV-002", "pages": [3, 4] }
      ],
      [
        { "key": "INV-001", "pages": [1, 2] },
        { "key": "INV-002", "pages": [3, 4] }
      ],
      [
        { "key": "INV-001", "pages": [1, 2] },
        { "key": "INV-002", "pages": [3, 4] }
      ]
    ]
  },
  "usage": {
    "credits": 3.0
  }
}

Partition a document into repeated chunks keyed by a value such as invoice_number, policy_id, or claim_number.

from retab import MIMEData, Retab

# No credentials are passed to the constructor here.
# NOTE(review): presumably the SDK resolves the API key from the environment — confirm.
client = Retab()

# Reference the PDF to partition by filename and URL.
document = MIMEData(
    filename="invoice_batch.pdf",
    url="https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf",
)

# Partition the document by invoice number. `n_consensus` is the number of
# partitioning runs used for consensus voting, `allow_overlap` lets a page
# appear in more than one chunk, and `bust_cache=False` keeps the LLM cache.
response = client.partitions.create(
    document=document,
    key="invoice_number",
    instructions="Return one chunk per invoice number and keep all pages for the same invoice together.",
    model="retab-small",
    n_consensus=3,
    allow_overlap=True,
    bust_cache=False,
)

# Print each chunk's key together with the pages assigned to it.
for chunk in response.output:
    print(chunk.key, chunk.pages)

import { Retab } from "@retab/node";

// Build the client with the API key taken from the RETAB_API_KEY environment variable.
const client = new Retab({ apiKey: process.env.RETAB_API_KEY });

// Reference the PDF to partition by filename and URL.
const document = {
  filename: "invoice_batch.pdf",
  url: "https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf",
};

// Arguments are passed positionally.
// NOTE(review): judging by the values, the order is document, key, instructions,
// model, n_consensus, allow_overlap, bust_cache — confirm against the SDK signature.
const response = await client.partitions.create(document, "invoice_number", "Return one chunk per invoice number and keep all pages for the same invoice together.", "retab-small", 3, true, false);

// Log each chunk's key together with the pages assigned to it.
for (const chunk of response.output) {
  console.log(chunk.key, chunk.pages);
}

package main

import (
	"context"
	"fmt"
	"log"

	retab "github.com/retab-dev/retab/clients/go"
)

// ptr returns a pointer to a copy of v. It is used below to populate the
// pointer-typed request fields from literals.
func ptr[T any](v T) *T { return &v }

// main partitions invoice_batch.pdf by invoice number and prints each
// chunk's key and pages, exiting via log.Fatal on any error.
func main() {
	ctx := context.Background()

	// NOTE(review): an empty string is passed to NewClient; presumably the SDK
	// then resolves the API key from the environment — confirm.
	client, err := retab.NewClient("")
	if err != nil {
		log.Fatal(err)
	}

	// Reference the PDF to partition by filename and URL.
	document := retab.MIMEData{
		Filename: "invoice_batch.pdf",
		URL:      "https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf",
	}

	// NConsensus is the number of partitioning runs used for consensus voting;
	// AllowOverlap lets a page appear in more than one chunk; BustCache=false
	// keeps the LLM cache in use.
	response, err := client.Partitions.Create(ctx, &retab.PartitionsCreateParams{
		Document:     document,
		Key:          "invoice_number",
		Instructions: "Return one chunk per invoice number and keep all pages for the same invoice together.",
		Model:        ptr("retab-small"),
		NConsensus:   ptr(3),
		AllowOverlap: ptr(true),
		BustCache:    ptr(false),
	})
	if err != nil {
		log.Fatal(err)
	}

	// Print each chunk's key together with the pages assigned to it.
	for _, chunk := range response.Output {
		fmt.Println(chunk.Key, chunk.Pages)
	}
}

require 'retab'

# Build the client with the API key taken from the RETAB_API_KEY environment variable.
client = Retab::Client.new(api_key: ENV['RETAB_API_KEY'])

# Reference the PDF to partition by filename and URL.
document = {
  filename: 'invoice_batch.pdf',
  url: 'https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf',
}

# Partition the document by invoice number. n_consensus is the number of
# partitioning runs used for consensus voting, allow_overlap lets a page
# appear in more than one chunk, and bust_cache: false keeps the LLM cache.
response = client.partitions.create(
  document: document,
  key: 'invoice_number',
  instructions: 'Return one chunk per invoice number and keep all pages for the same invoice together.',
  model: 'retab-small',
  n_consensus: 3,
  allow_overlap: true,
  bust_cache: false,
)

# Print each chunk's key together with the pages assigned to it.
response.output.each do |chunk|
  puts "#{chunk.key} #{chunk.pages}"
end

use retab::resources::partitions::CreateParams;
use retab::{MimeData, Retab};

/// Partitions `invoice_batch.pdf` by invoice number and prints each chunk's
/// key and pages.
///
/// # Errors
///
/// Returns an error if `RETAB_API_KEY` cannot be read from the environment or
/// if the create-partition request fails.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = Retab::new(std::env::var("RETAB_API_KEY")?);
    let document = MimeData::new(
        "invoice_batch.pdf",
        "https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf",
    );

    // Required fields go through the constructor; the optional ones are set on
    // the request body afterwards.
    let mut params = CreateParams::new(
        document,
        "invoice_number",
        "Return one chunk per invoice number and keep all pages for the same invoice together.",
    );
    params.body.model = Some("retab-small".into());
    params.body.n_consensus = Some(3);
    params.body.allow_overlap = Some(true);
    params.body.bust_cache = Some(false);

    let response = client.partitions().create(params).await?;

    // `as_deref` borrows each optional `Vec` as a slice; a missing list is
    // treated as empty, so nothing is printed for it.
    for chunk in response.output.as_deref().unwrap_or_default() {
        println!("{} {:?}", chunk.key, chunk.pages.as_deref().unwrap_or_default());
    }
    Ok(())
}

<?php
require 'vendor/autoload.php';

use Retab\Client;

// Build the client with the API key taken from the RETAB_API_KEY environment variable.
$client = new Client(apiKey: getenv('RETAB_API_KEY'));

// Partition the batch by invoice number, keeping all pages of one invoice together.
$result = $client->partitions()->create(
    document: [
        'filename' => 'invoice_batch.pdf',
        'url' => 'https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf',
    ],
    key: 'invoice_number',
    instructions: 'Return one chunk per invoice number and keep all pages for the same invoice together.',
);
// Dump the returned partition result.
print_r($result);

using Retab;
using RetabClient = Retab.Retab;

// Read the API key from the RETAB_API_KEY environment variable and build the client.
var apiKey = Environment.GetEnvironmentVariable("RETAB_API_KEY")!;
var client = new RetabClient(apiKey);

// Reference the PDF to partition by its URL.
var document = MimeData.FromUrl(new Uri("https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf"));

// Partition the document by invoice number. NConsensus is the number of
// partitioning runs used for consensus voting, AllowOverlap lets a page
// appear in more than one chunk, and BustCache = false keeps the LLM cache.
var result = await client.Partitions.CreateAsync(new PartitionsCreateOptions
{
    Document = document,
    Key = "invoice_number",
    Instructions = "Return one chunk per invoice number and keep all pages for the same invoice together.",
    Model = "retab-small",
    NConsensus = 3,
    AllowOverlap = true,
    BustCache = false,
});
// Write the returned result to the console.
Console.WriteLine(result);

import com.retab.RetabClient;
import com.retab.models.MimeData;
import java.net.URI;

/** Example program: partitions invoice_batch.pdf by invoice number and prints the result. */
public final class Example {
  /**
   * Builds the client from the RETAB_API_KEY environment variable, submits the
   * create-partition request, and prints the returned result.
   */
  public static void main(String[] args) throws Exception {
    RetabClient client = new RetabClient(System.getenv("RETAB_API_KEY"));

    // Reference the PDF to partition by its URL.
    MimeData document = MimeData.fromUrl(
        URI.create("https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf"));

    // Arguments are positional: document, key, instructions, model, then 3L, true, false, null.
    // NOTE(review): the last four presumably map to n_consensus, allow_overlap,
    // bust_cache and background — confirm against the SDK signature.
    var result = client.partitions().create(
        document,
        "invoice_number",
        "Return one chunk per invoice number and keep all pages for the same invoice together.",
        "retab-small",
        3L,
        true,
        false,
        null);
    System.out.println(result);
  }
}

# Create a partition: POST the document reference, partition key, instructions
# and options as JSON, authenticating with a bearer token from $RETAB_API_KEY.
curl -X POST \
  'https://api.retab.com/v1/partitions' \
  -H "Authorization: Bearer $RETAB_API_KEY" \
  -H 'Content-Type: application/json' \
  -d '{
  "document": {
    "filename": "invoice_batch.pdf",
    "url": "https://my-bucket.s3.us-east-1.amazonaws.com/documents/invoice_batch.pdf"
  },
  "key": "invoice_number",
  "instructions": "Return one chunk per invoice number and keep all pages for the same invoice together.",
  "model": "retab-small",
  "n_consensus": 3,
  "allow_overlap": true,
  "bust_cache": false
}'

{
  "output": [
    {
      "key": "INV-001",
      "pages": [1, 2]
    },
    {
      "key": "INV-002",
      "pages": [3, 4]
    }
  ],
  "consensus": {
    "likelihoods": [
      {
        "key": 0.99,
        "pages": [0.99, 0.98]
      },
      {
        "key": 0.96,
        "pages": [0.95, 0.95]
      }
    ],
    "choices": [
      [
        { "key": "INV-001", "pages": [1, 2] },
        { "key": "INV-002", "pages": [3, 4] }
      ],
      [
        { "key": "INV-001", "pages": [1, 2] },
        { "key": "INV-002", "pages": [3, 4] }
      ],
      [
        { "key": "INV-001", "pages": [1, 2] },
        { "key": "INV-002", "pages": [3, 4] }
      ]
    ]
  },
  "usage": {
    "credits": 3.0
  }
}

Authorizations

Authorization

string

header

required

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.

Body

application/json

Public create-partition request body.

document

MIMEData · object

required

The document to partition

Show child attributes

key

string

required

The key to partition the document by

instructions

string

required

Instructions describing how the document should be partitioned

model

string

default:retab-small

The model to use for partitioning

n_consensus

integer

default:1

Number of partitioning runs to use for consensus voting. Uses deterministic single-pass when set to 1.

allow_overlap

boolean

default:true

If true, allow a page to appear in more than one partition chunk

bust_cache

boolean

default:false

If true, skip the LLM cache and force a fresh completion

background

boolean

default:false

If true, run asynchronously: the call returns immediately with status 'queued' and an empty output. Poll GET /v1/partitions/{id} until the status is terminal (completed, failed, or cancelled). Mutually exclusive with stream.

Response

Successful Response

A partition result: a document segmented into chunks along the requested key.

id

string

required

Unique identifier of the partition

file

FileRef · object

required

Information about the partitioned file

Show child attributes

model

string

required

Model used for the partition operation

key

string

required

Partition key used for the run

instructions

string | null

Free-form instructions supplied with the partition request

n_consensus

integer

default:1

Number of consensus votes used

allow_overlap

boolean

default:true

Whether pages were allowed to appear in more than one partition chunk

output

PartitionChunk · object[]

The list of partition chunks with their assigned pages. Empty [] until status == 'completed'.

Show child attributes

status

enum<string>

default:pending

Lifecycle status. The synchronous path returns 'completed'. Background runs progress pending -> queued -> in_progress -> completed | failed | cancelled.

Available options:

pending,

queued,

in_progress,

completed,

failed,

cancelled

error

PrimitiveError · object | null

Error details when a background run fails; null otherwise. Always present so consumers can read it without an existence check.

Show child attributes

consensus

PartitionConsensus · object | null

Consensus metadata for multi-vote partition runs

Show child attributes

usage

RetabUsage · object | null

Usage information for the partition operation

Show child attributes

created_at

string<date-time> | null

Reconstruct Split Get Partition