Skip to main content

Python SDK

The official Perf SDK for Python provides both synchronous and asynchronous clients with Pydantic models, streaming support, and comprehensive error handling.

Installation

pip install perf-sdk
# or
poetry add perf-sdk

Quick Start

from perf import PerfClient

client = PerfClient(api_key="pk_live_your_key_here")

# Simple chat completion
response = client.chat(
    messages=[{"role": "user", "content": "Hello, world!"}],
    max_cost_per_call=0.01
)

print(response.output)
print(f"Cost: ${response.billing.cost_usd}")
print(f"Model: {response.model_used}")

Configuration

client = PerfClient(
    api_key="pk_live_xxx",              # Required: Your API key
    base_url="https://api.withperf.pro", # Optional: Custom base URL
    timeout=30.0,                        # Optional: Request timeout in seconds
    max_retries=3                        # Optional: Max retry attempts
)

Synchronous Client

Basic Request

from perf import PerfClient

client = PerfClient(api_key="pk_live_xxx")

response = client.chat(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"}
    ]
)

print(response.output)  # "The capital of France is Paris."

With Cost Control

response = client.chat(
    messages=[{"role": "user", "content": "Explain quantum computing"}],
    max_cost_per_call=0.005  # Maximum 0.5 cents
)

if response.billing.cost_warning:
    print("Cost exceeded budget, cheaper model was used")

Request Options

response = client.chat(
    messages=[{"role": "user", "content": "Generate a haiku"}],

    # Cost control
    max_cost_per_call=0.01,

    # Generation parameters
    temperature=0.7,
    max_tokens=500,
    top_p=0.9,
    frequency_penalty=0.5,
    presence_penalty=0.5,
    stop=["\n\n"],

    # Output format
    response_format="json",

    # Tracking
    user_id="user_123",
    metadata={
        "session_id": "sess_abc",
        "feature": "chat"
    }
)

Async Client

For async applications, use AsyncPerfClient:
from perf import AsyncPerfClient
import asyncio

async def main():
    client = AsyncPerfClient(api_key="pk_live_xxx")

    response = await client.chat(
        messages=[{"role": "user", "content": "Hello, world!"}]
    )

    print(response.output)

    # Always close the client when done
    await client.close()

asyncio.run(main())

Context Manager

async def main():
    async with AsyncPerfClient(api_key="pk_live_xxx") as client:
        response = await client.chat(
            messages=[{"role": "user", "content": "Hello!"}]
        )
        print(response.output)

Streaming

Synchronous Streaming

for chunk in client.chat_stream(
    messages=[{"role": "user", "content": "Write a short story"}]
):
    if chunk.done:
        # Final chunk with metadata
        print(f"\nModel: {chunk.model_used}")
        print(f"Cost: ${chunk.billing.cost_usd}")
    else:
        # Content chunk
        print(chunk.chunk, end="", flush=True)

Async Streaming

async for chunk in await client.chat_stream(
    messages=[{"role": "user", "content": "Write a poem"}]
):
    if chunk.done:
        print(f"\nModel: {chunk.model_used}")
    else:
        print(chunk.chunk, end="", flush=True)

Error Handling

The SDK provides typed exceptions for different error scenarios:
from perf import (
    PerfClient,
    PerfError,
    AuthenticationError,
    RateLimitError,
    ValidationError,
    ServerError
)
import time

client = PerfClient(api_key="pk_live_xxx")

try:
    response = client.chat(
        messages=[{"role": "user", "content": "Hello"}]
    )
except RateLimitError as e:
    print(f"Rate limited. Retry after {e.retry_after}s")
    time.sleep(e.retry_after)
    # Retry the request
except AuthenticationError:
    print("Invalid API key")
    # Don't retry - fix the API key
except ValidationError as e:
    print(f"Invalid request: {e.message}")
    # Fix the request parameters
except ServerError as e:
    print(f"Server error: {e.message}")
    # SDK will automatically retry
except PerfError as e:
    print(f"Error: {e.code} - {e.message}")

    if e.is_retryable:
        # Safe to retry
        pass

Exception Properties

All PerfError exceptions have these properties:
Property      Type   Description
code          str    Machine-readable error code
message       str    Human-readable description
status        int    HTTP status code
request_id    str    Unique request ID for debugging
is_retryable  bool   Whether safe to retry

Rate Limit Error

except RateLimitError as e:
    print(e.retry_after)  # Seconds to wait
    print(e.limit)        # Rate limit ceiling
    print(e.remaining)    # Remaining requests
    print(e.reset)        # Unix timestamp when limit resets

Type Hints

The SDK uses Pydantic models with full type hints:
from perf import PerfClient
from perf.types import ChatRequest, ChatResponse, Message

client = PerfClient(api_key="pk_live_xxx")

messages: list[Message] = [
    {"role": "user", "content": "Hello"}
]

response: ChatResponse = client.chat(messages=messages)

Response Types

ChatResponse

@dataclass
class ChatResponse:
    model_used: str
    output: str
    billing: Billing
    tokens: TokenUsage
    metadata: Metadata

@dataclass
class Billing:
    cost_usd: float
    cost_warning: bool

@dataclass
class TokenUsage:
    input: int
    output: int
    total: int

@dataclass
class Metadata:
    call_id: str
    task_type: str
    complexity_score: float
    routing_reason: str
    latency_ms: int
    fallback_used: bool
    validation_passed: bool
    timestamp: str

StreamChunk

@dataclass
class ContentChunk:
    chunk: str
    done: Literal[False]

@dataclass
class FinalChunk:
    chunk: str  # Empty string
    done: Literal[True]
    model_used: str
    billing: Billing
    tokens: TokenUsage
    metadata: Metadata

Examples

Multi-turn Conversation

messages = [
    {"role": "user", "content": "What is 2+2?"}
]

response1 = client.chat(messages=messages)
print(response1.output)  # "4"

messages.append({"role": "assistant", "content": response1.output})
messages.append({"role": "user", "content": "Multiply that by 3"})

response2 = client.chat(messages=messages)
print(response2.output)  # "12"

JSON Output

import json

response = client.chat(
    messages=[{
        "role": "user",
        "content": "Extract: John Smith, john.smith@example.com, 555-1234. Return JSON with name, email, phone."
    }],
    response_format="json",
    temperature=0.2
)

data = json.loads(response.output)
print(data["name"])   # "John Smith"
print(data["email"])  # "john.smith@example.com"

FastAPI Integration

from fastapi import FastAPI, HTTPException
from perf import AsyncPerfClient, PerfError

app = FastAPI()
client = AsyncPerfClient(api_key="pk_live_xxx")

@app.post("/chat")
async def chat(message: str):
    try:
        response = await client.chat(
            messages=[{"role": "user", "content": message}],
            max_cost_per_call=0.01
        )
        return {
            "response": response.output,
            "model": response.model_used,
            "cost": response.billing.cost_usd
        }
    except PerfError as e:
        raise HTTPException(status_code=e.status, detail=e.message)

@app.on_event("shutdown")
async def shutdown():
    await client.close()

Retry with Backoff

import time
import random
from perf import PerfClient, PerfError

client = PerfClient(api_key="pk_live_xxx")

def chat_with_retry(messages, max_retries=3):
    for attempt in range(max_retries + 1):
        try:
            return client.chat(messages=messages)
        except PerfError as e:
            if not e.is_retryable or attempt == max_retries:
                raise

            # Exponential backoff with jitter
            delay = (2 ** attempt) + random.random()
            print(f"Retrying in {delay:.1f}s...")
            time.sleep(delay)

# Usage
response = chat_with_retry([{"role": "user", "content": "Hello"}])

Requirements

  • Python 3.9 or higher
  • Dependencies: httpx, pydantic

Support