Python SDK
The official Perf SDK for Python provides both synchronous and asynchronous clients with Pydantic models, streaming support, and comprehensive error handling.
Installation
Copy
pip install perf-sdk
# or
poetry add perf-sdk
Quick Start
Copy
from perf import PerfClient
client = PerfClient(api_key="pk_live_your_key_here")
# Simple chat completion
response = client.chat(
messages=[{"role": "user", "content": "Hello, world!"}],
max_cost_per_call=0.01
)
print(response.output)
print(f"Cost: ${response.billing.cost_usd}")
print(f"Model: {response.model_used}")
Configuration
Copy
client = PerfClient(
api_key="pk_live_xxx", # Required: Your API key
base_url="https://api.withperf.pro", # Optional: Custom base URL
timeout=30.0, # Optional: Request timeout in seconds
max_retries=3 # Optional: Max retry attempts
)
Synchronous Client
Basic Request
Copy
from perf import PerfClient
client = PerfClient(api_key="pk_live_xxx")
response = client.chat(
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is the capital of France?"}
]
)
print(response.output) # "The capital of France is Paris."
With Cost Control
Copy
response = client.chat(
messages=[{"role": "user", "content": "Explain quantum computing"}],
max_cost_per_call=0.005 # Maximum 0.5 cents
)
if response.billing.cost_warning:
print("Cost exceeded budget, cheaper model was used")
Request Options
Copy
response = client.chat(
messages=[{"role": "user", "content": "Generate a haiku"}],
# Cost control
max_cost_per_call=0.01,
# Generation parameters
temperature=0.7,
max_tokens=500,
top_p=0.9,
frequency_penalty=0.5,
presence_penalty=0.5,
stop=["\n\n"],
# Output format
response_format="json",
# Tracking
user_id="user_123",
metadata={
"session_id": "sess_abc",
"feature": "chat"
}
)
Async Client
For async applications, use AsyncPerfClient:
Copy
from perf import AsyncPerfClient
import asyncio
async def main():
client = AsyncPerfClient(api_key="pk_live_xxx")
response = await client.chat(
messages=[{"role": "user", "content": "Hello, world!"}]
)
print(response.output)
# Always close the client when done
await client.close()
asyncio.run(main())
Context Manager
Copy
async def main():
async with AsyncPerfClient(api_key="pk_live_xxx") as client:
response = await client.chat(
messages=[{"role": "user", "content": "Hello!"}]
)
print(response.output)
Streaming
Synchronous Streaming
Copy
for chunk in client.chat_stream(
messages=[{"role": "user", "content": "Write a short story"}]
):
if chunk.done:
# Final chunk with metadata
print(f"\nModel: {chunk.model_used}")
print(f"Cost: ${chunk.billing.cost_usd}")
else:
# Content chunk
print(chunk.chunk, end="", flush=True)
Async Streaming
Copy
async for chunk in await client.chat_stream(
messages=[{"role": "user", "content": "Write a poem"}]
):
if chunk.done:
print(f"\nModel: {chunk.model_used}")
else:
print(chunk.chunk, end="", flush=True)
Error Handling
The SDK provides typed exceptions for different error scenarios:
Copy
from perf import (
PerfClient,
PerfError,
AuthenticationError,
RateLimitError,
ValidationError,
ServerError
)
import time
client = PerfClient(api_key="pk_live_xxx")
try:
response = client.chat(
messages=[{"role": "user", "content": "Hello"}]
)
except RateLimitError as e:
print(f"Rate limited. Retry after {e.retry_after}s")
time.sleep(e.retry_after)
# Retry the request
except AuthenticationError:
print("Invalid API key")
# Don't retry - fix the API key
except ValidationError as e:
print(f"Invalid request: {e.message}")
# Fix the request parameters
except ServerError as e:
print(f"Server error: {e.message}")
# SDK will automatically retry
except PerfError as e:
print(f"Error: {e.code} - {e.message}")
if e.is_retryable:
# Safe to retry
pass
Exception Properties
All PerfError exceptions have these properties:
| Property | Type | Description |
|---|---|---|
| code | str | Machine-readable error code |
| message | str | Human-readable description |
| status | int | HTTP status code |
| request_id | str | Unique request ID for debugging |
| is_retryable | bool | Whether safe to retry |
Rate Limit Error
Copy
except RateLimitError as e:
print(e.retry_after) # Seconds to wait
print(e.limit) # Rate limit ceiling
print(e.remaining) # Remaining requests
print(e.reset) # Unix timestamp when limit resets
Type Hints
The SDK uses Pydantic models with full type hints:
Copy
from perf import PerfClient
from perf.types import ChatRequest, ChatResponse, Message
client = PerfClient(api_key="pk_live_xxx")
messages: list[Message] = [
{"role": "user", "content": "Hello"}
]
response: ChatResponse = client.chat(messages=messages)
Response Types
ChatResponse
Copy
@dataclass
class ChatResponse:
model_used: str
output: str
billing: Billing
tokens: TokenUsage
metadata: Metadata
@dataclass
class Billing:
cost_usd: float
cost_warning: bool
@dataclass
class TokenUsage:
input: int
output: int
total: int
@dataclass
class Metadata:
call_id: str
task_type: str
complexity_score: float
routing_reason: str
latency_ms: int
fallback_used: bool
validation_passed: bool
timestamp: str
StreamChunk
Copy
@dataclass
class ContentChunk:
chunk: str
done: Literal[False]
@dataclass
class FinalChunk:
chunk: str # Empty string
done: Literal[True]
model_used: str
billing: Billing
tokens: TokenUsage
metadata: Metadata
Examples
Multi-turn Conversation
Copy
messages = [
{"role": "user", "content": "What is 2+2?"}
]
response1 = client.chat(messages=messages)
print(response1.output) # "4"
messages.append({"role": "assistant", "content": response1.output})
messages.append({"role": "user", "content": "Multiply that by 3"})
response2 = client.chat(messages=messages)
print(response2.output) # "12"
JSON Output
Copy
import json
response = client.chat(
messages=[{
"role": "user",
"content": "Extract: John Smith, [email protected], 555-1234. Return JSON with name, email, phone."
}],
response_format="json",
temperature=0.2
)
data = json.loads(response.output)
print(data["name"]) # "John Smith"
print(data["email"]) # "[email protected]"
FastAPI Integration
Copy
from fastapi import FastAPI, HTTPException
from perf import AsyncPerfClient, PerfError
app = FastAPI()
client = AsyncPerfClient(api_key="pk_live_xxx")
@app.post("/chat")
async def chat(message: str):
try:
response = await client.chat(
messages=[{"role": "user", "content": message}],
max_cost_per_call=0.01
)
return {
"response": response.output,
"model": response.model_used,
"cost": response.billing.cost_usd
}
except PerfError as e:
raise HTTPException(status_code=e.status, detail=e.message)
@app.on_event("shutdown")
async def shutdown():
await client.close()
Retry with Backoff
Copy
import time
import random
from perf import PerfClient, PerfError
client = PerfClient(api_key="pk_live_xxx")
def chat_with_retry(messages, max_retries=3):
for attempt in range(max_retries + 1):
try:
return client.chat(messages=messages)
except PerfError as e:
if not e.is_retryable or attempt == max_retries:
raise
# Exponential backoff with jitter
delay = (2 ** attempt) + random.random()
print(f"Retrying in {delay:.1f}s...")
time.sleep(delay)
# Usage
response = chat_with_retry([{"role": "user", "content": "Hello"}])
Requirements
- Python 3.9 or higher
- Dependencies:
httpx, pydantic
Support
- PyPI: perf-sdk
- Documentation: docs.withperf.pro
- Email: [email protected]