Skip to main content

Python Integration

Connect to Perf Voice Agents from Python for server-side applications, IVR systems, telephony integrations, or testing.

Prerequisites

pip install websocket-client pyaudio
  • websocket-client — WebSocket client library
  • PyAudio — Cross-platform audio I/O (requires PortAudio system library)

Installing PortAudio

PyAudio requires the PortAudio system library:
# macOS
brew install portaudio

# Ubuntu/Debian
sudo apt-get install portaudio19-dev

# Windows (via pip)
pip install pyaudio  # includes prebuilt binaries

Quick Start

import websocket
import json
import base64
import threading
import pyaudio

WS_URL = 'wss://api.withperf.pro/v1/voice/conversation'
API_KEY = 'YOUR_API_KEY'
AGENT_ID = 'YOUR_AGENT_ID'

RATE = 16000    # sample rate in Hz (PCM16 mono expected by the agent)
CHUNK = 2048    # samples per chunk read from / written to the audio device
ready = False   # flips True once the server sends conversation_initiation_metadata

# Audio setup: one microphone (input) and one speaker (output) stream,
# both mono 16-bit PCM at RATE.
pa = pyaudio.PyAudio()
_stream_opts = dict(format=pyaudio.paInt16, channels=1, rate=RATE,
                    frames_per_buffer=CHUNK)
mic_stream = pa.open(input=True, **_stream_opts)
spk_stream = pa.open(output=True, **_stream_opts)

def on_message(ws, message):
    """Dispatch one server event: session init, audio playback, transcripts, keepalive."""
    global ready
    event = json.loads(message)
    kind = event.get('type', '')

    if kind == 'conversation_initiation_metadata':
        # First message of the session — safe to start streaming mic audio now.
        ready = True
        meta = event.get('conversation_initiation_metadata_event', {})
        print(f"Session started: {meta.get('conversation_id')}")
        return

    if kind == 'audio':
        # Agent speech arrives as base64 PCM16; write it straight to the speaker.
        chunk_b64 = event.get('audio_event', {}).get('audio_base_64', '')
        if chunk_b64:
            spk_stream.write(base64.b64decode(chunk_b64))
        return

    if kind == 'agent_response':
        reply = event.get('agent_response_event', {}).get('agent_response', '')
        if reply:
            print(f'Agent: {reply}')
        return

    if kind == 'user_transcript':
        heard = event.get('user_transcription_event', {}).get('user_transcript', '')
        if heard:
            print(f'You: {heard}')
        return

    if kind == 'ping':
        # Keepalive: echo the event_id back as a pong.
        pong = {'type': 'pong', 'event_id': event.get('ping_event', {}).get('event_id')}
        ws.send(json.dumps(pong))

def send_audio(ws):
    """Stream microphone audio to the server as base64 PCM16 chunks.

    Runs on a daemon thread started by on_open. Waits for the server's
    init message (global `ready`) before sending, then blocks on mic reads
    so the loop naturally paces itself to real time.
    """
    import time  # local import keeps this snippet self-contained

    while ws.sock and ws.sock.connected:
        if not ready:
            # Sleep briefly instead of spinning: the original bare `continue`
            # busy-waited at 100% CPU until the init message arrived.
            time.sleep(0.01)
            continue
        pcm_data = mic_stream.read(CHUNK, exception_on_overflow=False)
        b64 = base64.b64encode(pcm_data).decode('utf-8')
        ws.send(json.dumps({'user_audio_chunk': b64}))

def on_open(ws):
    """Kick off the microphone streamer on a background daemon thread."""
    sender = threading.Thread(target=send_audio, args=(ws,), daemon=True)
    sender.start()
    print('Connected — speak now')

def on_close(ws, code, reason):
    """Log the WebSocket close code and reason."""
    print('Disconnected (code={}, reason={})'.format(code, reason))

def on_error(ws, error):
    """Log any WebSocket-level error."""
    print('Error: {}'.format(error))

# Build the authenticated URL and run the client until the connection closes.
url = f'{WS_URL}?api_key={API_KEY}&agent_id={AGENT_ID}'
ws = websocket.WebSocketApp(
    url,
    on_open=on_open,
    on_message=on_message,
    on_error=on_error,
    on_close=on_close,
)
ws.run_forever()

How It Works

  1. Connect — Opens a WebSocket to wss://api.withperf.pro/v1/voice/conversation with your API key and agent ID
  2. Wait for init — The conversation_initiation_metadata message signals the pipeline is ready
  3. Stream audio — A background thread reads microphone PCM16 chunks, base64-encodes them, and sends via WebSocket
  4. Play responses — Agent audio arrives as base64 PCM16 and is written directly to the speaker stream
  5. Keepalive — Responds to ping messages with pong to keep the connection alive

Audio Format

| Property    | Value                                        |
| ----------- | -------------------------------------------- |
| Encoding    | PCM signed 16-bit integer (pyaudio.paInt16)  |
| Sample rate | 16,000 Hz                                    |
| Channels    | 1 (mono)                                     |
| Chunk size  | 2048 samples                                 |
| Transport   | Base64-encoded JSON messages                 |

Sending Pre-Recorded Audio

To send a WAV file instead of live microphone input:
import time
import wave

def send_wav_file(ws, filepath):
    """Send a pre-recorded WAV file to the agent as paced audio chunks.

    The file must be PCM signed 16-bit, mono, 16 kHz — the format the agent
    expects. Chunks are base64-encoded, wrapped in JSON, and throttled to
    real time so the server receives audio at playback speed.
    """
    with wave.open(filepath, 'rb') as wf:
        assert wf.getsampwidth() == 2, 'Must be 16-bit PCM'
        assert wf.getnchannels() == 1, 'Must be mono'
        assert wf.getframerate() == 16000, 'Must be 16kHz'

        while True:
            frames = wf.readframes(2048)
            if not frames:
                break
            b64 = base64.b64encode(frames).decode('utf-8')
            ws.send(json.dumps({'user_audio_chunk': b64}))
            # Pace to real-time (2048 samples at 16kHz = 128ms).
            # NOTE: the original snippet used time.sleep without importing
            # time, which raised NameError — hence the import above.
            time.sleep(0.128)

Saving Transcripts

transcripts = []

def on_message(ws, message):
    """Accumulate agent and user transcript lines into `transcripts`."""
    global ready
    event = json.loads(message)
    kind = event.get('type', '')

    if kind == 'agent_response':
        line = event.get('agent_response_event', {}).get('agent_response', '')
        if line:
            transcripts.append({'role': 'agent', 'text': line})
    elif kind == 'user_transcript':
        line = event.get('user_transcription_event', {}).get('user_transcript', '')
        if line:
            transcripts.append({'role': 'user', 'text': line})

    # ... handle other message types

# After conversation ends:
for entry in transcripts:
    print(f"{entry['role'].capitalize()}: {entry['text']}")

Connection Test

Quick test without audio — verify authentication and agent connectivity:
import websocket
import json

url = ('wss://api.withperf.pro/v1/voice/conversation'
       '?api_key=YOUR_API_KEY&agent_id=YOUR_AGENT_ID')

# The first server message should be the session-init event; anything else
# means auth or agent lookup failed.
ws = websocket.create_connection(url)
data = json.loads(ws.recv())

if data.get('type') != 'conversation_initiation_metadata':
    print(f'Unexpected response: {data}')
else:
    conv_id = data['conversation_initiation_metadata_event']['conversation_id']
    print(f'Connected successfully. Session: {conv_id}')

ws.close()

Error Handling

| Error                     | Cause                                   | Resolution                                        |
| ------------------------- | --------------------------------------- | ------------------------------------------------- |
| ConnectionRefused         | Invalid URL or server down              | Verify the WebSocket URL                          |
| WebSocket close code 4001 | Invalid API key                         | Check your api_key parameter                      |
| WebSocket close code 4004 | Invalid agent ID                        | Verify the agent_id exists in your project        |
| WebSocket close code 1008 | Sent audio before init                  | Wait for conversation_initiation_metadata         |
| OSError: [Errno -9999]    | No audio device available               | Check microphone is connected and accessible      |
| Audio crackling/gaps      | Chunk size too small or CPU overloaded  | Increase CHUNK size or reduce processing          |