// Connection settings for the voice-agent WebSocket endpoint.
const WS_URL = 'wss://api.withperf.pro/v1/voice/conversation';
const API_KEY = 'YOUR_API_KEY';   // TODO: mint short-lived tokens server-side; never ship a real key in client code
const AGENT_ID = 'YOUR_AGENT_ID';

// Mutable session state — one declaration per line (was a comma chain).
let ws;              // active WebSocket, or null when disconnected
let audioCtx;        // shared 16 kHz AudioContext, or null when closed
let micStream;       // MediaStream from getUserMedia, or null
let ready = false;   // true once the server sends conversation_initiation_metadata
let nextPlay = 0;    // AudioContext time at which the next agent chunk should start
let sources = [];    // scheduled AudioBufferSourceNodes, tracked so 'interruption' can stop them
// Starts a two-way voice session: captures microphone audio, streams it to the
// agent WebSocket as base64-encoded PCM16, and dispatches server events
// (audio playback, transcripts, interruptions, pings).
// Rejects if microphone access is denied. Call stopVoice() to tear down.
async function startVoice() {
  // 1. Get microphone (16 kHz mono requested; browsers may ignore the
  //    sampleRate constraint and resample through the AudioContext below).
  micStream = await navigator.mediaDevices.getUserMedia({
    audio: { sampleRate: 16000, channelCount: 1, echoCancellation: true, noiseSuppression: true }
  });
  audioCtx = new AudioContext({ sampleRate: 16000 });
  const mic = audioCtx.createMediaStreamSource(micStream);
  // NOTE(review): createScriptProcessor is deprecated; migrate to AudioWorklet
  // when a separate worklet module can be served.
  const proc = audioCtx.createScriptProcessor(2048, 1, 1);

  // 2. Stream mic audio as base64-encoded little-endian PCM16.
  proc.onaudioprocess = (e) => {
    if (!ws || ws.readyState !== WebSocket.OPEN || !ready) return;
    const input = e.inputBuffer.getChannelData(0);
    const pcm = new Int16Array(input.length);
    for (let i = 0; i < input.length; i++) {
      // Clamp to [-1, 1]; asymmetric scaling maps -1 → -32768 and +1 → +32767.
      const s = Math.max(-1, Math.min(1, input[i]));
      pcm[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
    }
    const bytes = new Uint8Array(pcm.buffer);
    let bin = '';
    for (let i = 0; i < bytes.length; i++) bin += String.fromCharCode(bytes[i]);
    ws.send(JSON.stringify({ user_audio_chunk: btoa(bin) }));
  };
  mic.connect(proc);
  // The ScriptProcessor must reach the destination to keep firing; its output
  // buffer is never written, so it contributes silence (no mic echo).
  proc.connect(audioCtx.destination);

  // 3. Connect WebSocket.
  // NOTE(review): the API key is exposed in the URL of client-side code;
  // prefer exchanging it for a short-lived session token on your backend.
  ws = new WebSocket(WS_URL + '?api_key=' + API_KEY + '&agent_id=' + AGENT_ID);
  ws.onmessage = (event) => {
    if (typeof event.data !== 'string') return;
    let data;
    try {
      data = JSON.parse(event.data);
    } catch (err) {
      // Fix: a malformed frame previously threw out of the handler; skip it.
      console.error('Unparseable server message:', err);
      return;
    }
    switch (data.type) {
      case 'conversation_initiation_metadata':
        ready = true;
        console.log('Session:', data.conversation_initiation_metadata_event?.conversation_id);
        break;
      case 'audio':
        if (data.audio_event?.audio_base_64) playAudio(data.audio_event.audio_base_64);
        break;
      case 'agent_response':
        console.log('Agent:', data.agent_response_event?.agent_response);
        break;
      case 'user_transcript':
        console.log('You:', data.user_transcription_event?.user_transcript);
        break;
      case 'interruption':
        // Agent was interrupted: cut all queued playback immediately.
        sources.forEach(s => { try { s.stop(); } catch (e) {} });
        sources = []; nextPlay = 0;
        break;
      case 'ping':
        ws.send(JSON.stringify({ type: 'pong', event_id: data.ping_event?.event_id }));
        break;
    }
  };
  // Fix: socket errors were previously silent; surface them before close fires.
  ws.onerror = (err) => console.error('WebSocket error:', err);
  ws.onclose = () => stopVoice();
}
// 4. Decode a base64 PCM16 chunk (little-endian, 16 kHz mono — matches the
//    encoding this client sends) into an AudioBuffer and schedule it for
//    gapless playback after any previously queued chunk.
function playAudio(base64) {
  if (!audioCtx) return;
  const bin = atob(base64);
  // Fix: truncate to an even byte count — an odd-length payload would make
  // the Int16Array constructor throw a RangeError.
  const byteLen = bin.length - (bin.length % 2);
  if (byteLen === 0) return;
  const bytes = new Uint8Array(byteLen);
  for (let i = 0; i < byteLen; i++) bytes[i] = bin.charCodeAt(i);
  // Reinterprets bytes in host byte order; assumes a little-endian host,
  // matching the wire format (true on effectively all browser platforms).
  const pcm = new Int16Array(bytes.buffer);
  const f32 = new Float32Array(pcm.length);
  for (let i = 0; i < pcm.length; i++) f32[i] = pcm[i] / 32768;
  const buf = audioCtx.createBuffer(1, f32.length, 16000);
  buf.getChannelData(0).set(f32);
  const src = audioCtx.createBufferSource();
  src.buffer = buf;
  src.connect(audioCtx.destination);
  // Queue back-to-back: start at the later of "now" and the end of the queue.
  const now = audioCtx.currentTime;
  if (nextPlay < now) nextPlay = now;
  src.start(nextPlay);
  nextPlay += buf.duration;
  sources.push(src); // tracked so 'interruption'/stopVoice can stop queued audio
  src.onended = () => { const i = sources.indexOf(src); if (i !== -1) sources.splice(i, 1); };
}
// 5. Tear down the session: halt playback, release the microphone, and close
//    the AudioContext and WebSocket. Safe to call multiple times.
function stopVoice() {
  ready = false;
  sources.forEach(s => { try { s.stop(); } catch (e) {} }); // stop() throws if the node never started
  sources = []; nextPlay = 0;
  if (micStream) { micStream.getTracks().forEach(t => t.stop()); micStream = null; }
  if (audioCtx) { audioCtx.close().catch(() => {}); audioCtx = null; }
  if (ws) {
    // Fix: detach the close handler first so ws.close() cannot re-enter
    // stopVoice through the onclose → stopVoice callback set in startVoice.
    ws.onclose = null;
    ws.close();
    ws = null;
  }
}