Skip to main content

Metrics API Reference

The Metrics API provides comprehensive analytics and performance data for your LLM usage, enabling you to track costs, optimize routing, and measure ROI.

Authentication

All metrics endpoints require API key authentication. Send your API key as a Bearer token in the Authorization header:
Authorization: Bearer YOUR_API_KEY

Endpoints Overview

| Endpoint | Description |
| --- | --- |
| GET /v1/metrics/overview | High-level performance summary |
| GET /v1/metrics/customer | Your account-specific metrics |
| GET /v1/metrics/performance | Model performance by task type |
| GET /v1/metrics/failures | Failure mode analysis |
| GET /v1/metrics/providers | Provider health and reliability |
| GET /v1/metrics/daily | Daily aggregated metrics |

Overview Metrics

Get a high-level summary of your routing performance.

Endpoint

GET https://api.withperf.pro/v1/metrics/overview

Query Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| days | number | 7 | Number of days to analyze (1-90) |

Example Request

curl https://api.withperf.pro/v1/metrics/overview?days=30 \
  -H "Authorization: Bearer pk_live_abc123"

Response

{
  "period": {
    "start_date": "2024-01-01T00:00:00Z",
    "end_date": "2024-01-30T23:59:59Z",
    "days": 30
  },
  "summary": {
    "total_calls": 45678,
    "total_cost_usd": 234.56,
    "avg_cost_per_call": 0.00514,
    "avg_latency_ms": 1245,
    "success_rate": 0.987
  },
  "routing": {
    "accuracy": 0.923,
    "fallback_rate": 0.034,
    "retry_rate": 0.012
  },
  "models": {
    "gpt-4o-mini": {
      "calls": 23456,
      "percentage": 0.514,
      "avg_cost": 0.00234,
      "avg_latency_ms": 834
    },
    "claude-sonnet-4-5": {
      "calls": 18234,
      "percentage": 0.399,
      "avg_cost": 0.00876,
      "avg_latency_ms": 1456
    },
    "gpt-4o": {
      "calls": 3988,
      "percentage": 0.087,
      "avg_cost": 0.01234,
      "avg_latency_ms": 1876
    }
  },
  "task_distribution": {
    "extraction": 0.34,
    "classification": 0.23,
    "summarization": 0.18,
    "reasoning": 0.12,
    "code": 0.08,
    "writing": 0.05
  }
}

Customer Metrics

Get detailed metrics specific to your API key and usage patterns.

Endpoint

GET https://api.withperf.pro/v1/metrics/customer

Query Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| days | number | 30 | Number of days to analyze (1-90) |

Example Request

curl https://api.withperf.pro/v1/metrics/customer?days=30 \
  -H "Authorization: Bearer pk_live_abc123"

Response

{
  "account_id": "acct_abc123",
  "period": {
    "start_date": "2024-01-01T00:00:00Z",
    "end_date": "2024-01-30T23:59:59Z"
  },
  "usage": {
    "total_calls": 12456,
    "total_tokens": 5678901,
    "total_cost_usd": 123.45,
    "avg_calls_per_day": 414
  },
  "cost_analysis": {
    "current_month": 123.45,
    "previous_month": 156.78,
    "change_percentage": -21.2,
    "projected_month_end": 145.67,
    "vs_gpt4o_only": {
      "cost_if_gpt4o": 412.34,
      "savings_usd": 288.89,
      "savings_percentage": 70.1
    }
  },
  "quality_preferences": {
    "cost_vs_quality_ratio": 0.65,
    "latency_sensitivity": 0.42,
    "preferred_models": ["gpt-4o-mini", "claude-sonnet-4-5"]
  },
  "task_profile": {
    "dominant_tasks": ["extraction", "classification"],
    "complexity_avg": 0.42,
    "task_overrides": {
      "reasoning": "claude-sonnet-4-5"
    }
  },
  "behavior_insights": {
    "peak_hours": [9, 10, 11, 14, 15, 16],
    "avg_conversation_length": 4.2,
    "retry_rate": 0.023,
    "cost_ceiling_hit_rate": 0.087
  }
}

Performance Metrics

Analyze model performance broken down by task type.

Endpoint

GET https://api.withperf.pro/v1/metrics/performance

Query Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| days | number | 7 | Number of days to analyze (1-90) |
| task_type | string | null | Filter by specific task type |

Example Request

curl "https://api.withperf.pro/v1/metrics/performance?days=7&task_type=extraction" \
  -H "Authorization: Bearer pk_live_abc123"

Response

{
  "period": {
    "start_date": "2024-01-24T00:00:00Z",
    "end_date": "2024-01-30T23:59:59Z"
  },
  "by_task_type": {
    "extraction": {
      "total_calls": 5678,
      "models": {
        "gpt-4o-mini": {
          "calls": 4234,
          "success_rate": 0.987,
          "avg_cost_usd": 0.00123,
          "avg_latency_ms": 567,
          "quality_score": 0.92
        },
        "claude-haiku-4-5": {
          "calls": 1444,
          "success_rate": 0.991,
          "avg_cost_usd": 0.00098,
          "avg_latency_ms": 423,
          "quality_score": 0.94
        }
      },
      "optimal_model": "claude-haiku-4-5",
      "optimal_reason": "Best cost/quality balance for structured extraction"
    },
    "reasoning": {
      "total_calls": 1234,
      "models": {
        "claude-sonnet-4-5": {
          "calls": 987,
          "success_rate": 0.956,
          "avg_cost_usd": 0.00876,
          "avg_latency_ms": 1876,
          "quality_score": 0.95
        },
        "gpt-4o": {
          "calls": 247,
          "success_rate": 0.943,
          "avg_cost_usd": 0.01234,
          "avg_latency_ms": 2134,
          "quality_score": 0.93
        }
      },
      "optimal_model": "claude-sonnet-4-5",
      "optimal_reason": "Superior reasoning with better cost efficiency"
    }
  },
  "recommendations": [
    {
      "task_type": "extraction",
      "current_model": "gpt-4o-mini",
      "recommended_model": "claude-haiku-4-5",
      "potential_savings_usd": 14.23,
      "quality_improvement": 0.02
    }
  ]
}

Failure Analysis

Understand why calls fail and which models are most reliable.

Endpoint

GET https://api.withperf.pro/v1/metrics/failures

Query Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| days | number | 7 | Number of days to analyze (1-90) |

Example Request

curl https://api.withperf.pro/v1/metrics/failures?days=7 \
  -H "Authorization: Bearer pk_live_abc123"

Response

{
  "period": {
    "start_date": "2024-01-24T00:00:00Z",
    "end_date": "2024-01-30T23:59:59Z"
  },
  "summary": {
    "total_failures": 234,
    "failure_rate": 0.013,
    "retries_succeeded": 156,
    "fallback_succeeded": 67,
    "unrecoverable": 11
  },
  "by_failure_mode": {
    "format_violation": {
      "count": 89,
      "percentage": 0.38,
      "affected_models": ["gpt-4o-mini", "claude-haiku-4-5"],
      "common_triggers": ["complex JSON structures", "nested arrays"]
    },
    "refusal": {
      "count": 56,
      "percentage": 0.24,
      "affected_models": ["gpt-4o", "claude-sonnet-4-5"],
      "common_triggers": ["policy violations", "ambiguous requests"]
    },
    "hallucination": {
      "count": 34,
      "percentage": 0.15,
      "affected_models": ["gpt-4o-mini"],
      "common_triggers": ["data extraction from noise", "edge cases"]
    },
    "incomplete": {
      "count": 31,
      "percentage": 0.13,
      "affected_models": ["claude-haiku-4-5"],
      "common_triggers": ["max tokens exceeded", "complex outputs"]
    },
    "reasoning_error": {
      "count": 24,
      "percentage": 0.10,
      "affected_models": ["gpt-4o-mini"],
      "common_triggers": ["multi-step logic", "mathematical reasoning"]
    }
  },
  "by_model": {
    "gpt-4o-mini": {
      "total_calls": 8234,
      "failures": 123,
      "failure_rate": 0.015,
      "top_failure_modes": ["hallucination", "format_violation"]
    },
    "claude-sonnet-4-5": {
      "total_calls": 5678,
      "failures": 67,
      "failure_rate": 0.012,
      "top_failure_modes": ["refusal", "format_violation"]
    }
  },
  "mitigation_recommendations": [
    {
      "issue": "High format_violation rate for complex JSON",
      "recommendation": "Add schema validation to prompts",
      "expected_improvement": "40% reduction in failures"
    },
    {
      "issue": "Hallucinations in data extraction",
      "recommendation": "Use Claude Haiku for structured extraction",
      "expected_improvement": "60% reduction in hallucinations"
    }
  ]
}

Provider Health

Monitor provider reliability and performance trends.

Endpoint

GET https://api.withperf.pro/v1/metrics/providers

Query Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| hours | number | 24 | Number of hours to analyze (1-168) |

Example Request

curl https://api.withperf.pro/v1/metrics/providers?hours=24 \
  -H "Authorization: Bearer pk_live_abc123"

Response

{
  "period": {
    "start_time": "2024-01-30T10:00:00Z",
    "end_time": "2024-01-31T10:00:00Z",
    "hours": 24
  },
  "providers": {
    "openai": {
      "status": "operational",
      "uptime": 0.998,
      "avg_latency_ms": 1234,
      "p95_latency_ms": 2345,
      "p99_latency_ms": 3456,
      "error_rate": 0.002,
      "rate_limit_hits": 12,
      "incidents": []
    },
    "anthropic": {
      "status": "operational",
      "uptime": 0.999,
      "avg_latency_ms": 1567,
      "p95_latency_ms": 2876,
      "p99_latency_ms": 4123,
      "error_rate": 0.001,
      "rate_limit_hits": 3,
      "incidents": []
    }
  },
  "trends": {
    "openai": {
      "latency_trend": "stable",
      "error_trend": "improving",
      "vs_previous_period": {
        "latency_change": -0.05,
        "error_change": -0.43
      }
    },
    "anthropic": {
      "latency_trend": "stable",
      "error_trend": "stable",
      "vs_previous_period": {
        "latency_change": 0.02,
        "error_change": 0.00
      }
    }
  },
  "recommendations": [
    {
      "provider": "openai",
      "message": "Experiencing slightly elevated latency during peak hours (2-4pm UTC)",
      "action": "Consider using Anthropic for latency-sensitive tasks during this window"
    }
  ]
}

Daily Metrics

Get daily aggregated metrics for trend analysis and reporting.

Endpoint

GET https://api.withperf.pro/v1/metrics/daily

Query Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| days | number | 30 | Number of days to retrieve (1-90) |

Example Request

curl https://api.withperf.pro/v1/metrics/daily?days=30 \
  -H "Authorization: Bearer pk_live_abc123"

Response

{
  "period": {
    "start_date": "2024-01-01",
    "end_date": "2024-01-30"
  },
  "daily_data": [
    {
      "date": "2024-01-01",
      "total_calls": 456,
      "total_cost_usd": 2.34,
      "avg_latency_ms": 1234,
      "success_rate": 0.987,
      "model_distribution": {
        "gpt-4o-mini": 234,
        "claude-sonnet-4-5": 178,
        "gpt-4o": 44
      },
      "task_distribution": {
        "extraction": 145,
        "classification": 98,
        "summarization": 76,
        "reasoning": 54,
        "code": 45,
        "writing": 38
      }
    },
    {
      "date": "2024-01-02",
      "total_calls": 523,
      "total_cost_usd": 2.87,
      "avg_latency_ms": 1187,
      "success_rate": 0.991,
      "model_distribution": {
        "gpt-4o-mini": 289,
        "claude-sonnet-4-5": 198,
        "gpt-4o": 36
      },
      "task_distribution": {
        "extraction": 167,
        "classification": 112,
        "summarization": 89,
        "reasoning": 61,
        "code": 52,
        "writing": 42
      }
    }
  ],
  "trends": {
    "calls_trend": "increasing",
    "cost_trend": "stable",
    "quality_trend": "improving",
    "avg_daily_growth": 0.034
  }
}

Use Cases

Dashboard Building

// Fetch overview for homepage dashboard
const overview = await fetch(
  'https://api.withperf.pro/v1/metrics/overview?days=7',
  { headers: { Authorization: `Bearer ${API_KEY}` } }
).then(r => r.json());

// Display KPIs
displayKPI('Total Calls', overview.summary.total_calls);
displayKPI('Avg Cost', `$${overview.summary.avg_cost_per_call.toFixed(5)}`);
displayKPI('Success Rate', `${(overview.summary.success_rate * 100).toFixed(1)}%`);

Cost Monitoring

# Alert if costs exceed threshold
customer = requests.get(
    "https://api.withperf.pro/v1/metrics/customer?days=30",
    headers={"Authorization": f"Bearer {API_KEY}"}
).json()

projected_cost = customer['cost_analysis']['projected_month_end']
if projected_cost > BUDGET_LIMIT:
    send_alert(f"Projected cost ${projected_cost} exceeds budget ${BUDGET_LIMIT}")

Performance Optimization

# Find optimization opportunities
performance = requests.get(
    "https://api.withperf.pro/v1/metrics/performance?days=30",
    headers={"Authorization": f"Bearer {API_KEY}"}
).json()

for rec in performance['recommendations']:
    print(f"Switch {rec['task_type']} to {rec['recommended_model']}")
    print(f"  Savings: ${rec['potential_savings_usd']:.2f}")
    print(f"  Quality: +{rec['quality_improvement']:.1%}")

Rate Limits

The Metrics API has separate rate limits:

| Tier | Requests/Minute |
| --- | --- |
| Free | 10 |
| Pro | 60 |
| Enterprise | 300 |

Best Practices

  1. Cache metrics data: Results change slowly, so cache responses for 5-15 minutes
  2. Use appropriate time ranges: Longer periods for trends, shorter for real-time monitoring
  3. Set up alerts: Monitor projected_month_end and failure_rate
  4. Review weekly: Check recommendations for optimization opportunities

Support