Skip to main content

Metrics API Reference

The Metrics API provides analytics and performance data for your LLM usage, enabling you to track costs, optimize routing, and measure performance.
Note: Response schemas shown are illustrative. Actual responses may vary.

Authentication

All metrics endpoints require API key authentication:
Authorization: Bearer YOUR_API_KEY

Endpoints Overview

| Endpoint | Description |
| --- | --- |
| GET /v1/metrics/overview | High-level performance summary |
| GET /v1/metrics/customer | Your account-specific metrics |
| GET /v1/metrics/performance | Model performance by task type |
| GET /v1/metrics/failures | Failure mode analysis |
| GET /v1/metrics/providers | Provider health and reliability |
| GET /v1/metrics/daily | Daily aggregated metrics |

Overview Metrics

Get a high-level summary of your routing performance.

Endpoint

GET https://api.withperf.pro/v1/metrics/overview

Query Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| days | number | 7 | Number of days to analyze (1-90) |

Example Request

curl "https://api.withperf.pro/v1/metrics/overview?days=30" \
  -H "Authorization: Bearer pk_live_abc123"

Response

{
  "period": {
    "start_date": "2024-01-01T00:00:00Z",
    "end_date": "2024-01-30T23:59:59Z",
    "days": 30
  },
  "summary": {
    "total_calls": 45678,
    "total_cost_usd": 234.56,
    "avg_cost_per_call": 0.00514,
    "avg_latency_ms": 1245,
    "success_rate": 0.987
  },
  "routing": {
    "accuracy": 0.923,
    "fallback_rate": 0.034,
    "retry_rate": 0.012
  },
  "models": {
    "gpt-4o-mini": {
      "calls": 23456,
      "percentage": 0.514,
      "avg_cost": 0.00234,
      "avg_latency_ms": 834
    },
    "claude-sonnet-4-5": {
      "calls": 18234,
      "percentage": 0.399,
      "avg_cost": 0.00876,
      "avg_latency_ms": 1456
    },
    "gpt-4o": {
      "calls": 3988,
      "percentage": 0.087,
      "avg_cost": 0.01234,
      "avg_latency_ms": 1876
    }
  },
  "task_distribution": {
    "extraction": 0.34,
    "classification": 0.23,
    "summarization": 0.18,
    "reasoning": 0.12,
    "code": 0.08,
    "writing": 0.05
  }
}

Customer Metrics

Get detailed metrics specific to your API key and usage patterns.

Endpoint

GET https://api.withperf.pro/v1/metrics/customer

Query Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| days | number | 30 | Number of days to analyze (1-90) |

Example Request

curl "https://api.withperf.pro/v1/metrics/customer?days=30" \
  -H "Authorization: Bearer pk_live_abc123"

Response

{
  "account_id": "acct_abc123",
  "period": {
    "start_date": "2024-01-01T00:00:00Z",
    "end_date": "2024-01-30T23:59:59Z"
  },
  "usage": {
    "total_calls": 12456,
    "total_tokens": 5678901,
    "total_cost_usd": 123.45,
    "avg_calls_per_day": 414
  },
  "cost_analysis": {
    "current_month": 123.45,
    "previous_month": 156.78,
    "change_percentage": -21.2,
    "projected_month_end": 145.67,
    "vs_gpt4o_only": {
      "cost_if_gpt4o": 412.34,
      "savings_usd": 288.89,
      "savings_percentage": 70.1
    }
  },
  "quality_preferences": {
    "cost_vs_quality_ratio": 0.65,
    "latency_sensitivity": 0.42,
    "preferred_models": ["gpt-4o-mini", "claude-sonnet-4-5"]
  },
  "task_profile": {
    "dominant_tasks": ["extraction", "classification"],
    "complexity_avg": 0.42,
    "task_overrides": {
      "reasoning": "claude-sonnet-4-5"
    }
  },
  "behavior_insights": {
    "peak_hours": [9, 10, 11, 14, 15, 16],
    "avg_conversation_length": 4.2,
    "retry_rate": 0.023,
    "cost_ceiling_hit_rate": 0.087
  }
}

Performance Metrics

Analyze model performance broken down by task type.

Endpoint

GET https://api.withperf.pro/v1/metrics/performance

Query Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| days | number | 7 | Number of days to analyze (1-90) |
| task_type | string | null | Filter by specific task type |

Example Request

curl "https://api.withperf.pro/v1/metrics/performance?days=7&task_type=extraction" \
  -H "Authorization: Bearer pk_live_abc123"

Response

{
  "period": {
    "start_date": "2024-01-24T00:00:00Z",
    "end_date": "2024-01-30T23:59:59Z"
  },
  "by_task_type": {
    "extraction": {
      "total_calls": 5678,
      "models": {
        "gpt-4o-mini": {
          "calls": 4234,
          "success_rate": 0.987,
          "avg_cost_usd": 0.00123,
          "avg_latency_ms": 567,
          "quality_score": 0.92
        },
        "claude-haiku-4-5": {
          "calls": 1444,
          "success_rate": 0.991,
          "avg_cost_usd": 0.00098,
          "avg_latency_ms": 423,
          "quality_score": 0.94
        }
      },
      "optimal_model": "claude-haiku-4-5",
      "optimal_reason": "Best cost/quality balance for structured extraction"
    },
    "reasoning": {
      "total_calls": 1234,
      "models": {
        "claude-sonnet-4-5": {
          "calls": 987,
          "success_rate": 0.956,
          "avg_cost_usd": 0.00876,
          "avg_latency_ms": 1876,
          "quality_score": 0.95
        },
        "gpt-4o": {
          "calls": 247,
          "success_rate": 0.943,
          "avg_cost_usd": 0.01234,
          "avg_latency_ms": 2134,
          "quality_score": 0.93
        }
      },
      "optimal_model": "claude-sonnet-4-5",
      "optimal_reason": "Superior reasoning with better cost efficiency"
    }
  },
  "recommendations": [
    {
      "task_type": "extraction",
      "current_model": "gpt-4o-mini",
      "recommended_model": "claude-haiku-4-5",
      "potential_savings_usd": 14.23,
      "quality_improvement": 0.02
    }
  ]
}

Failure Analysis

Understand why calls fail and which models are most reliable.

Endpoint

GET https://api.withperf.pro/v1/metrics/failures

Query Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| days | number | 7 | Number of days to analyze (1-90) |

Example Request

curl "https://api.withperf.pro/v1/metrics/failures?days=7" \
  -H "Authorization: Bearer pk_live_abc123"

Response

{
  "period": {
    "start_date": "2024-01-24T00:00:00Z",
    "end_date": "2024-01-30T23:59:59Z"
  },
  "summary": {
    "total_failures": 234,
    "failure_rate": 0.013,
    "retries_succeeded": 156,
    "fallback_succeeded": 67,
    "unrecoverable": 11
  },
  "by_failure_mode": {
    "format_violation": {
      "count": 89,
      "percentage": 0.38,
      "affected_models": ["gpt-4o-mini", "claude-haiku-4-5"],
      "common_triggers": ["complex JSON structures", "nested arrays"]
    },
    "refusal": {
      "count": 56,
      "percentage": 0.24,
      "affected_models": ["gpt-4o", "claude-sonnet-4-5"],
      "common_triggers": ["policy violations", "ambiguous requests"]
    },
    "hallucination": {
      "count": 34,
      "percentage": 0.15,
      "affected_models": ["gpt-4o-mini"],
      "common_triggers": ["data extraction from noise", "edge cases"]
    },
    "incomplete": {
      "count": 31,
      "percentage": 0.13,
      "affected_models": ["claude-haiku-4-5"],
      "common_triggers": ["max tokens exceeded", "complex outputs"]
    },
    "reasoning_error": {
      "count": 24,
      "percentage": 0.10,
      "affected_models": ["gpt-4o-mini"],
      "common_triggers": ["multi-step logic", "mathematical reasoning"]
    }
  },
  "by_model": {
    "gpt-4o-mini": {
      "total_calls": 8234,
      "failures": 123,
      "failure_rate": 0.015,
      "top_failure_modes": ["hallucination", "format_violation"]
    },
    "claude-sonnet-4-5": {
      "total_calls": 5678,
      "failures": 67,
      "failure_rate": 0.012,
      "top_failure_modes": ["refusal", "format_violation"]
    }
  },
  "mitigation_recommendations": [
    {
      "issue": "High format_violation rate for complex JSON",
      "recommendation": "Add schema validation to prompts",
      "expected_improvement": "40% reduction in failures"
    },
    {
      "issue": "Hallucinations in data extraction",
      "recommendation": "Use Claude Haiku for structured extraction",
      "expected_improvement": "60% reduction in hallucinations"
    }
  ]
}

Provider Health

Monitor provider reliability and performance trends.

Endpoint

GET https://api.withperf.pro/v1/metrics/providers

Query Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| hours | number | 24 | Number of hours to analyze (1-168) |

Example Request

curl "https://api.withperf.pro/v1/metrics/providers?hours=24" \
  -H "Authorization: Bearer pk_live_abc123"

Response

{
  "period": {
    "start_time": "2024-01-30T10:00:00Z",
    "end_time": "2024-01-31T10:00:00Z",
    "hours": 24
  },
  "providers": {
    "openai": {
      "status": "operational",
      "uptime": 0.998,
      "avg_latency_ms": 1234,
      "p95_latency_ms": 2345,
      "p99_latency_ms": 3456,
      "error_rate": 0.002,
      "rate_limit_hits": 12,
      "incidents": []
    },
    "anthropic": {
      "status": "operational",
      "uptime": 0.999,
      "avg_latency_ms": 1567,
      "p95_latency_ms": 2876,
      "p99_latency_ms": 4123,
      "error_rate": 0.001,
      "rate_limit_hits": 3,
      "incidents": []
    }
  },
  "trends": {
    "openai": {
      "latency_trend": "stable",
      "error_trend": "improving",
      "vs_previous_period": {
        "latency_change": -0.05,
        "error_change": -0.43
      }
    },
    "anthropic": {
      "latency_trend": "stable",
      "error_trend": "stable",
      "vs_previous_period": {
        "latency_change": 0.02,
        "error_change": 0.00
      }
    }
  },
  "recommendations": [
    {
      "provider": "openai",
      "message": "Experiencing slightly elevated latency during peak hours (2-4pm UTC)",
      "action": "Consider using Anthropic for latency-sensitive tasks during this window"
    }
  ]
}

Daily Metrics

Get daily aggregated metrics for trend analysis and reporting.

Endpoint

GET https://api.withperf.pro/v1/metrics/daily

Query Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| days | number | 30 | Number of days to retrieve (1-90) |

Example Request

curl "https://api.withperf.pro/v1/metrics/daily?days=30" \
  -H "Authorization: Bearer pk_live_abc123"

Response

{
  "period": {
    "start_date": "2024-01-01",
    "end_date": "2024-01-30"
  },
  "daily_data": [
    {
      "date": "2024-01-01",
      "total_calls": 456,
      "total_cost_usd": 2.34,
      "avg_latency_ms": 1234,
      "success_rate": 0.987,
      "model_distribution": {
        "gpt-4o-mini": 234,
        "claude-sonnet-4-5": 178,
        "gpt-4o": 44
      },
      "task_distribution": {
        "extraction": 145,
        "classification": 98,
        "summarization": 76,
        "reasoning": 54,
        "code": 45,
        "writing": 38
      }
    },
    {
      "date": "2024-01-02",
      "total_calls": 523,
      "total_cost_usd": 2.87,
      "avg_latency_ms": 1187,
      "success_rate": 0.991,
      "model_distribution": {
        "gpt-4o-mini": 289,
        "claude-sonnet-4-5": 198,
        "gpt-4o": 36
      },
      "task_distribution": {
        "extraction": 167,
        "classification": 112,
        "summarization": 89,
        "reasoning": 61,
        "code": 52,
        "writing": 42
      }
    }
  ],
  "trends": {
    "calls_trend": "increasing",
    "cost_trend": "stable",
    "quality_trend": "improving",
    "avg_daily_growth": 0.034
  }
}

Use Cases

Dashboard Building

// Fetch overview for homepage dashboard
const overview = await fetch(
  'https://api.withperf.pro/v1/metrics/overview?days=7',
  { headers: { Authorization: `Bearer ${API_KEY}` } }
).then(r => r.json());

// Display KPIs
displayKPI('Total Calls', overview.summary.total_calls);
displayKPI('Avg Cost', `$${overview.summary.avg_cost_per_call.toFixed(5)}`);
displayKPI('Success Rate', `${(overview.summary.success_rate * 100).toFixed(1)}%`);

Cost Monitoring

# Alert if costs exceed threshold
customer = requests.get(
    "https://api.withperf.pro/v1/metrics/customer?days=30",
    headers={"Authorization": f"Bearer {API_KEY}"}
).json()

projected_cost = customer['cost_analysis']['projected_month_end']
if projected_cost > BUDGET_LIMIT:
    send_alert(f"Projected cost ${projected_cost} exceeds budget ${BUDGET_LIMIT}")

Performance Optimization

# Find optimization opportunities
performance = requests.get(
    "https://api.withperf.pro/v1/metrics/performance?days=30",
    headers={"Authorization": f"Bearer {API_KEY}"}
).json()

for rec in performance['recommendations']:
    print(f"Switch {rec['task_type']} to {rec['recommended_model']}")
    print(f"  Savings: ${rec['potential_savings_usd']:.2f}")
    print(f"  Quality: +{rec['quality_improvement']:.1%}")

Rate Limits

The Metrics API has separate rate limits:

| Tier | Requests/Minute |
| --- | --- |
| Free | 10 |
| Pro | 60 |
| Enterprise | 300 |

Best Practices

  1. Cache metrics data: Results change slowly, so cache responses for 5-15 minutes
  2. Use appropriate time ranges: Longer periods for trends, shorter for real-time monitoring
  3. Set up alerts: Monitor projected_month_end and failure_rate
  4. Review weekly: Check recommendations for optimization opportunities

Support