Calculating request cost
Overview
When using LLM APIs, tracking costs is essential for budgeting and optimization. Cost calculation is decoupled from usage tracking — you use a calculator to compute cost from usage and pricing data.
This example demonstrates how to:
1. Define pricing rates ($/1M tokens) with InferencePricing
2. Calculate cost using FlatRateCostCalculator
3. Compare costs across different models
<?php
require 'examples/boot.php';
use Cognesy\Instructor\StructuredOutput;
use Cognesy\Polyglot\Inference\Data\InferencePricing;
use Cognesy\Polyglot\Inference\Data\InferenceUsage;
use Cognesy\Polyglot\Inference\Pricing\FlatRateCostCalculator;
use Cognesy\Polyglot\Pricing\Cost;
// Target structure for the extraction: passed as `responseModel` to StructuredOutput below.
class User {
// Checked against 25 by the assertions at the end of the script.
public int $age;
// Checked against 'Jason' by the assertions at the end of the script.
public string $name;
}
// Single calculator instance, reused for the detailed breakdown and for the per-model comparison further down.
$calculator = new FlatRateCostCalculator();
/**
 * Prints a cost report for one request to standard output.
 *
 * The report lists the token counters read from $usage, the per-1M-token
 * rates read from $pricing, the total cost, and every entry of the cost
 * breakdown returned by the calculator.
 *
 * @param InferenceUsage $usage Token counters to display and to price.
 * @param InferencePricing $pricing Rates (shown as $/1M tokens) applied to the usage.
 * @param FlatRateCostCalculator $calculator Calculator that turns usage + pricing into a cost.
 */
function printCostBreakdown(InferenceUsage $usage, InferencePricing $pricing, FlatRateCostCalculator $calculator): void {
    $cost = $calculator->calculate($usage, $pricing);

    // Token counters, one per line.
    print "Token Usage:\n";
    printf(" Input tokens: %s\n", $usage->inputTokens);
    printf(" Output tokens: %s\n", $usage->outputTokens);
    printf(" Cache read: %s\n", $usage->cacheReadTokens);
    printf(" Cache write: %s\n", $usage->cacheWriteTokens);
    printf(" Reasoning: %s\n", $usage->reasoningTokens);

    // Rates used for the calculation; %s keeps PHP's default number-to-string form.
    print "\nPricing (\$/1M tokens):\n";
    printf(" Input: \$%s\n", $pricing->inputPerMToken);
    printf(" Output: \$%s\n", $pricing->outputPerMToken);
    printf(" Cache read: \$%s\n", $pricing->cacheReadPerMToken);

    // Total is shown with six decimals because single-request costs are tiny.
    printf("\nTotal cost: \$%s\n", number_format($cost->total, 6));

    print "\nBreakdown:\n";
    foreach ($cost->breakdown as $label => $value) {
        print sprintf(" %-12s \$%.6f\n", $label, $value);
    }
}
// --- Part 1: cost of one request, using explicitly defined pricing ---
print "CALCULATING COST WITH EXPLICIT PRICING\n";
print str_repeat("=", 50) . "\n\n";

$text = "Jason is 25 years old and works as an engineer.";

// Run the extraction; the response exposes both the extracted value and the token usage.
$response = StructuredOutput::using('openai')
    ->with(messages: $text, responseModel: User::class)
    ->response();

// Rates are expressed in $ per 1M tokens.
// NOTE(review): labelled by the author as pricing for the default model gpt-4.1-nano —
// confirm the figures against the provider's current price list.
$pricing = InferencePricing::fromArray([
    'input' => 0.2,
    'output' => 0.8,
    'cacheRead' => 0.05,
]);

printf("TEXT: %s\n\n", $text);
printCostBreakdown($response->usage(), $pricing, $calculator);
// --- Part 2: price the same token usage under several models' rates ---
$divider = str_repeat("=", 50);
printf("\n\n%s\n", $divider);
print "COST COMPARISON ACROSS MODELS\n";
printf("%s\n\n", $divider);

// Usage of the request made above; every model below is priced against it.
$usage = $response->usage();

// Rates in $ per 1M tokens, keyed by the name shown in the output.
$models = [
    'GPT-4o' => ['input' => 2.50, 'output' => 10.0],
    'GPT-4o-mini' => ['input' => 0.15, 'output' => 0.60],
    'Claude 3.5 Sonnet' => ['input' => 3.0, 'output' => 15.0],
    'Claude 3.5 Haiku' => ['input' => 0.80, 'output' => 4.0],
    'Gemini 2.0 Flash' => ['input' => 0.10, 'output' => 0.40],
];

printf("For %s input + %s output tokens:\n\n", $usage->inputTokens, $usage->outputTokens);

foreach ($models as $modelName => $rates) {
    $cost = $calculator->calculate($usage, InferencePricing::fromArray($rates));
    printf(" %-20s \$%.6f\n", $modelName, $cost->total);
}
// Sanity checks for the example run: the extracted object must match the
// sample sentence, and the response must report non-zero token usage.
// Note: assert() is only evaluated when assertions are enabled in the PHP configuration.
assert($response->value()->name === 'Jason');
assert($response->value()->age === 25);
assert($response->usage()->inputTokens > 0);
assert($response->usage()->outputTokens > 0);
?>