From 73194c7505d5b17be899387f13d7ee2184afb8fa Mon Sep 17 00:00:00 2001 From: waldekmastykarz Date: Sat, 11 Apr 2026 16:53:36 +0200 Subject: [PATCH 1/2] Add cached input pricing support in cost calculations. Closes #1582 --- DevProxy.Abstractions/LanguageModel/PricesData.cs | 12 ++++++++++-- .../Inspection/LanguageModelPricingLoader.cs | 9 ++++++++- DevProxy.Plugins/Inspection/OpenAITelemetryPlugin.cs | 6 ++++-- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/DevProxy.Abstractions/LanguageModel/PricesData.cs b/DevProxy.Abstractions/LanguageModel/PricesData.cs index cfe2cebf..c951121c 100644 --- a/DevProxy.Abstractions/LanguageModel/PricesData.cs +++ b/DevProxy.Abstractions/LanguageModel/PricesData.cs @@ -9,6 +9,7 @@ namespace DevProxy.Abstractions.LanguageModel; public class ModelPrices { public double Input { get; set; } + public double CachedInput { get; set; } public double Output { get; set; } } @@ -44,7 +45,7 @@ public bool TryGetModelPrices(string modelName, out ModelPrices? prices) return false; } - public (double Input, double Output) CalculateCost(string modelName, long inputTokens, long outputTokens) + public (double Input, double Output) CalculateCost(string modelName, long inputTokens, long outputTokens, long cachedInputTokens = 0) { if (!TryGetModelPrices(modelName, out var prices)) { @@ -54,7 +55,14 @@ public bool TryGetModelPrices(string modelName, out ModelPrices? prices) Debug.Assert(prices != null, "Prices data should not be null here."); // Prices in the data are per 1M tokens - var inputCost = prices.Input * (inputTokens / 1_000_000.0); + // When cached input pricing is available, separate cached tokens + // from regular input tokens for accurate cost calculation + var regularInputTokens = inputTokens - cachedInputTokens; + var inputCost = prices.Input * (regularInputTokens / 1_000_000.0); + if (cachedInputTokens > 0 && prices.CachedInput > 0) + { + inputCost += prices.CachedInput * (cachedInputTokens / 1_000_000.0); + } var outputCost = prices.Output * (outputTokens / 1_000_000.0); return (inputCost, outputCost); diff --git a/DevProxy.Plugins/Inspection/LanguageModelPricingLoader.cs b/DevProxy.Plugins/Inspection/LanguageModelPricingLoader.cs index 2b3cff38..bee49a76 100644 --- a/DevProxy.Plugins/Inspection/LanguageModelPricingLoader.cs +++ b/DevProxy.Plugins/Inspection/LanguageModelPricingLoader.cs @@ -40,11 +40,18 @@ protected override void LoadData(string fileContents) if (modelProperty.Value.TryGetProperty("input", out var inputElement) && modelProperty.Value.TryGetProperty("output", out var outputElement)) { - pricesData[modelName] = new() + var modelPrices = new ModelPrices { Input = inputElement.GetDouble(), Output = outputElement.GetDouble() }; + + if (modelProperty.Value.TryGetProperty("cached_input", out var cachedInputElement)) + { + modelPrices.CachedInput = cachedInputElement.GetDouble(); + } + + pricesData[modelName] = modelPrices; } } diff --git a/DevProxy.Plugins/Inspection/OpenAITelemetryPlugin.cs b/DevProxy.Plugins/Inspection/OpenAITelemetryPlugin.cs index e76064b1..58836067 100644 --- a/DevProxy.Plugins/Inspection/OpenAITelemetryPlugin.cs +++ b/DevProxy.Plugins/Inspection/OpenAITelemetryPlugin.cs @@ -931,7 +931,8 @@ private void RecordUsageMetrics(Activity activity, OpenAIRequest request, OpenAI return; } - var (inputCost, outputCost) = Configuration.Prices.CalculateCost(response.Model, usage.PromptTokens, usage.CompletionTokens); + var cachedTokens = usage.PromptTokensDetails?.CachedTokens ?? 0L; + var (inputCost, outputCost) = Configuration.Prices.CalculateCost(response.Model, usage.PromptTokens, usage.CompletionTokens, cachedTokens); if (inputCost > 0) { @@ -1042,7 +1043,8 @@ private List GetReportModelUsa return usagePerModel; } - var (inputCost, outputCost) = Configuration.Prices.CalculateCost(response.Model, usage.PromptTokens, usage.CompletionTokens); + var cachedTokens = usage.PromptTokensDetails?.CachedTokens ?? 0L; + var (inputCost, outputCost) = Configuration.Prices.CalculateCost(response.Model, usage.PromptTokens, usage.CompletionTokens, cachedTokens); if (inputCost > 0) { From 6e64f46fb663fedcc945a6649870e50e95e805b1 Mon Sep 17 00:00:00 2001 From: waldekmastykarz Date: Sat, 11 Apr 2026 18:24:33 +0200 Subject: [PATCH 2/2] Simplify cached input cost calculation --- DevProxy.Abstractions/LanguageModel/PricesData.cs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/DevProxy.Abstractions/LanguageModel/PricesData.cs b/DevProxy.Abstractions/LanguageModel/PricesData.cs index c951121c..6f8a0509 100644 --- a/DevProxy.Abstractions/LanguageModel/PricesData.cs +++ b/DevProxy.Abstractions/LanguageModel/PricesData.cs @@ -54,15 +54,13 @@ public bool TryGetModelPrices(string modelName, out ModelPrices? prices) Debug.Assert(prices != null, "Prices data should not be null here."); - // Prices in the data are per 1M tokens - // When cached input pricing is available, separate cached tokens - // from regular input tokens for accurate cost calculation + // Prices in the data are per 1M tokens. + // When no cached input price is configured, fall back to the + // regular input price so all tokens are billed correctly. + var effectiveCachedPrice = prices.CachedInput > 0 ? prices.CachedInput : prices.Input; var regularInputTokens = inputTokens - cachedInputTokens; - var inputCost = prices.Input * (regularInputTokens / 1_000_000.0); - if (cachedInputTokens > 0 && prices.CachedInput > 0) - { - inputCost += prices.CachedInput * (cachedInputTokens / 1_000_000.0); - } + var inputCost = (prices.Input * (regularInputTokens / 1_000_000.0)) + + (effectiveCachedPrice * (cachedInputTokens / 1_000_000.0)); var outputCost = prices.Output * (outputTokens / 1_000_000.0); return (inputCost, outputCost);