fix(dashscope): enhance usage parsing robustness to prevent VSCode crashes (#2425)

* fix(dashscope): enhance usage parsing robustness to prevent VSCode crashes

  Enhanced build_anthropic_usage_from_responses() to handle null, missing, empty, and partial usage fields gracefully. This prevents VSCode Extension crashes with "Cannot read properties of null (reading 'output_tokens')" when connecting to DashScope (Alibaba Cloud Bailian) models.

  Changes:
  - Added defensive null checks and empty object detection
  - Implemented OpenAI field name fallbacks (prompt_tokens/completion_tokens)
  - Added comprehensive logging for malformed usage scenarios
  - Fixed streaming SSE event handlers with null-safe usage access
  - Preserved cache token fields even when input/output tokens are missing

  This ensures the proxy never crashes on malformed Responses API usage objects, returning valid Anthropic-compatible usage structures (input_tokens/output_tokens) in all cases.

* fix(proxy): tighten Responses API usage fix per review

  - Drop redundant fallback in streaming.rs Chat Completions path; the existing if-let-Some guard already prevents usage:null, so the extra layer was dead code and caused a fmt-breaking indentation issue.
  - Demote partial-usage warn to debug. Streaming chunks legitimately arrive with partial token counts and the warn-level log was noisy.
  - Rewrite CHANGELOG entry: reference #2422, broaden scope from DashScope-only to all api_format=openai_responses users (Codex OAuth is the strongest signal; DashScope compatible-mode/v1/responses is the original report).
  - cargo fmt to clear 12 formatting differences vs main.

---------

Co-authored-by: Jason <farion1231@gmail.com>
2026-05-23 06:04:43 +08:00 · 2026-04-30 22:37:54 +08:00
parent 7965862e66
commit 693c36a12a
3 changed files with 180 additions and 10 deletions
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+### Fixed
+
+- **OpenAI Responses API usage parsing robustness**: Hardened `build_anthropic_usage_from_responses()` and the Responses → Anthropic SSE translator so a missing or malformed upstream `usage` no longer produces `"usage": null` in `message_delta`. This unblocks strict Anthropic clients (notably the VSCode Claude Code extension) that crashed with "Cannot read properties of null (reading 'output_tokens')" against providers such as Codex OAuth and DashScope's `compatible-mode/v1/responses` endpoint. Added OpenAI field-name fallbacks (`prompt_tokens` / `completion_tokens`), null/empty/partial object handling, and preserved cache token fields even when input/output tokens are missing (#2422).
+
 ## [3.14.1] - 2026-04-23

 Development since v3.14.0 focuses on Codex OAuth stability, tray usage visibility, Skills import/install reliability, Gemini session restore paths, and simplifying Hermes configuration health handling.
@@ -170,9 +170,12 @@ pub fn create_anthropic_sse_stream_from_responses<E: std::error::Error + Send +
                                }

                                has_sent_message_start = true;
-                                // Build usage with cache tokens if available
+                                // Build usage with defensive null handling
+                                // Some() wrapper ensures build function always receives valid input
+                                // Fallback to empty object {} if usage field missing, ensuring message_start
+                                // event always has valid usage structure for VSCode Extension compatibility
                                let start_usage = build_anthropic_usage_from_responses(
-                                    response_obj.get("usage"),
+                                    Some(response_obj.get("usage").unwrap_or(&json!({}))),
                                );

                                let event = json!({
@@ -670,9 +673,12 @@ pub fn create_anthropic_sse_stream_from_responses<E: std::error::Error + Send +
                                }
                                fallback_open_index = None;

-                                let usage_json = response_obj.get("usage").map(|u| {
-                                    build_anthropic_usage_from_responses(Some(u))
-                                });
+                                // Defensive: Always build usage_json, even if usage field missing
+                                // Some() wrapper with fallback to {} ensures build_anthropic_usage_from_responses
+                                // always receives valid input, preventing null pointer errors in VSCode Extension
+                                let usage_json = build_anthropic_usage_from_responses(
+                                    Some(response_obj.get("usage").unwrap_or(&json!({})))
+                                );

                                // Emit message_delta (with usage + stop_reason)
                                let delta_event = json!({
@@ -218,12 +218,28 @@ pub(crate) fn map_responses_stop_reason(

 /// Build Anthropic-style usage JSON from Responses API usage, including cache tokens.
 ///
-/// Priority order:
+/// **Robustness Features**:
+/// - Handles null, missing, empty objects, and partial objects gracefully
+/// - Supports OpenAI field name variants (prompt_tokens/completion_tokens) as fallbacks
+/// - Always returns valid structure: {"input_tokens": N, "output_tokens": N}
+/// - Preserves cache token fields even when input/output tokens are missing
+///
+/// **Field Name Resolution Priority**:
+/// 1. input_tokens: Anthropic `input_tokens` → OpenAI `prompt_tokens` → default 0
+/// 2. output_tokens: Anthropic `output_tokens` → OpenAI `completion_tokens` → default 0
+/// 3. cache_read_input_tokens: Direct field → nested input_tokens_details.cached_tokens → prompt_tokens_details.cached_tokens
+/// 4. cache_creation_input_tokens: Direct field only
+///
+/// **Cache Token Priority Order**:
 /// 1. OpenAI nested details (`input_tokens_details.cached_tokens`, `prompt_tokens_details.cached_tokens`) as initial value
 /// 2. Direct Anthropic-style fields (`cache_read_input_tokens`, `cache_creation_input_tokens`) override if present
+///
+/// **Logging**:
+/// - Warns on empty objects {}; partial objects (only one of input/output non-zero) are logged at debug level
+/// - Debug logs when using OpenAI field name fallbacks
 pub(crate) fn build_anthropic_usage_from_responses(usage: Option<&Value>) -> Value {
    let u = match usage {
-        Some(v) if !v.is_null() => v,
+        Some(v) if !v.is_null() && v.is_object() => v,
        _ => {
            return json!({
                "input_tokens": 0,
@@ -232,15 +248,56 @@ pub(crate) fn build_anthropic_usage_from_responses(usage: Option<&Value>) -> Val
        }
    };

-    let input = u.get("input_tokens").and_then(|v| v.as_u64()).unwrap_or(0);
-    let output = u.get("output_tokens").and_then(|v| v.as_u64()).unwrap_or(0);
+    // Detect empty object {} and log warning
+    if u.as_object().map(|obj| obj.is_empty()).unwrap_or(false) {
+        log::warn!("[Responses] Empty usage object received, using defaults");
+        return json!({
+            "input_tokens": 0,
+            "output_tokens": 0
+        });
+    }
+
+    // Extract input_tokens with OpenAI field name fallback
+    // Priority: input_tokens (Anthropic) → prompt_tokens (OpenAI) → 0
+    let input = u
+        .get("input_tokens")
+        .and_then(|v| v.as_u64())
+        .or_else(|| {
+            let prompt_tokens = u.get("prompt_tokens").and_then(|v| v.as_u64());
+            if prompt_tokens.is_some() {
+                log::debug!(
+                    "[Responses] Using OpenAI field name fallback 'prompt_tokens' for input_tokens"
+                );
+            }
+            prompt_tokens
+        })
+        .unwrap_or(0);
+
+    // Extract output_tokens with OpenAI field name fallback
+    // Priority: output_tokens (Anthropic) → completion_tokens (OpenAI) → 0
+    let output = u.get("output_tokens")
+        .and_then(|v| v.as_u64())
+        .or_else(|| {
+            let completion_tokens = u.get("completion_tokens").and_then(|v| v.as_u64());
+            if completion_tokens.is_some() {
+                log::debug!("[Responses] Using OpenAI field name fallback 'completion_tokens' for output_tokens");
+            }
+            completion_tokens
+        })
+        .unwrap_or(0);
+
+    // Log if only one field present (partial object). Streaming chunks legitimately
+    // arrive with partial usage, so this stays at debug level to avoid noise.
+    if (input == 0 && output > 0) || (input > 0 && output == 0) {
+        log::debug!("[Responses] Partial usage object: {:?}", u);
+    }

    let mut result = json!({
        "input_tokens": input,
        "output_tokens": output
    });

-    // Step 1: OpenAI nested details as fallback
+    // Step 1: OpenAI nested details as fallback for cache tokens
    // OpenAI Responses API: input_tokens_details.cached_tokens
    if let Some(cached) = u
        .pointer("/input_tokens_details/cached_tokens")
@@ -259,6 +316,7 @@ pub(crate) fn build_anthropic_usage_from_responses(usage: Option<&Value>) -> Val
    }

    // Step 2: Direct Anthropic-style fields override (authoritative if present)
+    // These preserve cache tokens even if input/output_tokens are missing
    if let Some(v) = u.get("cache_read_input_tokens") {
        result["cache_read_input_tokens"] = v.clone();
    }
@@ -1352,4 +1410,106 @@ mod tests {
            "非 Codex OAuth 路径下 tools 在客户端未送时不应被注入"
        );
    }
+
+    // ==================== Usage Field Robustness Tests ====================
+
+    #[test]
+    fn test_build_usage_from_null_parameter() {
+        let result = build_anthropic_usage_from_responses(None);
+        assert_eq!(result["input_tokens"], json!(0));
+        assert_eq!(result["output_tokens"], json!(0));
+    }
+
+    #[test]
+    fn test_build_usage_from_null_json_value() {
+        let result = build_anthropic_usage_from_responses(Some(&json!(null)));
+        assert_eq!(result["input_tokens"], json!(0));
+        assert_eq!(result["output_tokens"], json!(0));
+    }
+
+    #[test]
+    fn test_build_usage_from_empty_object() {
+        let result = build_anthropic_usage_from_responses(Some(&json!({})));
+        assert_eq!(result["input_tokens"], json!(0));
+        assert_eq!(result["output_tokens"], json!(0));
+    }
+
+    #[test]
+    fn test_build_usage_from_partial_input_only() {
+        let result = build_anthropic_usage_from_responses(Some(&json!({
+            "input_tokens": 100
+        })));
+        assert_eq!(result["input_tokens"], json!(100));
+        assert_eq!(result["output_tokens"], json!(0));
+    }
+
+    #[test]
+    fn test_build_usage_from_partial_output_only() {
+        let result = build_anthropic_usage_from_responses(Some(&json!({
+            "output_tokens": 50
+        })));
+        assert_eq!(result["input_tokens"], json!(0));
+        assert_eq!(result["output_tokens"], json!(50));
+    }
+
+    #[test]
+    fn test_build_usage_with_openai_field_names() {
+        let result = build_anthropic_usage_from_responses(Some(&json!({
+            "prompt_tokens": 120,
+            "completion_tokens": 45
+        })));
+        assert_eq!(result["input_tokens"], json!(120));
+        assert_eq!(result["output_tokens"], json!(45));
+    }
+
+    #[test]
+    fn test_build_usage_anthropic_names_precedence() {
+        let result = build_anthropic_usage_from_responses(Some(&json!({
+            "input_tokens": 100,
+            "prompt_tokens": 120,
+            "output_tokens": 50,
+            "completion_tokens": 45
+        })));
+        assert_eq!(result["input_tokens"], json!(100)); // Anthropic name takes precedence
+        assert_eq!(result["output_tokens"], json!(50)); // Anthropic name takes precedence
+    }
+
+    #[test]
+    fn test_build_usage_cache_tokens_from_nested_details() {
+        let result = build_anthropic_usage_from_responses(Some(&json!({
+            "input_tokens": 100,
+            "output_tokens": 50,
+            "input_tokens_details": {
+                "cached_tokens": 80
+            }
+        })));
+        assert_eq!(result["input_tokens"], json!(100));
+        assert_eq!(result["output_tokens"], json!(50));
+        assert_eq!(result["cache_read_input_tokens"], json!(80));
+    }
+
+    #[test]
+    fn test_build_usage_cache_tokens_direct_override() {
+        let result = build_anthropic_usage_from_responses(Some(&json!({
+            "input_tokens": 100,
+            "output_tokens": 50,
+            "input_tokens_details": {
+                "cached_tokens": 80
+            },
+            "cache_read_input_tokens": 100
+        })));
+        assert_eq!(result["cache_read_input_tokens"], json!(100)); // Direct field overrides nested
+    }
+
+    #[test]
+    fn test_build_usage_cache_tokens_without_input_output() {
+        let result = build_anthropic_usage_from_responses(Some(&json!({
+            "cache_read_input_tokens": 60,
+            "cache_creation_input_tokens": 20
+        })));
+        assert_eq!(result["input_tokens"], json!(0));
+        assert_eq!(result["output_tokens"], json!(0));
+        assert_eq!(result["cache_read_input_tokens"], json!(60));
+        assert_eq!(result["cache_creation_input_tokens"], json!(20));
+    }
 }