mirror of
https://github.com/farion1231/cc-switch.git
synced 2026-04-03 22:49:15 +08:00
refactor: deduplicate and improve OpenAI Responses API conversion
- Extract shared map_responses_stop_reason and build_anthropic_usage_from_responses into transform_responses.rs as pub(crate) - Align cache token extraction priority: OpenAI nested details as fallback, direct Anthropic fields as override - Extract resolve_content_index helper to eliminate 3x copy-paste in streaming_responses.rs - Add streaming reasoning/thinking event handlers (response.reasoning.delta/done) - Add explanatory comment to transform_response heuristic detection - Add openai_responses to api_format doc comment and needs_transform test - Add explicit no-op match arms for lifecycle events - Add promptCacheKey to TS ProviderMeta type - Update toast i18n key to be generic for both OpenAI formats (zh/en/ja)
This commit is contained in:
@@ -233,8 +233,14 @@ pub struct ProviderMeta {
|
||||
/// Claude API 格式(仅 Claude 供应商使用)
|
||||
/// - "anthropic": 原生 Anthropic Messages API,直接透传
|
||||
/// - "openai_chat": OpenAI Chat Completions 格式,需要转换
|
||||
/// - "openai_responses": OpenAI Responses API 格式,需要转换
|
||||
#[serde(rename = "apiFormat", skip_serializing_if = "Option::is_none")]
|
||||
pub api_format: Option<String>,
|
||||
/// Prompt cache key for OpenAI-compatible endpoints.
|
||||
/// When set, injected into converted requests to improve cache hit rate.
|
||||
/// If not set, provider ID is used automatically during format conversion.
|
||||
#[serde(rename = "promptCacheKey", skip_serializing_if = "Option::is_none")]
|
||||
pub prompt_cache_key: Option<String>,
|
||||
}
|
||||
|
||||
impl ProviderManager {
|
||||
|
||||
@@ -324,16 +324,27 @@ impl ProviderAdapter for ClaudeAdapter {
|
||||
body: serde_json::Value,
|
||||
provider: &Provider,
|
||||
) -> Result<serde_json::Value, ProxyError> {
|
||||
// Use meta.prompt_cache_key if set by user, otherwise fall back to provider.id
|
||||
let cache_key = provider
|
||||
.meta
|
||||
.as_ref()
|
||||
.and_then(|m| m.prompt_cache_key.as_deref())
|
||||
.unwrap_or(&provider.id);
|
||||
|
||||
match self.get_api_format(provider) {
|
||||
"openai_responses" => super::transform_responses::anthropic_to_responses(body),
|
||||
_ => super::transform::anthropic_to_openai(body),
|
||||
"openai_responses" => {
|
||||
super::transform_responses::anthropic_to_responses(body, Some(cache_key))
|
||||
}
|
||||
_ => super::transform::anthropic_to_openai(body, Some(cache_key)),
|
||||
}
|
||||
}
|
||||
|
||||
fn transform_response(&self, body: serde_json::Value) -> Result<serde_json::Value, ProxyError> {
|
||||
// 响应格式通过检测 "output" vs "choices" 字段区分
|
||||
// "output" 字段 → Responses API 格式
|
||||
// "choices" 字段 → Chat Completions 格式
|
||||
// Heuristic: detect response format by presence of top-level fields.
|
||||
// The ProviderAdapter trait's transform_response doesn't receive the Provider
|
||||
// config, so we can't check api_format here. Instead we rely on the fact that
|
||||
// Responses API always returns "output" while Chat Completions returns "choices".
|
||||
// This is safe because the two formats are structurally disjoint.
|
||||
if body.get("output").is_some() {
|
||||
super::transform_responses::responses_to_anthropic(body)
|
||||
} else {
|
||||
@@ -623,6 +634,20 @@ mod tests {
|
||||
);
|
||||
assert!(adapter.needs_transform(&openai_chat_provider));
|
||||
|
||||
// OpenAI Responses format in meta: needs transform
|
||||
let openai_responses_provider = create_provider_with_meta(
|
||||
json!({
|
||||
"env": {
|
||||
"ANTHROPIC_BASE_URL": "https://api.example.com"
|
||||
}
|
||||
}),
|
||||
ProviderMeta {
|
||||
api_format: Some("openai_responses".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
assert!(adapter.needs_transform(&openai_responses_provider));
|
||||
|
||||
// meta takes precedence over legacy settings_config fields
|
||||
let meta_precedence_over_settings = create_provider_with_meta(
|
||||
json!({
|
||||
|
||||
@@ -60,6 +60,20 @@ struct Usage {
|
||||
prompt_tokens: u32,
|
||||
#[serde(default)]
|
||||
completion_tokens: u32,
|
||||
#[serde(default)]
|
||||
prompt_tokens_details: Option<PromptTokensDetails>,
|
||||
/// Some compatible servers return Anthropic-style cache fields directly
|
||||
#[serde(default)]
|
||||
cache_read_input_tokens: Option<u32>,
|
||||
#[serde(default)]
|
||||
cache_creation_input_tokens: Option<u32>,
|
||||
}
|
||||
|
||||
/// Nested token details from OpenAI format
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct PromptTokensDetails {
|
||||
#[serde(default)]
|
||||
cached_tokens: u32,
|
||||
}
|
||||
|
||||
/// 创建 Anthropic SSE 流
|
||||
@@ -115,6 +129,21 @@ pub fn create_anthropic_sse_stream(
|
||||
|
||||
if let Some(choice) = chunk.choices.first() {
|
||||
if !has_sent_message_start {
|
||||
// Build usage with cache tokens if available from first chunk
|
||||
let mut start_usage = json!({
|
||||
"input_tokens": 0,
|
||||
"output_tokens": 0
|
||||
});
|
||||
if let Some(u) = &chunk.usage {
|
||||
start_usage["input_tokens"] = json!(u.prompt_tokens);
|
||||
if let Some(cached) = extract_cache_read_tokens(u) {
|
||||
start_usage["cache_read_input_tokens"] = json!(cached);
|
||||
}
|
||||
if let Some(created) = u.cache_creation_input_tokens {
|
||||
start_usage["cache_creation_input_tokens"] = json!(created);
|
||||
}
|
||||
}
|
||||
|
||||
let event = json!({
|
||||
"type": "message_start",
|
||||
"message": {
|
||||
@@ -122,10 +151,7 @@ pub fn create_anthropic_sse_stream(
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"model": current_model.clone().unwrap_or_default(),
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"output_tokens": 0
|
||||
}
|
||||
"usage": start_usage
|
||||
}
|
||||
});
|
||||
let sse_data = format!("event: message_start\ndata: {}\n\n",
|
||||
@@ -272,11 +298,20 @@ pub fn create_anthropic_sse_stream(
|
||||
}
|
||||
|
||||
let stop_reason = map_stop_reason(Some(finish_reason));
|
||||
// 构建 usage 信息,包含 input_tokens 和 output_tokens
|
||||
let usage_json = chunk.usage.as_ref().map(|u| json!({
|
||||
"input_tokens": u.prompt_tokens,
|
||||
"output_tokens": u.completion_tokens
|
||||
}));
|
||||
// Build usage with cache token fields
|
||||
let usage_json = chunk.usage.as_ref().map(|u| {
|
||||
let mut uj = json!({
|
||||
"input_tokens": u.prompt_tokens,
|
||||
"output_tokens": u.completion_tokens
|
||||
});
|
||||
if let Some(cached) = extract_cache_read_tokens(u) {
|
||||
uj["cache_read_input_tokens"] = json!(cached);
|
||||
}
|
||||
if let Some(created) = u.cache_creation_input_tokens {
|
||||
uj["cache_creation_input_tokens"] = json!(created);
|
||||
}
|
||||
uj
|
||||
});
|
||||
let event = json!({
|
||||
"type": "message_delta",
|
||||
"delta": {
|
||||
@@ -314,6 +349,20 @@ pub fn create_anthropic_sse_stream(
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract cache_read tokens from Usage, checking both direct field and nested details
|
||||
fn extract_cache_read_tokens(usage: &Usage) -> Option<u32> {
|
||||
// Direct field takes priority (compatible servers)
|
||||
if let Some(v) = usage.cache_read_input_tokens {
|
||||
return Some(v);
|
||||
}
|
||||
// OpenAI standard: prompt_tokens_details.cached_tokens
|
||||
usage
|
||||
.prompt_tokens_details
|
||||
.as_ref()
|
||||
.map(|d| d.cached_tokens)
|
||||
.filter(|&v| v > 0)
|
||||
}
|
||||
|
||||
/// 映射停止原因
|
||||
fn map_stop_reason(finish_reason: Option<&str>) -> Option<String> {
|
||||
finish_reason.map(|r| {
|
||||
|
||||
@@ -8,13 +8,92 @@
|
||||
//!
|
||||
//! 与 Chat Completions 的 delta chunk 模型完全不同,需要独立的状态机处理。
|
||||
|
||||
use super::transform_responses::{build_anthropic_usage_from_responses, map_responses_stop_reason};
|
||||
use bytes::Bytes;
|
||||
use futures::stream::{Stream, StreamExt};
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
#[inline]
|
||||
fn response_object_from_event(data: &Value) -> &Value {
|
||||
data.get("response").unwrap_or(data)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn content_part_key(data: &Value) -> Option<String> {
|
||||
if let (Some(item_id), Some(content_index)) = (
|
||||
data.get("item_id").and_then(|v| v.as_str()),
|
||||
data.get("content_index").and_then(|v| v.as_u64()),
|
||||
) {
|
||||
return Some(format!("part:{item_id}:{content_index}"));
|
||||
}
|
||||
if let (Some(output_index), Some(content_index)) = (
|
||||
data.get("output_index").and_then(|v| v.as_u64()),
|
||||
data.get("content_index").and_then(|v| v.as_u64()),
|
||||
) {
|
||||
return Some(format!("part:out:{output_index}:{content_index}"));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn tool_item_key_from_added(data: &Value, item: &Value) -> Option<String> {
|
||||
if let Some(item_id) = item.get("id").and_then(|v| v.as_str()) {
|
||||
return Some(format!("tool:{item_id}"));
|
||||
}
|
||||
if let Some(item_id) = data.get("item_id").and_then(|v| v.as_str()) {
|
||||
return Some(format!("tool:{item_id}"));
|
||||
}
|
||||
if let Some(output_index) = data.get("output_index").and_then(|v| v.as_u64()) {
|
||||
return Some(format!("tool:out:{output_index}"));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn tool_item_key_from_event(data: &Value) -> Option<String> {
|
||||
if let Some(item_id) = data.get("item_id").and_then(|v| v.as_str()) {
|
||||
return Some(format!("tool:{item_id}"));
|
||||
}
|
||||
if let Some(output_index) = data.get("output_index").and_then(|v| v.as_u64()) {
|
||||
return Some(format!("tool:out:{output_index}"));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Resolve content index for a text/refusal content part event.
|
||||
///
|
||||
/// Uses `content_part_key` to look up or assign a stable index, falling back to
|
||||
/// `fallback_open_index` when no key is available.
|
||||
#[inline]
|
||||
fn resolve_content_index(
|
||||
data: &Value,
|
||||
next_content_index: &mut u32,
|
||||
index_by_key: &mut HashMap<String, u32>,
|
||||
fallback_open_index: &mut Option<u32>,
|
||||
) -> u32 {
|
||||
if let Some(k) = content_part_key(data) {
|
||||
if let Some(existing) = index_by_key.get(&k).copied() {
|
||||
existing
|
||||
} else {
|
||||
let assigned = *next_content_index;
|
||||
*next_content_index += 1;
|
||||
index_by_key.insert(k, assigned);
|
||||
assigned
|
||||
}
|
||||
} else if let Some(existing) = *fallback_open_index {
|
||||
existing
|
||||
} else {
|
||||
let assigned = *next_content_index;
|
||||
*next_content_index += 1;
|
||||
*fallback_open_index = Some(assigned);
|
||||
assigned
|
||||
}
|
||||
}
|
||||
|
||||
/// 创建从 Responses API SSE 到 Anthropic SSE 的转换流
|
||||
///
|
||||
/// 状态机跟踪: message_id, current_model, content_index, has_sent_message_start
|
||||
/// 状态机跟踪: message_id, current_model, has_sent_message_start, item/content index map
|
||||
/// SSE 解析支持 named events (event: + data: 行)
|
||||
pub fn create_anthropic_sse_stream_from_responses(
|
||||
stream: impl Stream<Item = Result<Bytes, reqwest::Error>> + Send + 'static,
|
||||
@@ -23,8 +102,14 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
let mut buffer = String::new();
|
||||
let mut message_id: Option<String> = None;
|
||||
let mut current_model: Option<String> = None;
|
||||
let mut content_index: u32 = 0;
|
||||
let mut has_sent_message_start = false;
|
||||
let mut has_tool_use = false;
|
||||
let mut next_content_index: u32 = 0;
|
||||
let mut index_by_key: HashMap<String, u32> = HashMap::new();
|
||||
let mut open_indices: HashSet<u32> = HashSet::new();
|
||||
let mut fallback_open_index: Option<u32> = None;
|
||||
let mut tool_index_by_item_id: HashMap<String, u32> = HashMap::new();
|
||||
let mut last_tool_index: Option<u32> = None;
|
||||
|
||||
tokio::pin!(stream);
|
||||
|
||||
@@ -75,14 +160,22 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
// response.created → message_start
|
||||
// ================================================
|
||||
"response.created" => {
|
||||
if let Some(id) = data.get("id").and_then(|i| i.as_str()) {
|
||||
let response_obj = response_object_from_event(&data);
|
||||
if let Some(id) = response_obj.get("id").and_then(|i| i.as_str()) {
|
||||
message_id = Some(id.to_string());
|
||||
}
|
||||
if let Some(model) = data.get("model").and_then(|m| m.as_str()) {
|
||||
if let Some(model) =
|
||||
response_obj.get("model").and_then(|m| m.as_str())
|
||||
{
|
||||
current_model = Some(model.to_string());
|
||||
}
|
||||
|
||||
has_sent_message_start = true;
|
||||
// Build usage with cache tokens if available
|
||||
let start_usage = build_anthropic_usage_from_responses(
|
||||
response_obj.get("usage"),
|
||||
);
|
||||
|
||||
let event = json!({
|
||||
"type": "message_start",
|
||||
"message": {
|
||||
@@ -90,10 +183,7 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"model": current_model.clone().unwrap_or_default(),
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"output_tokens": 0
|
||||
}
|
||||
"usage": start_usage
|
||||
}
|
||||
});
|
||||
let sse = format!("event: message_start\ndata: {}\n\n",
|
||||
@@ -125,10 +215,22 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
}
|
||||
|
||||
if let Some(part) = data.get("part") {
|
||||
if part.get("type").and_then(|t| t.as_str()) == Some("output_text") {
|
||||
let part_type = part.get("type").and_then(|t| t.as_str());
|
||||
if matches!(part_type, Some("output_text") | Some("refusal")) {
|
||||
let index = resolve_content_index(
|
||||
&data,
|
||||
&mut next_content_index,
|
||||
&mut index_by_key,
|
||||
&mut fallback_open_index,
|
||||
);
|
||||
|
||||
if open_indices.contains(&index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let event = json!({
|
||||
"type": "content_block_start",
|
||||
"index": content_index,
|
||||
"index": index,
|
||||
"content_block": {
|
||||
"type": "text",
|
||||
"text": ""
|
||||
@@ -137,6 +239,7 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
let sse = format!("event: content_block_start\ndata: {}\n\n",
|
||||
serde_json::to_string(&event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(sse));
|
||||
open_indices.insert(index);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -146,9 +249,71 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
// ================================================
|
||||
"response.output_text.delta" => {
|
||||
if let Some(delta) = data.get("delta").and_then(|d| d.as_str()) {
|
||||
let index = resolve_content_index(
|
||||
&data,
|
||||
&mut next_content_index,
|
||||
&mut index_by_key,
|
||||
&mut fallback_open_index,
|
||||
);
|
||||
|
||||
if !open_indices.contains(&index) {
|
||||
let start_event = json!({
|
||||
"type": "content_block_start",
|
||||
"index": index,
|
||||
"content_block": {
|
||||
"type": "text",
|
||||
"text": ""
|
||||
}
|
||||
});
|
||||
let start_sse = format!("event: content_block_start\ndata: {}\n\n",
|
||||
serde_json::to_string(&start_event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(start_sse));
|
||||
open_indices.insert(index);
|
||||
}
|
||||
let event = json!({
|
||||
"type": "content_block_delta",
|
||||
"index": content_index,
|
||||
"index": index,
|
||||
"delta": {
|
||||
"type": "text_delta",
|
||||
"text": delta
|
||||
}
|
||||
});
|
||||
let sse = format!("event: content_block_delta\ndata: {}\n\n",
|
||||
serde_json::to_string(&event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(sse));
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================
|
||||
// response.refusal.delta → content_block_delta (text_delta)
|
||||
// ================================================
|
||||
"response.refusal.delta" => {
|
||||
if let Some(delta) = data.get("delta").and_then(|d| d.as_str()) {
|
||||
let index = resolve_content_index(
|
||||
&data,
|
||||
&mut next_content_index,
|
||||
&mut index_by_key,
|
||||
&mut fallback_open_index,
|
||||
);
|
||||
|
||||
if !open_indices.contains(&index) {
|
||||
let start_event = json!({
|
||||
"type": "content_block_start",
|
||||
"index": index,
|
||||
"content_block": {
|
||||
"type": "text",
|
||||
"text": ""
|
||||
}
|
||||
});
|
||||
let start_sse = format!("event: content_block_start\ndata: {}\n\n",
|
||||
serde_json::to_string(&start_event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(start_sse));
|
||||
open_indices.insert(index);
|
||||
}
|
||||
|
||||
let event = json!({
|
||||
"type": "content_block_delta",
|
||||
"index": index,
|
||||
"delta": {
|
||||
"type": "text_delta",
|
||||
"text": delta
|
||||
@@ -164,14 +329,27 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
// response.content_part.done → content_block_stop
|
||||
// ================================================
|
||||
"response.content_part.done" => {
|
||||
let event = json!({
|
||||
"type": "content_block_stop",
|
||||
"index": content_index
|
||||
});
|
||||
let sse = format!("event: content_block_stop\ndata: {}\n\n",
|
||||
serde_json::to_string(&event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(sse));
|
||||
content_index += 1;
|
||||
let key = content_part_key(&data);
|
||||
let index = if let Some(k) = key {
|
||||
index_by_key.get(&k).copied()
|
||||
} else {
|
||||
fallback_open_index
|
||||
};
|
||||
if let Some(index) = index {
|
||||
if !open_indices.remove(&index) {
|
||||
continue;
|
||||
}
|
||||
let event = json!({
|
||||
"type": "content_block_stop",
|
||||
"index": index
|
||||
});
|
||||
let sse = format!("event: content_block_stop\ndata: {}\n\n",
|
||||
serde_json::to_string(&event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(sse));
|
||||
if fallback_open_index == Some(index) {
|
||||
fallback_open_index = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================
|
||||
@@ -181,6 +359,7 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
if let Some(item) = data.get("item") {
|
||||
let item_type = item.get("type").and_then(|t| t.as_str()).unwrap_or("");
|
||||
if item_type == "function_call" {
|
||||
has_tool_use = true;
|
||||
// 确保 message_start 已发送
|
||||
if !has_sent_message_start {
|
||||
let start_event = json!({
|
||||
@@ -201,10 +380,36 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
|
||||
let call_id = item.get("call_id").and_then(|i| i.as_str()).unwrap_or("");
|
||||
let name = item.get("name").and_then(|n| n.as_str()).unwrap_or("");
|
||||
let index = if let Some(k) = tool_item_key_from_added(&data, item) {
|
||||
if let Some(existing) = index_by_key.get(&k).copied() {
|
||||
existing
|
||||
} else {
|
||||
let assigned = next_content_index;
|
||||
next_content_index += 1;
|
||||
index_by_key.insert(k, assigned);
|
||||
assigned
|
||||
}
|
||||
} else {
|
||||
let assigned = next_content_index;
|
||||
next_content_index += 1;
|
||||
assigned
|
||||
};
|
||||
if let Some(item_id) = item
|
||||
.get("id")
|
||||
.and_then(|v| v.as_str())
|
||||
.or_else(|| data.get("item_id").and_then(|v| v.as_str()))
|
||||
{
|
||||
tool_index_by_item_id.insert(item_id.to_string(), index);
|
||||
}
|
||||
last_tool_index = Some(index);
|
||||
|
||||
if open_indices.contains(&index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let event = json!({
|
||||
"type": "content_block_start",
|
||||
"index": content_index,
|
||||
"index": index,
|
||||
"content_block": {
|
||||
"type": "tool_use",
|
||||
"id": call_id,
|
||||
@@ -214,6 +419,7 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
let sse = format!("event: content_block_start\ndata: {}\n\n",
|
||||
serde_json::to_string(&event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(sse));
|
||||
open_indices.insert(index);
|
||||
}
|
||||
// message type output_item.added is handled via content_part.added
|
||||
}
|
||||
@@ -224,9 +430,49 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
// ================================================
|
||||
"response.function_call_arguments.delta" => {
|
||||
if let Some(delta) = data.get("delta").and_then(|d| d.as_str()) {
|
||||
let item_id = data.get("item_id").and_then(|v| v.as_str());
|
||||
let index = if let Some(id) = item_id {
|
||||
tool_index_by_item_id.get(id).copied()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
.or_else(|| {
|
||||
tool_item_key_from_event(&data)
|
||||
.and_then(|k| index_by_key.get(&k).copied())
|
||||
})
|
||||
.or(last_tool_index)
|
||||
.unwrap_or_else(|| {
|
||||
let assigned = next_content_index;
|
||||
next_content_index += 1;
|
||||
assigned
|
||||
});
|
||||
|
||||
if !open_indices.contains(&index) {
|
||||
let start_event = json!({
|
||||
"type": "content_block_start",
|
||||
"index": index,
|
||||
"content_block": {
|
||||
"type": "tool_use",
|
||||
"id": data
|
||||
.get("call_id")
|
||||
.and_then(|v| v.as_str())
|
||||
.or(item_id)
|
||||
.unwrap_or(""),
|
||||
"name": data
|
||||
.get("name")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
}
|
||||
});
|
||||
let start_sse = format!("event: content_block_start\ndata: {}\n\n",
|
||||
serde_json::to_string(&start_event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(start_sse));
|
||||
open_indices.insert(index);
|
||||
}
|
||||
|
||||
let event = json!({
|
||||
"type": "content_block_delta",
|
||||
"index": content_index,
|
||||
"index": index,
|
||||
"delta": {
|
||||
"type": "input_json_delta",
|
||||
"partial_json": delta
|
||||
@@ -242,32 +488,166 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
// response.function_call_arguments.done → content_block_stop
|
||||
// ================================================
|
||||
"response.function_call_arguments.done" => {
|
||||
let event = json!({
|
||||
"type": "content_block_stop",
|
||||
"index": content_index
|
||||
});
|
||||
let sse = format!("event: content_block_stop\ndata: {}\n\n",
|
||||
serde_json::to_string(&event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(sse));
|
||||
content_index += 1;
|
||||
let item_id = data.get("item_id").and_then(|v| v.as_str());
|
||||
let index = if let Some(id) = item_id {
|
||||
tool_index_by_item_id.get(id).copied()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
.or_else(|| {
|
||||
tool_item_key_from_event(&data)
|
||||
.and_then(|k| index_by_key.get(&k).copied())
|
||||
})
|
||||
.or(last_tool_index);
|
||||
if let Some(index) = index {
|
||||
if !open_indices.remove(&index) {
|
||||
continue;
|
||||
}
|
||||
let event = json!({
|
||||
"type": "content_block_stop",
|
||||
"index": index
|
||||
});
|
||||
let sse = format!("event: content_block_stop\ndata: {}\n\n",
|
||||
serde_json::to_string(&event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(sse));
|
||||
if let Some(item_id) = item_id {
|
||||
tool_index_by_item_id.remove(item_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================
|
||||
// response.refusal.done → content_block_stop
|
||||
// ================================================
|
||||
"response.refusal.done" => {
|
||||
let key = content_part_key(&data);
|
||||
let index = if let Some(k) = key {
|
||||
index_by_key.get(&k).copied()
|
||||
} else {
|
||||
fallback_open_index
|
||||
};
|
||||
if let Some(index) = index {
|
||||
if !open_indices.remove(&index) {
|
||||
continue;
|
||||
}
|
||||
let event = json!({
|
||||
"type": "content_block_stop",
|
||||
"index": index
|
||||
});
|
||||
let sse = format!("event: content_block_stop\ndata: {}\n\n",
|
||||
serde_json::to_string(&event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(sse));
|
||||
if fallback_open_index == Some(index) {
|
||||
fallback_open_index = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================
|
||||
// response.reasoning.delta → content_block_delta (thinking_delta)
|
||||
// ================================================
|
||||
"response.reasoning.delta" => {
|
||||
if let Some(delta) = data
|
||||
.get("delta")
|
||||
.or_else(|| data.get("text"))
|
||||
.and_then(|d| d.as_str())
|
||||
{
|
||||
let index = resolve_content_index(
|
||||
&data,
|
||||
&mut next_content_index,
|
||||
&mut index_by_key,
|
||||
&mut fallback_open_index,
|
||||
);
|
||||
|
||||
if !open_indices.contains(&index) {
|
||||
let start_event = json!({
|
||||
"type": "content_block_start",
|
||||
"index": index,
|
||||
"content_block": {
|
||||
"type": "thinking",
|
||||
"thinking": ""
|
||||
}
|
||||
});
|
||||
let start_sse = format!("event: content_block_start\ndata: {}\n\n",
|
||||
serde_json::to_string(&start_event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(start_sse));
|
||||
open_indices.insert(index);
|
||||
}
|
||||
|
||||
let event = json!({
|
||||
"type": "content_block_delta",
|
||||
"index": index,
|
||||
"delta": {
|
||||
"type": "thinking_delta",
|
||||
"thinking": delta
|
||||
}
|
||||
});
|
||||
let sse = format!("event: content_block_delta\ndata: {}\n\n",
|
||||
serde_json::to_string(&event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(sse));
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================
|
||||
// response.reasoning.done → content_block_stop
|
||||
// ================================================
|
||||
"response.reasoning.done" => {
|
||||
let key = content_part_key(&data);
|
||||
let index = if let Some(k) = key {
|
||||
index_by_key.get(&k).copied()
|
||||
} else {
|
||||
fallback_open_index
|
||||
};
|
||||
if let Some(index) = index {
|
||||
if !open_indices.remove(&index) {
|
||||
continue;
|
||||
}
|
||||
let event = json!({
|
||||
"type": "content_block_stop",
|
||||
"index": index
|
||||
});
|
||||
let sse = format!("event: content_block_stop\ndata: {}\n\n",
|
||||
serde_json::to_string(&event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(sse));
|
||||
if fallback_open_index == Some(index) {
|
||||
fallback_open_index = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================
|
||||
// response.completed → message_delta + message_stop
|
||||
// ================================================
|
||||
"response.completed" => {
|
||||
let stop_reason = data.get("status")
|
||||
.and_then(|s| s.as_str())
|
||||
.map(|s| match s {
|
||||
"completed" => "end_turn",
|
||||
"incomplete" => "max_tokens",
|
||||
_ => "end_turn",
|
||||
});
|
||||
let response_obj = response_object_from_event(&data);
|
||||
let stop_reason = map_responses_stop_reason(
|
||||
response_obj.get("status").and_then(|s| s.as_str()),
|
||||
has_tool_use,
|
||||
response_obj
|
||||
.pointer("/incomplete_details/reason")
|
||||
.and_then(|r| r.as_str()),
|
||||
);
|
||||
|
||||
let usage_json = data.get("usage").map(|u| json!({
|
||||
"input_tokens": u.get("input_tokens").and_then(|v| v.as_u64()).unwrap_or(0),
|
||||
"output_tokens": u.get("output_tokens").and_then(|v| v.as_u64()).unwrap_or(0)
|
||||
}));
|
||||
// Best effort: close any dangling blocks before message_delta/message_stop.
|
||||
if !open_indices.is_empty() {
|
||||
let mut remaining: Vec<u32> = open_indices.iter().copied().collect();
|
||||
remaining.sort_unstable();
|
||||
for index in remaining {
|
||||
let stop_event = json!({
|
||||
"type": "content_block_stop",
|
||||
"index": index
|
||||
});
|
||||
let stop_sse = format!("event: content_block_stop\ndata: {}\n\n",
|
||||
serde_json::to_string(&stop_event).unwrap_or_default());
|
||||
yield Ok(Bytes::from(stop_sse));
|
||||
open_indices.remove(&index);
|
||||
}
|
||||
}
|
||||
fallback_open_index = None;
|
||||
|
||||
let usage_json = response_obj.get("usage").map(|u| {
|
||||
build_anthropic_usage_from_responses(Some(u))
|
||||
});
|
||||
|
||||
// Emit message_delta (with usage + stop_reason)
|
||||
let delta_event = json!({
|
||||
@@ -291,7 +671,13 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
yield Ok(Bytes::from(stop_sse));
|
||||
}
|
||||
|
||||
// Ignore other events (response.in_progress, output_item.done, etc.)
|
||||
// Lifecycle events that don't need Anthropic counterparts.
|
||||
// Listed explicitly so new events trigger a match-completeness review.
|
||||
"response.output_text.done"
|
||||
| "response.output_item.done"
|
||||
| "response.in_progress" => {}
|
||||
|
||||
// Any other unknown/future events — silently skip.
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
@@ -314,3 +700,203 @@ pub fn create_anthropic_sse_stream_from_responses(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use futures::stream;
|
||||
use futures::StreamExt;
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[test]
|
||||
fn test_map_responses_stop_reason_tool_use() {
|
||||
assert_eq!(
|
||||
map_responses_stop_reason(Some("completed"), true, None),
|
||||
Some("tool_use")
|
||||
);
|
||||
assert_eq!(
|
||||
map_responses_stop_reason(Some("completed"), false, None),
|
||||
Some("end_turn")
|
||||
);
|
||||
assert_eq!(
|
||||
map_responses_stop_reason(Some("incomplete"), false, Some("max_output_tokens")),
|
||||
Some("max_tokens")
|
||||
);
|
||||
assert_eq!(
|
||||
map_responses_stop_reason(Some("incomplete"), false, Some("content_filter")),
|
||||
Some("end_turn")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_response_object_from_event_with_wrapper() {
|
||||
let data = json!({
|
||||
"type": "response.created",
|
||||
"response": {
|
||||
"id": "resp_1",
|
||||
"model": "gpt-4o"
|
||||
}
|
||||
});
|
||||
let obj = response_object_from_event(&data);
|
||||
assert_eq!(obj["id"], "resp_1");
|
||||
assert_eq!(obj["model"], "gpt-4o");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_streaming_conversion_with_wrapped_response_events() {
|
||||
let input = concat!(
|
||||
"event: response.created\n",
|
||||
"data: {\"type\":\"response.created\",\"response\":{\"id\":\"resp_1\",\"model\":\"gpt-4o\",\"usage\":{\"input_tokens\":12,\"output_tokens\":0}}}\n\n",
|
||||
"event: response.output_item.added\n",
|
||||
"data: {\"type\":\"response.output_item.added\",\"item\":{\"type\":\"function_call\",\"call_id\":\"call_1\",\"name\":\"get_weather\"}}\n\n",
|
||||
"event: response.function_call_arguments.delta\n",
|
||||
"data: {\"type\":\"response.function_call_arguments.delta\",\"delta\":\"{\\\"city\\\":\\\"Tokyo\\\"}\"}\n\n",
|
||||
"event: response.function_call_arguments.done\n",
|
||||
"data: {\"type\":\"response.function_call_arguments.done\"}\n\n",
|
||||
"event: response.completed\n",
|
||||
"data: {\"type\":\"response.completed\",\"response\":{\"status\":\"completed\",\"usage\":{\"input_tokens\":12,\"output_tokens\":3}}}\n\n"
|
||||
);
|
||||
|
||||
let upstream = stream::iter(vec![Ok(Bytes::from(input.as_bytes().to_vec()))]);
|
||||
let converted = create_anthropic_sse_stream_from_responses(upstream);
|
||||
let chunks: Vec<_> = converted.collect().await;
|
||||
|
||||
let merged = chunks
|
||||
.into_iter()
|
||||
.map(|c| String::from_utf8_lossy(c.unwrap().as_ref()).to_string())
|
||||
.collect::<String>();
|
||||
|
||||
assert!(merged.contains("\"type\":\"message_start\""));
|
||||
assert!(merged.contains("\"id\":\"resp_1\""));
|
||||
assert!(merged.contains("\"model\":\"gpt-4o\""));
|
||||
assert!(merged.contains("\"type\":\"tool_use\""));
|
||||
assert!(merged.contains("\"name\":\"get_weather\""));
|
||||
assert!(merged.contains("\"type\":\"input_json_delta\""));
|
||||
assert!(merged.contains("\"stop_reason\":\"tool_use\""));
|
||||
assert!(merged.contains("\"input_tokens\":12"));
|
||||
assert!(merged.contains("\"output_tokens\":3"));
|
||||
assert!(merged.contains("\"type\":\"message_stop\""));
|
||||
}
|
||||
|
||||
#[tokio::test]
async fn test_streaming_conversion_interleaved_tool_deltas_by_item_id() {
    // Two function_call items (fc_1 / fc_2) are announced first, then their
    // argument deltas arrive out of order (fc_2 before fc_1). Each delta
    // carries an `item_id`, which the converter must use to route the delta
    // to the correct Anthropic content block index.
    let input = concat!(
        "event: response.created\n",
        "data: {\"type\":\"response.created\",\"response\":{\"id\":\"resp_2\",\"model\":\"gpt-4o\"}}\n\n",
        "event: response.output_item.added\n",
        "data: {\"type\":\"response.output_item.added\",\"item\":{\"id\":\"fc_1\",\"type\":\"function_call\",\"call_id\":\"call_1\",\"name\":\"first_tool\"}}\n\n",
        "event: response.output_item.added\n",
        "data: {\"type\":\"response.output_item.added\",\"item\":{\"id\":\"fc_2\",\"type\":\"function_call\",\"call_id\":\"call_2\",\"name\":\"second_tool\"}}\n\n",
        "event: response.function_call_arguments.delta\n",
        "data: {\"type\":\"response.function_call_arguments.delta\",\"item_id\":\"fc_2\",\"delta\":\"{\\\"b\\\":2}\"}\n\n",
        "event: response.function_call_arguments.delta\n",
        "data: {\"type\":\"response.function_call_arguments.delta\",\"item_id\":\"fc_1\",\"delta\":\"{\\\"a\\\":1}\"}\n\n",
        "event: response.function_call_arguments.done\n",
        "data: {\"type\":\"response.function_call_arguments.done\",\"item_id\":\"fc_1\"}\n\n",
        "event: response.function_call_arguments.done\n",
        "data: {\"type\":\"response.function_call_arguments.done\",\"item_id\":\"fc_2\"}\n\n",
        "event: response.completed\n",
        "data: {\"type\":\"response.completed\",\"response\":{\"status\":\"completed\",\"usage\":{\"input_tokens\":8,\"output_tokens\":4}}}\n\n"
    );

    // Feed the whole SSE payload as a single upstream chunk and collect the
    // converted Anthropic SSE output into one string.
    let upstream = stream::iter(vec![Ok(Bytes::from(input.as_bytes().to_vec()))]);
    let converted = create_anthropic_sse_stream_from_responses(upstream);
    let chunks: Vec<_> = converted.collect().await;
    let merged = chunks
        .into_iter()
        .map(|c| String::from_utf8_lossy(c.unwrap().as_ref()).to_string())
        .collect::<String>();

    // Parse each SSE frame's `data:` line back into JSON for structural checks.
    let events: Vec<Value> = merged
        .split("\n\n")
        .filter_map(|block| {
            let data = block.lines().find_map(|line| line.strip_prefix("data: "))?;
            serde_json::from_str::<Value>(data).ok()
        })
        .collect();

    // Record which content-block index each tool call was assigned at
    // content_block_start time (tool_use blocks carry the call id as `id`).
    let mut tool_index_by_call: HashMap<String, u64> = HashMap::new();
    for event in &events {
        if event.get("type").and_then(|v| v.as_str()) == Some("content_block_start") {
            let cb = event.get("content_block");
            if cb.and_then(|v| v.get("type")).and_then(|v| v.as_str()) == Some("tool_use") {
                if let (Some(call_id), Some(index)) = (
                    cb.and_then(|v| v.get("id")).and_then(|v| v.as_str()),
                    event.get("index").and_then(|v| v.as_u64()),
                ) {
                    tool_index_by_call.insert(call_id.to_string(), index);
                }
            }
        }
    }

    // Collect the block indices of all input_json_delta events in arrival order.
    let delta_indices: Vec<u64> = events
        .iter()
        .filter(|event| {
            event.get("type").and_then(|v| v.as_str()) == Some("content_block_delta")
                && event.pointer("/delta/type").and_then(|v| v.as_str())
                    == Some("input_json_delta")
        })
        .filter_map(|event| event.get("index").and_then(|v| v.as_u64()))
        .collect();

    // First delta came from fc_2 (call_2), second from fc_1 (call_1): each
    // delta must land on its own tool's block, and the blocks must be distinct.
    assert_eq!(delta_indices.len(), 2);
    assert_eq!(delta_indices[0], *tool_index_by_call.get("call_2").unwrap());
    assert_eq!(delta_indices[1], *tool_index_by_call.get("call_1").unwrap());
    assert_ne!(
        tool_index_by_call.get("call_1"),
        tool_index_by_call.get("call_2")
    );
}
|
||||
|
||||
#[tokio::test]
async fn test_streaming_reasoning_delta_emits_thinking_blocks() {
    // A reasoning delta/done pair precedes a normal output_text part. The
    // converter should surface the reasoning as an Anthropic `thinking`
    // content block (with thinking_delta events) and still emit the regular
    // text content and an end_turn stop_reason.
    let input = concat!(
        "event: response.created\n",
        "data: {\"type\":\"response.created\",\"response\":{\"id\":\"resp_r\",\"model\":\"o3\",\"usage\":{\"input_tokens\":5,\"output_tokens\":0}}}\n\n",
        "event: response.reasoning.delta\n",
        "data: {\"type\":\"response.reasoning.delta\",\"delta\":\"Let me think...\"}\n\n",
        "event: response.reasoning.done\n",
        "data: {\"type\":\"response.reasoning.done\"}\n\n",
        "event: response.content_part.added\n",
        "data: {\"type\":\"response.content_part.added\",\"part\":{\"type\":\"output_text\",\"text\":\"\"},\"output_index\":0,\"content_index\":0}\n\n",
        "event: response.output_text.delta\n",
        "data: {\"type\":\"response.output_text.delta\",\"delta\":\"42\",\"output_index\":0,\"content_index\":0}\n\n",
        "event: response.content_part.done\n",
        "data: {\"type\":\"response.content_part.done\",\"output_index\":0,\"content_index\":0}\n\n",
        "event: response.completed\n",
        "data: {\"type\":\"response.completed\",\"response\":{\"status\":\"completed\",\"usage\":{\"input_tokens\":5,\"output_tokens\":10}}}\n\n"
    );

    // Convert the single-chunk upstream SSE stream and merge the output.
    let upstream = stream::iter(vec![Ok(Bytes::from(input.as_bytes().to_vec()))]);
    let converted = create_anthropic_sse_stream_from_responses(upstream);
    let chunks: Vec<_> = converted.collect().await;
    let merged = chunks
        .into_iter()
        .map(|c| String::from_utf8_lossy(c.unwrap().as_ref()).to_string())
        .collect::<String>();

    // Should contain thinking block start, thinking delta, and text content
    assert!(
        merged.contains("\"type\":\"thinking\""),
        "should emit thinking content_block_start"
    );
    assert!(
        merged.contains("\"type\":\"thinking_delta\""),
        "should emit thinking_delta"
    );
    assert!(
        merged.contains("\"thinking\":\"Let me think...\""),
        "should contain thinking text"
    );
    assert!(
        merged.contains("\"type\":\"text_delta\""),
        "should also emit text content"
    );
    assert!(
        merged.contains("\"text\":\"42\""),
        "should contain text delta"
    );
    assert!(merged.contains("\"stop_reason\":\"end_turn\""));
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,9 @@ use crate::proxy::error::ProxyError;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
/// Anthropic 请求 → OpenAI 请求
|
||||
pub fn anthropic_to_openai(body: Value) -> Result<Value, ProxyError> {
|
||||
///
|
||||
/// `cache_key`: optional prompt_cache_key to inject for improved cache routing
|
||||
pub fn anthropic_to_openai(body: Value, cache_key: Option<&str>) -> Result<Value, ProxyError> {
|
||||
let mut result = json!({});
|
||||
|
||||
// NOTE: 模型映射由上游统一处理(proxy::model_mapper),格式转换层只做结构转换。
|
||||
@@ -23,10 +25,14 @@ pub fn anthropic_to_openai(body: Value) -> Result<Value, ProxyError> {
|
||||
// 单个字符串
|
||||
messages.push(json!({"role": "system", "content": text}));
|
||||
} else if let Some(arr) = system.as_array() {
|
||||
// 多个 system message
|
||||
// 多个 system message — preserve cache_control for compatible proxies
|
||||
for msg in arr {
|
||||
if let Some(text) = msg.get("text").and_then(|t| t.as_str()) {
|
||||
messages.push(json!({"role": "system", "content": text}));
|
||||
let mut sys_msg = json!({"role": "system", "content": text});
|
||||
if let Some(cc) = msg.get("cache_control") {
|
||||
sys_msg["cache_control"] = cc.clone();
|
||||
}
|
||||
messages.push(sys_msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -67,14 +73,18 @@ pub fn anthropic_to_openai(body: Value) -> Result<Value, ProxyError> {
|
||||
.iter()
|
||||
.filter(|t| t.get("type").and_then(|v| v.as_str()) != Some("BatchTool"))
|
||||
.map(|t| {
|
||||
json!({
|
||||
let mut tool = json!({
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": t.get("name").and_then(|n| n.as_str()).unwrap_or(""),
|
||||
"description": t.get("description"),
|
||||
"parameters": clean_schema(t.get("input_schema").cloned().unwrap_or(json!({})))
|
||||
}
|
||||
})
|
||||
});
|
||||
if let Some(cc) = t.get("cache_control") {
|
||||
tool["cache_control"] = cc.clone();
|
||||
}
|
||||
tool
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -87,6 +97,11 @@ pub fn anthropic_to_openai(body: Value) -> Result<Value, ProxyError> {
|
||||
result["tool_choice"] = v.clone();
|
||||
}
|
||||
|
||||
// Inject prompt_cache_key for improved cache routing on OpenAI-compatible endpoints
|
||||
if let Some(key) = cache_key {
|
||||
result["prompt_cache_key"] = json!(key);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
@@ -122,7 +137,11 @@ fn convert_message_to_openai(
|
||||
match block_type {
|
||||
"text" => {
|
||||
if let Some(text) = block.get("text").and_then(|t| t.as_str()) {
|
||||
content_parts.push(json!({"type": "text", "text": text}));
|
||||
let mut part = json!({"type": "text", "text": text});
|
||||
if let Some(cc) = block.get("cache_control") {
|
||||
part["cache_control"] = cc.clone();
|
||||
}
|
||||
content_parts.push(part);
|
||||
}
|
||||
}
|
||||
"image" => {
|
||||
@@ -184,8 +203,14 @@ fn convert_message_to_openai(
|
||||
if content_parts.is_empty() {
|
||||
msg["content"] = Value::Null;
|
||||
} else if content_parts.len() == 1 {
|
||||
if let Some(text) = content_parts[0].get("text") {
|
||||
msg["content"] = text.clone();
|
||||
// When cache_control is present, keep array format to preserve it
|
||||
let has_cache_control = content_parts[0].get("cache_control").is_some();
|
||||
if !has_cache_control {
|
||||
if let Some(text) = content_parts[0].get("text") {
|
||||
msg["content"] = text.clone();
|
||||
} else {
|
||||
msg["content"] = json!(content_parts);
|
||||
}
|
||||
} else {
|
||||
msg["content"] = json!(content_parts);
|
||||
}
|
||||
@@ -288,7 +313,7 @@ pub fn openai_to_anthropic(body: Value) -> Result<Value, ProxyError> {
|
||||
other => other,
|
||||
});
|
||||
|
||||
// usage
|
||||
// usage — map cache tokens from OpenAI format to Anthropic format
|
||||
let usage = body.get("usage").cloned().unwrap_or(json!({}));
|
||||
let input_tokens = usage
|
||||
.get("prompt_tokens")
|
||||
@@ -299,6 +324,26 @@ pub fn openai_to_anthropic(body: Value) -> Result<Value, ProxyError> {
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(0) as u32;
|
||||
|
||||
let mut usage_json = json!({
|
||||
"input_tokens": input_tokens,
|
||||
"output_tokens": output_tokens
|
||||
});
|
||||
|
||||
// OpenAI standard: prompt_tokens_details.cached_tokens
|
||||
if let Some(cached) = usage
|
||||
.pointer("/prompt_tokens_details/cached_tokens")
|
||||
.and_then(|v| v.as_u64())
|
||||
{
|
||||
usage_json["cache_read_input_tokens"] = json!(cached);
|
||||
}
|
||||
// Some compatible servers return these fields directly
|
||||
if let Some(v) = usage.get("cache_read_input_tokens") {
|
||||
usage_json["cache_read_input_tokens"] = v.clone();
|
||||
}
|
||||
if let Some(v) = usage.get("cache_creation_input_tokens") {
|
||||
usage_json["cache_creation_input_tokens"] = v.clone();
|
||||
}
|
||||
|
||||
let result = json!({
|
||||
"id": body.get("id").and_then(|i| i.as_str()).unwrap_or(""),
|
||||
"type": "message",
|
||||
@@ -307,10 +352,7 @@ pub fn openai_to_anthropic(body: Value) -> Result<Value, ProxyError> {
|
||||
"model": body.get("model").and_then(|m| m.as_str()).unwrap_or(""),
|
||||
"stop_reason": stop_reason,
|
||||
"stop_sequence": null,
|
||||
"usage": {
|
||||
"input_tokens": input_tokens,
|
||||
"output_tokens": output_tokens
|
||||
}
|
||||
"usage": usage_json
|
||||
});
|
||||
|
||||
Ok(result)
|
||||
@@ -328,7 +370,7 @@ mod tests {
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_openai(input).unwrap();
|
||||
let result = anthropic_to_openai(input, None).unwrap();
|
||||
assert_eq!(result["model"], "claude-3-opus");
|
||||
assert_eq!(result["max_tokens"], 1024);
|
||||
assert_eq!(result["messages"][0]["role"], "user");
|
||||
@@ -344,7 +386,7 @@ mod tests {
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_openai(input).unwrap();
|
||||
let result = anthropic_to_openai(input, None).unwrap();
|
||||
assert_eq!(result["messages"][0]["role"], "system");
|
||||
assert_eq!(
|
||||
result["messages"][0]["content"],
|
||||
@@ -366,7 +408,7 @@ mod tests {
|
||||
}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_openai(input).unwrap();
|
||||
let result = anthropic_to_openai(input, None).unwrap();
|
||||
assert_eq!(result["tools"][0]["type"], "function");
|
||||
assert_eq!(result["tools"][0]["function"]["name"], "get_weather");
|
||||
}
|
||||
@@ -385,7 +427,7 @@ mod tests {
|
||||
}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_openai(input).unwrap();
|
||||
let result = anthropic_to_openai(input, None).unwrap();
|
||||
let msg = &result["messages"][0];
|
||||
assert_eq!(msg["role"], "assistant");
|
||||
assert!(msg.get("tool_calls").is_some());
|
||||
@@ -405,7 +447,7 @@ mod tests {
|
||||
}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_openai(input).unwrap();
|
||||
let result = anthropic_to_openai(input, None).unwrap();
|
||||
let msg = &result["messages"][0];
|
||||
assert_eq!(msg["role"], "tool");
|
||||
assert_eq!(msg["tool_call_id"], "call_123");
|
||||
@@ -477,7 +519,117 @@ mod tests {
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_openai(input).unwrap();
|
||||
let result = anthropic_to_openai(input, None).unwrap();
|
||||
assert_eq!(result["model"], "gpt-4o");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_anthropic_to_openai_with_cache_key() {
|
||||
let input = json!({
|
||||
"model": "claude-3-opus",
|
||||
"max_tokens": 1024,
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_openai(input, Some("provider-123")).unwrap();
|
||||
assert_eq!(result["prompt_cache_key"], "provider-123");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_anthropic_to_openai_no_cache_key() {
|
||||
let input = json!({
|
||||
"model": "claude-3-opus",
|
||||
"max_tokens": 1024,
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_openai(input, None).unwrap();
|
||||
assert!(result.get("prompt_cache_key").is_none());
|
||||
}
|
||||
|
||||
#[test]
fn test_anthropic_to_openai_cache_control_preserved() {
    // Anthropic allows `cache_control` on system entries, content text blocks,
    // and tool definitions; the Chat-Completions conversion should carry each
    // of them through (some OpenAI-compatible proxies honor them).
    let input = json!({
        "model": "claude-3-opus",
        "max_tokens": 1024,
        "system": [
            {"type": "text", "text": "System prompt", "cache_control": {"type": "ephemeral"}}
        ],
        "messages": [{
            "role": "user",
            "content": [
                {"type": "text", "text": "Hello", "cache_control": {"type": "ephemeral", "ttl": "5m"}}
            ]
        }],
        "tools": [{
            "name": "get_weather",
            "description": "Get weather",
            "input_schema": {"type": "object"},
            "cache_control": {"type": "ephemeral"}
        }]
    });

    let result = anthropic_to_openai(input, None).unwrap();
    // System message cache_control preserved
    assert_eq!(result["messages"][0]["cache_control"]["type"], "ephemeral");
    // Text block cache_control preserved (kept in array form, including ttl)
    assert_eq!(
        result["messages"][1]["content"][0]["cache_control"]["type"],
        "ephemeral"
    );
    assert_eq!(
        result["messages"][1]["content"][0]["cache_control"]["ttl"],
        "5m"
    );
    // Tool cache_control preserved
    assert_eq!(result["tools"][0]["cache_control"]["type"], "ephemeral");
}
|
||||
|
||||
#[test]
fn test_openai_to_anthropic_with_cache_tokens() {
    // OpenAI Chat Completions reports cached prompt tokens under
    // usage.prompt_tokens_details.cached_tokens; the converter should map
    // them to Anthropic's usage.cache_read_input_tokens.
    let input = json!({
        "id": "chatcmpl-123",
        "model": "gpt-4",
        "choices": [{
            "index": 0,
            "message": {"role": "assistant", "content": "Hello!"},
            "finish_reason": "stop"
        }],
        "usage": {
            "prompt_tokens": 100,
            "completion_tokens": 50,
            "prompt_tokens_details": {
                "cached_tokens": 80
            }
        }
    });

    let result = openai_to_anthropic(input).unwrap();
    assert_eq!(result["usage"]["input_tokens"], 100);
    assert_eq!(result["usage"]["output_tokens"], 50);
    assert_eq!(result["usage"]["cache_read_input_tokens"], 80);
}
|
||||
|
||||
#[test]
fn test_openai_to_anthropic_with_direct_cache_fields() {
    // Some OpenAI-compatible servers return Anthropic-style cache fields
    // directly on usage; those should pass through unchanged.
    let input = json!({
        "id": "chatcmpl-123",
        "model": "gpt-4",
        "choices": [{
            "index": 0,
            "message": {"role": "assistant", "content": "Hello!"},
            "finish_reason": "stop"
        }],
        "usage": {
            "prompt_tokens": 100,
            "completion_tokens": 50,
            "cache_read_input_tokens": 60,
            "cache_creation_input_tokens": 20
        }
    });

    let result = openai_to_anthropic(input).unwrap();
    assert_eq!(result["usage"]["cache_read_input_tokens"], 60);
    assert_eq!(result["usage"]["cache_creation_input_tokens"], 20);
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,9 @@ use crate::proxy::error::ProxyError;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
/// Anthropic 请求 → OpenAI Responses 请求
|
||||
pub fn anthropic_to_responses(body: Value) -> Result<Value, ProxyError> {
|
||||
///
|
||||
/// `cache_key`: optional prompt_cache_key to inject for improved cache routing
|
||||
pub fn anthropic_to_responses(body: Value, cache_key: Option<&str>) -> Result<Value, ProxyError> {
|
||||
let mut result = json!({});
|
||||
|
||||
// NOTE: 模型映射由上游统一处理(proxy::model_mapper),格式转换层只做结构转换。
|
||||
@@ -84,12 +86,120 @@ pub fn anthropic_to_responses(body: Value) -> Result<Value, ProxyError> {
|
||||
}
|
||||
|
||||
if let Some(v) = body.get("tool_choice") {
|
||||
result["tool_choice"] = v.clone();
|
||||
result["tool_choice"] = map_tool_choice_to_responses(v);
|
||||
}
|
||||
|
||||
// Inject prompt_cache_key for improved cache routing on OpenAI-compatible endpoints
|
||||
if let Some(key) = cache_key {
|
||||
result["prompt_cache_key"] = json!(key);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn map_tool_choice_to_responses(tool_choice: &Value) -> Value {
|
||||
match tool_choice {
|
||||
Value::String(_) => tool_choice.clone(),
|
||||
Value::Object(obj) => match obj.get("type").and_then(|t| t.as_str()) {
|
||||
// Anthropic "any" means at least one tool call is required
|
||||
Some("any") => json!("required"),
|
||||
Some("auto") => json!("auto"),
|
||||
Some("none") => json!("none"),
|
||||
// Anthropic forced tool -> Responses function tool selector
|
||||
Some("tool") => {
|
||||
let name = obj.get("name").and_then(|n| n.as_str()).unwrap_or("");
|
||||
json!({
|
||||
"type": "function",
|
||||
"name": name
|
||||
})
|
||||
}
|
||||
_ => tool_choice.clone(),
|
||||
},
|
||||
_ => tool_choice.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Map a Responses API terminal `status` to an Anthropic `stop_reason`.
///
/// Returns `None` when no status is available. "completed" becomes
/// "tool_use" if any tool call was emitted, otherwise "end_turn".
/// "incomplete" becomes "max_tokens" for token-limit reasons (or when no
/// reason is given); every other status/reason maps to "end_turn".
pub(crate) fn map_responses_stop_reason(
    status: Option<&str>,
    has_tool_use: bool,
    incomplete_reason: Option<&str>,
) -> Option<&'static str> {
    let status = status?;
    let reason = match status {
        "completed" if has_tool_use => "tool_use",
        "incomplete" => match incomplete_reason {
            // No reason reported: assume the output was truncated by the
            // token budget, matching the token-limit reasons.
            None | Some("max_output_tokens") | Some("max_tokens") => "max_tokens",
            Some(_) => "end_turn",
        },
        _ => "end_turn",
    };
    Some(reason)
}
|
||||
|
||||
/// Build Anthropic-style usage JSON from Responses API usage, including cache tokens.
|
||||
///
|
||||
/// Priority order:
|
||||
/// 1. OpenAI nested details (`input_tokens_details.cached_tokens`, `prompt_tokens_details.cached_tokens`) as initial value
|
||||
/// 2. Direct Anthropic-style fields (`cache_read_input_tokens`, `cache_creation_input_tokens`) override if present
|
||||
pub(crate) fn build_anthropic_usage_from_responses(usage: Option<&Value>) -> Value {
|
||||
let u = match usage {
|
||||
Some(v) if !v.is_null() => v,
|
||||
_ => {
|
||||
return json!({
|
||||
"input_tokens": 0,
|
||||
"output_tokens": 0
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
let input = u.get("input_tokens").and_then(|v| v.as_u64()).unwrap_or(0);
|
||||
let output = u.get("output_tokens").and_then(|v| v.as_u64()).unwrap_or(0);
|
||||
|
||||
let mut result = json!({
|
||||
"input_tokens": input,
|
||||
"output_tokens": output
|
||||
});
|
||||
|
||||
// Step 1: OpenAI nested details as fallback
|
||||
// OpenAI Responses API: input_tokens_details.cached_tokens
|
||||
if let Some(cached) = u
|
||||
.pointer("/input_tokens_details/cached_tokens")
|
||||
.and_then(|v| v.as_u64())
|
||||
{
|
||||
result["cache_read_input_tokens"] = json!(cached);
|
||||
}
|
||||
// OpenAI standard: prompt_tokens_details.cached_tokens
|
||||
if let Some(cached) = u
|
||||
.pointer("/prompt_tokens_details/cached_tokens")
|
||||
.and_then(|v| v.as_u64())
|
||||
{
|
||||
if result.get("cache_read_input_tokens").is_none() {
|
||||
result["cache_read_input_tokens"] = json!(cached);
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2: Direct Anthropic-style fields override (authoritative if present)
|
||||
if let Some(v) = u.get("cache_read_input_tokens") {
|
||||
result["cache_read_input_tokens"] = v.clone();
|
||||
}
|
||||
if let Some(v) = u.get("cache_creation_input_tokens") {
|
||||
result["cache_creation_input_tokens"] = v.clone();
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// 将 Anthropic messages 数组转换为 Responses API input 数组
|
||||
///
|
||||
/// 核心转换逻辑:
|
||||
@@ -133,6 +243,8 @@ fn convert_messages_to_input(messages: &[Value]) -> Result<Vec<Value>, ProxyErro
|
||||
} else {
|
||||
"input_text"
|
||||
};
|
||||
// OpenAI Responses API does not accept Anthropic cache_control
|
||||
// under input[].content[].
|
||||
message_content.push(json!({ "type": content_type, "text": text }));
|
||||
}
|
||||
}
|
||||
@@ -239,6 +351,7 @@ pub fn responses_to_anthropic(body: Value) -> Result<Value, ProxyError> {
|
||||
|
||||
let mut content = Vec::new();
|
||||
|
||||
let mut has_tool_use = false;
|
||||
for item in output {
|
||||
let item_type = item.get("type").and_then(|t| t.as_str()).unwrap_or("");
|
||||
|
||||
@@ -253,6 +366,12 @@ pub fn responses_to_anthropic(body: Value) -> Result<Value, ProxyError> {
|
||||
content.push(json!({"type": "text", "text": text}));
|
||||
}
|
||||
}
|
||||
} else if block_type == "refusal" {
|
||||
if let Some(refusal) = block.get("refusal").and_then(|t| t.as_str()) {
|
||||
if !refusal.is_empty() {
|
||||
content.push(json!({"type": "text", "text": refusal}));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -273,6 +392,7 @@ pub fn responses_to_anthropic(body: Value) -> Result<Value, ProxyError> {
|
||||
"name": name,
|
||||
"input": input
|
||||
}));
|
||||
has_tool_use = true;
|
||||
}
|
||||
|
||||
"reasoning" => {
|
||||
@@ -304,25 +424,14 @@ pub fn responses_to_anthropic(body: Value) -> Result<Value, ProxyError> {
|
||||
}
|
||||
|
||||
// status → stop_reason
|
||||
let stop_reason = body
|
||||
.get("status")
|
||||
.and_then(|s| s.as_str())
|
||||
.map(|s| match s {
|
||||
"completed" => "end_turn",
|
||||
"incomplete" => "max_tokens",
|
||||
_ => "end_turn",
|
||||
});
|
||||
let stop_reason = map_responses_stop_reason(
|
||||
body.get("status").and_then(|s| s.as_str()),
|
||||
has_tool_use,
|
||||
body.pointer("/incomplete_details/reason")
|
||||
.and_then(|r| r.as_str()),
|
||||
);
|
||||
|
||||
// Usage — Responses API 使用与 Anthropic 相同的字段名
|
||||
let usage = body.get("usage").cloned().unwrap_or(json!({}));
|
||||
let input_tokens = usage
|
||||
.get("input_tokens")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(0) as u32;
|
||||
let output_tokens = usage
|
||||
.get("output_tokens")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(0) as u32;
|
||||
let usage_json = build_anthropic_usage_from_responses(body.get("usage"));
|
||||
|
||||
let result = json!({
|
||||
"id": body.get("id").and_then(|i| i.as_str()).unwrap_or(""),
|
||||
@@ -332,10 +441,7 @@ pub fn responses_to_anthropic(body: Value) -> Result<Value, ProxyError> {
|
||||
"model": body.get("model").and_then(|m| m.as_str()).unwrap_or(""),
|
||||
"stop_reason": stop_reason,
|
||||
"stop_sequence": null,
|
||||
"usage": {
|
||||
"input_tokens": input_tokens,
|
||||
"output_tokens": output_tokens
|
||||
}
|
||||
"usage": usage_json
|
||||
});
|
||||
|
||||
Ok(result)
|
||||
@@ -353,7 +459,7 @@ mod tests {
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_responses(input).unwrap();
|
||||
let result = anthropic_to_responses(input, None).unwrap();
|
||||
assert_eq!(result["model"], "gpt-4o");
|
||||
assert_eq!(result["max_output_tokens"], 1024);
|
||||
assert_eq!(result["input"][0]["role"], "user");
|
||||
@@ -372,7 +478,7 @@ mod tests {
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_responses(input).unwrap();
|
||||
let result = anthropic_to_responses(input, None).unwrap();
|
||||
assert_eq!(result["instructions"], "You are a helpful assistant.");
|
||||
// system should not appear in input
|
||||
assert_eq!(result["input"].as_array().unwrap().len(), 1);
|
||||
@@ -390,7 +496,7 @@ mod tests {
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_responses(input).unwrap();
|
||||
let result = anthropic_to_responses(input, None).unwrap();
|
||||
assert_eq!(result["instructions"], "Part 1\n\nPart 2");
|
||||
}
|
||||
|
||||
@@ -407,7 +513,7 @@ mod tests {
|
||||
}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_responses(input).unwrap();
|
||||
let result = anthropic_to_responses(input, None).unwrap();
|
||||
assert_eq!(result["tools"][0]["type"], "function");
|
||||
assert_eq!(result["tools"][0]["name"], "get_weather");
|
||||
assert!(result["tools"][0].get("parameters").is_some());
|
||||
@@ -415,6 +521,33 @@ mod tests {
|
||||
assert!(result["tools"][0].get("input_schema").is_none());
|
||||
}
|
||||
|
||||
#[test]
fn test_anthropic_to_responses_tool_choice_any_to_required() {
    // Anthropic `{"type": "any"}` (must call some tool) maps to the
    // Responses API string form "required".
    let input = json!({
        "model": "gpt-4o",
        "max_tokens": 1024,
        "messages": [{"role": "user", "content": "Weather?"}],
        "tool_choice": {"type": "any"}
    });

    let result = anthropic_to_responses(input, None).unwrap();
    assert_eq!(result["tool_choice"], "required");
}
|
||||
|
||||
#[test]
fn test_anthropic_to_responses_tool_choice_tool_to_function() {
    // A forced Anthropic tool selection maps to the Responses API
    // `{"type": "function", "name": …}` selector object.
    let input = json!({
        "model": "gpt-4o",
        "max_tokens": 1024,
        "messages": [{"role": "user", "content": "Weather?"}],
        "tool_choice": {"type": "tool", "name": "get_weather"}
    });

    let result = anthropic_to_responses(input, None).unwrap();
    assert_eq!(result["tool_choice"]["type"], "function");
    assert_eq!(result["tool_choice"]["name"], "get_weather");
}
|
||||
|
||||
#[test]
|
||||
fn test_anthropic_to_responses_tool_use_lifting() {
|
||||
let input = json!({
|
||||
@@ -429,7 +562,7 @@ mod tests {
|
||||
}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_responses(input).unwrap();
|
||||
let result = anthropic_to_responses(input, None).unwrap();
|
||||
let input_arr = result["input"].as_array().unwrap();
|
||||
|
||||
// Should produce: assistant message (text) + function_call item
|
||||
@@ -459,7 +592,7 @@ mod tests {
|
||||
}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_responses(input).unwrap();
|
||||
let result = anthropic_to_responses(input, None).unwrap();
|
||||
let input_arr = result["input"].as_array().unwrap();
|
||||
|
||||
// Should produce: function_call_output item (lifted)
|
||||
@@ -483,7 +616,7 @@ mod tests {
|
||||
}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_responses(input).unwrap();
|
||||
let result = anthropic_to_responses(input, None).unwrap();
|
||||
let input_arr = result["input"].as_array().unwrap();
|
||||
|
||||
// thinking should be discarded, only text remains
|
||||
@@ -506,7 +639,7 @@ mod tests {
|
||||
}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_responses(input).unwrap();
|
||||
let result = anthropic_to_responses(input, None).unwrap();
|
||||
let content = result["input"][0]["content"].as_array().unwrap();
|
||||
|
||||
assert_eq!(content[0]["type"], "input_text");
|
||||
@@ -563,6 +696,26 @@ mod tests {
|
||||
assert_eq!(result["content"][0]["id"], "call_123");
|
||||
assert_eq!(result["content"][0]["name"], "get_weather");
|
||||
assert_eq!(result["content"][0]["input"]["location"], "Tokyo");
|
||||
assert_eq!(result["stop_reason"], "tool_use");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_responses_to_anthropic_with_refusal_block() {
|
||||
let input = json!({
|
||||
"id": "resp_123",
|
||||
"status": "completed",
|
||||
"model": "gpt-4o",
|
||||
"output": [{
|
||||
"type": "message",
|
||||
"content": [{"type": "refusal", "refusal": "I can't help with that."}]
|
||||
}],
|
||||
"usage": {"input_tokens": 10, "output_tokens": 5}
|
||||
});
|
||||
|
||||
let result = responses_to_anthropic(input).unwrap();
|
||||
assert_eq!(result["content"][0]["type"], "text");
|
||||
assert_eq!(result["content"][0]["text"], "I can't help with that.");
|
||||
assert_eq!(result["stop_reason"], "end_turn");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -618,6 +771,24 @@ mod tests {
|
||||
assert_eq!(result["stop_reason"], "max_tokens");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_responses_to_anthropic_incomplete_non_token_reason() {
|
||||
let input = json!({
|
||||
"id": "resp_123",
|
||||
"status": "incomplete",
|
||||
"incomplete_details": {"reason": "content_filter"},
|
||||
"model": "gpt-4o",
|
||||
"output": [{
|
||||
"type": "message",
|
||||
"content": [{"type": "output_text", "text": "Blocked"}]
|
||||
}],
|
||||
"usage": {"input_tokens": 10, "output_tokens": 1}
|
||||
});
|
||||
|
||||
let result = responses_to_anthropic(input).unwrap();
|
||||
assert_eq!(result["stop_reason"], "end_turn");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_model_passthrough() {
|
||||
let input = json!({
|
||||
@@ -626,7 +797,104 @@ mod tests {
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_responses(input).unwrap();
|
||||
let result = anthropic_to_responses(input, None).unwrap();
|
||||
assert_eq!(result["model"], "o3-mini");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_anthropic_to_responses_with_cache_key() {
|
||||
let input = json!({
|
||||
"model": "gpt-4o",
|
||||
"max_tokens": 1024,
|
||||
"messages": [{"role": "user", "content": "Hello"}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_responses(input, Some("my-provider-id")).unwrap();
|
||||
assert_eq!(result["prompt_cache_key"], "my-provider-id");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_anthropic_to_responses_strip_cache_control_on_tools() {
|
||||
let input = json!({
|
||||
"model": "gpt-4o",
|
||||
"max_tokens": 1024,
|
||||
"messages": [{"role": "user", "content": "Weather?"}],
|
||||
"tools": [{
|
||||
"name": "get_weather",
|
||||
"description": "Get weather",
|
||||
"input_schema": {"type": "object"},
|
||||
"cache_control": {"type": "ephemeral"}
|
||||
}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_responses(input, None).unwrap();
|
||||
assert!(result["tools"][0].get("cache_control").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_anthropic_to_responses_strip_cache_control_on_text() {
|
||||
let input = json!({
|
||||
"model": "gpt-4o",
|
||||
"max_tokens": 1024,
|
||||
"messages": [{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "Hello", "cache_control": {"type": "ephemeral"}}
|
||||
]
|
||||
}]
|
||||
});
|
||||
|
||||
let result = anthropic_to_responses(input, None).unwrap();
|
||||
assert!(result["input"][0]["content"][0]
|
||||
.get("cache_control")
|
||||
.is_none());
|
||||
}
|
||||
|
||||
#[test]
fn test_responses_to_anthropic_with_cache_tokens() {
    // Responses API nests cached tokens under
    // usage.input_tokens_details.cached_tokens; the converter should surface
    // them as Anthropic's usage.cache_read_input_tokens.
    let input = json!({
        "id": "resp_123",
        "status": "completed",
        "model": "gpt-4o",
        "output": [{
            "type": "message",
            "content": [{"type": "output_text", "text": "Hello!"}]
        }],
        "usage": {
            "input_tokens": 100,
            "output_tokens": 50,
            "input_tokens_details": {
                "cached_tokens": 80
            }
        }
    });

    let result = responses_to_anthropic(input).unwrap();
    assert_eq!(result["usage"]["input_tokens"], 100);
    assert_eq!(result["usage"]["output_tokens"], 50);
    assert_eq!(result["usage"]["cache_read_input_tokens"], 80);
}
|
||||
|
||||
#[test]
fn test_responses_to_anthropic_with_direct_cache_fields() {
    // When upstream already reports Anthropic-style cache fields directly on
    // `usage`, they must pass through into the converted response unchanged.
    let response = json!({
        "id": "resp_123",
        "status": "completed",
        "model": "gpt-4o",
        "output": [{
            "type": "message",
            "content": [{"type": "output_text", "text": "Hello!"}]
        }],
        "usage": {
            "input_tokens": 100,
            "output_tokens": 50,
            "cache_read_input_tokens": 60,
            "cache_creation_input_tokens": 20
        }
    });

    let converted = responses_to_anthropic(response).unwrap();

    let usage = &converted["usage"];
    assert_eq!(usage["cache_read_input_tokens"], 60);
    assert_eq!(usage["cache_creation_input_tokens"], 20);
}
|
||||
}
|
||||
|
||||
@@ -155,11 +155,11 @@ export function useProviderActions(activeApp: AppId) {
|
||||
(provider.meta?.apiFormat === "openai_chat" ||
|
||||
provider.meta?.apiFormat === "openai_responses")
|
||||
) {
|
||||
// OpenAI Chat 格式供应商:显示代理提示
|
||||
// OpenAI format provider: show proxy hint
|
||||
toast.info(
|
||||
t("notifications.openAIChatFormatHint", {
|
||||
t("notifications.openAIFormatHint", {
|
||||
defaultValue:
|
||||
"此供应商使用 OpenAI Chat 格式,需要开启代理服务才能正常使用",
|
||||
"此供应商使用 OpenAI 兼容格式,需要开启代理服务才能正常使用",
|
||||
}),
|
||||
{
|
||||
duration: 5000,
|
||||
|
||||
@@ -170,6 +170,7 @@
|
||||
"settingsSaved": "Settings saved",
|
||||
"settingsSaveFailed": "Failed to save settings: {{error}}",
|
||||
"openAIChatFormatHint": "This provider uses OpenAI Chat format and requires the proxy service to be enabled",
|
||||
"openAIFormatHint": "This provider uses OpenAI-compatible format and requires the proxy service to be enabled",
|
||||
"openLinkFailed": "Failed to open link",
|
||||
"openclawModelsRegistered": "Models have been registered to /model list",
|
||||
"openclawDefaultModelSet": "Set as default model",
|
||||
|
||||
@@ -170,6 +170,7 @@
|
||||
"settingsSaved": "設定を保存しました",
|
||||
"settingsSaveFailed": "設定の保存に失敗しました: {{error}}",
|
||||
"openAIChatFormatHint": "このプロバイダーは OpenAI Chat フォーマットを使用しており、プロキシサービスの有効化が必要です",
|
||||
"openAIFormatHint": "このプロバイダーは OpenAI 互換フォーマットを使用しており、プロキシサービスの有効化が必要です",
|
||||
"openLinkFailed": "リンクを開けませんでした",
|
||||
"openclawModelsRegistered": "モデルが /model リストに登録されました",
|
||||
"openclawDefaultModelSet": "デフォルトモデルに設定しました",
|
||||
|
||||
@@ -170,6 +170,7 @@
|
||||
"settingsSaved": "设置已保存",
|
||||
"settingsSaveFailed": "保存设置失败:{{error}}",
|
||||
"openAIChatFormatHint": "此供应商使用 OpenAI Chat 格式,需要开启代理服务才能正常使用",
|
||||
"openAIFormatHint": "此供应商使用 OpenAI 兼容格式,需要开启代理服务才能正常使用",
|
||||
"openLinkFailed": "链接打开失败",
|
||||
"openclawModelsRegistered": "模型已注册到 /model 列表",
|
||||
"openclawDefaultModelSet": "已设为默认模型",
|
||||
|
||||
@@ -147,6 +147,8 @@ export interface ProviderMeta {
|
||||
// - "openai_chat": OpenAI Chat Completions 格式,需要格式转换
|
||||
// - "openai_responses": OpenAI Responses API 格式,需要格式转换
|
||||
apiFormat?: "anthropic" | "openai_chat" | "openai_responses";
|
||||
// Prompt cache key for OpenAI-compatible endpoints (improves cache hit rate)
|
||||
promptCacheKey?: string;
|
||||
}
|
||||
|
||||
// Skill 同步方式
|
||||
|
||||
Reference in New Issue
Block a user