feat(copilot): strip thinking blocks before forwarding to save premium quota

Copilot routes requests through OpenAI-compatible endpoints, which reject Anthropic's thinking and redacted_thinking content blocks. Previously such a request would fail upstream, burning one premium interaction, and only then trigger thinking_rectifier to clean up and retry. This commit adds a proactive strip_thinking_blocks pass to the Copilot optimization pipeline (step 3.5, after tool_result merging). Signature fields and the top-level thinking field are left alone — those remain the reactive rectifier's job on the error path. It also fixes a default-value inconsistency: CopilotOptimizerConfig's Default impl used "gpt-4o-mini" while the serde default function returned "gpt-5-mini"; both now use gpt-5-mini, matching the reference implementation. The behavior is aligned with the /v1/messages handler in yuegongzi/copilot-api.
2026-05-24 14:50:20 +08:00 · 2026-04-18 17:37:48 +08:00
parent 615c430dd3
commit 63aa310576
3 changed files with 184 additions and 2 deletions
@@ -443,6 +443,41 @@ pub fn sanitize_orphan_tool_results(mut body: Value) -> Value {
    body
 }

+/// Proactively removes every `thinking` / `redacted_thinking` block from assistant
+/// messages before the request is forwarded.
+///
+/// Copilot's three target endpoints (`/chat/completions`, `/v1/responses`,
+/// `/v1/chat/completions`) are OpenAI-compatible and do not understand Anthropic
+/// thinking blocks. Forwarded as-is, the upstream rejects the request with an
+/// invalid_request_error, and only then does `thinking_rectifier` clean up
+/// reactively and retry. The failed attempt still costs one premium quota unit,
+/// so the blocks are stripped up front here.
+///
+/// How this differs from `thinking_rectifier::rectify_anthropic_request`:
+/// - Only the two block types above are dropped. `signature` fields and the
+///   top-level `thinking` field are left untouched; that more aggressive cleanup
+///   belongs to the error path and is not needed on the normal path.
+/// - The "consume the body, return the body" signature mirrors `merge_tool_results`
+///   and `sanitize_orphan_tool_results`, so it plugs into the forwarder pipeline.
+///
+/// Non-assistant messages, non-array `content`, and a body without a `messages`
+/// array are all passed through unchanged.
+pub fn strip_thinking_blocks(mut body: Value) -> Value {
+    if let Some(turns) = body.get_mut("messages").and_then(Value::as_array_mut) {
+        turns
+            .iter_mut()
+            .filter(|turn| turn.get("role").and_then(Value::as_str) == Some("assistant"))
+            .filter_map(|turn| turn.get_mut("content").and_then(Value::as_array_mut))
+            .for_each(|blocks| {
+                blocks.retain(|block| {
+                    let kind = block.get("type").and_then(Value::as_str);
+                    kind != Some("thinking") && kind != Some("redacted_thinking")
+                });
+            });
+    }
+    body
+}
+
 // ─── 内部辅助 ─────────────────────────────────

 /// 从请求体的 `system` 字段提取文本（处理 string/array 两种格式）。
@@ -1371,4 +1406,138 @@ mod tests {
        assert_eq!(content[0]["type"], "text");
        assert_eq!(content[1]["type"], "text");
    }
+
+    // === strip_thinking_blocks tests ===
+
+    #[test]
+    fn test_strip_thinking_removes_assistant_thinking_blocks() {
+        // A user turn followed by an assistant turn mixing thinking and regular blocks.
+        let request = serde_json::json!({
+            "messages": [
+                {"role": "user", "content": [{"type": "text", "text": "hi"}]},
+                {"role": "assistant", "content": [
+                    {"type": "thinking", "thinking": "let me ponder", "signature": "sig"},
+                    {"type": "redacted_thinking", "data": "opaque"},
+                    {"type": "text", "text": "hello"},
+                    {"type": "tool_use", "id": "t1", "name": "read", "input": {}}
+                ]}
+            ]
+        });
+        let stripped = strip_thinking_blocks(request);
+        let blocks = stripped["messages"][1]["content"].as_array().unwrap();
+        let kinds: Vec<_> = blocks.iter().map(|b| b["type"].as_str()).collect();
+        assert_eq!(kinds, [Some("text"), Some("tool_use")]);
+    }
+
+    #[test]
+    fn test_strip_thinking_leaves_user_messages_untouched() {
+        // Only assistant turns are processed; a thinking block in a user turn (rare, but possible) stays.
+        let request = serde_json::json!({
+            "messages": [
+                {"role": "user", "content": [
+                    {"type": "thinking", "thinking": "x"},
+                    {"type": "text", "text": "hi"}
+                ]}
+            ]
+        });
+        let stripped = strip_thinking_blocks(request);
+        let user_blocks = &stripped["messages"][0]["content"];
+        assert_eq!(user_blocks.as_array().map(Vec::len), Some(2));
+    }
+
+    #[test]
+    fn test_strip_thinking_handles_missing_messages() {
+        // No `messages` key at all: the body must come back unchanged.
+        let original = serde_json::json!({ "model": "claude-3-5-sonnet" });
+        assert_eq!(strip_thinking_blocks(original.clone()), original);
+    }
+
+    #[test]
+    fn test_strip_thinking_leaves_empty_content_array() {
+        // An assistant turn holding nothing but thinking ends up with an empty
+        // `content` array, which is kept and left for the upstream to handle.
+        let request = serde_json::json!({
+            "messages": [
+                {"role": "assistant", "content": [
+                    {"type": "thinking", "thinking": "solo"}
+                ]}
+            ]
+        });
+        let stripped = strip_thinking_blocks(request);
+        assert_eq!(stripped["messages"][0]["content"], serde_json::json!([]));
+    }
+
+    #[test]
+    fn test_strip_thinking_preserves_signature_on_non_thinking_blocks() {
+        // `signature` is left for thinking_rectifier to handle on the error path;
+        // this pass must not touch it.
+        let request = serde_json::json!({
+            "messages": [
+                {"role": "assistant", "content": [
+                    {"type": "tool_use", "id": "t1", "name": "x", "input": {}, "signature": "s"}
+                ]}
+            ]
+        });
+        let stripped = strip_thinking_blocks(request);
+        assert_eq!(stripped["messages"][0]["content"][0]["signature"].as_str(), Some("s"));
+    }
+
+    #[test]
+    fn test_strip_thinking_multiple_assistant_turns() {
+        // Two assistant turns, each led by a different kind of thinking block.
+        let request = serde_json::json!({
+            "messages": [
+                {"role": "user", "content": [{"type": "text", "text": "q1"}]},
+                {"role": "assistant", "content": [
+                    {"type": "thinking", "thinking": "a"},
+                    {"type": "text", "text": "r1"}
+                ]},
+                {"role": "user", "content": [{"type": "text", "text": "q2"}]},
+                {"role": "assistant", "content": [
+                    {"type": "redacted_thinking", "data": "x"},
+                    {"type": "text", "text": "r2"}
+                ]}
+            ]
+        });
+        let stripped = strip_thinking_blocks(request);
+        for (idx, reply) in [(1usize, "r1"), (3, "r2")] {
+            let blocks = stripped["messages"][idx]["content"].as_array().unwrap();
+            assert_eq!(blocks.len(), 1);
+            assert_eq!(blocks[0]["text"], reply);
+        }
+    }
+
+    #[test]
+    fn test_strip_thinking_ignores_string_content() {
+        // `content` may be a plain string instead of a block array (older requests
+        // or minimal clients do this). The pass must neither panic nor reshape it.
+        let original = serde_json::json!({
+            "messages": [
+                {"role": "assistant", "content": "plain text response"}
+            ]
+        });
+        let expected = original.clone();
+        assert_eq!(strip_thinking_blocks(original), expected);
+    }
+
+    #[test]
+    fn test_strip_thinking_preserves_block_order() {
+        // Thinking blocks sit before and between the blocks that must survive in order.
+        let request = serde_json::json!({
+            "messages": [
+                {"role": "assistant", "content": [
+                    {"type": "thinking", "thinking": "pre"},
+                    {"type": "text", "text": "A"},
+                    {"type": "tool_use", "id": "t1", "name": "x", "input": {}},
+                    {"type": "redacted_thinking", "data": "mid"},
+                    {"type": "text", "text": "B"}
+                ]}
+            ]
+        });
+        let stripped = strip_thinking_blocks(request);
+        let kept = stripped["messages"][0]["content"].as_array().unwrap();
+        let expected = [("text", "A"), ("type", "tool_use"), ("text", "B")];
+        assert_eq!(kept.len(), expected.len());
+        kept.iter().zip(expected).for_each(|(blk, (key, want))| assert_eq!(blk[key], want));
+    }
 }
@@ -821,6 +821,12 @@ impl RequestForwarder {
                mapped_body = super::copilot_optimizer::merge_tool_results(mapped_body);
            }

+            // 3.5. 主动剥离 thinking block — Copilot 走 OpenAI 兼容端点不识别该块
+            //      避免上游拒绝后由 rectifier 反应式重试（首次请求已消耗 quota）
+            if self.copilot_optimizer_config.strip_thinking {
+                mapped_body = super::copilot_optimizer::strip_thinking_blocks(mapped_body);
+            }
+
            // 4. Warmup 小模型降级
            if self.copilot_optimizer_config.warmup_downgrade && classification.is_warmup {
                log::info!(
@@ -298,9 +298,15 @@ pub struct CopilotOptimizerConfig {
    /// Warmup 小模型降级（默认开启 — 与参考实现对齐，避免探针请求消耗 premium quota）
    #[serde(default = "default_true")]
    pub warmup_downgrade: bool,
-    /// Warmup 降级使用的模型（默认 "gpt-4o-mini"）
+    /// Warmup 降级使用的模型（默认 "gpt-5-mini"）
    #[serde(default = "default_warmup_model")]
    pub warmup_model: String,
+    /// 请求前主动剥离 assistant 消息里的 thinking / redacted_thinking block
+    ///
+    /// Copilot 走 OpenAI 兼容端点，thinking block 会被上游拒绝并触发 rectifier 反应式
+    /// 重试，那时第一次请求已经消耗了一次 premium quota。主动剥离避免这次浪费。
+    #[serde(default = "default_true")]
+    pub strip_thinking: bool,
 }

 fn default_warmup_model() -> String {
@@ -317,7 +323,8 @@ impl Default for CopilotOptimizerConfig {
            deterministic_request_id: true,
            subagent_detection: true,
            warmup_downgrade: true,
-            warmup_model: "gpt-4o-mini".to_string(),
+            warmup_model: "gpt-5-mini".to_string(),
+            strip_thinking: true,
        }
    }
 }