mirror of
https://github.com/farion1231/cc-switch.git
synced 2026-05-24 14:50:20 +08:00
feat(copilot): strip thinking blocks before forwarding to save premium quota
Copilot routes through OpenAI-compatible endpoints that reject Anthropic's thinking and redacted_thinking blocks. Previously the request would fail upstream, burning one premium interaction, and only then trigger thinking_rectifier to retry. This adds a proactive strip_thinking_blocks pass in the Copilot optimization pipeline (step 3.5, after tool_result merging). Signature fields and top-level thinking are left alone — those are the reactive rectifier's job on the error path. Also fixes a default-value inconsistency where CopilotOptimizerConfig's Default impl used "gpt-4o-mini" while the serde default function returned "gpt-5-mini" (aligned to gpt-5-mini, matching the reference implementation). Aligned with yuegongzi/copilot-api's /v1/messages handler behavior.
This commit is contained in:
@@ -443,6 +443,41 @@ pub fn sanitize_orphan_tool_results(mut body: Value) -> Value {
|
||||
body
|
||||
}
|
||||
|
||||
/// 请求前主动剥离所有 assistant 消息里的 thinking / redacted_thinking block
|
||||
///
|
||||
/// Copilot 的三条目标端点(`/chat/completions`、`/v1/responses`、`/v1/chat/completions`)
|
||||
/// 均为 OpenAI 兼容格式,不识别 Anthropic 的 thinking block。若原样转发,上游会
|
||||
/// 拒绝并返回 invalid_request_error —— 届时 `thinking_rectifier` 才做反应式清理并
|
||||
/// 重试。那次已经失败的请求依旧消耗一次 premium quota,所以此处提前剥离。
|
||||
///
|
||||
/// 与 `thinking_rectifier::rectify_anthropic_request` 的区别:
|
||||
/// - 本函数只剥 thinking / redacted_thinking 两类 block,不触碰 signature,也不
|
||||
/// 移除顶层 thinking 字段——那些是错误路径上的激进整流,常规路径不需要。
|
||||
/// - 保持与 `merge_tool_results` / `sanitize_orphan_tool_results` 一致的"消费 body、
|
||||
/// 返回新 body"签名,便于接入 forwarder 管道。
|
||||
pub fn strip_thinking_blocks(mut body: Value) -> Value {
|
||||
let Some(messages) = body.get_mut("messages").and_then(|m| m.as_array_mut()) else {
|
||||
return body;
|
||||
};
|
||||
|
||||
for msg in messages.iter_mut() {
|
||||
if msg.get("role").and_then(|r| r.as_str()) != Some("assistant") {
|
||||
continue;
|
||||
}
|
||||
let Some(content) = msg.get_mut("content").and_then(|c| c.as_array_mut()) else {
|
||||
continue;
|
||||
};
|
||||
content.retain(|block| {
|
||||
!matches!(
|
||||
block.get("type").and_then(|t| t.as_str()),
|
||||
Some("thinking") | Some("redacted_thinking")
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
body
|
||||
}
|
||||
|
||||
// ─── 内部辅助 ─────────────────────────────────
|
||||
|
||||
/// 从请求体的 `system` 字段提取文本(处理 string/array 两种格式)。
|
||||
@@ -1371,4 +1406,138 @@ mod tests {
|
||||
assert_eq!(content[0]["type"], "text");
|
||||
assert_eq!(content[1]["type"], "text");
|
||||
}
|
||||
|
||||
// === strip_thinking_blocks 测试 ===
|
||||
|
||||
#[test]
fn test_strip_thinking_removes_assistant_thinking_blocks() {
    let request = serde_json::json!({
        "messages": [
            {"role": "user", "content": [{"type": "text", "text": "hi"}]},
            {"role": "assistant", "content": [
                {"type": "thinking", "thinking": "let me ponder", "signature": "sig"},
                {"type": "redacted_thinking", "data": "opaque"},
                {"type": "text", "text": "hello"},
                {"type": "tool_use", "id": "t1", "name": "read", "input": {}}
            ]}
        ]
    });

    let stripped = strip_thinking_blocks(request);

    // Both thinking variants must be gone; the two remaining blocks keep their order.
    let remaining_types: Vec<Option<&str>> = stripped["messages"][1]["content"]
        .as_array()
        .unwrap()
        .iter()
        .map(|block| block["type"].as_str())
        .collect();
    assert_eq!(remaining_types, [Some("text"), Some("tool_use")]);
}
|
||||
|
||||
#[test]
fn test_strip_thinking_leaves_user_messages_untouched() {
    // Only assistant messages are processed; a thinking block inside a user
    // message (rare, but possible) is left alone.
    let request = serde_json::json!({
        "messages": [
            {"role": "user", "content": [
                {"type": "thinking", "thinking": "x"},
                {"type": "text", "text": "hi"}
            ]}
        ]
    });

    let stripped = strip_thinking_blocks(request);

    let block_count = stripped["messages"][0]["content"].as_array().map(Vec::len);
    assert_eq!(block_count, Some(2));
}
|
||||
|
||||
#[test]
fn test_strip_thinking_handles_missing_messages() {
    // A body with no `messages` key must come back unchanged.
    let original = serde_json::json!({ "model": "claude-3-5-sonnet" });

    let stripped = strip_thinking_blocks(original.clone());

    assert_eq!(stripped, original);
}
|
||||
|
||||
#[test]
fn test_strip_thinking_leaves_empty_content_array() {
    // An assistant message holding nothing but thinking ends up with an empty
    // content array after stripping; it is kept as-is for upstream to handle.
    let request = serde_json::json!({
        "messages": [
            {"role": "assistant", "content": [
                {"type": "thinking", "thinking": "solo"}
            ]}
        ]
    });

    let stripped = strip_thinking_blocks(request);

    let blocks = stripped["messages"][0]["content"].as_array().unwrap();
    assert!(blocks.is_empty());
}
|
||||
|
||||
#[test]
fn test_strip_thinking_preserves_signature_on_non_thinking_blocks() {
    // `signature` is left for thinking_rectifier to deal with on the error
    // path; this pass must not touch it.
    let request = serde_json::json!({
        "messages": [
            {"role": "assistant", "content": [
                {"type": "tool_use", "id": "t1", "name": "x", "input": {}, "signature": "s"}
            ]}
        ]
    });

    let stripped = strip_thinking_blocks(request);

    let signature = stripped["messages"][0]["content"][0]["signature"].as_str();
    assert_eq!(signature, Some("s"));
}
|
||||
|
||||
#[test]
fn test_strip_thinking_multiple_assistant_turns() {
    let request = serde_json::json!({
        "messages": [
            {"role": "user", "content": [{"type": "text", "text": "q1"}]},
            {"role": "assistant", "content": [
                {"type": "thinking", "thinking": "a"},
                {"type": "text", "text": "r1"}
            ]},
            {"role": "user", "content": [{"type": "text", "text": "q2"}]},
            {"role": "assistant", "content": [
                {"type": "redacted_thinking", "data": "x"},
                {"type": "text", "text": "r2"}
            ]}
        ]
    });

    let stripped = strip_thinking_blocks(request);

    // The assistant turns sit at indices 1 and 3; each must keep only its text block.
    for (turn_index, expected_text) in [(1usize, "r1"), (3usize, "r2")] {
        let blocks = stripped["messages"][turn_index]["content"]
            .as_array()
            .unwrap();
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0]["text"], expected_text);
    }
}
|
||||
|
||||
#[test]
fn test_strip_thinking_ignores_string_content() {
    // assistant.content is a plain string rather than a block array, as sent
    // by older requests or minimal clients. The pass must neither panic nor
    // convert the structure.
    let original = serde_json::json!({
        "messages": [
            {"role": "assistant", "content": "plain text response"}
        ]
    });

    let stripped = strip_thinking_blocks(original.clone());

    assert_eq!(stripped, original);
}
|
||||
|
||||
#[test]
fn test_strip_thinking_preserves_block_order() {
    // Thinking blocks are interleaved with kept blocks; the survivors must
    // stay in their original relative order.
    let request = serde_json::json!({
        "messages": [
            {"role": "assistant", "content": [
                {"type": "thinking", "thinking": "pre"},
                {"type": "text", "text": "A"},
                {"type": "tool_use", "id": "t1", "name": "x", "input": {}},
                {"type": "redacted_thinking", "data": "mid"},
                {"type": "text", "text": "B"}
            ]}
        ]
    });

    let stripped = strip_thinking_blocks(request);

    let blocks = stripped["messages"][0]["content"].as_array().unwrap();
    assert_eq!(blocks.len(), 3);

    let (first, middle, last) = (&blocks[0], &blocks[1], &blocks[2]);
    assert_eq!(first["text"], "A");
    assert_eq!(middle["type"], "tool_use");
    assert_eq!(last["text"], "B");
}
|
||||
}
|
||||
|
||||
@@ -821,6 +821,12 @@ impl RequestForwarder {
|
||||
mapped_body = super::copilot_optimizer::merge_tool_results(mapped_body);
|
||||
}
|
||||
|
||||
// 3.5. 主动剥离 thinking block — Copilot 走 OpenAI 兼容端点不识别该块
|
||||
// 避免上游拒绝后由 rectifier 反应式重试(首次请求已消耗 quota)
|
||||
if self.copilot_optimizer_config.strip_thinking {
|
||||
mapped_body = super::copilot_optimizer::strip_thinking_blocks(mapped_body);
|
||||
}
|
||||
|
||||
// 4. Warmup 小模型降级
|
||||
if self.copilot_optimizer_config.warmup_downgrade && classification.is_warmup {
|
||||
log::info!(
|
||||
|
||||
@@ -298,9 +298,15 @@ pub struct CopilotOptimizerConfig {
|
||||
/// Warmup 小模型降级(默认开启 — 与参考实现对齐,避免探针请求消耗 premium quota)
|
||||
#[serde(default = "default_true")]
|
||||
pub warmup_downgrade: bool,
|
||||
/// Warmup 降级使用的模型(默认 "gpt-4o-mini")
|
||||
/// Warmup 降级使用的模型(默认 "gpt-5-mini")
|
||||
#[serde(default = "default_warmup_model")]
|
||||
pub warmup_model: String,
|
||||
/// 请求前主动剥离 assistant 消息里的 thinking / redacted_thinking block
|
||||
///
|
||||
/// Copilot 走 OpenAI 兼容端点,thinking block 会被上游拒绝并触发 rectifier 反应式
|
||||
/// 重试,那时第一次请求已经消耗了一次 premium quota。主动剥离避免这次浪费。
|
||||
#[serde(default = "default_true")]
|
||||
pub strip_thinking: bool,
|
||||
}
|
||||
|
||||
fn default_warmup_model() -> String {
|
||||
@@ -317,7 +323,8 @@ impl Default for CopilotOptimizerConfig {
|
||||
deterministic_request_id: true,
|
||||
subagent_detection: true,
|
||||
warmup_downgrade: true,
|
||||
warmup_model: "gpt-4o-mini".to_string(),
|
||||
warmup_model: "gpt-5-mini".to_string(),
|
||||
strip_thinking: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user