Preserve cache hints when collapsing system prompts
Strict OpenAI-compatible chat backends still need fragmented Claude
system prompts collapsed into one leading system message, but that
normalization should not silently drop stable cache hints. Preserve
message-level cache_control when the merged system fragments agree,
and fall back to omitting it when the fragments conflict.

Constraint: Must keep single-system normalization for Nvidia/Qwen-style chat backends
Rejected: Always copy the first cache_control | could misrepresent conflicting cache boundaries
Confidence: high
Scope-risk: narrow
Reversibility: clean
Directive: If system prompt merging changes again, preserve cache_control whenever the merged metadata is unambiguous
Tested: cargo test proxy::providers::transform --manifest-path src-tauri/Cargo.toml
Not-tested: End-to-end prompt caching behavior against cache-aware OpenAI-compatible upstreams
Related: #1881
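For readers who want the merge rule outside diff context, here is a minimal standalone sketch of the behavior described above. merge_system_fragments is an illustrative name and shape, not this crate's API; the real change lives inside normalize_openai_system_messages in the proxy transform module, shown in the diff below.

    // Sketch only: mirrors the merge rule described in the commit message.
    use serde_json::{json, Value};

    fn merge_system_fragments(fragments: &[Value]) -> Value {
        let mut parts = Vec::new();
        let mut inherited: Option<Value> = None;
        let mut conflict = false;

        for fragment in fragments {
            if let Some(text) = fragment.get("text").and_then(Value::as_str) {
                parts.push(text.to_string());
            }
            if let Some(cc) = fragment.get("cache_control") {
                match &inherited {
                    None => inherited = Some(cc.clone()),
                    Some(existing) if existing == cc => {} // hints agree: keep
                    Some(_) => conflict = true,            // hints disagree: drop
                }
            }
        }

        // Collapse every fragment into one leading system message; attach the
        // cache hint only when it is unambiguous across all fragments.
        let mut merged = json!({"role": "system", "content": parts.join("\n")});
        if !conflict {
            if let Some(cc) = inherited {
                merged["cache_control"] = cc;
            }
        }
        merged
    }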
@@ -206,6 +206,8 @@ fn normalize_openai_system_messages(messages: &mut Vec<Value>) {
     }
 
     let mut parts = Vec::new();
+    let mut inherited_cache_control: Option<Value> = None;
+    let mut cache_control_conflict = false;
     messages.retain(|message| {
         if message.get("role").and_then(|value| value.as_str()) != Some("system") {
             return true;
@@ -226,11 +228,25 @@ fn normalize_openai_system_messages(messages: &mut Vec<Value>) {
             _ => {}
         }
 
+        if let Some(cache_control) = message.get("cache_control") {
+            match &inherited_cache_control {
+                None => inherited_cache_control = Some(cache_control.clone()),
+                Some(existing) if existing == cache_control => {}
+                Some(_) => cache_control_conflict = true,
+            }
+        }
+
         false
     });
 
     if !parts.is_empty() {
-        messages.insert(0, json!({"role": "system", "content": parts.join("\n")}));
+        let mut merged = json!({"role": "system", "content": parts.join("\n")});
+        if !cache_control_conflict {
+            if let Some(cache_control) = inherited_cache_control {
+                merged["cache_control"] = cache_control;
+            }
+        }
+        messages.insert(0, merged);
     }
 }
 
@@ -618,7 +634,7 @@ mod tests {
             "model": "claude-3-sonnet",
             "max_tokens": 1024,
             "system": [
-                {"type": "text", "text": "You are Claude Code."},
+                {"type": "text", "text": "You are Claude Code.", "cache_control": {"type": "ephemeral"}},
                 {"type": "text", "text": "Be concise."}
             ],
             "messages": [
@@ -634,9 +650,28 @@ mod tests {
             result["messages"][0]["content"],
             "You are Claude Code.\nBe concise.\nFollow repo conventions."
         );
+        assert_eq!(result["messages"][0]["cache_control"]["type"], "ephemeral");
         assert_eq!(result["messages"][1]["role"], "user");
     }
 
+    #[test]
+    fn test_anthropic_to_openai_drops_conflicting_system_cache_control_when_merging() {
+        let input = json!({
+            "model": "claude-3-sonnet",
+            "max_tokens": 1024,
+            "system": [
+                {"type": "text", "text": "You are Claude Code.", "cache_control": {"type": "ephemeral"}},
+                {"type": "text", "text": "Be concise.", "cache_control": {"type": "ephemeral", "ttl": "5m"}}
+            ],
+            "messages": [{"role": "user", "content": "Hello"}]
+        });
+
+        let result = anthropic_to_openai(input, None).unwrap();
+        assert_eq!(result["messages"][0]["role"], "system");
+        assert_eq!(result["messages"][0]["content"], "You are Claude Code.\nBe concise.");
+        assert!(result["messages"][0].get("cache_control").is_none());
+    }
+
     #[test]
     fn test_anthropic_to_openai_tool_use() {
         let input = json!({
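A hedged usage example against the hypothetical merge_system_fragments sketch above, showing the two outcomes the new tests pin down: fragments whose hints agree keep the hint, fragments whose hints conflict lose it.

    // Agreeing case: only one unambiguous hint across fragments, so it survives.
    let agreeing = vec![
        json!({"type": "text", "text": "You are Claude Code.",
               "cache_control": {"type": "ephemeral"}}),
        json!({"type": "text", "text": "Be concise."}),
    ];
    assert_eq!(
        merge_system_fragments(&agreeing)["cache_control"]["type"],
        "ephemeral"
    );

    // Conflicting case: two different hints, so the merged message carries none.
    let conflicting = vec![
        json!({"type": "text", "text": "You are Claude Code.",
               "cache_control": {"type": "ephemeral"}}),
        json!({"type": "text", "text": "Be concise.",
               "cache_control": {"type": "ephemeral", "ttl": "5m"}}),
    ];
    assert!(merge_system_fragments(&conflicting)
        .get("cache_control")
        .is_none());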