Mirror of https://github.com/farion1231/cc-switch.git, synced 2026-03-22 15:08:22 +08:00
fix(proxy): use max_completion_tokens for o1/o3 series models (#1451)
* fix(proxy): use max_completion_tokens for o1/o3 series models

  When converting Anthropic requests to OpenAI format for o1/o3 series models
  (like o1-mini, o3-mini), use max_completion_tokens instead of max_tokens to
  avoid unsupported_parameter errors.

  Fixes #1448

* fix: revert incorrect o-series max_completion_tokens in Responses API path

  The Responses API uses max_output_tokens for all models, including the
  o-series. The o-series max_completion_tokens fix should only apply to the
  Chat Completions API.

---------

Co-authored-by: Hajen Teowideo <hajen.teowideo@example.com>
Co-authored-by: Jason Young <44939412+farion1231@users.noreply.github.com>
Co-authored-by: Jason <farion1231@gmail.com>
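For context, the output-token cap goes by a different name depending on which OpenAI endpoint and which model family the proxy targets. A minimal sketch of the mapping the two commits together implement (a standalone re-derivation, not the crate's actual API; the helper name is illustrative):

fn token_cap_field(api: &str, model: &str) -> &'static str {
    // Same shape-based o-series check the commit adds below.
    let is_o_series = model.len() > 1
        && model.starts_with('o')
        && model.as_bytes()[1].is_ascii_digit();
    match api {
        // Responses API: one field name for every model family.
        "responses" => "max_output_tokens",
        // Chat Completions: o-series reasoning models reject max_tokens
        // with an unsupported_parameter error and want this field instead.
        _ if is_o_series => "max_completion_tokens",
        _ => "max_tokens",
    }
}

fn main() {
    assert_eq!(token_cap_field("chat", "o3-mini"), "max_completion_tokens");
    assert_eq!(token_cap_field("chat", "gpt-4o"), "max_tokens");
    assert_eq!(token_cap_field("responses", "o3-mini"), "max_output_tokens");
}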
@@ -6,6 +6,14 @@
 use crate::proxy::error::ProxyError;
 use serde_json::{json, Value};
 
+/// Detect OpenAI o-series reasoning models (o1, o3, o4-mini, etc.)
+/// These models require `max_completion_tokens` instead of `max_tokens`.
+pub fn is_openai_o_series(model: &str) -> bool {
+    model.len() > 1
+        && model.starts_with('o')
+        && model.as_bytes().get(1).is_some_and(|b| b.is_ascii_digit())
+}
+
 /// Anthropic request → OpenAI request
 ///
 /// `cache_key`: optional prompt_cache_key to inject for improved cache routing
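The predicate is shape-based rather than an allowlist, so future o-series ids match without a code change; the tradeoff is that any model id shaped like "o" plus a digit matches, whoever serves it. A quick check of that tradeoff (both ids below are hypothetical):

fn is_openai_o_series(model: &str) -> bool {
    model.len() > 1
        && model.starts_with('o')
        && model.as_bytes().get(1).is_some_and(|b| b.is_ascii_digit())
}

fn main() {
    // A future OpenAI id of the same shape matches automatically (hypothetical name).
    assert!(is_openai_o_series("o5-large"));
    // ...but so would a third-party id that happens to share the shape (hypothetical name).
    assert!(is_openai_o_series("o2-vendor-model"));
}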
@@ -50,9 +58,14 @@ pub fn anthropic_to_openai(body: Value, cache_key: Option<&str>) -> Result<Value
 
     result["messages"] = json!(messages);
 
-    // Convert parameters
+    // Convert parameters; o-series models need max_completion_tokens
+    let model = body.get("model").and_then(|m| m.as_str()).unwrap_or("");
     if let Some(v) = body.get("max_tokens") {
-        result["max_tokens"] = v.clone();
+        if is_openai_o_series(model) {
+            result["max_completion_tokens"] = v.clone();
+        } else {
+            result["max_tokens"] = v.clone();
+        }
     }
     if let Some(v) = body.get("temperature") {
         result["temperature"] = v.clone();
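In effect, the converter now moves the cap to a different key for o-series targets instead of copying it through unchanged; otherwise the upstream Chat Completions endpoint rejects the request with an unsupported_parameter error (roughly "max_tokens is not supported with this model, use max_completion_tokens instead"; exact wording may vary). A sketch of just the rename, assuming serde_json as a dependency (the surrounding message and system-prompt conversion is elided):

use serde_json::json;

fn main() {
    let mut result = json!({
        "model": "o3-mini",
        "max_tokens": 4096,
        "messages": [{"role": "user", "content": "Hello"}]
    });
    // Move the value: max_tokens must be absent from the outgoing body,
    // not merely duplicated alongside max_completion_tokens.
    if let Some(cap) = result.as_object_mut().unwrap().remove("max_tokens") {
        result["max_completion_tokens"] = cap;
    }
    assert!(result.get("max_tokens").is_none());
    assert_eq!(result["max_completion_tokens"], 4096);
}

The tests added below pin down both sides of this behavior.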
@@ -772,4 +785,52 @@ mod tests {
         assert_eq!(result["content"][1]["type"], "text");
         assert_eq!(result["content"][1]["text"], "I can't do that");
     }
+
+    #[test]
+    fn test_is_openai_o_series() {
+        assert!(is_openai_o_series("o1"));
+        assert!(is_openai_o_series("o1-preview"));
+        assert!(is_openai_o_series("o1-mini"));
+        assert!(is_openai_o_series("o3"));
+        assert!(is_openai_o_series("o3-mini"));
+        assert!(is_openai_o_series("o4-mini"));
+        assert!(!is_openai_o_series("gpt-4o"));
+        assert!(!is_openai_o_series("openai-gpt"));
+        assert!(!is_openai_o_series("o"));
+        assert!(!is_openai_o_series(""));
+    }
+
+    #[test]
+    fn test_anthropic_to_openai_o_series_max_completion_tokens() {
+        for model in &["o1", "o3-mini", "o4-mini"] {
+            let input = json!({
+                "model": model,
+                "max_tokens": 4096,
+                "messages": [{"role": "user", "content": "Hello"}]
+            });
+
+            let result = anthropic_to_openai(input, None).unwrap();
+            assert!(
+                result.get("max_tokens").is_none(),
+                "{model} should not have max_tokens"
+            );
+            assert_eq!(
+                result["max_completion_tokens"], 4096,
+                "{model} should use max_completion_tokens"
+            );
+        }
+    }
+
+    #[test]
+    fn test_anthropic_to_openai_non_o_series_keeps_max_tokens() {
+        let input = json!({
+            "model": "gpt-4o",
+            "max_tokens": 1024,
+            "messages": [{"role": "user", "content": "Hello"}]
+        });
+
+        let result = anthropic_to_openai(input, None).unwrap();
+        assert_eq!(result["max_tokens"], 1024);
+        assert!(result.get("max_completion_tokens").is_none());
+    }
 }
@@ -45,7 +45,7 @@ pub fn anthropic_to_responses(body: Value, cache_key: Option<&str>) -> Result<Va
         result["input"] = json!(input);
     }
 
-    // max_tokens → max_output_tokens
+    // max_tokens → max_output_tokens (Responses API uses max_output_tokens for all models)
     if let Some(v) = body.get("max_tokens") {
        result["max_output_tokens"] = v.clone();
     }
@@ -897,4 +897,17 @@ mod tests {
         assert_eq!(result["usage"]["cache_read_input_tokens"], 60);
         assert_eq!(result["usage"]["cache_creation_input_tokens"], 20);
     }
+
+    #[test]
+    fn test_anthropic_to_responses_o_series_uses_max_output_tokens() {
+        // Responses API always uses max_output_tokens, even for o-series models
+        let input = json!({
+            "model": "o3-mini",
+            "max_tokens": 4096,
+            "messages": [{"role": "user", "content": "Hello"}]
+        });
+        let result = anthropic_to_responses(input, None).unwrap();
+        assert_eq!(result["max_output_tokens"], 4096);
+        assert!(result.get("max_completion_tokens").is_none());
+    }
 }