fix(proxy): use max_completion_tokens for o1/o3 series models (#1451)

* fix(proxy): use max_completion_tokens for o1/o3 series models

When converting Anthropic requests to OpenAI format for o1/o3 series
models (like o1-mini, o3-mini), use max_completion_tokens instead of
max_tokens to avoid unsupported_parameter errors.

Fixes #1448

* fix: revert incorrect o-series max_completion_tokens in Responses API path

The Responses API uses max_output_tokens for all models, including the o-series.
The o-series max_completion_tokens fix should only apply to the Chat Completions API.

---------

Co-authored-by: Hajen Teowideo <hajen.teowideo@example.com>
Co-authored-by: Jason Young <44939412+farion1231@users.noreply.github.com>
Co-authored-by: Jason <farion1231@gmail.com>
This commit is contained in:
Hemilt0n
2026-03-15 20:39:17 +08:00
committed by GitHub
parent 5c03de53f7
commit f38facd430
2 changed files with 77 additions and 3 deletions

View File

@@ -6,6 +6,14 @@
use crate::proxy::error::ProxyError;
use serde_json::{json, Value};
/// Detect OpenAI o-series reasoning models (o1, o3, o4-mini, etc.)
/// These models require `max_completion_tokens` instead of `max_tokens`.
pub fn is_openai_o_series(model: &str) -> bool {
    // An o-series name is a literal 'o' immediately followed by an ASCII
    // digit; the two-element slice pattern also enforces length >= 2.
    matches!(model.as_bytes(), [b'o', second, ..] if second.is_ascii_digit())
}
/// Anthropic 请求 → OpenAI 请求
///
/// `cache_key`: optional prompt_cache_key to inject for improved cache routing
@@ -50,9 +58,14 @@ pub fn anthropic_to_openai(body: Value, cache_key: Option<&str>) -> Result<Value
result["messages"] = json!(messages);
// 转换参数
// 转换参数 — o-series 模型需要 max_completion_tokens
let model = body.get("model").and_then(|m| m.as_str()).unwrap_or("");
if let Some(v) = body.get("max_tokens") {
result["max_tokens"] = v.clone();
if is_openai_o_series(model) {
result["max_completion_tokens"] = v.clone();
} else {
result["max_tokens"] = v.clone();
}
}
if let Some(v) = body.get("temperature") {
result["temperature"] = v.clone();
@@ -772,4 +785,52 @@ mod tests {
assert_eq!(result["content"][1]["type"], "text");
assert_eq!(result["content"][1]["text"], "I can't do that");
}
#[test]
fn test_is_openai_o_series() {
    // Reasoning models that must be detected as o-series.
    for model in ["o1", "o1-preview", "o1-mini", "o3", "o3-mini", "o4-mini"] {
        assert!(is_openai_o_series(model), "{model} should be o-series");
    }
    // Names that must NOT be detected (suffix 'o', prefix without digit, too short).
    for model in ["gpt-4o", "openai-gpt", "o", ""] {
        assert!(!is_openai_o_series(model), "{model} should not be o-series");
    }
}
#[test]
fn test_anthropic_to_openai_o_series_max_completion_tokens() {
    // Every o-series model must have max_tokens rewritten to
    // max_completion_tokens during Anthropic → OpenAI conversion.
    let models = ["o1", "o3-mini", "o4-mini"];
    for model in models {
        let request = json!({
            "model": model,
            "max_tokens": 4096,
            "messages": [{"role": "user", "content": "Hello"}]
        });
        let converted = anthropic_to_openai(request, None).unwrap();
        assert_eq!(
            converted["max_completion_tokens"], 4096,
            "{model} should use max_completion_tokens"
        );
        assert!(
            converted.get("max_tokens").is_none(),
            "{model} should not have max_tokens"
        );
    }
}
#[test]
fn test_anthropic_to_openai_non_o_series_keeps_max_tokens() {
    // Non-reasoning models keep the plain max_tokens parameter untouched.
    let request = json!({
        "model": "gpt-4o",
        "max_tokens": 1024,
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_openai(request, None).unwrap();
    assert_eq!(converted["max_tokens"], 1024);
    assert!(converted.get("max_completion_tokens").is_none());
}
}

View File

@@ -45,7 +45,7 @@ pub fn anthropic_to_responses(body: Value, cache_key: Option<&str>) -> Result<Va
result["input"] = json!(input);
}
// max_tokens → max_output_tokens
// max_tokens → max_output_tokens (Responses API uses max_output_tokens for all models)
if let Some(v) = body.get("max_tokens") {
result["max_output_tokens"] = v.clone();
}
@@ -897,4 +897,17 @@ mod tests {
assert_eq!(result["usage"]["cache_read_input_tokens"], 60);
assert_eq!(result["usage"]["cache_creation_input_tokens"], 20);
}
#[test]
fn test_anthropic_to_responses_o_series_uses_max_output_tokens() {
    // The Responses API path always maps max_tokens → max_output_tokens,
    // even for o-series reasoning models; max_completion_tokens must not appear.
    let request = json!({
        "model": "o3-mini",
        "max_tokens": 4096,
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_responses(request, None).unwrap();
    assert_eq!(converted["max_output_tokens"], 4096);
    assert!(converted.get("max_completion_tokens").is_none());
}
}