feat(proxy): resolve reasoning_effort from explicit effort with budget fallback

Replace map_thinking_to_reasoning_effort() with resolve_reasoning_effort()
that uses a two-tier priority system:

1. Explicit output_config.effort: low/medium/high map 1:1, max → xhigh
2. Fallback: thinking.type + budget_tokens thresholds (<4k → low,
   4k–<16k → medium, ≥16k → high, adaptive → high)

Both Chat Completions and Responses API paths share the same helper,
ensuring consistent mapping across all OpenAI-compatible endpoints.
This commit is contained in:
Jason
2026-03-20 23:36:47 +08:00
parent 3e78fe8305
commit fe3f9b60de
2 changed files with 339 additions and 0 deletions

View File

@@ -14,6 +14,63 @@ pub fn is_openai_o_series(model: &str) -> bool {
&& model.as_bytes().get(1).is_some_and(|b| b.is_ascii_digit())
}
/// Detect OpenAI models that support reasoning_effort.
///
/// Supported families:
/// - o-series: o1, o3, o4-mini, etc.
/// - GPT-5+: gpt-5, gpt-5.1, gpt-5.4, gpt-5-codex, etc.
///
/// The GPT check parses the full major version number (the digit run
/// right after `gpt-`) so future double-digit majors such as `gpt-10`
/// are recognized — a single-character `>= '5'` comparison would
/// wrongly reject them.
pub fn supports_reasoning_effort(model: &str) -> bool {
    if is_openai_o_series(model) {
        return true;
    }
    let lower = model.to_ascii_lowercase();
    let Some(rest) = lower.strip_prefix("gpt-") else {
        return false;
    };
    // Major version = leading digits before '.', '-', or end of string.
    let end = rest
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(rest.len());
    rest[..end].parse::<u32>().is_ok_and(|major| major >= 5)
}
/// Resolve the appropriate OpenAI `reasoning_effort` from an Anthropic request body.
///
/// Priority:
/// 1. Explicit `output_config.effort` — preserves the user's intent directly.
///    `low`/`medium`/`high` map 1:1; `max` maps to `xhigh`
///    (supported by mainstream GPT models). Unknown values are ignored.
/// 2. Fallback: `thinking.type` + `budget_tokens`:
///    - `adaptive` → `high` (mirrors optimizer semantics where adaptive ≈ max effort)
///    - `enabled` with budget → `low` (<4 000) / `medium` (4 000–15 999) / `high` (≥16 000)
///    - `enabled` without budget → `high` (conservative default)
///    - `disabled` / absent → `None`
pub fn resolve_reasoning_effort(body: &Value) -> Option<&'static str> {
    // --- Priority 1: explicit output_config.effort ---
    if let Some(effort) = body
        .pointer("/output_config/effort")
        .and_then(|v| v.as_str())
    {
        return match effort {
            "low" => Some("low"),
            "medium" => Some("medium"),
            "high" => Some("high"),
            "max" => Some("xhigh"), // OpenAI xhigh = maximum reasoning effort
            _ => None, // unknown value — do not inject
        };
    }

    // --- Priority 2: thinking.type + budget_tokens fallback ---
    let thinking = body.get("thinking")?;
    match thinking.get("type").and_then(|t| t.as_str()) {
        Some("adaptive") => Some("high"),
        Some("enabled") => {
            // Threshold on the declared token budget; a missing budget is
            // treated the same as a large one (assume strong reasoning).
            match thinking.get("budget_tokens").and_then(|b| b.as_u64()) {
                Some(b) if b < 4_000 => Some("low"),
                Some(b) if b < 16_000 => Some("medium"),
                _ => Some("high"), // ≥16 000, or enabled without a budget
            }
        }
        _ => None, // disabled or missing
    }
}
/// Anthropic 请求 → OpenAI 请求
///
/// `cache_key`: optional prompt_cache_key to inject for improved cache routing
@@ -80,6 +137,13 @@ pub fn anthropic_to_openai(body: Value, cache_key: Option<&str>) -> Result<Value
result["stream"] = v.clone();
}
// Map Anthropic thinking → OpenAI reasoning_effort
if supports_reasoning_effort(model) {
if let Some(effort) = resolve_reasoning_effort(&body) {
result["reasoning_effort"] = json!(effort);
}
}
// 转换 tools (过滤 BatchTool)
if let Some(tools) = body.get("tools").and_then(|t| t.as_array()) {
let openai_tools: Vec<Value> = tools
@@ -800,6 +864,180 @@ mod tests {
assert!(!is_openai_o_series(""));
}
#[test]
fn test_supports_reasoning_effort() {
    // Models that should accept reasoning_effort: o-series and GPT-5 family.
    for model in ["o1", "o3-mini", "gpt-5", "gpt-5.4", "gpt-5-codex"] {
        assert!(
            supports_reasoning_effort(model),
            "{model} should support reasoning_effort"
        );
    }
    // Models that should NOT accept it.
    for model in ["gpt-4o", "claude-sonnet-4-6"] {
        assert!(
            !supports_reasoning_effort(model),
            "{model} should not support reasoning_effort"
        );
    }
}
// ── resolve_reasoning_effort unit tests ──
#[test]
fn test_output_config_low_maps_to_reasoning_effort_low() {
    let req = json!({"output_config": {"effort": "low"}});
    let effort = resolve_reasoning_effort(&req);
    assert_eq!(effort, Some("low"));
}

#[test]
fn test_output_config_medium_maps_to_reasoning_effort_medium() {
    let req = json!({"output_config": {"effort": "medium"}});
    let effort = resolve_reasoning_effort(&req);
    assert_eq!(effort, Some("medium"));
}

#[test]
fn test_output_config_high_maps_to_reasoning_effort_high() {
    let req = json!({"output_config": {"effort": "high"}});
    let effort = resolve_reasoning_effort(&req);
    assert_eq!(effort, Some("high"));
}

#[test]
fn test_output_config_max_maps_to_reasoning_effort_xhigh() {
    let req = json!({"output_config": {"effort": "max"}});
    let effort = resolve_reasoning_effort(&req);
    assert_eq!(effort, Some("xhigh"));
}

#[test]
fn test_output_config_takes_priority_over_thinking() {
    // Explicit effort wins even when thinking.adaptive is also present.
    let req = json!({
        "output_config": {"effort": "low"},
        "thinking": {"type": "adaptive"}
    });
    let effort = resolve_reasoning_effort(&req);
    assert_eq!(effort, Some("low"));
}

#[test]
fn test_output_config_unknown_value_no_reasoning_effort() {
    // An unrecognized effort value must not inject anything.
    let req = json!({"output_config": {"effort": "turbo"}});
    assert_eq!(resolve_reasoning_effort(&req), None);
}

#[test]
fn test_thinking_enabled_small_budget_maps_low() {
    let req = json!({"thinking": {"type": "enabled", "budget_tokens": 1024}});
    assert_eq!(resolve_reasoning_effort(&req), Some("low"));
}

#[test]
fn test_thinking_enabled_medium_budget_maps_medium() {
    let req = json!({"thinking": {"type": "enabled", "budget_tokens": 8000}});
    assert_eq!(resolve_reasoning_effort(&req), Some("medium"));
}

#[test]
fn test_thinking_enabled_large_budget_maps_high() {
    let req = json!({"thinking": {"type": "enabled", "budget_tokens": 32000}});
    assert_eq!(resolve_reasoning_effort(&req), Some("high"));
}

#[test]
fn test_thinking_enabled_without_budget_maps_high() {
    // enabled with no budget_tokens falls back to the conservative default.
    let req = json!({"thinking": {"type": "enabled"}});
    assert_eq!(resolve_reasoning_effort(&req), Some("high"));
}

#[test]
fn test_thinking_adaptive_maps_high() {
    let req = json!({"thinking": {"type": "adaptive"}});
    assert_eq!(resolve_reasoning_effort(&req), Some("high"));
}

#[test]
fn test_thinking_disabled_no_reasoning_effort() {
    let req = json!({"thinking": {"type": "disabled"}});
    assert_eq!(resolve_reasoning_effort(&req), None);
}

#[test]
fn test_no_thinking_field_no_reasoning_effort() {
    let req = json!({"messages": [{"role": "user", "content": "Hello"}]});
    assert_eq!(resolve_reasoning_effort(&req), None);
}
// ── Integration: anthropic_to_openai with resolve_reasoning_effort ──
#[test]
fn test_non_reasoning_model_no_reasoning_effort() {
    // Non-reasoning models must never receive reasoning_effort,
    // even when the request carries thinking config.
    let request = json!({
        "model": "gpt-4o",
        "max_tokens": 1024,
        "thinking": {"type": "enabled", "budget_tokens": 2048},
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_openai(request, None).unwrap();
    assert!(converted.get("reasoning_effort").is_none());
}

#[test]
fn test_reasoning_model_with_output_config_effort() {
    let request = json!({
        "model": "gpt-5.4",
        "max_tokens": 1024,
        "output_config": {"effort": "medium"},
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_openai(request, None).unwrap();
    assert_eq!(converted["reasoning_effort"], "medium");
}

#[test]
fn test_reasoning_model_with_output_config_max() {
    let request = json!({
        "model": "gpt-5.4",
        "max_tokens": 1024,
        "output_config": {"effort": "max"},
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_openai(request, None).unwrap();
    assert_eq!(converted["reasoning_effort"], "xhigh");
}

#[test]
fn test_reasoning_model_thinking_enabled_small_budget() {
    let request = json!({
        "model": "o3",
        "max_tokens": 1024,
        "thinking": {"type": "enabled", "budget_tokens": 2048},
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_openai(request, None).unwrap();
    assert_eq!(converted["reasoning_effort"], "low");
}

#[test]
fn test_reasoning_model_thinking_adaptive() {
    let request = json!({
        "model": "gpt-5.4",
        "max_tokens": 1024,
        "thinking": {"type": "adaptive"},
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_openai(request, None).unwrap();
    assert_eq!(converted["reasoning_effort"], "high");
}

#[test]
fn test_reasoning_model_no_thinking_no_effort() {
    // No thinking and no output_config → nothing to map.
    let request = json!({
        "model": "gpt-5.4",
        "max_tokens": 1024,
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_openai(request, None).unwrap();
    assert!(converted.get("reasoning_effort").is_none());
}
#[test]
fn test_anthropic_to_openai_o_series_max_completion_tokens() {
for model in &["o1", "o3-mini", "o4-mini"] {

View File

@@ -61,6 +61,15 @@ pub fn anthropic_to_responses(body: Value, cache_key: Option<&str>) -> Result<Va
result["stream"] = v.clone();
}
// Map Anthropic thinking → OpenAI Responses reasoning.effort
if let Some(model_name) = body.get("model").and_then(|m| m.as_str()) {
if super::transform::supports_reasoning_effort(model_name) {
if let Some(effort) = super::transform::resolve_reasoning_effort(&body) {
result["reasoning"] = json!({ "effort": effort });
}
}
}
// stop_sequences → 丢弃 (Responses API 不支持)
// 转换 tools (过滤 BatchTool)
@@ -910,4 +919,96 @@ mod tests {
assert_eq!(result["max_output_tokens"], 4096);
assert!(result.get("max_completion_tokens").is_none());
}
#[test]
fn test_responses_output_config_max_sets_reasoning_xhigh() {
    let request = json!({
        "model": "gpt-5.4",
        "max_tokens": 1024,
        "output_config": {"effort": "max"},
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_responses(request, None).unwrap();
    assert_eq!(converted["reasoning"]["effort"], "xhigh");
}

#[test]
fn test_responses_output_config_takes_priority_over_thinking() {
    // Explicit output_config.effort must win over the thinking fallback.
    let request = json!({
        "model": "gpt-5.4",
        "max_tokens": 1024,
        "output_config": {"effort": "low"},
        "thinking": {"type": "adaptive"},
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_responses(request, None).unwrap();
    assert_eq!(converted["reasoning"]["effort"], "low");
}

#[test]
fn test_responses_thinking_enabled_small_budget_sets_reasoning_low() {
    let request = json!({
        "model": "gpt-5.4",
        "max_tokens": 1024,
        "thinking": {"type": "enabled", "budget_tokens": 2048},
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_responses(request, None).unwrap();
    assert_eq!(converted["reasoning"]["effort"], "low");
}

#[test]
fn test_responses_thinking_enabled_medium_budget_sets_reasoning_medium() {
    let request = json!({
        "model": "gpt-5.4",
        "max_tokens": 1024,
        "thinking": {"type": "enabled", "budget_tokens": 8000},
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_responses(request, None).unwrap();
    assert_eq!(converted["reasoning"]["effort"], "medium");
}

#[test]
fn test_responses_thinking_enabled_large_budget_sets_reasoning_high() {
    let request = json!({
        "model": "gpt-5.4",
        "max_tokens": 1024,
        "thinking": {"type": "enabled", "budget_tokens": 32000},
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_responses(request, None).unwrap();
    assert_eq!(converted["reasoning"]["effort"], "high");
}

#[test]
fn test_responses_thinking_adaptive_sets_reasoning_high() {
    let request = json!({
        "model": "gpt-5.4",
        "max_tokens": 1024,
        "thinking": {"type": "adaptive"},
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_responses(request, None).unwrap();
    assert_eq!(converted["reasoning"]["effort"], "high");
}

#[test]
fn test_responses_non_reasoning_model_no_reasoning() {
    // A non-reasoning model must not get a reasoning object injected.
    let request = json!({
        "model": "gpt-4o",
        "max_tokens": 1024,
        "thinking": {"type": "enabled", "budget_tokens": 2048},
        "messages": [{"role": "user", "content": "Hello"}]
    });
    let converted = anthropic_to_responses(request, None).unwrap();
    assert!(converted.get("reasoning").is_none());
}
}