feat(proxy): implement independent failover queue management

Add a new failover queue system that operates independently from provider
sortIndex, allowing users to configure failover order per app type.

Backend changes:
- Add failover_queue table to schema.rs for persistent storage
- Create dao/failover.rs with CRUD operations for queue management
- Add Tauri commands for queue operations (get, add, remove, reorder, toggle)
- Refactor provider_router.rs select_providers() to use failover queue:
  - Current provider always takes first priority
  - Queue providers ordered by queue_order as fallback
  - Only providers whose circuit breaker is not open (i.e. still allows requests) are included

Frontend changes:
- Add FailoverQueueItem type to proxy.ts
- Extend failover.ts API with queue management methods
- Add React Query hooks for queue data fetching and mutations
- Create FailoverQueueManager component with drag-and-drop reordering
- Integrate queue management into SettingsPage under "Auto Failover"
- Add i18n translations for zh and en locales
This commit is contained in:
Jason
2025-12-12 16:13:07 +08:00
parent c42a0dccaf
commit 5d424b1383
15 changed files with 1198 additions and 47 deletions

View File

@@ -0,0 +1,84 @@
//! 故障转移队列命令
//!
//! 管理代理模式下的故障转移队列
use crate::database::FailoverQueueItem;
use crate::provider::Provider;
use crate::store::AppState;
/// Tauri command: returns the failover queue entries stored for `app_type`.
///
/// Delegates to `Database::get_failover_queue`; any error it reports is
/// converted to its string form for the frontend.
#[tauri::command]
pub async fn get_failover_queue(
    state: tauri::State<'_, AppState>,
    app_type: String,
) -> Result<Vec<FailoverQueueItem>, String> {
    match state.db.get_failover_queue(&app_type) {
        Ok(queue) => Ok(queue),
        Err(err) => Err(err.to_string()),
    }
}
/// Tauri command: returns the providers of `app_type` that can still be added
/// to the failover queue (i.e. the ones not already in it).
///
/// Delegates to `Database::get_available_providers_for_failover`; errors are
/// returned as strings.
#[tauri::command]
pub async fn get_available_providers_for_failover(
    state: tauri::State<'_, AppState>,
    app_type: String,
) -> Result<Vec<Provider>, String> {
    let lookup = state.db.get_available_providers_for_failover(&app_type);
    lookup.map_err(|err| err.to_string())
}
/// Tauri command: adds `provider_id` to the failover queue of `app_type`.
///
/// Delegates to `Database::add_to_failover_queue`; errors are returned as
/// strings.
#[tauri::command]
pub async fn add_to_failover_queue(
    state: tauri::State<'_, AppState>,
    app_type: String,
    provider_id: String,
) -> Result<(), String> {
    if let Err(err) = state.db.add_to_failover_queue(&app_type, &provider_id) {
        return Err(err.to_string());
    }
    Ok(())
}
/// Tauri command: removes `provider_id` from the failover queue of `app_type`.
///
/// Delegates to `Database::remove_from_failover_queue`; errors are returned as
/// strings.
#[tauri::command]
pub async fn remove_from_failover_queue(
    state: tauri::State<'_, AppState>,
    app_type: String,
    provider_id: String,
) -> Result<(), String> {
    match state.db.remove_from_failover_queue(&app_type, &provider_id) {
        Ok(()) => Ok(()),
        Err(err) => Err(err.to_string()),
    }
}
/// Tauri command: applies a new ordering to the failover queue of `app_type`.
///
/// `provider_ids` lists the provider ids in their new order. Delegates to
/// `Database::reorder_failover_queue`; errors are returned as strings.
#[tauri::command]
pub async fn reorder_failover_queue(
    state: tauri::State<'_, AppState>,
    app_type: String,
    provider_ids: Vec<String>,
) -> Result<(), String> {
    let outcome = state.db.reorder_failover_queue(&app_type, &provider_ids);
    outcome.map_err(|err| err.to_string())
}
/// Tauri command: enables or disables one failover queue entry.
///
/// The entry is identified by `app_type` and `provider_id`. Delegates to
/// `Database::set_failover_item_enabled`; errors are returned as strings.
#[tauri::command]
pub async fn set_failover_item_enabled(
    state: tauri::State<'_, AppState>,
    app_type: String,
    provider_id: String,
    enabled: bool,
) -> Result<(), String> {
    match state
        .db
        .set_failover_item_enabled(&app_type, &provider_id, enabled)
    {
        Ok(()) => Ok(()),
        Err(err) => Err(err.to_string()),
    }
}

View File

@@ -3,6 +3,7 @@
mod config;
mod deeplink;
mod env;
mod failover;
mod import_export;
mod mcp;
mod misc;
@@ -18,6 +19,7 @@ mod usage;
pub use config::*;
pub use deeplink::*;
pub use env::*;
pub use failover::*;
pub use import_export::*;
pub use mcp::*;
pub use misc::*;

View File

@@ -0,0 +1,248 @@
//! 故障转移队列 DAO
//!
//! 管理代理模式下的故障转移队列
use crate::database::{lock_conn, Database};
use crate::error::AppError;
use crate::provider::Provider;
use serde::{Deserialize, Serialize};
use std::time::{SystemTime, UNIX_EPOCH};
/// One entry of the failover queue, as returned to callers.
///
/// Serialized with camelCase field names (`providerId`, `providerName`, ...).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct FailoverQueueItem {
/// Id of the provider this queue entry refers to.
pub provider_id: String,
/// Provider name, read from the `providers` table when the queue is loaded.
pub provider_name: String,
/// Position within the queue; lower values come first. The DAO assigns
/// positions starting at 1.
pub queue_order: i32,
/// Whether this entry takes part in failover; disabled entries are skipped.
pub enabled: bool,
/// Creation time as seconds since the Unix epoch.
pub created_at: i64,
}
impl Database {
/// Loads the failover queue for `app_type`, sorted by ascending `queue_order`.
///
/// Each queue row is inner-joined with `providers` to pick up the provider
/// name, so queue rows without a matching provider row are not returned.
///
/// # Errors
/// Returns `AppError::Database` if preparing the statement, running the query
/// or decoding a row fails.
pub fn get_failover_queue(&self, app_type: &str) -> Result<Vec<FailoverQueueItem>, AppError> {
    let conn = lock_conn!(self.conn);

    let mut stmt = conn
        .prepare(
            "SELECT fq.provider_id, p.name, fq.queue_order, fq.enabled, fq.created_at
FROM failover_queue fq
JOIN providers p ON fq.provider_id = p.id AND fq.app_type = p.app_type
WHERE fq.app_type = ?1
ORDER BY fq.queue_order ASC",
        )
        .map_err(|err| AppError::Database(err.to_string()))?;

    let rows = stmt
        .query_map([app_type], |row| {
            // Column order matches the SELECT list above.
            let provider_id = row.get(0)?;
            let provider_name = row.get(1)?;
            let queue_order = row.get(2)?;
            let enabled = row.get(3)?;
            let created_at = row.get(4)?;
            Ok(FailoverQueueItem {
                provider_id,
                provider_name,
                queue_order,
                enabled,
                created_at,
            })
        })
        .map_err(|err| AppError::Database(err.to_string()))?;

    // Stop at the first row that fails to decode.
    let mut queue = Vec::new();
    for row in rows {
        queue.push(row.map_err(|err| AppError::Database(err.to_string()))?);
    }
    Ok(queue)
}
/// Returns the full `Provider` records for the enabled entries of the
/// failover queue of `app_type`, in queue order.
///
/// Disabled entries are skipped, as are entries whose provider id is not
/// present in the result of `get_all_providers`.
///
/// # Errors
/// Propagates any error from `get_failover_queue` or `get_all_providers`.
pub fn get_failover_providers(&self, app_type: &str) -> Result<Vec<Provider>, AppError> {
    let queue = self.get_failover_queue(app_type)?;
    let providers_by_id = self.get_all_providers(app_type)?;

    let enabled_providers = queue
        .into_iter()
        .filter(|entry| entry.enabled)
        .filter_map(|entry| providers_by_id.get(&entry.provider_id).cloned())
        .collect();

    Ok(enabled_providers)
}
/// Appends `provider_id` to the end of the failover queue of `app_type`.
///
/// The new entry is enabled, gets `queue_order` = current maximum + 1 (1 for
/// an empty queue) and the current Unix time in seconds as `created_at`.
/// The insert uses `INSERT OR IGNORE`, so a row that conflicts with an
/// existing one is silently left out.
///
/// # Errors
/// Returns `AppError::Database` if either SQL statement fails.
pub fn add_to_failover_queue(
    &self,
    app_type: &str,
    provider_id: &str,
) -> Result<(), AppError> {
    let conn = lock_conn!(self.conn);

    // Highest position currently in use; 0 when the queue is empty.
    let current_max: i32 = conn
        .query_row(
            "SELECT COALESCE(MAX(queue_order), 0) FROM failover_queue WHERE app_type = ?1",
            [app_type],
            |row| row.get(0),
        )
        .map_err(|err| AppError::Database(err.to_string()))?;
    let next_order = current_max + 1;

    // A clock set before the Unix epoch yields a timestamp of 0.
    let created_at = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs() as i64,
        Err(_) => 0,
    };

    conn.execute(
        "INSERT OR IGNORE INTO failover_queue (app_type, provider_id, queue_order, enabled, created_at)
VALUES (?1, ?2, ?3, 1, ?4)",
        rusqlite::params![app_type, provider_id, next_order, created_at],
    )
    .map(|_| ())
    .map_err(|err| AppError::Database(err.to_string()))
}
/// 从故障转移队列中移除供应商
pub fn remove_from_failover_queue(
&self,
app_type: &str,
provider_id: &str,
) -> Result<(), AppError> {
let conn = lock_conn!(self.conn);
// 获取被删除项的 queue_order
let removed_order: Option<i32> = conn
.query_row(
"SELECT queue_order FROM failover_queue WHERE app_type = ?1 AND provider_id = ?2",
[app_type, provider_id],
|row| row.get(0),
)
.ok();
// 删除该项
conn.execute(
"DELETE FROM failover_queue WHERE app_type = ?1 AND provider_id = ?2",
[app_type, provider_id],
)
.map_err(|e| AppError::Database(e.to_string()))?;
// 重新排序后面的项(填补空隙)
if let Some(order) = removed_order {
conn.execute(
"UPDATE failover_queue
SET queue_order = queue_order - 1
WHERE app_type = ?1 AND queue_order > ?2",
rusqlite::params![app_type, order],
)
.map_err(|e| AppError::Database(e.to_string()))?;
}
Ok(())
}
/// Rewrites the `queue_order` of the failover queue of `app_type`.
///
/// `provider_ids` lists provider ids in their new order; the id at index `i`
/// is assigned position `i + 1`. Ids that match no queue row update nothing,
/// and queue rows whose id is not listed keep their previous position.
///
/// All updates run inside one explicit transaction: on the first failing
/// update the transaction is rolled back and that error is returned.
///
/// # Errors
/// Returns `AppError::Database` if starting the transaction, any update, or
/// the commit fails.
pub fn reorder_failover_queue(
    &self,
    app_type: &str,
    provider_ids: &[String],
) -> Result<(), AppError> {
    let conn = lock_conn!(self.conn);

    conn.execute("BEGIN TRANSACTION", [])
        .map_err(|err| AppError::Database(err.to_string()))?;

    let updates: Result<(), AppError> =
        provider_ids
            .iter()
            .enumerate()
            .try_for_each(|(position, queued_id)| {
                conn.execute(
                    "UPDATE failover_queue
SET queue_order = ?3
WHERE app_type = ?1 AND provider_id = ?2",
                    rusqlite::params![app_type, queued_id, (position + 1) as i32],
                )
                .map(|_| ())
                .map_err(|err| AppError::Database(err.to_string()))
            });

    if let Err(failure) = updates {
        // Best-effort rollback; the update error is what the caller sees.
        conn.execute("ROLLBACK", []).ok();
        return Err(failure);
    }

    conn.execute("COMMIT", [])
        .map_err(|err| AppError::Database(err.to_string()))?;
    Ok(())
}
/// Sets the `enabled` flag of the queue entry identified by `app_type` and
/// `provider_id`.
///
/// If no such entry exists the update touches no rows and `Ok(())` is still
/// returned.
///
/// # Errors
/// Returns `AppError::Database` if the update statement fails.
pub fn set_failover_item_enabled(
    &self,
    app_type: &str,
    provider_id: &str,
    enabled: bool,
) -> Result<(), AppError> {
    let conn = lock_conn!(self.conn);
    let updated = conn.execute(
        "UPDATE failover_queue SET enabled = ?3 WHERE app_type = ?1 AND provider_id = ?2",
        rusqlite::params![app_type, provider_id, enabled],
    );
    match updated {
        Ok(_) => Ok(()),
        Err(err) => Err(AppError::Database(err.to_string())),
    }
}
/// Deletes every failover queue entry belonging to `app_type`.
///
/// # Errors
/// Returns `AppError::Database` if the delete statement fails.
pub fn clear_failover_queue(&self, app_type: &str) -> Result<(), AppError> {
    let conn = lock_conn!(self.conn);
    let deleted = conn.execute(
        "DELETE FROM failover_queue WHERE app_type = ?1",
        [app_type],
    );
    match deleted {
        Ok(_) => Ok(()),
        Err(err) => Err(AppError::Database(err.to_string())),
    }
}
/// Reports whether `provider_id` has an entry in the failover queue of
/// `app_type`.
///
/// # Errors
/// Returns `AppError::Database` if the count query fails.
pub fn is_in_failover_queue(
    &self,
    app_type: &str,
    provider_id: &str,
) -> Result<bool, AppError> {
    let conn = lock_conn!(self.conn);
    conn.query_row(
        "SELECT COUNT(*) FROM failover_queue WHERE app_type = ?1 AND provider_id = ?2",
        [app_type, provider_id],
        |row| row.get::<_, i32>(0),
    )
    .map(|matching_rows| matching_rows >= 1)
    .map_err(|err| AppError::Database(err.to_string()))
}
/// Returns the providers of `app_type` that are not yet in the failover
/// queue, i.e. the candidates that can still be added to it.
///
/// The result follows the iteration order of the collection returned by
/// `get_all_providers`.
///
/// # Errors
/// Propagates any error from `get_all_providers` or `get_failover_queue`.
pub fn get_available_providers_for_failover(
    &self,
    app_type: &str,
) -> Result<Vec<Provider>, AppError> {
    let all_providers = self.get_all_providers(app_type)?;
    let queue = self.get_failover_queue(app_type)?;

    // Ids already queued, collected once for constant-time membership checks.
    let mut queued_ids = std::collections::HashSet::new();
    for entry in &queue {
        queued_ids.insert(&entry.provider_id);
    }

    let mut candidates: Vec<Provider> = Vec::new();
    for provider in all_providers.into_values() {
        if !queued_ids.contains(&provider.id) {
            candidates.push(provider);
        }
    }
    Ok(candidates)
}
}

View File

@@ -2,6 +2,7 @@
//!
//! Database access operations for each domain
pub mod failover;
pub mod mcp;
pub mod prompts;
pub mod providers;
@@ -11,3 +12,5 @@ pub mod skills;
pub mod stream_check;
// 所有 DAO 方法都通过 Database impl 提供,无需单独导出
// 导出 FailoverQueueItem 供外部使用
pub use failover::FailoverQueueItem;

View File

@@ -31,6 +31,9 @@ mod schema;
#[cfg(test)]
mod tests;
// DAO 类型导出供外部使用
pub use dao::FailoverQueueItem;
use crate::config::get_app_config_dir;
use crate::error::AppError;
use rusqlite::Connection;

View File

@@ -319,6 +319,30 @@ impl Database {
[],
);
// 14. Failover Queue 表 (故障转移队列)
conn.execute(
"CREATE TABLE IF NOT EXISTS failover_queue (
id INTEGER PRIMARY KEY AUTOINCREMENT,
app_type TEXT NOT NULL,
provider_id TEXT NOT NULL,
queue_order INTEGER NOT NULL,
enabled INTEGER NOT NULL DEFAULT 1,
created_at INTEGER NOT NULL,
UNIQUE (app_type, provider_id),
FOREIGN KEY (provider_id, app_type) REFERENCES providers(id, app_type) ON DELETE CASCADE
)",
[],
)
.map_err(|e| AppError::Database(e.to_string()))?;
// 为故障转移队列创建索引
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_failover_queue_order
ON failover_queue(app_type, queue_order)",
[],
)
.map_err(|e| AppError::Database(e.to_string()))?;
Ok(())
}

View File

@@ -686,6 +686,13 @@ pub fn run() {
commands::get_circuit_breaker_config,
commands::update_circuit_breaker_config,
commands::get_circuit_breaker_stats,
// Failover queue management
commands::get_failover_queue,
commands::get_available_providers_for_failover,
commands::add_to_failover_queue,
commands::remove_from_failover_queue,
commands::reorder_failover_queue,
commands::set_failover_item_enabled,
// Usage statistics
commands::get_usage_summary,
commands::get_usage_trends,

View File

@@ -28,44 +28,92 @@ impl ProviderRouter {
}
/// 选择可用的供应商(支持故障转移)
/// 返回按优先级排序的可用供应商列表
///
/// 返回按优先级排序的可用供应商列表:
/// 1. 当前供应商(is_current=true)始终第一位
/// 2. 故障转移队列中的其他供应商(按 queue_order 排序)
/// 3. 只返回熔断器未打开的供应商
pub async fn select_providers(&self, app_type: &str) -> Result<Vec<Provider>, AppError> {
// 直接获取当前选中的供应商(基于 is_current 字段)
let current_id = self
.db
.get_current_provider(app_type)?
.ok_or_else(|| AppError::Config(format!("No current provider for {}", app_type)))?;
let mut result = Vec::new();
let all_providers = self.db.get_all_providers(app_type)?;
let providers = self.db.get_all_providers(app_type)?;
let provider = providers
.get(&current_id)
.ok_or_else(|| AppError::Config(format!("Current provider {} not found", current_id)))?
.clone();
// 1. 当前供应商始终第一位
if let Some(current_id) = self.db.get_current_provider(app_type)? {
if let Some(current) = all_providers.get(&current_id) {
let circuit_key = format!("{}:{}", app_type, current.id);
let breaker = self.get_or_create_circuit_breaker(&circuit_key).await;
log::info!(
"[{}] Selected current provider: {} ({})",
app_type,
provider.name,
provider.id
);
if breaker.allow_request().await {
log::info!(
"[{}] Current provider available: {} ({})",
app_type,
current.name,
current.id
);
result.push(current.clone());
} else {
log::warn!(
"[{}] Current provider {} circuit breaker open, checking failover queue",
app_type,
current.name
);
}
}
}
// 检查熔断器状态
let circuit_key = format!("{}:{}", app_type, provider.id);
let breaker = self.get_or_create_circuit_breaker(&circuit_key).await;
// 2. 获取故障转移队列中的供应商
let queue = self.db.get_failover_queue(app_type)?;
if !breaker.allow_request().await {
log::warn!(
"Provider {} is unavailable (circuit breaker open)",
provider.id
);
for item in queue {
// 跳过已添加的当前供应商
if result.iter().any(|p| p.id == item.provider_id) {
continue;
}
// 跳过禁用的队列项
if !item.enabled {
continue;
}
// 获取供应商信息
if let Some(provider) = all_providers.get(&item.provider_id) {
// 检查熔断器状态
let circuit_key = format!("{}:{}", app_type, provider.id);
let breaker = self.get_or_create_circuit_breaker(&circuit_key).await;
if breaker.allow_request().await {
log::info!(
"[{}] Failover provider available: {} ({}) at queue position {}",
app_type,
provider.name,
provider.id,
item.queue_order
);
result.push(provider.clone());
} else {
log::debug!(
"[{}] Failover provider {} circuit breaker open, skipping",
app_type,
provider.name
);
}
}
}
if result.is_empty() {
return Err(AppError::Config(format!(
"Current provider {} is unavailable (circuit breaker open)",
provider.name
"No available provider for {} (all circuit breakers open or no providers configured)",
app_type
)));
}
// 返回单个供应商(保留 Vec 接口以兼容现有代码)
Ok(vec![provider])
log::info!(
"[{}] Failover chain: {} provider(s) available",
app_type,
result.len()
);
Ok(result)
}
/// 记录供应商请求结果