mirror of
https://github.com/farion1231/cc-switch.git
synced 2026-04-02 18:12:05 +08:00
fix(proxy): harden crash recovery with fallback detection
- Set the takeover flag before writing the proxy config, fixing a race condition in which a crash during takeover left the Live configs corrupted but the flag unset.
- Add fallback detection: when backups exist but the flag is false, check the Live configs for placeholder tokens (handles legacy/edge cases).
- Improve error handling with proper rollback at each stage of startup.
- Clean up stale backups when the Live configs are not in the takeover state, to avoid long-term storage of sensitive tokens.
This commit is contained in:
@@ -321,6 +321,17 @@ impl Database {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 检查是否存在任意 Live 配置备份
|
||||
pub async fn has_any_live_backup(&self) -> Result<bool, AppError> {
|
||||
let conn = lock_conn!(self.conn);
|
||||
let count: i64 = conn
|
||||
.query_row("SELECT COUNT(*) FROM proxy_live_backup", [], |row| {
|
||||
row.get(0)
|
||||
})
|
||||
.map_err(|e| AppError::Database(e.to_string()))?;
|
||||
Ok(count > 0)
|
||||
}
|
||||
|
||||
/// 获取 Live 配置备份
|
||||
pub async fn get_live_backup(&self, app_type: &str) -> Result<Option<LiveBackup>, AppError> {
|
||||
let conn = lock_conn!(self.conn);
|
||||
|
||||
@@ -531,23 +531,41 @@ pub fn run() {
|
||||
let state = app_handle.state::<AppState>();
|
||||
|
||||
// 1. 检测异常退出并恢复 Live 配置
|
||||
match state.db.is_live_takeover_active().await {
|
||||
Ok(true) => {
|
||||
// 接管标志为 true 但代理未运行 → 上次异常退出
|
||||
if !state.proxy_service.is_running().await {
|
||||
log::warn!("检测到上次异常退出,正在恢复 Live 配置...");
|
||||
if let Err(e) = state.proxy_service.recover_from_crash().await {
|
||||
log::error!("恢复 Live 配置失败: {e}");
|
||||
} else {
|
||||
log::info!("Live 配置已从异常退出中恢复");
|
||||
}
|
||||
let is_proxy_running = state.proxy_service.is_running().await;
|
||||
if !is_proxy_running {
|
||||
let takeover_flag = match state.db.is_live_takeover_active().await {
|
||||
Ok(active) => active,
|
||||
Err(e) => {
|
||||
log::error!("检查接管状态失败: {e}");
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
let has_backups = match state.db.has_any_live_backup().await {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
log::error!("检查 Live 备份失败: {e}");
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
// 兜底检测:旧版本/极端窗口期可能出现“标志未写入,但 Live 已被写成占位符”的残留状态。
|
||||
// 只有在存在备份时才检查占位符,避免误判覆盖用户正常配置。
|
||||
let live_taken_over =
|
||||
has_backups && state.proxy_service.detect_takeover_in_live_configs();
|
||||
|
||||
if takeover_flag || live_taken_over {
|
||||
log::warn!("检测到上次异常退出或残留接管状态,正在恢复 Live 配置...");
|
||||
if let Err(e) = state.proxy_service.recover_from_crash().await {
|
||||
log::error!("恢复 Live 配置失败: {e}");
|
||||
} else {
|
||||
log::info!("Live 配置已从异常退出中恢复");
|
||||
}
|
||||
} else if has_backups {
|
||||
// 备份残留但 Live 未处于接管状态:清理敏感备份,避免长期存储 Token
|
||||
if let Err(e) = state.db.delete_all_live_backups().await {
|
||||
log::warn!("清理残留 Live 备份失败: {e}");
|
||||
}
|
||||
}
|
||||
Ok(false) => {
|
||||
// 正常状态,无需恢复
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("检查接管状态失败: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -84,16 +84,38 @@ impl ProxyService {
|
||||
self.backup_live_configs().await?;
|
||||
|
||||
// 2. 同步 Live 配置中的 Token 到数据库(确保代理能读到最新的 Token)
|
||||
self.sync_live_to_providers().await?;
|
||||
if let Err(e) = self.sync_live_to_providers().await {
|
||||
// 同步失败时尚未写入接管配置,但备份可能包含敏感信息,尽量清理
|
||||
if let Err(clean_err) = self.db.delete_all_live_backups().await {
|
||||
log::warn!("清理 Live 备份失败: {clean_err}");
|
||||
}
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
// 3. 接管各应用的 Live 配置(写入代理地址,清空 Token)
|
||||
self.takeover_live_configs().await?;
|
||||
// 3. 在写入接管配置之前先落盘接管标志:
|
||||
// 这样即使在接管过程中断电/kill,下次启动也能检测到并自动恢复。
|
||||
if let Err(e) = self.db.set_live_takeover_active(true).await {
|
||||
if let Err(clean_err) = self.db.delete_all_live_backups().await {
|
||||
log::warn!("清理 Live 备份失败: {clean_err}");
|
||||
}
|
||||
return Err(format!("设置接管状态失败: {e}"));
|
||||
}
|
||||
|
||||
// 4. 设置接管状态
|
||||
self.db
|
||||
.set_live_takeover_active(true)
|
||||
.await
|
||||
.map_err(|e| format!("设置接管状态失败: {e}"))?;
|
||||
// 4. 接管各应用的 Live 配置(写入代理地址,清空 Token)
|
||||
if let Err(e) = self.takeover_live_configs().await {
|
||||
// 接管失败(可能是部分写入),尝试恢复原始配置;若恢复失败则保留标志与备份,等待下次启动自动恢复。
|
||||
log::error!("接管 Live 配置失败,尝试恢复原始配置: {e}");
|
||||
match self.restore_live_configs().await {
|
||||
Ok(()) => {
|
||||
let _ = self.db.set_live_takeover_active(false).await;
|
||||
let _ = self.db.delete_all_live_backups().await;
|
||||
}
|
||||
Err(restore_err) => {
|
||||
log::error!("恢复原始配置失败,将保留备份以便下次启动恢复: {restore_err}");
|
||||
}
|
||||
}
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
// 5. 启动代理服务器
|
||||
match self.start().await {
|
||||
@@ -101,8 +123,15 @@ impl ProxyService {
|
||||
Err(e) => {
|
||||
// 启动失败,恢复原始配置
|
||||
log::error!("代理启动失败,尝试恢复原始配置: {e}");
|
||||
let _ = self.restore_live_configs().await;
|
||||
let _ = self.db.set_live_takeover_active(false).await;
|
||||
match self.restore_live_configs().await {
|
||||
Ok(()) => {
|
||||
let _ = self.db.set_live_takeover_active(false).await;
|
||||
let _ = self.db.delete_all_live_backups().await;
|
||||
}
|
||||
Err(restore_err) => {
|
||||
log::error!("恢复原始配置失败,将保留备份以便下次启动恢复: {restore_err}");
|
||||
}
|
||||
}
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
@@ -517,6 +546,68 @@ impl ProxyService {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 检测 Live 配置是否处于“被接管”的残留状态
|
||||
///
|
||||
/// 用于兜底处理:当数据库标志未写入成功(或旧版本遗留)但 Live 文件已经写成代理占位符时,
|
||||
/// 启动流程可以据此触发恢复逻辑。
|
||||
pub fn detect_takeover_in_live_configs(&self) -> bool {
|
||||
if let Ok(config) = self.read_claude_live() {
|
||||
if Self::is_claude_live_taken_over(&config) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(config) = self.read_codex_live() {
|
||||
if Self::is_codex_live_taken_over(&config) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(config) = self.read_gemini_live() {
|
||||
if Self::is_gemini_live_taken_over(&config) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn is_claude_live_taken_over(config: &Value) -> bool {
|
||||
let env = match config.get("env").and_then(|v| v.as_object()) {
|
||||
Some(env) => env,
|
||||
None => return false,
|
||||
};
|
||||
|
||||
for key in [
|
||||
"ANTHROPIC_AUTH_TOKEN",
|
||||
"ANTHROPIC_API_KEY",
|
||||
"OPENROUTER_API_KEY",
|
||||
"OPENAI_API_KEY",
|
||||
] {
|
||||
if env.get(key).and_then(|v| v.as_str()) == Some(PROXY_TOKEN_PLACEHOLDER) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn is_codex_live_taken_over(config: &Value) -> bool {
|
||||
let auth = match config.get("auth").and_then(|v| v.as_object()) {
|
||||
Some(auth) => auth,
|
||||
None => return false,
|
||||
};
|
||||
auth.get("OPENAI_API_KEY").and_then(|v| v.as_str()) == Some(PROXY_TOKEN_PLACEHOLDER)
|
||||
}
|
||||
|
||||
fn is_gemini_live_taken_over(config: &Value) -> bool {
|
||||
let env = match config.get("env").and_then(|v| v.as_object()) {
|
||||
Some(env) => env,
|
||||
None => return false,
|
||||
};
|
||||
env.get("GEMINI_API_KEY").and_then(|v| v.as_str()) == Some(PROXY_TOKEN_PLACEHOLDER)
|
||||
}
|
||||
|
||||
/// 从供应商配置更新 Live 备份(用于代理模式下的热切换)
|
||||
///
|
||||
/// 与 backup_live_configs() 不同,此方法从供应商的 settings_config 生成备份,
|
||||
|
||||
Reference in New Issue
Block a user