Files
Bubbles/ai_providers/fallback.py
2026-02-04 17:35:08 +08:00

144 lines
4.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
模型 Fallback 机制 —— 主模型失败时自动切到备选模型。
参考 OpenClaw 的 model-fallback.ts 设计:
- 区分可重试错误(限流、超时、服务端 500)和不可重试错误(密钥无效)
- 可重试:指数退避重试
- 不可重试或重试耗尽:切下一个 fallback 模型
- 记录失败模型的冷却时间
"""
import logging
import time
from typing import Any, Callable, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
# Cooldown (seconds): after a model fails, skip it for a while.
_MODEL_COOLDOWN: Dict[int, float] = {} # model_id -> timestamp when the cooldown ends
COOLDOWN_DURATION = 60 # cooldown length: 60 seconds
# Retry parameters (exponential backoff, capped at RETRY_MAX_DELAY)
RETRY_INITIAL_DELAY = 2.0
RETRY_BACKOFF_FACTOR = 2.0
RETRY_MAX_DELAY = 30.0
MAX_RETRIES_PER_MODEL = 2
def _is_retryable(error: Exception) -> bool:
"""判断错误是否可重试。"""
error_str = str(error).lower()
error_type = type(error).__name__
# 限流
if "rate" in error_str and "limit" in error_str:
return True
if "429" in error_str:
return True
# 超时
if "timeout" in error_str or "timed out" in error_str:
return True
# 服务端错误
if "500" in error_str or "502" in error_str or "503" in error_str:
return True
if "server" in error_str and "error" in error_str:
return True
# 连接错误
if "connection" in error_str:
return True
# OpenAI 特定
if error_type in ("APIConnectionError", "APITimeoutError", "InternalServerError"):
return True
return False
def _is_in_cooldown(model_id: int) -> bool:
    """Return True while *model_id* is still cooling down.

    Expired entries are purged from the cooldown table as a side effect.
    """
    expires_at = _MODEL_COOLDOWN.get(model_id)
    if expires_at is not None:
        if time.time() < expires_at:
            return True
        # Cooldown has expired — drop the stale entry.
        _MODEL_COOLDOWN.pop(model_id, None)
    return False
def _set_cooldown(model_id: int) -> None:
    """Put *model_id* into cooldown for COOLDOWN_DURATION seconds."""
    deadline = time.time() + COOLDOWN_DURATION
    _MODEL_COOLDOWN[model_id] = deadline
    logger.info(f"模型 {model_id} 进入冷却({COOLDOWN_DURATION}秒)")
def call_with_fallback(
    primary_model_id: int,
    chat_models: Dict[int, Any],
    fallback_ids: List[int],
    call_fn: Callable[[Any], str],
) -> Tuple[str, int]:
    """
    Call a model with automatic fallback.

    The primary model is tried first, then each fallback in priority
    order. Retryable errors (rate limits, timeouts, 5xx, connection
    problems) are retried with exponential backoff; a model that fails
    non-retryably or exhausts its retries is put into cooldown and the
    next candidate is tried.

    :param primary_model_id: ID of the preferred model.
    :param chat_models: all available models, keyed by ID.
    :param fallback_ids: fallback model IDs in priority order.
    :param call_fn: performs the actual call; receives a model instance
        and returns the reply text.
    :return: (reply text, ID of the model that actually answered)
    """
    candidates = _select_candidates(primary_model_id, chat_models, fallback_ids)
    if not candidates:
        # Every model is cooling down: force the primary model if it exists.
        if primary_model_id in chat_models:
            candidates = [(primary_model_id, chat_models[primary_model_id])]
        else:
            return "所有模型暂时不可用,请稍后再试。", primary_model_id

    last_error: Optional[Exception] = None
    for model_id, model_instance in candidates:
        # Up to MAX_RETRIES_PER_MODEL retries, i.e. MAX_RETRIES_PER_MODEL + 1 attempts.
        for attempt in range(MAX_RETRIES_PER_MODEL + 1):
            try:
                result = call_fn(model_instance)
                if result:
                    return result, model_id
                # Empty reply counts as failure, but retrying won't help.
                break
            except Exception as e:  # any provider error triggers retry/fallback
                last_error = e
                model_name = type(model_instance).__name__
                logger.warning(
                    f"模型 {model_name}(ID:{model_id}) 第 {attempt + 1} 次调用失败: {e}"
                )
                if not _is_retryable(e):
                    logger.info(f"不可重试错误,跳过模型 {model_id}")
                    _set_cooldown(model_id)
                    break
                if attempt < MAX_RETRIES_PER_MODEL:
                    delay = min(
                        RETRY_INITIAL_DELAY * (RETRY_BACKOFF_FACTOR ** attempt),
                        RETRY_MAX_DELAY,
                    )
                    logger.info(f"等待 {delay:.1f}s 后重试...")
                    time.sleep(delay)
                else:
                    # Retries exhausted — cool the model down and move on.
                    _set_cooldown(model_id)

    # Every candidate failed.
    error_msg = f"模型调用失败: {last_error}" if last_error else "无法获取回复"
    logger.error(error_msg)
    return "抱歉,服务暂时不可用,请稍后再试。", primary_model_id


def _select_candidates(
    primary_model_id: int,
    chat_models: Dict[int, Any],
    fallback_ids: List[int],
) -> List[Tuple[int, Any]]:
    """Build the ordered candidate list: primary + fallbacks, de-duplicated,
    restricted to known models, skipping any model currently in cooldown."""
    candidates: List[Tuple[int, Any]] = []
    seen: set = set()
    for mid in [primary_model_id] + fallback_ids:
        # O(1) de-dup via a set instead of rescanning the candidate list.
        if mid in seen or mid not in chat_models:
            continue
        seen.add(mid)
        if _is_in_cooldown(mid):
            logger.info(f"模型 {mid} 处于冷却中,跳过")
            continue
        candidates.append((mid, chat_models[mid]))
    return candidates