diff --git a/README.md b/README.md
index bd0b4c6..0e81067 100644
--- a/README.md
+++ b/README.md
@@ -79,7 +79,7 @@ graph TD
Claude-3.7:Anthropic的高级理解与解释
DeepSeek-v3/r1:专为中文场景优化的模型
QVQ-MAX/Qwen-VL-MAX:以视觉推理闻名的国产AI
- Gemini-2.5-Pro/2.0-flash:智商130的非推理AI
+ Gemini-2.5-Pro/2.5-flash:Google的多模态AI,Pro支持深度推理
@@ -189,7 +189,7 @@ python app.py
| **QVQ-MAX** | 多模态支持,推理支持 | 复杂问题,视觉分析 |
| **Qwen-VL-MAX** | 多模态支持 | 简单问题,视觉分析 |
| **Gemini-2.5-Pro** | 多模态支持 | 复杂问题,视觉分析 |
-| **Gemini-2.0-Flash** | 多模态支持 | 简单问题,视觉分析 |
+| **Gemini-2.5-Flash** | 多模态支持 | 简单问题,视觉分析 |
### 🛠️ 可调参数
@@ -247,4 +247,4 @@ python app.py
## 📜 开源协议
-本项目采用 [Apache 2.0](LICENSE) 协议。
+本项目采用 [Apache 2.0](LICENSE) 协议。
diff --git a/app.py b/app.py
index 73cf796..0160584 100644
--- a/app.py
+++ b/app.py
@@ -101,6 +101,8 @@ def create_model_instance(model_id, settings, is_reasoning=False):
api_key_id = "AlibabaApiKey"
elif "gemini" in model_id.lower() or "google" in model_id.lower():
api_key_id = "GoogleApiKey"
+ elif "doubao" in model_id.lower():
+ api_key_id = "DoubaoApiKey"
# 首先尝试从本地配置获取API密钥
api_key = get_api_key(api_key_id)
@@ -156,6 +158,10 @@ def create_model_instance(model_id, settings, is_reasoning=False):
custom_base_url = api_base_urls.get('google')
if custom_base_url:
base_url = custom_base_url
+ elif "doubao" in model_id.lower():
+ custom_base_url = api_base_urls.get('doubao')
+ if custom_base_url:
+ base_url = custom_base_url
# 创建模型实例
model_instance = ModelFactory.create_model(
@@ -318,39 +324,66 @@ def handle_text_extraction(data):
if not isinstance(settings, dict):
raise ValueError("Invalid settings format")
- # 尝试从本地配置获取Mathpix API密钥
- mathpix_app_id = get_api_key('MathpixAppId')
- mathpix_app_key = get_api_key('MathpixAppKey')
+ # 优先使用百度OCR,如果没有配置则使用Mathpix
+ # 首先尝试获取百度OCR API密钥
+ baidu_api_key = get_api_key('BaiduApiKey')
+ baidu_secret_key = get_api_key('BaiduSecretKey')
- # 构建完整的Mathpix API密钥(格式:app_id:app_key)
- mathpix_key = f"{mathpix_app_id}:{mathpix_app_key}" if mathpix_app_id and mathpix_app_key else None
+ # 构建百度OCR API密钥(格式:api_key:secret_key)
+ ocr_key = None
+ ocr_model = None
- # 如果本地没有配置,尝试使用前端传递的密钥(向后兼容)
- if not mathpix_key:
- mathpix_key = settings.get('mathpixApiKey')
+ if baidu_api_key and baidu_secret_key:
+ ocr_key = f"{baidu_api_key}:{baidu_secret_key}"
+ ocr_model = 'baidu-ocr'
+ print("Using Baidu OCR for text extraction...")
+ else:
+ # 回退到Mathpix
+ mathpix_app_id = get_api_key('MathpixAppId')
+ mathpix_app_key = get_api_key('MathpixAppKey')
+
+ # 构建完整的Mathpix API密钥(格式:app_id:app_key)
+ mathpix_key = f"{mathpix_app_id}:{mathpix_app_key}" if mathpix_app_id and mathpix_app_key else None
+
+ # 如果本地没有配置,尝试使用前端传递的密钥(向后兼容)
+ if not mathpix_key:
+ mathpix_key = settings.get('mathpixApiKey')
+
+ if mathpix_key:
+ ocr_key = mathpix_key
+ ocr_model = 'mathpix'
+ print("Using Mathpix OCR for text extraction...")
- if not mathpix_key:
- raise ValueError("Mathpix API key is required")
+ if not ocr_key:
+ raise ValueError("OCR API key is required. Please configure Baidu OCR (API Key + Secret Key) or Mathpix (App ID + App Key)")
# 先回复客户端,确认已收到请求,防止超时断开
# 注意:这里不能使用return,否则后续代码不会执行
socketio.emit('request_acknowledged', {
'status': 'received',
- 'message': 'Image received, text extraction in progress'
+ 'message': f'Image received, text extraction in progress using {ocr_model}'
}, room=request.sid)
try:
- app_id, app_key = mathpix_key.split(':')
- if not app_id.strip() or not app_key.strip():
- raise ValueError()
+ if ocr_model == 'baidu-ocr':
+ api_key, secret_key = ocr_key.split(':')
+ if not api_key.strip() or not secret_key.strip():
+ raise ValueError()
+ elif ocr_model == 'mathpix':
+ app_id, app_key = ocr_key.split(':')
+ if not app_id.strip() or not app_key.strip():
+ raise ValueError()
except ValueError:
- raise ValueError("Invalid Mathpix API key format. Expected format: 'app_id:app_key'")
+ if ocr_model == 'baidu-ocr':
+ raise ValueError("Invalid Baidu OCR API key format. Expected format: 'API_KEY:SECRET_KEY'")
+ else:
+ raise ValueError("Invalid Mathpix API key format. Expected format: 'app_id:app_key'")
- print("Creating Mathpix model instance...")
- # 只传递必需的参数,ModelFactory.create_model会处理不同模型类型
+ print(f"Creating {ocr_model} model instance...")
+ # ModelFactory.create_model会处理不同模型类型
model = ModelFactory.create_model(
- model_name='mathpix',
- api_key=mathpix_key
+ model_name=ocr_model,
+ api_key=ocr_key
)
print("Starting text extraction...")
diff --git a/config/api_base_urls.json b/config/api_base_urls.json
index f5136e7..192c0b7 100644
--- a/config/api_base_urls.json
+++ b/config/api_base_urls.json
@@ -3,5 +3,6 @@
"OpenaiApiBaseUrl": "",
"DeepseekApiBaseUrl": "",
"AlibabaApiBaseUrl": "",
- "GoogleApiBaseUrl": ""
+ "GoogleApiBaseUrl": "",
+ "DoubaoApiBaseUrl": ""
}
\ No newline at end of file
diff --git a/config/models.json b/config/models.json
index bdd2481..fe35fd8 100644
--- a/config/models.json
+++ b/config/models.json
@@ -24,6 +24,11 @@
"name": "Google",
"api_key_id": "GoogleApiKey",
"class_name": "GoogleModel"
+ },
+ "doubao": {
+ "name": "Doubao",
+ "api_key_id": "DoubaoApiKey",
+ "class_name": "DoubaoModel"
}
},
"models": {
@@ -91,21 +96,29 @@
"version": "latest",
"description": "阿里通义千问VL-MAX模型,视觉理解能力最强,支持图像理解和复杂任务"
},
- "gemini-2.5-pro-preview-03-25": {
+ "gemini-2.5-pro": {
"name": "Gemini 2.5 Pro",
"provider": "google",
"supportsMultimodal": true,
"isReasoning": true,
- "version": "preview-03-25",
- "description": "Google最强大的Gemini 2.5 Pro模型,支持图像理解"
+ "version": "latest",
+ "description": "Google最强大的Gemini 2.5 Pro模型,支持图像理解(需要付费API密钥)"
},
- "gemini-2.0-flash": {
- "name": "Gemini 2.0 Flash",
+ "gemini-2.5-flash": {
+ "name": "Gemini 2.5 Flash",
"provider": "google",
"supportsMultimodal": true,
"isReasoning": false,
"version": "latest",
- "description": "Google更快速的Gemini 2.0 Flash模型,支持图像理解,响应更迅速"
+ "description": "Google更快速的Gemini 2.5 Flash模型,支持图像理解,有免费配额"
+ },
+ "doubao-seed-1-6-250615": {
+ "name": "Doubao-Seed-1.6",
+ "provider": "doubao",
+ "supportsMultimodal": true,
+ "isReasoning": true,
+ "version": "latest",
+ "description": "支持auto/thinking/non-thinking三种思考模式、支持多模态、256K长上下文"
}
}
}
\ No newline at end of file
diff --git a/config/proxy_api.json b/config/proxy_api.json
index 0b26cb6..98e2832 100644
--- a/config/proxy_api.json
+++ b/config/proxy_api.json
@@ -4,7 +4,8 @@
"anthropic": "",
"deepseek": "",
"google": "",
- "openai": ""
+ "openai": "",
+ "doubao": ""
},
"enabled": true
}
\ No newline at end of file
diff --git a/models/__init__.py b/models/__init__.py
index 336f66e..43783ce 100644
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -4,6 +4,7 @@ from .openai import OpenAIModel
from .deepseek import DeepSeekModel
from .alibaba import AlibabaModel
from .google import GoogleModel
+from .doubao import DoubaoModel
from .factory import ModelFactory
__all__ = [
@@ -13,5 +14,6 @@ __all__ = [
'DeepSeekModel',
'AlibabaModel',
'GoogleModel',
+ 'DoubaoModel',
'ModelFactory'
]
diff --git a/models/alibaba.py b/models/alibaba.py
index e6b3076..4402b3c 100644
--- a/models/alibaba.py
+++ b/models/alibaba.py
@@ -4,12 +4,13 @@ from openai import OpenAI
from .base import BaseModel
class AlibabaModel(BaseModel):
- def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None):
+ def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None, api_base_url: str = None):
# 如果没有提供模型名称,才使用默认值
self.model_name = model_name if model_name else "QVQ-Max-2025-03-25"
print(f"初始化阿里巴巴模型: {self.model_name}")
# 在super().__init__之前设置model_name,这样get_default_system_prompt能使用它
super().__init__(api_key, temperature, system_prompt, language)
+ self.api_base_url = api_base_url # 存储API基础URL
def get_default_system_prompt(self) -> str:
"""根据模型名称返回不同的默认系统提示词"""
diff --git a/models/baidu_ocr.py b/models/baidu_ocr.py
new file mode 100644
index 0000000..ac48057
--- /dev/null
+++ b/models/baidu_ocr.py
@@ -0,0 +1,177 @@
+import base64
+import json
+import time
+import urllib.request
+import urllib.parse
+from typing import Generator, Dict, Any
+from .base import BaseModel
+
+class BaiduOCRModel(BaseModel):
+ """
+ 百度OCR模型,用于图像文字识别
+ """
+
+ def __init__(self, api_key: str, secret_key: str = None, temperature: float = 0.7, system_prompt: str = None):
+ """
+ 初始化百度OCR模型
+
+ Args:
+ api_key: 百度API Key
+ secret_key: 百度Secret Key(可以在api_key中用冒号分隔传入)
+ temperature: 不用于OCR但保持BaseModel兼容性
+ system_prompt: 不用于OCR但保持BaseModel兼容性
+
+ Raises:
+ ValueError: 如果API密钥格式无效
+ """
+ super().__init__(api_key, temperature, system_prompt)
+
+ # 支持两种格式:单独传递或在api_key中用冒号分隔
+ if secret_key:
+ self.api_key = api_key
+ self.secret_key = secret_key
+ else:
+ try:
+ self.api_key, self.secret_key = api_key.split(':')
+ except ValueError:
+ raise ValueError("百度OCR API密钥必须是 'API_KEY:SECRET_KEY' 格式或单独传递secret_key参数")
+
+ # 百度API URLs
+ self.token_url = "https://aip.baidubce.com/oauth/2.0/token"
+ self.ocr_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
+
+ # 缓存access_token
+ self._access_token = None
+ self._token_expires = 0
+
+ def get_access_token(self) -> str:
+ """获取百度API的access_token"""
+ # 检查是否需要刷新token(提前5分钟刷新)
+ if self._access_token and time.time() < self._token_expires - 300:
+ return self._access_token
+
+ # 请求新的access_token
+ params = {
+ 'grant_type': 'client_credentials',
+ 'client_id': self.api_key,
+ 'client_secret': self.secret_key
+ }
+
+ data = urllib.parse.urlencode(params).encode('utf-8')
+ request = urllib.request.Request(self.token_url, data=data)
+ request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+
+ try:
+ with urllib.request.urlopen(request) as response:
+ result = json.loads(response.read().decode('utf-8'))
+
+ if 'access_token' in result:
+ self._access_token = result['access_token']
+ # 设置过期时间(默认30天,但我们提前刷新)
+ self._token_expires = time.time() + result.get('expires_in', 2592000)
+ return self._access_token
+ else:
+ raise Exception(f"获取access_token失败: {result.get('error_description', '未知错误')}")
+
+ except Exception as e:
+ raise Exception(f"请求access_token失败: {str(e)}")
+
+ def ocr_image(self, image_data: str) -> str:
+ """
+ 对图像进行OCR识别
+
+ Args:
+ image_data: Base64编码的图像数据
+
+ Returns:
+ str: 识别出的文字内容
+ """
+ access_token = self.get_access_token()
+
+ # 准备请求数据
+ params = {
+ 'image': image_data,
+ 'language_type': 'auto_detect', # 自动检测语言
+ 'detect_direction': 'true', # 检测图像朝向
+ 'probability': 'false' # 不返回置信度(减少响应大小)
+ }
+
+ data = urllib.parse.urlencode(params).encode('utf-8')
+ url = f"{self.ocr_url}?access_token={access_token}"
+
+ request = urllib.request.Request(url, data=data)
+ request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+
+ try:
+ with urllib.request.urlopen(request) as response:
+ result = json.loads(response.read().decode('utf-8'))
+
+ if 'error_code' in result:
+ raise Exception(f"百度OCR API错误: {result.get('error_msg', '未知错误')}")
+
+ # 提取识别的文字
+ words_result = result.get('words_result', [])
+ text_lines = [item['words'] for item in words_result]
+
+ return '\n'.join(text_lines)
+
+ except Exception as e:
+ raise Exception(f"OCR识别失败: {str(e)}")
+
+ def extract_full_text(self, image_data: str) -> str:
+ """
+ 提取图像中的完整文本(与Mathpix兼容的接口)
+
+ Args:
+ image_data: Base64编码的图像数据
+
+ Returns:
+ str: 提取的文本内容
+ """
+ return self.ocr_image(image_data)
+
+ def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]:
+ """
+ 分析图像并返回OCR结果(流式输出以保持接口一致性)
+
+ Args:
+ image_data: Base64编码的图像数据
+ proxies: 代理配置(未使用)
+
+ Yields:
+ dict: 包含OCR结果的响应
+ """
+ try:
+ text = self.ocr_image(image_data)
+ yield {
+ 'status': 'completed',
+ 'content': text,
+ 'model': 'baidu-ocr'
+ }
+ except Exception as e:
+ yield {
+ 'status': 'error',
+ 'content': f'OCR识别失败: {str(e)}',
+ 'model': 'baidu-ocr'
+ }
+
+ def analyze_text(self, text: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]:
+ """
+ 分析文本(OCR模型不支持文本分析)
+
+ Args:
+ text: 输入文本
+ proxies: 代理配置(未使用)
+
+ Yields:
+ dict: 错误响应
+ """
+ yield {
+ 'status': 'error',
+ 'content': 'OCR模型不支持文本分析功能',
+ 'model': 'baidu-ocr'
+ }
+
+ def get_model_identifier(self) -> str:
+ """返回模型标识符"""
+ return "baidu-ocr"
diff --git a/models/deepseek.py b/models/deepseek.py
index 4c314db..f324734 100644
--- a/models/deepseek.py
+++ b/models/deepseek.py
@@ -6,9 +6,10 @@ from openai import OpenAI
from .base import BaseModel
class DeepSeekModel(BaseModel):
- def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = "deepseek-reasoner"):
+ def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = "deepseek-reasoner", api_base_url: str = None):
super().__init__(api_key, temperature, system_prompt, language)
self.model_name = model_name
+ self.api_base_url = api_base_url # 存储API基础URL
def get_default_system_prompt(self) -> str:
return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question:
diff --git a/models/doubao.py b/models/doubao.py
new file mode 100644
index 0000000..68d7925
--- /dev/null
+++ b/models/doubao.py
@@ -0,0 +1,339 @@
+import json
+import os
+import base64
+from typing import Generator, Dict, Any, Optional
+import requests
+from .base import BaseModel
+
+class DoubaoModel(BaseModel):
+ """
+ 豆包API模型实现类
+ 支持字节跳动的豆包AI模型,可处理文本和图像输入
+ """
+
+ def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None, api_base_url: str = None):
+ """
+ 初始化豆包模型
+
+ Args:
+ api_key: 豆包API密钥
+ temperature: 生成温度
+ system_prompt: 系统提示词
+ language: 首选语言
+ model_name: 指定具体模型名称,如不指定则使用默认值
+ api_base_url: API基础URL,用于设置自定义API端点
+ """
+ super().__init__(api_key, temperature, system_prompt, language)
+ self.model_name = model_name or self.get_model_identifier()
+ self.base_url = api_base_url or "https://ark.cn-beijing.volces.com/api/v3"
+ self.max_tokens = 4096 # 默认最大输出token数
+ self.reasoning_config = None # 推理配置,类似于AnthropicModel
+
+ def get_default_system_prompt(self) -> str:
+ return """你是一个专业的问题分析专家。当看到问题图片时:
+1. 仔细阅读并理解问题
+2. 分解问题的关键组成部分
+3. 提供清晰的分步解决方案
+4. 如果相关,解释涉及的概念或理论
+5. 如果有多种方法,优先解释最有效的方法"""
+
+ def get_model_identifier(self) -> str:
+ """返回默认的模型标识符"""
+ return "doubao-seed-1-6-250615" # Doubao-Seed-1.6
+
+ def get_actual_model_name(self) -> str:
+ """根据配置的模型名称返回实际的API调用标识符"""
+ # 豆包API的实际模型名称映射
+ model_mapping = {
+ "doubao-seed-1-6-250615": "doubao-seed-1-6-250615"
+ }
+
+ return model_mapping.get(self.model_name, "doubao-seed-1-6-250615")
+
+ def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
+ """流式生成文本响应"""
+ try:
+ yield {"status": "started"}
+
+ # 设置环境变量代理(如果提供)
+ original_proxies = None
+ if proxies:
+ original_proxies = {
+ 'http_proxy': os.environ.get('http_proxy'),
+ 'https_proxy': os.environ.get('https_proxy')
+ }
+ if 'http' in proxies:
+ os.environ['http_proxy'] = proxies['http']
+ if 'https' in proxies:
+ os.environ['https_proxy'] = proxies['https']
+
+ try:
+ # 构建请求头
+ headers = {
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json"
+ }
+
+ # 构建消息 - 根据官方API文档,暂时不使用系统提示词
+ messages = []
+
+ # 添加用户查询
+ user_content = text
+ if self.language and self.language != 'auto':
+ user_content = f"请使用{self.language}回答以下问题: {text}"
+
+ messages.append({
+ "role": "user",
+ "content": user_content
+ })
+
+ # 处理推理配置
+ thinking = {
+ "type": "auto" # 默认值
+ }
+
+ if hasattr(self, 'reasoning_config') and self.reasoning_config:
+ # 从reasoning_config中获取thinking_mode
+ thinking_mode = self.reasoning_config.get('thinking_mode', "auto")
+ thinking = {
+ "type": thinking_mode
+ }
+
+ # 构建请求数据
+ data = {
+ "model": self.get_actual_model_name(),
+ "messages": messages,
+ "thinking": thinking,
+ "temperature": self.temperature,
+ "max_tokens": self.max_tokens,
+ "stream": True
+ }
+
+ # 发送流式请求
+ response = requests.post(
+ f"{self.base_url}/chat/completions",
+ headers=headers,
+ json=data,
+ stream=True,
+ proxies=proxies if proxies else None,
+ timeout=60
+ )
+
+ if response.status_code != 200:
+ error_text = response.text
+ raise Exception(f"HTTP {response.status_code}: {error_text}")
+
+ response.raise_for_status()
+
+ # 初始化响应缓冲区
+ response_buffer = ""
+
+ # 处理流式响应
+ for line in response.iter_lines():
+ if not line:
+ continue
+
+ line = line.decode('utf-8')
+ if not line.startswith('data: '):
+ continue
+
+ line = line[6:] # 移除 'data: ' 前缀
+
+ if line == '[DONE]':
+ break
+
+ try:
+ chunk_data = json.loads(line)
+ choices = chunk_data.get('choices', [])
+
+ if choices and len(choices) > 0:
+ delta = choices[0].get('delta', {})
+ content = delta.get('content', '')
+
+ if content:
+ response_buffer += content
+
+ # 发送响应进度
+ yield {
+ "status": "streaming",
+ "content": response_buffer
+ }
+
+ except json.JSONDecodeError:
+ continue
+
+ # 确保发送完整的最终内容
+ yield {
+ "status": "completed",
+ "content": response_buffer
+ }
+
+ finally:
+ # 恢复原始代理设置
+ if original_proxies:
+ for key, value in original_proxies.items():
+ if value is None:
+ if key in os.environ:
+ del os.environ[key]
+ else:
+ os.environ[key] = value
+
+ except Exception as e:
+ yield {
+ "status": "error",
+ "error": f"豆包API错误: {str(e)}"
+ }
+
+ def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
+ """分析图像并流式生成响应"""
+ try:
+ yield {"status": "started"}
+
+ # 设置环境变量代理(如果提供)
+ original_proxies = None
+ if proxies:
+ original_proxies = {
+ 'http_proxy': os.environ.get('http_proxy'),
+ 'https_proxy': os.environ.get('https_proxy')
+ }
+ if 'http' in proxies:
+ os.environ['http_proxy'] = proxies['http']
+ if 'https' in proxies:
+ os.environ['https_proxy'] = proxies['https']
+
+ try:
+ # 构建请求头
+ headers = {
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json"
+ }
+
+ # 处理图像数据
+ if image_data.startswith('data:image'):
+ # 如果是data URI,提取base64部分
+ image_data = image_data.split(',', 1)[1]
+
+ # 构建用户消息 - 使用豆包API官方示例格式
+ # 首先检查图像数据的格式,确保是有效的图像
+ image_format = "jpeg" # 默认使用jpeg
+ if image_data.startswith('/9j/'): # JPEG magic number in base64
+ image_format = "jpeg"
+ elif image_data.startswith('iVBORw0KGgo'): # PNG magic number in base64
+ image_format = "png"
+
+ user_content = [
+ {
+ "type": "text",
+ "text": f"请使用{self.language}分析这张图片并提供详细解答。" if self.language and self.language != 'auto' else "请分析这张图片并提供详细解答?"
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:image/{image_format};base64,{image_data}"
+ }
+ }
+ ]
+
+ messages = [
+ {
+ "role": "user",
+ "content": user_content
+ }
+ ]
+
+ # 处理推理配置
+ thinking = {
+ "type": "auto" # 默认值
+ }
+
+ if hasattr(self, 'reasoning_config') and self.reasoning_config:
+ # 从reasoning_config中获取thinking_mode
+ thinking_mode = self.reasoning_config.get('thinking_mode', "auto")
+ thinking = {
+ "type": thinking_mode
+ }
+
+ # 构建请求数据
+ data = {
+ "model": self.get_actual_model_name(),
+ "messages": messages,
+ "thinking": thinking,
+ "temperature": self.temperature,
+ "max_tokens": self.max_tokens,
+ "stream": True
+ }
+
+ # 发送流式请求
+ response = requests.post(
+ f"{self.base_url}/chat/completions",
+ headers=headers,
+ json=data,
+ stream=True,
+ proxies=proxies if proxies else None,
+ timeout=60
+ )
+
+ if response.status_code != 200:
+ error_text = response.text
+ raise Exception(f"HTTP {response.status_code}: {error_text}")
+
+ response.raise_for_status()
+
+ # 初始化响应缓冲区
+ response_buffer = ""
+
+ # 处理流式响应
+ for line in response.iter_lines():
+ if not line:
+ continue
+
+ line = line.decode('utf-8')
+ if not line.startswith('data: '):
+ continue
+
+ line = line[6:] # 移除 'data: ' 前缀
+
+ if line == '[DONE]':
+ break
+
+ try:
+ chunk_data = json.loads(line)
+ choices = chunk_data.get('choices', [])
+
+ if choices and len(choices) > 0:
+ delta = choices[0].get('delta', {})
+ content = delta.get('content', '')
+
+ if content:
+ response_buffer += content
+
+ # 发送响应进度
+ yield {
+ "status": "streaming",
+ "content": response_buffer
+ }
+
+ except json.JSONDecodeError:
+ continue
+
+ # 确保发送完整的最终内容
+ yield {
+ "status": "completed",
+ "content": response_buffer
+ }
+
+ finally:
+ # 恢复原始代理设置
+ if original_proxies:
+ for key, value in original_proxies.items():
+ if value is None:
+ if key in os.environ:
+ del os.environ[key]
+ else:
+ os.environ[key] = value
+
+ except Exception as e:
+ yield {
+ "status": "error",
+ "error": f"豆包图像分析错误: {str(e)}"
+ }
diff --git a/models/factory.py b/models/factory.py
index ad5f6e2..96cbfbb 100644
--- a/models/factory.py
+++ b/models/factory.py
@@ -3,7 +3,8 @@ import json
import os
import importlib
from .base import BaseModel
-from .mathpix import MathpixModel # MathpixModel仍然需要直接导入,因为它是特殊工具
+from .mathpix import MathpixModel # MathpixModel需要直接导入,因为它是特殊OCR工具
+from .baidu_ocr import BaiduOCRModel # 百度OCR也是特殊OCR工具,直接导入
class ModelFactory:
# 模型基本信息,包含类型和特性
@@ -39,13 +40,25 @@ class ModelFactory:
'description': model_info.get('description', '')
}
- # 添加Mathpix模型(特殊工具模型)
+ # 添加特殊OCR工具模型(不在配置文件中定义)
+
+ # 添加Mathpix OCR工具
cls._models['mathpix'] = {
'class': MathpixModel,
'is_multimodal': True,
'is_reasoning': False,
'display_name': 'Mathpix OCR',
- 'description': '文本提取工具,适用于数学公式和文本',
+ 'description': '数学公式识别工具,适用于复杂数学内容',
+ 'is_ocr_only': True
+ }
+
+ # 添加百度OCR工具
+ cls._models['baidu-ocr'] = {
+ 'class': BaiduOCRModel,
+ 'is_multimodal': True,
+ 'is_reasoning': False,
+ 'display_name': '百度OCR',
+ 'description': '通用文字识别工具,支持中文识别',
'is_ocr_only': True
}
@@ -62,22 +75,36 @@ class ModelFactory:
# 不再硬编码模型定义,而是使用空字典
cls._models = {}
- # 只保留Mathpix作为基础工具
+ # 添加特殊OCR工具(当配置加载失败时的备用)
try:
- # 导入MathpixModel类
+ # 导入并添加Mathpix OCR工具
from .mathpix import MathpixModel
- # 添加Mathpix作为基础工具
cls._models['mathpix'] = {
'class': MathpixModel,
'is_multimodal': True,
'is_reasoning': False,
'display_name': 'Mathpix OCR',
- 'description': '文本提取工具,适用于数学公式和文本',
+ 'description': '数学公式识别工具,适用于复杂数学内容',
'is_ocr_only': True
}
except Exception as e:
- print(f"无法加载基础Mathpix工具: {str(e)}")
+ print(f"无法加载Mathpix OCR工具: {str(e)}")
+
+ # 添加百度OCR工具
+ try:
+ from .baidu_ocr import BaiduOCRModel
+
+ cls._models['baidu-ocr'] = {
+ 'class': BaiduOCRModel,
+ 'is_multimodal': True,
+ 'is_reasoning': False,
+ 'display_name': '百度OCR',
+ 'description': '通用文字识别工具,支持中文识别',
+ 'is_ocr_only': True
+ }
+ except Exception as e:
+ print(f"无法加载百度OCR工具: {str(e)}")
@classmethod
def create_model(cls, model_name: str, api_key: str, temperature: float = 0.7,
@@ -114,6 +141,25 @@ class ModelFactory:
)
# 对于阿里巴巴模型,也需要传递正确的模型名称
elif 'qwen' in model_name.lower() or 'qvq' in model_name.lower() or 'alibaba' in model_name.lower():
+ return model_class(
+ api_key=api_key,
+ temperature=temperature,
+ system_prompt=system_prompt,
+ language=language,
+ model_name=model_name
+ )
+ # 对于Google模型,也需要传递正确的模型名称
+ elif 'gemini' in model_name.lower() or 'google' in model_name.lower():
+ return model_class(
+ api_key=api_key,
+ temperature=temperature,
+ system_prompt=system_prompt,
+ language=language,
+ model_name=model_name,
+ api_base_url=api_base_url
+ )
+ # 对于豆包模型,也需要传递正确的模型名称
+ elif 'doubao' in model_name.lower():
return model_class(
api_key=api_key,
temperature=temperature,
@@ -129,6 +175,13 @@ class ModelFactory:
temperature=temperature,
system_prompt=system_prompt
)
+ # 对于百度OCR模型,传递api_key(支持API_KEY:SECRET_KEY格式)
+ elif model_name == 'baidu-ocr':
+ return model_class(
+ api_key=api_key,
+ temperature=temperature,
+ system_prompt=system_prompt
+ )
# 对于Anthropic模型,需要传递model_identifier参数
elif 'claude' in model_name.lower() or 'anthropic' in model_name.lower():
return model_class(
diff --git a/models/google.py b/models/google.py
index 6904bfe..fe9f210 100644
--- a/models/google.py
+++ b/models/google.py
@@ -30,10 +30,17 @@ class GoogleModel(BaseModel):
# 配置Google API
if api_base_url:
- # 如果提供了自定义API基础URL,设置genai的api_url
- genai.configure(api_key=api_key, transport="rest", client_options={"api_endpoint": api_base_url})
+ # 配置中转API - 使用环境变量方式
+ # 移除末尾的斜杠以避免重复路径问题
+ clean_base_url = api_base_url.rstrip('/')
+ # 设置环境变量来指定API端点
+ os.environ['GOOGLE_AI_API_ENDPOINT'] = clean_base_url
+ genai.configure(api_key=api_key)
else:
# 使用默认API端点
+ # 清除可能存在的自定义端点环境变量
+ if 'GOOGLE_AI_API_ENDPOINT' in os.environ:
+ del os.environ['GOOGLE_AI_API_ENDPOINT']
genai.configure(api_key=api_key)
def get_default_system_prompt(self) -> str:
@@ -46,7 +53,7 @@ class GoogleModel(BaseModel):
def get_model_identifier(self) -> str:
"""返回默认的模型标识符"""
- return "gemini-2.5-pro-preview-03-25"
+ return "gemini-2.5-flash" # 使用有免费配额的模型作为默认值
def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
"""流式生成文本响应"""
diff --git a/static/js/main.js b/static/js/main.js
index ed6b8b9..13b2686 100644
--- a/static/js/main.js
+++ b/static/js/main.js
@@ -1053,10 +1053,33 @@ class SnapSolver {
this.extractTextBtn.innerHTML = '提取中...';
const settings = window.settingsManager.getSettings();
+
+ // 根据用户设置的OCR源进行选择
+ const ocrSource = settings.ocrSource || 'auto';
+ const baiduApiKey = window.settingsManager.apiKeyValues.BaiduApiKey;
+ const baiduSecretKey = window.settingsManager.apiKeyValues.BaiduSecretKey;
const mathpixApiKey = settings.mathpixApiKey;
- if (!mathpixApiKey || mathpixApiKey === ':') {
- window.uiManager.showToast('请在设置中输入Mathpix API凭据', 'error');
+ const hasBaiduOCR = baiduApiKey && baiduSecretKey;
+ const hasMathpix = mathpixApiKey && mathpixApiKey !== ':';
+
+ // 根据OCR源配置检查可用性
+ let canProceed = false;
+ let missingOCRMessage = '';
+
+ if (ocrSource === 'baidu') {
+ canProceed = hasBaiduOCR;
+ missingOCRMessage = '请在设置中配置百度OCR API密钥';
+ } else if (ocrSource === 'mathpix') {
+ canProceed = hasMathpix;
+ missingOCRMessage = '请在设置中配置Mathpix API密钥';
+ } else { // auto
+ canProceed = hasBaiduOCR || hasMathpix;
+ missingOCRMessage = '请在设置中配置OCR API密钥:百度OCR(推荐)或Mathpix';
+ }
+
+ if (!canProceed) {
+ window.uiManager.showToast(missingOCRMessage, 'error');
document.getElementById('settingsPanel').classList.add('active');
this.extractTextBtn.disabled = false;
this.extractTextBtn.innerHTML = '提取文本';
@@ -1076,7 +1099,7 @@ class SnapSolver {
this.socket.emit('extract_text', {
image: this.croppedImage.split(',')[1],
settings: {
- mathpixApiKey: mathpixApiKey
+ ocrSource: settings.ocrSource || 'auto'
}
});
diff --git a/static/js/settings.js b/static/js/settings.js
index 87c1e2b..b0019c5 100644
--- a/static/js/settings.js
+++ b/static/js/settings.js
@@ -374,6 +374,9 @@ class SettingsManager {
// 模型选择器对象
this.modelSelector = null;
+ // OCR源配置
+ this.ocrSource = 'auto'; // 默认自动选择
+
// 存储API密钥的对象
this.apiKeyValues = {
'AnthropicApiKey': '',
@@ -381,6 +384,9 @@ class SettingsManager {
'DeepseekApiKey': '',
'AlibabaApiKey': '',
'GoogleApiKey': '',
+ 'DoubaoApiKey': '',
+ 'BaiduApiKey': '',
+ 'BaiduSecretKey': '',
'MathpixAppId': '',
'MathpixAppKey': ''
};
@@ -391,7 +397,8 @@ class SettingsManager {
'OpenaiApiBaseUrl': '',
'DeepseekApiBaseUrl': '',
'AlibabaApiBaseUrl': '',
- 'GoogleApiBaseUrl': ''
+ 'GoogleApiBaseUrl': '',
+ 'DoubaoApiBaseUrl': ''
};
// 加载模型配置
@@ -580,6 +587,13 @@ class SettingsManager {
this.updateReasoningOptionUI(settings.reasoningDepth);
}
+ // 加载豆包思考模式设置
+ if (settings.doubaoThinkingMode && this.doubaoThinkingModeSelect) {
+ this.doubaoThinkingModeSelect.value = settings.doubaoThinkingMode;
+ // 更新豆包思考选项UI
+ this.updateDoubaoThinkingOptionUI(settings.doubaoThinkingMode);
+ }
+
// 加载思考预算百分比
const thinkBudgetPercent = parseInt(settings.thinkBudgetPercent || '50');
if (this.thinkBudgetPercentInput) {
@@ -624,6 +638,14 @@ class SettingsManager {
this.proxyPortInput.value = settings.proxyPort;
}
+ // Load OCR source setting
+ if (settings.ocrSource) {
+ this.ocrSource = settings.ocrSource;
+ if (this.ocrSourceSelect) {
+ this.ocrSourceSelect.value = settings.ocrSource;
+ }
+ }
+
// Update UI based on model type
this.updateUIBasedOnModelType();
@@ -720,6 +742,14 @@ class SettingsManager {
this.thinkBudgetGroup.style.display = showThinkBudget ? 'block' : 'none';
}
+ // 处理豆包深度思考设置显示
+ const isDoubaoReasoning = modelInfo.isReasoning && modelInfo.provider === 'doubao';
+
+ // 只有对豆包推理模型才显示深度思考设置
+ if (this.doubaoThinkingGroup) {
+ this.doubaoThinkingGroup.style.display = isDoubaoReasoning ? 'block' : 'none';
+ }
+
// 控制最大Token设置的显示
// 阿里巴巴模型不支持自定义Token设置
const maxTokensGroup = this.maxTokens ? this.maxTokens.closest('.setting-group') : null;
@@ -759,6 +789,8 @@ class SettingsManager {
apiKeyToHighlight = document.querySelector('.api-key-status:nth-child(4)'); // Alibaba
} else if (modelType && (modelType.toLowerCase().includes('gemini') || modelType.toLowerCase().includes('google'))) {
apiKeyToHighlight = document.querySelector('.api-key-status:nth-child(5)'); // Google
+ } else if (modelType && modelType.toLowerCase().includes('doubao')) {
+ apiKeyToHighlight = document.querySelector('.api-key-status:nth-child(6)'); // 豆包
}
if (apiKeyToHighlight) {
@@ -775,6 +807,7 @@ class SettingsManager {
model: this.modelSelect.value,
maxTokens: this.maxTokens.value,
reasoningDepth: this.reasoningDepthSelect?.value || 'standard',
+ doubaoThinkingMode: this.doubaoThinkingModeSelect?.value || 'auto',
thinkBudgetPercent: this.thinkBudgetPercentInput?.value || '50',
temperature: this.temperatureInput.value,
language: this.languageInput.value,
@@ -782,7 +815,8 @@ class SettingsManager {
currentPromptId: this.currentPromptId,
proxyEnabled: this.proxyEnabledInput.checked,
proxyHost: this.proxyHostInput.value,
- proxyPort: this.proxyPortInput.value
+ proxyPort: this.proxyPortInput.value,
+ ocrSource: this.ocrSource // 添加OCR源配置保存
};
// 保存设置到localStorage
@@ -832,17 +866,30 @@ class SettingsManager {
const reasoningDepth = this.reasoningDepthSelect?.value || 'standard';
const thinkBudgetPercent = parseInt(this.thinkBudgetPercentInput?.value || '50');
+ // 获取豆包思考模式设置
+ const doubaoThinkingMode = this.doubaoThinkingModeSelect?.value || 'auto';
+
// 计算思考预算的实际Token数
const thinkBudget = Math.floor(maxTokens * (thinkBudgetPercent / 100));
// 构建推理配置参数
const reasoningConfig = {};
- if (modelInfo.provider === 'anthropic' && modelInfo.isReasoning) {
- if (reasoningDepth === 'extended') {
- reasoningConfig.reasoning_depth = 'extended';
- reasoningConfig.think_budget = thinkBudget;
- } else {
- reasoningConfig.speed_mode = 'instant';
+
+ // 处理不同模型的推理配置
+ if (modelInfo.isReasoning) {
+ // 对于Anthropic模型
+ if (modelInfo.provider === 'anthropic') {
+ if (reasoningDepth === 'extended') {
+ reasoningConfig.reasoning_depth = 'extended';
+ reasoningConfig.think_budget = thinkBudget;
+ } else {
+ reasoningConfig.speed_mode = 'instant';
+ }
+ }
+
+ // 对于豆包模型
+ if (modelInfo.provider === 'doubao') {
+ reasoningConfig.thinking_mode = doubaoThinkingMode;
}
}
@@ -869,6 +916,9 @@ class SettingsManager {
if (this.apiBaseUrlValues['GoogleApiBaseUrl']) {
apiBaseUrls.google = this.apiBaseUrlValues['GoogleApiBaseUrl'];
}
+ if (this.apiBaseUrlValues['DoubaoApiBaseUrl']) {
+ apiBaseUrls.doubao = this.apiBaseUrlValues['DoubaoApiBaseUrl'];
+ }
}
return {
@@ -881,6 +931,8 @@ class SettingsManager {
proxyHost: this.proxyHostInput.value,
proxyPort: this.proxyPortInput.value,
mathpixApiKey: mathpixApiKey,
+ ocrSource: this.ocrSource, // 添加OCR源配置
+ doubaoThinkingMode: doubaoThinkingMode, // 添加豆包思考模式配置
modelInfo: {
supportsMultimodal: modelInfo.supportsMultimodal || false,
isReasoning: modelInfo.isReasoning || false,
@@ -1121,6 +1173,20 @@ class SettingsManager {
this.saveSettings();
});
+ // OCR源选择器事件监听
+ if (this.ocrSourceSelect) {
+ this.ocrSourceSelect.addEventListener('change', (e) => {
+ // 阻止事件冒泡
+ e.stopPropagation();
+
+ // 更新OCR源配置
+ this.ocrSource = e.target.value;
+ this.saveSettings();
+
+ console.log('OCR源已切换为:', this.ocrSource);
+ });
+ }
+
// Panel visibility
if (this.settingsToggle) {
this.settingsToggle.addEventListener('click', () => {
@@ -1195,6 +1261,71 @@ class SettingsManager {
// 初始化API密钥编辑功能
this.initApiKeyEditFunctions();
+
+ // 初始化推理选项事件
+ this.initReasoningOptionEvents();
+
+ // 初始化豆包思考选项事件
+ this.initDoubaoThinkingOptionEvents();
+ }
+
+ // 初始化推理选项事件
+ initReasoningOptionEvents() {
+ const reasoningOptions = document.querySelectorAll('.reasoning-option');
+ reasoningOptions.forEach(option => {
+ option.addEventListener('click', (e) => {
+ e.preventDefault();
+ e.stopPropagation();
+
+ const value = option.getAttribute('data-value');
+ if (value && this.reasoningDepthSelect) {
+ // 更新select值
+ this.reasoningDepthSelect.value = value;
+
+ // 更新UI
+ this.updateReasoningOptionUI(value);
+
+ // 保存设置
+ this.saveSettings();
+ }
+ });
+ });
+ }
+
+ // 初始化豆包思考选项事件
+ initDoubaoThinkingOptionEvents() {
+ const doubaoThinkingOptions = document.querySelectorAll('.doubao-thinking-option');
+ doubaoThinkingOptions.forEach(option => {
+ option.addEventListener('click', (e) => {
+ e.preventDefault();
+ e.stopPropagation();
+
+ const value = option.getAttribute('data-value');
+ if (value && this.doubaoThinkingModeSelect) {
+ // 更新select值
+ this.doubaoThinkingModeSelect.value = value;
+
+ // 更新UI
+ this.updateDoubaoThinkingOptionUI(value);
+
+ // 保存设置
+ this.saveSettings();
+ }
+ });
+ });
+ }
+
+ // 更新豆包思考选项UI
+ updateDoubaoThinkingOptionUI(value) {
+ const doubaoThinkingOptions = document.querySelectorAll('.doubao-thinking-option');
+ doubaoThinkingOptions.forEach(option => {
+ const optionValue = option.getAttribute('data-value');
+ if (optionValue === value) {
+ option.classList.add('active');
+ } else {
+ option.classList.remove('active');
+ }
+ });
}
// 更新思考预算显示
@@ -2208,10 +2339,17 @@ class SettingsManager {
this.thinkBudgetPercentValue = document.getElementById('thinkBudgetPercentValue');
this.thinkBudgetGroup = document.querySelector('.think-budget-group');
+ // 豆包深度思考相关元素
+ this.doubaoThinkingModeSelect = document.getElementById('doubaoThinkingMode');
+ this.doubaoThinkingGroup = document.querySelector('.doubao-thinking-group');
+
// Initialize Mathpix inputs
this.mathpixAppIdInput = document.getElementById('mathpixAppId');
this.mathpixAppKeyInput = document.getElementById('mathpixAppKey');
+ // OCR源选择器
+ this.ocrSourceSelect = document.getElementById('ocrSourceSelect');
+
// API Key elements - 所有的密钥输入框
this.apiKeyInputs = {
'AnthropicApiKey': document.getElementById('AnthropicApiKey'),
@@ -2260,6 +2398,9 @@ class SettingsManager {
'DeepseekApiKey': '',
'AlibabaApiKey': '',
'GoogleApiKey': '',
+ 'DoubaoApiKey': '',
+ 'BaiduApiKey': '',
+ 'BaiduSecretKey': '',
'MathpixAppId': '',
'MathpixAppKey': ''
};
@@ -2359,7 +2500,8 @@ class SettingsManager {
'OpenaiApiBaseUrl': proxyApiConfig.apis?.openai || '',
'DeepseekApiBaseUrl': proxyApiConfig.apis?.deepseek || '',
'AlibabaApiBaseUrl': proxyApiConfig.apis?.alibaba || '',
- 'GoogleApiBaseUrl': proxyApiConfig.apis?.google || ''
+ 'GoogleApiBaseUrl': proxyApiConfig.apis?.google || '',
+ 'DoubaoApiBaseUrl': proxyApiConfig.apis?.doubao || ''
};
this.updateApiBaseUrlStatus(apiBaseUrls);
console.log('API基础URL状态已刷新');
@@ -2449,6 +2591,9 @@ class SettingsManager {
case 'GoogleApiBaseUrl':
config.apis.google = value;
break;
+ case 'DoubaoApiBaseUrl':
+ config.apis.doubao = value;
+ break;
}
// 确保启用中转API
diff --git a/static/style.css b/static/style.css
index 7ba0233..2df0e26 100644
--- a/static/style.css
+++ b/static/style.css
@@ -2174,6 +2174,82 @@ button:disabled {
transition: all 0.2s ease-in-out;
}
+/* OCR设置样式 */
+.ocr-settings {
+ margin-bottom: 1.5rem;
+}
+
+.ocr-source-control {
+ display: flex;
+ flex-direction: column;
+ gap: 12px;
+}
+
+.ocr-source-selector {
+ position: relative;
+}
+
+.ocr-source-select {
+ width: 100%;
+ padding: 10px 14px;
+ border: 1px solid var(--border-color);
+ border-radius: 8px;
+ background: var(--surface);
+ color: var(--text-primary);
+ font-size: 0.9rem;
+ transition: all 0.2s ease;
+}
+
+.ocr-source-select:hover {
+ border-color: var(--primary-color);
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
+}
+
+.ocr-source-select:focus {
+ outline: none;
+ border-color: var(--primary-color);
+ box-shadow: 0 0 0 3px rgba(var(--primary-rgb), 0.1);
+}
+
+.ocr-source-description {
+ display: flex;
+ flex-direction: column;
+ gap: 8px;
+ padding: 12px;
+ background: rgba(0, 0, 0, 0.02);
+ border-radius: 8px;
+ border: 1px solid var(--border-color);
+}
+
+.ocr-desc-item {
+ display: flex;
+ align-items: flex-start;
+ gap: 8px;
+ font-size: 0.85rem;
+ line-height: 1.4;
+ color: var(--text-secondary);
+}
+
+.ocr-desc-item i {
+ color: var(--primary-color);
+ margin-top: 2px;
+ flex-shrink: 0;
+}
+
+.ocr-desc-item strong {
+ color: var(--text-primary);
+}
+
+/* 暗色主题下的OCR设置样式 */
+[data-theme="dark"] .ocr-source-description {
+ background: rgba(255, 255, 255, 0.02);
+}
+
+[data-theme="dark"] .ocr-source-select {
+ background: var(--surface);
+ border-color: var(--border-color);
+}
+
/* 新增的推理控制组件样式 */
.reasoning-control {
display: flex;
@@ -2260,6 +2336,122 @@ button:disabled {
opacity: 1;
}
+/* 豆包深度思考控制组件样式 */
+.doubao-thinking-control {
+ display: flex;
+ flex-direction: column;
+ gap: 8px;
+}
+
+.doubao-thinking-label {
+ display: flex;
+ justify-content: space-between;
+ align-items: center;
+ margin-bottom: 6px;
+}
+
+.doubao-thinking-selector {
+ display: flex;
+ gap: 8px;
+ margin-bottom: 8px;
+}
+
+.doubao-thinking-option {
+ flex: 1;
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+ padding: 12px 8px;
+ border-radius: 8px;
+ background: rgba(0, 0, 0, 0.05);
+ cursor: pointer;
+ transition: all 0.2s ease;
+ border: 2px solid transparent;
+ position: relative;
+ overflow: hidden;
+ min-height: 80px;
+ justify-content: center;
+}
+
+.doubao-thinking-option::before {
+ content: '';
+ position: absolute;
+ bottom: 0;
+ left: 0;
+ width: 100%;
+ height: 3px;
+ background: linear-gradient(to right, var(--primary-color), transparent);
+ opacity: 0;
+ transition: opacity 0.3s ease;
+}
+
+.doubao-thinking-option:hover {
+ transform: translateY(-2px);
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
+}
+
+.doubao-thinking-option.active {
+ background: rgba(var(--primary-rgb), 0.1);
+ border-color: var(--primary-color);
+}
+
+.doubao-thinking-option.active::before {
+ opacity: 1;
+}
+
+.doubao-thinking-option i {
+ font-size: 1.3rem;
+ margin-bottom: 6px;
+ color: var(--primary-color);
+ opacity: 0.8;
+ transition: all 0.2s ease;
+}
+
+.doubao-thinking-option .option-name {
+ font-weight: 600;
+ font-size: 0.85rem;
+ margin-bottom: 4px;
+ text-align: center;
+}
+
+.doubao-thinking-option .option-desc {
+ font-size: 0.7rem;
+ opacity: 0.7;
+ text-align: center;
+ line-height: 1.2;
+}
+
+.doubao-thinking-option:hover i {
+ transform: scale(1.1);
+ opacity: 1;
+}
+
+.doubao-thinking-desc {
+ display: flex;
+ flex-direction: column;
+ gap: 6px;
+ margin-top: 8px;
+ padding: 8px;
+ background: rgba(0, 0, 0, 0.03);
+ border-radius: 6px;
+}
+
+.doubao-desc-item {
+ display: flex;
+ align-items: center;
+ gap: 8px;
+ font-size: 0.8rem;
+ opacity: 0.8;
+}
+
+.doubao-desc-item i {
+ font-size: 0.9rem;
+ color: var(--primary-color);
+ opacity: 0.7;
+ width: 16px;
+ text-align: center;
+}
+
/* 思考预算控制组件样式 */
.think-budget-control {
display: flex;
@@ -2411,6 +2603,18 @@ button:disabled {
background: rgba(var(--primary-rgb), 0.2);
}
+[data-theme="dark"] .doubao-thinking-option {
+ background: rgba(255, 255, 255, 0.05);
+}
+
+[data-theme="dark"] .doubao-thinking-option.active {
+ background: rgba(var(--primary-rgb), 0.2);
+}
+
+[data-theme="dark"] .doubao-thinking-desc {
+ background: rgba(255, 255, 255, 0.03);
+}
+
[data-theme="dark"] .think-value-badge {
background: rgba(255, 255, 255, 0.1);
}
diff --git a/templates/index.html b/templates/index.html
index 5432c2e..0a2f075 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -219,6 +219,49 @@
+
+
+
+
+
+
+
+
+ 自动模式
+ 由AI自动决定是否使用深度思考
+
+
+
+ 开启思考
+ 强制启用深度思考过程
+
+
+
+ 关闭思考
+ 禁用深度思考,快速响应
+
+
+
+
+
+
+ 自动模式:AI根据问题复杂度自动决定
+
+
+
+ 开启思考:显示完整的思考推理过程
+
+
+
+ 关闭思考:直接给出答案,响应更快
+
+
+
+
+
+
+
+
OCR设置
+
+
+
+
+
+
+
+
+
+ 自动选择:优先使用百度OCR,如无配置则使用Mathpix
+
+
+
+ 百度OCR:支持中文,免费额度大,推荐使用
+
+
+
+ Mathpix:专业数学公式识别,支持LaTeX格式
+
+
+
+
+
+
API密钥设置
@@ -425,6 +500,75 @@
+
+
Doubao API:
+
+
+
+ 未设置
+
+
+
+
+
+
+
+
+
+
+
+
+
+
百度OCR API Key:
+
+
+
+ 未设置
+
+
+
+
+
+
+
+
+
+
+
+
百度OCR Secret Key:
+
+
+
+ 未设置
+
+
+
+
+
+
+
+
+
+
+
Mathpix App ID:
@@ -577,6 +721,25 @@
+
+
Doubao API URL:
+
+
+
+ 未设置
+
+
+
+
+
+
+
+
+