mirror of
https://github.com/Zippland/Snap-Solver.git
synced 2026-01-19 01:21:13 +08:00
Merge pull request #10 from SkeStars/main
修复gemini接口,添加豆包接口;添加百度OCR支持,更新OCR源选择和设置界面
This commit is contained in:
@@ -79,7 +79,7 @@ graph TD
|
||||
<li><b>Claude-3.7</b>:Anthropic的高级理解与解释</li>
|
||||
<li><b>DeepSeek-v3/r1</b>:专为中文场景优化的模型</li>
|
||||
<li><b>QVQ-MAX/Qwen-VL-MAX</b>:以视觉推理闻名的国产AI</li>
|
||||
<li><b>Gemini-2.5-Pro/2.0-flash</b>:智商130的非推理AI</li>
|
||||
<li><b>Gemini-2.5-Pro/2.5-flash</b>:智商130的非推理AI</li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -189,7 +189,7 @@ python app.py
|
||||
| **QVQ-MAX** | 多模态支持,推理支持 | 复杂问题,视觉分析 |
|
||||
| **Qwen-VL-MAX** | 多模态支持 | 简单问题,视觉分析 |
|
||||
| **Gemini-2.5-Pro** | 多模态支持 | 复杂问题,视觉分析 |
|
||||
| **Gemini-2.0-Flash** | 多模态支持 | 简单问题,视觉分析 |
|
||||
| **Gemini-2.5-Flash** | 多模态支持 | 简单问题,视觉分析 |
|
||||
|
||||
|
||||
### 🛠️ 可调参数
|
||||
@@ -247,4 +247,4 @@ python app.py
|
||||
|
||||
## 📜 开源协议
|
||||
|
||||
本项目采用 [Apache 2.0](LICENSE) 协议。
|
||||
本项目采用 [Apache 2.0](LICENSE) 协议。
|
||||
|
||||
71
app.py
71
app.py
@@ -101,6 +101,8 @@ def create_model_instance(model_id, settings, is_reasoning=False):
|
||||
api_key_id = "AlibabaApiKey"
|
||||
elif "gemini" in model_id.lower() or "google" in model_id.lower():
|
||||
api_key_id = "GoogleApiKey"
|
||||
elif "doubao" in model_id.lower():
|
||||
api_key_id = "DoubaoApiKey"
|
||||
|
||||
# 首先尝试从本地配置获取API密钥
|
||||
api_key = get_api_key(api_key_id)
|
||||
@@ -156,6 +158,10 @@ def create_model_instance(model_id, settings, is_reasoning=False):
|
||||
custom_base_url = api_base_urls.get('google')
|
||||
if custom_base_url:
|
||||
base_url = custom_base_url
|
||||
elif "doubao" in model_id.lower():
|
||||
custom_base_url = api_base_urls.get('doubao')
|
||||
if custom_base_url:
|
||||
base_url = custom_base_url
|
||||
|
||||
# 创建模型实例
|
||||
model_instance = ModelFactory.create_model(
|
||||
@@ -318,39 +324,66 @@ def handle_text_extraction(data):
|
||||
if not isinstance(settings, dict):
|
||||
raise ValueError("Invalid settings format")
|
||||
|
||||
# 尝试从本地配置获取Mathpix API密钥
|
||||
mathpix_app_id = get_api_key('MathpixAppId')
|
||||
mathpix_app_key = get_api_key('MathpixAppKey')
|
||||
# 优先使用百度OCR,如果没有配置则使用Mathpix
|
||||
# 首先尝试获取百度OCR API密钥
|
||||
baidu_api_key = get_api_key('BaiduApiKey')
|
||||
baidu_secret_key = get_api_key('BaiduSecretKey')
|
||||
|
||||
# 构建完整的Mathpix API密钥(格式:app_id:app_key)
|
||||
mathpix_key = f"{mathpix_app_id}:{mathpix_app_key}" if mathpix_app_id and mathpix_app_key else None
|
||||
# 构建百度OCR API密钥(格式:api_key:secret_key)
|
||||
ocr_key = None
|
||||
ocr_model = None
|
||||
|
||||
# 如果本地没有配置,尝试使用前端传递的密钥(向后兼容)
|
||||
if not mathpix_key:
|
||||
mathpix_key = settings.get('mathpixApiKey')
|
||||
if baidu_api_key and baidu_secret_key:
|
||||
ocr_key = f"{baidu_api_key}:{baidu_secret_key}"
|
||||
ocr_model = 'baidu-ocr'
|
||||
print("Using Baidu OCR for text extraction...")
|
||||
else:
|
||||
# 回退到Mathpix
|
||||
mathpix_app_id = get_api_key('MathpixAppId')
|
||||
mathpix_app_key = get_api_key('MathpixAppKey')
|
||||
|
||||
# 构建完整的Mathpix API密钥(格式:app_id:app_key)
|
||||
mathpix_key = f"{mathpix_app_id}:{mathpix_app_key}" if mathpix_app_id and mathpix_app_key else None
|
||||
|
||||
# 如果本地没有配置,尝试使用前端传递的密钥(向后兼容)
|
||||
if not mathpix_key:
|
||||
mathpix_key = settings.get('mathpixApiKey')
|
||||
|
||||
if mathpix_key:
|
||||
ocr_key = mathpix_key
|
||||
ocr_model = 'mathpix'
|
||||
print("Using Mathpix OCR for text extraction...")
|
||||
|
||||
if not mathpix_key:
|
||||
raise ValueError("Mathpix API key is required")
|
||||
if not ocr_key:
|
||||
raise ValueError("OCR API key is required. Please configure Baidu OCR (API Key + Secret Key) or Mathpix (App ID + App Key)")
|
||||
|
||||
# 先回复客户端,确认已收到请求,防止超时断开
|
||||
# 注意:这里不能使用return,否则后续代码不会执行
|
||||
socketio.emit('request_acknowledged', {
|
||||
'status': 'received',
|
||||
'message': 'Image received, text extraction in progress'
|
||||
'message': f'Image received, text extraction in progress using {ocr_model}'
|
||||
}, room=request.sid)
|
||||
|
||||
try:
|
||||
app_id, app_key = mathpix_key.split(':')
|
||||
if not app_id.strip() or not app_key.strip():
|
||||
raise ValueError()
|
||||
if ocr_model == 'baidu-ocr':
|
||||
api_key, secret_key = ocr_key.split(':')
|
||||
if not api_key.strip() or not secret_key.strip():
|
||||
raise ValueError()
|
||||
elif ocr_model == 'mathpix':
|
||||
app_id, app_key = ocr_key.split(':')
|
||||
if not app_id.strip() or not app_key.strip():
|
||||
raise ValueError()
|
||||
except ValueError:
|
||||
raise ValueError("Invalid Mathpix API key format. Expected format: 'app_id:app_key'")
|
||||
if ocr_model == 'baidu-ocr':
|
||||
raise ValueError("Invalid Baidu OCR API key format. Expected format: 'API_KEY:SECRET_KEY'")
|
||||
else:
|
||||
raise ValueError("Invalid Mathpix API key format. Expected format: 'app_id:app_key'")
|
||||
|
||||
print("Creating Mathpix model instance...")
|
||||
# 只传递必需的参数,ModelFactory.create_model会处理不同模型类型
|
||||
print(f"Creating {ocr_model} model instance...")
|
||||
# ModelFactory.create_model会处理不同模型类型
|
||||
model = ModelFactory.create_model(
|
||||
model_name='mathpix',
|
||||
api_key=mathpix_key
|
||||
model_name=ocr_model,
|
||||
api_key=ocr_key
|
||||
)
|
||||
|
||||
print("Starting text extraction...")
|
||||
|
||||
@@ -3,5 +3,6 @@
|
||||
"OpenaiApiBaseUrl": "",
|
||||
"DeepseekApiBaseUrl": "",
|
||||
"AlibabaApiBaseUrl": "",
|
||||
"GoogleApiBaseUrl": ""
|
||||
"GoogleApiBaseUrl": "",
|
||||
"DoubaoApiBaseUrl": ""
|
||||
}
|
||||
@@ -24,6 +24,11 @@
|
||||
"name": "Google",
|
||||
"api_key_id": "GoogleApiKey",
|
||||
"class_name": "GoogleModel"
|
||||
},
|
||||
"doubao": {
|
||||
"name": "Doubao",
|
||||
"api_key_id": "DoubaoApiKey",
|
||||
"class_name": "DoubaoModel"
|
||||
}
|
||||
},
|
||||
"models": {
|
||||
@@ -91,21 +96,29 @@
|
||||
"version": "latest",
|
||||
"description": "阿里通义千问VL-MAX模型,视觉理解能力最强,支持图像理解和复杂任务"
|
||||
},
|
||||
"gemini-2.5-pro-preview-03-25": {
|
||||
"gemini-2.5-pro": {
|
||||
"name": "Gemini 2.5 Pro",
|
||||
"provider": "google",
|
||||
"supportsMultimodal": true,
|
||||
"isReasoning": true,
|
||||
"version": "preview-03-25",
|
||||
"description": "Google最强大的Gemini 2.5 Pro模型,支持图像理解"
|
||||
"version": "latest",
|
||||
"description": "Google最强大的Gemini 2.5 Pro模型,支持图像理解(需要付费API密钥)"
|
||||
},
|
||||
"gemini-2.0-flash": {
|
||||
"name": "Gemini 2.0 Flash",
|
||||
"gemini-2.5-flash": {
|
||||
"name": "Gemini 2.5 Flash",
|
||||
"provider": "google",
|
||||
"supportsMultimodal": true,
|
||||
"isReasoning": false,
|
||||
"version": "latest",
|
||||
"description": "Google更快速的Gemini 2.0 Flash模型,支持图像理解,响应更迅速"
|
||||
"description": "Google更快速的Gemini 2.5 Flash模型,支持图像理解,有免费配额"
|
||||
},
|
||||
"doubao-seed-1-6-250615": {
|
||||
"name": "Doubao-Seed-1.6",
|
||||
"provider": "doubao",
|
||||
"supportsMultimodal": true,
|
||||
"isReasoning": true,
|
||||
"version": "latest",
|
||||
"description": "支持auto/thinking/non-thinking三种思考模式、支持多模态、256K长上下文"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4,7 +4,8 @@
|
||||
"anthropic": "",
|
||||
"deepseek": "",
|
||||
"google": "",
|
||||
"openai": ""
|
||||
"openai": "",
|
||||
"doubao": ""
|
||||
},
|
||||
"enabled": true
|
||||
}
|
||||
@@ -4,6 +4,7 @@ from .openai import OpenAIModel
|
||||
from .deepseek import DeepSeekModel
|
||||
from .alibaba import AlibabaModel
|
||||
from .google import GoogleModel
|
||||
from .doubao import DoubaoModel
|
||||
from .factory import ModelFactory
|
||||
|
||||
__all__ = [
|
||||
@@ -13,5 +14,6 @@ __all__ = [
|
||||
'DeepSeekModel',
|
||||
'AlibabaModel',
|
||||
'GoogleModel',
|
||||
'DoubaoModel',
|
||||
'ModelFactory'
|
||||
]
|
||||
|
||||
@@ -4,12 +4,13 @@ from openai import OpenAI
|
||||
from .base import BaseModel
|
||||
|
||||
class AlibabaModel(BaseModel):
|
||||
def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None):
|
||||
def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None, api_base_url: str = None):
|
||||
# 如果没有提供模型名称,才使用默认值
|
||||
self.model_name = model_name if model_name else "QVQ-Max-2025-03-25"
|
||||
print(f"初始化阿里巴巴模型: {self.model_name}")
|
||||
# 在super().__init__之前设置model_name,这样get_default_system_prompt能使用它
|
||||
super().__init__(api_key, temperature, system_prompt, language)
|
||||
self.api_base_url = api_base_url # 存储API基础URL
|
||||
|
||||
def get_default_system_prompt(self) -> str:
|
||||
"""根据模型名称返回不同的默认系统提示词"""
|
||||
|
||||
177
models/baidu_ocr.py
Normal file
177
models/baidu_ocr.py
Normal file
@@ -0,0 +1,177 @@
|
||||
import base64
|
||||
import json
|
||||
import time
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
from typing import Generator, Dict, Any
|
||||
from .base import BaseModel
|
||||
|
||||
class BaiduOCRModel(BaseModel):
    """Baidu OCR backend that extracts plain text from a base64-encoded image.

    The model exchanges an API key / secret key pair for an access token at
    ``token_url`` and then posts the image to ``ocr_url``. It exposes the
    same ``analyze_image`` / ``analyze_text`` generator interface as the
    other models, plus ``extract_full_text`` as a direct OCR entry point.
    """

    # Seconds to wait for each HTTP call. Without a timeout a stalled
    # connection would block the calling request handler indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key: str, secret_key: str = None, temperature: float = 0.7, system_prompt: str = None):
        """Initialise the Baidu OCR model.

        Args:
            api_key: Baidu API key, or the combined ``"API_KEY:SECRET_KEY"``
                string when ``secret_key`` is not given.
            secret_key: Baidu secret key. Optional when it is embedded in
                ``api_key`` after a colon.
            temperature: Unused by OCR; accepted for BaseModel compatibility.
            system_prompt: Unused by OCR; accepted for BaseModel compatibility.

        Raises:
            ValueError: If no ``secret_key`` is given and ``api_key`` is not
                exactly two non-empty parts separated by a single colon.
        """
        super().__init__(api_key, temperature, system_prompt)

        # Two accepted forms: separate arguments, or one colon-joined string.
        if secret_key:
            self.api_key = api_key
            self.secret_key = secret_key
        else:
            parts = api_key.split(':') if isinstance(api_key, str) else []
            # Reject empty halves (":secret", "key:") here, instead of
            # letting them surface later as an opaque authentication failure.
            if len(parts) != 2 or not all(part.strip() for part in parts):
                raise ValueError("百度OCR API密钥必须是 'API_KEY:SECRET_KEY' 格式或单独传递secret_key参数")
            self.api_key, self.secret_key = (part.strip() for part in parts)

        # Baidu API endpoints.
        self.token_url = "https://aip.baidubce.com/oauth/2.0/token"
        self.ocr_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"

        # Cached access token and the epoch time at which it expires.
        self._access_token = None
        self._token_expires = 0

    def _post_form(self, url: str, params: dict) -> Any:
        """POST ``params`` as a URL-encoded form to ``url`` and return the decoded JSON body."""
        payload = urllib.parse.urlencode(params).encode('utf-8')
        request = urllib.request.Request(url, data=payload)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        with urllib.request.urlopen(request, timeout=self.REQUEST_TIMEOUT) as response:
            return json.loads(response.read().decode('utf-8'))

    def get_access_token(self) -> str:
        """Return a valid access token, requesting a new one when needed.

        A cached token is reused until five minutes before its expiry.

        Returns:
            The access token string.

        Raises:
            RuntimeError: If the token request fails or the response does
                not contain an ``access_token`` field.
        """
        # Refresh five minutes early so a token never expires mid-request.
        if self._access_token and time.time() < self._token_expires - 300:
            return self._access_token

        credentials = {
            'grant_type': 'client_credentials',
            'client_id': self.api_key,
            'client_secret': self.secret_key
        }

        # Only the network call and JSON decoding are guarded, so the
        # "missing token" error below is not wrapped a second time.
        try:
            result = self._post_form(self.token_url, credentials)
        except Exception as e:
            raise RuntimeError(f"请求access_token失败: {str(e)}") from e

        if 'access_token' not in result:
            raise RuntimeError(f"获取access_token失败: {result.get('error_description', '未知错误')}")

        self._access_token = result['access_token']
        # Fall back to 30 days (2592000 s) when the response omits expires_in.
        self._token_expires = time.time() + result.get('expires_in', 2592000)
        return self._access_token

    def _call_ocr_api(self, access_token: str, image_data: str) -> str:
        """Send one OCR request and return the recognised lines joined by newlines.

        Errors are raised without the ``OCR识别失败`` prefix so that each
        public caller can add it exactly once.
        """
        # Accept a full data URI as well as bare base64: keep only the part
        # after the first comma.
        if image_data.startswith('data:'):
            image_data = image_data.split(',', 1)[-1]

        params = {
            'image': image_data,
            'language_type': 'auto_detect',  # let the service detect the language
            'detect_direction': 'true',      # detect image orientation
            'probability': 'false'           # omit confidences to keep the response small
        }
        query = urllib.parse.urlencode({'access_token': access_token})
        result = self._post_form(f"{self.ocr_url}?{query}", params)

        if 'error_code' in result:
            raise RuntimeError(f"百度OCR API错误: {result.get('error_msg', '未知错误')}")

        return '\n'.join(item.get('words', '') for item in result.get('words_result', []))

    def ocr_image(self, image_data: str) -> str:
        """Run OCR on an image.

        Args:
            image_data: Base64-encoded image, optionally as a ``data:`` URI.

        Returns:
            The recognised text, one detected line per output line.

        Raises:
            RuntimeError: If obtaining the access token or the OCR request fails.
        """
        access_token = self.get_access_token()

        try:
            return self._call_ocr_api(access_token, image_data)
        except Exception as e:
            raise RuntimeError(f"OCR识别失败: {str(e)}") from e

    def extract_full_text(self, image_data: str) -> str:
        """Extract all text from an image (interface compatible with the Mathpix model).

        Args:
            image_data: Base64-encoded image data.

        Returns:
            The extracted text.
        """
        return self.ocr_image(image_data)

    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]:
        """Run OCR and yield a single result dict, mirroring the streaming model interface.

        Args:
            image_data: Base64-encoded image data.
            proxies: Proxy configuration (unused).

        Yields:
            A dict with ``status`` (``'completed'`` or ``'error'``),
            ``content`` and ``model`` keys.
        """
        try:
            # Call the unprefixed helper so the error text carries the
            # "OCR识别失败" prefix once rather than twice.
            text = self._call_ocr_api(self.get_access_token(), image_data)
        except Exception as e:
            yield {
                'status': 'error',
                'content': f'OCR识别失败: {str(e)}',
                'model': 'baidu-ocr'
            }
        else:
            yield {
                'status': 'completed',
                'content': text,
                'model': 'baidu-ocr'
            }

    def analyze_text(self, text: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]:
        """Yield an error dict: the OCR model does not support text analysis.

        Args:
            text: Input text (ignored).
            proxies: Proxy configuration (unused).

        Yields:
            A single dict with ``status`` set to ``'error'``.
        """
        yield {
            'status': 'error',
            'content': 'OCR模型不支持文本分析功能',
            'model': 'baidu-ocr'
        }

    def get_model_identifier(self) -> str:
        """Return the model identifier."""
        return "baidu-ocr"
|
||||
@@ -6,9 +6,10 @@ from openai import OpenAI
|
||||
from .base import BaseModel
|
||||
|
||||
class DeepSeekModel(BaseModel):
|
||||
def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = "deepseek-reasoner"):
|
||||
def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = "deepseek-reasoner", api_base_url: str = None):
|
||||
super().__init__(api_key, temperature, system_prompt, language)
|
||||
self.model_name = model_name
|
||||
self.api_base_url = api_base_url # 存储API基础URL
|
||||
|
||||
def get_default_system_prompt(self) -> str:
|
||||
return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question:
|
||||
|
||||
339
models/doubao.py
Normal file
339
models/doubao.py
Normal file
@@ -0,0 +1,339 @@
|
||||
import json
|
||||
import os
|
||||
import base64
|
||||
from typing import Generator, Dict, Any, Optional
|
||||
import requests
|
||||
from .base import BaseModel
|
||||
|
||||
class DoubaoModel(BaseModel):
    """Doubao (ByteDance) chat-completions client for text and image input.

    Requests are sent to ``{base_url}/chat/completions`` with streaming
    enabled. Each public ``analyze_*`` method is a generator yielding status
    dicts: ``started``, zero or more ``streaming`` (with the accumulated
    text so far), then ``completed``, or a single ``error``.
    """

    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None, api_base_url: str = None):
        """Initialise the Doubao model.

        Args:
            api_key: Doubao API key, sent as a Bearer token.
            temperature: Sampling temperature.
            system_prompt: System prompt (stored by the base class; not
                currently included in the request messages).
            language: Preferred answer language.
            model_name: Specific model name; the default identifier is used
                when omitted.
            api_base_url: Custom API base URL; the default endpoint is used
                when omitted.
        """
        super().__init__(api_key, temperature, system_prompt, language)
        self.model_name = model_name or self.get_model_identifier()
        # Drop trailing slashes so a custom URL cannot produce "//chat/completions".
        self.base_url = (api_base_url or "https://ark.cn-beijing.volces.com/api/v3").rstrip('/')
        self.max_tokens = 4096  # default maximum number of output tokens
        self.reasoning_config = None  # optional dict; its 'thinking_mode' key is read per request

    def get_default_system_prompt(self) -> str:
        """Return the default system prompt."""
        return """你是一个专业的问题分析专家。当看到问题图片时:
1. 仔细阅读并理解问题
2. 分解问题的关键组成部分
3. 提供清晰的分步解决方案
4. 如果相关,解释涉及的概念或理论
5. 如果有多种方法,优先解释最有效的方法"""

    def get_model_identifier(self) -> str:
        """Return the default model identifier."""
        return "doubao-seed-1-6-250615"  # Doubao-Seed-1.6

    def get_actual_model_name(self) -> str:
        """Map the configured model name to the identifier sent to the API.

        Names missing from the mapping fall back to the default model.
        """
        model_mapping = {
            "doubao-seed-1-6-250615": "doubao-seed-1-6-250615"
        }

        return model_mapping.get(self.model_name, "doubao-seed-1-6-250615")

    def _thinking_config(self) -> dict:
        """Build the request's ``thinking`` field from ``reasoning_config`` (default ``auto``)."""
        config = getattr(self, 'reasoning_config', None)
        mode = config.get('thinking_mode', "auto") if config else "auto"
        return {"type": mode}

    def _stream_chat(self, messages: list, proxies: dict = None) -> Generator[dict, None, None]:
        """Send ``messages`` and yield ``streaming`` / ``completed`` status dicts.

        Shared by ``analyze_text`` and ``analyze_image``. Exceptions
        propagate to the caller, which turns them into an ``error`` dict.
        """
        # Mirror the proxies into the environment for the duration of the
        # request, remembering the previous values so they can be restored.
        # NOTE(review): os.environ is process-wide, so concurrent requests can
        # observe each other's proxy settings while this generator is
        # suspended; the explicit ``proxies=`` argument below may make this
        # unnecessary — confirm before removing.
        saved_env = None
        if proxies:
            saved_env = {
                'http_proxy': os.environ.get('http_proxy'),
                'https_proxy': os.environ.get('https_proxy')
            }
            if 'http' in proxies:
                os.environ['http_proxy'] = proxies['http']
            if 'https' in proxies:
                os.environ['https_proxy'] = proxies['https']

        try:
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            }
            data = {
                "model": self.get_actual_model_name(),
                "messages": messages,
                "thinking": self._thinking_config(),
                "temperature": self.temperature,
                "max_tokens": self.max_tokens,
                "stream": True
            }

            # The context manager guarantees the streamed connection is
            # released even when the consumer stops early or parsing fails.
            with requests.post(
                f"{self.base_url}/chat/completions",
                headers=headers,
                json=data,
                stream=True,
                proxies=proxies if proxies else None,
                timeout=60
            ) as response:
                if response.status_code != 200:
                    raise RuntimeError(f"HTTP {response.status_code}: {response.text}")

                response_buffer = ""

                for raw_line in response.iter_lines():
                    if not raw_line:
                        continue

                    line = raw_line.decode('utf-8')
                    # Only "data:" lines carry payloads; the space after the
                    # colon is optional.
                    if not line.startswith('data:'):
                        continue

                    payload = line[5:].strip()
                    if payload == '[DONE]':
                        break

                    try:
                        chunk_data = json.loads(payload)
                    except json.JSONDecodeError:
                        # Skip lines that are not valid JSON.
                        continue

                    choices = chunk_data.get('choices', [])
                    if not choices:
                        continue

                    delta = choices[0].get('delta') or {}
                    content = delta.get('content', '')
                    if content:
                        response_buffer += content
                        # Each progress event carries the full text so far.
                        yield {
                            "status": "streaming",
                            "content": response_buffer
                        }

                # Always finish with the complete accumulated content.
                yield {
                    "status": "completed",
                    "content": response_buffer
                }
        finally:
            # Restore the proxy environment exactly as it was found.
            if saved_env:
                for key, value in saved_env.items():
                    if value is None:
                        os.environ.pop(key, None)
                    else:
                        os.environ[key] = value

    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream a response to a text question.

        Args:
            text: The user's question.
            proxies: Optional proxy mapping with ``http`` / ``https`` keys.

        Yields:
            Status dicts: ``started``, ``streaming``, ``completed``; or one
            ``error`` dict with an ``error`` message.
        """
        try:
            yield {"status": "started"}

            # The system prompt is intentionally not sent; only the user
            # message is included in the request.
            user_content = text
            if self.language and self.language != 'auto':
                user_content = f"请使用{self.language}回答以下问题: {text}"

            messages = [{
                "role": "user",
                "content": user_content
            }]

            yield from self._stream_chat(messages, proxies)

        except Exception as e:
            yield {
                "status": "error",
                "error": f"豆包API错误: {str(e)}"
            }

    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream an analysis of an image.

        Args:
            image_data: Base64-encoded image, optionally as a ``data:image`` URI.
            proxies: Optional proxy mapping with ``http`` / ``https`` keys.

        Yields:
            Status dicts: ``started``, ``streaming``, ``completed``; or one
            ``error`` dict with an ``error`` message.
        """
        try:
            yield {"status": "started"}

            # For a data URI keep only the base64 payload after the comma.
            if image_data.startswith('data:image'):
                image_data = image_data.split(',', 1)[1]

            # Guess the image type from the base64-encoded magic number;
            # anything that is not recognisably PNG is sent as JPEG.
            image_format = "jpeg"
            if image_data.startswith('/9j/'):  # JPEG magic number in base64
                image_format = "jpeg"
            elif image_data.startswith('iVBORw0KGgo'):  # PNG magic number in base64
                image_format = "png"

            user_content = [
                {
                    "type": "text",
                    "text": f"请使用{self.language}分析这张图片并提供详细解答。" if self.language and self.language != 'auto' else "请分析这张图片并提供详细解答?"
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/{image_format};base64,{image_data}"
                    }
                }
            ]

            messages = [
                {
                    "role": "user",
                    "content": user_content
                }
            ]

            yield from self._stream_chat(messages, proxies)

        except Exception as e:
            yield {
                "status": "error",
                "error": f"豆包图像分析错误: {str(e)}"
            }
|
||||
@@ -3,7 +3,8 @@ import json
|
||||
import os
|
||||
import importlib
|
||||
from .base import BaseModel
|
||||
from .mathpix import MathpixModel # MathpixModel仍然需要直接导入,因为它是特殊工具
|
||||
from .mathpix import MathpixModel # MathpixModel需要直接导入,因为它是特殊OCR工具
|
||||
from .baidu_ocr import BaiduOCRModel # 百度OCR也是特殊OCR工具,直接导入
|
||||
|
||||
class ModelFactory:
|
||||
# 模型基本信息,包含类型和特性
|
||||
@@ -39,13 +40,25 @@ class ModelFactory:
|
||||
'description': model_info.get('description', '')
|
||||
}
|
||||
|
||||
# 添加Mathpix模型(特殊工具模型)
|
||||
# 添加特殊OCR工具模型(不在配置文件中定义)
|
||||
|
||||
# 添加Mathpix OCR工具
|
||||
cls._models['mathpix'] = {
|
||||
'class': MathpixModel,
|
||||
'is_multimodal': True,
|
||||
'is_reasoning': False,
|
||||
'display_name': 'Mathpix OCR',
|
||||
'description': '文本提取工具,适用于数学公式和文本',
|
||||
'description': '数学公式识别工具,适用于复杂数学内容',
|
||||
'is_ocr_only': True
|
||||
}
|
||||
|
||||
# 添加百度OCR工具
|
||||
cls._models['baidu-ocr'] = {
|
||||
'class': BaiduOCRModel,
|
||||
'is_multimodal': True,
|
||||
'is_reasoning': False,
|
||||
'display_name': '百度OCR',
|
||||
'description': '通用文字识别工具,支持中文识别',
|
||||
'is_ocr_only': True
|
||||
}
|
||||
|
||||
@@ -62,22 +75,36 @@ class ModelFactory:
|
||||
# 不再硬编码模型定义,而是使用空字典
|
||||
cls._models = {}
|
||||
|
||||
# 只保留Mathpix作为基础工具
|
||||
# 添加特殊OCR工具(当配置加载失败时的备用)
|
||||
try:
|
||||
# 导入MathpixModel类
|
||||
# 导入并添加Mathpix OCR工具
|
||||
from .mathpix import MathpixModel
|
||||
|
||||
# 添加Mathpix作为基础工具
|
||||
cls._models['mathpix'] = {
|
||||
'class': MathpixModel,
|
||||
'is_multimodal': True,
|
||||
'is_reasoning': False,
|
||||
'display_name': 'Mathpix OCR',
|
||||
'description': '文本提取工具,适用于数学公式和文本',
|
||||
'description': '数学公式识别工具,适用于复杂数学内容',
|
||||
'is_ocr_only': True
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"无法加载基础Mathpix工具: {str(e)}")
|
||||
print(f"无法加载Mathpix OCR工具: {str(e)}")
|
||||
|
||||
# 添加百度OCR工具
|
||||
try:
|
||||
from .baidu_ocr import BaiduOCRModel
|
||||
|
||||
cls._models['baidu-ocr'] = {
|
||||
'class': BaiduOCRModel,
|
||||
'is_multimodal': True,
|
||||
'is_reasoning': False,
|
||||
'display_name': '百度OCR',
|
||||
'description': '通用文字识别工具,支持中文识别',
|
||||
'is_ocr_only': True
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"无法加载百度OCR工具: {str(e)}")
|
||||
|
||||
@classmethod
|
||||
def create_model(cls, model_name: str, api_key: str, temperature: float = 0.7,
|
||||
@@ -114,6 +141,25 @@ class ModelFactory:
|
||||
)
|
||||
# 对于阿里巴巴模型,也需要传递正确的模型名称
|
||||
elif 'qwen' in model_name.lower() or 'qvq' in model_name.lower() or 'alibaba' in model_name.lower():
|
||||
return model_class(
|
||||
api_key=api_key,
|
||||
temperature=temperature,
|
||||
system_prompt=system_prompt,
|
||||
language=language,
|
||||
model_name=model_name
|
||||
)
|
||||
# 对于Google模型,也需要传递正确的模型名称
|
||||
elif 'gemini' in model_name.lower() or 'google' in model_name.lower():
|
||||
return model_class(
|
||||
api_key=api_key,
|
||||
temperature=temperature,
|
||||
system_prompt=system_prompt,
|
||||
language=language,
|
||||
model_name=model_name,
|
||||
api_base_url=api_base_url
|
||||
)
|
||||
# 对于豆包模型,也需要传递正确的模型名称
|
||||
elif 'doubao' in model_name.lower():
|
||||
return model_class(
|
||||
api_key=api_key,
|
||||
temperature=temperature,
|
||||
@@ -129,6 +175,13 @@ class ModelFactory:
|
||||
temperature=temperature,
|
||||
system_prompt=system_prompt
|
||||
)
|
||||
# 对于百度OCR模型,传递api_key(支持API_KEY:SECRET_KEY格式)
|
||||
elif model_name == 'baidu-ocr':
|
||||
return model_class(
|
||||
api_key=api_key,
|
||||
temperature=temperature,
|
||||
system_prompt=system_prompt
|
||||
)
|
||||
# 对于Anthropic模型,需要传递model_identifier参数
|
||||
elif 'claude' in model_name.lower() or 'anthropic' in model_name.lower():
|
||||
return model_class(
|
||||
|
||||
@@ -30,10 +30,17 @@ class GoogleModel(BaseModel):
|
||||
|
||||
# 配置Google API
|
||||
if api_base_url:
|
||||
# 如果提供了自定义API基础URL,设置genai的api_url
|
||||
genai.configure(api_key=api_key, transport="rest", client_options={"api_endpoint": api_base_url})
|
||||
# 配置中转API - 使用环境变量方式
|
||||
# 移除末尾的斜杠以避免重复路径问题
|
||||
clean_base_url = api_base_url.rstrip('/')
|
||||
# 设置环境变量来指定API端点
|
||||
os.environ['GOOGLE_AI_API_ENDPOINT'] = clean_base_url
|
||||
genai.configure(api_key=api_key)
|
||||
else:
|
||||
# 使用默认API端点
|
||||
# 清除可能存在的自定义端点环境变量
|
||||
if 'GOOGLE_AI_API_ENDPOINT' in os.environ:
|
||||
del os.environ['GOOGLE_AI_API_ENDPOINT']
|
||||
genai.configure(api_key=api_key)
|
||||
|
||||
def get_default_system_prompt(self) -> str:
|
||||
@@ -46,7 +53,7 @@ class GoogleModel(BaseModel):
|
||||
|
||||
def get_model_identifier(self) -> str:
|
||||
"""返回默认的模型标识符"""
|
||||
return "gemini-2.5-pro-preview-03-25"
|
||||
return "gemini-2.5-flash" # 使用有免费配额的模型作为默认值
|
||||
|
||||
def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
|
||||
"""流式生成文本响应"""
|
||||
|
||||
@@ -1053,10 +1053,33 @@ class SnapSolver {
|
||||
this.extractTextBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i><span>提取中...</span>';
|
||||
|
||||
const settings = window.settingsManager.getSettings();
|
||||
|
||||
// 根据用户设置的OCR源进行选择
|
||||
const ocrSource = settings.ocrSource || 'auto';
|
||||
const baiduApiKey = window.settingsManager.apiKeyValues.BaiduApiKey;
|
||||
const baiduSecretKey = window.settingsManager.apiKeyValues.BaiduSecretKey;
|
||||
const mathpixApiKey = settings.mathpixApiKey;
|
||||
|
||||
if (!mathpixApiKey || mathpixApiKey === ':') {
|
||||
window.uiManager.showToast('请在设置中输入Mathpix API凭据', 'error');
|
||||
const hasBaiduOCR = baiduApiKey && baiduSecretKey;
|
||||
const hasMathpix = mathpixApiKey && mathpixApiKey !== ':';
|
||||
|
||||
// 根据OCR源配置检查可用性
|
||||
let canProceed = false;
|
||||
let missingOCRMessage = '';
|
||||
|
||||
if (ocrSource === 'baidu') {
|
||||
canProceed = hasBaiduOCR;
|
||||
missingOCRMessage = '请在设置中配置百度OCR API密钥';
|
||||
} else if (ocrSource === 'mathpix') {
|
||||
canProceed = hasMathpix;
|
||||
missingOCRMessage = '请在设置中配置Mathpix API密钥';
|
||||
} else { // auto
|
||||
canProceed = hasBaiduOCR || hasMathpix;
|
||||
missingOCRMessage = '请在设置中配置OCR API密钥:百度OCR(推荐)或Mathpix';
|
||||
}
|
||||
|
||||
if (!canProceed) {
|
||||
window.uiManager.showToast(missingOCRMessage, 'error');
|
||||
document.getElementById('settingsPanel').classList.add('active');
|
||||
this.extractTextBtn.disabled = false;
|
||||
this.extractTextBtn.innerHTML = '<i class="fas fa-font"></i><span>提取文本</span>';
|
||||
@@ -1076,7 +1099,7 @@ class SnapSolver {
|
||||
this.socket.emit('extract_text', {
|
||||
image: this.croppedImage.split(',')[1],
|
||||
settings: {
|
||||
mathpixApiKey: mathpixApiKey
|
||||
ocrSource: settings.ocrSource || 'auto'
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -374,6 +374,9 @@ class SettingsManager {
|
||||
// 模型选择器对象
|
||||
this.modelSelector = null;
|
||||
|
||||
// OCR源配置
|
||||
this.ocrSource = 'auto'; // 默认自动选择
|
||||
|
||||
// 存储API密钥的对象
|
||||
this.apiKeyValues = {
|
||||
'AnthropicApiKey': '',
|
||||
@@ -381,6 +384,9 @@ class SettingsManager {
|
||||
'DeepseekApiKey': '',
|
||||
'AlibabaApiKey': '',
|
||||
'GoogleApiKey': '',
|
||||
'DoubaoApiKey': '',
|
||||
'BaiduApiKey': '',
|
||||
'BaiduSecretKey': '',
|
||||
'MathpixAppId': '',
|
||||
'MathpixAppKey': ''
|
||||
};
|
||||
@@ -391,7 +397,8 @@ class SettingsManager {
|
||||
'OpenaiApiBaseUrl': '',
|
||||
'DeepseekApiBaseUrl': '',
|
||||
'AlibabaApiBaseUrl': '',
|
||||
'GoogleApiBaseUrl': ''
|
||||
'GoogleApiBaseUrl': '',
|
||||
'DoubaoApiBaseUrl': ''
|
||||
};
|
||||
|
||||
// 加载模型配置
|
||||
@@ -580,6 +587,13 @@ class SettingsManager {
|
||||
this.updateReasoningOptionUI(settings.reasoningDepth);
|
||||
}
|
||||
|
||||
// 加载豆包思考模式设置
|
||||
if (settings.doubaoThinkingMode && this.doubaoThinkingModeSelect) {
|
||||
this.doubaoThinkingModeSelect.value = settings.doubaoThinkingMode;
|
||||
// 更新豆包思考选项UI
|
||||
this.updateDoubaoThinkingOptionUI(settings.doubaoThinkingMode);
|
||||
}
|
||||
|
||||
// 加载思考预算百分比
|
||||
const thinkBudgetPercent = parseInt(settings.thinkBudgetPercent || '50');
|
||||
if (this.thinkBudgetPercentInput) {
|
||||
@@ -624,6 +638,14 @@ class SettingsManager {
|
||||
this.proxyPortInput.value = settings.proxyPort;
|
||||
}
|
||||
|
||||
// Load OCR source setting
|
||||
if (settings.ocrSource) {
|
||||
this.ocrSource = settings.ocrSource;
|
||||
if (this.ocrSourceSelect) {
|
||||
this.ocrSourceSelect.value = settings.ocrSource;
|
||||
}
|
||||
}
|
||||
|
||||
// Update UI based on model type
|
||||
this.updateUIBasedOnModelType();
|
||||
|
||||
@@ -720,6 +742,14 @@ class SettingsManager {
|
||||
this.thinkBudgetGroup.style.display = showThinkBudget ? 'block' : 'none';
|
||||
}
|
||||
|
||||
// 处理豆包深度思考设置显示
|
||||
const isDoubaoReasoning = modelInfo.isReasoning && modelInfo.provider === 'doubao';
|
||||
|
||||
// 只有对豆包推理模型才显示深度思考设置
|
||||
if (this.doubaoThinkingGroup) {
|
||||
this.doubaoThinkingGroup.style.display = isDoubaoReasoning ? 'block' : 'none';
|
||||
}
|
||||
|
||||
// 控制最大Token设置的显示
|
||||
// 阿里巴巴模型不支持自定义Token设置
|
||||
const maxTokensGroup = this.maxTokens ? this.maxTokens.closest('.setting-group') : null;
|
||||
@@ -759,6 +789,8 @@ class SettingsManager {
|
||||
apiKeyToHighlight = document.querySelector('.api-key-status:nth-child(4)'); // Alibaba
|
||||
} else if (modelType && (modelType.toLowerCase().includes('gemini') || modelType.toLowerCase().includes('google'))) {
|
||||
apiKeyToHighlight = document.querySelector('.api-key-status:nth-child(5)'); // Google
|
||||
} else if (modelType && modelType.toLowerCase().includes('doubao')) {
|
||||
apiKeyToHighlight = document.querySelector('.api-key-status:nth-child(6)'); // 豆包
|
||||
}
|
||||
|
||||
if (apiKeyToHighlight) {
|
||||
@@ -775,6 +807,7 @@ class SettingsManager {
|
||||
model: this.modelSelect.value,
|
||||
maxTokens: this.maxTokens.value,
|
||||
reasoningDepth: this.reasoningDepthSelect?.value || 'standard',
|
||||
doubaoThinkingMode: this.doubaoThinkingModeSelect?.value || 'auto',
|
||||
thinkBudgetPercent: this.thinkBudgetPercentInput?.value || '50',
|
||||
temperature: this.temperatureInput.value,
|
||||
language: this.languageInput.value,
|
||||
@@ -782,7 +815,8 @@ class SettingsManager {
|
||||
currentPromptId: this.currentPromptId,
|
||||
proxyEnabled: this.proxyEnabledInput.checked,
|
||||
proxyHost: this.proxyHostInput.value,
|
||||
proxyPort: this.proxyPortInput.value
|
||||
proxyPort: this.proxyPortInput.value,
|
||||
ocrSource: this.ocrSource // 添加OCR源配置保存
|
||||
};
|
||||
|
||||
// 保存设置到localStorage
|
||||
@@ -832,17 +866,30 @@ class SettingsManager {
|
||||
const reasoningDepth = this.reasoningDepthSelect?.value || 'standard';
|
||||
const thinkBudgetPercent = parseInt(this.thinkBudgetPercentInput?.value || '50');
|
||||
|
||||
// 获取豆包思考模式设置
|
||||
const doubaoThinkingMode = this.doubaoThinkingModeSelect?.value || 'auto';
|
||||
|
||||
// 计算思考预算的实际Token数
|
||||
const thinkBudget = Math.floor(maxTokens * (thinkBudgetPercent / 100));
|
||||
|
||||
// 构建推理配置参数
|
||||
const reasoningConfig = {};
|
||||
if (modelInfo.provider === 'anthropic' && modelInfo.isReasoning) {
|
||||
if (reasoningDepth === 'extended') {
|
||||
reasoningConfig.reasoning_depth = 'extended';
|
||||
reasoningConfig.think_budget = thinkBudget;
|
||||
} else {
|
||||
reasoningConfig.speed_mode = 'instant';
|
||||
|
||||
// 处理不同模型的推理配置
|
||||
if (modelInfo.isReasoning) {
|
||||
// 对于Anthropic模型
|
||||
if (modelInfo.provider === 'anthropic') {
|
||||
if (reasoningDepth === 'extended') {
|
||||
reasoningConfig.reasoning_depth = 'extended';
|
||||
reasoningConfig.think_budget = thinkBudget;
|
||||
} else {
|
||||
reasoningConfig.speed_mode = 'instant';
|
||||
}
|
||||
}
|
||||
|
||||
// 对于豆包模型
|
||||
if (modelInfo.provider === 'doubao') {
|
||||
reasoningConfig.thinking_mode = doubaoThinkingMode;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -869,6 +916,9 @@ class SettingsManager {
|
||||
if (this.apiBaseUrlValues['GoogleApiBaseUrl']) {
|
||||
apiBaseUrls.google = this.apiBaseUrlValues['GoogleApiBaseUrl'];
|
||||
}
|
||||
if (this.apiBaseUrlValues['DoubaoApiBaseUrl']) {
|
||||
apiBaseUrls.doubao = this.apiBaseUrlValues['DoubaoApiBaseUrl'];
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -881,6 +931,8 @@ class SettingsManager {
|
||||
proxyHost: this.proxyHostInput.value,
|
||||
proxyPort: this.proxyPortInput.value,
|
||||
mathpixApiKey: mathpixApiKey,
|
||||
ocrSource: this.ocrSource, // 添加OCR源配置
|
||||
doubaoThinkingMode: doubaoThinkingMode, // 添加豆包思考模式配置
|
||||
modelInfo: {
|
||||
supportsMultimodal: modelInfo.supportsMultimodal || false,
|
||||
isReasoning: modelInfo.isReasoning || false,
|
||||
@@ -1121,6 +1173,20 @@ class SettingsManager {
|
||||
this.saveSettings();
|
||||
});
|
||||
|
||||
// OCR源选择器事件监听
|
||||
if (this.ocrSourceSelect) {
|
||||
this.ocrSourceSelect.addEventListener('change', (e) => {
|
||||
// 阻止事件冒泡
|
||||
e.stopPropagation();
|
||||
|
||||
// 更新OCR源配置
|
||||
this.ocrSource = e.target.value;
|
||||
this.saveSettings();
|
||||
|
||||
console.log('OCR源已切换为:', this.ocrSource);
|
||||
});
|
||||
}
|
||||
|
||||
// Panel visibility
|
||||
if (this.settingsToggle) {
|
||||
this.settingsToggle.addEventListener('click', () => {
|
||||
@@ -1195,6 +1261,71 @@ class SettingsManager {
|
||||
|
||||
// 初始化API密钥编辑功能
|
||||
this.initApiKeyEditFunctions();
|
||||
|
||||
// 初始化推理选项事件
|
||||
this.initReasoningOptionEvents();
|
||||
|
||||
// 初始化豆包思考选项事件
|
||||
this.initDoubaoThinkingOptionEvents();
|
||||
}
|
||||
|
||||
// 初始化推理选项事件
|
||||
/**
 * Wire up click handling for every `.reasoning-option` element.
 *
 * On click, the option's `data-value` attribute is copied into
 * `this.reasoningDepthSelect`, the option UI is refreshed through
 * `updateReasoningOptionUI`, and the settings are persisted with
 * `saveSettings`. Nothing happens when the attribute is empty or the
 * select element reference is missing.
 */
initReasoningOptionEvents() {
|
||||
const reasoningOptions = document.querySelectorAll('.reasoning-option');
|
||||
reasoningOptions.forEach(option => {
|
||||
option.addEventListener('click', (e) => {
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
|
||||
const value = option.getAttribute('data-value');
|
||||
if (value && this.reasoningDepthSelect) {
|
||||
// Update the select's value
|
||||
this.reasoningDepthSelect.value = value;
|
||||
|
||||
// Refresh the option UI
|
||||
this.updateReasoningOptionUI(value);
|
||||
|
||||
// Persist the settings
|
||||
this.saveSettings();
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// 初始化豆包思考选项事件
|
||||
/**
 * Wire up click handling for every `.doubao-thinking-option` element.
 *
 * On click, the option's `data-value` attribute is copied into
 * `this.doubaoThinkingModeSelect`, the option UI is refreshed through
 * `updateDoubaoThinkingOptionUI`, and the settings are persisted with
 * `saveSettings`. Nothing happens when the attribute is empty or the
 * select element reference is missing.
 */
initDoubaoThinkingOptionEvents() {
|
||||
const doubaoThinkingOptions = document.querySelectorAll('.doubao-thinking-option');
|
||||
doubaoThinkingOptions.forEach(option => {
|
||||
option.addEventListener('click', (e) => {
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
|
||||
const value = option.getAttribute('data-value');
|
||||
if (value && this.doubaoThinkingModeSelect) {
|
||||
// Update the select's value
|
||||
this.doubaoThinkingModeSelect.value = value;
|
||||
|
||||
// Refresh the option UI
|
||||
this.updateDoubaoThinkingOptionUI(value);
|
||||
|
||||
// Persist the settings
|
||||
this.saveSettings();
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// 更新豆包思考选项UI
|
||||
/**
 * Highlight the Doubao thinking option that matches `value`.
 *
 * Every `.doubao-thinking-option` element whose `data-value` attribute
 * strictly equals `value` receives the `active` class; all other options
 * have that class removed.
 *
 * @param value - value compared against each option's `data-value`.
 */
updateDoubaoThinkingOptionUI(value) {
|
||||
const doubaoThinkingOptions = document.querySelectorAll('.doubao-thinking-option');
|
||||
doubaoThinkingOptions.forEach(option => {
|
||||
const optionValue = option.getAttribute('data-value');
|
||||
if (optionValue === value) {
|
||||
option.classList.add('active');
|
||||
} else {
|
||||
option.classList.remove('active');
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// 更新思考预算显示
|
||||
@@ -2208,10 +2339,17 @@ class SettingsManager {
|
||||
this.thinkBudgetPercentValue = document.getElementById('thinkBudgetPercentValue');
|
||||
this.thinkBudgetGroup = document.querySelector('.think-budget-group');
|
||||
|
||||
// 豆包深度思考相关元素
|
||||
this.doubaoThinkingModeSelect = document.getElementById('doubaoThinkingMode');
|
||||
this.doubaoThinkingGroup = document.querySelector('.doubao-thinking-group');
|
||||
|
||||
// Initialize Mathpix inputs
|
||||
this.mathpixAppIdInput = document.getElementById('mathpixAppId');
|
||||
this.mathpixAppKeyInput = document.getElementById('mathpixAppKey');
|
||||
|
||||
// OCR源选择器
|
||||
this.ocrSourceSelect = document.getElementById('ocrSourceSelect');
|
||||
|
||||
// API Key elements - 所有的密钥输入框
|
||||
this.apiKeyInputs = {
|
||||
'AnthropicApiKey': document.getElementById('AnthropicApiKey'),
|
||||
@@ -2260,6 +2398,9 @@ class SettingsManager {
|
||||
'DeepseekApiKey': '',
|
||||
'AlibabaApiKey': '',
|
||||
'GoogleApiKey': '',
|
||||
'DoubaoApiKey': '',
|
||||
'BaiduApiKey': '',
|
||||
'BaiduSecretKey': '',
|
||||
'MathpixAppId': '',
|
||||
'MathpixAppKey': ''
|
||||
};
|
||||
@@ -2359,7 +2500,8 @@ class SettingsManager {
|
||||
'OpenaiApiBaseUrl': proxyApiConfig.apis?.openai || '',
|
||||
'DeepseekApiBaseUrl': proxyApiConfig.apis?.deepseek || '',
|
||||
'AlibabaApiBaseUrl': proxyApiConfig.apis?.alibaba || '',
|
||||
'GoogleApiBaseUrl': proxyApiConfig.apis?.google || ''
|
||||
'GoogleApiBaseUrl': proxyApiConfig.apis?.google || '',
|
||||
'DoubaoApiBaseUrl': proxyApiConfig.apis?.doubao || ''
|
||||
};
|
||||
this.updateApiBaseUrlStatus(apiBaseUrls);
|
||||
console.log('API基础URL状态已刷新');
|
||||
@@ -2449,6 +2591,9 @@ class SettingsManager {
|
||||
case 'GoogleApiBaseUrl':
|
||||
config.apis.google = value;
|
||||
break;
|
||||
case 'DoubaoApiBaseUrl':
|
||||
config.apis.doubao = value;
|
||||
break;
|
||||
}
|
||||
|
||||
// 确保启用中转API
|
||||
|
||||
204
static/style.css
204
static/style.css
@@ -2174,6 +2174,82 @@ button:disabled {
|
||||
transition: all 0.2s ease-in-out;
|
||||
}
|
||||
|
||||
/* OCR设置样式 */
|
||||
.ocr-settings {
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
|
||||
.ocr-source-control {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.ocr-source-selector {
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.ocr-source-select {
|
||||
width: 100%;
|
||||
padding: 10px 14px;
|
||||
border: 1px solid var(--border-color);
|
||||
border-radius: 8px;
|
||||
background: var(--surface);
|
||||
color: var(--text-primary);
|
||||
font-size: 0.9rem;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.ocr-source-select:hover {
|
||||
border-color: var(--primary-color);
|
||||
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
|
||||
.ocr-source-select:focus {
|
||||
outline: none;
|
||||
border-color: var(--primary-color);
|
||||
box-shadow: 0 0 0 3px rgba(var(--primary-rgb), 0.1);
|
||||
}
|
||||
|
||||
.ocr-source-description {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 8px;
|
||||
padding: 12px;
|
||||
background: rgba(0, 0, 0, 0.02);
|
||||
border-radius: 8px;
|
||||
border: 1px solid var(--border-color);
|
||||
}
|
||||
|
||||
.ocr-desc-item {
|
||||
display: flex;
|
||||
align-items: flex-start;
|
||||
gap: 8px;
|
||||
font-size: 0.85rem;
|
||||
line-height: 1.4;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.ocr-desc-item i {
|
||||
color: var(--primary-color);
|
||||
margin-top: 2px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.ocr-desc-item strong {
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
/* 暗色主题下的OCR设置样式 */
|
||||
[data-theme="dark"] .ocr-source-description {
|
||||
background: rgba(255, 255, 255, 0.02);
|
||||
}
|
||||
|
||||
[data-theme="dark"] .ocr-source-select {
|
||||
background: var(--surface);
|
||||
border-color: var(--border-color);
|
||||
}
|
||||
|
||||
/* 新增的推理控制组件样式 */
|
||||
.reasoning-control {
|
||||
display: flex;
|
||||
@@ -2260,6 +2336,122 @@ button:disabled {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
/* 豆包深度思考控制组件样式 */
|
||||
.doubao-thinking-control {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.doubao-thinking-label {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: 6px;
|
||||
}
|
||||
|
||||
.doubao-thinking-selector {
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
.doubao-thinking-option {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
padding: 12px 8px;
|
||||
border-radius: 8px;
|
||||
background: rgba(0, 0, 0, 0.05);
|
||||
cursor: pointer;
|
||||
transition: all 0.2s ease;
|
||||
border: 2px solid transparent;
|
||||
position: relative;
|
||||
overflow: hidden;
|
||||
min-height: 80px;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.doubao-thinking-option::before {
|
||||
content: '';
|
||||
position: absolute;
|
||||
bottom: 0;
|
||||
left: 0;
|
||||
width: 100%;
|
||||
height: 3px;
|
||||
background: linear-gradient(to right, var(--primary-color), transparent);
|
||||
opacity: 0;
|
||||
transition: opacity 0.3s ease;
|
||||
}
|
||||
|
||||
.doubao-thinking-option:hover {
|
||||
transform: translateY(-2px);
|
||||
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
|
||||
.doubao-thinking-option.active {
|
||||
background: rgba(var(--primary-rgb), 0.1);
|
||||
border-color: var(--primary-color);
|
||||
}
|
||||
|
||||
.doubao-thinking-option.active::before {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.doubao-thinking-option i {
|
||||
font-size: 1.3rem;
|
||||
margin-bottom: 6px;
|
||||
color: var(--primary-color);
|
||||
opacity: 0.8;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.doubao-thinking-option .option-name {
|
||||
font-weight: 600;
|
||||
font-size: 0.85rem;
|
||||
margin-bottom: 4px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.doubao-thinking-option .option-desc {
|
||||
font-size: 0.7rem;
|
||||
opacity: 0.7;
|
||||
text-align: center;
|
||||
line-height: 1.2;
|
||||
}
|
||||
|
||||
.doubao-thinking-option:hover i {
|
||||
transform: scale(1.1);
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.doubao-thinking-desc {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 6px;
|
||||
margin-top: 8px;
|
||||
padding: 8px;
|
||||
background: rgba(0, 0, 0, 0.03);
|
||||
border-radius: 6px;
|
||||
}
|
||||
|
||||
.doubao-desc-item {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
font-size: 0.8rem;
|
||||
opacity: 0.8;
|
||||
}
|
||||
|
||||
.doubao-desc-item i {
|
||||
font-size: 0.9rem;
|
||||
color: var(--primary-color);
|
||||
opacity: 0.7;
|
||||
width: 16px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
/* 思考预算控制组件样式 */
|
||||
.think-budget-control {
|
||||
display: flex;
|
||||
@@ -2411,6 +2603,18 @@ button:disabled {
|
||||
background: rgba(var(--primary-rgb), 0.2);
|
||||
}
|
||||
|
||||
[data-theme="dark"] .doubao-thinking-option {
|
||||
background: rgba(255, 255, 255, 0.05);
|
||||
}
|
||||
|
||||
[data-theme="dark"] .doubao-thinking-option.active {
|
||||
background: rgba(var(--primary-rgb), 0.2);
|
||||
}
|
||||
|
||||
[data-theme="dark"] .doubao-thinking-desc {
|
||||
background: rgba(255, 255, 255, 0.03);
|
||||
}
|
||||
|
||||
[data-theme="dark"] .think-value-badge {
|
||||
background: rgba(255, 255, 255, 0.1);
|
||||
}
|
||||
|
||||
@@ -219,6 +219,49 @@
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div class="setting-group doubao-thinking-group" style="display: none;">
|
||||
<div class="doubao-thinking-control">
|
||||
<div class="doubao-thinking-label">
|
||||
<label for="doubaoThinkingMode"><i class="fas fa-cogs"></i> 豆包深度思考模式</label>
|
||||
</div>
|
||||
<div class="doubao-thinking-selector">
|
||||
<div class="doubao-thinking-option active" data-value="auto">
|
||||
<i class="fas fa-magic"></i>
|
||||
<span class="option-name">自动模式</span>
|
||||
<span class="option-desc">由AI自动决定是否使用深度思考</span>
|
||||
</div>
|
||||
<div class="doubao-thinking-option" data-value="enabled">
|
||||
<i class="fas fa-brain"></i>
|
||||
<span class="option-name">开启思考</span>
|
||||
<span class="option-desc">强制启用深度思考过程</span>
|
||||
</div>
|
||||
<div class="doubao-thinking-option" data-value="disabled">
|
||||
<i class="fas fa-bolt"></i>
|
||||
<span class="option-name">关闭思考</span>
|
||||
<span class="option-desc">禁用深度思考,快速响应</span>
|
||||
</div>
|
||||
</div>
|
||||
<select id="doubaoThinkingMode" class="hidden">
|
||||
<option value="auto">自动模式</option>
|
||||
<option value="enabled">开启思考</option>
|
||||
<option value="disabled">关闭思考</option>
|
||||
</select>
|
||||
<div class="doubao-thinking-desc">
|
||||
<div class="doubao-desc-item">
|
||||
<i class="fas fa-info-circle"></i>
|
||||
<span><strong>自动模式:</strong>AI根据问题复杂度自动决定</span>
|
||||
</div>
|
||||
<div class="doubao-desc-item">
|
||||
<i class="fas fa-lightbulb"></i>
|
||||
<span><strong>开启思考:</strong>显示完整的思考推理过程</span>
|
||||
</div>
|
||||
<div class="doubao-desc-item">
|
||||
<i class="fas fa-rocket"></i>
|
||||
<span><strong>关闭思考:</strong>直接给出答案,响应更快</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="setting-group think-budget-group">
|
||||
<div class="think-budget-control">
|
||||
<div class="think-budget-label">
|
||||
@@ -252,6 +295,7 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<!-- 已删除重复的豆包思考模式UI元素 -->
|
||||
<div class="setting-group">
|
||||
<div class="temperature-control">
|
||||
<div class="temperature-label">
|
||||
@@ -309,6 +353,37 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- OCR设置部分 -->
|
||||
<div class="settings-section ocr-settings">
|
||||
<h3><i class="fas fa-font"></i> OCR设置</h3>
|
||||
<div class="setting-group">
|
||||
<div class="ocr-source-control">
|
||||
<label for="ocrSourceSelect"><i class="fas fa-eye"></i> OCR工具源</label>
|
||||
<div class="ocr-source-selector">
|
||||
<select id="ocrSourceSelect" class="ocr-source-select">
|
||||
<option value="auto">自动选择</option>
|
||||
<option value="baidu">百度OCR</option>
|
||||
<option value="mathpix">Mathpix</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="ocr-source-description">
|
||||
<div class="ocr-desc-item">
|
||||
<i class="fas fa-magic"></i>
|
||||
<span><strong>自动选择:</strong>优先使用百度OCR,如无配置则使用Mathpix</span>
|
||||
</div>
|
||||
<div class="ocr-desc-item">
|
||||
<i class="fas fa-language"></i>
|
||||
<span><strong>百度OCR:</strong>支持中文,免费额度大,推荐使用</span>
|
||||
</div>
|
||||
<div class="ocr-desc-item">
|
||||
<i class="fas fa-square-root-alt"></i>
|
||||
<span><strong>Mathpix:</strong>专业数学公式识别,支持LaTeX格式</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 2. 所有API密钥集中在一个区域 -->
|
||||
<div class="settings-section api-key-settings">
|
||||
<h3><i class="fas fa-key"></i> API密钥设置</h3>
|
||||
@@ -425,6 +500,75 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="api-key-status">
|
||||
<span class="key-name">Doubao API:</span>
|
||||
<div class="key-status-wrapper">
|
||||
<!-- 显示状态 -->
|
||||
<div class="key-display">
|
||||
<span id="DoubaoApiKeyStatus" class="key-status" data-key="DoubaoApiKey">未设置</span>
|
||||
<button class="btn-icon edit-api-key" data-key-type="DoubaoApiKey" title="编辑此密钥">
|
||||
<i class="fas fa-edit"></i>
|
||||
</button>
|
||||
</div>
|
||||
<!-- 编辑状态 -->
|
||||
<div class="key-edit hidden">
|
||||
<input type="password" class="key-input" data-key-type="DoubaoApiKey" placeholder="输入Doubao API key">
|
||||
<button class="btn-icon toggle-visibility">
|
||||
<i class="fas fa-eye"></i>
|
||||
</button>
|
||||
<button class="btn-icon save-api-key" data-key-type="DoubaoApiKey" title="保存密钥">
|
||||
<i class="fas fa-save"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 百度OCR API Key配置 -->
|
||||
<div class="api-key-status">
|
||||
<span class="key-name">百度OCR API Key:</span>
|
||||
<div class="key-status-wrapper">
|
||||
<!-- 显示状态 -->
|
||||
<div class="key-display">
|
||||
<span id="BaiduApiKeyStatus" class="key-status" data-key="BaiduApiKey">未设置</span>
|
||||
<button class="btn-icon edit-api-key" data-key-type="BaiduApiKey" title="编辑此密钥">
|
||||
<i class="fas fa-edit"></i>
|
||||
</button>
|
||||
</div>
|
||||
<!-- 编辑状态 -->
|
||||
<div class="key-edit hidden">
|
||||
<input type="password" class="key-input" data-key-type="BaiduApiKey" placeholder="输入百度OCR API Key">
|
||||
<button class="btn-icon toggle-visibility">
|
||||
<i class="fas fa-eye"></i>
|
||||
</button>
|
||||
<button class="btn-icon save-api-key" data-key-type="BaiduApiKey" title="保存密钥">
|
||||
<i class="fas fa-save"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="api-key-status">
|
||||
<span class="key-name">百度OCR Secret Key:</span>
|
||||
<div class="key-status-wrapper">
|
||||
<!-- 显示状态 -->
|
||||
<div class="key-display">
|
||||
<span id="BaiduSecretKeyStatus" class="key-status" data-key="BaiduSecretKey">未设置</span>
|
||||
<button class="btn-icon edit-api-key" data-key-type="BaiduSecretKey" title="编辑此密钥">
|
||||
<i class="fas fa-edit"></i>
|
||||
</button>
|
||||
</div>
|
||||
<!-- 编辑状态 -->
|
||||
<div class="key-edit hidden">
|
||||
<input type="password" class="key-input" data-key-type="BaiduSecretKey" placeholder="输入百度OCR Secret Key">
|
||||
<button class="btn-icon toggle-visibility">
|
||||
<i class="fas fa-eye"></i>
|
||||
</button>
|
||||
<button class="btn-icon save-api-key" data-key-type="BaiduSecretKey" title="保存密钥">
|
||||
<i class="fas fa-save"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="api-key-status">
|
||||
<span class="key-name">Mathpix App ID:</span>
|
||||
<div class="key-status-wrapper">
|
||||
@@ -577,6 +721,25 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="api-key-status">
|
||||
<span class="key-name">Doubao API URL:</span>
|
||||
<div class="key-status-wrapper">
|
||||
<!-- 显示状态 -->
|
||||
<div class="key-display">
|
||||
<span id="DoubaoApiBaseUrlStatus" class="key-status" data-key="DoubaoApiBaseUrl">未设置</span>
|
||||
<button class="btn-icon edit-api-base-url" data-key-type="DoubaoApiBaseUrl" title="编辑此URL">
|
||||
<i class="fas fa-edit"></i>
|
||||
</button>
|
||||
</div>
|
||||
<!-- 编辑状态 -->
|
||||
<div class="key-edit hidden">
|
||||
<input type="text" class="key-input" data-key-type="DoubaoApiBaseUrl" placeholder="https://ark.cn-beijing.volces.com/api/v3">
|
||||
<button class="btn-icon save-api-base-url" data-key-type="DoubaoApiBaseUrl" title="保存URL">
|
||||
<i class="fas fa-save"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user