Merge pull request #10 from SkeStars/main

修复gemini接口，添加豆包接口；添加百度OCR支持，更新OCR源选择和设置界面
2026-03-04 07:48:12 +08:00 · 2025-08-03 15:40:56 +08:00
parent 160d716fbe 1c4305d59c
commit acdfca54b1
16 changed files with 1218 additions and 55 deletions
--- a/README.md
+++ b/README.md
@@ -79,7 +79,7 @@ graph TD
        <li><b>Claude-3.7</b>：Anthropic的高级理解与解释</li>
        <li><b>DeepSeek-v3/r1</b>：专为中文场景优化的模型</li>
        <li><b>QVQ-MAX/Qwen-VL-MAX</b>：以视觉推理闻名的国产AI</li>
-        <li><b>Gemini-2.5-Pro/2.0-flash</b>：智商130的非推理AI</li>
+        <li><b>Gemini-2.5-Pro/2.5-flash</b>：智商130的非推理AI</li>
      </ul>
    </td>
  </tr>
@@ -189,7 +189,7 @@ python app.py
 | **QVQ-MAX** | 多模态支持，推理支持 | 复杂问题，视觉分析 |
 | **Qwen-VL-MAX** | 多模态支持 | 简单问题，视觉分析 |
 | **Gemini-2.5-Pro** | 多模态支持 | 复杂问题，视觉分析 |
-| **Gemini-2.0-Flash** | 多模态支持 | 简单问题，视觉分析 |
+| **Gemini-2.5-Flash** | 多模态支持 | 简单问题，视觉分析 |
 ### 🛠️ 可调参数
@@ -247,4 +247,4 @@ python app.py
 ## 📜 开源协议
-本项目采用 [Apache 2.0](LICENSE) 协议。
+本项目采用 [Apache 2.0](LICENSE) 协议。
--- a/app.py
+++ b/app.py
@@ -101,6 +101,8 @@ def create_model_instance(model_id, settings, is_reasoning=False):
        api_key_id = "AlibabaApiKey"
    elif "gemini" in model_id.lower() or "google" in model_id.lower():
        api_key_id = "GoogleApiKey"
    elif "doubao" in model_id.lower():
        api_key_id = "DoubaoApiKey"
    # 首先尝试从本地配置获取API密钥
    api_key = get_api_key(api_key_id)
@@ -156,6 +158,10 @@ def create_model_instance(model_id, settings, is_reasoning=False):
            custom_base_url = api_base_urls.get('google')
            if custom_base_url:
                base_url = custom_base_url
        elif "doubao" in model_id.lower():
            custom_base_url = api_base_urls.get('doubao')
            if custom_base_url:
                base_url = custom_base_url
    # 创建模型实例
    model_instance = ModelFactory.create_model(
@@ -318,39 +324,66 @@ def handle_text_extraction(data):
        if not isinstance(settings, dict):
            raise ValueError("Invalid settings format")
-        # 尝试从本地配置获取Mathpix API密钥
+        # 优先使用百度OCR，如果没有配置则使用Mathpix
-        mathpix_app_id = get_api_key('MathpixAppId')
+        # 首先尝试获取百度OCR API密钥
-        mathpix_app_key = get_api_key('MathpixAppKey')
+        baidu_api_key = get_api_key('BaiduApiKey')
        baidu_secret_key = get_api_key('BaiduSecretKey')
-        # 构建完整的Mathpix API密钥（格式：app_id:app_key）
+        # 构建百度OCR API密钥（格式：api_key:secret_key）
-        mathpix_key = f"{mathpix_app_id}:{mathpix_app_key}" if mathpix_app_id and mathpix_app_key else None
+        ocr_key = None
        ocr_model = None
-        # 如果本地没有配置，尝试使用前端传递的密钥（向后兼容）
+        if baidu_api_key and baidu_secret_key:
-        if not mathpix_key:
+            ocr_key = f"{baidu_api_key}:{baidu_secret_key}"
-            mathpix_key = settings.get('mathpixApiKey')
+            ocr_model = 'baidu-ocr'
            print("Using Baidu OCR for text extraction...")
        else:
            # 回退到Mathpix
            mathpix_app_id = get_api_key('MathpixAppId')
            mathpix_app_key = get_api_key('MathpixAppKey')
            # 构建完整的Mathpix API密钥（格式：app_id:app_key）
            mathpix_key = f"{mathpix_app_id}:{mathpix_app_key}" if mathpix_app_id and mathpix_app_key else None
            # 如果本地没有配置，尝试使用前端传递的密钥（向后兼容）
            if not mathpix_key:
                mathpix_key = settings.get('mathpixApiKey')
            if mathpix_key:
                ocr_key = mathpix_key
                ocr_model = 'mathpix'
                print("Using Mathpix OCR for text extraction...")
-        if not mathpix_key:
+        if not ocr_key:
-            raise ValueError("Mathpix API key is required")
+            raise ValueError("OCR API key is required. Please configure Baidu OCR (API Key + Secret Key) or Mathpix (App ID + App Key)")
        # 先回复客户端，确认已收到请求，防止超时断开
        # 注意：这里不能使用return，否则后续代码不会执行
        socketio.emit('request_acknowledged', {
            'status': 'received', 
-            'message': 'Image received, text extraction in progress'
+            'message': f'Image received, text extraction in progress using {ocr_model}'
        }, room=request.sid)
        try:
-            app_id, app_key = mathpix_key.split(':')
+            if ocr_model == 'baidu-ocr':
-            if not app_id.strip() or not app_key.strip():
+                api_key, secret_key = ocr_key.split(':')
-                raise ValueError()
+                if not api_key.strip() or not secret_key.strip():
                    raise ValueError()
            elif ocr_model == 'mathpix':
                app_id, app_key = ocr_key.split(':')
                if not app_id.strip() or not app_key.strip():
                    raise ValueError()
        except ValueError:
-            raise ValueError("Invalid Mathpix API key format. Expected format: 'app_id:app_key'")
+            if ocr_model == 'baidu-ocr':
                raise ValueError("Invalid Baidu OCR API key format. Expected format: 'API_KEY:SECRET_KEY'")
            else:
                raise ValueError("Invalid Mathpix API key format. Expected format: 'app_id:app_key'")
-        print("Creating Mathpix model instance...")
+        print(f"Creating {ocr_model} model instance...")
-        # 只传递必需的参数，ModelFactory.create_model会处理不同模型类型
+        # ModelFactory.create_model会处理不同模型类型
        model = ModelFactory.create_model(
-            model_name='mathpix',
+            model_name=ocr_model,
-            api_key=mathpix_key
+            api_key=ocr_key
        )
        print("Starting text extraction...")
--- a/config/api_base_urls.json
+++ b/config/api_base_urls.json
@@ -3,5 +3,6 @@
  "OpenaiApiBaseUrl": "",
  "DeepseekApiBaseUrl": "",
  "AlibabaApiBaseUrl": "",
-  "GoogleApiBaseUrl": ""
+  "GoogleApiBaseUrl": "",
  "DoubaoApiBaseUrl": ""
 }
--- a/config/models.json
+++ b/config/models.json
@@ -24,6 +24,11 @@
            "name": "Google",
            "api_key_id": "GoogleApiKey",
            "class_name": "GoogleModel"
        },
        "doubao": {
            "name": "Doubao",
            "api_key_id": "DoubaoApiKey",
            "class_name": "DoubaoModel"
        }
    },
    "models": {
@@ -91,21 +96,29 @@
            "version": "latest",
            "description": "阿里通义千问VL-MAX模型，视觉理解能力最强，支持图像理解和复杂任务"
        },
-        "gemini-2.5-pro-preview-03-25": {
+        "gemini-2.5-pro": {
            "name": "Gemini 2.5 Pro",
            "provider": "google",
            "supportsMultimodal": true,
            "isReasoning": true,
-            "version": "preview-03-25",
+            "version": "latest",
-            "description": "Google最强大的Gemini 2.5 Pro模型，支持图像理解"
+            "description": "Google最强大的Gemini 2.5 Pro模型，支持图像理解（需要付费API密钥）"
        },
-        "gemini-2.0-flash": {
+        "gemini-2.5-flash": {
-            "name": "Gemini 2.0 Flash",
+            "name": "Gemini 2.5 Flash",
            "provider": "google",
            "supportsMultimodal": true,
            "isReasoning": false,
            "version": "latest",
-            "description": "Google更快速的Gemini 2.0 Flash模型，支持图像理解，响应更迅速"
+            "description": "Google更快速的Gemini 2.5 Flash模型，支持图像理解，有免费配额"
        },
        "doubao-seed-1-6-250615": {
            "name": "Doubao-Seed-1.6",
            "provider": "doubao",
            "supportsMultimodal": true,
            "isReasoning": true,
            "version": "latest",
            "description": "支持auto/thinking/non-thinking三种思考模式、支持多模态、256K长上下文"
        }
    }
 } 
--- a/config/proxy_api.json
+++ b/config/proxy_api.json
@@ -4,7 +4,8 @@
    "anthropic": "",
    "deepseek": "",
    "google": "",
-    "openai": ""
+    "openai": "",
    "doubao": ""
  },
  "enabled": true
 }
--- a/models/init.py
+++ b/models/init.py
@@ -4,6 +4,7 @@ from .openai import OpenAIModel
 from .deepseek import DeepSeekModel
 from .alibaba import AlibabaModel
 from .google import GoogleModel
 from .doubao import DoubaoModel
 from .factory import ModelFactory
 __all__ = [
@@ -13,5 +14,6 @@ __all__ = [
    'DeepSeekModel',
    'AlibabaModel',
    'GoogleModel',
    'DoubaoModel',
    'ModelFactory'
 ]
--- a/models/alibaba.py
+++ b/models/alibaba.py
@@ -4,12 +4,13 @@ from openai import OpenAI
 from .base import BaseModel
 class AlibabaModel(BaseModel):
-    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None):
+    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None, api_base_url: str = None):
        # 如果没有提供模型名称，才使用默认值
        self.model_name = model_name if model_name else "QVQ-Max-2025-03-25"
        print(f"初始化阿里巴巴模型: {self.model_name}")
        # 在super().__init__之前设置model_name，这样get_default_system_prompt能使用它
        super().__init__(api_key, temperature, system_prompt, language)
        self.api_base_url = api_base_url  # 存储API基础URL
    def get_default_system_prompt(self) -> str:
        """根据模型名称返回不同的默认系统提示词"""
--- a/models/baidu_ocr.py
+++ b/models/baidu_ocr.py
@@ -0,0 +1,177 @@
 import base64
 import json
 import time
 import urllib.request
 import urllib.parse
 from typing import Generator, Dict, Any
 from .base import BaseModel
 class BaiduOCRModel(BaseModel):
    """
    百度OCR模型，用于图像文字识别
    """
    def __init__(self, api_key: str, secret_key: str = None, temperature: float = 0.7, system_prompt: str = None):
        """
        Initialise the Baidu OCR model.

        Args:
            api_key: Baidu API Key, or the combined ``'API_KEY:SECRET_KEY'``
                string when ``secret_key`` is not given.
            secret_key: Baidu Secret Key. May be omitted when it is embedded
                in ``api_key`` after a colon.
            temperature: Not used for OCR; forwarded to ``BaseModel.__init__``.
            system_prompt: Not used for OCR; forwarded to ``BaseModel.__init__``.

        Raises:
            ValueError: If the credentials are not exactly two non-blank parts.
        """
        super().__init__(api_key, temperature, system_prompt)

        format_error = "百度OCR API密钥必须是 'API_KEY:SECRET_KEY' 格式或单独传递secret_key参数"

        # Two accepted forms: separate arguments, or one colon-joined string.
        if secret_key:
            parts = [api_key, secret_key]
        else:
            parts = api_key.split(':') if isinstance(api_key, str) else []

        # Reject blank halves up front so the failure is reported here rather
        # than as an opaque error from the token request later on.
        if len(parts) != 2 or not all(isinstance(part, str) and part.strip() for part in parts):
            raise ValueError(format_error)

        # Strip stray whitespace that pasted credentials often carry.
        self.api_key, self.secret_key = (part.strip() for part in parts)

        # Baidu API endpoints.
        self.token_url = "https://aip.baidubce.com/oauth/2.0/token"
        self.ocr_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"

        # Cached access token and its expiry (epoch seconds).
        self._access_token = None
        self._token_expires = 0
    def get_access_token(self, timeout: float = 15.0) -> str:
        """
        Return a Baidu API access token, requesting a new one when needed.

        A cached token is reused until five minutes before its recorded expiry.

        Args:
            timeout: Seconds to wait for the token endpoint before giving up.

        Returns:
            str: The access token.

        Raises:
            Exception: If the request fails or the response has no
                ``access_token`` field.
        """
        # Reuse the cached token unless it expires within the next 5 minutes.
        if self._access_token and time.time() < self._token_expires - 300:
            return self._access_token

        # Build the client-credentials token request.
        params = {
            'grant_type': 'client_credentials',
            'client_id': self.api_key,
            'client_secret': self.secret_key
        }

        data = urllib.parse.urlencode(params).encode('utf-8')
        request = urllib.request.Request(self.token_url, data=data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        try:
            # A timeout is required: without one a stalled connection would
            # block the caller indefinitely.
            with urllib.request.urlopen(request, timeout=timeout) as response:
                result = json.loads(response.read().decode('utf-8'))

            if 'access_token' in result:
                self._access_token = result['access_token']
                # Fall back to 2592000 s (30 days) when the response omits
                # 'expires_in'; the check above refreshes 5 minutes early.
                self._token_expires = time.time() + result.get('expires_in', 2592000)
                return self._access_token
            else:
                raise Exception(f"获取access_token失败: {result.get('error_description', '未知错误')}")

        except Exception as e:
            raise Exception(f"请求access_token失败: {str(e)}") from e
    def ocr_image(self, image_data: str, timeout: float = 30.0) -> str:
        """
        Run OCR on an image and return the recognised text.

        Args:
            image_data: Base64-encoded image data. A leading ``data:`` URI
                header, if present, is removed before the upload.
            timeout: Seconds to wait for the OCR endpoint before giving up.

        Returns:
            str: The recognised lines joined with newlines.

        Raises:
            Exception: If obtaining the token, the request, or parsing fails,
                or the API reports an ``error_code``.
        """
        access_token = self.get_access_token()

        # Send only the raw base64 payload; drop any data-URI header.
        if image_data.startswith('data:') and ',' in image_data:
            image_data = image_data.split(',', 1)[1]

        # Request parameters.
        params = {
            'image': image_data,
            'language_type': 'auto_detect',  # auto-detect the language
            'detect_direction': 'true',      # detect image orientation
            'probability': 'false'           # omit confidences (smaller response)
        }

        data = urllib.parse.urlencode(params).encode('utf-8')
        url = f"{self.ocr_url}?access_token={access_token}"

        request = urllib.request.Request(url, data=data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        try:
            # A timeout is required: without one a stalled connection would
            # block the caller indefinitely.
            with urllib.request.urlopen(request, timeout=timeout) as response:
                result = json.loads(response.read().decode('utf-8'))

            if 'error_code' in result:
                raise Exception(f"百度OCR API错误: {result.get('error_msg', '未知错误')}")

            # Collect the recognised text, one entry per detected line.
            words_result = result.get('words_result', [])
            text_lines = [item['words'] for item in words_result]

            return '\n'.join(text_lines)

        except Exception as e:
            raise Exception(f"OCR识别失败: {str(e)}") from e
    def extract_full_text(self, image_data: str) -> str:
        """
        Extract all text from an image.

        Thin alias for ``ocr_image`` (the original notes it exists for
        interface compatibility with the Mathpix model).

        Args:
            image_data: Base64-encoded image data.

        Returns:
            str: The extracted text.
        """
        recognized_text = self.ocr_image(image_data)
        return recognized_text
    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]:
        """
        Run OCR on an image and yield the outcome as a single response dict.

        Args:
            image_data: Base64-encoded image data.
            proxies: Proxy configuration; not used by this method.

        Yields:
            dict: A ``'completed'`` response carrying the recognised text, or
                an ``'error'`` response carrying the failure message.
        """
        model_tag = 'baidu-ocr'
        try:
            recognized = self.ocr_image(image_data)
            yield {'status': 'completed', 'content': recognized, 'model': model_tag}
        except Exception as exc:
            failure_message = f'OCR识别失败: {str(exc)}'
            yield {'status': 'error', 'content': failure_message, 'model': model_tag}
    def analyze_text(self, text: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]:
        """
        Reject text analysis, which the OCR model does not support.

        Args:
            text: Input text; not used by this method.
            proxies: Proxy configuration; not used by this method.

        Yields:
            dict: A single ``'error'`` response.
        """
        unsupported_response = {
            'status': 'error',
            'content': 'OCR模型不支持文本分析功能',
            'model': 'baidu-ocr'
        }
        yield unsupported_response
    def get_model_identifier(self) -> str:
        """Return the identifier string for this model."""
        identifier = "baidu-ocr"
        return identifier
--- a/models/deepseek.py
+++ b/models/deepseek.py
@@ -6,9 +6,10 @@ from openai import OpenAI
 from .base import BaseModel
 class DeepSeekModel(BaseModel):
-    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = "deepseek-reasoner"):
+    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = "deepseek-reasoner", api_base_url: str = None):
        super().__init__(api_key, temperature, system_prompt, language)
        self.model_name = model_name
        self.api_base_url = api_base_url  # 存储API基础URL
    def get_default_system_prompt(self) -> str:
        return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question:
--- a/models/doubao.py
+++ b/models/doubao.py
@@ -0,0 +1,339 @@
 import json
 import os
 import base64
 from typing import Generator, Dict, Any, Optional
 import requests
 from .base import BaseModel
 class DoubaoModel(BaseModel):
    """
    豆包API模型实现类
    支持字节跳动的豆包AI模型，可处理文本和图像输入
    """
    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None, api_base_url: str = None):
        """
        Initialise the Doubao model.

        Args:
            api_key: Doubao API key.
            temperature: Sampling temperature.
            system_prompt: System prompt.
            language: Preferred response language.
            model_name: Specific model name; the default identifier is used
                when omitted.
            api_base_url: Custom API base URL; the built-in endpoint is used
                when omitted.
        """
        super().__init__(api_key, temperature, system_prompt, language)
        self.model_name = model_name or self.get_model_identifier()
        # Request URLs are built as f"{base_url}/chat/completions", so strip
        # trailing slashes to avoid "//chat/completions" with custom endpoints.
        self.base_url = (api_base_url or "https://ark.cn-beijing.volces.com/api/v3").rstrip('/')
        self.max_tokens = 4096  # default cap on output tokens
        self.reasoning_config = None  # optional dict; its 'thinking_mode' key is read when building requests
    def get_default_system_prompt(self) -> str:
        return """你是一个专业的问题分析专家。当看到问题图片时：
 1. 仔细阅读并理解问题
 2. 分解问题的关键组成部分
 3. 提供清晰的分步解决方案
 4. 如果相关，解释涉及的概念或理论
 5. 如果有多种方法，优先解释最有效的方法"""
    def get_model_identifier(self) -> str:
        """Return the default model identifier."""
        default_model_id = "doubao-seed-1-6-250615"  # Doubao-Seed-1.6
        return default_model_id
    def get_actual_model_name(self) -> str:
        """
        Map the configured model name to the identifier sent to the API.

        Returns:
            str: The mapped identifier, or ``doubao-seed-1-6-250615`` when the
                configured name is not in the mapping.
        """
        fallback_model = "doubao-seed-1-6-250615"
        # Configured name -> identifier used in API calls.
        known_models = {
            "doubao-seed-1-6-250615": "doubao-seed-1-6-250615"
        }
        if self.model_name in known_models:
            return known_models[self.model_name]
        # NOTE(review): any unmapped model name is silently replaced by the
        # default model — confirm this is intended rather than passing it through.
        return fallback_model
    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """
        Stream a chat completion for a plain-text question.

        Args:
            text: The user's question.
            proxies: Optional proxy mapping with ``'http'`` / ``'https'`` keys.
                It is passed to ``requests`` and also mirrored into the
                ``http_proxy`` / ``https_proxy`` environment variables for the
                duration of the call.

        Yields:
            dict: ``{"status": "started"}`` first; then one
                ``{"status": "streaming", "content": <text so far>}`` per
                received chunk; finally ``{"status": "completed", ...}``.
                On any failure a single ``{"status": "error", "error": ...}``.
        """
        try:
            yield {"status": "started"}

            # Mirror the proxies into the environment (if provided), remembering
            # the previous values so they can be restored afterwards.
            # NOTE(review): os.environ is process-wide; concurrent requests
            # could observe each other's proxy settings — confirm acceptable.
            original_proxies = None
            if proxies:
                original_proxies = {
                    'http_proxy': os.environ.get('http_proxy'),
                    'https_proxy': os.environ.get('https_proxy')
                }
                if 'http' in proxies:
                    os.environ['http_proxy'] = proxies['http']
                if 'https' in proxies:
                    os.environ['https_proxy'] = proxies['https']

            try:
                headers = {
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json"
                }

                # The system prompt is deliberately not sent (per the original
                # author's note about the official API docs); only the user
                # message is included.
                user_content = text
                if self.language and self.language != 'auto':
                    user_content = f"请使用{self.language}回答以下问题: {text}"

                messages = [{
                    "role": "user",
                    "content": user_content
                }]

                # Thinking mode comes from reasoning_config, defaulting to "auto".
                reasoning = getattr(self, 'reasoning_config', None)
                thinking_mode = reasoning.get('thinking_mode', "auto") if reasoning else "auto"
                thinking = {"type": thinking_mode}

                data = {
                    "model": self.get_actual_model_name(),
                    "messages": messages,
                    "thinking": thinking,
                    "temperature": self.temperature,
                    "max_tokens": self.max_tokens,
                    "stream": True
                }

                response_buffer = ""

                # Use the response as a context manager so the streamed
                # connection is always released, including when the loop stops
                # early at '[DONE]' or an exception is raised.
                with requests.post(
                    f"{self.base_url}/chat/completions",
                    headers=headers,
                    json=data,
                    stream=True,
                    proxies=proxies if proxies else None,
                    timeout=60
                ) as response:
                    if response.status_code != 200:
                        error_text = response.text
                        raise Exception(f"HTTP {response.status_code}: {error_text}")

                    # Parse the server-sent-event stream line by line.
                    for line in response.iter_lines():
                        if not line:
                            continue

                        line = line.decode('utf-8')
                        if not line.startswith('data: '):
                            continue

                        line = line[6:]  # drop the 'data: ' prefix
                        if line == '[DONE]':
                            break

                        try:
                            chunk_data = json.loads(line)
                        except json.JSONDecodeError:
                            # Skip malformed chunks rather than abort the stream.
                            continue

                        choices = chunk_data.get('choices', [])
                        if choices:
                            delta = choices[0].get('delta', {})
                            content = delta.get('content', '')
                            if content:
                                response_buffer += content
                                # Report progress with the text accumulated so far.
                                yield {
                                    "status": "streaming",
                                    "content": response_buffer
                                }

                # Always finish with the complete content.
                yield {
                    "status": "completed",
                    "content": response_buffer
                }

            finally:
                # Restore the original proxy environment variables.
                if original_proxies:
                    for key, value in original_proxies.items():
                        if value is None:
                            os.environ.pop(key, None)
                        else:
                            os.environ[key] = value

        except Exception as e:
            yield {
                "status": "error",
                "error": f"豆包API错误: {str(e)}"
            }
    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
        """
        Stream a chat completion that analyses an image.

        Args:
            image_data: Base64-encoded image, optionally with a
                ``data:image...`` URI header (which is removed).
            proxies: Optional proxy mapping with ``'http'`` / ``'https'`` keys.
                It is passed to ``requests`` and also mirrored into the
                ``http_proxy`` / ``https_proxy`` environment variables for the
                duration of the call.

        Yields:
            dict: ``{"status": "started"}`` first; then one
                ``{"status": "streaming", "content": <text so far>}`` per
                received chunk; finally ``{"status": "completed", ...}``.
                On any failure a single ``{"status": "error", "error": ...}``.
        """
        try:
            yield {"status": "started"}

            # Mirror the proxies into the environment (if provided), remembering
            # the previous values so they can be restored afterwards.
            # NOTE(review): os.environ is process-wide; concurrent requests
            # could observe each other's proxy settings — confirm acceptable.
            original_proxies = None
            if proxies:
                original_proxies = {
                    'http_proxy': os.environ.get('http_proxy'),
                    'https_proxy': os.environ.get('https_proxy')
                }
                if 'http' in proxies:
                    os.environ['http_proxy'] = proxies['http']
                if 'https' in proxies:
                    os.environ['https_proxy'] = proxies['https']

            try:
                headers = {
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json"
                }

                # Keep only the base64 payload when given a data URI.
                if image_data.startswith('data:image'):
                    image_data = image_data.split(',', 1)[1]

                # Guess the format from the base64-encoded magic number:
                # 'iVBORw0KGgo' marks PNG; everything else (including the
                # JPEG marker '/9j/') is sent as JPEG.
                image_format = "png" if image_data.startswith('iVBORw0KGgo') else "jpeg"

                # User message: a text instruction followed by the image.
                user_content = [
                    {
                        "type": "text",
                        "text": f"请使用{self.language}分析这张图片并提供详细解答。" if self.language and self.language != 'auto' else "请分析这张图片并提供详细解答?"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/{image_format};base64,{image_data}"
                        }
                    }
                ]

                messages = [
                    {
                        "role": "user",
                        "content": user_content
                    }
                ]

                # Thinking mode comes from reasoning_config, defaulting to "auto".
                reasoning = getattr(self, 'reasoning_config', None)
                thinking_mode = reasoning.get('thinking_mode', "auto") if reasoning else "auto"
                thinking = {"type": thinking_mode}

                data = {
                    "model": self.get_actual_model_name(),
                    "messages": messages,
                    "thinking": thinking,
                    "temperature": self.temperature,
                    "max_tokens": self.max_tokens,
                    "stream": True
                }

                response_buffer = ""

                # Use the response as a context manager so the streamed
                # connection is always released, including when the loop stops
                # early at '[DONE]' or an exception is raised.
                with requests.post(
                    f"{self.base_url}/chat/completions",
                    headers=headers,
                    json=data,
                    stream=True,
                    proxies=proxies if proxies else None,
                    timeout=60
                ) as response:
                    if response.status_code != 200:
                        error_text = response.text
                        raise Exception(f"HTTP {response.status_code}: {error_text}")

                    # Parse the server-sent-event stream line by line.
                    for line in response.iter_lines():
                        if not line:
                            continue

                        line = line.decode('utf-8')
                        if not line.startswith('data: '):
                            continue

                        line = line[6:]  # drop the 'data: ' prefix
                        if line == '[DONE]':
                            break

                        try:
                            chunk_data = json.loads(line)
                        except json.JSONDecodeError:
                            # Skip malformed chunks rather than abort the stream.
                            continue

                        choices = chunk_data.get('choices', [])
                        if choices:
                            delta = choices[0].get('delta', {})
                            content = delta.get('content', '')
                            if content:
                                response_buffer += content
                                # Report progress with the text accumulated so far.
                                yield {
                                    "status": "streaming",
                                    "content": response_buffer
                                }

                # Always finish with the complete content.
                yield {
                    "status": "completed",
                    "content": response_buffer
                }

            finally:
                # Restore the original proxy environment variables.
                if original_proxies:
                    for key, value in original_proxies.items():
                        if value is None:
                            os.environ.pop(key, None)
                        else:
                            os.environ[key] = value

        except Exception as e:
            yield {
                "status": "error",
                "error": f"豆包图像分析错误: {str(e)}"
            }
--- a/models/factory.py
+++ b/models/factory.py
@@ -3,7 +3,8 @@ import json
 import os
 import importlib
 from .base import BaseModel
-from .mathpix import MathpixModel  # MathpixModel仍然需要直接导入，因为它是特殊工具
+from .mathpix import MathpixModel  # MathpixModel需要直接导入，因为它是特殊OCR工具
 from .baidu_ocr import BaiduOCRModel  # 百度OCR也是特殊OCR工具，直接导入
 class ModelFactory:
    # 模型基本信息，包含类型和特性
@@ -39,13 +40,25 @@ class ModelFactory:
                        'description': model_info.get('description', '')
                    }
-            # 添加Mathpix模型（特殊工具模型）
+            # 添加特殊OCR工具模型（不在配置文件中定义）
            # 添加Mathpix OCR工具
            cls._models['mathpix'] = {
                'class': MathpixModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': 'Mathpix OCR',
-                'description': '文本提取工具，适用于数学公式和文本',
+                'description': '数学公式识别工具，适用于复杂数学内容',
                'is_ocr_only': True
            }
            # 添加百度OCR工具
            cls._models['baidu-ocr'] = {
                'class': BaiduOCRModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': '百度OCR',
                'description': '通用文字识别工具，支持中文识别',
                'is_ocr_only': True
            }
@@ -62,22 +75,36 @@ class ModelFactory:
        # 不再硬编码模型定义，而是使用空字典
        cls._models = {}
-        # 只保留Mathpix作为基础工具
+        # 添加特殊OCR工具（当配置加载失败时的备用）
        try:
-            # 导入MathpixModel类
+            # 导入并添加Mathpix OCR工具
            from .mathpix import MathpixModel
            # 添加Mathpix作为基础工具
            cls._models['mathpix'] = {
                'class': MathpixModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': 'Mathpix OCR',
-                'description': '文本提取工具，适用于数学公式和文本',
+                'description': '数学公式识别工具，适用于复杂数学内容',
                'is_ocr_only': True
            }
        except Exception as e:
-            print(f"无法加载基础Mathpix工具: {str(e)}")
+            print(f"无法加载Mathpix OCR工具: {str(e)}")
        # 添加百度OCR工具
        try:
            from .baidu_ocr import BaiduOCRModel
            cls._models['baidu-ocr'] = {
                'class': BaiduOCRModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': '百度OCR',
                'description': '通用文字识别工具，支持中文识别',
                'is_ocr_only': True
            }
        except Exception as e:
            print(f"无法加载百度OCR工具: {str(e)}")
    @classmethod
    def create_model(cls, model_name: str, api_key: str, temperature: float = 0.7, 
@@ -114,6 +141,25 @@ class ModelFactory:
            )
        # 对于阿里巴巴模型，也需要传递正确的模型名称
        elif 'qwen' in model_name.lower() or 'qvq' in model_name.lower() or 'alibaba' in model_name.lower():
            return model_class(
                api_key=api_key,
                temperature=temperature,
                system_prompt=system_prompt,
                language=language,
                model_name=model_name
            )
        # 对于Google模型，也需要传递正确的模型名称
        elif 'gemini' in model_name.lower() or 'google' in model_name.lower():
            return model_class(
                api_key=api_key,
                temperature=temperature,
                system_prompt=system_prompt,
                language=language,
                model_name=model_name,
                api_base_url=api_base_url
            )
        # 对于豆包模型，也需要传递正确的模型名称
        elif 'doubao' in model_name.lower():
            return model_class(
                api_key=api_key,
                temperature=temperature,
@@ -129,6 +175,13 @@ class ModelFactory:
                temperature=temperature,
                system_prompt=system_prompt
            )
        # 对于百度OCR模型，传递api_key（支持API_KEY:SECRET_KEY格式）
        elif model_name == 'baidu-ocr':
            return model_class(
                api_key=api_key,
                temperature=temperature,
                system_prompt=system_prompt
            )
        # 对于Anthropic模型，需要传递model_identifier参数
        elif 'claude' in model_name.lower() or 'anthropic' in model_name.lower():
            return model_class(
--- a/models/google.py
+++ b/models/google.py
@@ -30,10 +30,17 @@ class GoogleModel(BaseModel):
        # 配置Google API
        if api_base_url:
-            # 如果提供了自定义API基础URL，设置genai的api_url
+            # 配置中转API - 使用环境变量方式
-            genai.configure(api_key=api_key, transport="rest", client_options={"api_endpoint": api_base_url})
+            # 移除末尾的斜杠以避免重复路径问题
            clean_base_url = api_base_url.rstrip('/')
            # 设置环境变量来指定API端点
            os.environ['GOOGLE_AI_API_ENDPOINT'] = clean_base_url
            genai.configure(api_key=api_key)
        else:
            # 使用默认API端点
            # 清除可能存在的自定义端点环境变量
            if 'GOOGLE_AI_API_ENDPOINT' in os.environ:
                del os.environ['GOOGLE_AI_API_ENDPOINT']
            genai.configure(api_key=api_key)
    def get_default_system_prompt(self) -> str:
@@ -46,7 +53,7 @@ class GoogleModel(BaseModel):
    def get_model_identifier(self) -> str:
        """返回默认的模型标识符"""
-        return "gemini-2.5-pro-preview-03-25"
+        return "gemini-2.5-flash"  # 使用有免费配额的模型作为默认值
    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """流式生成文本响应"""
--- a/static/js/main.js
+++ b/static/js/main.js
@@ -1053,10 +1053,33 @@ class SnapSolver {
            this.extractTextBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i><span>提取中...</span>';
            const settings = window.settingsManager.getSettings();
            // 根据用户设置的OCR源进行选择
            const ocrSource = settings.ocrSource || 'auto';
            const baiduApiKey = window.settingsManager.apiKeyValues.BaiduApiKey;
            const baiduSecretKey = window.settingsManager.apiKeyValues.BaiduSecretKey;
            const mathpixApiKey = settings.mathpixApiKey;
-            if (!mathpixApiKey || mathpixApiKey === ':') {
+            const hasBaiduOCR = baiduApiKey && baiduSecretKey;
-                window.uiManager.showToast('请在设置中输入Mathpix API凭据', 'error');
+            const hasMathpix = mathpixApiKey && mathpixApiKey !== ':';
            // 根据OCR源配置检查可用性
            let canProceed = false;
            let missingOCRMessage = '';
            if (ocrSource === 'baidu') {
                canProceed = hasBaiduOCR;
                missingOCRMessage = '请在设置中配置百度OCR API密钥';
            } else if (ocrSource === 'mathpix') {
                canProceed = hasMathpix;
                missingOCRMessage = '请在设置中配置Mathpix API密钥';
            } else { // auto
                canProceed = hasBaiduOCR || hasMathpix;
                missingOCRMessage = '请在设置中配置OCR API密钥：百度OCR（推荐）或Mathpix';
            }
            if (!canProceed) {
                window.uiManager.showToast(missingOCRMessage, 'error');
                document.getElementById('settingsPanel').classList.add('active');
                this.extractTextBtn.disabled = false;
                this.extractTextBtn.innerHTML = '<i class="fas fa-font"></i><span>提取文本</span>';
@@ -1076,7 +1099,7 @@ class SnapSolver {
                this.socket.emit('extract_text', {
                    image: this.croppedImage.split(',')[1],
                    settings: {
-                        mathpixApiKey: mathpixApiKey
+                        ocrSource: settings.ocrSource || 'auto'
                    }
                });
--- a/static/js/settings.js
+++ b/static/js/settings.js
@@ -374,6 +374,9 @@ class SettingsManager {
        // 模型选择器对象
        this.modelSelector = null;
        // OCR源配置
        this.ocrSource = 'auto'; // 默认自动选择
        // 存储API密钥的对象
        this.apiKeyValues = {
            'AnthropicApiKey': '',
@@ -381,6 +384,9 @@ class SettingsManager {
            'DeepseekApiKey': '',
            'AlibabaApiKey': '',
            'GoogleApiKey': '',
            'DoubaoApiKey': '',
            'BaiduApiKey': '',
            'BaiduSecretKey': '',
            'MathpixAppId': '',
            'MathpixAppKey': ''
        };
@@ -391,7 +397,8 @@ class SettingsManager {
            'OpenaiApiBaseUrl': '',
            'DeepseekApiBaseUrl': '',
            'AlibabaApiBaseUrl': '',
-            'GoogleApiBaseUrl': ''
+            'GoogleApiBaseUrl': '',
            'DoubaoApiBaseUrl': ''
        };
        // 加载模型配置
@@ -580,6 +587,13 @@ class SettingsManager {
                this.updateReasoningOptionUI(settings.reasoningDepth);
        }
        // 加载豆包思考模式设置
        if (settings.doubaoThinkingMode && this.doubaoThinkingModeSelect) {
            this.doubaoThinkingModeSelect.value = settings.doubaoThinkingMode;
            // 更新豆包思考选项UI
            this.updateDoubaoThinkingOptionUI(settings.doubaoThinkingMode);
        }
        // 加载思考预算百分比
        const thinkBudgetPercent = parseInt(settings.thinkBudgetPercent || '50');
        if (this.thinkBudgetPercentInput) {
@@ -624,6 +638,14 @@ class SettingsManager {
            this.proxyPortInput.value = settings.proxyPort;
        }
        // Load OCR source setting
        if (settings.ocrSource) {
            this.ocrSource = settings.ocrSource;
            if (this.ocrSourceSelect) {
                this.ocrSourceSelect.value = settings.ocrSource;
            }
        }
        // Update UI based on model type
        this.updateUIBasedOnModelType();
@@ -720,6 +742,14 @@ class SettingsManager {
            this.thinkBudgetGroup.style.display = showThinkBudget ? 'block' : 'none';
        }
        // 处理豆包深度思考设置显示
        const isDoubaoReasoning = modelInfo.isReasoning && modelInfo.provider === 'doubao';
        // 只有对豆包推理模型才显示深度思考设置
        if (this.doubaoThinkingGroup) {
            this.doubaoThinkingGroup.style.display = isDoubaoReasoning ? 'block' : 'none';
        }
        // 控制最大Token设置的显示
        // 阿里巴巴模型不支持自定义Token设置
        const maxTokensGroup = this.maxTokens ? this.maxTokens.closest('.setting-group') : null;
@@ -759,6 +789,8 @@ class SettingsManager {
            apiKeyToHighlight = document.querySelector('.api-key-status:nth-child(4)'); // Alibaba
        } else if (modelType && (modelType.toLowerCase().includes('gemini') || modelType.toLowerCase().includes('google'))) {
            apiKeyToHighlight = document.querySelector('.api-key-status:nth-child(5)'); // Google
        } else if (modelType && modelType.toLowerCase().includes('doubao')) {
            apiKeyToHighlight = document.querySelector('.api-key-status:nth-child(6)'); // 豆包
        }
        if (apiKeyToHighlight) {
@@ -775,6 +807,7 @@ class SettingsManager {
            model: this.modelSelect.value,
                maxTokens: this.maxTokens.value,
            reasoningDepth: this.reasoningDepthSelect?.value || 'standard',
            doubaoThinkingMode: this.doubaoThinkingModeSelect?.value || 'auto',
            thinkBudgetPercent: this.thinkBudgetPercentInput?.value || '50',
            temperature: this.temperatureInput.value,
            language: this.languageInput.value,
@@ -782,7 +815,8 @@ class SettingsManager {
            currentPromptId: this.currentPromptId,
            proxyEnabled: this.proxyEnabledInput.checked,
            proxyHost: this.proxyHostInput.value,
-            proxyPort: this.proxyPortInput.value
+            proxyPort: this.proxyPortInput.value,
            ocrSource: this.ocrSource // 添加OCR源配置保存
        };
            // 保存设置到localStorage
@@ -832,17 +866,30 @@ class SettingsManager {
        const reasoningDepth = this.reasoningDepthSelect?.value || 'standard';
        const thinkBudgetPercent = parseInt(this.thinkBudgetPercentInput?.value || '50');
        // 获取豆包思考模式设置
        const doubaoThinkingMode = this.doubaoThinkingModeSelect?.value || 'auto';
        // 计算思考预算的实际Token数
        const thinkBudget = Math.floor(maxTokens * (thinkBudgetPercent / 100));
        // 构建推理配置参数
        const reasoningConfig = {};
-        if (modelInfo.provider === 'anthropic' && modelInfo.isReasoning) {
+        
-            if (reasoningDepth === 'extended') {
+        // 处理不同模型的推理配置
-                reasoningConfig.reasoning_depth = 'extended';
+        if (modelInfo.isReasoning) {
-                reasoningConfig.think_budget = thinkBudget;
+            // 对于Anthropic模型
-            } else {
+            if (modelInfo.provider === 'anthropic') {
-                reasoningConfig.speed_mode = 'instant';
+                if (reasoningDepth === 'extended') {
                    reasoningConfig.reasoning_depth = 'extended';
                    reasoningConfig.think_budget = thinkBudget;
                } else {
                    reasoningConfig.speed_mode = 'instant';
                }
            }
            // 对于豆包模型
            if (modelInfo.provider === 'doubao') {
                reasoningConfig.thinking_mode = doubaoThinkingMode;
            }
        }
@@ -869,6 +916,9 @@ class SettingsManager {
            if (this.apiBaseUrlValues['GoogleApiBaseUrl']) {
                apiBaseUrls.google = this.apiBaseUrlValues['GoogleApiBaseUrl'];
            }
            if (this.apiBaseUrlValues['DoubaoApiBaseUrl']) {
                apiBaseUrls.doubao = this.apiBaseUrlValues['DoubaoApiBaseUrl'];
            }
        }
        return {
@@ -881,6 +931,8 @@ class SettingsManager {
            proxyHost: this.proxyHostInput.value,
            proxyPort: this.proxyPortInput.value,
            mathpixApiKey: mathpixApiKey,
            ocrSource: this.ocrSource, // 添加OCR源配置
            doubaoThinkingMode: doubaoThinkingMode, // 添加豆包思考模式配置
            modelInfo: {
                supportsMultimodal: modelInfo.supportsMultimodal || false,
                isReasoning: modelInfo.isReasoning || false,
@@ -1121,6 +1173,20 @@ class SettingsManager {
            this.saveSettings();
        });
        // OCR源选择器事件监听
        if (this.ocrSourceSelect) {
            this.ocrSourceSelect.addEventListener('change', (e) => {
                // 阻止事件冒泡
                e.stopPropagation();
                // 更新OCR源配置
                this.ocrSource = e.target.value;
                this.saveSettings();
                console.log('OCR源已切换为:', this.ocrSource);
            });
        }
        // Panel visibility
        if (this.settingsToggle) {
        this.settingsToggle.addEventListener('click', () => {
@@ -1195,6 +1261,71 @@ class SettingsManager {
        // 初始化API密钥编辑功能
        this.initApiKeyEditFunctions();
        // 初始化推理选项事件
        this.initReasoningOptionEvents();
        // 初始化豆包思考选项事件
        this.initDoubaoThinkingOptionEvents();
    }
    // 初始化推理选项事件
    initReasoningOptionEvents() {
        const reasoningOptions = document.querySelectorAll('.reasoning-option');
        reasoningOptions.forEach(option => {
            option.addEventListener('click', (e) => {
                e.preventDefault();
                e.stopPropagation();
                const value = option.getAttribute('data-value');
                if (value && this.reasoningDepthSelect) {
                    // 更新select值
                    this.reasoningDepthSelect.value = value;
                    // 更新UI
                    this.updateReasoningOptionUI(value);
                    // 保存设置
                    this.saveSettings();
                }
            });
        });
    }
    // 初始化豆包思考选项事件
    initDoubaoThinkingOptionEvents() {
        const doubaoThinkingOptions = document.querySelectorAll('.doubao-thinking-option');
        doubaoThinkingOptions.forEach(option => {
            option.addEventListener('click', (e) => {
                e.preventDefault();
                e.stopPropagation();
                const value = option.getAttribute('data-value');
                if (value && this.doubaoThinkingModeSelect) {
                    // 更新select值
                    this.doubaoThinkingModeSelect.value = value;
                    // 更新UI
                    this.updateDoubaoThinkingOptionUI(value);
                    // 保存设置
                    this.saveSettings();
                }
            });
        });
    }
    // 更新豆包思考选项UI
    updateDoubaoThinkingOptionUI(value) {
        const doubaoThinkingOptions = document.querySelectorAll('.doubao-thinking-option');
        doubaoThinkingOptions.forEach(option => {
            const optionValue = option.getAttribute('data-value');
            if (optionValue === value) {
                option.classList.add('active');
            } else {
                option.classList.remove('active');
            }
        });
    }
    // 更新思考预算显示
@@ -2208,10 +2339,17 @@ class SettingsManager {
        this.thinkBudgetPercentValue = document.getElementById('thinkBudgetPercentValue');
        this.thinkBudgetGroup = document.querySelector('.think-budget-group');
        // 豆包深度思考相关元素
        this.doubaoThinkingModeSelect = document.getElementById('doubaoThinkingMode');
        this.doubaoThinkingGroup = document.querySelector('.doubao-thinking-group');
        // Initialize Mathpix inputs
        this.mathpixAppIdInput = document.getElementById('mathpixAppId');
        this.mathpixAppKeyInput = document.getElementById('mathpixAppKey');
        // OCR源选择器
        this.ocrSourceSelect = document.getElementById('ocrSourceSelect');
        // API Key elements - 所有的密钥输入框
        this.apiKeyInputs = {
            'AnthropicApiKey': document.getElementById('AnthropicApiKey'),
@@ -2260,6 +2398,9 @@ class SettingsManager {
            'DeepseekApiKey': '',
            'AlibabaApiKey': '',
            'GoogleApiKey': '',
            'DoubaoApiKey': '',
            'BaiduApiKey': '',
            'BaiduSecretKey': '',
            'MathpixAppId': '',
            'MathpixAppKey': ''
        };
@@ -2359,7 +2500,8 @@ class SettingsManager {
                    'OpenaiApiBaseUrl': proxyApiConfig.apis?.openai || '',
                    'DeepseekApiBaseUrl': proxyApiConfig.apis?.deepseek || '',
                    'AlibabaApiBaseUrl': proxyApiConfig.apis?.alibaba || '',
-                    'GoogleApiBaseUrl': proxyApiConfig.apis?.google || ''
+                    'GoogleApiBaseUrl': proxyApiConfig.apis?.google || '',
                    'DoubaoApiBaseUrl': proxyApiConfig.apis?.doubao || ''
                };
                this.updateApiBaseUrlStatus(apiBaseUrls);
                console.log('API基础URL状态已刷新');
@@ -2449,6 +2591,9 @@ class SettingsManager {
                case 'GoogleApiBaseUrl':
                    config.apis.google = value;
                    break;
                case 'DoubaoApiBaseUrl':
                    config.apis.doubao = value;
                    break;
            }
            // 确保启用中转API
--- a/static/style.css
+++ b/static/style.css
@@ -2174,6 +2174,82 @@ button:disabled {
    transition: all 0.2s ease-in-out;
 }
 /* OCR settings styles */
 .ocr-settings {
    margin-bottom: 1.5rem;
 }
 /* Vertical stack holding the label, the selector and the description panel */
 .ocr-source-control {
    display: flex;
    flex-direction: column;
    gap: 12px;
 }
 .ocr-source-selector {
    position: relative;
 }
 /* Full-width select themed through the shared CSS variables */
 .ocr-source-select {
    width: 100%;
    padding: 10px 14px;
    border: 1px solid var(--border-color);
    border-radius: 8px;
    background: var(--surface);
    color: var(--text-primary);
    font-size: 0.9rem;
    transition: all 0.2s ease;
 }
 .ocr-source-select:hover {
    border-color: var(--primary-color);
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
 }
 /* Replace the native outline with a primary-colored focus ring */
 .ocr-source-select:focus {
    outline: none;
    border-color: var(--primary-color);
    box-shadow: 0 0 0 3px rgba(var(--primary-rgb), 0.1);
 }
 /* Bordered panel listing one description row per OCR source */
 .ocr-source-description {
    display: flex;
    flex-direction: column;
    gap: 8px;
    padding: 12px;
    background: rgba(0, 0, 0, 0.02);
    border-radius: 8px;
    border: 1px solid var(--border-color);
 }
 /* One description row: icon followed by text, aligned to the top */
 .ocr-desc-item {
    display: flex;
    align-items: flex-start;
    gap: 8px;
    font-size: 0.85rem;
    line-height: 1.4;
    color: var(--text-secondary);
 }
 /* Keep the icon at its natural width when the text wraps */
 .ocr-desc-item i {
    color: var(--primary-color);
    margin-top: 2px;
    flex-shrink: 0;
 }
 .ocr-desc-item strong {
    color: var(--text-primary);
 }
 /* OCR settings styles under the dark theme */
 [data-theme="dark"] .ocr-source-description {
    background: rgba(255, 255, 255, 0.02);
 }
 [data-theme="dark"] .ocr-source-select {
    background: var(--surface);
    border-color: var(--border-color);
 }
 /* 新增的推理控制组件样式 */
 .reasoning-control {
    display: flex;
@@ -2260,6 +2336,122 @@ button:disabled {
    opacity: 1;
 }
 /* Doubao deep-thinking control component styles */
 .doubao-thinking-control {
    display: flex;
    flex-direction: column;
    gap: 8px;
 }
 .doubao-thinking-label {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 6px;
 }
 /* Horizontal row of option cards */
 .doubao-thinking-selector {
    display: flex;
    gap: 8px;
    margin-bottom: 8px;
 }
 /* One option card; flex: 1 gives every card an equal share of the row */
 .doubao-thinking-option {
    flex: 1;
    display: flex;
    flex-direction: column;
    align-items: center;
    padding: 12px 8px;
    border-radius: 8px;
    background: rgba(0, 0, 0, 0.05);
    cursor: pointer;
    transition: all 0.2s ease;
    border: 2px solid transparent;
    position: relative;
    overflow: hidden;
    min-height: 80px;
    justify-content: center;
 }
 /* Gradient bar along the bottom edge; invisible until the card is active */
 .doubao-thinking-option::before {
    content: '';
    position: absolute;
    bottom: 0;
    left: 0;
    width: 100%;
    height: 3px;
    background: linear-gradient(to right, var(--primary-color), transparent);
    opacity: 0;
    transition: opacity 0.3s ease;
 }
 /* Lift the card slightly on hover */
 .doubao-thinking-option:hover {
    transform: translateY(-2px);
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
 }
 /* Selected card: tinted background and primary-colored border */
 .doubao-thinking-option.active {
    background: rgba(var(--primary-rgb), 0.1);
    border-color: var(--primary-color);
 }
 /* Reveal the bottom gradient bar on the selected card */
 .doubao-thinking-option.active::before {
    opacity: 1;
 }
 .doubao-thinking-option i {
    font-size: 1.3rem;
    margin-bottom: 6px;
    color: var(--primary-color);
    opacity: 0.8;
    transition: all 0.2s ease;
 }
 .doubao-thinking-option .option-name {
    font-weight: 600;
    font-size: 0.85rem;
    margin-bottom: 4px;
    text-align: center;
 }
 .doubao-thinking-option .option-desc {
    font-size: 0.7rem;
    opacity: 0.7;
    text-align: center;
    line-height: 1.2;
 }
 /* Enlarge and fully show the icon while the card is hovered */
 .doubao-thinking-option:hover i {
    transform: scale(1.1);
    opacity: 1;
 }
 /* Panel below the cards holding the explanatory rows */
 .doubao-thinking-desc {
    display: flex;
    flex-direction: column;
    gap: 6px;
    margin-top: 8px;
    padding: 8px;
    background: rgba(0, 0, 0, 0.03);
    border-radius: 6px;
 }
 .doubao-desc-item {
    display: flex;
    align-items: center;
    gap: 8px;
    font-size: 0.8rem;
    opacity: 0.8;
 }
 /* Fixed-width, centered icon so the row texts line up */
 .doubao-desc-item i {
    font-size: 0.9rem;
    color: var(--primary-color);
    opacity: 0.7;
    width: 16px;
    text-align: center;
 }
 /* 思考预算控制组件样式 */
 .think-budget-control {
    display: flex;
@@ -2411,6 +2603,18 @@ button:disabled {
    background: rgba(var(--primary-rgb), 0.2);
 }
 /* Dark theme: swap the black-based tints of the Doubao thinking controls for white-based ones */
 [data-theme="dark"] .doubao-thinking-option {
    background: rgba(255, 255, 255, 0.05);
 }
 /* Dark theme: stronger primary tint on the selected card */
 [data-theme="dark"] .doubao-thinking-option.active {
    background: rgba(var(--primary-rgb), 0.2);
 }
 [data-theme="dark"] .doubao-thinking-desc {
    background: rgba(255, 255, 255, 0.03);
 }
 /* Dark theme: background of the think-value badge */
 [data-theme="dark"] .think-value-badge {
    background: rgba(255, 255, 255, 0.1);
 }
--- a/templates/index.html
+++ b/templates/index.html
@@ -219,6 +219,49 @@
                            </select>
                        </div>
                    </div>
                    <!-- Doubao deep-thinking mode setting. Hidden by default (inline display: none);
                         settings.js switches the display of .doubao-thinking-group on for Doubao reasoning models. -->
                    <div class="setting-group doubao-thinking-group" style="display: none;">
                        <div class="doubao-thinking-control">
                            <div class="doubao-thinking-label">
                                <label for="doubaoThinkingMode"><i class="fas fa-cogs"></i> 豆包深度思考模式</label>
                            </div>
                            <!-- Clickable option cards; each data-value matches an <option> value of the hidden select below -->
                            <div class="doubao-thinking-selector">
                                <div class="doubao-thinking-option active" data-value="auto">
                                    <i class="fas fa-magic"></i>
                                    <span class="option-name">自动模式</span>
                                    <span class="option-desc">由AI自动决定是否使用深度思考</span>
                                </div>
                                <div class="doubao-thinking-option" data-value="enabled">
                                    <i class="fas fa-brain"></i>
                                    <span class="option-name">开启思考</span>
                                    <span class="option-desc">强制启用深度思考过程</span>
                                </div>
                                <div class="doubao-thinking-option" data-value="disabled">
                                    <i class="fas fa-bolt"></i>
                                    <span class="option-name">关闭思考</span>
                                    <span class="option-desc">禁用深度思考，快速响应</span>
                                </div>
                            </div>
                            <!-- Select carrying the "hidden" class; settings.js reads and writes its value as the thinking-mode setting -->
                            <select id="doubaoThinkingMode" class="hidden">
                                <option value="auto">自动模式</option>
                                <option value="enabled">开启思考</option>
                                <option value="disabled">关闭思考</option>
                            </select>
                            <!-- Explanatory rows, one per mode -->
                            <div class="doubao-thinking-desc">
                                <div class="doubao-desc-item">
                                    <i class="fas fa-info-circle"></i>
                                    <span><strong>自动模式：</strong>AI根据问题复杂度自动决定</span>
                                </div>
                                <div class="doubao-desc-item">
                                    <i class="fas fa-lightbulb"></i>
                                    <span><strong>开启思考：</strong>显示完整的思考推理过程</span>
                                </div>
                                <div class="doubao-desc-item">
                                    <i class="fas fa-rocket"></i>
                                    <span><strong>关闭思考：</strong>直接给出答案，响应更快</span>
                                </div>
                            </div>
                        </div>
                    </div>
                    <div class="setting-group think-budget-group">
                        <div class="think-budget-control">
                            <div class="think-budget-label">
@@ -252,6 +295,7 @@
                            </div>
                        </div>
                    </div>
                    <!-- 已删除重复的豆包思考模式UI元素 -->
                    <div class="setting-group">
                        <div class="temperature-control">
                            <div class="temperature-label">
@@ -309,6 +353,37 @@
                    </div>
                </div>
                <!-- OCR settings section -->
                <div class="settings-section ocr-settings">
                    <h3><i class="fas fa-font"></i> OCR设置</h3>
                    <div class="setting-group">
                        <div class="ocr-source-control">
                            <label for="ocrSourceSelect"><i class="fas fa-eye"></i> OCR工具源</label>
                            <!-- OCR source selector; settings.js looks this element up by id "ocrSourceSelect"
                                 and the option values (auto / baidu / mathpix) are the values main.js branches on -->
                            <div class="ocr-source-selector">
                                <select id="ocrSourceSelect" class="ocr-source-select">
                                    <option value="auto">自动选择</option>
                                    <option value="baidu">百度OCR</option>
                                    <option value="mathpix">Mathpix</option>
                                </select>
                            </div>
                            <!-- Explanatory rows, one per OCR source option -->
                            <div class="ocr-source-description">
                                <div class="ocr-desc-item">
                                    <i class="fas fa-magic"></i>
                                    <span><strong>自动选择：</strong>优先使用百度OCR，如无配置则使用Mathpix</span>
                                </div>
                                <div class="ocr-desc-item">
                                    <i class="fas fa-language"></i>
                                    <span><strong>百度OCR：</strong>支持中文，免费额度大，推荐使用</span>
                                </div>
                                <div class="ocr-desc-item">
                                    <i class="fas fa-square-root-alt"></i>
                                    <span><strong>Mathpix：</strong>专业数学公式识别，支持LaTeX格式</span>
                                </div>
                            </div>
                        </div>
                    </div>
                </div>
                <!-- 2. 所有API密钥集中在一个区域 -->
                <div class="settings-section api-key-settings">
                    <h3><i class="fas fa-key"></i> API密钥设置</h3>
@@ -425,6 +500,75 @@
                                </div>
                            </div>
                        </div>
                        <!-- Doubao API key row.
                             NOTE(review): settings.js highlights this row with the selector
                             `.api-key-status:nth-child(6)`, which depends on sibling order; confirm this is the 6th child. -->
                        <div class="api-key-status">
                            <span class="key-name">Doubao API:</span>
                            <div class="key-status-wrapper">
                                <!-- Display state -->
                                <div class="key-display">
                                    <span id="DoubaoApiKeyStatus" class="key-status" data-key="DoubaoApiKey">未设置</span>
                                    <button class="btn-icon edit-api-key" data-key-type="DoubaoApiKey" title="编辑此密钥">
                                        <i class="fas fa-edit"></i>
                                    </button>
                                </div>
                                <!-- Edit state (starts with the "hidden" class) -->
                                <div class="key-edit hidden">
                                    <input type="password" class="key-input" data-key-type="DoubaoApiKey" placeholder="输入Doubao API key">
                                    <button class="btn-icon toggle-visibility">
                                        <i class="fas fa-eye"></i>
                                    </button>
                                    <button class="btn-icon save-api-key" data-key-type="DoubaoApiKey" title="保存密钥">
                                        <i class="fas fa-save"></i>
                                    </button>
                                </div>
                            </div>
                        </div>
                        <!-- Baidu OCR API Key configuration.
                             main.js treats Baidu OCR as configured only when both BaiduApiKey and BaiduSecretKey are set. -->
                        <div class="api-key-status">
                            <span class="key-name">百度OCR API Key:</span>
                            <div class="key-status-wrapper">
                                <!-- Display state -->
                                <div class="key-display">
                                    <span id="BaiduApiKeyStatus" class="key-status" data-key="BaiduApiKey">未设置</span>
                                    <button class="btn-icon edit-api-key" data-key-type="BaiduApiKey" title="编辑此密钥">
                                        <i class="fas fa-edit"></i>
                                    </button>
                                </div>
                                <!-- Edit state (starts with the "hidden" class) -->
                                <div class="key-edit hidden">
                                    <input type="password" class="key-input" data-key-type="BaiduApiKey" placeholder="输入百度OCR API Key">
                                    <button class="btn-icon toggle-visibility">
                                        <i class="fas fa-eye"></i>
                                    </button>
                                    <button class="btn-icon save-api-key" data-key-type="BaiduApiKey" title="保存密钥">
                                        <i class="fas fa-save"></i>
                                    </button>
                                </div>
                            </div>
                        </div>
                        <!-- Baidu OCR Secret Key row; main.js requires it together with BaiduApiKey before using Baidu OCR -->
                        <div class="api-key-status">
                            <span class="key-name">百度OCR Secret Key:</span>
                            <div class="key-status-wrapper">
                                <!-- Display state -->
                                <div class="key-display">
                                    <span id="BaiduSecretKeyStatus" class="key-status" data-key="BaiduSecretKey">未设置</span>
                                    <button class="btn-icon edit-api-key" data-key-type="BaiduSecretKey" title="编辑此密钥">
                                        <i class="fas fa-edit"></i>
                                    </button>
                                </div>
                                <!-- Edit state (starts with the "hidden" class) -->
                                <div class="key-edit hidden">
                                    <input type="password" class="key-input" data-key-type="BaiduSecretKey" placeholder="输入百度OCR Secret Key">
                                    <button class="btn-icon toggle-visibility">
                                        <i class="fas fa-eye"></i>
                                    </button>
                                    <button class="btn-icon save-api-key" data-key-type="BaiduSecretKey" title="保存密钥">
                                        <i class="fas fa-save"></i>
                                    </button>
                                </div>
                            </div>
                        </div>
                        <div class="api-key-status">
                            <span class="key-name">Mathpix App ID:</span>
                            <div class="key-status-wrapper">
@@ -577,6 +721,25 @@
                                        </div>
                                    </div>
                                </div>
                                <!-- Doubao API base-URL row; settings.js stores the saved value under config.apis.doubao -->
                                <div class="api-key-status">
                                    <span class="key-name">Doubao API URL:</span>
                                    <div class="key-status-wrapper">
                                        <!-- Display state -->
                                        <div class="key-display">
                                            <span id="DoubaoApiBaseUrlStatus" class="key-status" data-key="DoubaoApiBaseUrl">未设置</span>
                                            <button class="btn-icon edit-api-base-url" data-key-type="DoubaoApiBaseUrl" title="编辑此URL">
                                                <i class="fas fa-edit"></i>
                                            </button>
                                        </div>
                                        <!-- Edit state (starts with the "hidden" class) -->
                                        <div class="key-edit hidden">
                                            <input type="text" class="key-input" data-key-type="DoubaoApiBaseUrl" placeholder="https://ark.cn-beijing.volces.com/api/v3">
                                            <button class="btn-icon save-api-base-url" data-key-type="DoubaoApiBaseUrl" title="保存URL">
                                                <i class="fas fa-save"></i>
                                            </button>
                                        </div>
                                    </div>
                                </div>
                            </div>
                        </div>
                    </div>