Merge pull request #10 from SkeStars/main

修复gemini接口，添加豆包接口；添加百度OCR支持，更新OCR源选择和设置界面
2026-01-19 01:21:13 +08:00 · 2025-08-03 15:40:56 +08:00
parent 160d716fbe 1c4305d59c
commit acdfca54b1
16 changed files with 1218 additions and 55 deletions
--- a/README.md
+++ b/README.md
@@ -79,7 +79,7 @@ graph TD
        <li><b>Claude-3.7</b>：Anthropic的高级理解与解释</li>
        <li><b>DeepSeek-v3/r1</b>：专为中文场景优化的模型</li>
        <li><b>QVQ-MAX/Qwen-VL-MAX</b>：以视觉推理闻名的国产AI</li>
-        <li><b>Gemini-2.5-Pro/2.0-flash</b>：智商130的非推理AI</li>
+        <li><b>Gemini-2.5-Pro/2.5-flash</b>：智商130的非推理AI</li>
      </ul>
    </td>
  </tr>
@@ -189,7 +189,7 @@ python app.py
 | **QVQ-MAX** | 多模态支持，推理支持 | 复杂问题，视觉分析 |
 | **Qwen-VL-MAX** | 多模态支持 | 简单问题，视觉分析 |
 | **Gemini-2.5-Pro** | 多模态支持 | 复杂问题，视觉分析 |
-| **Gemini-2.0-Flash** | 多模态支持 | 简单问题，视觉分析 |
+| **Gemini-2.5-Flash** | 多模态支持 | 简单问题，视觉分析 |


 ### 🛠️ 可调参数
@@ -247,4 +247,4 @@ python app.py

 ## 📜 开源协议

-本项目采用 [Apache 2.0](LICENSE) 协议。
+本项目采用 [Apache 2.0](LICENSE) 协议。
--- a/app.py
+++ b/app.py
@@ -101,6 +101,8 @@ def create_model_instance(model_id, settings, is_reasoning=False):
        api_key_id = "AlibabaApiKey"
    elif "gemini" in model_id.lower() or "google" in model_id.lower():
        api_key_id = "GoogleApiKey"
+    elif "doubao" in model_id.lower():
+        api_key_id = "DoubaoApiKey"
    
    # 首先尝试从本地配置获取API密钥
    api_key = get_api_key(api_key_id)
@@ -156,6 +158,10 @@ def create_model_instance(model_id, settings, is_reasoning=False):
            custom_base_url = api_base_urls.get('google')
            if custom_base_url:
                base_url = custom_base_url
+        elif "doubao" in model_id.lower():
+            custom_base_url = api_base_urls.get('doubao')
+            if custom_base_url:
+                base_url = custom_base_url
    
    # 创建模型实例
    model_instance = ModelFactory.create_model(
@@ -318,39 +324,66 @@ def handle_text_extraction(data):
        if not isinstance(settings, dict):
            raise ValueError("Invalid settings format")
        
-        # 尝试从本地配置获取Mathpix API密钥
-        mathpix_app_id = get_api_key('MathpixAppId')
-        mathpix_app_key = get_api_key('MathpixAppKey')
+        # 优先使用百度OCR，如果没有配置则使用Mathpix
+        # 首先尝试获取百度OCR API密钥
+        baidu_api_key = get_api_key('BaiduApiKey')
+        baidu_secret_key = get_api_key('BaiduSecretKey')
        
-        # 构建完整的Mathpix API密钥（格式：app_id:app_key）
-        mathpix_key = f"{mathpix_app_id}:{mathpix_app_key}" if mathpix_app_id and mathpix_app_key else None
+        # 构建百度OCR API密钥（格式：api_key:secret_key）
+        ocr_key = None
+        ocr_model = None
        
-        # 如果本地没有配置，尝试使用前端传递的密钥（向后兼容）
-        if not mathpix_key:
-            mathpix_key = settings.get('mathpixApiKey')
+        if baidu_api_key and baidu_secret_key:
+            ocr_key = f"{baidu_api_key}:{baidu_secret_key}"
+            ocr_model = 'baidu-ocr'
+            print("Using Baidu OCR for text extraction...")
+        else:
+            # 回退到Mathpix
+            mathpix_app_id = get_api_key('MathpixAppId')
+            mathpix_app_key = get_api_key('MathpixAppKey')
+            
+            # 构建完整的Mathpix API密钥（格式：app_id:app_key）
+            mathpix_key = f"{mathpix_app_id}:{mathpix_app_key}" if mathpix_app_id and mathpix_app_key else None
+            
+            # 如果本地没有配置，尝试使用前端传递的密钥（向后兼容）
+            if not mathpix_key:
+                mathpix_key = settings.get('mathpixApiKey')
+            
+            if mathpix_key:
+                ocr_key = mathpix_key
+                ocr_model = 'mathpix'
+                print("Using Mathpix OCR for text extraction...")
        
-        if not mathpix_key:
-            raise ValueError("Mathpix API key is required")
+        if not ocr_key:
+            raise ValueError("OCR API key is required. Please configure Baidu OCR (API Key + Secret Key) or Mathpix (App ID + App Key)")
        
        # 先回复客户端，确认已收到请求，防止超时断开
        # 注意：这里不能使用return，否则后续代码不会执行
        socketio.emit('request_acknowledged', {
            'status': 'received', 
-            'message': 'Image received, text extraction in progress'
+            'message': f'Image received, text extraction in progress using {ocr_model}'
        }, room=request.sid)
        
        try:
-            app_id, app_key = mathpix_key.split(':')
-            if not app_id.strip() or not app_key.strip():
-                raise ValueError()
+            if ocr_model == 'baidu-ocr':
+                api_key, secret_key = ocr_key.split(':')
+                if not api_key.strip() or not secret_key.strip():
+                    raise ValueError()
+            elif ocr_model == 'mathpix':
+                app_id, app_key = ocr_key.split(':')
+                if not app_id.strip() or not app_key.strip():
+                    raise ValueError()
        except ValueError:
-            raise ValueError("Invalid Mathpix API key format. Expected format: 'app_id:app_key'")
+            if ocr_model == 'baidu-ocr':
+                raise ValueError("Invalid Baidu OCR API key format. Expected format: 'API_KEY:SECRET_KEY'")
+            else:
+                raise ValueError("Invalid Mathpix API key format. Expected format: 'app_id:app_key'")

-        print("Creating Mathpix model instance...")
-        # 只传递必需的参数，ModelFactory.create_model会处理不同模型类型
+        print(f"Creating {ocr_model} model instance...")
+        # ModelFactory.create_model会处理不同模型类型
        model = ModelFactory.create_model(
-            model_name='mathpix',
-            api_key=mathpix_key
+            model_name=ocr_model,
+            api_key=ocr_key
        )

        print("Starting text extraction...")
--- a/config/api_base_urls.json
+++ b/config/api_base_urls.json
@@ -3,5 +3,6 @@
  "OpenaiApiBaseUrl": "",
  "DeepseekApiBaseUrl": "",
  "AlibabaApiBaseUrl": "",
-  "GoogleApiBaseUrl": ""
+  "GoogleApiBaseUrl": "",
+  "DoubaoApiBaseUrl": ""
 }
--- a/config/models.json
+++ b/config/models.json
@@ -24,6 +24,11 @@
            "name": "Google",
            "api_key_id": "GoogleApiKey",
            "class_name": "GoogleModel"
+        },
+        "doubao": {
+            "name": "Doubao",
+            "api_key_id": "DoubaoApiKey",
+            "class_name": "DoubaoModel"
        }
    },
    "models": {
@@ -91,21 +96,29 @@
            "version": "latest",
            "description": "阿里通义千问VL-MAX模型，视觉理解能力最强，支持图像理解和复杂任务"
        },
-        "gemini-2.5-pro-preview-03-25": {
+        "gemini-2.5-pro": {
            "name": "Gemini 2.5 Pro",
            "provider": "google",
            "supportsMultimodal": true,
            "isReasoning": true,
-            "version": "preview-03-25",
-            "description": "Google最强大的Gemini 2.5 Pro模型，支持图像理解"
+            "version": "latest",
+            "description": "Google最强大的Gemini 2.5 Pro模型，支持图像理解（需要付费API密钥）"
        },
-        "gemini-2.0-flash": {
-            "name": "Gemini 2.0 Flash",
+        "gemini-2.5-flash": {
+            "name": "Gemini 2.5 Flash",
            "provider": "google",
            "supportsMultimodal": true,
            "isReasoning": false,
            "version": "latest",
-            "description": "Google更快速的Gemini 2.0 Flash模型，支持图像理解，响应更迅速"
+            "description": "Google更快速的Gemini 2.5 Flash模型，支持图像理解，有免费配额"
+        },
+        "doubao-seed-1-6-250615": {
+            "name": "Doubao-Seed-1.6",
+            "provider": "doubao",
+            "supportsMultimodal": true,
+            "isReasoning": true,
+            "version": "latest",
+            "description": "支持auto/thinking/non-thinking三种思考模式、支持多模态、256K长上下文"
        }
    }
 } 
--- a/config/proxy_api.json
+++ b/config/proxy_api.json
@@ -4,7 +4,8 @@
    "anthropic": "",
    "deepseek": "",
    "google": "",
-    "openai": ""
+    "openai": "",
+    "doubao": ""
  },
  "enabled": true
 }
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -4,6 +4,7 @@ from .openai import OpenAIModel
 from .deepseek import DeepSeekModel
 from .alibaba import AlibabaModel
 from .google import GoogleModel
+from .doubao import DoubaoModel
 from .factory import ModelFactory

 __all__ = [
@@ -13,5 +14,6 @@ __all__ = [
    'DeepSeekModel',
    'AlibabaModel',
    'GoogleModel',
+    'DoubaoModel',
    'ModelFactory'
 ]
--- a/models/alibaba.py
+++ b/models/alibaba.py
@@ -4,12 +4,13 @@ from openai import OpenAI
 from .base import BaseModel

 class AlibabaModel(BaseModel):
-    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None):
+    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None, api_base_url: str = None):
        # 如果没有提供模型名称，才使用默认值
        self.model_name = model_name if model_name else "QVQ-Max-2025-03-25"
        print(f"初始化阿里巴巴模型: {self.model_name}")
        # 在super().__init__之前设置model_name，这样get_default_system_prompt能使用它
        super().__init__(api_key, temperature, system_prompt, language)
+        self.api_base_url = api_base_url  # 存储API基础URL
    
    def get_default_system_prompt(self) -> str:
        """根据模型名称返回不同的默认系统提示词"""
--- a/models/baidu_ocr.py
+++ b/models/baidu_ocr.py
@@ -0,0 +1,177 @@
+import base64
+import json
+import time
+import urllib.request
+import urllib.parse
+from typing import Generator, Dict, Any
+from .base import BaseModel
+
+class BaiduOCRModel(BaseModel):
+    """
+    百度OCR模型，用于图像文字识别
+    """
+    
+    def __init__(self, api_key: str, secret_key: str = None, temperature: float = 0.7, system_prompt: str = None):
+        """
+        初始化百度OCR模型
+        
+        Args:
+            api_key: 百度API Key
+            secret_key: 百度Secret Key（可以在api_key中用冒号分隔传入）
+            temperature: 不用于OCR但保持BaseModel兼容性
+            system_prompt: 不用于OCR但保持BaseModel兼容性
+            
+        Raises:
+            ValueError: 如果API密钥格式无效
+        """
+        super().__init__(api_key, temperature, system_prompt)
+        
+        # 支持两种格式：单独传递或在api_key中用冒号分隔
+        if secret_key:
+            self.api_key = api_key
+            self.secret_key = secret_key
+        else:
+            try:
+                self.api_key, self.secret_key = api_key.split(':')
+            except ValueError:
+                raise ValueError("百度OCR API密钥必须是 'API_KEY:SECRET_KEY' 格式或单独传递secret_key参数")
+        
+        # 百度API URLs
+        self.token_url = "https://aip.baidubce.com/oauth/2.0/token"
+        self.ocr_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
+        
+        # 缓存access_token
+        self._access_token = None
+        self._token_expires = 0
+    
+    def get_access_token(self) -> str:
+        """获取百度API的access_token"""
+        # 检查是否需要刷新token（提前5分钟刷新）
+        if self._access_token and time.time() < self._token_expires - 300:
+            return self._access_token
+        
+        # 请求新的access_token
+        params = {
+            'grant_type': 'client_credentials',
+            'client_id': self.api_key,
+            'client_secret': self.secret_key
+        }
+        
+        data = urllib.parse.urlencode(params).encode('utf-8')
+        request = urllib.request.Request(self.token_url, data=data)
+        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        
+        try:
+            with urllib.request.urlopen(request) as response:
+                result = json.loads(response.read().decode('utf-8'))
+                
+            if 'access_token' in result:
+                self._access_token = result['access_token']
+                # 设置过期时间（默认30天，但我们提前刷新）
+                self._token_expires = time.time() + result.get('expires_in', 2592000)
+                return self._access_token
+            else:
+                raise Exception(f"获取access_token失败: {result.get('error_description', '未知错误')}")
+                
+        except Exception as e:
+            raise Exception(f"请求access_token失败: {str(e)}")
+    
+    def ocr_image(self, image_data: str) -> str:
+        """
+        对图像进行OCR识别
+        
+        Args:
+            image_data: Base64编码的图像数据
+            
+        Returns:
+            str: 识别出的文字内容
+        """
+        access_token = self.get_access_token()
+        
+        # 准备请求数据
+        params = {
+            'image': image_data,
+            'language_type': 'auto_detect',  # 自动检测语言
+            'detect_direction': 'true',      # 检测图像朝向
+            'probability': 'false'           # 不返回置信度（减少响应大小）
+        }
+        
+        data = urllib.parse.urlencode(params).encode('utf-8')
+        url = f"{self.ocr_url}?access_token={access_token}"
+        
+        request = urllib.request.Request(url, data=data)
+        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        
+        try:
+            with urllib.request.urlopen(request) as response:
+                result = json.loads(response.read().decode('utf-8'))
+                
+            if 'error_code' in result:
+                raise Exception(f"百度OCR API错误: {result.get('error_msg', '未知错误')}")
+            
+            # 提取识别的文字
+            words_result = result.get('words_result', [])
+            text_lines = [item['words'] for item in words_result]
+            
+            return '\n'.join(text_lines)
+            
+        except Exception as e:
+            raise Exception(f"OCR识别失败: {str(e)}")
+    
+    def extract_full_text(self, image_data: str) -> str:
+        """
+        提取图像中的完整文本（与Mathpix兼容的接口）
+        
+        Args:
+            image_data: Base64编码的图像数据
+            
+        Returns:
+            str: 提取的文本内容
+        """
+        return self.ocr_image(image_data)
+    
+    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]:
+        """
+        分析图像并返回OCR结果（流式输出以保持接口一致性）
+        
+        Args:
+            image_data: Base64编码的图像数据
+            proxies: 代理配置（未使用）
+            
+        Yields:
+            dict: 包含OCR结果的响应
+        """
+        try:
+            text = self.ocr_image(image_data)
+            yield {
+                'status': 'completed',
+                'content': text,
+                'model': 'baidu-ocr'
+            }
+        except Exception as e:
+            yield {
+                'status': 'error',
+                'content': f'OCR识别失败: {str(e)}',
+                'model': 'baidu-ocr'
+            }
+    
+    def analyze_text(self, text: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]:
+        """
+        分析文本（OCR模型不支持文本分析）
+        
+        Args:
+            text: 输入文本
+            proxies: 代理配置（未使用）
+            
+        Yields:
+            dict: 错误响应
+        """
+        yield {
+            'status': 'error',
+            'content': 'OCR模型不支持文本分析功能',
+            'model': 'baidu-ocr'
+        }
+    
+    def get_model_identifier(self) -> str:
+        """返回模型标识符"""
+        return "baidu-ocr"
--- a/models/deepseek.py
+++ b/models/deepseek.py
@@ -6,9 +6,10 @@ from openai import OpenAI
 from .base import BaseModel

 class DeepSeekModel(BaseModel):
-    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = "deepseek-reasoner"):
+    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = "deepseek-reasoner", api_base_url: str = None):
        super().__init__(api_key, temperature, system_prompt, language)
        self.model_name = model_name
+        self.api_base_url = api_base_url  # 存储API基础URL

    def get_default_system_prompt(self) -> str:
        return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question:
--- a/models/doubao.py
+++ b/models/doubao.py
@@ -0,0 +1,339 @@
+import json
+import os
+import base64
+from typing import Generator, Dict, Any, Optional
+import requests
+from .base import BaseModel
+
+class DoubaoModel(BaseModel):
+    """
+    豆包API模型实现类
+    支持字节跳动的豆包AI模型，可处理文本和图像输入
+    """
+    
+    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None, api_base_url: str = None):
+        """
+        初始化豆包模型
+        
+        Args:
+            api_key: 豆包API密钥
+            temperature: 生成温度
+            system_prompt: 系统提示词
+            language: 首选语言
+            model_name: 指定具体模型名称，如不指定则使用默认值
+            api_base_url: API基础URL，用于设置自定义API端点
+        """
+        super().__init__(api_key, temperature, system_prompt, language)
+        self.model_name = model_name or self.get_model_identifier()
+        self.base_url = api_base_url or "https://ark.cn-beijing.volces.com/api/v3"
+        self.max_tokens = 4096  # 默认最大输出token数
+        self.reasoning_config = None  # 推理配置，类似于AnthropicModel
+    
+    def get_default_system_prompt(self) -> str:
+        return """你是一个专业的问题分析专家。当看到问题图片时：
+1. 仔细阅读并理解问题
+2. 分解问题的关键组成部分
+3. 提供清晰的分步解决方案
+4. 如果相关，解释涉及的概念或理论
+5. 如果有多种方法，优先解释最有效的方法"""
+
+    def get_model_identifier(self) -> str:
+        """返回默认的模型标识符"""
+        return "doubao-seed-1-6-250615"  # Doubao-Seed-1.6
+    
+    def get_actual_model_name(self) -> str:
+        """根据配置的模型名称返回实际的API调用标识符"""
+        # 豆包API的实际模型名称映射
+        model_mapping = {
+            "doubao-seed-1-6-250615": "doubao-seed-1-6-250615"
+        }
+        
+        return model_mapping.get(self.model_name, "doubao-seed-1-6-250615")
+    
+    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
+        """流式生成文本响应"""
+        try:
+            yield {"status": "started"}
+            
+            # 设置环境变量代理（如果提供）
+            original_proxies = None
+            if proxies:
+                original_proxies = {
+                    'http_proxy': os.environ.get('http_proxy'),
+                    'https_proxy': os.environ.get('https_proxy')
+                }
+                if 'http' in proxies:
+                    os.environ['http_proxy'] = proxies['http']
+                if 'https' in proxies:
+                    os.environ['https_proxy'] = proxies['https']
+            
+            try:
+                # 构建请求头
+                headers = {
+                    "Authorization": f"Bearer {self.api_key}",
+                    "Content-Type": "application/json"
+                }
+                
+                # 构建消息 - 根据官方API文档，暂时不使用系统提示词
+                messages = []
+                
+                # 添加用户查询
+                user_content = text
+                if self.language and self.language != 'auto':
+                    user_content = f"请使用{self.language}回答以下问题: {text}"
+                
+                messages.append({
+                    "role": "user",
+                    "content": user_content
+                })
+
+                # 处理推理配置
+                thinking = {
+                    "type": "auto"  # 默认值
+                }
+                
+                if hasattr(self, 'reasoning_config') and self.reasoning_config:
+                    # 从reasoning_config中获取thinking_mode
+                    thinking_mode = self.reasoning_config.get('thinking_mode', "auto")
+                    thinking = {
+                        "type": thinking_mode
+                    }
+
+                # 构建请求数据
+                data = {
+                    "model": self.get_actual_model_name(),
+                    "messages": messages,
+                    "thinking": thinking,
+                    "temperature": self.temperature,
+                    "max_tokens": self.max_tokens,
+                    "stream": True
+                }
+                
+                # 发送流式请求
+                response = requests.post(
+                    f"{self.base_url}/chat/completions",
+                    headers=headers,
+                    json=data,
+                    stream=True,
+                    proxies=proxies if proxies else None,
+                    timeout=60
+                )
+                
+                if response.status_code != 200:
+                    error_text = response.text
+                    raise Exception(f"HTTP {response.status_code}: {error_text}")
+                
+                response.raise_for_status()
+                
+                # 初始化响应缓冲区
+                response_buffer = ""
+                
+                # 处理流式响应
+                for line in response.iter_lines():
+                    if not line:
+                        continue
+                    
+                    line = line.decode('utf-8')
+                    if not line.startswith('data: '):
+                        continue
+                    
+                    line = line[6:]  # 移除 'data: ' 前缀
+                    
+                    if line == '[DONE]':
+                        break
+                    
+                    try:
+                        chunk_data = json.loads(line)
+                        choices = chunk_data.get('choices', [])
+                        
+                        if choices and len(choices) > 0:
+                            delta = choices[0].get('delta', {})
+                            content = delta.get('content', '')
+                            
+                            if content:
+                                response_buffer += content
+                                
+                                # 发送响应进度
+                                yield {
+                                    "status": "streaming",
+                                    "content": response_buffer
+                                }
+                    
+                    except json.JSONDecodeError:
+                        continue
+                
+                # 确保发送完整的最终内容
+                yield {
+                    "status": "completed",
+                    "content": response_buffer
+                }
+            
+            finally:
+                # 恢复原始代理设置
+                if original_proxies:
+                    for key, value in original_proxies.items():
+                        if value is None:
+                            if key in os.environ:
+                                del os.environ[key]
+                        else:
+                            os.environ[key] = value
+                
+        except Exception as e:
+            yield {
+                "status": "error",
+                "error": f"豆包API错误: {str(e)}"
+            }
+    
+    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
+        """分析图像并流式生成响应"""
+        try:
+            yield {"status": "started"}
+            
+            # 设置环境变量代理（如果提供）
+            original_proxies = None
+            if proxies:
+                original_proxies = {
+                    'http_proxy': os.environ.get('http_proxy'),
+                    'https_proxy': os.environ.get('https_proxy')
+                }
+                if 'http' in proxies:
+                    os.environ['http_proxy'] = proxies['http']
+                if 'https' in proxies:
+                    os.environ['https_proxy'] = proxies['https']
+            
+            try:
+                # 构建请求头
+                headers = {
+                    "Authorization": f"Bearer {self.api_key}",
+                    "Content-Type": "application/json"
+                }
+                
+                # 处理图像数据
+                if image_data.startswith('data:image'):
+                    # 如果是data URI，提取base64部分
+                    image_data = image_data.split(',', 1)[1]
+                
+                # 构建用户消息 - 使用豆包API官方示例格式
+                # 首先检查图像数据的格式，确保是有效的图像
+                image_format = "jpeg"  # 默认使用jpeg
+                if image_data.startswith('/9j/'):  # JPEG magic number in base64
+                    image_format = "jpeg"
+                elif image_data.startswith('iVBORw0KGgo'):  # PNG magic number in base64
+                    image_format = "png"
+                
+                user_content = [
+                    {
+                        "type": "text",
+                        "text": f"请使用{self.language}分析这张图片并提供详细解答。" if self.language and self.language != 'auto' else "请分析这张图片并提供详细解答?"
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/{image_format};base64,{image_data}"
+                        }
+                    }
+                ]
+                
+                messages = [
+                    {
+                        "role": "user",
+                        "content": user_content
+                    }
+                ]
+
+                # 处理推理配置
+                thinking = {
+                    "type": "auto"  # 默认值
+                }
+                
+                if hasattr(self, 'reasoning_config') and self.reasoning_config:
+                    # 从reasoning_config中获取thinking_mode
+                    thinking_mode = self.reasoning_config.get('thinking_mode', "auto")
+                    thinking = {
+                        "type": thinking_mode
+                    }
+                
+                # 构建请求数据
+                data = {
+                    "model": self.get_actual_model_name(),
+                    "messages": messages,
+                    "thinking": thinking,
+                    "temperature": self.temperature,
+                    "max_tokens": self.max_tokens,
+                    "stream": True
+                }
+                
+                # 发送流式请求
+                response = requests.post(
+                    f"{self.base_url}/chat/completions",
+                    headers=headers,
+                    json=data,
+                    stream=True,
+                    proxies=proxies if proxies else None,
+                    timeout=60
+                )
+                
+                if response.status_code != 200:
+                    error_text = response.text
+                    raise Exception(f"HTTP {response.status_code}: {error_text}")
+                
+                response.raise_for_status()
+                
+                # 初始化响应缓冲区
+                response_buffer = ""
+                
+                # 处理流式响应
+                for line in response.iter_lines():
+                    if not line:
+                        continue
+                    
+                    line = line.decode('utf-8')
+                    if not line.startswith('data: '):
+                        continue
+                    
+                    line = line[6:]  # 移除 'data: ' 前缀
+                    
+                    if line == '[DONE]':
+                        break
+                    
+                    try:
+                        chunk_data = json.loads(line)
+                        choices = chunk_data.get('choices', [])
+                        
+                        if choices and len(choices) > 0:
+                            delta = choices[0].get('delta', {})
+                            content = delta.get('content', '')
+                            
+                            if content:
+                                response_buffer += content
+                                
+                                # 发送响应进度
+                                yield {
+                                    "status": "streaming",
+                                    "content": response_buffer
+                                }
+                    
+                    except json.JSONDecodeError:
+                        continue
+                
+                # 确保发送完整的最终内容
+                yield {
+                    "status": "completed",
+                    "content": response_buffer
+                }
+            
+            finally:
+                # 恢复原始代理设置
+                if original_proxies:
+                    for key, value in original_proxies.items():
+                        if value is None:
+                            if key in os.environ:
+                                del os.environ[key]
+                        else:
+                            os.environ[key] = value
+                
+        except Exception as e:
+            yield {
+                "status": "error",
+                "error": f"豆包图像分析错误: {str(e)}"
+            }
--- a/models/factory.py
+++ b/models/factory.py
@@ -3,7 +3,8 @@ import json
 import os
 import importlib
 from .base import BaseModel
-from .mathpix import MathpixModel  # MathpixModel仍然需要直接导入，因为它是特殊工具
+from .mathpix import MathpixModel  # MathpixModel需要直接导入，因为它是特殊OCR工具
+from .baidu_ocr import BaiduOCRModel  # 百度OCR也是特殊OCR工具，直接导入

 class ModelFactory:
    # 模型基本信息，包含类型和特性
@@ -39,13 +40,25 @@ class ModelFactory:
                        'description': model_info.get('description', '')
                    }
            
-            # 添加Mathpix模型（特殊工具模型）
+            # 添加特殊OCR工具模型（不在配置文件中定义）
+            
+            # 添加Mathpix OCR工具
            cls._models['mathpix'] = {
                'class': MathpixModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': 'Mathpix OCR',
-                'description': '文本提取工具，适用于数学公式和文本',
+                'description': '数学公式识别工具，适用于复杂数学内容',
+                'is_ocr_only': True
+            }
+            
+            # 添加百度OCR工具
+            cls._models['baidu-ocr'] = {
+                'class': BaiduOCRModel,
+                'is_multimodal': True,
+                'is_reasoning': False,
+                'display_name': '百度OCR',
+                'description': '通用文字识别工具，支持中文识别',
                'is_ocr_only': True
            }
            
@@ -62,22 +75,36 @@ class ModelFactory:
        # 不再硬编码模型定义，而是使用空字典
        cls._models = {}
        
-        # 只保留Mathpix作为基础工具
+        # 添加特殊OCR工具（当配置加载失败时的备用）
        try:
-            # 导入MathpixModel类
+            # 导入并添加Mathpix OCR工具
            from .mathpix import MathpixModel
            
-            # 添加Mathpix作为基础工具
            cls._models['mathpix'] = {
                'class': MathpixModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': 'Mathpix OCR',
-                'description': '文本提取工具，适用于数学公式和文本',
+                'description': '数学公式识别工具，适用于复杂数学内容',
                'is_ocr_only': True
            }
        except Exception as e:
-            print(f"无法加载基础Mathpix工具: {str(e)}")
+            print(f"无法加载Mathpix OCR工具: {str(e)}")
+            
+        # 添加百度OCR工具
+        try:
+            from .baidu_ocr import BaiduOCRModel
+            
+            cls._models['baidu-ocr'] = {
+                'class': BaiduOCRModel,
+                'is_multimodal': True,
+                'is_reasoning': False,
+                'display_name': '百度OCR',
+                'description': '通用文字识别工具，支持中文识别',
+                'is_ocr_only': True
+            }
+        except Exception as e:
+            print(f"无法加载百度OCR工具: {str(e)}")

    @classmethod
    def create_model(cls, model_name: str, api_key: str, temperature: float = 0.7, 
@@ -114,6 +141,25 @@ class ModelFactory:
            )
        # 对于阿里巴巴模型，也需要传递正确的模型名称
        elif 'qwen' in model_name.lower() or 'qvq' in model_name.lower() or 'alibaba' in model_name.lower():
+            return model_class(
+                api_key=api_key,
+                temperature=temperature,
+                system_prompt=system_prompt,
+                language=language,
+                model_name=model_name
+            )
+        # 对于Google模型，也需要传递正确的模型名称
+        elif 'gemini' in model_name.lower() or 'google' in model_name.lower():
+            return model_class(
+                api_key=api_key,
+                temperature=temperature,
+                system_prompt=system_prompt,
+                language=language,
+                model_name=model_name,
+                api_base_url=api_base_url
+            )
+        # 对于豆包模型，也需要传递正确的模型名称
+        elif 'doubao' in model_name.lower():
            return model_class(
                api_key=api_key,
                temperature=temperature,
@@ -129,6 +175,13 @@ class ModelFactory:
                temperature=temperature,
                system_prompt=system_prompt
            )
+        # 对于百度OCR模型，传递api_key（支持API_KEY:SECRET_KEY格式）
+        elif model_name == 'baidu-ocr':
+            return model_class(
+                api_key=api_key,
+                temperature=temperature,
+                system_prompt=system_prompt
+            )
        # 对于Anthropic模型，需要传递model_identifier参数
        elif 'claude' in model_name.lower() or 'anthropic' in model_name.lower():
            return model_class(
--- a/models/google.py
+++ b/models/google.py
@@ -30,10 +30,17 @@ class GoogleModel(BaseModel):
        
        # 配置Google API
        if api_base_url:
-            # 如果提供了自定义API基础URL，设置genai的api_url
-            genai.configure(api_key=api_key, transport="rest", client_options={"api_endpoint": api_base_url})
+            # 配置中转API - 使用环境变量方式
+            # 移除末尾的斜杠以避免重复路径问题
+            clean_base_url = api_base_url.rstrip('/')
+            # 设置环境变量来指定API端点
+            os.environ['GOOGLE_AI_API_ENDPOINT'] = clean_base_url
+            genai.configure(api_key=api_key)
        else:
            # 使用默认API端点
+            # 清除可能存在的自定义端点环境变量
+            if 'GOOGLE_AI_API_ENDPOINT' in os.environ:
+                del os.environ['GOOGLE_AI_API_ENDPOINT']
            genai.configure(api_key=api_key)
    
    def get_default_system_prompt(self) -> str:
@@ -46,7 +53,7 @@ class GoogleModel(BaseModel):

    def get_model_identifier(self) -> str:
        """返回默认的模型标识符"""
-        return "gemini-2.5-pro-preview-03-25"
+        return "gemini-2.5-flash"  # 使用有免费配额的模型作为默认值
    
    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """流式生成文本响应"""
--- a/static/js/main.js
+++ b/static/js/main.js
@@ -1053,10 +1053,33 @@ class SnapSolver {
            this.extractTextBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i><span>提取中...</span>';

            const settings = window.settingsManager.getSettings();
+            
+            // 根据用户设置的OCR源进行选择
+            const ocrSource = settings.ocrSource || 'auto';
+            const baiduApiKey = window.settingsManager.apiKeyValues.BaiduApiKey;
+            const baiduSecretKey = window.settingsManager.apiKeyValues.BaiduSecretKey;
            const mathpixApiKey = settings.mathpixApiKey;
            
-            if (!mathpixApiKey || mathpixApiKey === ':') {
-                window.uiManager.showToast('请在设置中输入Mathpix API凭据', 'error');
+            const hasBaiduOCR = baiduApiKey && baiduSecretKey;
+            const hasMathpix = mathpixApiKey && mathpixApiKey !== ':';
+            
+            // 根据OCR源配置检查可用性
+            let canProceed = false;
+            let missingOCRMessage = '';
+            
+            if (ocrSource === 'baidu') {
+                canProceed = hasBaiduOCR;
+                missingOCRMessage = '请在设置中配置百度OCR API密钥';
+            } else if (ocrSource === 'mathpix') {
+                canProceed = hasMathpix;
+                missingOCRMessage = '请在设置中配置Mathpix API密钥';
+            } else { // auto
+                canProceed = hasBaiduOCR || hasMathpix;
+                missingOCRMessage = '请在设置中配置OCR API密钥：百度OCR（推荐）或Mathpix';
+            }
+            
+            if (!canProceed) {
+                window.uiManager.showToast(missingOCRMessage, 'error');
                document.getElementById('settingsPanel').classList.add('active');
                this.extractTextBtn.disabled = false;
                this.extractTextBtn.innerHTML = '<i class="fas fa-font"></i><span>提取文本</span>';
@@ -1076,7 +1099,7 @@ class SnapSolver {
                this.socket.emit('extract_text', {
                    image: this.croppedImage.split(',')[1],
                    settings: {
-                        mathpixApiKey: mathpixApiKey
+                        ocrSource: settings.ocrSource || 'auto'
                    }
                });

--- a/static/js/settings.js
+++ b/static/js/settings.js
@@ -374,6 +374,9 @@ class SettingsManager {
        // 模型选择器对象
        this.modelSelector = null;
        
+        // OCR源配置
+        this.ocrSource = 'auto'; // 默认自动选择
+        
        // 存储API密钥的对象
        this.apiKeyValues = {
            'AnthropicApiKey': '',
@@ -381,6 +384,9 @@ class SettingsManager {
            'DeepseekApiKey': '',
            'AlibabaApiKey': '',
            'GoogleApiKey': '',
+            'DoubaoApiKey': '',
+            'BaiduApiKey': '',
+            'BaiduSecretKey': '',
            'MathpixAppId': '',
            'MathpixAppKey': ''
        };
@@ -391,7 +397,8 @@ class SettingsManager {
            'OpenaiApiBaseUrl': '',
            'DeepseekApiBaseUrl': '',
            'AlibabaApiBaseUrl': '',
-            'GoogleApiBaseUrl': ''
+            'GoogleApiBaseUrl': '',
+            'DoubaoApiBaseUrl': ''
        };
        
        // 加载模型配置
@@ -580,6 +587,13 @@ class SettingsManager {
                this.updateReasoningOptionUI(settings.reasoningDepth);
        }
        
+        // 加载豆包思考模式设置
+        if (settings.doubaoThinkingMode && this.doubaoThinkingModeSelect) {
+            this.doubaoThinkingModeSelect.value = settings.doubaoThinkingMode;
+            // 更新豆包思考选项UI
+            this.updateDoubaoThinkingOptionUI(settings.doubaoThinkingMode);
+        }
+        
        // 加载思考预算百分比
        const thinkBudgetPercent = parseInt(settings.thinkBudgetPercent || '50');
        if (this.thinkBudgetPercentInput) {
@@ -624,6 +638,14 @@ class SettingsManager {
            this.proxyPortInput.value = settings.proxyPort;
        }
        
+        // Load OCR source setting
+        if (settings.ocrSource) {
+            this.ocrSource = settings.ocrSource;
+            if (this.ocrSourceSelect) {
+                this.ocrSourceSelect.value = settings.ocrSource;
+            }
+        }
+        
        // Update UI based on model type
        this.updateUIBasedOnModelType();
            
@@ -720,6 +742,14 @@ class SettingsManager {
            this.thinkBudgetGroup.style.display = showThinkBudget ? 'block' : 'none';
        }
        
+        // 处理豆包深度思考设置显示
+        const isDoubaoReasoning = modelInfo.isReasoning && modelInfo.provider === 'doubao';
+        
+        // 只有对豆包推理模型才显示深度思考设置
+        if (this.doubaoThinkingGroup) {
+            this.doubaoThinkingGroup.style.display = isDoubaoReasoning ? 'block' : 'none';
+        }
+        
        // 控制最大Token设置的显示
        // 阿里巴巴模型不支持自定义Token设置
        const maxTokensGroup = this.maxTokens ? this.maxTokens.closest('.setting-group') : null;
@@ -759,6 +789,8 @@ class SettingsManager {
            apiKeyToHighlight = document.querySelector('.api-key-status:nth-child(4)'); // Alibaba
        } else if (modelType && (modelType.toLowerCase().includes('gemini') || modelType.toLowerCase().includes('google'))) {
            apiKeyToHighlight = document.querySelector('.api-key-status:nth-child(5)'); // Google
+        } else if (modelType && modelType.toLowerCase().includes('doubao')) {
+            apiKeyToHighlight = document.querySelector('.api-key-status:nth-child(6)'); // 豆包
        }
        
        if (apiKeyToHighlight) {
@@ -775,6 +807,7 @@ class SettingsManager {
            model: this.modelSelect.value,
                maxTokens: this.maxTokens.value,
            reasoningDepth: this.reasoningDepthSelect?.value || 'standard',
+            doubaoThinkingMode: this.doubaoThinkingModeSelect?.value || 'auto',
            thinkBudgetPercent: this.thinkBudgetPercentInput?.value || '50',
            temperature: this.temperatureInput.value,
            language: this.languageInput.value,
@@ -782,7 +815,8 @@ class SettingsManager {
            currentPromptId: this.currentPromptId,
            proxyEnabled: this.proxyEnabledInput.checked,
            proxyHost: this.proxyHostInput.value,
-            proxyPort: this.proxyPortInput.value
+            proxyPort: this.proxyPortInput.value,
+            ocrSource: this.ocrSource // 添加OCR源配置保存
        };

            // 保存设置到localStorage
@@ -832,17 +866,30 @@ class SettingsManager {
        const reasoningDepth = this.reasoningDepthSelect?.value || 'standard';
        const thinkBudgetPercent = parseInt(this.thinkBudgetPercentInput?.value || '50');
        
+        // 获取豆包思考模式设置
+        const doubaoThinkingMode = this.doubaoThinkingModeSelect?.value || 'auto';
+        
        // 计算思考预算的实际Token数
        const thinkBudget = Math.floor(maxTokens * (thinkBudgetPercent / 100));
        
        // 构建推理配置参数
        const reasoningConfig = {};
-        if (modelInfo.provider === 'anthropic' && modelInfo.isReasoning) {
-            if (reasoningDepth === 'extended') {
-                reasoningConfig.reasoning_depth = 'extended';
-                reasoningConfig.think_budget = thinkBudget;
-            } else {
-                reasoningConfig.speed_mode = 'instant';
+        
+        // 处理不同模型的推理配置
+        if (modelInfo.isReasoning) {
+            // 对于Anthropic模型
+            if (modelInfo.provider === 'anthropic') {
+                if (reasoningDepth === 'extended') {
+                    reasoningConfig.reasoning_depth = 'extended';
+                    reasoningConfig.think_budget = thinkBudget;
+                } else {
+                    reasoningConfig.speed_mode = 'instant';
+                }
+            }
+            
+            // 对于豆包模型
+            if (modelInfo.provider === 'doubao') {
+                reasoningConfig.thinking_mode = doubaoThinkingMode;
            }
        }
        
@@ -869,6 +916,9 @@ class SettingsManager {
            if (this.apiBaseUrlValues['GoogleApiBaseUrl']) {
                apiBaseUrls.google = this.apiBaseUrlValues['GoogleApiBaseUrl'];
            }
+            if (this.apiBaseUrlValues['DoubaoApiBaseUrl']) {
+                apiBaseUrls.doubao = this.apiBaseUrlValues['DoubaoApiBaseUrl'];
+            }
        }
        
        return {
@@ -881,6 +931,8 @@ class SettingsManager {
            proxyHost: this.proxyHostInput.value,
            proxyPort: this.proxyPortInput.value,
            mathpixApiKey: mathpixApiKey,
+            ocrSource: this.ocrSource, // 添加OCR源配置
+            doubaoThinkingMode: doubaoThinkingMode, // 添加豆包思考模式配置
            modelInfo: {
                supportsMultimodal: modelInfo.supportsMultimodal || false,
                isReasoning: modelInfo.isReasoning || false,
@@ -1121,6 +1173,20 @@ class SettingsManager {
            this.saveSettings();
        });

+        // OCR源选择器事件监听
+        if (this.ocrSourceSelect) {
+            this.ocrSourceSelect.addEventListener('change', (e) => {
+                // 阻止事件冒泡
+                e.stopPropagation();
+                
+                // 更新OCR源配置
+                this.ocrSource = e.target.value;
+                this.saveSettings();
+                
+                console.log('OCR源已切换为:', this.ocrSource);
+            });
+        }
+
        // Panel visibility
        if (this.settingsToggle) {
        this.settingsToggle.addEventListener('click', () => {
@@ -1195,6 +1261,71 @@ class SettingsManager {
        
        // 初始化API密钥编辑功能
        this.initApiKeyEditFunctions();
+        
+        // 初始化推理选项事件
+        this.initReasoningOptionEvents();
+        
+        // 初始化豆包思考选项事件
+        this.initDoubaoThinkingOptionEvents();
+    }
+    
+    // Attach click handlers to every .reasoning-option element: a click syncs the select, refreshes the option UI, and saves settings
+    initReasoningOptionEvents() {
+        const reasoningOptions = document.querySelectorAll('.reasoning-option');
+        reasoningOptions.forEach(option => {
+            option.addEventListener('click', (e) => {
+                e.preventDefault();
+                e.stopPropagation();
+                
+                const value = option.getAttribute('data-value');
+                if (value && this.reasoningDepthSelect) {
+                    // Mirror the clicked option's data-value into the select element
+                    this.reasoningDepthSelect.value = value;
+                    
+                    // Refresh the option UI for the new value
+                    this.updateReasoningOptionUI(value);
+                    
+                    // Persist the settings
+                    this.saveSettings();
+                }
+            });
+        });
+    }
+    
+    // Attach click handlers to every .doubao-thinking-option element: a click syncs the select, refreshes the option UI, and saves settings
+    initDoubaoThinkingOptionEvents() {
+        const doubaoThinkingOptions = document.querySelectorAll('.doubao-thinking-option');
+        doubaoThinkingOptions.forEach(option => {
+            option.addEventListener('click', (e) => {
+                e.preventDefault();
+                e.stopPropagation();
+                
+                const value = option.getAttribute('data-value');
+                if (value && this.doubaoThinkingModeSelect) {
+                    // Mirror the clicked option's data-value into the select element
+                    this.doubaoThinkingModeSelect.value = value;
+                    
+                    // Refresh the option UI for the new value
+                    this.updateDoubaoThinkingOptionUI(value);
+                    
+                    // Persist the settings
+                    this.saveSettings();
+                }
+            });
+        });
+    }
+    
+    // Mark the .doubao-thinking-option whose data-value equals `value` as 'active' and clear 'active' from all others
+    updateDoubaoThinkingOptionUI(value) {
+        const doubaoThinkingOptions = document.querySelectorAll('.doubao-thinking-option');
+        doubaoThinkingOptions.forEach(option => {
+            const optionValue = option.getAttribute('data-value');
+            if (optionValue === value) {
+                option.classList.add('active');
+            } else {
+                option.classList.remove('active');
+            }
+        });
     }

    // 更新思考预算显示
@@ -2208,10 +2339,17 @@ class SettingsManager {
        this.thinkBudgetPercentValue = document.getElementById('thinkBudgetPercentValue');
        this.thinkBudgetGroup = document.querySelector('.think-budget-group');
        
+        // 豆包深度思考相关元素
+        this.doubaoThinkingModeSelect = document.getElementById('doubaoThinkingMode');
+        this.doubaoThinkingGroup = document.querySelector('.doubao-thinking-group');
+        
        // Initialize Mathpix inputs
        this.mathpixAppIdInput = document.getElementById('mathpixAppId');
        this.mathpixAppKeyInput = document.getElementById('mathpixAppKey');
        
+        // OCR源选择器
+        this.ocrSourceSelect = document.getElementById('ocrSourceSelect');
+        
        // API Key elements - 所有的密钥输入框
        this.apiKeyInputs = {
            'AnthropicApiKey': document.getElementById('AnthropicApiKey'),
@@ -2260,6 +2398,9 @@ class SettingsManager {
            'DeepseekApiKey': '',
            'AlibabaApiKey': '',
            'GoogleApiKey': '',
+            'DoubaoApiKey': '',
+            'BaiduApiKey': '',
+            'BaiduSecretKey': '',
            'MathpixAppId': '',
            'MathpixAppKey': ''
        };
@@ -2359,7 +2500,8 @@ class SettingsManager {
                    'OpenaiApiBaseUrl': proxyApiConfig.apis?.openai || '',
                    'DeepseekApiBaseUrl': proxyApiConfig.apis?.deepseek || '',
                    'AlibabaApiBaseUrl': proxyApiConfig.apis?.alibaba || '',
-                    'GoogleApiBaseUrl': proxyApiConfig.apis?.google || ''
+                    'GoogleApiBaseUrl': proxyApiConfig.apis?.google || '',
+                    'DoubaoApiBaseUrl': proxyApiConfig.apis?.doubao || ''
                };
                this.updateApiBaseUrlStatus(apiBaseUrls);
                console.log('API基础URL状态已刷新');
@@ -2449,6 +2591,9 @@ class SettingsManager {
                case 'GoogleApiBaseUrl':
                    config.apis.google = value;
                    break;
+                case 'DoubaoApiBaseUrl':
+                    config.apis.doubao = value;
+                    break;
            }
            
            // 确保启用中转API
--- a/static/style.css
+++ b/static/style.css
@@ -2174,6 +2174,82 @@ button:disabled {
    transition: all 0.2s ease-in-out;
 }

+/* OCR settings styles */
+.ocr-settings {
+    margin-bottom: 1.5rem;
+}
+
+.ocr-source-control {
+    display: flex;
+    flex-direction: column;
+    gap: 12px;
+}
+
+.ocr-source-selector {
+    position: relative;
+}
+
+.ocr-source-select {
+    width: 100%;
+    padding: 10px 14px;
+    border: 1px solid var(--border-color);
+    border-radius: 8px;
+    background: var(--surface);
+    color: var(--text-primary);
+    font-size: 0.9rem;
+    transition: all 0.2s ease;
+}
+
+.ocr-source-select:hover {
+    border-color: var(--primary-color);
+    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
+}
+
+.ocr-source-select:focus {
+    outline: none;
+    border-color: var(--primary-color);
+    box-shadow: 0 0 0 3px rgba(var(--primary-rgb), 0.1);
+}
+
+.ocr-source-description {
+    display: flex;
+    flex-direction: column;
+    gap: 8px;
+    padding: 12px;
+    background: rgba(0, 0, 0, 0.02);
+    border-radius: 8px;
+    border: 1px solid var(--border-color);
+}
+
+.ocr-desc-item {
+    display: flex;
+    align-items: flex-start;
+    gap: 8px;
+    font-size: 0.85rem;
+    line-height: 1.4;
+    color: var(--text-secondary);
+}
+
+.ocr-desc-item i {
+    color: var(--primary-color);
+    margin-top: 2px;
+    flex-shrink: 0;
+}
+
+.ocr-desc-item strong {
+    color: var(--text-primary);
+}
+
+/* OCR settings styles under the dark theme */
+[data-theme="dark"] .ocr-source-description {
+    background: rgba(255, 255, 255, 0.02);
+}
+
+[data-theme="dark"] .ocr-source-select {
+    background: var(--surface);
+    border-color: var(--border-color);
+}
+
 /* 新增的推理控制组件样式 */
 .reasoning-control {
    display: flex;
@@ -2260,6 +2336,122 @@ button:disabled {
    opacity: 1;
 }

+/* Doubao deep-thinking control component styles */
+.doubao-thinking-control {
+    display: flex;
+    flex-direction: column;
+    gap: 8px;
+}
+
+.doubao-thinking-label {
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    margin-bottom: 6px;
+}
+
+.doubao-thinking-selector {
+    display: flex;
+    gap: 8px;
+    margin-bottom: 8px;
+}
+
+.doubao-thinking-option {
+    flex: 1;
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    padding: 12px 8px;
+    border-radius: 8px;
+    background: rgba(0, 0, 0, 0.05);
+    cursor: pointer;
+    transition: all 0.2s ease;
+    border: 2px solid transparent;
+    position: relative;
+    overflow: hidden;
+    min-height: 80px;
+    justify-content: center;
+}
+
+.doubao-thinking-option::before {
+    content: '';
+    position: absolute;
+    bottom: 0;
+    left: 0;
+    width: 100%;
+    height: 3px;
+    background: linear-gradient(to right, var(--primary-color), transparent);
+    opacity: 0;
+    transition: opacity 0.3s ease;
+}
+
+.doubao-thinking-option:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
+}
+
+.doubao-thinking-option.active {
+    background: rgba(var(--primary-rgb), 0.1);
+    border-color: var(--primary-color);
+}
+
+.doubao-thinking-option.active::before {
+    opacity: 1;
+}
+
+.doubao-thinking-option i {
+    font-size: 1.3rem;
+    margin-bottom: 6px;
+    color: var(--primary-color);
+    opacity: 0.8;
+    transition: all 0.2s ease;
+}
+
+.doubao-thinking-option .option-name {
+    font-weight: 600;
+    font-size: 0.85rem;
+    margin-bottom: 4px;
+    text-align: center;
+}
+
+.doubao-thinking-option .option-desc {
+    font-size: 0.7rem;
+    opacity: 0.7;
+    text-align: center;
+    line-height: 1.2;
+}
+
+.doubao-thinking-option:hover i {
+    transform: scale(1.1);
+    opacity: 1;
+}
+
+.doubao-thinking-desc {
+    display: flex;
+    flex-direction: column;
+    gap: 6px;
+    margin-top: 8px;
+    padding: 8px;
+    background: rgba(0, 0, 0, 0.03);
+    border-radius: 6px;
+}
+
+.doubao-desc-item {
+    display: flex;
+    align-items: center;
+    gap: 8px;
+    font-size: 0.8rem;
+    opacity: 0.8;
+}
+
+.doubao-desc-item i {
+    font-size: 0.9rem;
+    color: var(--primary-color);
+    opacity: 0.7;
+    width: 16px;
+    text-align: center;
+}
+
 /* 思考预算控制组件样式 */
 .think-budget-control {
    display: flex;
@@ -2411,6 +2603,18 @@ button:disabled {
    background: rgba(var(--primary-rgb), 0.2);
 }

+[data-theme="dark"] .doubao-thinking-option {
+    background: rgba(255, 255, 255, 0.05);
+}
+
+[data-theme="dark"] .doubao-thinking-option.active {
+    background: rgba(var(--primary-rgb), 0.2);
+}
+
+[data-theme="dark"] .doubao-thinking-desc {
+    background: rgba(255, 255, 255, 0.03);
+}
+
 [data-theme="dark"] .think-value-badge {
    background: rgba(255, 255, 255, 0.1);
 }
--- a/templates/index.html
+++ b/templates/index.html
@@ -219,6 +219,49 @@
                            </select>
                        </div>
                    </div>
+                    <div class="setting-group doubao-thinking-group" style="display: none;">
+                        <div class="doubao-thinking-control">
+                            <div class="doubao-thinking-label">
+                                <label for="doubaoThinkingMode"><i class="fas fa-cogs"></i> 豆包深度思考模式</label>
+                            </div>
+                            <div class="doubao-thinking-selector">
+                                <div class="doubao-thinking-option active" data-value="auto">
+                                    <i class="fas fa-magic"></i>
+                                    <span class="option-name">自动模式</span>
+                                    <span class="option-desc">由AI自动决定是否使用深度思考</span>
+                                </div>
+                                <div class="doubao-thinking-option" data-value="enabled">
+                                    <i class="fas fa-brain"></i>
+                                    <span class="option-name">开启思考</span>
+                                    <span class="option-desc">强制启用深度思考过程</span>
+                                </div>
+                                <div class="doubao-thinking-option" data-value="disabled">
+                                    <i class="fas fa-bolt"></i>
+                                    <span class="option-name">关闭思考</span>
+                                    <span class="option-desc">禁用深度思考，快速响应</span>
+                                </div>
+                            </div>
+                            <select id="doubaoThinkingMode" class="hidden">
+                                <option value="auto">自动模式</option>
+                                <option value="enabled">开启思考</option>
+                                <option value="disabled">关闭思考</option>
+                            </select>
+                            <div class="doubao-thinking-desc">
+                                <div class="doubao-desc-item">
+                                    <i class="fas fa-info-circle"></i>
+                                    <span><strong>自动模式：</strong>AI根据问题复杂度自动决定</span>
+                                </div>
+                                <div class="doubao-desc-item">
+                                    <i class="fas fa-lightbulb"></i>
+                                    <span><strong>开启思考：</strong>显示完整的思考推理过程</span>
+                                </div>
+                                <div class="doubao-desc-item">
+                                    <i class="fas fa-rocket"></i>
+                                    <span><strong>关闭思考：</strong>直接给出答案，响应更快</span>
+                                </div>
+                            </div>
+                        </div>
+                    </div>
                    <div class="setting-group think-budget-group">
                        <div class="think-budget-control">
                            <div class="think-budget-label">
@@ -252,6 +295,7 @@
                            </div>
                        </div>
                    </div>
+                    <!-- 已删除重复的豆包思考模式UI元素 -->
                    <div class="setting-group">
                        <div class="temperature-control">
                            <div class="temperature-label">
@@ -309,6 +353,37 @@
                    </div>
                </div>

+                <!-- OCR设置部分 -->
+                <div class="settings-section ocr-settings">
+                    <h3><i class="fas fa-font"></i> OCR设置</h3>
+                    <div class="setting-group">
+                        <div class="ocr-source-control">
+                            <label for="ocrSourceSelect"><i class="fas fa-eye"></i> OCR工具源</label>
+                            <div class="ocr-source-selector">
+                                <select id="ocrSourceSelect" class="ocr-source-select">
+                                    <option value="auto">自动选择</option>
+                                    <option value="baidu">百度OCR</option>
+                                    <option value="mathpix">Mathpix</option>
+                                </select>
+                            </div>
+                            <div class="ocr-source-description">
+                                <div class="ocr-desc-item">
+                                    <i class="fas fa-magic"></i>
+                                    <span><strong>自动选择：</strong>优先使用百度OCR，如无配置则使用Mathpix</span>
+                                </div>
+                                <div class="ocr-desc-item">
+                                    <i class="fas fa-language"></i>
+                                    <span><strong>百度OCR：</strong>支持中文，免费额度大，推荐使用</span>
+                                </div>
+                                <div class="ocr-desc-item">
+                                    <i class="fas fa-square-root-alt"></i>
+                                    <span><strong>Mathpix：</strong>专业数学公式识别，支持LaTeX格式</span>
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+                </div>
+
                <!-- 2. 所有API密钥集中在一个区域 -->
                <div class="settings-section api-key-settings">
                    <h3><i class="fas fa-key"></i> API密钥设置</h3>
@@ -425,6 +500,75 @@
                                </div>
                            </div>
                        </div>
+                        <div class="api-key-status">
+                            <span class="key-name">Doubao API:</span>
+                            <div class="key-status-wrapper">
+                                <!-- 显示状态 -->
+                                <div class="key-display">
+                                    <span id="DoubaoApiKeyStatus" class="key-status" data-key="DoubaoApiKey">未设置</span>
+                                    <button class="btn-icon edit-api-key" data-key-type="DoubaoApiKey" title="编辑此密钥">
+                                        <i class="fas fa-edit"></i>
+                                    </button>
+                                </div>
+                                <!-- 编辑状态 -->
+                                <div class="key-edit hidden">
+                                    <input type="password" class="key-input" data-key-type="DoubaoApiKey" placeholder="输入Doubao API key">
+                                    <button class="btn-icon toggle-visibility">
+                                        <i class="fas fa-eye"></i>
+                                    </button>
+                                    <button class="btn-icon save-api-key" data-key-type="DoubaoApiKey" title="保存密钥">
+                                        <i class="fas fa-save"></i>
+                                    </button>
+                                </div>
+                            </div>
+                        </div>
+                        
+                        <!-- 百度OCR API Key配置 -->
+                        <div class="api-key-status">
+                            <span class="key-name">百度OCR API Key:</span>
+                            <div class="key-status-wrapper">
+                                <!-- 显示状态 -->
+                                <div class="key-display">
+                                    <span id="BaiduApiKeyStatus" class="key-status" data-key="BaiduApiKey">未设置</span>
+                                    <button class="btn-icon edit-api-key" data-key-type="BaiduApiKey" title="编辑此密钥">
+                                        <i class="fas fa-edit"></i>
+                                    </button>
+                                </div>
+                                <!-- 编辑状态 -->
+                                <div class="key-edit hidden">
+                                    <input type="password" class="key-input" data-key-type="BaiduApiKey" placeholder="输入百度OCR API Key">
+                                    <button class="btn-icon toggle-visibility">
+                                        <i class="fas fa-eye"></i>
+                                    </button>
+                                    <button class="btn-icon save-api-key" data-key-type="BaiduApiKey" title="保存密钥">
+                                        <i class="fas fa-save"></i>
+                                    </button>
+                                </div>
+                            </div>
+                        </div>
+                        <div class="api-key-status">
+                            <span class="key-name">百度OCR Secret Key:</span>
+                            <div class="key-status-wrapper">
+                                <!-- 显示状态 -->
+                                <div class="key-display">
+                                    <span id="BaiduSecretKeyStatus" class="key-status" data-key="BaiduSecretKey">未设置</span>
+                                    <button class="btn-icon edit-api-key" data-key-type="BaiduSecretKey" title="编辑此密钥">
+                                        <i class="fas fa-edit"></i>
+                                    </button>
+                                </div>
+                                <!-- 编辑状态 -->
+                                <div class="key-edit hidden">
+                                    <input type="password" class="key-input" data-key-type="BaiduSecretKey" placeholder="输入百度OCR Secret Key">
+                                    <button class="btn-icon toggle-visibility">
+                                        <i class="fas fa-eye"></i>
+                                    </button>
+                                    <button class="btn-icon save-api-key" data-key-type="BaiduSecretKey" title="保存密钥">
+                                        <i class="fas fa-save"></i>
+                                    </button>
+                                </div>
+                            </div>
+                        </div>
+                        
                        <div class="api-key-status">
                            <span class="key-name">Mathpix App ID:</span>
                            <div class="key-status-wrapper">
@@ -577,6 +721,25 @@
                                        </div>
                                    </div>
                                </div>
+                                <div class="api-key-status">
+                                    <span class="key-name">Doubao API URL:</span>
+                                    <div class="key-status-wrapper">
+                                        <!-- 显示状态 -->
+                                        <div class="key-display">
+                                            <span id="DoubaoApiBaseUrlStatus" class="key-status" data-key="DoubaoApiBaseUrl">未设置</span>
+                                            <button class="btn-icon edit-api-base-url" data-key-type="DoubaoApiBaseUrl" title="编辑此URL">
+                                                <i class="fas fa-edit"></i>
+                                            </button>
+                                        </div>
+                                        <!-- 编辑状态 -->
+                                        <div class="key-edit hidden">
+                                            <input type="text" class="key-input" data-key-type="DoubaoApiBaseUrl" placeholder="https://ark.cn-beijing.volces.com/api/v3">
+                                            <button class="btn-icon save-api-base-url" data-key-type="DoubaoApiBaseUrl" title="保存URL">
+                                                <i class="fas fa-save"></i>
+                                            </button>
+                                        </div>
+                                    </div>
+                                </div>
                            </div>
                        </div>
                    </div>