From 163fad337e7f0c315ae4573401fb941dab6b25e5 Mon Sep 17 00:00:00 2001
From: Zylan
Date: Mon, 31 Mar 2025 14:33:36 +0800
Subject: [PATCH] support qvq-max

---
 app.py                |   4 +
 config/models.json    |  13 +++
 models/__init__.py    |   2 +
 models/alibaba.py     | 253 ++++++++++++++++++++++++++++++++++++++++++
 static/js/main.js     |  53 +++++++++
 static/js/settings.js |   1 +
 templates/index.html  |  10 ++
 7 files changed, 336 insertions(+)
 create mode 100644 models/alibaba.py

diff --git a/app.py b/app.py
index 8cd3c46..f809956 100644
--- a/app.py
+++ b/app.py
@@ -81,6 +81,8 @@ def create_model_instance(model_id, api_keys, settings):
         api_key_id = "OpenaiApiKey"
     elif provider == 'deepseek':
         api_key_id = "DeepseekApiKey"
+    elif provider == 'alibaba':
+        api_key_id = "AlibabaApiKey"
     else:
         # 根据模型名称
         if "claude" in model_id.lower():
@@ -89,6 +91,8 @@
             api_key_id = "OpenaiApiKey"
         elif "deepseek" in model_id.lower():
             api_key_id = "DeepseekApiKey"
+        elif "qvq" in model_id.lower() or "alibaba" in model_id.lower():
+            api_key_id = "AlibabaApiKey"
 
     api_key = api_keys.get(api_key_id)
     if not api_key:
diff --git a/config/models.json b/config/models.json
index 5596372..117ac05 100644
--- a/config/models.json
+++ b/config/models.json
@@ -14,6 +14,11 @@
             "name": "DeepSeek",
             "api_key_id": "DeepseekApiKey",
             "class_name": "DeepSeekModel"
+        },
+        "alibaba": {
+            "name": "Alibaba",
+            "api_key_id": "AlibabaApiKey",
+            "class_name": "AlibabaModel"
         }
     },
     "models": {
@@ -56,6 +61,14 @@
             "isReasoning": true,
             "version": "latest",
             "description": "DeepSeek推理模型,提供详细思考过程(仅支持文本)"
+        },
+        "QVQ-Max-2025-03-25": {
+            "name": "QVQ-Max",
+            "provider": "alibaba",
+            "supportsMultimodal": true,
+            "isReasoning": true,
+            "version": "2025-03-25",
+            "description": "阿里巴巴通义千问-QVQ-Max版本,支持图像理解和思考过程"
         }
     }
 }
\ No newline at end of file
diff --git a/models/__init__.py b/models/__init__.py
index eded06f..e3f4e65 100644
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -2,6 +2,7 @@ from .base import BaseModel
 from .anthropic import AnthropicModel
 from .openai import OpenAIModel
 from .deepseek import DeepSeekModel
+from .alibaba import AlibabaModel
 from .factory import ModelFactory
 
 __all__ = [
@@ -9,5 +10,6 @@ __all__ = [
     'AnthropicModel',
     'OpenAIModel',
     'DeepSeekModel',
+    'AlibabaModel',
     'ModelFactory'
 ]
diff --git a/models/alibaba.py b/models/alibaba.py
new file mode 100644
index 0000000..0e27d7d
--- /dev/null
+++ b/models/alibaba.py
@@ -0,0 +1,253 @@
+import os
+from typing import Generator, Dict, Optional, Any
+from openai import OpenAI
+from .base import BaseModel
+
+class AlibabaModel(BaseModel):
+    def get_default_system_prompt(self) -> str:
+        return """你是一位专业的问题分析与解答助手。当看到一个问题图片时,请:
+1. 仔细阅读并理解问题
+2. 分析问题的关键组成部分
+3. 提供清晰的、逐步的解决方案
+4. 如果相关,解释涉及的概念或理论
+5. 如果有多种解决方法,先解释最高效的方法"""
+
+    def get_model_identifier(self) -> str:
+        return "qvq-max"
+
+    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
+        """Stream QVQ-Max's response for text analysis"""
+        try:
+            # Initial status
+            yield {"status": "started", "content": ""}
+
+            # Save original environment state
+            original_env = {
+                'http_proxy': os.environ.get('http_proxy'),
+                'https_proxy': os.environ.get('https_proxy')
+            }
+
+            try:
+                # Set proxy environment variables if provided
+                if proxies:
+                    if 'http' in proxies:
+                        os.environ['http_proxy'] = proxies['http']
+                    if 'https' in proxies:
+                        os.environ['https_proxy'] = proxies['https']
+
+                # Initialize OpenAI compatible client for DashScope
+                client = OpenAI(
+                    api_key=self.api_key,
+                    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
+                )
+
+                # Prepare messages
+                messages = [
+                    {
+                        "role": "system",
+                        "content": [{"type": "text", "text": self.system_prompt}]
+                    },
+                    {
+                        "role": "user",
+                        "content": [{"type": "text", "text": text}]
+                    }
+                ]
+
+                # Create the chat completion request
+                response = client.chat.completions.create(
+                    model=self.get_model_identifier(),
+                    messages=messages,
+                    temperature=self.temperature,
+                    stream=True,
+                    max_tokens=self.max_tokens if hasattr(self, 'max_tokens') and self.max_tokens else 4000
+                )
+
+                # Track the reasoning process and the answer
+                reasoning_content = ""
+                answer_content = ""
+                is_answering = False
+
+                for chunk in response:
+                    if not chunk.choices:
+                        continue
+
+                    delta = chunk.choices[0].delta
+
+                    # Handle the reasoning (thinking) content
+                    if hasattr(delta, 'reasoning_content') and delta.reasoning_content is not None:
+                        reasoning_content += delta.reasoning_content
+                        # Emit the reasoning as a separate content stream
+                        yield {
+                            "status": "reasoning",
+                            "content": reasoning_content,
+                            "is_reasoning": True
+                        }
+                    elif delta.content != "":
+                        # Detect the switch from reasoning to answering
+                        if not is_answering:
+                            is_answering = True
+                            # Emit the complete reasoning content
+                            if reasoning_content:
+                                yield {
+                                    "status": "reasoning_complete",
+                                    "content": reasoning_content,
+                                    "is_reasoning": True
+                                }
+
+                        # Accumulate the answer content
+                        answer_content += delta.content
+
+                        # Emit the accumulated answer
+                        yield {
+                            "status": "streaming",
+                            "content": answer_content
+                        }
+
+                # Make sure the final complete answer is sent
+                if answer_content:
+                    yield {
+                        "status": "completed",
+                        "content": answer_content
+                    }
+
+            finally:
+                # Restore original environment state
+                for key, value in original_env.items():
+                    if value is None:
+                        if key in os.environ:
+                            del os.environ[key]
+                    else:
+                        os.environ[key] = value
+
+        except Exception as e:
+            yield {
+                "status": "error",
+                "error": str(e)
+            }
+
+    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
+        """Stream QVQ-Max's response for image analysis"""
+        try:
+            # Initial status
+            yield {"status": "started", "content": ""}
+
+            # Save original environment state
+            original_env = {
+                'http_proxy': os.environ.get('http_proxy'),
+                'https_proxy': os.environ.get('https_proxy')
+            }
+
+            try:
+                # Set proxy environment variables if provided
+                if proxies:
+                    if 'http' in proxies:
+                        os.environ['http_proxy'] = proxies['http']
+                    if 'https' in proxies:
+                        os.environ['https_proxy'] = proxies['https']
+
+                # Initialize OpenAI compatible client for DashScope
+                client = OpenAI(
+                    api_key=self.api_key,
+                    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
+                )
+
+                # Check whether the system prompt already contains a language instruction
+                system_prompt = self.system_prompt
+                language = self.language or '中文'
+                if not any(phrase in system_prompt for phrase in ['Please respond in', '请用', '使用', '回答']):
+                    system_prompt = f"{system_prompt}\n\n请务必使用{language}回答,无论问题是什么语言。即使在分析图像时也请使用{language}回答。"
+
+                # Prepare messages with image
+                messages = [
+                    {
+                        "role": "system",
+                        "content": [{"type": "text", "text": system_prompt}]
+                    },
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{image_data}"
+                                }
+                            },
+                            {
+                                "type": "text",
+                                "text": "请分析这个图片并提供详细的解答。"
+                            }
+                        ]
+                    }
+                ]
+
+                # Create the chat completion request
+                response = client.chat.completions.create(
+                    model=self.get_model_identifier(),
+                    messages=messages,
+                    temperature=self.temperature,
+                    stream=True,
+                    max_tokens=self.max_tokens if hasattr(self, 'max_tokens') and self.max_tokens else 4000
+                )
+
+                # Track the reasoning process and the answer
+                reasoning_content = ""
+                answer_content = ""
+                is_answering = False
+
+                for chunk in response:
+                    if not chunk.choices:
+                        continue
+
+                    delta = chunk.choices[0].delta
+
+                    # Handle the reasoning (thinking) content
+                    if hasattr(delta, 'reasoning_content') and delta.reasoning_content is not None:
+                        reasoning_content += delta.reasoning_content
+                        # Emit the reasoning as a separate content stream
+                        yield {
+                            "status": "reasoning",
+                            "content": reasoning_content,
+                            "is_reasoning": True
+                        }
+                    elif delta.content != "":
+                        # Detect the switch from reasoning to answering
+                        if not is_answering:
+                            is_answering = True
+                            # Emit the complete reasoning content
+                            if reasoning_content:
+                                yield {
+                                    "status": "reasoning_complete",
+                                    "content": reasoning_content,
+                                    "is_reasoning": True
+                                }
+
+                        # Accumulate the answer content
+                        answer_content += delta.content
+
+                        # Emit the accumulated answer
+                        yield {
+                            "status": "streaming",
+                            "content": answer_content
+                        }
+
+                # Make sure the final complete answer is sent
+                if answer_content:
+                    yield {
+                        "status": "completed",
+                        "content": answer_content
+                    }
+
+            finally:
+                # Restore original environment state
+                for key, value in original_env.items():
+                    if value is None:
+                        if key in os.environ:
+                            del os.environ[key]
+                    else:
+                        os.environ[key] = value
+
+        except Exception as e:
+            yield {
+                "status": "error",
+                "error": str(e)
+            }
\ No newline at end of file
diff --git a/static/js/main.js b/static/js/main.js
index a821e2a..fbb6502 100644
--- a/static/js/main.js
+++ b/static/js/main.js
@@ -355,6 +355,45 @@ class SnapSolver {
                     }
                     break;
 
+                case 'reasoning':
+                    // Handle reasoning content (used by the QVQ-Max model)
+                    if (data.content && this.thinkingContent && this.thinkingSection) {
+                        console.log('Received reasoning content');
+                        this.thinkingSection.classList.remove('hidden');
+
+                        // Remember the user's expand/collapse state
+                        const wasExpanded = this.thinkingContent.classList.contains('expanded');
+
+                        // Set the full content directly instead of appending
+                        this.setElementContent(this.thinkingContent, data.content);
+
+                        // Add the typing animation effect
+                        this.thinkingContent.classList.add('thinking-typing');
+
+                        // Expand or collapse based on the previous state
+                        if (wasExpanded) {
+                            this.thinkingContent.classList.add('expanded');
+                            this.thinkingContent.classList.remove('collapsed');
+
+                            // Update the toggle button icon
+                            const toggleIcon = document.querySelector('#thinkingToggle .toggle-btn i');
+                            if (toggleIcon) {
+                                toggleIcon.className = 'fas fa-chevron-up';
+                            }
+                        } else {
+                            // Collapsed by default
+                            this.thinkingContent.classList.add('collapsed');
+                            this.thinkingContent.classList.remove('expanded');
+
+                            // Update the toggle button icon
+                            const toggleIcon = document.querySelector('#thinkingToggle .toggle-btn i');
+                            if (toggleIcon) {
+                                toggleIcon.className = 'fas fa-chevron-down';
+                            }
+                        }
+                    }
+                    break;
+
                 case 'thinking_complete':
                     // 完整的思考内容
                     if (data.content && this.thinkingContent && this.thinkingSection) {
@@ -369,6 +408,20 @@
                     }
                     break;
 
+                case 'reasoning_complete':
+                    // Complete reasoning content (used by the QVQ-Max model)
+                    if (data.content && this.thinkingContent && this.thinkingSection) {
+                        console.log('Reasoning complete');
+                        this.thinkingSection.classList.remove('hidden');
+
+                        // Set the complete content
+                        this.setElementContent(this.thinkingContent, data.content);
+
+                        // Remove the typing animation
+                        this.thinkingContent.classList.remove('thinking-typing');
+                    }
+                    break;
+
                 case 'streaming':
                     if (data.content && this.responseContent) {
                         console.log('Received content chunk');
diff --git a/static/js/settings.js b/static/js/settings.js
index 032673e..b1e5fce 100644
--- a/static/js/settings.js
+++ b/static/js/settings.js
@@ -181,6 +181,7 @@ class SettingsManager {
             'AnthropicApiKey': document.getElementById('AnthropicApiKey'),
             'OpenaiApiKey': document.getElementById('OpenaiApiKey'),
             'DeepseekApiKey': document.getElementById('DeepseekApiKey'),
+            'AlibabaApiKey': document.getElementById('AlibabaApiKey'),
             'mathpixAppId': this.mathpixAppIdInput,
             'mathpixAppKey': this.mathpixAppKeyInput
         };
diff --git a/templates/index.html b/templates/index.html
index ebbd208..668d0b3 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -7,6 +7,7 @@
     [one line added in <head>, near the "Snap Solver" title; markup not preserved in this extract]
@@ -214,6 +215,15 @@
     [nine lines added to the settings panel, presumably the AlibabaApiKey input referenced by settings.js; markup not preserved in this extract]
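
A note for reviewers on the streaming contract this patch introduces: both analyze_text() and analyze_image() are generators that yield event dictionaries whose status is one of started, reasoning, reasoning_complete, streaming, completed, or error, and whose content field carries the accumulated text so far rather than a delta. The sketch below is illustrative only and is not part of the patch; consume_stream is a hypothetical helper, and the commented wiring assumes a model instance is obtained elsewhere (for example via create_model_instance in app.py).

def consume_stream(events):
    """Collect reasoning and answer text from an AlibabaModel event stream.

    `events` is the generator returned by analyze_text()/analyze_image().
    Each yielded dict carries the accumulated text so far, not a delta.
    """
    reasoning = ""
    answer = ""
    for event in events:
        status = event.get("status")
        if status == "error":
            raise RuntimeError(event.get("error", "unknown error"))
        if status in ("reasoning", "reasoning_complete"):
            reasoning = event.get("content", "")
        elif status in ("streaming", "completed"):
            answer = event.get("content", "")
    return reasoning, answer

# Hypothetical wiring -- the real app builds the model via its factory:
# model = create_model_instance("QVQ-Max-2025-03-25", api_keys, settings)
# reasoning, answer = consume_stream(model.analyze_text("1 + 1 = ?"))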