support qvq-max

This commit is contained in:
Zylan
2025-03-31 14:33:36 +08:00
parent 4f3ee654d8
commit 163fad337e
7 changed files with 336 additions and 0 deletions

4
app.py
View File

@@ -81,6 +81,8 @@ def create_model_instance(model_id, api_keys, settings):
api_key_id = "OpenaiApiKey"
elif provider == 'deepseek':
api_key_id = "DeepseekApiKey"
elif provider == 'alibaba':
api_key_id = "AlibabaApiKey"
else:
# 根据模型名称
if "claude" in model_id.lower():
@@ -89,6 +91,8 @@ def create_model_instance(model_id, api_keys, settings):
api_key_id = "OpenaiApiKey"
elif "deepseek" in model_id.lower():
api_key_id = "DeepseekApiKey"
elif "qvq" in model_id.lower() or "alibaba" in model_id.lower():
api_key_id = "AlibabaApiKey"
api_key = api_keys.get(api_key_id)
if not api_key:

View File

@@ -14,6 +14,11 @@
"name": "DeepSeek",
"api_key_id": "DeepseekApiKey",
"class_name": "DeepSeekModel"
},
"alibaba": {
"name": "Alibaba",
"api_key_id": "AlibabaApiKey",
"class_name": "AlibabaModel"
}
},
"models": {
@@ -56,6 +61,14 @@
"isReasoning": true,
"version": "latest",
"description": "DeepSeek推理模型提供详细思考过程仅支持文本"
},
"QVQ-Max-2025-03-25": {
"name": "QVQ-Max",
"provider": "alibaba",
"supportsMultimodal": true,
"isReasoning": true,
"version": "2025-03-25",
"description": "阿里巴巴通义千问-QVQ-Max版本支持图像理解和思考过程"
}
}
}

View File

@@ -2,6 +2,7 @@ from .base import BaseModel
from .anthropic import AnthropicModel
from .openai import OpenAIModel
from .deepseek import DeepSeekModel
from .alibaba import AlibabaModel
from .factory import ModelFactory
__all__ = [
@@ -9,5 +10,6 @@ __all__ = [
'AnthropicModel',
'OpenAIModel',
'DeepSeekModel',
'AlibabaModel',
'ModelFactory'
]

253
models/alibaba.py Normal file
View File

@@ -0,0 +1,253 @@
import os
from typing import Generator, Dict, Optional, Any
from openai import OpenAI
from .base import BaseModel
class AlibabaModel(BaseModel):
    """Alibaba DashScope QVQ-Max model, accessed via DashScope's OpenAI-compatible API.

    Streams two distinct content tracks to the caller:

    * the model's reasoning trace (DashScope's ``reasoning_content`` delta field),
      emitted as ``reasoning`` / ``reasoning_complete`` events so the UI can show a
      collapsible "thinking" section, and
    * the final answer, emitted as ``streaming`` events and a terminal ``completed``
      event.

    All events are dicts of the shape ``{"status": ..., "content": ...}`` (plus
    ``"is_reasoning": True`` on reasoning events, or ``"error"`` on failure).
    """

    def get_default_system_prompt(self) -> str:
        """Return the default system prompt (user-facing model input; kept in Chinese)."""
        return """你是一位专业的问题分析与解答助手。当看到一个问题图片时,请:
1. 仔细阅读并理解问题
2. 分析问题的关键组成部分
3. 提供清晰的、逐步的解决方案
4. 如果相关,解释涉及的概念或理论
5. 如果有多种解决方法,先解释最高效的方法"""

    def get_model_identifier(self) -> str:
        """Return the DashScope model id used for chat-completion requests.

        NOTE(review): the commit title says "qwq-max" (Alibaba's text-only
        reasoning model), but config and UI consistently use QVQ-Max (the
        multimodal visual-reasoning model), so "qvq-max" is assumed correct —
        confirm against the DashScope model catalogue.
        """
        return "qvq-max"

    def _stream_completion(self, messages, proxies: Optional[dict] = None) -> Generator[dict, None, None]:
        """Shared streaming loop for both text and image analysis.

        Args:
            messages: OpenAI-style chat messages (list of role/content dicts).
            proxies: optional ``{'http': ..., 'https': ...}`` proxy mapping,
                applied via environment variables for the duration of the call.

        Yields:
            Status dicts: ``started`` → zero or more ``reasoning`` → optional
            ``reasoning_complete`` → zero or more ``streaming`` → ``completed``,
            or a single ``error`` dict if anything raises.
        """
        try:
            # Initial status so the client can show a spinner immediately.
            yield {"status": "started", "content": ""}

            # Snapshot proxy env vars so we can restore them exactly afterwards.
            original_env = {
                'http_proxy': os.environ.get('http_proxy'),
                'https_proxy': os.environ.get('https_proxy'),
            }
            try:
                # The OpenAI SDK picks proxies up from the environment.
                if proxies:
                    if 'http' in proxies:
                        os.environ['http_proxy'] = proxies['http']
                    if 'https' in proxies:
                        os.environ['https_proxy'] = proxies['https']

                # OpenAI-compatible client pointed at DashScope.
                client = OpenAI(
                    api_key=self.api_key,
                    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
                )

                response = client.chat.completions.create(
                    model=self.get_model_identifier(),
                    messages=messages,
                    temperature=self.temperature,
                    stream=True,
                    # Fall back to 4000 when max_tokens is unset or falsy.
                    max_tokens=getattr(self, 'max_tokens', None) or 4000
                )

                reasoning_content = ""   # accumulated thinking trace
                answer_content = ""      # accumulated final answer
                is_answering = False     # True once the first answer token arrives

                for chunk in response:
                    if not chunk.choices:
                        continue
                    delta = chunk.choices[0].delta

                    reasoning_delta = getattr(delta, 'reasoning_content', None)
                    if reasoning_delta is not None:
                        # Reasoning is streamed as its own cumulative event track.
                        reasoning_content += reasoning_delta
                        yield {
                            "status": "reasoning",
                            "content": reasoning_content,
                            "is_reasoning": True
                        }
                    # BUGFIX: delta.content can be None on reasoning-only chunks;
                    # the previous `delta.content != ""` test let None through and
                    # crashed on the concatenation below. A truthiness check
                    # rejects both None and "".
                    elif delta.content:
                        if not is_answering:
                            # Transition from thinking to answering: flush the
                            # full reasoning trace once.
                            is_answering = True
                            if reasoning_content:
                                yield {
                                    "status": "reasoning_complete",
                                    "content": reasoning_content,
                                    "is_reasoning": True
                                }
                        answer_content += delta.content
                        yield {
                            "status": "streaming",
                            "content": answer_content
                        }

                # Final event carries the complete answer.
                if answer_content:
                    yield {
                        "status": "completed",
                        "content": answer_content
                    }
            finally:
                # Restore the proxy environment exactly as it was.
                for key, value in original_env.items():
                    if value is None:
                        os.environ.pop(key, None)
                    else:
                        os.environ[key] = value
        except Exception as e:
            yield {
                "status": "error",
                "error": str(e)
            }

    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream QVQ-Max's response for plain-text analysis.

        Args:
            text: the user's question/text to analyze.
            proxies: optional proxy mapping forwarded to the request.

        Yields:
            Status dicts as documented on :meth:`_stream_completion`.
        """
        try:
            messages = [
                {
                    "role": "system",
                    "content": [{"type": "text", "text": self.system_prompt}]
                },
                {
                    "role": "user",
                    "content": [{"type": "text", "text": text}]
                }
            ]
        except Exception as e:
            yield {"status": "error", "error": str(e)}
            return
        yield from self._stream_completion(messages, proxies)

    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream QVQ-Max's response for image analysis.

        Args:
            image_data: base64-encoded JPEG payload (no ``data:`` prefix).
            proxies: optional proxy mapping forwarded to the request.

        Yields:
            Status dicts as documented on :meth:`_stream_completion`.
        """
        try:
            system_prompt = self.system_prompt
            language = self.language or '中文'
            # Only append a response-language directive when the prompt does not
            # already contain one of the known language-hint phrases.
            if not any(phrase in system_prompt for phrase in ['Please respond in', '请用', '使用', '回答']):
                system_prompt = f"{system_prompt}\n\n请务必使用{language}回答,无论问题是什么语言。即使在分析图像时也请使用{language}回答。"

            messages = [
                {
                    "role": "system",
                    "content": [{"type": "text", "text": system_prompt}]
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{image_data}"
                            }
                        },
                        {
                            "type": "text",
                            "text": "请分析这个图片并提供详细的解答。"
                        }
                    ]
                }
            ]
        except Exception as e:
            yield {"status": "error", "error": str(e)}
            return
        yield from self._stream_completion(messages, proxies)

View File

@@ -355,6 +355,45 @@ class SnapSolver {
}
break;
case 'reasoning':
// 处理推理内容 (QVQ-Max模型使用)
if (data.content && this.thinkingContent && this.thinkingSection) {
console.log('Received reasoning content');
this.thinkingSection.classList.remove('hidden');
// 记住用户的展开/折叠状态
const wasExpanded = this.thinkingContent.classList.contains('expanded');
// 直接设置完整内容而不是追加
this.setElementContent(this.thinkingContent, data.content);
// 添加打字动画效果
this.thinkingContent.classList.add('thinking-typing');
// 根据之前的状态决定是否展开
if (wasExpanded) {
this.thinkingContent.classList.add('expanded');
this.thinkingContent.classList.remove('collapsed');
// 更新切换按钮图标
const toggleIcon = document.querySelector('#thinkingToggle .toggle-btn i');
if (toggleIcon) {
toggleIcon.className = 'fas fa-chevron-up';
}
} else {
// 初始状态为折叠
this.thinkingContent.classList.add('collapsed');
this.thinkingContent.classList.remove('expanded');
// 更新切换按钮图标
const toggleIcon = document.querySelector('#thinkingToggle .toggle-btn i');
if (toggleIcon) {
toggleIcon.className = 'fas fa-chevron-down';
}
}
}
break;
case 'thinking_complete':
// 完整的思考内容
if (data.content && this.thinkingContent && this.thinkingSection) {
@@ -369,6 +408,20 @@ class SnapSolver {
}
break;
case 'reasoning_complete':
// 完整的推理内容 (QVQ-Max模型使用)
if (data.content && this.thinkingContent && this.thinkingSection) {
console.log('Reasoning complete');
this.thinkingSection.classList.remove('hidden');
// 设置完整内容
this.setElementContent(this.thinkingContent, data.content);
// 移除打字动画
this.thinkingContent.classList.remove('thinking-typing');
}
break;
case 'streaming':
if (data.content && this.responseContent) {
console.log('Received content chunk');

View File

@@ -181,6 +181,7 @@ class SettingsManager {
'AnthropicApiKey': document.getElementById('AnthropicApiKey'),
'OpenaiApiKey': document.getElementById('OpenaiApiKey'),
'DeepseekApiKey': document.getElementById('DeepseekApiKey'),
'AlibabaApiKey': document.getElementById('AlibabaApiKey'),
'mathpixAppId': this.mathpixAppIdInput,
'mathpixAppKey': this.mathpixAppKeyInput
};

View File

@@ -7,6 +7,7 @@
<!-- Safari兼容性设置 -->
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
<meta name="mobile-web-app-capable" content="yes">
<title>Snap Solver</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/cropperjs/1.5.13/cropper.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
@@ -214,6 +215,15 @@
</button>
</div>
</div>
<div class="setting-group api-key-group">
<label for="AlibabaApiKey">Alibaba API Key</label>
<div class="input-group">
<input type="password" id="AlibabaApiKey" placeholder="输入 Alibaba API key">
<button class="btn-icon toggle-api-key">
<i class="fas fa-eye"></i>
</button>
</div>
</div>
<div class="setting-group api-key-group">
<label for="mathpixAppId">Mathpix App ID</label>
<div class="input-group">