完善截图和分析流程

This commit is contained in:
Zylan
2025-03-06 14:45:52 +08:00
parent 5bb8b4ea2d
commit b514145c13
8 changed files with 1352 additions and 685 deletions

27
app.py
View File

@@ -104,7 +104,6 @@ def stream_model_response(response_generator, sid):
thinking_buffer += content
# 发送完整的思考内容
print(f"Streaming thinking content: {len(thinking_buffer)} chars")
socketio.emit('claude_response', {
'status': 'thinking',
'content': thinking_buffer
@@ -127,7 +126,7 @@ def stream_model_response(response_generator, sid):
response_buffer += content
# 发送完整的内容
print(f"Streaming response content: {len(response_buffer)} chars")
# print(f"Streaming response content: {len(response_buffer)} chars")
socketio.emit('claude_response', {
'status': 'streaming',
'content': response_buffer
@@ -379,6 +378,30 @@ def handle_analyze_image(data):
'error': f'Analysis error: {str(e)}'
}, room=request.sid)
@socketio.on('capture_screenshot')
def handle_capture_screenshot(data=None):
    """Capture the screen and send it back to the requesting client.

    The screenshot is encoded as a base64 PNG string and emitted in a
    'screenshot_complete' event addressed to the caller's session id.

    Args:
        data: Optional event payload. It is not read by this handler; it
            defaults to None so that an emit without a payload does not
            fail before the error handling below is reached.

    Emits:
        'screenshot_complete' with {'success': True, 'image': <base64 PNG>}
        on success, or {'success': False, 'error': <message>} on failure.
    """
    try:
        # Capture the screen
        screenshot = pyautogui.screenshot()

        # Encode as PNG in memory and convert to a base64 string; the
        # buffer is released as soon as the encoded text has been built.
        with BytesIO() as buffered:
            screenshot.save(buffered, format="PNG")
            img_str = base64.b64encode(buffered.getvalue()).decode()

        # Emit the screenshot back to the client. This stays inside the
        # try block so a failing emit is also reported as an error below.
        socketio.emit('screenshot_complete', {
            'success': True,
            'image': img_str
        }, room=request.sid)
    except Exception as e:
        # Broad catch is deliberate at this event boundary: any failure is
        # logged and reported to the client instead of being dropped.
        error_msg = f"Screenshot error: {str(e)}"
        print(f"Error capturing screenshot: {error_msg}")
        socketio.emit('screenshot_complete', {
            'success': False,
            'error': error_msg
        }, room=request.sid)
def run_tray():
icon = create_tray_icon()
icon.run()

View File

@@ -18,13 +18,12 @@ class ClaudeModel(BaseModel):
def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
"""Stream Claude's response for text analysis"""
try:
# Initial status
yield {"status": "started", "content": ""}
api_key = self.api_key.strip()
yield {"status": "started"}
api_key = self.api_key
if api_key.startswith('Bearer '):
api_key = api_key[7:]
headers = {
'x-api-key': api_key,
'anthropic-version': '2023-06-01',
@@ -105,6 +104,16 @@ class ClaudeModel(BaseModel):
"status": "thinking",
"content": thinking_content
}
# 处理新的extended_thinking格式
elif data.get('type') == 'extended_thinking_delta':
if 'delta' in data and 'text' in data['delta']:
thinking_chunk = data['delta']['text']
thinking_content += thinking_chunk
yield {
"status": "thinking",
"content": thinking_content
}
elif data.get('type') == 'message_stop':
if thinking_content:
@@ -135,130 +144,132 @@ class ClaudeModel(BaseModel):
"error": f"Streaming error: {str(e)}"
}
def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
"""Stream Claude's response for image analysis"""
try:
# Initial status
yield {"status": "started", "content": ""}
api_key = self.api_key.strip()
if api_key.startswith('Bearer '):
api_key = api_key[7:]
headers = {
'x-api-key': api_key,
'anthropic-version': '2023-06-01',
'content-type': 'application/json',
'accept': 'application/json',
}
payload = {
'model': self.get_model_identifier(),
'stream': True,
'max_tokens': 8192,
'temperature': 1,
'system': self.system_prompt,
'thinking': {
'type': 'enabled',
'budget_tokens': 4096
},
'messages': [{
'role': 'user',
'content': [
{
'type': 'image',
'source': {
'type': 'base64',
'media_type': 'image/png',
'data': image_data
}
},
{
'type': 'text',
'text': "Please analyze this question and provide a detailed solution. If you see multiple questions, focus on solving them one at a time."
}
]
}]
}
response = requests.post(
'https://api.anthropic.com/v1/messages',
headers=headers,
json=payload,
stream=True,
proxies=proxies,
timeout=60
)
if response.status_code != 200:
error_msg = f'API error: {response.status_code}'
try:
error_data = response.json()
if 'error' in error_data:
error_msg += f" - {error_data['error']['message']}"
except:
error_msg += f" - {response.text}"
yield {"status": "error", "error": error_msg}
return
thinking_content = ""
response_buffer = ""
def analyze_image(self, image_data, prompt, socket=None, proxies=None):
yield {"status": "started"}
api_key = self.api_key
if api_key.startswith('Bearer '):
api_key = api_key[7:]
for chunk in response.iter_lines():
if not chunk:
headers = {
'x-api-key': api_key,
'anthropic-version': '2023-06-01',
'content-type': 'application/json'
}
payload = {
'model': 'claude-3-7-sonnet-20250219',
'stream': True,
'max_tokens': 8192,
'temperature': 1,
'thinking': {
'type': 'enabled',
'budget_tokens': 4096
},
'system': "You are a helpful AI assistant that specializes in solving math problems. You should provide step-by-step solutions and explanations for any math problem presented to you. If you're given an image, analyze any mathematical content in it and provide a detailed solution.",
'messages': [{
'role': 'user',
'content': [
{
'type': 'image',
'source': {
'type': 'base64',
'media_type': 'image/png',
'data': image_data
}
},
{
'type': 'text',
'text': "Please analyze this question and provide a detailed solution. If you see multiple questions, focus on solving them one at a time."
}
]
}]
}
response = requests.post(
'https://api.anthropic.com/v1/messages',
headers=headers,
json=payload,
stream=True,
proxies=proxies,
timeout=60
)
if response.status_code != 200:
error_msg = f'API error: {response.status_code}'
try:
error_data = response.json()
if 'error' in error_data:
error_msg += f" - {error_data['error']['message']}"
except:
error_msg += f" - {response.text}"
yield {"status": "error", "error": error_msg}
return
thinking_content = ""
response_buffer = ""
for chunk in response.iter_lines():
if not chunk:
continue
try:
chunk_str = chunk.decode('utf-8')
if not chunk_str.startswith('data: '):
continue
try:
chunk_str = chunk.decode('utf-8')
if not chunk_str.startswith('data: '):
continue
chunk_str = chunk_str[6:]
data = json.loads(chunk_str)
chunk_str = chunk_str[6:]
data = json.loads(chunk_str)
if data.get('type') == 'content_block_delta':
if 'delta' in data:
if 'text' in data['delta']:
text_chunk = data['delta']['text']
yield {
"status": "streaming",
"content": text_chunk
}
response_buffer += text_chunk
elif 'thinking' in data['delta']:
thinking_chunk = data['delta']['thinking']
thinking_content += thinking_chunk
yield {
"status": "thinking",
"content": thinking_content
}
elif data.get('type') == 'message_stop':
if thinking_content:
if data.get('type') == 'content_block_delta':
if 'delta' in data:
if 'text' in data['delta']:
text_chunk = data['delta']['text']
yield {
"status": "thinking_complete",
"status": "streaming",
"content": text_chunk
}
response_buffer += text_chunk
elif 'thinking' in data['delta']:
thinking_chunk = data['delta']['thinking']
thinking_content += thinking_chunk
yield {
"status": "thinking",
"content": thinking_content
}
# 处理新的extended_thinking格式
elif data.get('type') == 'extended_thinking_delta':
if 'delta' in data and 'text' in data['delta']:
thinking_chunk = data['delta']['text']
thinking_content += thinking_chunk
yield {
"status": "completed",
"content": ""
"status": "thinking",
"content": thinking_content
}
elif data.get('type') == 'error':
error_msg = data.get('error', {}).get('message', 'Unknown error')
elif data.get('type') == 'message_stop':
if thinking_content:
yield {
"status": "error",
"error": error_msg
"status": "thinking_complete",
"content": thinking_content
}
break
except json.JSONDecodeError as e:
print(f"JSON decode error: {str(e)}")
continue
except Exception as e:
yield {
"status": "error",
"error": f"Streaming error: {str(e)}"
}
yield {
"status": "completed",
"content": response_buffer
}
elif data.get('type') == 'error':
error_message = data.get('error', {}).get('message', 'Unknown error')
yield {
"status": "error",
"error": error_message
}
except Exception as e:
yield {
"status": "error",
"error": f"Error processing response: {str(e)}"
}
break

View File

@@ -132,7 +132,7 @@ class GPT4oModel(BaseModel):
{
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{image_data}",
"url": image_data if image_data.startswith('data:') else f"data:image/png;base64,{image_data}",
"detail": "high"
}
},

File diff suppressed because it is too large Load Diff

View File

@@ -136,11 +136,20 @@ class SettingsManager {
}
getSettings() {
const language = this.languageInput.value || '中文';
const basePrompt = this.systemPromptInput.value || '';
// 检查系统提示词是否已包含语言设置
let systemPrompt = basePrompt;
if (!basePrompt.includes('Please respond in') && !basePrompt.includes('请用') && !basePrompt.includes('使用')) {
systemPrompt = `${basePrompt}\n\n请务必使用${language}回答。`;
}
return {
model: this.modelSelect.value,
temperature: this.temperatureInput.value,
language: this.languageInput.value,
systemPrompt: this.systemPromptInput.value + ` Please respond in ${this.languageInput.value}.`,
language: language,
systemPrompt: systemPrompt,
proxyEnabled: this.proxyEnabledInput.checked,
proxyHost: this.proxyHostInput.value,
proxyPort: this.proxyPortInput.value,

View File

@@ -47,6 +47,16 @@ class UIManager {
}
showToast(message, type = 'success') {
// 检查是否已经存在相同内容的提示
const existingToasts = this.toastContainer.querySelectorAll('.toast');
for (const existingToast of existingToasts) {
const existingMessage = existingToast.querySelector('span').textContent;
if (existingMessage === message) {
// 已经存在相同的提示,不再创建新的
return;
}
}
const toast = document.createElement('div');
toast.className = `toast ${type}`;
toast.innerHTML = `
@@ -55,10 +65,13 @@ class UIManager {
`;
this.toastContainer.appendChild(toast);
// 为不同类型的提示设置不同的显示时间
const displayTime = message === '截图成功' ? 1500 : 3000;
setTimeout(() => {
toast.style.opacity = '0';
setTimeout(() => toast.remove(), 300);
}, 3000);
}, displayTime);
}
closeAllPanels() {

File diff suppressed because it is too large Load Diff

View File

@@ -15,17 +15,17 @@
<div class="header-left">
<h1>Snap Solver</h1>
<div class="connection-status">
<div id="connectionStatus" class="status disconnected">Disconnected</div>
<div id="connectionStatus" class="status disconnected">未连接</div>
</div>
</div>
<div class="header-right">
<button id="themeToggle" class="btn-icon" title="Toggle theme">
<button id="themeToggle" class="btn-icon" title="切换主题">
<i class="fas fa-moon"></i>
</button>
<button id="historyToggle" class="btn-icon" title="View history">
<button id="historyToggle" class="btn-icon" title="查看历史记录">
<i class="fas fa-history"></i>
</button>
<button id="settingsToggle" class="btn-icon" title="Settings">
<button id="settingsToggle" class="btn-icon" title="设置">
<i class="fas fa-cog"></i>
</button>
</div>
@@ -35,45 +35,50 @@
<div class="content-panel">
<div class="capture-section">
<div class="toolbar">
<div class="toolbar-buttons">
<div class="button-group">
<button id="captureBtn" class="btn-primary" disabled>
<i class="fas fa-camera"></i>
<span>Capture</span>
</button>
<button id="cropBtn" class="btn-secondary hidden">
<i class="fas fa-crop"></i>
<span>Crop</span>
</button>
<div class="toolbar-buttons">
<div class="button-group">
<button id="captureBtn" class="btn-primary" disabled>
<i class="fas fa-camera"></i>
<span>截图</span>
</button>
<button id="cropBtn" class="btn-secondary hidden">
<i class="fas fa-crop"></i>
<span>裁剪</span>
</button>
</div>
</div>
</div>
<div id="emptyState" class="empty-state">
<i class="fas fa-camera-retro"></i>
<h3>准备好开始了吗?</h3>
<p>点击"截图"按钮捕获屏幕然后使用AI分析图像或提取文本。您可以截取数学题、代码或任何需要帮助的内容。</p>
</div>
<div id="imagePreview" class="image-preview hidden">
<div class="image-container">
<img id="screenshotImg" src="" alt="Screenshot preview">
<img id="screenshotImg" src="" alt="截图预览">
</div>
<div class="analysis-button">
<div class="button-group">
<button id="sendToClaude" class="btn-primary hidden">
<i class="fas fa-robot"></i>
<span>Send to AI</span>
<span>发送至AI</span>
</button>
<button id="extractText" class="btn-primary hidden">
<i class="fas fa-font"></i>
<span>Extract Text</span>
<span>提取文本</span>
</button>
</div>
<div id="textEditor" class="text-editor hidden">
<textarea id="extractedText" rows="4" placeholder="Extracted text will appear here..."></textarea>
<textarea id="extractedText" rows="4" placeholder="提取的文本将显示在这里..."></textarea>
<div class="text-format-controls">
<div class="send-text-group">
<div id="confidenceIndicator" class="confidence-indicator" title="OCR Confidence">
<div id="confidenceIndicator" class="confidence-indicator" title="OCR 置信度">
<i class="fas fa-check-circle"></i>
<span class="confidence-value"></span>
</div>
<button id="sendExtractedText" class="btn-primary">
<i class="fas fa-paper-plane"></i>
<span>Send Text to AI</span>
<span>发送文本至AI</span>
</button>
</div>
</div>
@@ -85,7 +90,7 @@
<div id="claudePanel" class="claude-panel hidden">
<div class="panel-header">
<div class="header-title">
<h2>Analysis Result</h2>
<h2>分析结果</h2>
<div class="analysis-status">
<div class="status-light"></div>
</div>
@@ -98,7 +103,7 @@
<div class="thinking-header" id="thinkingToggle">
<div class="thinking-title">
<i class="fas fa-brain"></i>
<h3>AI's Thinking Process</h3>
<h3>AI思考过程 <span class="thinking-hint">(点击展开/折叠)</span></h3>
</div>
<button class="toggle-btn">
<i class="fas fa-chevron-down"></i>
@@ -112,18 +117,18 @@
<aside id="settingsPanel" class="settings-panel hidden">
<div class="panel-header">
<h2>Settings</h2>
<h2>设置</h2>
<button class="btn-icon" id="closeSettings">
<i class="fas fa-times"></i>
</button>
</div>
<div class="settings-content">
<div class="settings-section">
<h3>OCR Configuration</h3>
<h3><i class="fas fa-font"></i> OCR 配置</h3>
<div class="setting-group">
<label for="mathpixAppId">Mathpix App ID</label>
<div class="input-group">
<input type="password" id="mathpixAppId" placeholder="Enter Mathpix App ID">
<input type="password" id="mathpixAppId" placeholder="输入 Mathpix App ID">
<button class="btn-icon toggle-api-key">
<i class="fas fa-eye"></i>
</button>
@@ -132,7 +137,7 @@
<div class="setting-group">
<label for="mathpixAppKey">Mathpix App Key</label>
<div class="input-group">
<input type="password" id="mathpixAppKey" placeholder="Enter Mathpix App Key">
<input type="password" id="mathpixAppKey" placeholder="输入 Mathpix App Key">
<button class="btn-icon toggle-api-key">
<i class="fas fa-eye"></i>
</button>
@@ -141,11 +146,11 @@
</div>
<div class="settings-section">
<h3>AI Configuration</h3>
<h3><i class="fas fa-robot"></i> AI 配置</h3>
<div class="setting-group api-key-group" data-model="claude-3-7-sonnet-20250219">
<label for="claudeApiKey">Claude API Key</label>
<div class="input-group">
<input type="password" id="claudeApiKey" placeholder="Enter Claude API key">
<input type="password" id="claudeApiKey" placeholder="输入 Claude API key">
<button class="btn-icon toggle-api-key">
<i class="fas fa-eye"></i>
</button>
@@ -154,7 +159,7 @@
<div class="setting-group api-key-group" data-model="gpt-4o-2024-11-20">
<label for="gpt4oApiKey">GPT-4o API Key</label>
<div class="input-group">
<input type="password" id="gpt4oApiKey" placeholder="Enter GPT-4o API key">
<input type="password" id="gpt4oApiKey" placeholder="输入 GPT-4o API key">
<button class="btn-icon toggle-api-key">
<i class="fas fa-eye"></i>
</button>
@@ -163,18 +168,18 @@
<div class="setting-group api-key-group" data-model="deepseek-reasoner">
<label for="deepseekApiKey">DeepSeek API Key</label>
<div class="input-group">
<input type="password" id="deepseekApiKey" placeholder="Enter DeepSeek API key">
<input type="password" id="deepseekApiKey" placeholder="输入 DeepSeek API key">
<button class="btn-icon toggle-api-key">
<i class="fas fa-eye"></i>
</button>
</div>
</div>
<div class="setting-group">
<label for="language">Language</label>
<input type="text" id="language" value="English" placeholder="Enter preferred language">
<label for="language"><i class="fas fa-language"></i> 语言</label>
<input type="text" id="language" value="中文" placeholder="输入首选语言">
</div>
<div class="setting-group">
<label for="modelSelect">Model</label>
<label for="modelSelect"><i class="fas fa-microchip"></i> 模型</label>
<select id="modelSelect" class="select-styled">
<option value="claude-3-7-sonnet-20250219">Claude 3.7 Sonnet</option>
<option value="gpt-4o-2024-11-20">GPT-4o</option>
@@ -182,34 +187,34 @@
</select>
</div>
<div class="setting-group">
<label for="temperature">Temperature</label>
<label for="temperature"><i class="fas fa-thermometer-half"></i> 温度</label>
<div class="range-group">
<input type="range" id="temperature" min="0" max="1" step="0.1" value="0.7">
<span id="temperatureValue">0.7</span>
</div>
</div>
<div class="setting-group">
<label for="systemPrompt">System Prompt</label>
<textarea id="systemPrompt" rows="3">You are an expert problem solver. Analyze the problem step by step, identify any questions or problems, and provide detailed solutions. Always respond in the user's preferred language.</textarea>
<label for="systemPrompt"><i class="fas fa-comment-alt"></i> 系统提示词</label>
<textarea id="systemPrompt" rows="3">您是一位专业的问题解决专家。请逐步分析问题,找出问题所在,并提供详细的解决方案。始终使用用户偏好的语言回答。</textarea>
</div>
</div>
<div class="settings-section">
<h3>Proxy Settings</h3>
<h3><i class="fas fa-globe"></i> 代理设置</h3>
<div class="setting-group">
<label class="checkbox-label">
<input type="checkbox" id="proxyEnabled">
<span>Enable VPN Proxy</span>
<span>启用 VPN 代理</span>
</label>
</div>
<div id="proxySettings" class="proxy-settings">
<div class="setting-group">
<label for="proxyHost">Proxy Host</label>
<input type="text" id="proxyHost" value="127.0.0.1" placeholder="Enter proxy host">
<label for="proxyHost"><i class="fas fa-server"></i> 代理主机</label>
<input type="text" id="proxyHost" value="127.0.0.1" placeholder="输入代理主机">
</div>
<div class="setting-group">
<label for="proxyPort">Proxy Port</label>
<input type="number" id="proxyPort" value="4780" placeholder="Enter proxy port">
<label for="proxyPort"><i class="fas fa-plug"></i> 代理端口</label>
<input type="number" id="proxyPort" value="4780" placeholder="输入代理端口">
</div>
</div>
</div>
@@ -218,7 +223,7 @@
<div id="historyPanel" class="history-panel hidden">
<div class="panel-header">
<h2>History</h2>
<h2>历史记录</h2>
<button class="btn-icon" id="closeHistory">
<i class="fas fa-times"></i>
</button>
@@ -226,7 +231,7 @@
<div class="history-content">
<div class="history-empty">
<i class="fas fa-history"></i>
<p>No history yet</p>
<p>暂无历史记录</p>
</div>
</div>
</div>
@@ -239,16 +244,17 @@
<div class="crop-actions">
<button id="cropCancel" class="btn-secondary">
<i class="fas fa-times"></i>
<span>Cancel</span>
<span>取消</span>
</button>
<button id="cropConfirm" class="btn-primary">
<i class="fas fa-check"></i>
<span>Confirm</span>
<span>确认</span>
</button>
</div>
</div>
<div id="toastContainer" class="toast-container"></div>
<script src="{{ url_for('static', filename='js/ui.js') }}"></script>
<script src="{{ url_for('static', filename='js/settings.js') }}"></script>
<script src="{{ url_for('static', filename='js/main.js') }}"></script>