完善截图和分析流程

2026-01-20 07:00:57 +08:00 · 2025-03-06 14:45:52 +08:00
parent 5bb8b4ea2d
commit b514145c13
8 changed files with 1352 additions and 685 deletions
--- a/app.py
+++ b/app.py
@@ -104,7 +104,6 @@ def stream_model_response(response_generator, sid):
                thinking_buffer += content
                
                # 发送完整的思考内容
-                print(f"Streaming thinking content: {len(thinking_buffer)} chars")
                socketio.emit('claude_response', {
                    'status': 'thinking',
                    'content': thinking_buffer
@@ -127,7 +126,7 @@ def stream_model_response(response_generator, sid):
                    response_buffer += content
                    
                    # 发送完整的内容
-                    print(f"Streaming response content: {len(response_buffer)} chars")
+                    # print(f"Streaming response content: {len(response_buffer)} chars")
                    socketio.emit('claude_response', {
                        'status': 'streaming',
                        'content': response_buffer
@@ -379,6 +378,42 @@ def handle_analyze_image(data):
            'error': f'Analysis error: {str(e)}'
        }, room=request.sid)

+@socketio.on('capture_screenshot')
+def handle_capture_screenshot(data=None):
+    """Capture the full screen and send it to the requesting client.
+
+    The screenshot is PNG-encoded, base64-encoded and emitted as a
+    'screenshot_complete' event to the requesting session's room. On
+    failure the same event is emitted with 'success': False and the
+    error message.
+
+    Args:
+        data: Payload sent with the 'capture_screenshot' event; unused.
+            Defaults to None so that an emit without a payload still
+            reaches the handler.
+    """
+    try:
+        # Capture the screen
+        screenshot = pyautogui.screenshot()
+
+        # Convert the image to a base64 string
+        buffered = BytesIO()
+        screenshot.save(buffered, format="PNG")
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+
+        # Emit the screenshot back to the client
+        socketio.emit('screenshot_complete', {
+            'success': True,
+            'image': img_str
+        }, room=request.sid)
+    except Exception as e:
+        error_msg = f"Screenshot error: {str(e)}"
+        print(f"Error capturing screenshot: {error_msg}")
+        socketio.emit('screenshot_complete', {
+            'success': False,
+            'error': error_msg
+        }, room=request.sid)
+
 def run_tray():
    icon = create_tray_icon()
    icon.run()
--- a/models/claude.py
+++ b/models/claude.py
@@ -18,13 +18,12 @@ class ClaudeModel(BaseModel):
    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream Claude's response for text analysis"""
        try:
-            # Initial status
-            yield {"status": "started", "content": ""}
-
-            api_key = self.api_key.strip()
+            yield {"status": "started"}
+            
+            api_key = self.api_key
            if api_key.startswith('Bearer '):
                api_key = api_key[7:]
-
+                
            headers = {
                'x-api-key': api_key,
                'anthropic-version': '2023-06-01',
@@ -105,6 +104,16 @@ class ClaudeModel(BaseModel):
                                    "status": "thinking",
                                    "content": thinking_content
                                }
+                    
+                    # 处理新的extended_thinking格式
+                    elif data.get('type') == 'extended_thinking_delta':
+                        if 'delta' in data and 'text' in data['delta']:
+                            thinking_chunk = data['delta']['text']
+                            thinking_content += thinking_chunk
+                            yield {
+                                "status": "thinking",
+                                "content": thinking_content
+                            }

                    elif data.get('type') == 'message_stop':
                        if thinking_content:
@@ -135,130 +144,167 @@ class ClaudeModel(BaseModel):
                "error": f"Streaming error: {str(e)}"
            }

-    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
-        """Stream Claude's response for image analysis"""
+    def analyze_image(self, image_data: str, prompt, socket=None, proxies: dict = None) -> Generator[dict, None, None]:
+        """Stream Claude's response for image analysis.
+
+        Args:
+            image_data: Base64-encoded image, either bare or as a data: URL.
+            prompt: Currently unused by this method.
+                NOTE(review): confirm what callers pass before using it.
+            socket: Currently unused by this method.
+            proxies: Optional proxies mapping forwarded to requests.post.
+
+        Yields:
+            Dicts with a "status" of "started", "thinking", "streaming",
+            "thinking_complete", "completed" or "error". "thinking" events
+            carry the accumulated reasoning, "streaming" events carry only
+            the newest text chunk, "completed" carries the full answer and
+            "error" events carry the message under "error".
+        """
+        response = None
        try:
            # Initial status
-            yield {"status": "started", "content": ""}
+            yield {"status": "started"}

            api_key = self.api_key.strip()
            if api_key.startswith('Bearer '):
                api_key = api_key[7:]

+            # Accept either bare base64 or a "data:<mime>;base64,<payload>" URL:
+            # the request below sends the payload and its media type separately.
+            media_type = 'image/png'
+            if image_data.startswith('data:'):
+                data_url_header, _, image_data = image_data.partition(',')
+                media_type = data_url_header[5:].split(';', 1)[0] or media_type
+
            headers = {
                'x-api-key': api_key,
                'anthropic-version': '2023-06-01',
                'content-type': 'application/json',
-                'accept': 'application/json',
            }

            payload = {
                'model': self.get_model_identifier(),
                'stream': True,
                'max_tokens': 8192,
                'temperature': 1,
                'system': self.system_prompt,
                'thinking': {
                    'type': 'enabled',
                    'budget_tokens': 4096
                },
                'messages': [{
                    'role': 'user',
                    'content': [
                        {
                            'type': 'image',
                            'source': {
                                'type': 'base64',
-                                'media_type': 'image/png',
+                                'media_type': media_type,
                                'data': image_data
                            }
                        },
                        {
                            'type': 'text',
                            'text': "Please analyze this question and provide a detailed solution. If you see multiple questions, focus on solving them one at a time."
                        }
                    ]
                }]
            }

            response = requests.post(
                'https://api.anthropic.com/v1/messages',
                headers=headers,
                json=payload,
                stream=True,
                proxies=proxies,
                timeout=60
            )

            if response.status_code != 200:
                error_msg = f'API error: {response.status_code}'
                try:
                    error_data = response.json()
                    if 'error' in error_data:
                        error_msg += f" - {error_data['error']['message']}"
-                except:
+                except Exception:
                    error_msg += f" - {response.text}"
                yield {"status": "error", "error": error_msg}
                return

            thinking_content = ""
            response_buffer = ""
            
            for chunk in response.iter_lines():
                if not chunk:
                    continue

                try:
                    chunk_str = chunk.decode('utf-8')
                    if not chunk_str.startswith('data: '):
                        continue

                    chunk_str = chunk_str[6:]
                    data = json.loads(chunk_str)

                    if data.get('type') == 'content_block_delta':
                        if 'delta' in data:
                            if 'text' in data['delta']:
                                text_chunk = data['delta']['text']
                                yield {
                                    "status": "streaming",
                                    "content": text_chunk
                                }
                                response_buffer += text_chunk
                                
                            elif 'thinking' in data['delta']:
                                thinking_chunk = data['delta']['thinking']
                                thinking_content += thinking_chunk
                                yield {
                                    "status": "thinking",
                                    "content": thinking_content
                                }

+                    # Handle the newer extended_thinking delta format
+                    # NOTE(review): confirm this event type against the API docs
+                    elif data.get('type') == 'extended_thinking_delta':
+                        if 'delta' in data and 'text' in data['delta']:
+                            thinking_chunk = data['delta']['text']
+                            thinking_content += thinking_chunk
+                            yield {
+                                "status": "thinking",
+                                "content": thinking_content
+                            }
+
                    elif data.get('type') == 'message_stop':
                        if thinking_content:
                            yield {
                                "status": "thinking_complete",
                                "content": thinking_content
                            }
                        yield {
                            "status": "completed",
-                            "content": ""
+                            "content": response_buffer
                        }

                    elif data.get('type') == 'error':
                        error_msg = data.get('error', {}).get('message', 'Unknown error')
                        yield {
                            "status": "error",
                            "error": error_msg
                        }
                        break

                except json.JSONDecodeError as e:
                    print(f"JSON decode error: {str(e)}")
                    continue

        except Exception as e:
            yield {
                "status": "error",
                "error": f"Streaming error: {str(e)}"
            }
+        finally:
+            # Release the streamed HTTP connection once the generator ends
+            if response is not None:
+                response.close()
--- a/models/gpt4o.py
+++ b/models/gpt4o.py
@@ -132,7 +132,7 @@ class GPT4oModel(BaseModel):
                            {
                                "type": "image_url",
                                "image_url": {
-                                    "url": f"data:image/png;base64,{image_data}",
+                                    "url": image_data if image_data.startswith('data:') else f"data:image/png;base64,{image_data}",
                                    "detail": "high"
                                }
                            },
--- a/static/js/main.js
+++ b/static/js/main.js
--- a/static/js/settings.js
+++ b/static/js/settings.js
@@ -136,11 +136,20 @@ class SettingsManager {
    }

    getSettings() {
+        const language = this.languageInput.value || '中文';
+        const basePrompt = this.systemPromptInput.value || '';
+        
+        // 检查系统提示词是否已包含语言设置
+        let systemPrompt = basePrompt;
+        if (!basePrompt.includes('Please respond in') && !basePrompt.includes('请用') && !basePrompt.includes('使用')) {
+            systemPrompt = `${basePrompt}\n\n请务必使用${language}回答。`;
+        }
+        
        return {
            model: this.modelSelect.value,
            temperature: this.temperatureInput.value,
-            language: this.languageInput.value,
-            systemPrompt: this.systemPromptInput.value + ` Please respond in ${this.languageInput.value}.`,
+            language: language,
+            systemPrompt: systemPrompt,
            proxyEnabled: this.proxyEnabledInput.checked,
            proxyHost: this.proxyHostInput.value,
            proxyPort: this.proxyPortInput.value,
--- a/static/js/ui.js
+++ b/static/js/ui.js
@@ -47,6 +47,16 @@ class UIManager {
    }

    showToast(message, type = 'success') {
+        // 检查是否已经存在相同内容的提示
+        const existingToasts = this.toastContainer.querySelectorAll('.toast');
+        for (const existingToast of existingToasts) {
+            const existingMessage = existingToast.querySelector('span').textContent;
+            if (existingMessage === message) {
+                // 已经存在相同的提示，不再创建新的
+                return;
+            }
+        }
+        
        const toast = document.createElement('div');
        toast.className = `toast ${type}`;
        toast.innerHTML = `
@@ -55,10 +65,13 @@ class UIManager {
        `;
        this.toastContainer.appendChild(toast);
        
+        // 为不同类型的提示设置不同的显示时间
+        const displayTime = message === '截图成功' ? 1500 : 3000;
+        
        setTimeout(() => {
            toast.style.opacity = '0';
            setTimeout(() => toast.remove(), 300);
-        }, 3000);
+        }, displayTime);
    }

    closeAllPanels() {
--- a/static/style.css
+++ b/static/style.css
--- a/templates/index.html
+++ b/templates/index.html
@@ -15,17 +15,17 @@
        <div class="header-left">
            <h1>Snap Solver</h1>
            <div class="connection-status">
-                <div id="connectionStatus" class="status disconnected">Disconnected</div>
+                <div id="connectionStatus" class="status disconnected">未连接</div>
            </div>
        </div>
        <div class="header-right">
-            <button id="themeToggle" class="btn-icon" title="Toggle theme">
+            <button id="themeToggle" class="btn-icon" title="切换主题">
                <i class="fas fa-moon"></i>
            </button>
-            <button id="historyToggle" class="btn-icon" title="View history">
+            <button id="historyToggle" class="btn-icon" title="查看历史记录">
                <i class="fas fa-history"></i>
            </button>
-            <button id="settingsToggle" class="btn-icon" title="Settings">
+            <button id="settingsToggle" class="btn-icon" title="设置">
                <i class="fas fa-cog"></i>
            </button>
        </div>
@@ -35,45 +35,50 @@
        <div class="content-panel">
            <div class="capture-section">
                <div class="toolbar">
-                <div class="toolbar-buttons">
-                    <div class="button-group">
-                        <button id="captureBtn" class="btn-primary" disabled>
-                            <i class="fas fa-camera"></i>
-                            <span>Capture</span>
-                        </button>
-                        <button id="cropBtn" class="btn-secondary hidden">
-                            <i class="fas fa-crop"></i>
-                            <span>Crop</span>
-                        </button>
+                    <div class="toolbar-buttons">
+                        <div class="button-group">
+                            <button id="captureBtn" class="btn-primary" disabled>
+                                <i class="fas fa-camera"></i>
+                                <span>截图</span>
+                            </button>
+                            <button id="cropBtn" class="btn-secondary hidden">
+                                <i class="fas fa-crop"></i>
+                                <span>裁剪</span>
+                            </button>
+                        </div>
                    </div>
                </div>
+                <div id="emptyState" class="empty-state">
+                    <i class="fas fa-camera-retro"></i>
+                    <h3>准备好开始了吗？</h3>
+                    <p>点击"截图"按钮捕获屏幕，然后使用AI分析图像或提取文本。您可以截取数学题、代码或任何需要帮助的内容。</p>
                </div>
                <div id="imagePreview" class="image-preview hidden">
                    <div class="image-container">
-                        <img id="screenshotImg" src="" alt="Screenshot preview">
+                        <img id="screenshotImg" src="" alt="截图预览">
                    </div>
                    <div class="analysis-button">
                        <div class="button-group">
                            <button id="sendToClaude" class="btn-primary hidden">
                                <i class="fas fa-robot"></i>
-                                <span>Send to AI</span>
+                                <span>发送至AI</span>
                            </button>
                            <button id="extractText" class="btn-primary hidden">
                                <i class="fas fa-font"></i>
-                                <span>Extract Text</span>
+                                <span>提取文本</span>
                            </button>
                        </div>
                        <div id="textEditor" class="text-editor hidden">
-                            <textarea id="extractedText" rows="4" placeholder="Extracted text will appear here..."></textarea>
+                            <textarea id="extractedText" rows="4" placeholder="提取的文本将显示在这里..."></textarea>
                            <div class="text-format-controls">
                                <div class="send-text-group">
-                                    <div id="confidenceIndicator" class="confidence-indicator" title="OCR Confidence">
+                                    <div id="confidenceIndicator" class="confidence-indicator" title="OCR 置信度">
                                        <i class="fas fa-check-circle"></i>
                                        <span class="confidence-value"></span>
                                    </div>
                                    <button id="sendExtractedText" class="btn-primary">
                                        <i class="fas fa-paper-plane"></i>
-                                        <span>Send Text to AI</span>
+                                        <span>发送文本至AI</span>
                                    </button>
                                </div>
                            </div>
@@ -85,7 +90,7 @@
            <div id="claudePanel" class="claude-panel hidden">
                <div class="panel-header">
                    <div class="header-title">
-                        <h2>Analysis Result</h2>
+                        <h2>分析结果</h2>
                        <div class="analysis-status">
                            <div class="status-light"></div>
                        </div>
@@ -98,7 +103,7 @@
                    <div class="thinking-header" id="thinkingToggle">
                        <div class="thinking-title">
                            <i class="fas fa-brain"></i>
-                            <h3>AI's Thinking Process</h3>
+                            <h3>AI思考过程 <span class="thinking-hint">(点击展开/折叠)</span></h3>
                        </div>
                        <button class="toggle-btn">
                            <i class="fas fa-chevron-down"></i>
@@ -112,18 +117,18 @@

        <aside id="settingsPanel" class="settings-panel hidden">
            <div class="panel-header">
-                <h2>Settings</h2>
+                <h2>设置</h2>
                <button class="btn-icon" id="closeSettings">
                    <i class="fas fa-times"></i>
                </button>
            </div>
            <div class="settings-content">
                <div class="settings-section">
-                    <h3>OCR Configuration</h3>
+                    <h3><i class="fas fa-font"></i> OCR 配置</h3>
                    <div class="setting-group">
                        <label for="mathpixAppId">Mathpix App ID</label>
                        <div class="input-group">
-                            <input type="password" id="mathpixAppId" placeholder="Enter Mathpix App ID">
+                            <input type="password" id="mathpixAppId" placeholder="输入 Mathpix App ID">
                            <button class="btn-icon toggle-api-key">
                                <i class="fas fa-eye"></i>
                            </button>
@@ -132,7 +137,7 @@
                    <div class="setting-group">
                        <label for="mathpixAppKey">Mathpix App Key</label>
                        <div class="input-group">
-                            <input type="password" id="mathpixAppKey" placeholder="Enter Mathpix App Key">
+                            <input type="password" id="mathpixAppKey" placeholder="输入 Mathpix App Key">
                            <button class="btn-icon toggle-api-key">
                                <i class="fas fa-eye"></i>
                            </button>
@@ -141,11 +146,11 @@
                </div>

                <div class="settings-section">
-                    <h3>AI Configuration</h3>
+                    <h3><i class="fas fa-robot"></i> AI 配置</h3>
                    <div class="setting-group api-key-group" data-model="claude-3-7-sonnet-20250219">
                        <label for="claudeApiKey">Claude API Key</label>
                        <div class="input-group">
-                            <input type="password" id="claudeApiKey" placeholder="Enter Claude API key">
+                            <input type="password" id="claudeApiKey" placeholder="输入 Claude API key">
                            <button class="btn-icon toggle-api-key">
                                <i class="fas fa-eye"></i>
                            </button>
@@ -154,7 +159,7 @@
                    <div class="setting-group api-key-group" data-model="gpt-4o-2024-11-20">
                        <label for="gpt4oApiKey">GPT-4o API Key</label>
                        <div class="input-group">
-                            <input type="password" id="gpt4oApiKey" placeholder="Enter GPT-4o API key">
+                            <input type="password" id="gpt4oApiKey" placeholder="输入 GPT-4o API key">
                            <button class="btn-icon toggle-api-key">
                                <i class="fas fa-eye"></i>
                            </button>
@@ -163,18 +168,18 @@
                    <div class="setting-group api-key-group" data-model="deepseek-reasoner">
                        <label for="deepseekApiKey">DeepSeek API Key</label>
                        <div class="input-group">
-                            <input type="password" id="deepseekApiKey" placeholder="Enter DeepSeek API key">
+                            <input type="password" id="deepseekApiKey" placeholder="输入 DeepSeek API key">
                            <button class="btn-icon toggle-api-key">
                                <i class="fas fa-eye"></i>
                            </button>
                        </div>
                    </div>
                    <div class="setting-group">
-                        <label for="language">Language</label>
-                        <input type="text" id="language" value="English" placeholder="Enter preferred language">
+                        <label for="language"><i class="fas fa-language"></i> 语言</label>
+                        <input type="text" id="language" value="中文" placeholder="输入首选语言">
                    </div>
                    <div class="setting-group">
-                        <label for="modelSelect">Model</label>
+                        <label for="modelSelect"><i class="fas fa-microchip"></i> 模型</label>
                        <select id="modelSelect" class="select-styled">
                            <option value="claude-3-7-sonnet-20250219">Claude 3.7 Sonnet</option>
                            <option value="gpt-4o-2024-11-20">GPT-4o</option>
@@ -182,34 +187,34 @@
                        </select>
                    </div>
                    <div class="setting-group">
-                        <label for="temperature">Temperature</label>
+                        <label for="temperature"><i class="fas fa-thermometer-half"></i> 温度</label>
                        <div class="range-group">
                            <input type="range" id="temperature" min="0" max="1" step="0.1" value="0.7">
                            <span id="temperatureValue">0.7</span>
                        </div>
                    </div>
                    <div class="setting-group">
-                        <label for="systemPrompt">System Prompt</label>
-                        <textarea id="systemPrompt" rows="3">You are an expert problem solver. Analyze the problem step by step, identify any questions or problems, and provide detailed solutions. Always respond in the user's preferred language.</textarea>
+                        <label for="systemPrompt"><i class="fas fa-comment-alt"></i> 系统提示词</label>
+                        <textarea id="systemPrompt" rows="3">您是一位专业的问题解决专家。请逐步分析问题，找出问题所在，并提供详细的解决方案。始终使用用户偏好的语言回答。</textarea>
                    </div>
                </div>

                <div class="settings-section">
-                    <h3>Proxy Settings</h3>
+                    <h3><i class="fas fa-globe"></i> 代理设置</h3>
                    <div class="setting-group">
                        <label class="checkbox-label">
                            <input type="checkbox" id="proxyEnabled">
-                            <span>Enable VPN Proxy</span>
+                            <span>启用 VPN 代理</span>
                        </label>
                    </div>
                    <div id="proxySettings" class="proxy-settings">
                        <div class="setting-group">
-                            <label for="proxyHost">Proxy Host</label>
-                            <input type="text" id="proxyHost" value="127.0.0.1" placeholder="Enter proxy host">
+                            <label for="proxyHost"><i class="fas fa-server"></i> 代理主机</label>
+                            <input type="text" id="proxyHost" value="127.0.0.1" placeholder="输入代理主机">
                        </div>
                        <div class="setting-group">
-                            <label for="proxyPort">Proxy Port</label>
-                            <input type="number" id="proxyPort" value="4780" placeholder="Enter proxy port">
+                            <label for="proxyPort"><i class="fas fa-plug"></i> 代理端口</label>
+                            <input type="number" id="proxyPort" value="4780" placeholder="输入代理端口">
                        </div>
                    </div>
                </div>
@@ -218,7 +223,7 @@

        <div id="historyPanel" class="history-panel hidden">
            <div class="panel-header">
-                <h2>History</h2>
+                <h2>历史记录</h2>
                <button class="btn-icon" id="closeHistory">
                    <i class="fas fa-times"></i>
                </button>
@@ -226,7 +231,7 @@
            <div class="history-content">
                <div class="history-empty">
                    <i class="fas fa-history"></i>
-                    <p>No history yet</p>
+                    <p>暂无历史记录</p>
                </div>
            </div>
        </div>
@@ -239,16 +244,17 @@
        <div class="crop-actions">
            <button id="cropCancel" class="btn-secondary">
                <i class="fas fa-times"></i>
-                <span>Cancel</span>
+                <span>取消</span>
            </button>
            <button id="cropConfirm" class="btn-primary">
                <i class="fas fa-check"></i>
-                <span>Confirm</span>
+                <span>确认</span>
            </button>
        </div>
+    </div>
+    
    <div id="toastContainer" class="toast-container"></div>

-
    <script src="{{ url_for('static', filename='js/ui.js') }}"></script>
    <script src="{{ url_for('static', filename='js/settings.js') }}"></script>
    <script src="{{ url_for('static', filename='js/main.js') }}"></script>