mirror of
https://github.com/Zippland/Snap-Solver.git
synced 2026-03-04 16:08:11 +08:00
重构文本提取功能,简化图像文本处理流程
This commit is contained in:
15
app.py
15
app.py
@@ -222,13 +222,14 @@ def handle_text_extraction(data):
|
|||||||
api_key=mathpix_key
|
api_key=mathpix_key
|
||||||
)
|
)
|
||||||
|
|
||||||
print("Starting text extraction thread...")
|
print("Starting text extraction...")
|
||||||
extraction_thread = Thread(
|
# 使用新的extract_full_text方法直接提取完整文本
|
||||||
target=stream_model_response,
|
extracted_text = model.extract_full_text(image_data)
|
||||||
args=(model.analyze_image(image_data), request.sid)
|
|
||||||
)
|
# 直接返回文本结果
|
||||||
extraction_thread.daemon = True # Make thread daemon so it doesn't block shutdown
|
socketio.emit('text_extracted', {
|
||||||
extraction_thread.start()
|
'content': extracted_text
|
||||||
|
}, room=request.sid)
|
||||||
|
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
error_msg = str(e)
|
error_msg = str(e)
|
||||||
|
|||||||
@@ -72,6 +72,21 @@ class MathpixModel(BaseModel):
|
|||||||
"enable_spell_check": True,
|
"enable_spell_check": True,
|
||||||
"rm_spaces": True
|
"rm_spaces": True
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"full_text": {
|
||||||
|
"formats": ["text"],
|
||||||
|
"data_options": {
|
||||||
|
"include_latex": False,
|
||||||
|
"include_asciimath": False
|
||||||
|
},
|
||||||
|
"ocr_options": {
|
||||||
|
"enable_spell_check": True,
|
||||||
|
"enable_handwritten": True,
|
||||||
|
"rm_spaces": False,
|
||||||
|
"detect_paragraphs": True,
|
||||||
|
"enable_tables": False,
|
||||||
|
"enable_math_ocr": False
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -281,3 +296,76 @@ class MathpixModel(BaseModel):
|
|||||||
formatted_parts.append(f"Error: {error_msg}")
|
formatted_parts.append(f"Error: {error_msg}")
|
||||||
|
|
||||||
return "\n".join(formatted_parts).strip()
|
return "\n".join(formatted_parts).strip()
|
||||||
|
|
||||||
|
def extract_full_text(self, image_data: str, proxies: dict = None, max_retries: int = 3) -> str:
    """Extract the plain text of an image through the Mathpix API.

    The request is configured for text only: LaTeX/AsciiMath output, table
    recognition and math OCR are switched off in the payload.

    This method never raises; every failure is reported as a message string.

    Args:
        image_data: Base64-encoded image data (sent as a JPEG data URI).
        proxies: Optional proxy mapping forwarded to ``requests.post``.
        max_retries: Maximum number of attempts made for a request that
            times out, fails to connect, or is rate limited (HTTP 429).
            Values below 1 are treated as 1 so that a request is always
            made and a string is always returned.

    Returns:
        str: The ``text`` field of the API response, or a human-readable
        error message when the request fails or no text is returned.
    """
    # Local import keeps this block self-contained; only used for backoff.
    import time

    try:
        # NOTE(review): these options mirror the "full_text" entry of the
        # class-level configuration — consider reusing it instead of
        # duplicating the literal here.
        payload = {
            "src": f"data:image/jpeg;base64,{image_data}",
            "formats": ["text"],
            "data_options": {
                "include_latex": False,
                "include_asciimath": False,
            },
            "ocr_options": {
                "enable_spell_check": True,
                "enable_handwritten": True,
                "rm_spaces": False,
                "detect_paragraphs": True,
                "enable_tables": False,
                "enable_math_ocr": False,
            },
        }

        # Guarantee at least one attempt; otherwise the loop would be
        # skipped and the method would implicitly return None.
        attempts = max(1, max_retries)

        for attempt in range(attempts):
            is_last_attempt = attempt == attempts - 1
            # Exponential backoff (1s, 2s, 4s, ... capped at 8s) so that a
            # retry is not fired straight back at a struggling endpoint.
            backoff_seconds = min(2 ** attempt, 8)

            try:
                response = requests.post(
                    self.api_url,
                    headers=self.headers,
                    json=payload,
                    proxies=proxies,
                    timeout=30,  # seconds
                )
            except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
                if is_last_attempt:
                    # Handled by the RequestException handler below.
                    raise
                time.sleep(backoff_seconds)
                continue

            # Rate limit exceeded: wait and retry, or give up on the last attempt.
            if response.status_code == 429:
                if is_last_attempt:
                    raise requests.exceptions.RequestException("超出API速率限制")
                time.sleep(backoff_seconds)
                continue

            response.raise_for_status()
            result = response.json()

            if "text" in result:
                return result["text"]

            # Surface an API-level error reported inside a successful HTTP
            # response instead of hiding it behind the generic message.
            api_error = result.get("error") if isinstance(result, dict) else None
            if api_error:
                return f"Mathpix API错误: {api_error}"

            return "未能提取到文本内容"

    except requests.exceptions.RequestException as e:
        return f"Mathpix API错误: {str(e)}"
    except Exception as e:
        return f"处理图像时出错: {str(e)}"
|
||||||
|
|||||||
@@ -43,8 +43,6 @@ class SnapSolver {
|
|||||||
this.cropConfirm = document.getElementById('cropConfirm');
|
this.cropConfirm = document.getElementById('cropConfirm');
|
||||||
|
|
||||||
// Format toggle elements
|
// Format toggle elements
|
||||||
this.textFormatBtn = document.getElementById('textFormatBtn');
|
|
||||||
this.latexFormatBtn = document.getElementById('latexFormatBtn');
|
|
||||||
this.confidenceIndicator = document.getElementById('confidenceIndicator');
|
this.confidenceIndicator = document.getElementById('confidenceIndicator');
|
||||||
this.confidenceValue = document.querySelector('.confidence-value');
|
this.confidenceValue = document.querySelector('.confidence-value');
|
||||||
|
|
||||||
@@ -60,12 +58,8 @@ class SnapSolver {
|
|||||||
this.cropper = null;
|
this.cropper = null;
|
||||||
this.croppedImage = null;
|
this.croppedImage = null;
|
||||||
this.history = JSON.parse(localStorage.getItem('snapHistory') || '[]');
|
this.history = JSON.parse(localStorage.getItem('snapHistory') || '[]');
|
||||||
this.currentFormat = 'text';
|
|
||||||
this.emitTimeout = null;
|
this.emitTimeout = null;
|
||||||
this.extractedFormats = {
|
this.extractedContent = '';
|
||||||
text: '',
|
|
||||||
latex: ''
|
|
||||||
};
|
|
||||||
|
|
||||||
// 确保裁剪容器和其他面板初始为隐藏状态
|
// 确保裁剪容器和其他面板初始为隐藏状态
|
||||||
if (this.cropContainer) {
|
if (this.cropContainer) {
|
||||||
@@ -228,37 +222,18 @@ class SnapSolver {
|
|||||||
}
|
}
|
||||||
this.sendExtractedTextBtn.disabled = false; // Re-enable send button on server error
|
this.sendExtractedTextBtn.disabled = false; // Re-enable send button on server error
|
||||||
} else if (data.content) {
|
} else if (data.content) {
|
||||||
// Parse the content to extract text and LaTeX
|
// 直接使用提取的文本内容
|
||||||
const lines = data.content.split('\n');
|
this.extractedContent = data.content;
|
||||||
let confidence = null;
|
|
||||||
|
|
||||||
// Process the content
|
// 更新文本编辑器
|
||||||
for (let i = 0; i < lines.length; i++) {
|
|
||||||
const line = lines[i];
|
|
||||||
if (line.startsWith('Confidence:')) {
|
|
||||||
confidence = parseFloat(line.match(/[\d.]+/)[0]) / 100;
|
|
||||||
} else if (line === 'Text Content:' && i + 1 < lines.length) {
|
|
||||||
this.extractedFormats.text = lines[i + 1];
|
|
||||||
} else if (line === 'LaTeX (Styled):' && i + 1 < lines.length) {
|
|
||||||
this.extractedFormats.latex = lines[i + 1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update confidence indicator
|
|
||||||
if (confidence !== null) {
|
|
||||||
this.confidenceValue.textContent = `${(confidence * 100).toFixed(0)}%`;
|
|
||||||
this.confidenceIndicator.style.display = 'flex';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update text editor with current format
|
|
||||||
if (this.extractedText) {
|
if (this.extractedText) {
|
||||||
this.extractedText.value = this.extractedFormats[this.currentFormat];
|
this.extractedText.value = data.content;
|
||||||
this.extractedText.disabled = false;
|
this.extractedText.disabled = false;
|
||||||
this.extractedText.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
|
this.extractedText.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
|
||||||
this.sendExtractedTextBtn.disabled = false;
|
this.sendExtractedTextBtn.disabled = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
window.showToast('Text extracted successfully');
|
window.showToast('文本提取成功');
|
||||||
}
|
}
|
||||||
|
|
||||||
this.extractTextBtn.disabled = false;
|
this.extractTextBtn.disabled = false;
|
||||||
@@ -601,7 +576,6 @@ class SnapSolver {
|
|||||||
}
|
}
|
||||||
|
|
||||||
setupEventListeners() {
|
setupEventListeners() {
|
||||||
this.setupFormatToggle();
|
|
||||||
this.setupCaptureEvents();
|
this.setupCaptureEvents();
|
||||||
this.setupCropEvents();
|
this.setupCropEvents();
|
||||||
this.setupAnalysisEvents();
|
this.setupAnalysisEvents();
|
||||||
@@ -609,26 +583,6 @@ class SnapSolver {
|
|||||||
this.setupThinkingToggle();
|
this.setupThinkingToggle();
|
||||||
}
|
}
|
||||||
|
|
||||||
setupFormatToggle() {
|
|
||||||
this.textFormatBtn.addEventListener('click', () => {
|
|
||||||
if (this.currentFormat !== 'text') {
|
|
||||||
this.currentFormat = 'text';
|
|
||||||
this.textFormatBtn.classList.add('active');
|
|
||||||
this.latexFormatBtn.classList.remove('active');
|
|
||||||
this.extractedText.value = this.extractedFormats.text;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
this.latexFormatBtn.addEventListener('click', () => {
|
|
||||||
if (this.currentFormat !== 'latex') {
|
|
||||||
this.currentFormat = 'latex';
|
|
||||||
this.latexFormatBtn.classList.add('active');
|
|
||||||
this.textFormatBtn.classList.remove('active');
|
|
||||||
this.extractedText.value = this.extractedFormats.latex;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
setupCaptureEvents() {
|
setupCaptureEvents() {
|
||||||
// Capture button
|
// Capture button
|
||||||
this.captureBtn.addEventListener('click', async () => {
|
this.captureBtn.addEventListener('click', async () => {
|
||||||
|
|||||||
@@ -66,10 +66,6 @@
|
|||||||
<div id="textEditor" class="text-editor hidden">
|
<div id="textEditor" class="text-editor hidden">
|
||||||
<textarea id="extractedText" rows="4" placeholder="Extracted text will appear here..."></textarea>
|
<textarea id="extractedText" rows="4" placeholder="Extracted text will appear here..."></textarea>
|
||||||
<div class="text-format-controls">
|
<div class="text-format-controls">
|
||||||
<div class="format-toggle">
|
|
||||||
<button id="textFormatBtn" class="format-btn active">Text</button>
|
|
||||||
<button id="latexFormatBtn" class="format-btn">LaTeX</button>
|
|
||||||
</div>
|
|
||||||
<div class="send-text-group">
|
<div class="send-text-group">
|
||||||
<div id="confidenceIndicator" class="confidence-indicator" title="OCR Confidence">
|
<div id="confidenceIndicator" class="confidence-indicator" title="OCR Confidence">
|
||||||
<i class="fas fa-check-circle"></i>
|
<i class="fas fa-check-circle"></i>
|
||||||
|
|||||||
Reference in New Issue
Block a user