mirror of
https://github.com/Zippland/Snap-Solver.git
synced 2026-01-19 09:41:15 +08:00
重构文本提取功能,简化图像文本处理流程
This commit is contained in:
15
app.py
15
app.py
@@ -222,13 +222,14 @@ def handle_text_extraction(data):
|
||||
api_key=mathpix_key
|
||||
)
|
||||
|
||||
print("Starting text extraction thread...")
|
||||
extraction_thread = Thread(
|
||||
target=stream_model_response,
|
||||
args=(model.analyze_image(image_data), request.sid)
|
||||
)
|
||||
extraction_thread.daemon = True # Make thread daemon so it doesn't block shutdown
|
||||
extraction_thread.start()
|
||||
print("Starting text extraction...")
|
||||
# 使用新的extract_full_text方法直接提取完整文本
|
||||
extracted_text = model.extract_full_text(image_data)
|
||||
|
||||
# 直接返回文本结果
|
||||
socketio.emit('text_extracted', {
|
||||
'content': extracted_text
|
||||
}, room=request.sid)
|
||||
|
||||
except ValueError as e:
|
||||
error_msg = str(e)
|
||||
|
||||
@@ -72,6 +72,21 @@ class MathpixModel(BaseModel):
|
||||
"enable_spell_check": True,
|
||||
"rm_spaces": True
|
||||
}
|
||||
},
|
||||
"full_text": {
|
||||
"formats": ["text"],
|
||||
"data_options": {
|
||||
"include_latex": False,
|
||||
"include_asciimath": False
|
||||
},
|
||||
"ocr_options": {
|
||||
"enable_spell_check": True,
|
||||
"enable_handwritten": True,
|
||||
"rm_spaces": False,
|
||||
"detect_paragraphs": True,
|
||||
"enable_tables": False,
|
||||
"enable_math_ocr": False
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -281,3 +296,76 @@ class MathpixModel(BaseModel):
|
||||
formatted_parts.append(f"Error: {error_msg}")
|
||||
|
||||
return "\n".join(formatted_parts).strip()
|
||||
|
||||
def extract_full_text(self, image_data: str, proxies: dict = None, max_retries: int = 3) -> str:
    """
    Extract the plain-text content of an image through the Mathpix API.

    The request asks for the ``text`` format only, with the LaTeX/AsciiMath
    data options and the table / math OCR options switched off, so the
    response is intended to carry running text rather than formulas or
    tables.

    Args:
        image_data: Base64-encoded image data. It is embedded in a
            ``data:image/jpeg;base64,...`` URI regardless of the real
            image type.
        proxies: Optional proxy configuration, forwarded unchanged to
            ``requests.post``.
        max_retries: Maximum number of attempts made when the API answers
            HTTP 429 or the request times out / cannot connect. Values
            below 1 are treated as 1 so that one request is always sent.

    Returns:
        str: The ``text`` field of the API response on success; otherwise
        a human-readable error message. This method reports failures
        through its return value and does not raise.
    """
    try:
        # Request payload tuned for full-text extraction.
        payload = {
            "src": f"data:image/jpeg;base64,{image_data}",
            "formats": ["text"],
            "data_options": {
                "include_latex": False,
                "include_asciimath": False
            },
            "ocr_options": {
                "enable_spell_check": True,
                "enable_handwritten": True,
                "rm_spaces": False,
                "detect_paragraphs": True,
                "enable_tables": False,
                "enable_math_ocr": False
            }
        }

        # Always make at least one attempt: with max_retries <= 0 the loop
        # would otherwise be skipped entirely and the method would fall
        # through and return None instead of a string.
        total_attempts = max(1, max_retries)

        # NOTE(review): retries are issued back-to-back with no delay;
        # consider a backoff if rate limiting turns out to be common.
        for attempt in range(1, total_attempts + 1):
            is_last_attempt = attempt == total_attempts

            try:
                # Send the request to the Mathpix API.
                response = requests.post(
                    self.api_url,
                    headers=self.headers,
                    json=payload,
                    proxies=proxies,
                    timeout=30  # seconds
                )

                # Rate limited: try again unless the attempts are used up.
                if response.status_code == 429:
                    if is_last_attempt:
                        raise requests.exceptions.RequestException("超出API速率限制")
                    continue

                response.raise_for_status()
                result = response.json()

                # Hand back the text directly when the API supplied it.
                if 'text' in result:
                    return result['text']
                return "未能提取到文本内容"

            except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
                # Transient network failure: retry, or re-raise on the
                # final attempt so the handler below reports it.
                if is_last_attempt:
                    raise

    except requests.exceptions.RequestException as e:
        return f"Mathpix API错误: {e}"
    except Exception as e:
        return f"处理图像时出错: {e}"
|
||||
|
||||
@@ -43,8 +43,6 @@ class SnapSolver {
|
||||
this.cropConfirm = document.getElementById('cropConfirm');
|
||||
|
||||
// Format toggle elements
|
||||
this.textFormatBtn = document.getElementById('textFormatBtn');
|
||||
this.latexFormatBtn = document.getElementById('latexFormatBtn');
|
||||
this.confidenceIndicator = document.getElementById('confidenceIndicator');
|
||||
this.confidenceValue = document.querySelector('.confidence-value');
|
||||
|
||||
@@ -60,12 +58,8 @@ class SnapSolver {
|
||||
this.cropper = null;
|
||||
this.croppedImage = null;
|
||||
this.history = JSON.parse(localStorage.getItem('snapHistory') || '[]');
|
||||
this.currentFormat = 'text';
|
||||
this.emitTimeout = null;
|
||||
this.extractedFormats = {
|
||||
text: '',
|
||||
latex: ''
|
||||
};
|
||||
this.extractedContent = '';
|
||||
|
||||
// 确保裁剪容器和其他面板初始为隐藏状态
|
||||
if (this.cropContainer) {
|
||||
@@ -228,37 +222,18 @@ class SnapSolver {
|
||||
}
|
||||
this.sendExtractedTextBtn.disabled = false; // Re-enable send button on server error
|
||||
} else if (data.content) {
|
||||
// Parse the content to extract text and LaTeX
|
||||
const lines = data.content.split('\n');
|
||||
let confidence = null;
|
||||
// 直接使用提取的文本内容
|
||||
this.extractedContent = data.content;
|
||||
|
||||
// Process the content
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
const line = lines[i];
|
||||
if (line.startsWith('Confidence:')) {
|
||||
confidence = parseFloat(line.match(/[\d.]+/)[0]) / 100;
|
||||
} else if (line === 'Text Content:' && i + 1 < lines.length) {
|
||||
this.extractedFormats.text = lines[i + 1];
|
||||
} else if (line === 'LaTeX (Styled):' && i + 1 < lines.length) {
|
||||
this.extractedFormats.latex = lines[i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
// Update confidence indicator
|
||||
if (confidence !== null) {
|
||||
this.confidenceValue.textContent = `${(confidence * 100).toFixed(0)}%`;
|
||||
this.confidenceIndicator.style.display = 'flex';
|
||||
}
|
||||
|
||||
// Update text editor with current format
|
||||
// 更新文本编辑器
|
||||
if (this.extractedText) {
|
||||
this.extractedText.value = this.extractedFormats[this.currentFormat];
|
||||
this.extractedText.value = data.content;
|
||||
this.extractedText.disabled = false;
|
||||
this.extractedText.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
|
||||
this.sendExtractedTextBtn.disabled = false;
|
||||
}
|
||||
|
||||
window.showToast('Text extracted successfully');
|
||||
window.showToast('文本提取成功');
|
||||
}
|
||||
|
||||
this.extractTextBtn.disabled = false;
|
||||
@@ -601,7 +576,6 @@ class SnapSolver {
|
||||
}
|
||||
|
||||
setupEventListeners() {
|
||||
this.setupFormatToggle();
|
||||
this.setupCaptureEvents();
|
||||
this.setupCropEvents();
|
||||
this.setupAnalysisEvents();
|
||||
@@ -609,26 +583,6 @@ class SnapSolver {
|
||||
this.setupThinkingToggle();
|
||||
}
|
||||
|
||||
setupFormatToggle() {
|
||||
this.textFormatBtn.addEventListener('click', () => {
|
||||
if (this.currentFormat !== 'text') {
|
||||
this.currentFormat = 'text';
|
||||
this.textFormatBtn.classList.add('active');
|
||||
this.latexFormatBtn.classList.remove('active');
|
||||
this.extractedText.value = this.extractedFormats.text;
|
||||
}
|
||||
});
|
||||
|
||||
this.latexFormatBtn.addEventListener('click', () => {
|
||||
if (this.currentFormat !== 'latex') {
|
||||
this.currentFormat = 'latex';
|
||||
this.latexFormatBtn.classList.add('active');
|
||||
this.textFormatBtn.classList.remove('active');
|
||||
this.extractedText.value = this.extractedFormats.latex;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
setupCaptureEvents() {
|
||||
// Capture button
|
||||
this.captureBtn.addEventListener('click', async () => {
|
||||
|
||||
@@ -66,10 +66,6 @@
|
||||
<div id="textEditor" class="text-editor hidden">
|
||||
<textarea id="extractedText" rows="4" placeholder="Extracted text will appear here..."></textarea>
|
||||
<div class="text-format-controls">
|
||||
<div class="format-toggle">
|
||||
<button id="textFormatBtn" class="format-btn active">Text</button>
|
||||
<button id="latexFormatBtn" class="format-btn">LaTeX</button>
|
||||
</div>
|
||||
<div class="send-text-group">
|
||||
<div id="confidenceIndicator" class="confidence-indicator" title="OCR Confidence">
|
||||
<i class="fas fa-check-circle"></i>
|
||||
|
||||
Reference in New Issue
Block a user