重构文本提取功能,简化图像文本处理流程

This commit is contained in:
Zylan
2025-03-05 23:20:47 +08:00
parent 80829f09d2
commit 5bb8b4ea2d
4 changed files with 102 additions and 63 deletions

15
app.py
View File

@@ -222,13 +222,14 @@ def handle_text_extraction(data):
api_key=mathpix_key
)
print("Starting text extraction thread...")
extraction_thread = Thread(
target=stream_model_response,
args=(model.analyze_image(image_data), request.sid)
)
extraction_thread.daemon = True # Make thread daemon so it doesn't block shutdown
extraction_thread.start()
print("Starting text extraction...")
# 使用新的extract_full_text方法直接提取完整文本
extracted_text = model.extract_full_text(image_data)
# 直接返回文本结果
socketio.emit('text_extracted', {
'content': extracted_text
}, room=request.sid)
except ValueError as e:
error_msg = str(e)

View File

@@ -72,6 +72,21 @@ class MathpixModel(BaseModel):
"enable_spell_check": True,
"rm_spaces": True
}
},
"full_text": {
"formats": ["text"],
"data_options": {
"include_latex": False,
"include_asciimath": False
},
"ocr_options": {
"enable_spell_check": True,
"enable_handwritten": True,
"rm_spaces": False,
"detect_paragraphs": True,
"enable_tables": False,
"enable_math_ocr": False
}
}
}
@@ -281,3 +296,76 @@ class MathpixModel(BaseModel):
formatted_parts.append(f"Error: {error_msg}")
return "\n".join(formatted_parts).strip()
def extract_full_text(self, image_data: str, proxies: dict = None, max_retries: int = 3) -> str:
    """Extract the plain text of an image through the Mathpix API.

    The request asks for the ``text`` format only, with LaTeX/AsciiMath
    output, table recognition and math OCR switched off in the payload.

    Args:
        image_data: Base64-encoded image data (sent as a JPEG data URI).
        proxies: Optional proxy mapping forwarded to ``requests.post``.
        max_retries: Maximum number of request attempts. Values below 1 are
            treated as 1 so that the request is always sent at least once.

    Returns:
        The extracted text on success. On failure a human-readable error
        message is returned instead of raising, so callers can forward the
        result as-is.
    """
    # Local import keeps this block self-contained; only needed for backoff.
    import time

    try:
        # Request payload tuned for full-text extraction.
        payload = {
            "src": f"data:image/jpeg;base64,{image_data}",
            "formats": ["text"],
            "data_options": {
                "include_latex": False,
                "include_asciimath": False
            },
            "ocr_options": {
                "enable_spell_check": True,
                "enable_handwritten": True,
                "rm_spaces": False,
                "detect_paragraphs": True,
                "enable_tables": False,
                "enable_math_ocr": False
            }
        }

        # Always make at least one attempt; previously max_retries <= 0
        # skipped the loop entirely and the method returned None.
        attempts = max(1, max_retries)

        for attempt in range(attempts):
            is_last_attempt = attempt == attempts - 1
            # Short exponential backoff (1s, 2s, 4s, capped at 8s) so that
            # retries do not immediately hit a rate-limited or flaky API.
            backoff_seconds = min(2 ** attempt, 8)

            try:
                response = requests.post(
                    self.api_url,
                    headers=self.headers,
                    json=payload,
                    proxies=proxies,
                    timeout=30  # seconds
                )

                # 429 means the rate limit was exceeded: retry unless this
                # was the final attempt.
                if response.status_code == 429:
                    if is_last_attempt:
                        raise requests.exceptions.RequestException("超出API速率限制")
                    time.sleep(backoff_seconds)
                    continue

                response.raise_for_status()
                result = response.json()

                if 'text' in result:
                    return result['text']

                # NOTE(review): the Mathpix response is documented to carry
                # an "error" field on failure; surface it when present
                # instead of the generic "no text" message.
                if result.get('error'):
                    return f"Mathpix API错误: {result['error']}"

                return "未能提取到文本内容"

            except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
                # Transient network failure: retry, or let the outer handler
                # turn the final failure into an error message.
                if is_last_attempt:
                    raise
                time.sleep(backoff_seconds)

    except requests.exceptions.RequestException as e:
        return f"Mathpix API错误: {str(e)}"
    except Exception as e:
        return f"处理图像时出错: {str(e)}"

View File

@@ -43,8 +43,6 @@ class SnapSolver {
this.cropConfirm = document.getElementById('cropConfirm');
// Format toggle elements
this.textFormatBtn = document.getElementById('textFormatBtn');
this.latexFormatBtn = document.getElementById('latexFormatBtn');
this.confidenceIndicator = document.getElementById('confidenceIndicator');
this.confidenceValue = document.querySelector('.confidence-value');
@@ -60,12 +58,8 @@ class SnapSolver {
this.cropper = null;
this.croppedImage = null;
this.history = JSON.parse(localStorage.getItem('snapHistory') || '[]');
this.currentFormat = 'text';
this.emitTimeout = null;
this.extractedFormats = {
text: '',
latex: ''
};
this.extractedContent = '';
// 确保裁剪容器和其他面板初始为隐藏状态
if (this.cropContainer) {
@@ -228,37 +222,18 @@ class SnapSolver {
}
this.sendExtractedTextBtn.disabled = false; // Re-enable send button on server error
} else if (data.content) {
// Parse the content to extract text and LaTeX
const lines = data.content.split('\n');
let confidence = null;
// 直接使用提取的文本内容
this.extractedContent = data.content;
// Process the content
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
if (line.startsWith('Confidence:')) {
confidence = parseFloat(line.match(/[\d.]+/)[0]) / 100;
} else if (line === 'Text Content:' && i + 1 < lines.length) {
this.extractedFormats.text = lines[i + 1];
} else if (line === 'LaTeX (Styled):' && i + 1 < lines.length) {
this.extractedFormats.latex = lines[i + 1];
}
}
// Update confidence indicator
if (confidence !== null) {
this.confidenceValue.textContent = `${(confidence * 100).toFixed(0)}%`;
this.confidenceIndicator.style.display = 'flex';
}
// Update text editor with current format
// 更新文本编辑器
if (this.extractedText) {
this.extractedText.value = this.extractedFormats[this.currentFormat];
this.extractedText.value = data.content;
this.extractedText.disabled = false;
this.extractedText.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
this.sendExtractedTextBtn.disabled = false;
}
window.showToast('Text extracted successfully');
window.showToast('文本提取成功');
}
this.extractTextBtn.disabled = false;
@@ -601,7 +576,6 @@ class SnapSolver {
}
setupEventListeners() {
this.setupFormatToggle();
this.setupCaptureEvents();
this.setupCropEvents();
this.setupAnalysisEvents();
@@ -609,26 +583,6 @@ class SnapSolver {
this.setupThinkingToggle();
}
setupFormatToggle() {
this.textFormatBtn.addEventListener('click', () => {
if (this.currentFormat !== 'text') {
this.currentFormat = 'text';
this.textFormatBtn.classList.add('active');
this.latexFormatBtn.classList.remove('active');
this.extractedText.value = this.extractedFormats.text;
}
});
this.latexFormatBtn.addEventListener('click', () => {
if (this.currentFormat !== 'latex') {
this.currentFormat = 'latex';
this.latexFormatBtn.classList.add('active');
this.textFormatBtn.classList.remove('active');
this.extractedText.value = this.extractedFormats.latex;
}
});
}
setupCaptureEvents() {
// Capture button
this.captureBtn.addEventListener('click', async () => {

View File

@@ -66,10 +66,6 @@
<div id="textEditor" class="text-editor hidden">
<textarea id="extractedText" rows="4" placeholder="Extracted text will appear here..."></textarea>
<div class="text-format-controls">
<div class="format-toggle">
<button id="textFormatBtn" class="format-btn active">Text</button>
<button id="latexFormatBtn" class="format-btn">LaTeX</button>
</div>
<div class="send-text-group">
<div id="confidenceIndicator" class="confidence-indicator" title="OCR Confidence">
<i class="fas fa-check-circle"></i>