from typing import Any, Dict, Generator, List, Optional
import json

import requests

from .base import BaseModel


class MathpixModel(BaseModel):
    """
    Mathpix OCR model for processing images containing mathematical formulas, text, and tables.
    """

    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: Optional[str] = None):
        """
        Initialize the Mathpix model.

        Args:
            api_key: Mathpix API key in format "app_id:app_key"
            temperature: Not used for Mathpix but kept for BaseModel compatibility
            system_prompt: Not used for Mathpix but kept for BaseModel compatibility

        Raises:
            ValueError: If the API key format is invalid
        """
        # Pass only the required arguments; the language parameter is not forwarded.
        super().__init__(api_key, temperature, system_prompt)

        try:
            self.app_id, self.app_key = api_key.split(':')
        except ValueError:
            # `from None` hides the irrelevant tuple-unpacking error from the traceback.
            raise ValueError("Mathpix API key must be in format 'app_id:app_key'") from None

        self.api_url = "https://api.mathpix.com/v3/text"
        self.headers = {
            "app_id": self.app_id,
            "app_key": self.app_key,
            "Content-Type": "application/json"
        }

        # Content type presets
        self.presets = {
            "math": {
                "formats": ["latex_normal", "latex_styled", "asciimath"],
                "data_options": {
                    "include_asciimath": True,
                    "include_latex": True,
                    "include_mathml": True
                },
                "ocr_options": {
                    "detect_formulas": True,
                    "enable_math_ocr": True,
                    "enable_handwritten": True,
                    "rm_spaces": True
                }
            },
            "text": {
                "formats": ["text"],
                "data_options": {
                    "include_latex": False,
                    "include_asciimath": False
                },
                "ocr_options": {
                    "enable_spell_check": True,
                    "enable_handwritten": True,
                    "rm_spaces": False
                }
            },
            "table": {
                "formats": ["text", "data"],
                "data_options": {
                    "include_latex": True
                },
                "ocr_options": {
                    "detect_tables": True,
                    "enable_spell_check": True,
                    "rm_spaces": True
                }
            },
            "full_text": {
                "formats": ["text"],
                "data_options": {
                    "include_latex": False,
                    "include_asciimath": False
                },
                "ocr_options": {
                    "enable_spell_check": True,
                    "enable_handwritten": True,
                    "rm_spaces": False,
                    "detect_paragraphs": True,
                    "enable_tables": False,
                    "enable_math_ocr": False
                }
            }
        }

        # Default to math preset
        self.current_preset = "math"

    def analyze_image(self, image_data: str, proxies: Optional[dict] = None, content_type: Optional[str] = None,
                      confidence_threshold: float = 0.8, max_retries: int = 3) -> Generator[dict, None, None]:
        """
        Analyze an image using Mathpix OCR API.

        Args:
            image_data: Base64 encoded image data
            proxies: Optional proxy configuration
            content_type: Type of content to analyze ('math', 'text', 'table' or 'full_text').
                A recognised value is stored in ``self.current_preset`` and therefore
                also applies to later calls that omit ``content_type``; unknown values
                are ignored.
            confidence_threshold: Minimum confidence score to accept (0.0 to 1.0)
            max_retries: Maximum number of request attempts; values below 1 are treated as 1

        Yields:
            dict: Response chunks with status and content. An optional "warning"
            chunk (low confidence) is followed by "started" and "completed";
            on failure a single "error" chunk is yielded instead.
        """
        if content_type and content_type in self.presets:
            self.current_preset = content_type

        preset = self.presets[self.current_preset]

        try:
            payload = self._build_payload(image_data, preset)
            result = self._post_with_retries(
                payload,
                proxies,
                max_retries,
                timeout=25,  # 25 second timeout
                rate_limit_message="Rate limit exceeded",
            )

            # Check confidence threshold
            if 'confidence' in result and result['confidence'] < confidence_threshold:
                yield {
                    "status": "warning",
                    "content": f"Low confidence score: {result['confidence']:.2%}"
                }

            # Format the response
            formatted_response = self._format_response(result)

            # Yield initial status
            yield {
                "status": "started",
                "content": ""
            }

            # Yield the formatted response
            yield {
                "status": "completed",
                "content": formatted_response,
                "model": self.get_model_identifier()
            }

        except requests.exceptions.RequestException as e:
            yield {
                "status": "error",
                "error": f"Mathpix API error: {str(e)}"
            }
        except Exception as e:
            # Boundary handler: any other failure is reported as an error chunk
            # instead of propagating out of the generator.
            yield {
                "status": "error",
                "error": f"Error processing image: {str(e)}"
            }

    def analyze_text(self, text: str, proxies: Optional[dict] = None) -> Generator[dict, None, None]:
        """
        Not implemented for Mathpix model as it only processes images.

        Yields:
            dict: A single "error" chunk stating that text analysis is unsupported.
        """
        yield {
            "status": "error",
            "error": "Text analysis is not supported by Mathpix model"
        }

    def get_default_system_prompt(self) -> str:
        """
        Not used for Mathpix model.
        """
        return ""

    def get_model_identifier(self) -> str:
        """
        Return the model identifier.
        """
        return "mathpix"

    def _format_response(self, result: Dict[str, Any]) -> str:
        """
        Format the Mathpix API response into a readable string.

        Values are coerced to ``str`` before joining so that a single
        non-string field cannot make the whole formatting step fail;
        fields whose value is ``None`` are skipped.

        Args:
            result: Raw API response from Mathpix

        Returns:
            str: Formatted response string with all available formats
        """
        formatted_parts: List[str] = []

        # Add confidence score if available
        if 'confidence' in result:
            formatted_parts.append(f"Confidence: {result['confidence']:.2%}\n")

        # Add text and LaTeX content, each as "title / value / blank line"
        sections = (
            ('text', "Text Content:"),
            ('latex_normal', "LaTeX (Normal):"),
            ('latex_styled', "LaTeX (Styled):"),
        )
        for key, title in sections:
            if key in result and result[key] is not None:
                formatted_parts.extend((title, str(result[key]), ""))

        # Add data formats (ASCII math, MathML)
        if 'data' in result and isinstance(result['data'], list):
            for item in result['data']:
                if not isinstance(item, dict):
                    continue
                item_type = item.get('type', '')
                if item_type and 'value' in item:
                    formatted_parts.append(f"{str(item_type).upper()}:")
                    formatted_parts.append(str(item['value']))
                    formatted_parts.append("")

        # Add table data if present
        if 'tables' in result and result['tables']:
            formatted_parts.append("Tables Detected:")
            for i, table in enumerate(result['tables'], 1):
                formatted_parts.append(f"Table {i}:")
                if 'cells' in table and table['cells']:
                    formatted_parts.extend(self._format_table_rows(table['cells']))
                # Blank line separates consecutive tables
                formatted_parts.append("")

        # Add error message if present
        if 'error' in result:
            error_msg = result['error']
            if isinstance(error_msg, dict):
                error_msg = error_msg.get('message', str(error_msg))
            formatted_parts.append(f"Error: {error_msg}")

        return "\n".join(formatted_parts).strip()

    def extract_full_text(self, image_data: str, proxies: Optional[dict] = None, max_retries: int = 3) -> str:
        """
        Extract only the plain text of an image, using the "full_text" preset
        (math OCR and table detection disabled).

        Args:
            image_data: Base64 encoded image data
            proxies: Optional proxy configuration
            max_retries: Maximum number of request attempts; values below 1 are treated as 1

        Returns:
            str: The value of the response's "text" field; a fixed placeholder
            message when the response has no "text" field; or an error
            message string when the request or processing fails.
        """
        try:
            # Build the request from the preset dedicated to full-text extraction
            payload = self._build_payload(image_data, self.presets["full_text"])
            result = self._post_with_retries(
                payload,
                proxies,
                max_retries,
                timeout=30,  # 30 second timeout
                rate_limit_message="超出API速率限制",
            )

            # Return the text content directly
            if 'text' in result:
                return result['text']
            return "未能提取到文本内容"

        except requests.exceptions.RequestException as e:
            return f"Mathpix API错误: {str(e)}"
        except Exception as e:
            # Boundary handler: this method reports failures as a string
            # rather than raising.
            return f"处理图像时出错: {str(e)}"

    @staticmethod
    def _build_payload(image_data: str, preset: Dict[str, Any]) -> Dict[str, Any]:
        """Build the JSON request body for one image from a preset's options."""
        return {
            "src": f"data:image/jpeg;base64,{image_data}",
            "formats": preset["formats"],
            "data_options": preset["data_options"],
            "ocr_options": preset["ocr_options"]
        }

    def _post_with_retries(self, payload: Dict[str, Any], proxies: Optional[dict], max_retries: int,
                           timeout: float, rate_limit_message: str) -> Any:
        """
        POST the payload to the API, retrying on timeouts, connection errors and HTTP 429.

        Args:
            payload: JSON request body
            proxies: Optional proxy configuration passed to ``requests.post``
            max_retries: Maximum number of attempts; values below 1 are treated as 1
            timeout: Per-request timeout in seconds
            rate_limit_message: Message of the exception raised when the last
                attempt is still answered with HTTP 429

        Returns:
            The decoded JSON body of the first successful response.

        Raises:
            requests.exceptions.RequestException: When the last attempt times out,
                fails to connect, is rate limited, or returns an HTTP error status.
        """
        # Always make at least one attempt so the caller gets either a result
        # or a request exception.
        attempts_left = max(1, max_retries)

        while True:
            attempts_left -= 1
            try:
                response = requests.post(
                    self.api_url,
                    headers=self.headers,
                    json=payload,
                    proxies=proxies,
                    timeout=timeout
                )
            except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
                if attempts_left > 0:
                    continue
                raise

            # Rate limit exceeded: retry while attempts remain
            if response.status_code == 429:
                if attempts_left > 0:
                    continue
                raise requests.exceptions.RequestException(rate_limit_message)

            response.raise_for_status()
            return response.json()

    @staticmethod
    def _format_table_rows(cells: List[Dict[str, Any]]) -> List[str]:
        """Render table cells (dicts with 'row', 'col', 'text') as aligned "| a | b |" lines."""
        n_cols = max(cell.get('col', 0) for cell in cells) + 1
        n_rows = max(cell.get('row', 0) for cell in cells) + 1

        # Cells missing from the response stay as empty strings.
        grid = [['' for _ in range(n_cols)] for _ in range(n_rows)]
        for cell in cells:
            grid[cell.get('row', 0)][cell.get('col', 0)] = cell.get('text', '')

        # Pad every column to its widest entry
        col_widths = [max(len(str(grid[r][c])) for r in range(n_rows)) for c in range(n_cols)]

        lines = []
        for row in grid:
            row_str = ' | '.join(f"{str(value):<{width}}" for value, width in zip(row, col_widths))
            lines.append(f"| {row_str} |")
        return lines