From 585edd932e82b5ebe763995db223b2ca0c681131 Mon Sep 17 00:00:00 2001 From: zihanjian Date: Thu, 25 Sep 2025 13:36:39 +0800 Subject: [PATCH] pplxty debug --- ai_providers/ai_perplexity.py | 200 +++++++++++--------------- function_calls/services/perplexity.py | 88 +++++++----- 2 files changed, 133 insertions(+), 155 deletions(-) diff --git a/ai_providers/ai_perplexity.py b/ai_providers/ai_perplexity.py index 39d9d61..1b0a33e 100644 --- a/ai_providers/ai_perplexity.py +++ b/ai_providers/ai_perplexity.py @@ -34,12 +34,9 @@ class PerplexityThread(Thread): self.at_user = at_user self.LOG = logging.getLogger("PerplexityThread") - # 检查是否使用reasoning模型 - self.is_reasoning_model = False - if hasattr(self.perplexity, 'config'): - model_name = self.perplexity.config.get('model', 'sonar').lower() - self.is_reasoning_model = 'reasoning' in model_name - self.LOG.info(f"Perplexity使用模型: {model_name}, 是否为reasoning模型: {self.is_reasoning_model}") + self.LOG.info( + f"Perplexity使用模型: {self.perplexity.model_name}, 是否为reasoning模型: {self.perplexity.is_reasoning_model}" + ) def run(self): """线程执行函数""" @@ -49,16 +46,12 @@ class PerplexityThread(Thread): # 获取回答 response = self.perplexity.get_answer(self.prompt, self.chat_id) - # 处理sonar-reasoning和sonar-reasoning-pro模型的标签 if response: - # 只有对reasoning模型才应用清理逻辑 - if self.is_reasoning_model: - response = self.remove_thinking_content(response) - - # 移除Markdown格式符号 - response = self.remove_markdown_formatting(response) - - self.send_text_func(response, at_list=self.at_user) + response = self.perplexity.sanitize_response(response) + if response: + self.send_text_func(response, at_list=self.at_user) + else: + self.send_text_func("无法从Perplexity获取回答", at_list=self.at_user) else: self.send_text_func("无法从Perplexity获取回答", at_list=self.at_user) @@ -68,105 +61,6 @@ class PerplexityThread(Thread): self.LOG.error(f"处理Perplexity请求时出错: {e}") self.send_text_func(f"处理请求时出错: {e}", at_list=self.at_user) - def remove_thinking_content(self, text): - """移除标签之间的思考内容 - - Args: - text: 原始响应文本 - - Returns: - str: 处理后的文本 - """ - try: - # 检查是否包含思考标签 - has_thinking = '' in text or '' in text - - if has_thinking: - self.LOG.info("检测到思考内容标签,准备移除...") - - # 导入正则表达式库 - import re - - # 移除不完整的标签对情况 - if text.count('') != text.count(''): - self.LOG.warning(f"检测到不匹配的思考标签: 数量={text.count('')}, 数量={text.count('')}") - - # 提取思考内容用于日志记录 - thinking_pattern = re.compile(r'(.*?)', re.DOTALL) - thinking_matches = thinking_pattern.findall(text) - - if thinking_matches: - for i, thinking in enumerate(thinking_matches): - short_thinking = thinking[:100] + '...' if len(thinking) > 100 else thinking - self.LOG.debug(f"思考内容 #{i+1}: {short_thinking}") - - # 替换所有的...内容 - 使用非贪婪模式 - cleaned_text = re.sub(r'.*?', '', text, flags=re.DOTALL) - - # 处理不完整的标签 - cleaned_text = re.sub(r'.*?$', '', cleaned_text, flags=re.DOTALL) # 处理未闭合的开始标签 - cleaned_text = re.sub(r'^.*?', '', cleaned_text, flags=re.DOTALL) # 处理未开始的闭合标签 - - # 处理可能的多余空行 - cleaned_text = re.sub(r'\n{3,}', '\n\n', cleaned_text) - - # 移除前后空白 - cleaned_text = cleaned_text.strip() - - self.LOG.info(f"思考内容已移除,原文本长度: {len(text)} -> 清理后: {len(cleaned_text)}") - - # 如果清理后文本为空,返回一个提示信息 - if not cleaned_text: - return "回答内容为空,可能是模型仅返回了思考过程。请重新提问。" - - return cleaned_text - else: - return text # 没有思考标签,直接返回原文本 - - except Exception as e: - self.LOG.error(f"清理思考内容时出错: {e}") - return text # 出错时返回原始文本 - - def remove_markdown_formatting(self, text): - """移除Markdown格式符号,如*和# - - Args: - text: 包含Markdown格式的文本 - - Returns: - str: 移除Markdown格式后的文本 - """ - try: - # 导入正则表达式库 - import re - - self.LOG.info("开始移除Markdown格式符号...") - - # 保存原始文本长度 - original_length = len(text) - - # 移除标题符号 (#) - # 替换 # 开头的标题,保留文本内容 - cleaned_text = re.sub(r'^\s*#{1,6}\s+(.+)$', r'\1', text, flags=re.MULTILINE) - - # 移除强调符号 (*) - # 替换 **粗体** 和 *斜体* 格式,保留文本内容 - cleaned_text = re.sub(r'\*\*(.*?)\*\*', r'\1', cleaned_text) - cleaned_text = re.sub(r'\*(.*?)\*', r'\1', cleaned_text) - - # 处理可能的多余空行 - cleaned_text = re.sub(r'\n{3,}', '\n\n', cleaned_text) - - # 移除前后空白 - cleaned_text = cleaned_text.strip() - - self.LOG.info(f"Markdown格式符号已移除,原文本长度: {original_length} -> 清理后: {len(cleaned_text)}") - - return cleaned_text - - except Exception as e: - self.LOG.error(f"移除Markdown格式符号时出错: {e}") - return text # 出错时返回原始文本 class PerplexityManager: @@ -271,6 +165,9 @@ class Perplexity: self.trigger_keyword = config.get('trigger_keyword', 'ask') self.fallback_prompt = config.get('fallback_prompt', "请像 Perplexity 一样,以专业、客观、信息丰富的方式回答问题。不要使用任何tex或者md格式,纯文本输出。") self.LOG = logging.getLogger('Perplexity') + self.model = config.get('model', 'sonar') + self.model_name = str(self.model).lower() + self.is_reasoning_model = 'reasoning' in self.model_name # 权限控制 - 允许使用Perplexity的群聊和个人ID self.allowed_groups = config.get('allowed_groups', []) @@ -362,7 +259,7 @@ class Perplexity: ] # 获取模型 - model = self.config.get('model', 'sonar') + model = self.model # 使用json序列化确保正确处理Unicode self.LOG.info(f"发送到Perplexity的消息: {json.dumps(messages, ensure_ascii=False)}") @@ -379,6 +276,77 @@ class Perplexity: except Exception as e: self.LOG.error(f"调用Perplexity API时发生错误: {str(e)}") return f"发生错误: {str(e)}" + + def sanitize_response(self, text: str) -> str: + """根据模型类型清理Perplexity响应""" + if not isinstance(text, str): + return "" + + cleaned = text + if self.is_reasoning_model: + cleaned = self._remove_thinking_content(cleaned) + + cleaned = self._remove_markdown_formatting(cleaned) + return cleaned.strip() + + def _remove_thinking_content(self, text: str) -> str: + try: + has_thinking = '' in text or '' in text + if has_thinking: + self.LOG.info("检测到思考内容标签,准备移除...") + + if text.count('') != text.count(''): + self.LOG.warning( + f"检测到不匹配的思考标签: 数量={text.count('')}, 数量={text.count('')}" + ) + + thinking_pattern = re.compile(r'(.*?)', re.DOTALL) + thinking_matches = thinking_pattern.findall(text) + if thinking_matches: + for index, thinking in enumerate(thinking_matches, start=1): + short_thinking = thinking[:100] + '...' if len(thinking) > 100 else thinking + self.LOG.debug(f"思考内容 #{index}: {short_thinking}") + + cleaned_text = re.sub(r'.*?', '', text, flags=re.DOTALL) + cleaned_text = re.sub(r'.*?$', '', cleaned_text, flags=re.DOTALL) + cleaned_text = re.sub(r'^.*?', '', cleaned_text, flags=re.DOTALL) + cleaned_text = re.sub(r'\n{3,}', '\n\n', cleaned_text) + cleaned_text = cleaned_text.strip() + + self.LOG.info( + f"思考内容已移除,原文本长度: {len(text)} -> 清理后: {len(cleaned_text)}" + ) + + if not cleaned_text: + return "回答内容为空,可能是模型仅返回了思考过程。请重新提问。" + + return cleaned_text + + except Exception as exc: + self.LOG.error(f"清理思考内容时出错: {exc}") + + return text + + def _remove_markdown_formatting(self, text: str) -> str: + try: + self.LOG.info("开始移除Markdown格式符号...") + original_length = len(text) + + cleaned_text = re.sub(r'^\s*#{1,6}\s+(.+)$', r'\1', text, flags=re.MULTILINE) + cleaned_text = re.sub(r'\*\*(.*?)\*\*', r'\1', cleaned_text) + cleaned_text = re.sub(r'\*(.*?)\*', r'\1', cleaned_text) + cleaned_text = re.sub(r'\n{3,}', '\n\n', cleaned_text) + cleaned_text = cleaned_text.strip() + + self.LOG.info( + f"Markdown格式符号已移除,原文本长度: {original_length} -> 清理后: {len(cleaned_text)}" + ) + + return cleaned_text + + except Exception as exc: + self.LOG.error(f"移除Markdown格式符号时出错: {exc}") + return text def process_message(self, content, chat_id, sender, roomid, from_group, send_text_func): """处理可能包含Perplexity触发词的消息 @@ -445,4 +413,4 @@ class Perplexity: self.thread_manager.cleanup_threads() def __str__(self): - return "Perplexity" \ No newline at end of file + return "Perplexity" diff --git a/function_calls/services/perplexity.py b/function_calls/services/perplexity.py index 5007235..c421a6e 100644 --- a/function_calls/services/perplexity.py +++ b/function_calls/services/perplexity.py @@ -24,48 +24,58 @@ def run_perplexity(ctx: MessageContext, query: str) -> PerplexityResult: if not perplexity_instance: return PerplexityResult(success=True, messages=["❌ Perplexity搜索功能当前不可用"], handled_externally=False) - content_for_perplexity = f"ask {query}" chat_id = ctx.get_receiver() sender_wxid = ctx.msg.sender - room_id = ctx.msg.roomid if ctx.is_group else None - captured_messages: list[str] = [] + def run_fallback(fallback_prompt: str | None) -> PerplexityResult | None: + if not fallback_prompt: + return None - def capture_send_text(content: str, at_list: str = "") -> bool: - captured_messages.append(content) - return True - - was_handled, fallback_prompt = perplexity_instance.process_message( - content=content_for_perplexity, - chat_id=chat_id, - sender=sender_wxid, - roomid=room_id, - from_group=ctx.is_group, - send_text_func=capture_send_text - ) - - if captured_messages: - return PerplexityResult(success=True, messages=captured_messages, handled_externally=False) - - if was_handled: - return PerplexityResult(success=True, messages=[], handled_externally=True) - - if fallback_prompt: chat_model = getattr(ctx, 'chat', None) or (getattr(ctx.robot, 'chat', None) if ctx.robot else None) - if chat_model: - try: - import time - current_time = time.strftime("%H:%M", time.localtime()) - formatted_question = f"[{current_time}] {ctx.sender_name}: {query}" - answer = chat_model.get_answer( - question=formatted_question, - wxid=ctx.get_receiver(), - system_prompt_override=fallback_prompt - ) - if answer: - return PerplexityResult(success=True, messages=[answer], handled_externally=False) - except Exception as exc: - if ctx.logger: - ctx.logger.error(f"默认AI处理失败: {exc}") + if not chat_model: + return None - return PerplexityResult(success=True, messages=["❌ Perplexity搜索时发生错误"], handled_externally=False) + try: + import time + + current_time = time.strftime("%H:%M", time.localtime()) + formatted_question = f"[{current_time}] {ctx.sender_name}: {query}" + answer = chat_model.get_answer( + question=formatted_question, + wxid=ctx.get_receiver(), + system_prompt_override=fallback_prompt + ) + if answer: + return PerplexityResult(success=True, messages=[answer], handled_externally=False) + except Exception as exc: + if ctx.logger: + ctx.logger.error(f"默认AI处理失败: {exc}") + + return None + + if not perplexity_instance.is_allowed(chat_id, sender_wxid, ctx.is_group): + fallback_result = run_fallback(perplexity_instance.fallback_prompt) + if fallback_result: + return fallback_result + return PerplexityResult(success=True, messages=["❌ 当前会话未授权使用Perplexity"], handled_externally=False) + + try: + answer = perplexity_instance.get_answer(query, chat_id) + sanitized = perplexity_instance.sanitize_response(answer) if answer else "" + if sanitized: + return PerplexityResult(success=True, messages=[sanitized], handled_externally=False) + + fallback_result = run_fallback(perplexity_instance.fallback_prompt) + if fallback_result: + return fallback_result + + return PerplexityResult(success=True, messages=["❌ Perplexity未返回结果"], handled_externally=False) + except Exception as exc: + if ctx.logger: + ctx.logger.error(f"Perplexity搜索异常: {exc}") + + fallback_result = run_fallback(perplexity_instance.fallback_prompt) + if fallback_result: + return fallback_result + + return PerplexityResult(success=True, messages=["❌ Perplexity搜索时发生错误"], handled_externally=False)