#!/usr/bin/env python3 # -*- coding: utf-8 -*- import json import logging import re import time from typing import Optional, Dict, Callable, List import os from threading import Thread, Lock from openai import OpenAI class PerplexityThread(Thread): """处理Perplexity请求的线程""" def __init__( self, perplexity_instance, prompt, chat_id, send_text_func, receiver, at_user=None, on_finish: Optional[Callable[[], None]] = None, enable_full_research: bool = False, ): """初始化Perplexity处理线程 Args: perplexity_instance: Perplexity实例 prompt: 查询内容 chat_id: 聊天ID send_text_func: 发送消息的函数,接受(消息内容, 接收者ID, @用户ID)参数 receiver: 接收消息的ID at_user: 被@的用户ID """ super().__init__(daemon=True) self.perplexity = perplexity_instance self.prompt = prompt self.chat_id = chat_id self.send_text_func = send_text_func self.receiver = receiver self.at_user = at_user self.LOG = logging.getLogger("PerplexityThread") self.on_finish = on_finish self.enable_full_research = enable_full_research # 检查是否使用reasoning模型 self.is_reasoning_model = bool(self.enable_full_research and getattr(self.perplexity, 'has_reasoning_model', False)) if self.is_reasoning_model: self.LOG.info("Perplexity将启用推理模型处理此次请求") def run(self): """线程执行函数""" try: self.LOG.info(f"开始处理Perplexity请求: {self.prompt[:30]}...") # 获取回答 response = self.perplexity.get_answer( self.prompt, self.chat_id, deep_research=self.enable_full_research ) # 处理sonar-reasoning和sonar-reasoning-pro模型的标签 if response: # 只有对reasoning模型才应用清理逻辑 if self.is_reasoning_model: response = self.remove_thinking_content(response) # 移除Markdown格式符号 response = self.remove_markdown_formatting(response) self.send_text_func(response, at_list=self.at_user) else: self.send_text_func("无法从Perplexity获取回答", at_list=self.at_user) self.LOG.info(f"Perplexity请求处理完成: {self.prompt[:30]}...") except Exception as e: self.LOG.error(f"处理Perplexity请求时出错: {e}") self.send_text_func(f"处理请求时出错: {e}", at_list=self.at_user) finally: if self.on_finish: try: self.on_finish() except Exception as cleanup_error: self.LOG.error(f"清理Perplexity线程时出错: {cleanup_error}") def remove_thinking_content(self, text): """移除标签之间的思考内容 Args: text: 原始响应文本 Returns: str: 处理后的文本 """ try: # 检查是否包含思考标签 has_thinking = '' in text or '' in text if has_thinking: self.LOG.info("检测到思考内容标签,准备移除...") # 导入正则表达式库 import re # 移除不完整的标签对情况 if text.count('') != text.count(''): self.LOG.warning(f"检测到不匹配的思考标签: 数量={text.count('')}, 数量={text.count('')}") # 提取思考内容用于日志记录 thinking_pattern = re.compile(r'(.*?)', re.DOTALL) thinking_matches = thinking_pattern.findall(text) if thinking_matches: for i, thinking in enumerate(thinking_matches): short_thinking = thinking[:100] + '...' if len(thinking) > 100 else thinking self.LOG.debug(f"思考内容 #{i+1}: {short_thinking}") # 替换所有的...内容 - 使用非贪婪模式 cleaned_text = re.sub(r'.*?', '', text, flags=re.DOTALL) # 处理不完整的标签 cleaned_text = re.sub(r'.*?$', '', cleaned_text, flags=re.DOTALL) # 处理未闭合的开始标签 cleaned_text = re.sub(r'^.*?', '', cleaned_text, flags=re.DOTALL) # 处理未开始的闭合标签 # 处理可能的多余空行 cleaned_text = re.sub(r'\n{3,}', '\n\n', cleaned_text) # 移除前后空白 cleaned_text = cleaned_text.strip() self.LOG.info(f"思考内容已移除,原文本长度: {len(text)} -> 清理后: {len(cleaned_text)}") # 如果清理后文本为空,返回一个提示信息 if not cleaned_text: return "回答内容为空,可能是模型仅返回了思考过程。请重新提问。" return cleaned_text else: return text # 没有思考标签,直接返回原文本 except Exception as e: self.LOG.error(f"清理思考内容时出错: {e}") return text # 出错时返回原始文本 def remove_markdown_formatting(self, text): """移除Markdown格式符号,如*和# Args: text: 包含Markdown格式的文本 Returns: str: 移除Markdown格式后的文本 """ try: # 导入正则表达式库 import re self.LOG.info("开始移除Markdown格式符号...") # 保存原始文本长度 original_length = len(text) # 移除标题符号 (#) # 替换 # 开头的标题,保留文本内容 cleaned_text = re.sub(r'^\s*#{1,6}\s+(.+)$', r'\1', text, flags=re.MULTILINE) # 移除强调符号 (*) # 替换 **粗体** 和 *斜体* 格式,保留文本内容 cleaned_text = re.sub(r'\*\*(.*?)\*\*', r'\1', cleaned_text) cleaned_text = re.sub(r'\*(.*?)\*', r'\1', cleaned_text) # 处理可能的多余空行 cleaned_text = re.sub(r'\n{3,}', '\n\n', cleaned_text) # 移除前后空白 cleaned_text = cleaned_text.strip() self.LOG.info(f"Markdown格式符号已移除,原文本长度: {original_length} -> 清理后: {len(cleaned_text)}") return cleaned_text except Exception as e: self.LOG.error(f"移除Markdown格式符号时出错: {e}") return text # 出错时返回原始文本 class PerplexityManager: """管理Perplexity请求线程的类""" def __init__(self): self.threads = {} self.lock = Lock() self.LOG = logging.getLogger("PerplexityManager") def start_request( self, perplexity_instance, prompt, chat_id, send_text_func, receiver, at_user=None, enable_full_research: bool = False, ): """启动Perplexity请求线程 Args: perplexity_instance: Perplexity实例 prompt: 查询内容 chat_id: 聊天ID send_text_func: 发送消息的函数 receiver: 接收消息的ID at_user: 被@的用户ID enable_full_research: 是否启用深度研究模式 Returns: bool: 是否成功启动线程 """ thread_key = f"{receiver}_{chat_id}" full_research_available = enable_full_research and getattr(perplexity_instance, 'has_reasoning_model', False) with self.lock: # 检查是否已有正在处理的相同请求 if thread_key in self.threads and self.threads[thread_key].is_alive(): send_text_func("⚠️ 已有一个Perplexity请求正在处理中,请稍后再试", at_list=at_user) return False # 发送等待消息 wait_msg = "正在启用满血模式研究中...." if full_research_available else "正在联网查询,请稍候..." if enable_full_research and not full_research_available: self.LOG.warning("收到满血模式请求,但未配置推理模型,退回普通模式。") # 等待提示无需 @ 用户,避免频繁打扰 send_text_func(wait_msg, at_list="", record_message=False) # 添加线程完成回调,自动清理线程 def thread_finished_callback(): with self.lock: thread = self.threads.pop(thread_key, None) if thread is not None: self.LOG.info(f"已清理Perplexity线程: {thread_key}") # 创建并启动新线程处理请求 perplexity_thread = PerplexityThread( perplexity_instance=perplexity_instance, prompt=prompt, chat_id=chat_id, send_text_func=send_text_func, receiver=receiver, at_user=at_user, on_finish=thread_finished_callback, enable_full_research=full_research_available ) # 保存线程引用 self.threads[thread_key] = perplexity_thread # 启动线程 perplexity_thread.start() self.LOG.info(f"已启动Perplexity请求线程: {thread_key}") return True def cleanup_threads(self): """清理所有Perplexity线程""" with self.lock: active_threads = [thread_key for thread_key, thread in self.threads.items() if thread.is_alive()] if active_threads: self.LOG.info(f"等待{len(active_threads)}个Perplexity线程结束: {active_threads}") # 等待所有线程结束,但最多等待10秒 for _ in range(10): with self.lock: active_count = sum(1 for thread in self.threads.values() if thread.is_alive()) if active_count == 0: break time.sleep(1) with self.lock: still_active = [thread_key for thread_key, thread in self.threads.items() if thread.is_alive()] if still_active: self.LOG.warning(f"以下Perplexity线程在退出时仍在运行: {still_active}") self.threads.clear() else: with self.lock: self.threads.clear() self.LOG.info("Perplexity线程管理已清理") class Perplexity: def __init__(self, config): self.config = config self.api_key = config.get('key') self.api_base = config.get('api', 'https://api.perplexity.ai') self.proxy = config.get('proxy') self.prompt = config.get('prompt', '你是智能助手Perplexity') self.trigger_keyword = config.get('trigger_keyword', 'ask') self.fallback_prompt = config.get('fallback_prompt', "请像 Perplexity 一样,以专业、客观、信息丰富的方式回答问题。不要使用任何tex或者md格式,纯文本输出。") self.LOG = logging.getLogger('Perplexity') self.model_flash = config.get('model_flash') or config.get('model', 'sonar') self.model_reasoning = config.get('model_reasoning') if self.model_reasoning and self.model_reasoning.lower() == (self.model_flash or '').lower(): self.model_reasoning = None self.has_reasoning_model = bool(self.model_reasoning) # 设置编码环境变量,确保处理Unicode字符 os.environ["PYTHONIOENCODING"] = "utf-8" # 创建线程管理器 self.thread_manager = PerplexityManager() # 创建OpenAI客户端 self.client = None if self.api_key: try: self.client = OpenAI( api_key=self.api_key, base_url=self.api_base ) # 如果有代理设置 if self.proxy: # OpenAI客户端不直接支持代理设置,需要通过环境变量 os.environ["HTTPS_PROXY"] = self.proxy os.environ["HTTP_PROXY"] = self.proxy self.LOG.info("Perplexity 客户端已初始化") except Exception as e: self.LOG.error(f"初始化Perplexity客户端失败: {str(e)}") else: self.LOG.warning("未配置Perplexity API密钥") @staticmethod def value_check(args: dict) -> bool: if args: return all(value is not None for key, value in args.items() if key != 'proxy') return False def get_answer(self, prompt, session_id=None, deep_research: bool = False): """获取Perplexity回答 Args: prompt: 用户输入的问题 session_id: 会话ID,用于区分不同会话 Returns: str: Perplexity的回答 """ try: if not self.api_key or not self.client: return "Perplexity API key 未配置或客户端初始化失败" # 构建消息列表 messages = [ {"role": "system", "content": self.prompt}, {"role": "user", "content": prompt} ] # 获取模型 model = self.model_reasoning if (deep_research and self.has_reasoning_model) else self.model_flash or self.config.get('model', 'sonar') if deep_research and self.has_reasoning_model: self.LOG.info(f"Perplexity启动深度研究模式,使用模型: {model}") # 使用json序列化确保正确处理Unicode self.LOG.info(f"发送到Perplexity的消息: {json.dumps(messages, ensure_ascii=False)}") # 创建聊天完成 response = self.client.chat.completions.create( model=model, messages=messages ) # 返回回答内容 return response.choices[0].message.content except Exception as e: self.LOG.error(f"调用Perplexity API时发生错误: {str(e)}") return f"发生错误: {str(e)}" def process_message( self, content, chat_id, sender, roomid, from_group, send_text_func, enable_full_research: bool = False, ): """处理可能包含Perplexity触发词的消息 Args: content: 消息内容 chat_id: 聊天ID sender: 发送者ID roomid: 群聊ID(如果是群聊) from_group: 是否来自群聊 send_text_func: 发送消息的函数 enable_full_research: 是否启用深度研究模式 Returns: tuple[bool, Optional[str]]: - bool: 是否已处理该消息 - Optional[str]: 无权限时的备选prompt,其他情况为None """ prompt = (content or "").strip() if not prompt: return False, None stripped_by_keyword = False trigger = (self.trigger_keyword or "").strip() if trigger: lowered_prompt = prompt.lower() lowered_trigger = trigger.lower() if lowered_prompt.startswith(lowered_trigger): stripped_by_keyword = True prompt = prompt[len(trigger):].strip() if not prompt: if stripped_by_keyword: send_text_func( "请告诉我你想搜索什么内容", at_list=sender if from_group else "", record_message=False ) return True, None return False, None receiver = roomid if from_group else sender at_user = sender if from_group else None request_started = self.thread_manager.start_request( perplexity_instance=self, prompt=prompt, chat_id=chat_id, send_text_func=send_text_func, receiver=receiver, at_user=at_user, enable_full_research=enable_full_research ) return request_started, None def cleanup(self): """清理所有资源""" self.thread_manager.cleanup_threads() def __str__(self): return "Perplexity"