Files
chatgpt-on-wechat/bridge/agent_bridge.py
2026-02-02 22:43:08 +08:00

530 lines
22 KiB
Python

"""
Agent Bridge - Integrates Agent system with existing COW bridge
"""
import os
from typing import Optional, List
from agent.protocol import Agent, LLMModel, LLMRequest
from bridge.agent_event_handler import AgentEventHandler
from bridge.agent_initializer import AgentInitializer
from bridge.bridge import Bridge
from bridge.context import Context
from bridge.reply import Reply, ReplyType
from common import const
from common.log import logger
from models.openai_compatible_bot import OpenAICompatibleBot
def add_openai_compatible_support(bot_instance):
"""
Dynamically add OpenAI-compatible tool calling support to a bot instance.
This allows any bot to gain tool calling capability without modifying its code,
as long as it uses OpenAI-compatible API format.
Note: Some bots like ZHIPUAIBot have native tool calling support and don't need enhancement.
"""
if hasattr(bot_instance, 'call_with_tools'):
# Bot already has tool calling support (e.g., ZHIPUAIBot)
logger.info(f"[AgentBridge] {type(bot_instance).__name__} already has native tool calling support")
return bot_instance
# Create a temporary mixin class that combines the bot with OpenAI compatibility
class EnhancedBot(bot_instance.__class__, OpenAICompatibleBot):
"""Dynamically enhanced bot with OpenAI-compatible tool calling"""
def get_api_config(self):
"""
Infer API config from common configuration patterns.
Most OpenAI-compatible bots use similar configuration.
"""
from config import conf
return {
'api_key': conf().get("open_ai_api_key"),
'api_base': conf().get("open_ai_api_base"),
'model': conf().get("model", "gpt-3.5-turbo"),
'default_temperature': conf().get("temperature", 0.9),
'default_top_p': conf().get("top_p", 1.0),
'default_frequency_penalty': conf().get("frequency_penalty", 0.0),
'default_presence_penalty': conf().get("presence_penalty", 0.0),
}
# Change the bot's class to the enhanced version
bot_instance.__class__ = EnhancedBot
logger.info(
f"[AgentBridge] Enhanced {bot_instance.__class__.__bases__[0].__name__} with OpenAI-compatible tool calling")
return bot_instance
class AgentLLMModel(LLMModel):
"""
LLM Model adapter that uses COW's existing bot infrastructure
"""
def __init__(self, bridge: Bridge, bot_type: str = "chat"):
# Get model name directly from config
from config import conf
model_name = conf().get("model", const.GPT_41)
super().__init__(model=model_name)
self.bridge = bridge
self.bot_type = bot_type
self._bot = None
self._use_linkai = conf().get("use_linkai", False) and conf().get("linkai_api_key")
@property
def bot(self):
"""Lazy load the bot and enhance it with tool calling if needed"""
if self._bot is None:
# If use_linkai is enabled, use LinkAI bot directly
if self._use_linkai:
self._bot = self.bridge.find_chat_bot(const.LINKAI)
else:
self._bot = self.bridge.get_bot(self.bot_type)
# Automatically add tool calling support if not present
self._bot = add_openai_compatible_support(self._bot)
# Log bot info
bot_name = type(self._bot).__name__
return self._bot
def call(self, request: LLMRequest):
"""
Call the model using COW's bot infrastructure
"""
try:
# For non-streaming calls, we'll use the existing reply method
# This is a simplified implementation
if hasattr(self.bot, 'call_with_tools'):
# Use tool-enabled call if available
kwargs = {
'messages': request.messages,
'tools': getattr(request, 'tools', None),
'stream': False,
'model': self.model # Pass model parameter
}
# Only pass max_tokens if it's explicitly set
if request.max_tokens is not None:
kwargs['max_tokens'] = request.max_tokens
# Extract system prompt if present
system_prompt = getattr(request, 'system', None)
if system_prompt:
kwargs['system'] = system_prompt
response = self.bot.call_with_tools(**kwargs)
return self._format_response(response)
else:
# Fallback to regular call
# This would need to be implemented based on your specific needs
raise NotImplementedError("Regular call not implemented yet")
except Exception as e:
logger.error(f"AgentLLMModel call error: {e}")
raise
def call_stream(self, request: LLMRequest):
"""
Call the model with streaming using COW's bot infrastructure
"""
try:
if hasattr(self.bot, 'call_with_tools'):
# Use tool-enabled streaming call if available
# Extract system prompt if present
system_prompt = getattr(request, 'system', None)
# Build kwargs for call_with_tools
kwargs = {
'messages': request.messages,
'tools': getattr(request, 'tools', None),
'stream': True,
'model': self.model # Pass model parameter
}
# Only pass max_tokens if explicitly set, let the bot use its default
if request.max_tokens is not None:
kwargs['max_tokens'] = request.max_tokens
# Add system prompt if present
if system_prompt:
kwargs['system'] = system_prompt
stream = self.bot.call_with_tools(**kwargs)
# Convert stream format to our expected format
for chunk in stream:
yield self._format_stream_chunk(chunk)
else:
bot_type = type(self.bot).__name__
raise NotImplementedError(f"Bot {bot_type} does not support call_with_tools. Please add the method.")
except Exception as e:
logger.error(f"AgentLLMModel call_stream error: {e}", exc_info=True)
raise
def _format_response(self, response):
"""Format Claude response to our expected format"""
# This would need to be implemented based on Claude's response format
return response
def _format_stream_chunk(self, chunk):
"""Format Claude stream chunk to our expected format"""
# This would need to be implemented based on Claude's stream format
return chunk
class AgentBridge:
"""
Bridge class that integrates super Agent with COW
Manages multiple agent instances per session for conversation isolation
"""
def __init__(self, bridge: Bridge):
self.bridge = bridge
self.agents = {} # session_id -> Agent instance mapping
self.default_agent = None # For backward compatibility (no session_id)
self.agent: Optional[Agent] = None
self.scheduler_initialized = False
# Create helper instances
self.initializer = AgentInitializer(bridge, self)
def create_agent(self, system_prompt: str, tools: List = None, **kwargs) -> Agent:
"""
Create the super agent with COW integration
Args:
system_prompt: System prompt
tools: List of tools (optional)
**kwargs: Additional agent parameters
Returns:
Agent instance
"""
# Create LLM model that uses COW's bot infrastructure
model = AgentLLMModel(self.bridge)
# Default tools if none provided
if tools is None:
# Use ToolManager to load all available tools
from agent.tools import ToolManager
tool_manager = ToolManager()
tool_manager.load_tools()
tools = []
for tool_name in tool_manager.tool_classes.keys():
try:
tool = tool_manager.create_tool(tool_name)
if tool:
tools.append(tool)
except Exception as e:
logger.warning(f"[AgentBridge] Failed to load tool {tool_name}: {e}")
# Create agent instance
agent = Agent(
system_prompt=system_prompt,
description=kwargs.get("description", "AI Super Agent"),
model=model,
tools=tools,
max_steps=kwargs.get("max_steps", 15),
output_mode=kwargs.get("output_mode", "logger"),
workspace_dir=kwargs.get("workspace_dir"), # Pass workspace for skills loading
enable_skills=kwargs.get("enable_skills", True), # Enable skills by default
memory_manager=kwargs.get("memory_manager"), # Pass memory manager
max_context_tokens=kwargs.get("max_context_tokens"),
context_reserve_tokens=kwargs.get("context_reserve_tokens")
)
# Log skill loading details
if agent.skill_manager:
logger.debug(f"[AgentBridge] SkillManager initialized with {len(agent.skill_manager.skills)} skills")
return agent
def get_agent(self, session_id: str = None) -> Optional[Agent]:
"""
Get agent instance for the given session
Args:
session_id: Session identifier (e.g., user_id). If None, returns default agent.
Returns:
Agent instance for this session
"""
# If no session_id, use default agent (backward compatibility)
if session_id is None:
if self.default_agent is None:
self._init_default_agent()
return self.default_agent
# Check if agent exists for this session
if session_id not in self.agents:
self._init_agent_for_session(session_id)
return self.agents[session_id]
def _init_default_agent(self):
"""Initialize default super agent"""
agent = self.initializer.initialize_agent(session_id=None)
self.default_agent = agent
def _init_agent_for_session(self, session_id: str):
"""Initialize agent for a specific session"""
agent = self.initializer.initialize_agent(session_id=session_id)
self.agents[session_id] = agent
def agent_reply(self, query: str, context: Context = None,
on_event=None, clear_history: bool = False) -> Reply:
"""
Use super agent to reply to a query
Args:
query: User query
context: COW context (optional, contains session_id for user isolation)
on_event: Event callback (optional)
clear_history: Whether to clear conversation history
Returns:
Reply object
"""
try:
# Extract session_id from context for user isolation
session_id = None
if context:
session_id = context.kwargs.get("session_id") or context.get("session_id")
# Get agent for this session (will auto-initialize if needed)
agent = self.get_agent(session_id=session_id)
if not agent:
return Reply(ReplyType.ERROR, "Failed to initialize super agent")
# Create event handler for logging and channel communication
event_handler = AgentEventHandler(context=context, original_callback=on_event)
# Filter tools based on context
original_tools = agent.tools
filtered_tools = original_tools
# If this is a scheduled task execution, exclude scheduler tool to prevent recursion
if context and context.get("is_scheduled_task"):
filtered_tools = [tool for tool in agent.tools if tool.name != "scheduler"]
agent.tools = filtered_tools
logger.info(f"[AgentBridge] Scheduled task execution: excluded scheduler tool ({len(filtered_tools)}/{len(original_tools)} tools)")
else:
# Attach context to scheduler tool if present
if context and agent.tools:
for tool in agent.tools:
if tool.name == "scheduler":
try:
from agent.tools.scheduler.integration import attach_scheduler_to_tool
attach_scheduler_to_tool(tool, context)
except Exception as e:
logger.warning(f"[AgentBridge] Failed to attach context to scheduler: {e}")
break
try:
# Use agent's run_stream method with event handler
response = agent.run_stream(
user_message=query,
on_event=event_handler.handle_event,
clear_history=clear_history
)
finally:
# Restore original tools
if context and context.get("is_scheduled_task"):
agent.tools = original_tools
# Log execution summary
event_handler.log_summary()
# Check if there are files to send (from read tool)
if hasattr(agent, 'stream_executor') and hasattr(agent.stream_executor, 'files_to_send'):
files_to_send = agent.stream_executor.files_to_send
if files_to_send:
# Send the first file (for now, handle one file at a time)
file_info = files_to_send[0]
logger.info(f"[AgentBridge] Sending file: {file_info.get('path')}")
# Clear files_to_send for next request
agent.stream_executor.files_to_send = []
# Return file reply based on file type
return self._create_file_reply(file_info, response, context)
return Reply(ReplyType.TEXT, response)
except Exception as e:
logger.error(f"Agent reply error: {e}")
return Reply(ReplyType.ERROR, f"Agent error: {str(e)}")
def _create_file_reply(self, file_info: dict, text_response: str, context: Context = None) -> Reply:
"""
Create a reply for sending files
Args:
file_info: File metadata from read tool
text_response: Text response from agent
context: Context object
Returns:
Reply object for file sending
"""
file_type = file_info.get("file_type", "file")
file_path = file_info.get("path")
# For images, use IMAGE_URL type (channel will handle upload)
if file_type == "image":
# Convert local path to file:// URL for channel processing
file_url = f"file://{file_path}"
logger.info(f"[AgentBridge] Sending image: {file_url}")
reply = Reply(ReplyType.IMAGE_URL, file_url)
# Attach text message if present (for channels that support text+image)
if text_response:
reply.text_content = text_response # Store accompanying text
return reply
# For all file types (document, video, audio), use FILE type
if file_type in ["document", "video", "audio"]:
file_url = f"file://{file_path}"
logger.info(f"[AgentBridge] Sending {file_type}: {file_url}")
reply = Reply(ReplyType.FILE, file_url)
reply.file_name = file_info.get("file_name", os.path.basename(file_path))
# Attach text message if present
if text_response:
reply.text_content = text_response
return reply
# For other unknown file types, return text with file info
message = text_response or file_info.get("message", "文件已准备")
message += f"\n\n[文件: {file_info.get('file_name', file_path)}]"
return Reply(ReplyType.TEXT, message)
def _migrate_config_to_env(self, workspace_root: str):
"""
Migrate API keys from config.json to .env file if not already set
Args:
workspace_root: Workspace directory path (not used, kept for compatibility)
"""
from config import conf
import os
# Mapping from config.json keys to environment variable names
key_mapping = {
"open_ai_api_key": "OPENAI_API_KEY",
"open_ai_api_base": "OPENAI_API_BASE",
"gemini_api_key": "GEMINI_API_KEY",
"claude_api_key": "CLAUDE_API_KEY",
"linkai_api_key": "LINKAI_API_KEY",
}
# Use fixed secure location for .env file
env_file = os.path.expanduser("~/.cow/.env")
# Read existing env vars from .env file
existing_env_vars = {}
if os.path.exists(env_file):
try:
with open(env_file, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if line and not line.startswith('#') and '=' in line:
key, _ = line.split('=', 1)
existing_env_vars[key.strip()] = True
except Exception as e:
logger.warning(f"[AgentBridge] Failed to read .env file: {e}")
# Check which keys need to be migrated
keys_to_migrate = {}
for config_key, env_key in key_mapping.items():
# Skip if already in .env file
if env_key in existing_env_vars:
continue
# Get value from config.json
value = conf().get(config_key, "")
if value and value.strip(): # Only migrate non-empty values
keys_to_migrate[env_key] = value.strip()
# Log summary if there are keys to skip
if existing_env_vars:
logger.debug(f"[AgentBridge] {len(existing_env_vars)} env vars already in .env")
# Write new keys to .env file
if keys_to_migrate:
try:
# Ensure ~/.cow directory and .env file exist
env_dir = os.path.dirname(env_file)
if not os.path.exists(env_dir):
os.makedirs(env_dir, exist_ok=True)
if not os.path.exists(env_file):
open(env_file, 'a').close()
# Append new keys
with open(env_file, 'a', encoding='utf-8') as f:
f.write('\n# Auto-migrated from config.json\n')
for key, value in keys_to_migrate.items():
f.write(f'{key}={value}\n')
# Also set in current process
os.environ[key] = value
logger.info(f"[AgentBridge] Migrated {len(keys_to_migrate)} API keys from config.json to .env: {list(keys_to_migrate.keys())}")
except Exception as e:
logger.warning(f"[AgentBridge] Failed to migrate API keys: {e}")
def clear_session(self, session_id: str):
"""
Clear a specific session's agent and conversation history
Args:
session_id: Session identifier to clear
"""
if session_id in self.agents:
logger.info(f"[AgentBridge] Clearing session: {session_id}")
del self.agents[session_id]
def clear_all_sessions(self):
"""Clear all agent sessions"""
logger.info(f"[AgentBridge] Clearing all sessions ({len(self.agents)} total)")
self.agents.clear()
self.default_agent = None
def refresh_all_skills(self) -> int:
"""
Refresh skills in all agent instances after environment variable changes.
This allows hot-reload of skills without restarting the agent.
Returns:
Number of agent instances refreshed
"""
import os
from dotenv import load_dotenv
from config import conf
# Reload environment variables from .env file
workspace_root = os.path.expanduser(conf().get("agent_workspace", "~/cow"))
env_file = os.path.join(workspace_root, '.env')
if os.path.exists(env_file):
load_dotenv(env_file, override=True)
logger.info(f"[AgentBridge] Reloaded environment variables from {env_file}")
refreshed_count = 0
# Refresh default agent
if self.default_agent and hasattr(self.default_agent, 'skill_manager'):
self.default_agent.skill_manager.refresh_skills()
refreshed_count += 1
logger.info("[AgentBridge] Refreshed skills in default agent")
# Refresh all session agents
for session_id, agent in self.agents.items():
if hasattr(agent, 'skill_manager'):
agent.skill_manager.refresh_skills()
refreshed_count += 1
if refreshed_count > 0:
logger.info(f"[AgentBridge] Refreshed skills in {refreshed_count} agent instance(s)")
return refreshed_count