diff --git a/docs/docs.json b/docs/docs.json new file mode 100644 index 0000000..af4dacf --- /dev/null +++ b/docs/docs.json @@ -0,0 +1,118 @@ +{ + "$schema": "https://mintlify.com/docs.json", + "name": "CowAgent", + "description": "CowAgent - AI Super Assistant powered by LLMs, with autonomous task planning, long-term memory, skills system, and multi-channel deployment.", + "theme": "mint", + "colors": { + "primary": "#2563EB", + "light": "#60A5FA", + "dark": "#1D4ED8" + }, + "favicon": "/images/favicon.png", + "navbar": { + "links": [ + { + "label": "GitHub", + "href": "https://github.com/zhayujie/chatgpt-on-wechat" + } + ] + }, + "footer": { + "socials": { + "github": "https://github.com/zhayujie/chatgpt-on-wechat" + } + }, + "navigation": { + "languages": [ + { + "language": "zh", + "default": true, + "groups": [ + { + "group": "开始", + "pages": [ + "zh/index", + "zh/architecture" + ] + }, + { + "group": "快速开始", + "pages": [ + "zh/quick-start", + "zh/configuration" + ] + }, + { + "group": "核心功能", + "pages": [ + "zh/models", + "zh/skills", + "zh/memory" + ] + }, + { + "group": "通道", + "pages": [ + "zh/channels/web", + "zh/channels/feishu", + "zh/channels/dingtalk", + "zh/channels/wecom", + "zh/channels/wechatmp" + ] + }, + { + "group": "版本", + "pages": [ + "zh/releases/overview", + "zh/releases/v2.0.0" + ] + } + ] + }, + { + "language": "en", + "groups": [ + { + "group": "Getting Started", + "pages": [ + "en/index", + "en/architecture" + ] + }, + { + "group": "Quick Start", + "pages": [ + "en/quick-start", + "en/configuration" + ] + }, + { + "group": "Core Features", + "pages": [ + "en/models", + "en/skills", + "en/memory" + ] + }, + { + "group": "Channels", + "pages": [ + "en/channels/web", + "en/channels/feishu", + "en/channels/dingtalk", + "en/channels/wecom", + "en/channels/wechatmp" + ] + }, + { + "group": "Versions", + "pages": [ + "en/releases/overview", + "en/releases/v2.0.0" + ] + } + ] + } + ] + } +} diff --git a/docs/en/architecture.mdx 
b/docs/en/architecture.mdx new file mode 100644 index 0000000..18be965 --- /dev/null +++ b/docs/en/architecture.mdx @@ -0,0 +1,93 @@ +--- +title: Architecture +description: CowAgent 2.0 system architecture and core design +--- + +# Architecture + +CowAgent 2.0 is a comprehensive upgrade from a simple chatbot to an AI super assistant, built on an Agent architecture with autonomous thinking, task planning, long-term memory, and skill extension capabilities. + +## System Architecture + +``` +┌──────────────────────────────────────────────────────┐ +│ Channels │ +│ Web │ Feishu │ DingTalk │ WeCom │ WeChat MP │ +└───────────────────────┬──────────────────────────────┘ + │ +┌───────────────────────▼──────────────────────────────┐ +│ Agent Core │ +│ ┌─────────────┐ ┌──────────┐ ┌───────────────────┐ │ +│ │ Task Planner│ │ Memory │ │ Skills Engine │ │ +│ └──────┬──────┘ └────┬─────┘ └────────┬──────────┘ │ +│ │ │ │ │ +│ ┌──────▼─────────────▼────────────────▼──────────┐ │ +│ │ Tools │ │ +│ │ File R/W │ Bash │ Browser │ Scheduler │ ... │ │ +│ └────────────────────────────────────────────────┘ │ +└───────────────────────┬──────────────────────────────┘ + │ +┌───────────────────────▼──────────────────────────────┐ +│ Models │ +│ OpenAI │ Claude │ Gemini │ MiniMax │ GLM │ ... │ +└──────────────────────────────────────────────────────┘ +``` + +### Core Modules + +| Module | Description | +| --- | --- | +| **Channels** | Message channel layer for receiving and sending messages, supporting Web, Feishu, DingTalk, WeCom, WeChat Official Accounts, etc. 
| +| **Agent Core** | The intelligent agent engine, including task planning, memory system, and skills engine | +| **Tools** | Tool layer through which the Agent accesses OS resources, with 10+ built-in tools | +| **Models** | Model layer supporting unified access to major domestic and international LLMs | + +## Agent Mode + +When Agent mode is enabled, CowAgent operates as an autonomous intelligent agent with the following workflow: + +1. **Receive Message** - Receives user input through a channel +2. **Understand Intent** - Analyzes task requirements and context +3. **Plan Task** - Breaks complex tasks into multiple steps +4. **Call Tools** - Selects appropriate tools to execute each step +5. **Update Memory** - Stores important information in long-term memory +6. **Return Result** - Sends execution results back to the user + +## Workspace + +The Agent workspace defaults to `~/cow`, storing system prompts, memory files, skill files, etc.: + +``` +~/cow/ +├── system.md # Agent system prompt +├── user.md # User profile +├── memory/ # Long-term memory storage +│ ├── core.md # Core memory +│ └── daily/ # Daily memory +├── skills/ # Custom skills +│ ├── skill-1/ +│ └── skill-2/ +└── .env # Secret keys for skills +``` + +## Core Configuration + +Configure Agent mode parameters in `config.json`: + +```json +{ + "agent": true, + "agent_workspace": "~/cow", + "agent_max_context_tokens": 40000, + "agent_max_context_turns": 30, + "agent_max_steps": 15 +} +``` + +| Parameter | Description | Default | +| --- | --- | --- | +| `agent` | Enable Agent mode | `true` | +| `agent_workspace` | Workspace path | `~/cow` | +| `agent_max_context_tokens` | Maximum context tokens | `40000` | +| `agent_max_context_turns` | Maximum context conversation turns | `30` | +| `agent_max_steps` | Maximum tool call steps per task | `15` | diff --git a/docs/en/channels/dingtalk.mdx b/docs/en/channels/dingtalk.mdx new file mode 100644 index 0000000..12bd0ae --- /dev/null +++ 
b/docs/en/channels/dingtalk.mdx @@ -0,0 +1,40 @@ +--- +title: DingTalk +description: Integrate CowAgent with DingTalk +--- + +# DingTalk + +Create a smart bot application on the DingTalk Open Platform to integrate CowAgent. + +## 1. Create an App + +1. Go to [DingTalk Developer Console](https://open-dev.dingtalk.com/fe/app#/corp/app), click **Create App**, and fill in the information +2. Add the **Bot** capability +3. Configure bot info and click **Publish** + +## 2. Project Configuration + +1. Get `Client ID` and `Client Secret` from **Credentials & Basic Info** + +2. Add to `config.json`: + +```json +{ + "channel_type": "dingtalk", + "dingtalk_client_id": "YOUR_CLIENT_ID", + "dingtalk_client_secret": "YOUR_CLIENT_SECRET" +} +``` + +3. Install dependency: + +```bash +pip3 install dingtalk_stream +``` + +4. After starting the project, go to **Event Subscription** in the DingTalk console, click **Verify Connection** — it should show "Connection successful" + +## 3. Usage + +Chat with the bot privately or add it to a group chat to start a conversation. diff --git a/docs/en/channels/feishu.mdx b/docs/en/channels/feishu.mdx new file mode 100644 index 0000000..b2a0585 --- /dev/null +++ b/docs/en/channels/feishu.mdx @@ -0,0 +1,69 @@ +--- +title: Feishu (Lark) +description: Integrate CowAgent with Feishu +--- + +# Feishu (Lark) + +Integrate CowAgent into Feishu by creating a custom app. Supports WebSocket (recommended) and Webhook event modes. + +## 1. Create a Custom App + +### Create the App + +Go to [Feishu Open Platform](https://open.feishu.cn/app/), click **Create Custom App**, and fill in the required information. + +### Add Bot Capability + +In the **Add App Capabilities** menu, add the **Bot** capability. 
+ +### Configure Permissions + +Go to **Permission Management**, paste the following permissions, select all, and enable them: + +``` +im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p2p_msg,im:message.p2p_msg:readonly,im:message:send_as_bot,im:resource +``` + +## 2. Project Configuration + +Get `App ID` and `App Secret` from **Credentials & Basic Info**, then add to `config.json`: + + + + No public IP required: + + ```json + { + "channel_type": "feishu", + "feishu_app_id": "YOUR_APP_ID", + "feishu_app_secret": "YOUR_APP_SECRET", + "feishu_event_mode": "websocket" + } + ``` + + Install dependency: `pip3 install lark-oapi` + + + Requires public IP: + + ```json + { + "channel_type": "feishu", + "feishu_app_id": "YOUR_APP_ID", + "feishu_app_secret": "YOUR_APP_SECRET", + "feishu_token": "VERIFICATION_TOKEN", + "feishu_event_mode": "webhook", + "feishu_port": 9891 + } + ``` + + + +## 3. Configure Event Subscription + +1. After starting the project, go to **Events & Callbacks** on the Feishu Open Platform, select **Long Connection** mode, and save +2. Click **Add Event**, search for "Receive Message", select "Receive Message v2.0", and confirm +3. Go to **Version Management & Release**, create a new version, and submit for release approval + +Once approved, search for the bot name in Feishu to start chatting. diff --git a/docs/en/channels/web.mdx b/docs/en/channels/web.mdx new file mode 100644 index 0000000..ae38427 --- /dev/null +++ b/docs/en/channels/web.mdx @@ -0,0 +1,33 @@ +--- +title: Web +description: Use CowAgent through the Web interface +--- + +# Web + +Web is the default channel for CowAgent. A Web console starts automatically on launch, allowing you to chat with the Agent through your browser. 
+ +## Configuration + +```json +{ + "channel_type": "web", + "web_port": 9899 +} +``` + +| Parameter | Description | Default | +| --- | --- | --- | +| `channel_type` | Set to `web` | `web` | +| `web_port` | Web service listening port | `9899` | + +## Usage + +After starting the project, visit: + +- Local: `http://localhost:9899/chat` +- Server: `http://YOUR_SERVER_IP:9899/chat` + + + Ensure your server firewall and security group allow access to the configured port. + diff --git a/docs/en/channels/wechatmp.mdx b/docs/en/channels/wechatmp.mdx new file mode 100644 index 0000000..6b2b82f --- /dev/null +++ b/docs/en/channels/wechatmp.mdx @@ -0,0 +1,58 @@ +--- +title: WeChat Official Account +description: Integrate CowAgent with WeChat Official Accounts +--- + +# WeChat Official Account + +CowAgent supports both personal subscription accounts and enterprise service accounts. + +| Type | Requirements | Features | +| --- | --- | --- | +| **Personal Subscription** | Available to individuals | Users must send a message to retrieve replies | +| **Enterprise Service** | Enterprise registration with verified customer service API | Can proactively push replies to users | + + + Official Accounts only support server and Docker deployment. Install extended dependencies: `pip3 install -r requirements-optional.txt` + + +## Personal Subscription Account + +Add the following to `config.json`: + +```json +{ + "channel_type": "wechatmp", + "wechatmp_app_id": "YOUR_APP_ID", + "wechatmp_app_secret": "YOUR_APP_SECRET", + "wechatmp_aes_key": "", + "wechatmp_token": "YOUR_TOKEN", + "wechatmp_port": 80 +} +``` + +### Setup Steps + +1. Get parameters from [WeChat Official Account Platform](https://mp.weixin.qq.com/) under **Settings & Development → Basic Configuration → Server Configuration** +2. Enable developer secret and add server IP to the whitelist +3. Start the program (listens on port 80) +4. 
In the official account console, **enable server configuration** with URL format `http://{HOST}/wx` + +## Enterprise Service Account + +The setup is largely identical to the subscription account, with these differences: + +1. Register an enterprise service account and complete WeChat verification, ensure **Customer Service API** permission is granted +2. Set `"channel_type": "wechatmp_service"` in `config.json` +3. Replies can be proactively pushed to users without them having to manually retrieve them + +```json +{ + "channel_type": "wechatmp_service", + "wechatmp_app_id": "YOUR_APP_ID", + "wechatmp_app_secret": "YOUR_APP_SECRET", + "wechatmp_aes_key": "", + "wechatmp_token": "YOUR_TOKEN", + "wechatmp_port": 80 +} +``` diff --git a/docs/en/channels/wecom.mdx b/docs/en/channels/wecom.mdx new file mode 100644 index 0000000..fcec1ab --- /dev/null +++ b/docs/en/channels/wecom.mdx @@ -0,0 +1,59 @@ +--- +title: WeCom +description: Integrate CowAgent with WeCom (WeChat Work) custom app +--- + +# WeCom (WeChat Work) + +Integrate CowAgent through a WeCom custom application for internal team messaging. + + + WeCom only supports Docker deployment or server-based Python deployment, not local running mode. + + +## 1. Prerequisites + +- A server with a public IP +- A WeCom account (individuals can register but cannot be verified) +- For verified WeCom accounts, a domain registered to the same entity + +## 2. Create a WeCom App + +1. In the [WeCom Admin Console](https://work.weixin.qq.com/wework_admin/frame#profile), go to **My Enterprise** to get the **Corp ID** +2. Go to **App Management**, create a new app, and note the `AgentId` and `Secret` +3. Click **Set API Receive**, configure the app interface: + - URL format: `http://ip:port/wxcomapp` (verified enterprises need a registered domain) + - Generate and save the `Token` and `EncodingAESKey` + +## 3. 
Configuration and Startup + +```json +{ + "channel_type": "wechatcom_app", + "wechatcom_corp_id": "YOUR_CORP_ID", + "wechatcomapp_token": "YOUR_TOKEN", + "wechatcomapp_secret": "YOUR_SECRET", + "wechatcomapp_agent_id": "YOUR_AGENT_ID", + "wechatcomapp_aes_key": "YOUR_AES_KEY", + "wechatcomapp_port": 9898 +} +``` + +| Parameter | Description | +| --- | --- | +| `wechatcom_corp_id` | Enterprise Corp ID | +| `wechatcomapp_token` | Token from API receive config | +| `wechatcomapp_secret` | App Secret | +| `wechatcomapp_agent_id` | App AgentId | +| `wechatcomapp_aes_key` | EncodingAESKey from API receive config | +| `wechatcomapp_port` | Listening port, default 9898 | + +After starting the program, save the **message server configuration** in the WeCom console, then add the server IP to **Trusted IPs**. + + + If configuration fails: 1. Ensure the firewall and security group allow the port; 2. Verify all parameters match; 3. Verified enterprises need a registered domain. + + +## 4. Usage + +Search for the app name in WeCom to start chatting. To allow external WeChat users, share the invitation QR code from **My Enterprise → WeChat Plugin**. diff --git a/docs/en/configuration.mdx b/docs/en/configuration.mdx new file mode 100644 index 0000000..0b29271 --- /dev/null +++ b/docs/en/configuration.mdx @@ -0,0 +1,101 @@ +--- +title: Configuration +description: CowAgent configuration file reference +--- + +# Configuration + +The config template is located at `config-template.json` in the project root. 
Copy it to create the active `config.json`: + +```bash +cp config-template.json config.json +``` + +## Core Settings + +```json +{ + "channel_type": "web", + "model": "MiniMax-M2.5", + "agent": true, + "agent_workspace": "~/cow", + "agent_max_context_tokens": 40000, + "agent_max_context_turns": 30, + "agent_max_steps": 15 +} +``` + +| Parameter | Description | Default | +| --- | --- | --- | +| `channel_type` | Channel type | `web` | +| `model` | Model name | `MiniMax-M2.5` | +| `agent` | Enable Agent mode | `true` | +| `agent_workspace` | Agent workspace path | `~/cow` | +| `agent_max_context_tokens` | Maximum context tokens | `40000` | +| `agent_max_context_turns` | Maximum context conversation turns | `30` | +| `agent_max_steps` | Maximum tool call steps per task | `15` | + +## Model API Keys + +Fill in the API key for your chosen model: + +```json +{ + "minimax_api_key": "", + "zhipu_ai_api_key": "", + "moonshot_api_key": "", + "ark_api_key": "", + "dashscope_api_key": "", + "claude_api_key": "", + "gemini_api_key": "", + "open_ai_api_key": "" +} +``` + +See [Models](/en/models) for detailed model configuration. 
+ +## Voice Settings + +```json +{ + "speech_recognition": false, + "group_speech_recognition": false, + "voice_reply_voice": false +} +``` + +| Parameter | Description | +| --- | --- | +| `speech_recognition` | Enable private chat voice recognition | +| `group_speech_recognition` | Enable group chat voice recognition | +| `voice_reply_voice` | Reply to voice messages with voice | + +## LinkAI Settings + +```json +{ + "use_linkai": false, + "linkai_api_key": "", + "linkai_app_code": "" +} +``` + +| Parameter | Description | +| --- | --- | +| `use_linkai` | Enable LinkAI integration | +| `linkai_api_key` | LinkAI API Key, create at [console](https://link-ai.tech/console/interface) | +| `linkai_app_code` | LinkAI app or workflow code | + +## Proxy Settings + +If you need a network proxy: + +```json +{ + "proxy": "127.0.0.1:7890" +} +``` + + + For all configuration options, see the [`config.py`](https://github.com/zhayujie/chatgpt-on-wechat/blob/master/config.py) file in the project. + diff --git a/docs/en/index.mdx b/docs/en/index.mdx new file mode 100644 index 0000000..c1fdfc8 --- /dev/null +++ b/docs/en/index.mdx @@ -0,0 +1,64 @@ +--- +title: Introduction +description: CowAgent - AI Super Assistant powered by LLMs +--- + +# CowAgent + +**CowAgent** is an AI super assistant powered by large language models, capable of autonomous thinking and task planning, operating computers and external resources, creating and executing Skills, with long-term memory that grows with you. + +CowAgent supports flexible switching between multiple models, handles multimodal messages including text, voice, images, and files, and can be integrated into web, Feishu (Lark), DingTalk, WeCom, and WeChat Official Accounts for 24/7 operation on your personal computer or server. + +## Core Capabilities + + + + Understands complex tasks and autonomously plans execution, continuously thinking and calling tools until the goal is achieved. 
Supports accessing files, terminals, browsers, scheduled tasks, and other system resources. + + + Automatically persists conversation memory to local files and databases, including core memory and daily memory, with keyword and vector search support. + + + Implements a Skills creation and execution engine with multiple built-in skills, and supports custom skill development through natural language conversations. + + + Supports parsing, processing, generating, and sending multiple message types including text, images, voice, and files. + + + Supports OpenAI, Claude, Gemini, DeepSeek, MiniMax, GLM, Qwen, Kimi, Doubao, and other major model providers. + + + Runs on local computers or servers, integrable with web, Feishu, DingTalk, WeChat Official Accounts, and WeCom. + + + +## Quick Experience + +Run the following command in your terminal to install, configure, and start CowAgent with one click: + +```bash +bash <(curl -sS https://cdn.link-ai.tech/code/cow/run.sh) +``` + +After running, a Web service starts by default. Visit `http://localhost:9899/chat` to start chatting. + + + + View the complete installation and setup guide + + + Learn about the CowAgent system architecture + + + +## Disclaimer + +1. This project follows the [MIT License](https://github.com/zhayujie/chatgpt-on-wechat/blob/master/LICENSE) and is primarily for technical research and learning. +2. Token usage in Agent mode is higher than regular chat mode. Please choose models based on both effectiveness and cost. The Agent has access to the operating system — please choose deployment environments carefully. +3. The CowAgent project focuses on open-source technology development and will not participate in, authorize, or issue any cryptocurrency. 
+ +## Community + +Join the open-source community by adding the assistant on WeChat: + + diff --git a/docs/en/memory.mdx b/docs/en/memory.mdx new file mode 100644 index 0000000..b303019 --- /dev/null +++ b/docs/en/memory.mdx @@ -0,0 +1,68 @@ +--- +title: Memory +description: CowAgent long-term memory system +--- + +# Long-term Memory + +The memory system enables the Agent to remember important information over time, continuously accumulating experience, understanding user preferences, and achieving autonomous thinking and growth. + +## How It Works + +The Agent proactively stores memory in these scenarios: + +- **When users share important information** — Automatically identifies and stores preferences, decisions, facts, and other key information +- **When conversations reach a certain length** — Automatically extracts summaries to prevent information loss +- **When retrieval is needed** — Intelligently searches historical memory and combines it with context + +## Memory Types + +### Core Memory + +Stored in `~/cow/memory/core.md`, containing long-term user preferences, important decisions, key facts, and other information that doesn't fade over time. + +### Daily Memory + +Stored in the `~/cow/memory/daily/` directory, organized by date, recording daily conversation summaries and key events. 
+ +## First Startup + +On first startup, the Agent proactively asks the user for key information and records it in the workspace (default `~/cow`): + +| File | Description | +| --- | --- | +| `system.md` | Agent system prompt and behavior settings | +| `user.md` | User identity and preferences | +| `memory/core.md` | Core memory | +| `memory/daily/` | Daily memory directory | + + + + + +## Memory Retrieval + +The memory system supports hybrid retrieval: + +- **Keyword Search** — Matches historical memory based on keywords +- **Vector Search** — Semantic similarity search that finds related memories even with different wording + +The Agent automatically triggers memory retrieval during conversations, incorporating relevant historical information into the context. + +## Configuration + +Adjust memory-related parameters in `config.json`: + +```json +{ + "agent_workspace": "~/cow", + "agent_max_context_tokens": 40000, + "agent_max_context_turns": 30 +} +``` + +| Parameter | Description | Default | +| --- | --- | --- | +| `agent_workspace` | Workspace path where memory files are stored | `~/cow` | +| `agent_max_context_tokens` | Maximum context tokens, affects short-term memory capacity | `40000` | +| `agent_max_context_turns` | Maximum context turns, older conversations are discarded when exceeded | `30` | diff --git a/docs/en/models.mdx b/docs/en/models.mdx new file mode 100644 index 0000000..b2c897d --- /dev/null +++ b/docs/en/models.mdx @@ -0,0 +1,173 @@ +--- +title: Models +description: Supported models and configuration guide +--- + +# Models + +CowAgent supports major LLM providers from China and worldwide. Model implementations are in the `models/` directory. + + + Recommended models for Agent mode: MiniMax-M2.5, glm-5, kimi-k2.5, qwen3.5-plus, claude-sonnet-4-6, gemini-3.1-pro-preview. Choose based on effectiveness and cost. + + +## Model Configuration + +Set the model name and corresponding API key in `config.json`. 
+ +### MiniMax + +```json +{ + "model": "MiniMax-M2.5", + "minimax_api_key": "YOUR_API_KEY" +} +``` + +| Parameter | Description | +| --- | --- | +| `model` | `MiniMax-M2.5`, `MiniMax-M2.1`, `MiniMax-M2.1-lightning`, `MiniMax-M2`, etc. | +| `minimax_api_key` | Create at [MiniMax Console](https://platform.minimaxi.com/user-center/basic-information/interface-key) | + +### GLM (Zhipu AI) + +```json +{ + "model": "glm-5", + "zhipu_ai_api_key": "YOUR_API_KEY" +} +``` + +| Parameter | Description | +| --- | --- | +| `model` | `glm-5`, `glm-4.7`, `glm-4-plus`, `glm-4-flash`, etc. See [model list](https://bigmodel.cn/dev/api/normal-model/glm-4) | +| `zhipu_ai_api_key` | Create at [Zhipu AI Console](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) | + +### Qwen (Tongyi Qianwen) + +```json +{ + "model": "qwen3.5-plus", + "dashscope_api_key": "YOUR_API_KEY" +} +``` + +| Parameter | Description | +| --- | --- | +| `model` | `qwen3.5-plus`, `qwen3-max`, `qwen-max`, `qwen-plus`, etc. | +| `dashscope_api_key` | Create at [Bailian Console](https://bailian.console.aliyun.com/?tab=model#/api-key) | + +### Kimi (Moonshot) + +```json +{ + "model": "kimi-k2.5", + "moonshot_api_key": "YOUR_API_KEY" +} +``` + +| Parameter | Description | +| --- | --- | +| `model` | `kimi-k2.5`, `kimi-k2`, `moonshot-v1-8k`, `moonshot-v1-32k`, etc. | +| `moonshot_api_key` | Create at [Moonshot Console](https://platform.moonshot.cn/console/api-keys) | + +### Doubao (ByteDance) + +```json +{ + "model": "doubao-seed-2-0-code-preview-260215", + "ark_api_key": "YOUR_API_KEY" +} +``` + +| Parameter | Description | +| --- | --- | +| `model` | `doubao-seed-2-0-code-preview-260215`, `doubao-seed-2-0-pro-260215`, etc. 
| +| `ark_api_key` | Create at [Volcano Ark Console](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) | + +### Claude + +```json +{ + "model": "claude-sonnet-4-6", + "claude_api_key": "YOUR_API_KEY" +} +``` + +| Parameter | Description | +| --- | --- | +| `model` | `claude-sonnet-4-6`, `claude-opus-4-6`, `claude-sonnet-4-5`, etc. See [official models](https://docs.anthropic.com/en/docs/about-claude/models/overview) | +| `claude_api_key` | Create at [Claude Console](https://console.anthropic.com/settings/keys) | +| `claude_api_base` | Optional, defaults to `https://api.anthropic.com/v1` | + +### Gemini + +```json +{ + "model": "gemini-3.1-pro-preview", + "gemini_api_key": "YOUR_API_KEY" +} +``` + +| Parameter | Description | +| --- | --- | +| `model` | `gemini-3.1-pro-preview`, `gemini-3-flash-preview`, `gemini-2.5-pro`, etc. See [official docs](https://ai.google.dev/gemini-api/docs/models) | +| `gemini_api_key` | Create at [Google AI Studio](https://aistudio.google.com/app/apikey) | + +### OpenAI + +```json +{ + "model": "gpt-4.1-mini", + "open_ai_api_key": "YOUR_API_KEY", + "open_ai_api_base": "https://api.openai.com/v1" +} +``` + +| Parameter | Description | +| --- | --- | +| `model` | o-series, gpt-5.2, gpt-4.1, etc. See [model list](https://platform.openai.com/docs/models) | +| `open_ai_api_key` | Create at [OpenAI Platform](https://platform.openai.com/api-keys) | +| `open_ai_api_base` | Optional, modify to use third-party proxy | + +### DeepSeek + +```json +{ + "model": "deepseek-chat", + "open_ai_api_key": "YOUR_API_KEY", + "open_ai_api_base": "https://api.deepseek.com/v1", + "bot_type": "chatGPT" +} +``` + +| Parameter | Description | +| --- | --- | +| `model` | `deepseek-chat` (V3), `deepseek-reasoner` (R1) | +| `bot_type` | OpenAI-compatible mode, set to `chatGPT` | + +## OpenAI-Compatible Access + +Most models also support OpenAI-compatible access. 
Set `bot_type` to `chatGPT` and configure the corresponding `open_ai_api_base` and `open_ai_api_key`. + +## LinkAI Platform + +[LinkAI](https://link-ai.tech) allows flexible model switching with knowledge base, workflow, and plugin support. + +```json +{ + "use_linkai": true, + "linkai_api_key": "YOUR_API_KEY", + "linkai_app_code": "YOUR_APP_CODE" +} +``` + +| Parameter | Description | +| --- | --- | +| `use_linkai` | Set to `true` to enable LinkAI | +| `linkai_api_key` | Create at [console](https://link-ai.tech/console/interface) | +| `linkai_app_code` | LinkAI agent code, optional | + + + For all model names, see [`common/const.py`](https://github.com/zhayujie/chatgpt-on-wechat/blob/master/common/const.py) in the project. + diff --git a/docs/en/quick-start.mdx b/docs/en/quick-start.mdx new file mode 100644 index 0000000..bbc50ff --- /dev/null +++ b/docs/en/quick-start.mdx @@ -0,0 +1,120 @@ +--- +title: Quick Start +description: One-click install or manually deploy CowAgent +--- + +# Quick Start + +CowAgent supports Linux, macOS, and Windows. It can run on personal computers or servers and requires Python 3.7 ~ 3.12 (3.9 recommended). + +## One-click Install + +The project provides a script for one-click installation, configuration, startup, and management: + +```bash +bash <(curl -sS https://cdn.link-ai.tech/code/cow/run.sh) +``` + +The script automatically: + +1. Checks the Python environment (requires Python 3.7+) +2. Installs necessary tools (git, curl, etc.) +3. Clones the project to `~/chatgpt-on-wechat` +4. Installs Python dependencies +5. Guides configuration of AI models and channels +6. 
Starts the service + +### Management Commands + +After installation, use the following commands to manage the service: + +| Command | Description | +| --- | --- | +| `./run.sh start` | Start service | +| `./run.sh stop` | Stop service | +| `./run.sh restart` | Restart service | +| `./run.sh status` | Check status | +| `./run.sh logs` | View live logs | +| `./run.sh config` | Reconfigure | +| `./run.sh update` | Update project | + +## Manual Installation + +### 1. Clone the Repository + +```bash +git clone https://github.com/zhayujie/chatgpt-on-wechat +cd chatgpt-on-wechat/ +``` + + + For users in China, use the mirror: https://gitee.com/zhayujie/chatgpt-on-wechat + + +### 2. Install Dependencies + +Core dependencies (required): + +```bash +pip3 install -r requirements.txt +``` + +Extended dependencies (optional, recommended): + +```bash +pip3 install -r requirements-optional.txt +``` + +### 3. Configuration + +Copy the config template and edit: + +```bash +cp config-template.json config.json +``` + +See [Configuration](/en/configuration) for detailed settings. + +### 4. Run + +**Local:** + +```bash +python3 app.py +``` + +After starting, visit `http://localhost:9899/chat` to begin chatting. + +**Server (background):** + +```bash +nohup python3 app.py & tail -f nohup.out +``` + +## Docker Deployment + +Docker deployment requires no source code download or dependency installation. Source code deployment is recommended in Agent mode for better system access. + + + Requires [Docker](https://docs.docker.com/engine/install/) and docker-compose. + + +**1. Download config file** + +```bash +wget https://cdn.link-ai.tech/code/cow/docker-compose.yml +``` + +Edit `docker-compose.yml` to fill in required configuration. + +**2. Start container** + +```bash +sudo docker compose up -d +``` + +**3. 
View logs** + +```bash +sudo docker logs -f chatgpt-on-wechat +``` diff --git a/docs/en/releases/overview.mdx b/docs/en/releases/overview.mdx new file mode 100644 index 0000000..b6f823b --- /dev/null +++ b/docs/en/releases/overview.mdx @@ -0,0 +1,25 @@ +--- +title: Changelog +description: CowAgent version history +--- + +# Changelog + +| Version | Date | Description | +| --- | --- | --- | +| [2.0.0](/en/releases/v2.0.0) | 2026.02.03 | Full upgrade to AI super assistant | +| 1.7.6 | 2025.05.23 | Web Channel optimization, AgentMesh plugin | +| 1.7.5 | 2025.04.11 | wechatferry protocol, DeepSeek model | +| 1.7.4 | 2024.12.13 | Gemini 2.0 model, Web Channel | +| 1.7.3 | 2024.10.31 | Stability improvements, database features | +| 1.7.2 | 2024.09.26 | One-click install script, o1 model | +| 1.7.0 | 2024.08.02 | iFlytek 4.0 model, knowledge base references | +| 1.6.9 | 2024.07.19 | gpt-4o-mini, Alibaba voice recognition | +| 1.6.8 | 2024.07.05 | Claude 3.5, Gemini 1.5 Pro | +| 1.6.0 | 2024.04.26 | Kimi integration, gpt-4-turbo upgrade | +| 1.5.8 | 2024.03.26 | GLM-4, Claude-3, edge-tts | +| 1.5.2 | 2023.11.10 | Feishu channel, image recognition | +| 1.5.0 | 2023.11.10 | gpt-4-turbo, dall-e-3, tts multimodal | +| 1.0.0 | 2022.12.12 | Project created, first ChatGPT integration | + +See [GitHub Releases](https://github.com/zhayujie/chatgpt-on-wechat/releases) for full history. diff --git a/docs/en/releases/v2.0.0.mdx b/docs/en/releases/v2.0.0.mdx new file mode 100644 index 0000000..f46cbf6 --- /dev/null +++ b/docs/en/releases/v2.0.0.mdx @@ -0,0 +1,107 @@ +--- +title: v2.0.0 +description: CowAgent 2.0 - Full upgrade from chatbot to AI super assistant +--- + +# CowAgent 2.0 + +CowAgent 2.0 is a comprehensive upgrade from a chatbot to an **AI super assistant**! It can now autonomously think and plan tasks, has long-term memory, operates computers and external resources, and creates and executes skills — truly understanding you and growing alongside you. 
+ +**Release Date**: 2026.02.03 | [GitHub Release](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/2.0.0) + +## Key Updates + +### Agent Core Capabilities + +- **Complex Task Planning**: Understands complex tasks and autonomously plans execution, continuously thinking and calling tools until goals are achieved, with multi-turn reasoning and context understanding +- **Long-term Memory**: Automatically persists conversation memory to local files and databases, including core memory and daily memory, with keyword and vector search support +- **Built-in System Tools**: 10+ built-in tools including file operations, Bash terminal, browser, file sending, scheduled tasks, memory management, etc. +- **Skills**: New Skill execution engine with built-in skills and support for custom skill development through natural language conversation +- **Security and Cost**: Controls Agent access security through secret key management, prompt controls, and system permissions; limits token costs through max memory turns, max context tokens, and tool execution steps + +### Other Updates + +- **Channel Improvements**: Feishu and DingTalk channels support WebSocket connections (no public IP needed), with image/file message support +- **Model Updates**: Added claude-sonnet-4-5, gemini-3-pro-preview, glm-4.7, MiniMax-M2.1, qwen3-max, and other latest models +- **Deployment**: Added one-click install, configure, run, and management script to simplify deployment + +## Long-term Memory System + +The Agent proactively stores information when users share important details, and automatically extracts summaries when conversations reach a certain length. It supports hybrid retrieval that combines keyword search with vector (semantic) search. + +On **first startup**, the Agent proactively asks for key information and records it in the workspace (default `~/cow`) including agent settings, user identity, and memory files. 
+ +During **long-term conversations**, the Agent intelligently records and retrieves memories, continuously updating its settings, user preferences, and summarizing experiences — achieving true autonomous thinking and continuous growth. + + + + + +## Task Planning and Tool Calling + +The Agent intelligently selects and calls tools based on task requirements to complete various complex operations. + +### Terminal and File Access + +The most fundamental tool capabilities. Users can interact with the Agent from mobile devices to operate resources on personal computers or servers: + + + + + +### Application Programming + +With programming and system access capabilities, the Agent can handle the complete **Vibecoding workflow** — from information search, asset generation, coding, testing, deployment, Nginx configuration, to publishing — all from a single mobile command. + + + + + +### Scheduled Tasks + +Supports **one-time tasks, fixed intervals, and Cron expressions**, with two trigger modes: **fixed message sending** or **Agent dynamic task execution**: + + + + + +### Environment Variable Management + +Manages skill-required secrets via the `env_config` tool, with conversational updates and built-in security protection and data masking: + + + + + +## Skills System + +Each Skill consists of a description file, execution script (optional), and resources (optional), providing infinite extensibility. 
+ +### Skill Creator + +Quickly create skills through conversation, codifying workflows or integrating with any third-party API: + + + + + +### Search and Image Recognition + +- **Search Skill**: Built-in `bocha-search`, configure `BOCHA_SEARCH_API_KEY` to enable +- **Image Recognition**: Supports `gpt-4.1-mini`, `gpt-4.1`, etc., configure `OPENAI_API_KEY` to enable + + + + + +### Third-party Knowledge Bases and Plugins + +The `linkai-agent` skill integrates all agents from [LinkAI](https://link-ai.tech/) as Skills, enabling multi-agent decision-making: + + + + + +## Contributing + +After version 2.0, the project will keep upgrading Agent capabilities and expanding its channels, built-in tools, and skills system, while reducing model costs and improving security. You are welcome to [submit feedback](https://github.com/zhayujie/chatgpt-on-wechat/issues) and [contribute code](https://github.com/zhayujie/chatgpt-on-wechat/pulls). diff --git a/docs/en/skills.mdx b/docs/en/skills.mdx new file mode 100644 index 0000000..1a3832b --- /dev/null +++ b/docs/en/skills.mdx @@ -0,0 +1,103 @@ +--- +title: Skills +description: CowAgent Skills System - infinite extensibility for the Agent +--- + +# Skills System + +The Skills system provides infinite extensibility for the Agent. Each Skill consists of a description file, an execution script (optional), and resources (optional), describing how to complete a specific type of task. Skills enable the Agent to follow instructions to complete complex workflows, call various tools, or integrate with third-party systems. + +## Skill Types + +### Built-in Skills + +Located in the `skills/` directory of the project, automatically enabled based on dependency conditions (API keys, system commands, etc.).
+ +| Skill | Description | +| --- | --- | +| `skill-creator` | Skill creator — create custom skills through conversation | +| `bocha-search` | Web search capability | +| `openai-image-vision` | Image recognition using OpenAI vision models | +| `linkai-agent` | LinkAI agent integration for third-party knowledge bases and plugins | +| `web-scraper` | Web page content extraction | + +### Custom Skills + +Created by users through conversation, stored in the workspace (`~/cow/skills/`). Custom skills can implement any complex business workflow or third-party system integration. + +## Creating Skills + +Use the built-in `skill-creator` to create skills through natural language conversation: + +- Codify workflows into reusable skills +- Send API documentation and examples to the Agent for automatic integration +- Create customized automation pipelines + + + + + +## Web Search + +The built-in `bocha-search` skill provides web search capability: + +1. Create an API Key at [Bocha Open Platform](https://open.bochaai.com/) +2. Configure `BOCHA_SEARCH_API_KEY` via the `env_config` tool or send it directly to the Agent + +## Image Recognition + +The `openai-image-vision` skill supports image recognition using models like `gpt-4.1-mini` and `gpt-4.1`. + +Configure `OPENAI_API_KEY` via `config.json` or the `env_config` tool to enable. + + + + + +## LinkAI Agents + +The `linkai-agent` skill integrates all agents from [LinkAI](https://link-ai.tech/) as Skills, enabling multi-agent decision-making. + +### Configuration + +1. Configure `LINKAI_API_KEY` (via `env_config` tool or `linkai_api_key` in `config.json`) +2. 
Add agent descriptions in `skills/linkai-agent/config.json`: + +```json +{ + "apps": [ + { + "app_code": "G7z6vKwp", + "app_name": "LinkAI Support", + "app_description": "Use this agent only for LinkAI platform questions" + }, + { + "app_code": "SFY5x7JR", + "app_name": "Content Creator", + "app_description": "Use this agent for image or video creation" + } + ] +} +``` + +The Agent selects the appropriate agent based on name and description, and calls the corresponding app via `app_code`. + + + + + +## Skill File Structure + +Each skill directory follows this structure: + +``` +skills/ +├── my-skill/ +│ ├── SKILL.md # Skill description and instructions +│ ├── run.py # Execution script (optional) +│ └── resources/ # Additional resources (optional) +``` + + + For custom skill development, see the [Skill Creator Guide](https://github.com/zhayujie/chatgpt-on-wechat/blob/master/skills/skill-creator/SKILL.md). + diff --git a/docs/release/2.0.0.md b/docs/release/2.0.0.md deleted file mode 100644 index 469a1c5..0000000 --- a/docs/release/2.0.0.md +++ /dev/null @@ -1,121 +0,0 @@ -# CowAgent 2.0 - -🚀 CowAgent 2.0 实现了从聊天机器人到**超级智能助理**的全面升级!现在它能够主动思考和规划任务、拥有长期记忆、操作计算机和外部资源、创造和执行技能,真正理解你并和你一起成长。 - -### ✨ 重点更新 - -- Agent核心能力: - - **复杂任务规划**:能够理解复杂任务并自主规划执行,持续思考和调用工具直到完成目标,支持多轮推理和上下文理解。 - - **长期记忆**:自动将对话记忆持久化至本地文件和数据库中,包括全局记忆和天级记忆,支持关键词及向量检索。 - - **内置系统工具**:内置实现10+种工具,包括文件操作、bash终端、浏览器、文件发送、定时任务、记忆管理等。 - - **Skills**:新增Skill运行引擎,内置多种技能,并支持通过自然语言对话完成自定义Skills开发。 - - **安全和成本**:通过秘钥管理工具、提示词控制、系统权限等手段控制Agent的访问安全;通过最大记忆轮次、最大上下文token、工具执行步数对token成本进行限制。 -- 其他更新: - - 渠道优化:飞书及钉钉接入渠道支持长连接接入(无需公网IP)、支持图片/文件消息的接收和发送。 - - 模型更新:新增claude-sonnet-4-5、gemini-3-pro-preview、glm-4.7、MiniMax-M2.1、qwen3-max等最新模型。 - - 部署优化:增加一键安装、配置、运行、管理的脚本,简化部署流程。 - -## 一、长期记忆系统 - -Agent 会在用户分享重要信息时主动存储,也会在对话达到一定长度时自动提取摘要。支持语义搜索和向量检索的混合检索模式。 - -**首次启动**时,Agent 会主动询问关键信息,并记录至工作空间(默认 `~/cow`)中的智能体设定、用户身份、记忆文件中。 - -**长期对话**中,Agent 会智能记录或检索记忆,不断更新自身设定、用户偏好,总结经验和教训,真正实现自主思考和持续成长。 - - - - -## 二、任务规划与工具调用 - -Agent 
根据任务需求智能选择和调用工具,完成各类复杂操作。 - -### 1. 终端和文件访问 - -最基础和核心的工具能力,用户可通过手机端与 Agent 交互,操作个人电脑或服务器上的资源: - - - -### 2. 应用编程能力 - -基于编程能力和系统访问能力,Agent 可实现从信息搜索、素材生成、编码、测试、部署、Nginx配置、发布的 **Vibecoding 全流程**,通过手机端一句命令完成应用快速 demo。 - - - -### 3. 定时任务 - -支持 **一次性任务、固定时间间隔、Cron表达式** 三种形式,任务触发可选择 **固定消息发送** 或 **Agent动态任务执行** 两种模式: - - - -### 4. 环境变量管理 - -通过 `env_config` 工具管理技能所需秘钥,支持对话式更新,内置安全保护和脱敏策略: - - - -## 三、技能系统 - -每个 Skill 由说明文件、运行脚本(可选)、资源(可选)组成,为 Agent 提供无限扩展性。 - -### 1. 技能创造器 - -通过对话方式快速创建技能,将工作流程固化或对接任意第三方接口: - - - -### 2. 搜索和图像识别 - -- **搜索技能**:内置 `bocha-search`(博查搜索),配置 `BOCHA_SEARCH_API_KEY` 即可使用。 -- **图像识别**:支持 `gpt-4.1-mini`、`gpt-4.1` 等模型,配置 `OPENAI_API_KEY` 即可使用。 - - - -### 3. 三方知识库和插件 - -`linkai-agent` 技能可将 [LinkAI](https://link-ai.tech/) 上的所有智能体作为 skill 使用,实现多智能体决策: - - - - -## 四、快速开始 - -### 一键启动 - -本次新增了一键下载、配置、运行和管理的脚本,只需命令行中执行: - -```bash -bash <(curl -sS https://cdn.link-ai.tech/code/cow/run.sh) -``` - -详细说明参考:[项目启动脚本](https://github.com/zhayujie/chatgpt-on-wechat/wiki/CowAgentQuickStart) - -### 模型选择 - -Agent 模式推荐使用以下模型: - -- **Claude**: `claude-sonnet-4-5`、`claude-sonnet-4-0` -- **Gemini**: `gemini-3-flash-preview`、`gemini-3-pro-preview` -- **GLM**: `glm-4.7` -- **MiniMax**: `MiniMax-M2.1` -- **Qwen**: `qwen3-max` - -详细配置方式参考 [README.md 模型说明](../README.md#模型说明) - - -### 渠道接入 - -支持在 Web、飞书、钉钉、企业微信 等多渠道与 Agent 交互,随时随地使用超级助理,只需修改 `config.json` 中的 `channel_type` 配置即可切换: - -- **Web网页**:默认使用该渠道,运行后监听本地端口,通过浏览器访问。 -- **飞书接入**:[飞书接入文档](https://docs.link-ai.tech/cow/multi-platform/feishu) -- **钉钉接入**:[钉钉接入文档](https://docs.link-ai.tech/cow/multi-platform/dingtalk) -- **企业微信应用接入**:[企微应用文档](https://docs.link-ai.tech/cow/multi-platform/wechat-com) - -更多渠道配置参考:[通道说明](../README.md#通道说明) - -## 五、参与共建 - -2.0版本后,项目将持续升级Agent能力、拓展接入渠道、内置工具、技能系统,降低模型成本和提升安全性。欢迎 [提出反馈](https://github.com/zhayujie/chatgpt-on-wechat/issues) 和 [贡献代码](https://github.com/zhayujie/chatgpt-on-wechat/pulls)。 - -**🤖立即体验 CowAgent 2.0,开启你的超级AI助理之旅!** diff --git a/docs/release/history.md 
b/docs/release/history.md deleted file mode 100644 index c9b778a..0000000 --- a/docs/release/history.md +++ /dev/null @@ -1,51 +0,0 @@ -## 更新日志 - ->**2025.05.23:** [1.7.6版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.7.6) 优化web网页channel、新增 [AgentMesh多智能体插件](https://github.com/zhayujie/chatgpt-on-wechat/blob/master/plugins/agent/README.md)、百度语音合成优化、企微应用`access_token`获取优化、支持`claude-4-sonnet`和`claude-4-opus`模型 - ->**2025.04.11:** [1.7.5版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.7.5) 新增支持 [wechatferry](https://github.com/zhayujie/chatgpt-on-wechat/pull/2562) 协议、新增 deepseek 模型、新增支持腾讯云语音能力、新增支持 ModelScope 和 Gitee-AI API接口 - ->**2024.12.13:** [1.7.4版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.7.4) 新增 Gemini 2.0 模型、新增web channel、解决内存泄漏问题、解决 `#reloadp` 命令重载不生效问题 - ->**2024.10.31:** [1.7.3版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.7.3) 程序稳定性提升、数据库功能、Claude模型优化、linkai插件优化、离线通知 - ->**2024.09.26:** [1.7.2版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.7.2) 和 [1.7.1版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.7.1) 新增一键安装和管理脚本、文心,讯飞等模型优化、o1 模型 - ->**2024.08.02:** [1.7.0版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.7.0) 新增 讯飞4.0 模型、知识库引用来源展示、相关插件优化 - ->**2024.07.19:** [1.6.9版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.6.9) 新增 gpt-4o-mini 模型、阿里语音识别、企微应用渠道路由优化 - ->**2024.07.05:** [1.6.8版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.6.8) 和 [1.6.7版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.6.7),Claude3.5, Gemini 1.5 Pro, MiniMax模型、工作流图片输入、模型列表完善 - ->**2024.06.04:** [1.6.6版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.6.6) 和 [1.6.5版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.6.5),gpt-4o模型、钉钉流式卡片、讯飞语音识别/合成 - ->**2024.04.26:** [1.6.0版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.6.0),新增 Kimi 接入、gpt-4-turbo版本升级、文件总结和语音识别问题修复 - 
->**2024.03.26:** [1.5.8版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.5.8) 和 [1.5.7版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.5.7),新增 GLM-4、Claude-3 模型,edge-tts 语音支持 - ->**2024.01.26:** [1.5.6版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.5.6) 和 [1.5.5版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.5.5),钉钉接入,tool插件升级,4-turbo模型更新 - ->**2023.11.11:** [1.5.3版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.5.3) 和 [1.5.4版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.5.4),新增通义千问模型、Google Gemini - ->**2023.11.10:** [1.5.2版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.5.2),新增飞书通道、图像识别对话、黑名单配置 - ->**2023.11.10:** [1.5.0版本](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/1.5.0),新增 `gpt-4-turbo`, `dall-e-3`, `tts` 模型接入,完善图像理解&生成、语音识别&生成的多模态能力 - ->**2023.10.16:** 支持通过意图识别使用LinkAI联网搜索、数学计算、网页访问等插件,参考[插件文档](https://docs.link-ai.tech/platform/plugins) - ->**2023.09.26:** 插件增加 文件/文章链接 一键总结和对话的功能,使用参考:[插件说明](https://github.com/zhayujie/chatgpt-on-wechat/tree/master/plugins/linkai#3%E6%96%87%E6%A1%A3%E6%80%BB%E7%BB%93%E5%AF%B9%E8%AF%9D%E5%8A%9F%E8%83%BD) - ->**2023.08.08:** 接入百度文心一言模型,通过 [插件](https://github.com/zhayujie/chatgpt-on-wechat/tree/master/plugins/linkai) 支持 Midjourney 绘图 - ->**2023.06.12:** 接入 [LinkAI](https://link-ai.tech/console) 平台,可在线创建领域知识库,打造专属客服机器人。使用参考 [接入文档](https://link-ai.tech/platform/link-app/wechat)。 - -> **2023.04.26:** 支持企业微信应用号部署,兼容插件,并支持语音图片交互,私人助理理想选择,使用文档。(contributed by @lanvent in #944) - -> **2023.04.05:** 支持微信公众号部署,兼容插件,并支持语音图片交互,使用文档。(contributed by @JS00000 in #686) - -> **2023.04.05:** 增加能让ChatGPT使用工具的tool插件,使用文档。工具相关issue可反馈至chatgpt-tool-hub。(contributed by @goldfishh in #663) - -> **2023.03.25:** 支持插件化开发,目前已实现 多角色切换、文字冒险游戏、管理员指令、Stable Diffusion等插件,使用参考 #578。(contributed by @lanvent in #565) - -> **2023.03.09:** 基于 whisper API(后续已接入更多的语音API服务) 实现对语音消息的解析和回复,添加配置项 "speech_recognition":true 即可启用,使用参考 
#415。(contributed by wanggang1987 in #385) - -> **2022.12.12:** 项目框架搭建,首次接入ChatGPT模型 diff --git a/docs/zh/architecture.mdx b/docs/zh/architecture.mdx new file mode 100644 index 0000000..dd2ea66 --- /dev/null +++ b/docs/zh/architecture.mdx @@ -0,0 +1,95 @@ +--- +title: 项目架构 +description: CowAgent 2.0 的系统架构和核心设计 +--- + +# 项目架构 + +CowAgent 2.0 从简单的聊天机器人全面升级为超级智能助理,采用 Agent 架构设计,具备自主思考、规划任务、长期记忆和技能扩展等能力。 + +## 系统架构 + +CowAgent 的整体架构由以下核心模块组成: + +``` +┌──────────────────────────────────────────────────────┐ +│ Channels │ +│ Web │ Feishu │ DingTalk │ WeCom │ WeChat MP │ +└───────────────────────┬──────────────────────────────┘ + │ +┌───────────────────────▼──────────────────────────────┐ +│ Agent Core │ +│ ┌─────────────┐ ┌──────────┐ ┌───────────────────┐ │ +│ │ Task Planner│ │ Memory │ │ Skills Engine │ │ +│ └──────┬──────┘ └────┬─────┘ └────────┬──────────┘ │ +│ │ │ │ │ +│ ┌──────▼─────────────▼────────────────▼──────────┐ │ +│ │ Tools │ │ +│ │ File R/W │ Bash │ Browser │ Scheduler │ ... │ │ +│ └────────────────────────────────────────────────┘ │ +└───────────────────────┬──────────────────────────────┘ + │ +┌───────────────────────▼──────────────────────────────┐ +│ Models │ +│ OpenAI │ Claude │ Gemini │ MiniMax │ GLM │ ... │ +└──────────────────────────────────────────────────────┘ +``` + +### 核心模块说明 + +| 模块 | 说明 | +| --- | --- | +| **Channels** | 消息通道层,负责接收和发送消息,支持 Web、飞书、钉钉、企微、公众号等 | +| **Agent Core** | 智能体核心引擎,包括任务规划、记忆系统和技能引擎 | +| **Tools** | 工具层,Agent 通过工具访问操作系统资源,内置 10+ 种工具 | +| **Models** | 模型层,支持国内外主流大语言模型的统一接入 | + +## Agent 模式 + +启用 Agent 模式后,CowAgent 会以自主智能体的方式运行,核心工作流如下: + +1. **接收消息** - 通过通道接收用户输入 +2. **理解意图** - 分析任务需求和上下文 +3. **规划任务** - 将复杂任务分解为多个步骤 +4. **调用工具** - 选择合适的工具执行每个步骤 +5. **记忆更新** - 将重要信息存入长期记忆 +6. 
**返回结果** - 将执行结果发送回用户 + +## 工作空间 + +Agent 的工作空间默认位于 `~/cow` 目录,用于存储系统提示词、记忆文件、技能文件等: + +``` +~/cow/ +├── system.md # Agent system prompt +├── user.md # User profile +├── memory/ # Long-term memory storage +│ ├── core.md # Core memory +│ └── daily/ # Daily memory +├── skills/ # Custom skills +│ ├── skill-1/ +│ └── skill-2/ +└── .env # Secret keys for skills +``` + +## 核心配置 + +在 `config.json` 中配置 Agent 模式的核心参数: + +```json +{ + "agent": true, + "agent_workspace": "~/cow", + "agent_max_context_tokens": 40000, + "agent_max_context_turns": 30, + "agent_max_steps": 15 +} +``` + +| 参数 | 说明 | 默认值 | +| --- | --- | --- | +| `agent` | 是否启用 Agent 模式 | `true` | +| `agent_workspace` | 工作空间路径 | `~/cow` | +| `agent_max_context_tokens` | 最大上下文 token 数 | `40000` | +| `agent_max_context_turns` | 最大上下文记忆轮次 | `30` | +| `agent_max_steps` | 单次任务最大决策步数 | `15` | diff --git a/docs/zh/channels/dingtalk.mdx b/docs/zh/channels/dingtalk.mdx new file mode 100644 index 0000000..ab8e003 --- /dev/null +++ b/docs/zh/channels/dingtalk.mdx @@ -0,0 +1,40 @@ +--- +title: 钉钉 +description: 将 CowAgent 接入钉钉应用 +--- + +# 钉钉 + +通过钉钉开放平台创建智能机器人应用,将 CowAgent 接入钉钉。 + +## 一、创建应用 + +1. 进入 [钉钉开发者后台](https://open-dev.dingtalk.com/fe/app#/corp/app),点击 **创建应用**,填写应用信息 +2. 点击添加应用能力,选择 **机器人** 能力并添加 +3. 配置机器人信息后点击 **发布** + +## 二、项目配置 + +1. 在 **凭证与基础信息** 中获取 `Client ID` 和 `Client Secret` + +2. 填入 `config.json`: + +```json +{ + "channel_type": "dingtalk", + "dingtalk_client_id": "YOUR_CLIENT_ID", + "dingtalk_client_secret": "YOUR_CLIENT_SECRET" +} +``` + +3. 安装依赖: + +```bash +pip3 install dingtalk_stream +``` + +4. 
启动项目后,在钉钉开发者后台点击 **事件订阅**,点击 **已完成接入,验证连接通道**,显示"连接接入成功"即表示配置完成 + +## 三、使用 + +与机器人私聊或将机器人拉入企业群中均可开启对话。 diff --git a/docs/zh/channels/feishu.mdx b/docs/zh/channels/feishu.mdx new file mode 100644 index 0000000..6bc0bbe --- /dev/null +++ b/docs/zh/channels/feishu.mdx @@ -0,0 +1,69 @@ +--- +title: 飞书 +description: 将 CowAgent 接入飞书应用 +--- + +# 飞书 + +通过自建应用将 CowAgent 接入飞书,支持 WebSocket 长连接(推荐)和 Webhook 两种事件接收模式。 + +## 一、创建企业自建应用 + +### 1. 创建应用 + +进入 [飞书开放平台](https://open.feishu.cn/app/),点击 **创建企业自建应用**,填写必要信息后创建。 + +### 2. 添加机器人能力 + +在 **添加应用能力** 菜单中,为应用添加 **机器人** 能力。 + +### 3. 配置应用权限 + +点击 **权限管理**,粘贴以下权限配置,全选并批量开通: + +``` +im:message,im:message.group_at_msg,im:message.group_at_msg:readonly,im:message.p2p_msg,im:message.p2p_msg:readonly,im:message:send_as_bot,im:resource +``` + +## 二、项目配置 + +在 **凭证与基础信息** 中获取 `App ID` 和 `App Secret`,填入 `config.json`: + + + + 无需公网 IP,配置如下: + + ```json + { + "channel_type": "feishu", + "feishu_app_id": "YOUR_APP_ID", + "feishu_app_secret": "YOUR_APP_SECRET", + "feishu_event_mode": "websocket" + } + ``` + + 需安装依赖:`pip3 install lark-oapi` + + + 需要公网 IP,配置如下: + + ```json + { + "channel_type": "feishu", + "feishu_app_id": "YOUR_APP_ID", + "feishu_app_secret": "YOUR_APP_SECRET", + "feishu_token": "VERIFICATION_TOKEN", + "feishu_event_mode": "webhook", + "feishu_port": 9891 + } + ``` + + + +## 三、配置事件订阅 + +1. 启动项目后,在飞书开放平台点击 **事件与回调**,选择 **长连接** 方式并保存 +2. 点击 **添加事件**,搜索 "接收消息",选择 "接收消息v2.0",确认添加 +3.
点击 **版本管理与发布**,创建版本并申请线上发布,审核通过后即可使用 + +完成后在飞书中搜索机器人名称,即可开始对话。 diff --git a/docs/zh/channels/web.mdx b/docs/zh/channels/web.mdx new file mode 100644 index 0000000..6f1a46c --- /dev/null +++ b/docs/zh/channels/web.mdx @@ -0,0 +1,33 @@ +--- +title: Web 网页 +description: 通过 Web 网页端使用 CowAgent +--- + +# Web 网页 + +Web 是 CowAgent 的默认通道,启动后会自动运行 Web 控制台,通过浏览器即可与 Agent 对话。 + +## 配置 + +```json +{ + "channel_type": "web", + "web_port": 9899 +} +``` + +| 参数 | 说明 | 默认值 | +| --- | --- | --- | +| `channel_type` | 设为 `web` | `web` | +| `web_port` | Web 服务监听端口 | `9899` | + +## 使用 + +启动项目后访问: + +- 本地运行:`http://localhost:9899/chat` +- 服务器运行:`http://{SERVER_IP}:9899/chat`(将 `{SERVER_IP}` 替换为服务器的公网 IP 或域名) + + + 请确保服务器防火墙和安全组已放行对应端口。 + diff --git a/docs/zh/channels/wechatmp.mdx b/docs/zh/channels/wechatmp.mdx new file mode 100644 index 0000000..aaaa876 --- /dev/null +++ b/docs/zh/channels/wechatmp.mdx @@ -0,0 +1,58 @@ +--- +title: 微信公众号 +description: 将 CowAgent 接入微信公众号 +--- + +# 微信公众号 + +CowAgent 支持接入个人订阅号和企业服务号两种公众号类型。 + +| 类型 | 要求 | 特点 | +| --- | --- | --- | +| **个人订阅号** | 个人可申请 | 回复生成后需用户主动发消息获取 | +| **企业服务号** | 企业申请,需通过微信认证开通客服接口 | 回复生成后可主动推送给用户 | + + + 公众号仅支持服务器和 Docker 部署,需额外安装扩展依赖:`pip3 install -r requirements-optional.txt` + + +## 一、个人订阅号 + +在 `config.json` 中配置: + +```json +{ + "channel_type": "wechatmp", + "wechatmp_app_id": "YOUR_APP_ID", + "wechatmp_app_secret": "YOUR_APP_SECRET", + "wechatmp_aes_key": "", + "wechatmp_token": "YOUR_TOKEN", + "wechatmp_port": 80 +} +``` + +### 配置步骤 + +1. 在 [微信公众平台](https://mp.weixin.qq.com/) 的 **设置与开发 → 基本配置 → 服务器配置** 中获取参数 +2. 启用开发者密码,将服务器 IP 加入白名单 +3. 启动程序(监听 80 端口) +4. 在公众号后台 **启用服务器配置**,URL 格式为 `http://{HOST}/wx` + +## 二、企业服务号 + +与个人订阅号流程基本相同,差异如下: + +1. 在公众平台申请企业服务号并完成微信认证,确认已获得 **客服接口** 权限 +2. 在 `config.json` 中设置 `"channel_type": "wechatmp_service"` +3.
即使是较长耗时的回复,也可以主动推送给用户 + +```json +{ + "channel_type": "wechatmp_service", + "wechatmp_app_id": "YOUR_APP_ID", + "wechatmp_app_secret": "YOUR_APP_SECRET", + "wechatmp_aes_key": "", + "wechatmp_token": "YOUR_TOKEN", + "wechatmp_port": 80 +} +``` diff --git a/docs/zh/channels/wecom.mdx b/docs/zh/channels/wecom.mdx new file mode 100644 index 0000000..a635f59 --- /dev/null +++ b/docs/zh/channels/wecom.mdx @@ -0,0 +1,61 @@ +--- +title: 企业微信 +description: 将 CowAgent 接入企业微信自建应用 +--- + +# 企业微信 + +通过企业微信自建应用接入 CowAgent,支持企业内部人员单聊使用。 + + + 企业微信只能使用 Docker 部署或服务器 Python 部署,不支持本地运行模式。 + + +## 一、准备 + +需要的资源: + +1. 一台服务器(有公网 IP) +2. 注册一个企业微信(个人也可注册,但无法认证) +3. 认证企业微信还需要对应主体备案的域名 + +## 二、创建企业微信应用 + +1. 在 [企业微信管理后台](https://work.weixin.qq.com/wework_admin/frame#profile) **我的企业** 中获取 **企业ID** +2. 切换到 **应用管理**,点击创建应用,记录 `AgentId` 和 `Secret` +3. 点击 **设置API接收**,配置应用接口: + - URL 格式为 `http://ip:port/wxcomapp`(认证企业需使用备案域名) + - 随机获取 `Token` 和 `EncodingAESKey` 并保存 + +## 三、配置和运行 + +```json +{ + "channel_type": "wechatcom_app", + "wechatcom_corp_id": "YOUR_CORP_ID", + "wechatcomapp_token": "YOUR_TOKEN", + "wechatcomapp_secret": "YOUR_SECRET", + "wechatcomapp_agent_id": "YOUR_AGENT_ID", + "wechatcomapp_aes_key": "YOUR_AES_KEY", + "wechatcomapp_port": 9898 +} +``` + +| 参数 | 说明 | +| --- | --- | +| `wechatcom_corp_id` | 企业 ID | +| `wechatcomapp_token` | API 接收配置中的 Token | +| `wechatcomapp_secret` | 应用的 Secret | +| `wechatcomapp_agent_id` | 应用的 AgentId | +| `wechatcomapp_aes_key` | API 接收配置中的 EncodingAESKey | +| `wechatcomapp_port` | 监听端口,默认 9898 | + +启动程序后,回到企业微信后台保存 **消息服务器配置**,并将服务器 IP 添加到 **企业可信IP** 中。 + + + 如遇到配置失败:1. 确保防火墙和安全组已放行端口;2. 检查各参数配置是否一致;3. 
认证企业需配置备案域名。 + + +## 四、使用 + +在企业微信中搜索应用名称即可直接对话。如需让外部微信用户使用,可在 **我的企业 → 微信插件** 中分享邀请关注二维码。 diff --git a/docs/zh/configuration.mdx b/docs/zh/configuration.mdx new file mode 100644 index 0000000..d02358e --- /dev/null +++ b/docs/zh/configuration.mdx @@ -0,0 +1,101 @@ +--- +title: 配置说明 +description: CowAgent 的配置文件详解 +--- + +# 配置说明 + +配置文件的模板在根目录的 `config-template.json` 中,需复制该模板创建最终生效的 `config.json` 文件: + +```bash +cp config-template.json config.json +``` + +## 核心配置项 + +```json +{ + "channel_type": "web", + "model": "MiniMax-M2.5", + "agent": true, + "agent_workspace": "~/cow", + "agent_max_context_tokens": 40000, + "agent_max_context_turns": 30, + "agent_max_steps": 15 +} +``` + +| 参数 | 说明 | 默认值 | +| --- | --- | --- | +| `channel_type` | 接入渠道类型 | `web` | +| `model` | 模型名称 | `MiniMax-M2.5` | +| `agent` | 是否启用 Agent 模式 | `true` | +| `agent_workspace` | Agent 工作空间路径 | `~/cow` | +| `agent_max_context_tokens` | 最大上下文 tokens | `40000` | +| `agent_max_context_turns` | 最大上下文记忆轮次 | `30` | +| `agent_max_steps` | 单次任务最大决策步数 | `15` | + +## 模型 API Key 配置 + +根据所选模型填写对应的 API Key: + +```json +{ + "minimax_api_key": "", + "zhipu_ai_api_key": "", + "moonshot_api_key": "", + "ark_api_key": "", + "dashscope_api_key": "", + "claude_api_key": "", + "gemini_api_key": "", + "open_ai_api_key": "" +} +``` + +详细模型配置请参考 [模型说明](/zh/models)。 + +## 语音配置 + +```json +{ + "speech_recognition": false, + "group_speech_recognition": false, + "voice_reply_voice": false +} +``` + +| 参数 | 说明 | +| --- | --- | +| `speech_recognition` | 是否开启私聊语音识别 | +| `group_speech_recognition` | 是否开启群组语音识别 | +| `voice_reply_voice` | 是否使用语音回复语音 | + +## LinkAI 配置 + +```json +{ + "use_linkai": false, + "linkai_api_key": "", + "linkai_app_code": "" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `use_linkai` | 是否使用 LinkAI 接口 | +| `linkai_api_key` | LinkAI API Key,在 [控制台](https://link-ai.tech/console/interface) 创建 | +| `linkai_app_code` | LinkAI 应用或工作流的 code | + +## 代理配置 + +如需使用网络代理: + +```json +{ + "proxy": "127.0.0.1:7890" +} +``` + + 
+ 全部配置项说明可在项目 [`config.py`](https://github.com/zhayujie/chatgpt-on-wechat/blob/master/config.py) 文件中查看。 + diff --git a/docs/zh/index.mdx b/docs/zh/index.mdx new file mode 100644 index 0000000..69088cb --- /dev/null +++ b/docs/zh/index.mdx @@ -0,0 +1,64 @@ +--- +title: 项目介绍 +description: CowAgent - 基于大模型的超级AI助理 +--- + +# CowAgent + +**CowAgent** 是基于大模型的超级AI助理,能够主动思考和任务规划、操作计算机和外部资源、创造和执行Skills、拥有长期记忆并不断成长。 + +CowAgent 支持灵活切换多种模型,能处理文本、语音、图片、文件等多模态消息,可接入网页、飞书、钉钉、企业微信应用、微信公众号中使用,7×24小时运行于你的个人电脑或服务器中。 + +## 核心能力 + + + + 能够理解复杂任务并自主规划执行,持续思考和调用工具直到完成目标,支持通过工具操作访问文件、终端、浏览器、定时任务等系统资源。 + + + 自动将对话记忆持久化至本地文件和数据库中,包括全局记忆和天级记忆,支持关键词及向量检索。 + + + 实现了Skills创建和运行的引擎,内置多种技能,并支持通过自然语言对话完成自定义Skills开发。 + + + 支持对文本、图片、语音、文件等多类型消息进行解析、处理、生成、发送等操作。 + + + 支持 OpenAI, Claude, Gemini, DeepSeek, MiniMax, GLM, Qwen, Kimi, Doubao 等国内外主流模型厂商。 + + + 支持运行在本地计算机或服务器,可集成到网页、飞书、钉钉、微信公众号、企业微信应用中使用。 + + + +## 快速体验 + +在终端执行以下命令,即可一键安装、配置、启动 CowAgent: + +```bash +bash <(curl -sS https://cdn.link-ai.tech/code/cow/run.sh) +``` + +运行后默认会启动 Web 服务,通过访问 `http://localhost:9899/chat` 在网页端对话。 + + + + 查看完整的安装和运行指南 + + + 了解 CowAgent 的系统架构设计 + + + +## 声明 + +1. 本项目遵循 [MIT开源协议](https://github.com/zhayujie/chatgpt-on-wechat/blob/master/LICENSE),主要用于技术研究和学习,使用本项目时需遵守所在地法律法规、相关政策以及企业章程,禁止用于任何违法或侵犯他人权益的行为。 +2. Agent模式下Token使用量高于普通对话模式,请根据效果及成本综合选择模型。Agent具有访问所在操作系统的能力,请谨慎选择项目部署环境。 +3. 
CowAgent项目专注于开源技术开发,不会参与、授权或发行任何加密货币。 + +## 社区 + +添加小助手微信加入开源项目交流群: + + diff --git a/docs/zh/memory.mdx b/docs/zh/memory.mdx new file mode 100644 index 0000000..0117608 --- /dev/null +++ b/docs/zh/memory.mdx @@ -0,0 +1,68 @@ +--- +title: 记忆 +description: CowAgent 的长期记忆系统 +--- + +# 长期记忆 + +记忆系统让 Agent 能够长期记住重要信息,在对话中不断积累经验、理解用户偏好,真正实现自主思考和持续成长。 + +## 工作原理 + +Agent 会在以下场景主动存储记忆: + +- **用户分享重要信息时** - 自动识别偏好、决策、事实等关键信息并存储 +- **对话达到一定长度时** - 自动提取摘要,避免信息丢失 +- **需要检索时** - 智能搜索历史记忆,结合上下文进行回答 + +## 记忆类型 + +### 核心记忆 + +存储在 `~/cow/memory/core.md` 中,包含用户的长期偏好、重要决策、关键事实等不会随时间淡化的信息。 + +### 天级记忆 + +存储在 `~/cow/memory/daily/` 目录下,按日期组织,记录每天的对话摘要和关键事件。 + +## 首次启动 + +首次启动 Agent 时,Agent 会主动向用户询问关键信息,并记录至工作空间(默认 `~/cow`)中的以下文件: + +| 文件 | 说明 | +| --- | --- | +| `system.md` | Agent 的系统提示词和行为设定 | +| `user.md` | 用户身份信息和偏好 | +| `memory/core.md` | 核心记忆 | +| `memory/daily/` | 天级记忆目录 | + + + + + +## 记忆检索 + +记忆系统支持混合检索模式: + +- **关键词检索** - 基于关键词匹配历史记忆 +- **向量检索** - 基于语义相似度进行搜索,即使表述不同也能找到相关记忆 + +Agent 会在对话中根据需要自动触发记忆检索,将相关历史信息纳入上下文。 + +## 相关配置 + +在 `config.json` 中可调整记忆相关参数: + +```json +{ + "agent_workspace": "~/cow", + "agent_max_context_tokens": 40000, + "agent_max_context_turns": 30 +} +``` + +| 参数 | 说明 | 默认值 | +| --- | --- | --- | +| `agent_workspace` | 工作空间路径,记忆文件存储在此目录下 | `~/cow` | +| `agent_max_context_tokens` | 最大上下文 token 数,影响短期记忆容量 | `40000` | +| `agent_max_context_turns` | 最大上下文轮次,超出后自动丢弃最早对话 | `30` | diff --git a/docs/zh/models.mdx b/docs/zh/models.mdx new file mode 100644 index 0000000..0aa7d0c --- /dev/null +++ b/docs/zh/models.mdx @@ -0,0 +1,173 @@ +--- +title: 模型 +description: CowAgent 支持的模型及配置方式 +--- + +# 模型 + +CowAgent 支持国内外主流厂商的大语言模型,模型接口实现在项目的 `models/` 目录下。 + + + Agent 模式下推荐使用以下模型:MiniMax-M2.5、glm-5、kimi-k2.5、qwen3.5-plus、claude-sonnet-4-6、gemini-3.1-pro-preview,可根据效果及成本综合选择。 + + +## 模型配置 + +根据所选模型,在 `config.json` 中填写对应的模型名称和 API Key。 + +### MiniMax + +```json +{ + "model": "MiniMax-M2.5", + "minimax_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 
可填 `MiniMax-M2.5`、`MiniMax-M2.1`、`MiniMax-M2.1-lightning`、`MiniMax-M2` 等 | +| `minimax_api_key` | 在 [MiniMax控制台](https://platform.minimaxi.com/user-center/basic-information/interface-key) 创建 | + +### 智谱AI (GLM) + +```json +{ + "model": "glm-5", + "zhipu_ai_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 可填 `glm-5`、`glm-4.7`、`glm-4-plus`、`glm-4-flash` 等,参考 [模型编码](https://bigmodel.cn/dev/api/normal-model/glm-4) | +| `zhipu_ai_api_key` | 在 [智谱AI控制台](https://www.bigmodel.cn/usercenter/proj-mgmt/apikeys) 创建 | + +### 通义千问 (Qwen) + +```json +{ + "model": "qwen3.5-plus", + "dashscope_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 可填 `qwen3.5-plus`、`qwen3-max`、`qwen-max`、`qwen-plus` 等 | +| `dashscope_api_key` | 在 [百炼控制台](https://bailian.console.aliyun.com/?tab=model#/api-key) 创建 | + +### Kimi (Moonshot) + +```json +{ + "model": "kimi-k2.5", + "moonshot_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 可填 `kimi-k2.5`、`kimi-k2`、`moonshot-v1-8k`、`moonshot-v1-32k` 等 | +| `moonshot_api_key` | 在 [Moonshot控制台](https://platform.moonshot.cn/console/api-keys) 创建 | + +### 豆包 (Doubao) + +```json +{ + "model": "doubao-seed-2-0-code-preview-260215", + "ark_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 可填 `doubao-seed-2-0-code-preview-260215`、`doubao-seed-2-0-pro-260215` 等 | +| `ark_api_key` | 在 [火山方舟控制台](https://console.volcengine.com/ark/region:ark+cn-beijing/apikey) 创建 | + +### Claude + +```json +{ + "model": "claude-sonnet-4-6", + "claude_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 支持 `claude-sonnet-4-6`、`claude-opus-4-6`、`claude-sonnet-4-5` 等,参考 [官方模型](https://docs.anthropic.com/en/docs/about-claude/models/overview) | +| `claude_api_key` | 在 [Claude控制台](https://console.anthropic.com/settings/keys) 创建 | +| `claude_api_base` | 可选,默认为 `https://api.anthropic.com/v1`,可接入第三方代理 | + +### Gemini + +```json +{ + "model": "gemini-3.1-pro-preview", + 
"gemini_api_key": "YOUR_API_KEY" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 支持 `gemini-3.1-pro-preview`、`gemini-3-flash-preview`、`gemini-2.5-pro` 等,参考 [官方文档](https://ai.google.dev/gemini-api/docs/models) | +| `gemini_api_key` | 在 [Google AI Studio](https://aistudio.google.com/app/apikey) 创建 | + +### OpenAI + +```json +{ + "model": "gpt-4.1-mini", + "open_ai_api_key": "YOUR_API_KEY", + "open_ai_api_base": "https://api.openai.com/v1" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | 支持 o 系列、gpt-5.2、gpt-4.1 等,参考 [模型列表](https://platform.openai.com/docs/models) | +| `open_ai_api_key` | 在 [OpenAI平台](https://platform.openai.com/api-keys) 创建 | +| `open_ai_api_base` | 可选,修改可接入第三方代理接口 | + +### DeepSeek + +```json +{ + "model": "deepseek-chat", + "open_ai_api_key": "YOUR_API_KEY", + "open_ai_api_base": "https://api.deepseek.com/v1", + "bot_type": "chatGPT" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `model` | `deepseek-chat`(V3)、`deepseek-reasoner`(R1) | +| `bot_type` | 使用 OpenAI 兼容方式接入,固定为 `chatGPT` | + +## OpenAI 兼容接入 + +除官方 SDK 接入外,多数模型也支持 OpenAI 兼容方式接入,将 `bot_type` 设为 `chatGPT`,并配置对应的 `open_ai_api_base` 和 `open_ai_api_key` 即可。 + +## LinkAI 平台 + +通过 [LinkAI](https://link-ai.tech) 平台可灵活切换多种模型,并支持知识库、工作流、插件等 Agent 能力。 + +```json +{ + "use_linkai": true, + "linkai_api_key": "YOUR_API_KEY", + "linkai_app_code": "YOUR_APP_CODE" +} +``` + +| 参数 | 说明 | +| --- | --- | +| `use_linkai` | 设为 `true` 启用 LinkAI 接口 | +| `linkai_api_key` | 在 [控制台](https://link-ai.tech/console/interface) 创建 | +| `linkai_app_code` | LinkAI 智能体的 code,选填 | + + + 全部模型名称可参考项目 [`common/const.py`](https://github.com/zhayujie/chatgpt-on-wechat/blob/master/common/const.py) 文件。 + diff --git a/docs/zh/quick-start.mdx b/docs/zh/quick-start.mdx new file mode 100644 index 0000000..253b6f6 --- /dev/null +++ b/docs/zh/quick-start.mdx @@ -0,0 +1,120 @@ +--- +title: 快速开始 +description: 一键安装或手动部署 CowAgent +--- + +# 快速开始 + +CowAgent 支持 Linux、macOS、Windows 操作系统,可在个人计算机及服务器上运行,需安装 Python 3.7 ~ 3.12(推荐 3.9)。 + +## 一键安装 + 
+项目提供了一键安装、配置、启动、管理程序的脚本,推荐使用脚本快速运行: + +```bash +bash <(curl -sS https://cdn.link-ai.tech/code/cow/run.sh) +``` + +脚本自动执行以下流程: + +1. 检查 Python 环境(需要 Python 3.7+) +2. 安装必要工具(git、curl 等) +3. 克隆项目代码到 `~/chatgpt-on-wechat` +4. 安装 Python 依赖 +5. 引导配置 AI 模型和通信渠道 +6. 启动服务 + +### 脚本管理命令 + +安装完成后,可使用以下命令管理服务: + +| 命令 | 说明 | +| --- | --- | +| `./run.sh start` | 启动服务 | +| `./run.sh stop` | 停止服务 | +| `./run.sh restart` | 重启服务 | +| `./run.sh status` | 查看运行状态 | +| `./run.sh logs` | 查看实时日志 | +| `./run.sh config` | 重新配置 | +| `./run.sh update` | 更新项目代码 | + +## 手动安装 + +### 1. 克隆项目代码 + +```bash +git clone https://github.com/zhayujie/chatgpt-on-wechat +cd chatgpt-on-wechat/ +``` + + + 若遇到网络问题可使用国内仓库地址:https://gitee.com/zhayujie/chatgpt-on-wechat + + +### 2. 安装依赖 + +核心依赖(必选): + +```bash +pip3 install -r requirements.txt +``` + +扩展依赖(可选,建议安装): + +```bash +pip3 install -r requirements-optional.txt +``` + +### 3. 配置 + +复制配置文件模板并编辑: + +```bash +cp config-template.json config.json +``` + +填写核心配置项,详细说明参考 [配置说明](/zh/configuration)。 + +### 4. 运行 + +**本地运行:** + +```bash +python3 app.py +``` + +运行后默认启动 Web 服务,访问 `http://localhost:9899/chat` 开始对话。 + +**服务器后台运行:** + +```bash +nohup python3 app.py & tail -f nohup.out +``` + +## Docker 部署 + +使用 Docker 部署无需下载源码和安装依赖。Agent 模式下更推荐使用源码部署以获得更多系统访问能力。 + + + 需要安装 [Docker](https://docs.docker.com/engine/install/) 和 docker-compose。 + + +**1. 下载配置文件** + +```bash +wget https://cdn.link-ai.tech/code/cow/docker-compose.yml +``` + +打开 `docker-compose.yml` 填写所需配置。 + +**2. 启动容器** + +```bash +sudo docker compose up -d +``` + +**3. 
查看日志** + +```bash +sudo docker logs -f chatgpt-on-wechat +``` diff --git a/docs/zh/releases/overview.mdx b/docs/zh/releases/overview.mdx new file mode 100644 index 0000000..7951d6b --- /dev/null +++ b/docs/zh/releases/overview.mdx @@ -0,0 +1,25 @@ +--- +title: 更新日志 +description: CowAgent 版本更新历史 +--- + +# 更新日志 + +| 版本 | 日期 | 说明 | +| --- | --- | --- | +| [2.0.0](/zh/releases/v2.0.0) | 2026.02.03 | 全面升级为超级 Agent 助理 | +| 1.7.6 | 2025.05.23 | Web Channel 优化、AgentMesh 多智能体插件 | +| 1.7.5 | 2025.04.11 | wechatferry 协议、DeepSeek 模型 | +| 1.7.4 | 2024.12.13 | Gemini 2.0 模型、Web Channel | +| 1.7.3 | 2024.10.31 | 稳定性提升、数据库功能 | +| 1.7.2 | 2024.09.26 | 一键安装脚本、o1 模型 | +| 1.7.0 | 2024.08.02 | 讯飞 4.0 模型、知识库引用 | +| 1.6.9 | 2024.07.19 | gpt-4o-mini、阿里语音识别 | +| 1.6.8 | 2024.07.05 | Claude 3.5、Gemini 1.5 Pro | +| 1.6.0 | 2024.04.26 | Kimi 接入、gpt-4-turbo 升级 | +| 1.5.8 | 2024.03.26 | GLM-4、Claude-3、edge-tts | +| 1.5.2 | 2023.11.10 | 飞书通道、图像识别对话 | +| 1.5.0 | 2023.11.10 | gpt-4-turbo、dall-e-3、tts 多模态 | +| 1.0.0 | 2022.12.12 | 项目创建,首次接入 ChatGPT 模型 | + +更多历史版本请查看 [GitHub Releases](https://github.com/zhayujie/chatgpt-on-wechat/releases)。 diff --git a/docs/zh/releases/v2.0.0.mdx b/docs/zh/releases/v2.0.0.mdx new file mode 100644 index 0000000..8618e96 --- /dev/null +++ b/docs/zh/releases/v2.0.0.mdx @@ -0,0 +1,107 @@ +--- +title: v2.0.0 +description: CowAgent 2.0 - 从聊天机器人到超级智能助理的全面升级 +--- + +# CowAgent 2.0 + +CowAgent 2.0 实现了从聊天机器人到**超级智能助理**的全面升级!现在它能够主动思考和规划任务、拥有长期记忆、操作计算机和外部资源、创造和执行技能,真正理解你并和你一起成长。 + +**发布日期**:2026.02.03 | [GitHub Release](https://github.com/zhayujie/chatgpt-on-wechat/releases/tag/2.0.0) + +## 重点更新 + +### Agent 核心能力 + +- **复杂任务规划**:能够理解复杂任务并自主规划执行,持续思考和调用工具直到完成目标,支持多轮推理和上下文理解 +- **长期记忆**:自动将对话记忆持久化至本地文件和数据库中,包括全局记忆和天级记忆,支持关键词及向量检索 +- **内置系统工具**:内置实现 10+ 种工具,包括文件操作、Bash 终端、浏览器、文件发送、定时任务、记忆管理等 +- **Skills**:新增 Skill 运行引擎,内置多种技能,并支持通过自然语言对话完成自定义 Skills 开发 +- **安全和成本**:通过秘钥管理工具、提示词控制、系统权限等手段控制 Agent 的访问安全;通过最大记忆轮次、最大上下文 token、工具执行步数对 token 成本进行限制 + +### 其他更新 + +- 
**渠道优化**:飞书及钉钉接入渠道支持长连接接入(无需公网 IP)、支持图片/文件消息的接收和发送 +- **模型更新**:新增 claude-sonnet-4-5、gemini-3-pro-preview、glm-4.7、MiniMax-M2.1、qwen3-max 等最新模型 +- **部署优化**:增加一键安装、配置、运行、管理的脚本,简化部署流程 + +## 长期记忆系统 + +Agent 会在用户分享重要信息时主动存储,也会在对话达到一定长度时自动提取摘要。支持关键词检索和向量检索的混合检索模式。 + +**首次启动**时,Agent 会主动询问关键信息,并记录至工作空间(默认 `~/cow`)中的智能体设定、用户身份、记忆文件中。 + +**长期对话**中,Agent 会智能记录或检索记忆,不断更新自身设定、用户偏好,总结经验和教训,真正实现自主思考和持续成长。 + + + + + +## 任务规划与工具调用 + +Agent 根据任务需求智能选择和调用工具,完成各类复杂操作。 + +### 终端和文件访问 + +最基础和核心的工具能力,用户可通过手机端与 Agent 交互,操作个人电脑或服务器上的资源: + + + + + +### 应用编程能力 + +基于编程能力和系统访问能力,Agent 可实现从信息搜索、素材生成、编码、测试、部署、Nginx 配置到发布的 **Vibe Coding 全流程**,通过手机端一句指令即可快速完成应用 Demo。 + + + + + +### 定时任务 + +支持 **一次性任务、固定时间间隔、Cron 表达式** 三种形式,任务触发可选择 **固定消息发送** 或 **Agent 动态任务执行** 两种模式: + + + + + +### 环境变量管理 + +通过 `env_config` 工具管理技能所需密钥,支持对话式更新,内置安全保护和脱敏策略: + + + + + +## 技能系统 + +每个 Skill 由说明文件、运行脚本(可选)、资源(可选)组成,为 Agent 提供无限扩展性。 + +### 技能创造器 + +通过对话方式快速创建技能,将工作流程固化或对接任意第三方接口: + + + + + +### 搜索和图像识别 + +- **搜索技能**:内置 `bocha-search`(博查搜索),配置 `BOCHA_SEARCH_API_KEY` 即可使用 +- **图像识别**:支持 `gpt-4.1-mini`、`gpt-4.1` 等模型,配置 `OPENAI_API_KEY` 即可使用 + + + + + +### 三方知识库和插件 + +`linkai-agent` 技能可将 [LinkAI](https://link-ai.tech/) 上的所有智能体作为 Skill 使用,实现多智能体决策: + + + + + +## 参与共建 + +2.0 版本后,项目将持续升级 Agent 能力、拓展接入渠道、内置工具、技能系统,降低模型成本和提升安全性。欢迎 [提出反馈](https://github.com/zhayujie/chatgpt-on-wechat/issues) 和 [贡献代码](https://github.com/zhayujie/chatgpt-on-wechat/pulls)。 diff --git a/docs/zh/skills.mdx b/docs/zh/skills.mdx new file mode 100644 index 0000000..13b9fa3 --- /dev/null +++ b/docs/zh/skills.mdx @@ -0,0 +1,103 @@ +--- +title: 技能 +description: CowAgent 的技能系统 - 为 Agent 提供无限扩展性 +--- + +# 技能系统 + +技能系统为 Agent 提供无限的扩展性。每个 Skill 由说明文件、运行脚本(可选)、资源(可选)组成,描述如何完成特定类型的任务。通过 Skill 可以让 Agent 遵循说明完成复杂流程,调用各类工具或对接第三方系统。 + +## 技能类型 + +### 内置技能 + +位于项目的 `skills/` 目录下,根据依赖条件(API Key、系统命令等)自动判断是否启用。内置技能包括: + +| 技能 | 说明 | +| --- | --- | +| `skill-creator` | 技能创造器,通过对话创建自定义技能 | +| `bocha-search` | 博查搜索,联网搜索能力 | +| `openai-image-vision` | 图像识别,使用 OpenAI 视觉模型 | +|
`linkai-agent` | LinkAI 智能体,对接第三方知识库和插件 | +| `web-scraper` | 网页抓取,获取网页内容 | + +### 自定义技能 + +由用户通过对话创建,存放在工作空间中(`~/cow/skills/`),基于自定义技能可以实现任何复杂的业务流程和第三方系统对接。 + +## 创建技能 + +通过内置的 `skill-creator` 技能,可以使用自然语言对话快速创建技能。你可以: + +- 将工作流程固化为技能 +- 将接口文档和示例发送给 Agent,让它直接完成对接 +- 创建定制化的自动化流程 + + + + + +## 搜索技能 + +系统内置了 `bocha-search`(博查搜索)技能,配置步骤: + +1. 在 [博查开放平台](https://open.bochaai.com/) 创建 API Key +2. 通过 `env_config` 工具配置 `BOCHA_SEARCH_API_KEY`,或直接发送给 Agent + +## 图像识别 + +内置 `openai-image-vision` 技能,支持使用 `gpt-4.1-mini`、`gpt-4.1` 等图像识别模型。 + +配置 `OPENAI_API_KEY` 后即可使用,可通过 `config.json` 或 `env_config` 工具进行维护。 + + + + + +## LinkAI 智能体 + +`linkai-agent` 技能可以将 [LinkAI](https://link-ai.tech/) 上的所有智能体作为 Skill 交给 Agent 使用,实现多智能体决策。 + +### 配置方式 + +1. 配置 `LINKAI_API_KEY`(通过 `env_config` 工具或 `config.json` 中的 `linkai_api_key`) +2. 在 `skills/linkai-agent/config.json` 中添加智能体说明: + +```json +{ + "apps": [ + { + "app_code": "G7z6vKwp", + "app_name": "LinkAI客服助手", + "app_description": "当用户需要了解LinkAI平台相关问题时才选择该助手" + }, + { + "app_code": "SFY5x7JR", + "app_name": "内容创作助手", + "app_description": "当用户需要创作图片或视频时才使用该助手" + } + ] +} +``` + +Agent 会根据智能体的名称和描述进行决策,通过 `app_code` 调用对应的应用或工作流。 + + + + + +## 技能文件结构 + +每个技能目录的结构如下: + +``` +skills/ +├── my-skill/ +│ ├── SKILL.md # 技能说明文件 +│ ├── run.py # 运行脚本(可选) +│ └── resources/ # 附加资源(可选) +``` + + + 自定义技能的开发文档可参考 [Skill 创造器说明](https://github.com/zhayujie/chatgpt-on-wechat/blob/master/skills/skill-creator/SKILL.md)。 +