diff --git a/agent/prompt/builder.py b/agent/prompt/builder.py
index 25179ba..1727c0b 100644
--- a/agent/prompt/builder.py
+++ b/agent/prompt/builder.py
@@ -237,8 +237,8 @@ def _build_tooling_section(tools: List[Any], language: str) -> List[str]:
         "叙述要求: 保持简洁、信息密度高，避免重复显而易见的步骤。",
         "",
         "完成标准:",
-        "- 确保用户的需求得到实际解决，而不仅仅是制定计划",
-        "- 当任务需要多次工具调用时，持续推进直到完成",
+        "- 确保用户的需求得到实际解决，而不仅仅是制定计划。",
+        "- 当任务需要多次工具调用时，持续推进直到完成, 解决完后向用户报告结果或回复用户的问题",
         "- 每次工具调用后，评估是否已获得足够信息来推进或完成任务",
         "- 避免重复调用相同的工具和相同参数获取相同的信息，除非用户明确要求",
         "",
diff --git a/agent/protocol/agent.py b/agent/protocol/agent.py
index 5c4f994..6759ebc 100644
--- a/agent/protocol/agent.py
+++ b/agent/protocol/agent.py
@@ -360,6 +360,9 @@ class Agent:
 
         # Update agent's message history from executor
         self.messages = executor.messages
+        
+        # Store executor reference for agent_bridge to access files_to_send
+        self.stream_executor = executor
 
         # Execute all post-process tools
         self._execute_post_process_tools()
diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py
index 7ea1eeb..49812b9 100644
--- a/agent/protocol/agent_stream.py
+++ b/agent/protocol/agent_stream.py
@@ -58,6 +58,9 @@ class AgentStreamExecutor:
         
         # Tool failure tracking for retry protection
         self.tool_failure_history = []  # List of (tool_name, args_hash, success) tuples
+        
+        # Track files to send (populated by read tool)
+        self.files_to_send = []  # List of file metadata dicts
 
     def _emit_event(self, event_type: str, data: dict = None):
         """Emit event"""
@@ -191,21 +194,47 @@ class AgentStreamExecutor:
                             logger.info(
                                 f"Memory flush recommended: tokens={current_tokens}, turns={self.agent.memory_manager.flush_manager.turn_count}")
 
-                # Call LLM
-                assistant_msg, tool_calls = self._call_llm_stream()
+                # Call LLM (enable retry_on_empty for better reliability)
+                assistant_msg, tool_calls = self._call_llm_stream(retry_on_empty=True)
                 final_response = assistant_msg
 
                 # No tool calls, end loop
                 if not tool_calls:
                     # 检查是否返回了空响应
                     if not assistant_msg:
-                        logger.warning(f"[Agent] LLM returned empty response (no content and no tool calls)")
+                        logger.warning(f"[Agent] LLM returned empty response after retry (no content and no tool calls)")
+                        logger.info(f"[Agent] This usually happens when LLM thinks the task is complete after tool execution")
                         
-                        # 生成通用的友好提示
-                        final_response = (
-                            "抱歉，我暂时无法生成回复。请尝试换一种方式描述你的需求，或稍后再试。"
-                        )
-                        logger.info(f"Generated fallback response for empty LLM output")
+                        # 如果之前有工具调用，强制要求 LLM 生成文本回复
+                        if turn > 1:
+                            logger.info(f"[Agent] Requesting explicit response from LLM...")
+                            
+                            # 添加一条消息，明确要求回复用户
+                            self.messages.append({
+                                "role": "user",
+                                "content": [{
+                                    "type": "text",
+                                    "text": "请向用户说明刚才工具执行的结果或回答用户的问题。"
+                                }]
+                            })
+                            
+                            # 再调用一次 LLM
+                            assistant_msg, tool_calls = self._call_llm_stream(retry_on_empty=False)
+                            final_response = assistant_msg
+                            
+                            # 如果还是空，才使用 fallback
+                            if not assistant_msg and not tool_calls:
+                                logger.warning(f"[Agent] Still empty after explicit request")
+                                final_response = (
+                                    "抱歉，我暂时无法生成回复。请尝试换一种方式描述你的需求，或稍后再试。"
+                                )
+                                logger.info(f"Generated fallback response for empty LLM output")
+                        else:
+                            # 第一轮就空回复，直接 fallback
+                            final_response = (
+                                "抱歉，我暂时无法生成回复。请尝试换一种方式描述你的需求，或稍后再试。"
+                            )
+                            logger.info(f"Generated fallback response for empty LLM output")
                     else:
                         logger.info(f"💭 {assistant_msg[:150]}{'...' if len(assistant_msg) > 150 else ''}")
                     
@@ -235,6 +264,14 @@ class AgentStreamExecutor:
                         result = self._execute_tool(tool_call)
                         tool_results.append(result)
                         
+                        # Check if this is a file to send (from read tool)
+                        if result.get("status") == "success" and isinstance(result.get("result"), dict):
+                            result_data = result.get("result")
+                            if result_data.get("type") == "file_to_send":
+                                # Store file metadata for later sending
+                                self.files_to_send.append(result_data)
+                                logger.info(f"📎 检测到待发送文件: {result_data.get('file_name', result_data.get('path'))}")
+                        
                         # Check for critical error - abort entire conversation
                         if result.get("status") == "critical_error":
                             logger.error(f"💥 检测到严重错误，终止对话")
@@ -392,6 +429,7 @@ class AgentStreamExecutor:
         # Streaming response
         full_content = ""
         tool_calls_buffer = {}  # {index: {id, name, arguments}}
+        stop_reason = None  # Track why the stream stopped
 
         try:
             stream = self.model.call_stream(request)
@@ -404,21 +442,47 @@ class AgentStreamExecutor:
                     if isinstance(error_data, dict):
                         error_msg = error_data.get("message", chunk.get("message", "Unknown error"))
                         error_code = error_data.get("code", "")
+                        error_type = error_data.get("type", "")
                     else:
                         error_msg = chunk.get("message", str(error_data))
                         error_code = ""
+                        error_type = ""
                     
                     status_code = chunk.get("status_code", "N/A")
-                    logger.error(f"API Error: {error_msg} (Status: {status_code}, Code: {error_code})")
-                    logger.error(f"Full error chunk: {chunk}")
                     
-                    # Raise exception with full error message for retry logic
-                    raise Exception(f"{error_msg} (Status: {status_code})")
+                    # Log error with all available information
+                    logger.error(f"🔴 Stream API Error:")
+                    logger.error(f"   Message: {error_msg}")
+                    logger.error(f"   Status Code: {status_code}")
+                    logger.error(f"   Error Code: {error_code}")
+                    logger.error(f"   Error Type: {error_type}")
+                    logger.error(f"   Full chunk: {chunk}")
+                    
+                    # Check if this is a context overflow error (keyword-based, works for all models)
+                    # Don't rely on specific status codes as different providers use different codes
+                    error_msg_lower = error_msg.lower()
+                    is_overflow = any(keyword in error_msg_lower for keyword in [
+                        'context length exceeded', 'maximum context length', 'prompt is too long',
+                        'context overflow', 'context window', 'too large', 'exceeds model context',
+                        'request_too_large', 'request exceeds the maximum size', 'tokens exceed'
+                    ])
+                    
+                    if is_overflow:
+                        # Mark as context overflow for special handling
+                        raise Exception(f"[CONTEXT_OVERFLOW] {error_msg} (Status: {status_code})")
+                    else:
+                        # Raise exception with full error message for retry logic
+                        raise Exception(f"{error_msg} (Status: {status_code}, Code: {error_code}, Type: {error_type})")
 
                 # Parse chunk
                 if isinstance(chunk, dict) and "choices" in chunk:
                     choice = chunk["choices"][0]
                     delta = choice.get("delta", {})
+                    
+                    # Capture finish_reason if present
+                    finish_reason = choice.get("finish_reason")
+                    if finish_reason:
+                        stop_reason = finish_reason
 
                     # Handle text content
                     if "content" in delta and delta["content"]:
@@ -449,9 +513,46 @@ class AgentStreamExecutor:
                                     tool_calls_buffer[index]["arguments"] += func["arguments"]
 
         except Exception as e:
-            error_str = str(e).lower()
+            error_str = str(e)
+            error_str_lower = error_str.lower()
+            
+            # Check if error is context overflow (non-retryable, needs session reset)
+            # Method 1: Check for special marker (set in stream error handling above)
+            is_context_overflow = '[context_overflow]' in error_str_lower
+            
+            # Method 2: Fallback to keyword matching for non-stream errors
+            if not is_context_overflow:
+                is_context_overflow = any(keyword in error_str_lower for keyword in [
+                    'context length exceeded', 'maximum context length', 'prompt is too long',
+                    'context overflow', 'context window', 'too large', 'exceeds model context',
+                    'request_too_large', 'request exceeds the maximum size'
+                ])
+            
+            # Check if error is message format error (incomplete tool_use/tool_result pairs)
+            # This happens when previous conversation had tool failures
+            is_message_format_error = any(keyword in error_str_lower for keyword in [
+                'tool_use', 'tool_result', 'without', 'immediately after',
+                'corresponding', 'must have', 'each'
+            ]) and 'status: 400' in error_str_lower
+            
+            if is_context_overflow or is_message_format_error:
+                error_type = "context overflow" if is_context_overflow else "message format error"
+                logger.error(f"💥 {error_type} detected: {e}")
+                # Clear message history to recover
+                logger.warning("🔄 Clearing conversation history to recover")
+                self.messages.clear()
+                # Raise special exception with user-friendly message
+                if is_context_overflow:
+                    raise Exception(
+                        "抱歉，对话历史过长导致上下文溢出。我已清空历史记录，请重新描述你的需求。"
+                    )
+                else:
+                    raise Exception(
+                        "抱歉，之前的对话出现了问题。我已清空历史记录，请重新发送你的消息。"
+                    )
+            
             # Check if error is retryable (timeout, connection, rate limit, server busy, etc.)
-            is_retryable = any(keyword in error_str for keyword in [
+            is_retryable = any(keyword in error_str_lower for keyword in [
                 'timeout', 'timed out', 'connection', 'network', 
                 'rate limit', 'overloaded', 'unavailable', 'busy', 'retry',
                 '429', '500', '502', '503', '504', '512'
@@ -505,11 +606,12 @@ class AgentStreamExecutor:
 
         # Check for empty response and retry once if enabled
         if retry_on_empty and not full_content and not tool_calls:
-            logger.warning(f"⚠️  LLM returned empty response, retrying once...")
+            logger.warning(f"⚠️  LLM returned empty response (stop_reason: {stop_reason}), retrying once...")
             self._emit_event("message_end", {
                 "content": "",
                 "tool_calls": [],
-                "empty_retry": True
+                "empty_retry": True,
+                "stop_reason": stop_reason
             })
             # Retry without retry flag to avoid infinite loop
             return self._call_llm_stream(
diff --git a/agent/skills/loader.py b/agent/skills/loader.py
index 0bc8f4a..5d67c47 100644
--- a/agent/skills/loader.py
+++ b/agent/skills/loader.py
@@ -137,6 +137,10 @@ class SkillLoader:
         name = frontmatter.get('name', parent_dir_name)
         description = frontmatter.get('description', '')
         
+        # Special handling for linkai-agent: dynamically load apps from config.json
+        if name == 'linkai-agent':
+            description = self._load_linkai_agent_description(skill_dir, description)
+        
         if not description or not description.strip():
             diagnostics.append(f"Skill {name} has no description: {file_path}")
             return LoadSkillsResult(skills=[], diagnostics=diagnostics)
@@ -161,6 +165,45 @@ class SkillLoader:
         
         return LoadSkillsResult(skills=[skill], diagnostics=diagnostics)
     
+    def _load_linkai_agent_description(self, skill_dir: str, default_description: str) -> str:
+        """
+        Build the linkai-agent skill description from the "apps" list in config.json
+        (config.json.template is used when config.json does not exist).
+        :param skill_dir: Directory expected to contain config.json or its template
+        :param default_description: Returned unchanged when no file exists, "apps" is empty, or loading fails
+        :return: 'Call LinkAI apps/workflows. ' followed by one "name(code: description)" entry per app
+        """
+        import json
+        
+        config_path = os.path.join(skill_dir, "config.json")
+        template_path = os.path.join(skill_dir, "config.json.template")
+        
+        # Prefer config.json; use the template only when config.json does not exist
+        config_file = config_path if os.path.exists(config_path) else template_path
+        
+        if not os.path.exists(config_file):
+            return default_description
+        
+        try:
+            with open(config_file, 'r', encoding='utf-8') as f:
+                config = json.load(f)
+            
+            apps = config.get("apps", [])
+            if not apps:
+                return default_description
+            
+            # One "name(code: description)" entry per app, joined with "; "; a missing key raises and is handled below
+            app_descriptions = "; ".join([
+                f"{app['app_name']}({app['app_code']}: {app['app_description']})"
+                for app in apps
+            ])
+            
+            return f"Call LinkAI apps/workflows. {app_descriptions}"
+        
+        except Exception as e:
+            logger.warning(f"[SkillLoader] Failed to load linkai-agent config: {e}")
+            return default_description
+    
     def load_all_skills(
         self,
         managed_dir: Optional[str] = None,
diff --git a/agent/tools/__init__.py b/agent/tools/__init__.py
index b449c25..fe37aec 100644
--- a/agent/tools/__init__.py
+++ b/agent/tools/__init__.py
@@ -8,6 +8,7 @@ from agent.tools.write.write import Write
 from agent.tools.edit.edit import Edit
 from agent.tools.bash.bash import Bash
 from agent.tools.ls.ls import Ls
+from agent.tools.send.send import Send
 
 # Import memory tools
 from agent.tools.memory.memory_search import MemorySearchTool
@@ -112,6 +113,7 @@ __all__ = [
     'Edit',
     'Bash',
     'Ls',
+    'Send',
     'MemorySearchTool',
     'MemoryGetTool',
     'EnvConfig',
diff --git a/agent/tools/bash/bash.py b/agent/tools/bash/bash.py
index e9b6ca0..4d7e564 100644
--- a/agent/tools/bash/bash.py
+++ b/agent/tools/bash/bash.py
@@ -3,12 +3,14 @@ Bash tool - Execute bash commands
 """
 
 import os
+import sys
 import subprocess
 import tempfile
 from typing import Dict, Any
 
 from agent.tools.base_tool import BaseTool, ToolResult
 from agent.tools.utils.truncate import truncate_tail, format_size, DEFAULT_MAX_LINES, DEFAULT_MAX_BYTES
+from common.log import logger
 
 
 class Bash(BaseTool):
@@ -60,6 +62,12 @@ IMPORTANT SAFETY GUIDELINES:
         if not command:
             return ToolResult.fail("Error: command parameter is required")
 
+        # Security check: Prevent accessing sensitive config files
+        if "~/.cow/.env" in command or "~/.cow" in command:
+            return ToolResult.fail(
+                "Error: Access denied. API keys and credentials must be accessed through the env_config tool only."
+            )
+
         # Optional safety check - only warn about extremely dangerous commands
         if self.safety_mode:
             warning = self._get_safety_warning(command)
@@ -68,7 +76,31 @@ IMPORTANT SAFETY GUIDELINES:
                     f"Safety Warning: {warning}\n\nIf you believe this command is safe and necessary, please ask the user for confirmation first, explaining what the command does and why it's needed.")
 
         try:
-            # Execute command
+            # Prepare environment with .env file variables
+            env = os.environ.copy()
+            
+            # Load environment variables from ~/.cow/.env if it exists
+            env_file = os.path.expanduser("~/.cow/.env")
+            if os.path.exists(env_file):
+                try:
+                    from dotenv import dotenv_values
+                    env_vars = dotenv_values(env_file)
+                    env.update(env_vars)
+                    logger.debug(f"[Bash] Loaded {len(env_vars)} variables from {env_file}")
+                except ImportError:
+                    logger.debug("[Bash] python-dotenv not installed, skipping .env loading")
+                except Exception as e:
+                    logger.debug(f"[Bash] Failed to load .env: {e}")
+            
+            # Debug logging
+            logger.debug(f"[Bash] CWD: {self.cwd}")
+            logger.debug(f"[Bash] Command: {command[:500]}")
+            logger.debug(f"[Bash] OPENAI_API_KEY in env: {'OPENAI_API_KEY' in env}")
+            logger.debug(f"[Bash] SHELL: {env.get('SHELL', 'not set')}")
+            logger.debug(f"[Bash] Python executable: {sys.executable}")
+            logger.debug(f"[Bash] Process UID: {os.getuid()}")
+            
+            # Execute command with inherited environment variables
             result = subprocess.run(
                 command,
                 shell=True,
@@ -76,8 +108,50 @@ IMPORTANT SAFETY GUIDELINES:
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
                 text=True,
-                timeout=timeout
+                timeout=timeout,
+                env=env
             )
+            
+            logger.debug(f"[Bash] Exit code: {result.returncode}")
+            logger.debug(f"[Bash] Stdout length: {len(result.stdout)}")
+            logger.debug(f"[Bash] Stderr length: {len(result.stderr)}")
+            
+            # Workaround for exit code 126 with no output
+            if result.returncode == 126 and not result.stdout and not result.stderr:
+                logger.warning(f"[Bash] Exit 126 with no output - trying alternative execution method")
+                # Try using argument list instead of shell=True
+                import shlex
+                try:
+                    parts = shlex.split(command)
+                    if len(parts) > 0:
+                        logger.info(f"[Bash] Retrying with argument list: {parts[:3]}...")
+                        retry_result = subprocess.run(
+                            parts,
+                            cwd=self.cwd,
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE,
+                            text=True,
+                            timeout=timeout,
+                            env=env
+                        )
+                        logger.debug(f"[Bash] Retry exit code: {retry_result.returncode}, stdout: {len(retry_result.stdout)}, stderr: {len(retry_result.stderr)}")
+                        
+                        # If retry succeeded, use retry result
+                        if retry_result.returncode == 0 or retry_result.stdout or retry_result.stderr:
+                            result = retry_result
+                        else:
+                            # Both attempts failed - check if this is openai-image-vision skill
+                            if 'openai-image-vision' in command or 'vision.sh' in command:
+                                # Create a mock result with helpful error message
+                                from types import SimpleNamespace
+                                result = SimpleNamespace(
+                                    returncode=1,
+                                    stdout='{"error": "图片无法解析", "reason": "该图片格式可能不受支持，或图片文件存在问题", "suggestion": "请尝试其他图片"}',
+                                    stderr=''
+                                )
+                                logger.info(f"[Bash] Converted exit 126 to user-friendly image error message for vision skill")
+                except Exception as retry_err:
+                    logger.warning(f"[Bash] Retry failed: {retry_err}")
 
             # Combine stdout and stderr
             output = result.stdout
diff --git a/agent/tools/env_config/env_config.py b/agent/tools/env_config/env_config.py
index a988f42..f0a10fe 100644
--- a/agent/tools/env_config/env_config.py
+++ b/agent/tools/env_config/env_config.py
@@ -27,7 +27,7 @@ class EnvConfig(BaseTool):
     
     name: str = "env_config"
     description: str = (
-        "Manage API keys and skill configurations stored in the workspace .env file. "
+        "Manage API keys and skill configurations securely. "
         "Use this tool when user wants to configure API keys (like BOCHA_API_KEY, OPENAI_API_KEY), "
         "view configured keys, or manage skill settings. "
         "Actions: 'set' (add/update key), 'get' (view specific key), 'list' (show all configured keys), 'delete' (remove key). "
@@ -65,16 +65,17 @@ class EnvConfig(BaseTool):
     
     def __init__(self, config: dict = None):
         self.config = config or {}
-        self.workspace_dir = self.config.get("workspace_dir", os.path.expanduser("~/cow"))
-        self.env_path = os.path.join(self.workspace_dir, '.env')
+        # Store env config in ~/.cow directory (outside workspace for security)
+        self.env_dir = os.path.expanduser("~/.cow")
+        self.env_path = os.path.join(self.env_dir, '.env')
         self.agent_bridge = self.config.get("agent_bridge")  # Reference to AgentBridge for hot reload
         # Don't create .env file in __init__ to avoid issues during tool discovery
         # It will be created on first use in execute()
     
     def _ensure_env_file(self):
         """Ensure the .env file exists"""
-        # Create workspace directory if it doesn't exist
-        os.makedirs(self.workspace_dir, exist_ok=True)
+        # Create ~/.cow directory if it doesn't exist
+        os.makedirs(self.env_dir, exist_ok=True)
         
         if not os.path.exists(self.env_path):
             Path(self.env_path).touch()
diff --git a/agent/tools/ls/ls.py b/agent/tools/ls/ls.py
index d3e5330..d6517b3 100644
--- a/agent/tools/ls/ls.py
+++ b/agent/tools/ls/ls.py
@@ -50,6 +50,13 @@ class Ls(BaseTool):
         # Resolve path
         absolute_path = self._resolve_path(path)
         
+        # Security check: Prevent accessing sensitive config directory
+        env_config_dir = os.path.expanduser("~/.cow")
+        if os.path.abspath(absolute_path) == os.path.abspath(env_config_dir):
+            return ToolResult.fail(
+                "Error: Access denied. API keys and credentials must be accessed through the env_config tool only."
+            )
+        
         if not os.path.exists(absolute_path):
             # Provide helpful hint if using relative path
             if not os.path.isabs(path) and not path.startswith('~'):
diff --git a/agent/tools/read/read.py b/agent/tools/read/read.py
index 6ecae07..f88bc50 100644
--- a/agent/tools/read/read.py
+++ b/agent/tools/read/read.py
@@ -15,7 +15,7 @@ class Read(BaseTool):
     """Tool for reading file contents"""
     
     name: str = "read"
-    description: str = f"Read the contents of a file. Supports text files, PDF files, and images (jpg, png, gif, webp). For text files, output is truncated to {DEFAULT_MAX_LINES} lines or {DEFAULT_MAX_BYTES // 1024}KB (whichever is hit first). Use offset/limit for large files."
+    description: str = f"Read or inspect file contents. For text/PDF files, returns content (truncated to {DEFAULT_MAX_LINES} lines or {DEFAULT_MAX_BYTES // 1024}KB). For images/videos/audio, returns metadata only (file info, size, type). Use offset/limit for large text files."
     
     params: dict = {
         "type": "object",
@@ -39,10 +39,25 @@ class Read(BaseTool):
     def __init__(self, config: dict = None):
         self.config = config or {}
         self.cwd = self.config.get("cwd", os.getcwd())
-        # Supported image formats
-        self.image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.webp'}
-        # Supported PDF format
+        
+        # File type categories
+        self.image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.svg', '.ico'}
+        self.video_extensions = {'.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv', '.webm', '.m4v'}
+        self.audio_extensions = {'.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.wma'}
+        self.binary_extensions = {'.exe', '.dll', '.so', '.dylib', '.bin', '.dat', '.db', '.sqlite'}
+        self.archive_extensions = {'.zip', '.tar', '.gz', '.rar', '.7z', '.bz2', '.xz'}
         self.pdf_extensions = {'.pdf'}
+        
+        # Readable text formats (will be read with truncation); NOTE(review): this set is not referenced elsewhere in the visible diff — confirm it is used
+        self.text_extensions = {
+            '.txt', '.md', '.markdown', '.rst', '.log', '.csv', '.tsv', '.json', '.xml', '.yaml', '.yml',
+            '.py', '.js', '.ts', '.java', '.c', '.cpp', '.h', '.hpp', '.go', '.rs', '.rb', '.php',
+            '.html', '.css', '.scss', '.sass', '.less', '.vue', '.jsx', '.tsx',
+            '.sh', '.bash', '.zsh', '.fish', '.ps1', '.bat', '.cmd',
+            '.sql', '.r', '.m', '.swift', '.kt', '.scala', '.clj', '.erl', '.ex',
+            '.dockerfile', '.makefile', '.cmake', '.gradle', '.properties', '.ini', '.conf', '.cfg',
+            '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'  # Office documents; NOTE(review): these are binary containers, not utf-8 text — confirm the text read path can handle them
+        }
     
     def execute(self, args: Dict[str, Any]) -> ToolResult:
         """
@@ -61,6 +76,13 @@ class Read(BaseTool):
         # Resolve path
         absolute_path = self._resolve_path(path)
         
+        # Security check: Prevent reading sensitive config files
+        env_config_path = os.path.expanduser("~/.cow/.env")
+        if os.path.abspath(absolute_path) == os.path.abspath(env_config_path):
+            return ToolResult.fail(
+                "Error: Access denied. API keys and credentials must be accessed through the env_config tool only."
+            )
+        
         # Check if file exists
         if not os.path.exists(absolute_path):
             # Provide helpful hint if using relative path
@@ -78,16 +100,25 @@ class Read(BaseTool):
         
         # Check file type
         file_ext = Path(absolute_path).suffix.lower()
+        file_size = os.path.getsize(absolute_path)
         
-        # Check if image
+        # Check if image - return metadata for sending
         if file_ext in self.image_extensions:
             return self._read_image(absolute_path, file_ext)
         
+        # Check if video/audio/binary/archive - return metadata only
+        if file_ext in self.video_extensions:
+            return self._return_file_metadata(absolute_path, "video", file_size)
+        if file_ext in self.audio_extensions:
+            return self._return_file_metadata(absolute_path, "audio", file_size)
+        if file_ext in self.binary_extensions or file_ext in self.archive_extensions:
+            return self._return_file_metadata(absolute_path, "binary", file_size)
+        
         # Check if PDF
         if file_ext in self.pdf_extensions:
             return self._read_pdf(absolute_path, path, offset, limit)
         
-        # Read text file
+        # Read text file (with truncation for large files)
         return self._read_text(absolute_path, path, offset, limit)
     
     def _resolve_path(self, path: str) -> str:
@@ -103,25 +134,56 @@ class Read(BaseTool):
             return path
         return os.path.abspath(os.path.join(self.cwd, path))
     
+    def _return_file_metadata(self, absolute_path: str, file_type: str, file_size: int) -> ToolResult:
+        """
+        Describe a file that is not read into context (video, audio, binary, archive).
+        No file content is opened here; only the name, extension and the given size are used.
+        :param absolute_path: Absolute path to the file
+        :param file_type: Category label; also forms the "type" key as f"{file_type}_metadata"
+        :param file_size: File size in bytes
+        :return: ToolResult.success wrapping a metadata dict (path, name, MIME type, size, hint message)
+        """
+        file_name = Path(absolute_path).name
+        file_ext = Path(absolute_path).suffix.lower()
+        
+        # Map known extensions to a MIME type; unlisted ones fall back to application/octet-stream
+        mime_types = {
+            # Video
+            '.mp4': 'video/mp4', '.avi': 'video/x-msvideo', '.mov': 'video/quicktime',
+            '.mkv': 'video/x-matroska', '.webm': 'video/webm',
+            # Audio
+            '.mp3': 'audio/mpeg', '.wav': 'audio/wav', '.ogg': 'audio/ogg',
+            '.m4a': 'audio/mp4', '.flac': 'audio/flac',
+            # Archives (execute() passes these with file_type "binary")
+            '.zip': 'application/zip', '.tar': 'application/x-tar',
+            '.gz': 'application/gzip', '.rar': 'application/x-rar-compressed',
+        }
+        mime_type = mime_types.get(file_ext, 'application/octet-stream')
+        
+        result = {
+            "type": f"{file_type}_metadata",
+            "file_type": file_type,
+            "path": absolute_path,
+            "file_name": file_name,
+            "mime_type": mime_type,
+            "size": file_size,
+            "size_formatted": format_size(file_size),
+            "message": f"{file_type.capitalize()} 文件: {file_name} ({format_size(file_size)})\n提示: 如果需要发送此文件，请使用 send 工具。"
+        }
+        
+        return ToolResult.success(result)
+    
     def _read_image(self, absolute_path: str, file_ext: str) -> ToolResult:
         """
-        Read image file
+        Read image file - always return metadata only (images should be sent, not read into context)
         
         :param absolute_path: Absolute path to the image file
         :param file_ext: File extension
-        :return: Result containing image information
+        :return: Result containing image metadata for sending
         """
         try:
-            # Read image file
-            with open(absolute_path, 'rb') as f:
-                image_data = f.read()
-            
             # Get file size
-            file_size = len(image_data)
-            
-            # Return image information (actual image data can be base64 encoded when needed)
-            import base64
-            base64_data = base64.b64encode(image_data).decode('utf-8')
+            file_size = os.path.getsize(absolute_path)
             
             # Determine MIME type
             mime_type_map = {
@@ -133,12 +195,15 @@ class Read(BaseTool):
             }
             mime_type = mime_type_map.get(file_ext, 'image/jpeg')
             
+            # Return metadata for images (NOT file_to_send - use send tool to actually send)
             result = {
-                "type": "image",
+                "type": "image_metadata",
+                "file_type": "image",
+                "path": absolute_path,
                 "mime_type": mime_type,
                 "size": file_size,
                 "size_formatted": format_size(file_size),
-                "data": base64_data  # Base64 encoded image data
+                "message": f"图片文件: {Path(absolute_path).name} ({format_size(file_size)})\n提示: 如果需要发送此图片，请使用 send 工具。"
             }
             
             return ToolResult.success(result)
@@ -157,10 +222,32 @@ class Read(BaseTool):
         :return: File content or error message
         """
         try:
+            # Check file size first
+            file_size = os.path.getsize(absolute_path)
+            MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB
+            
+            if file_size > MAX_FILE_SIZE:
+                # File too large, return metadata only
+                return ToolResult.success({
+                    "type": "file_to_send",
+                    "file_type": "document",
+                    "path": absolute_path,
+                    "size": file_size,
+                    "size_formatted": format_size(file_size),
+                    "message": f"文件过大 ({format_size(file_size)} > 50MB)，无法读取内容。文件路径: {absolute_path}"
+                })
+            
             # Read file
             with open(absolute_path, 'r', encoding='utf-8') as f:
                 content = f.read()
             
+            # Truncate content if too long (20K characters max for model context)
+            MAX_CONTENT_CHARS = 20 * 1024  # 20K characters
+            content_truncated = False
+            if len(content) > MAX_CONTENT_CHARS:
+                content = content[:MAX_CONTENT_CHARS]
+                content_truncated = True
+            
             all_lines = content.split('\n')
             total_file_lines = len(all_lines)
             
@@ -197,6 +284,10 @@ class Read(BaseTool):
             output_text = ""
             details = {}
             
+            # Add truncation warning if content was truncated
+            if content_truncated:
+                output_text = f"[文件内容已截断到前 {format_size(MAX_CONTENT_CHARS)}，完整文件大小: {format_size(file_size)}]\n\n"
+            
             if truncation.first_line_exceeds_limit:
                 # First line exceeds 30KB limit
                 first_line_size = format_size(len(all_lines[start_line].encode('utf-8')))
diff --git a/agent/tools/scheduler/README.md b/agent/tools/scheduler/README.md
index ea3432c..55be2f9 100644
--- a/agent/tools/scheduler/README.md
+++ b/agent/tools/scheduler/README.md
@@ -42,24 +42,26 @@ Agent: [调用 scheduler 工具]
 
 **示例对话：**
 ```
-用户: 每天早上8点帮我搜索一下当前新闻
+用户: 每天早上8点帮我读取一下今日日程
 Agent: [调用 scheduler 工具]
       action: create
-      name: 每日新闻
+      name: 每日日程
       tool_call:
-        tool_name: bocha_search
+        tool_name: read
         tool_params:
-          query: 今日新闻
-        result_prefix: 📰 今日新闻播报
+          file_path: ~/cow/schedule.txt
+        result_prefix: 📅 今日日程
       schedule_type: cron
       schedule_value: 0 8 * * *
 ```
 
 **工具调用参数说明：**
-- `tool_name`: 要调用的工具名称（如 `bocha_search`、`web_fetch` 等）
+- `tool_name`: 要调用的工具名称（如 `bash`、`read`、`write` 等内置工具）
 - `tool_params`: 工具的参数（字典格式）
 - `result_prefix`: 可选，在结果前添加的前缀文本
 
+**注意：** 如果要使用 skills（如 bocha-search），需要通过 `bash` 工具调用 skill 脚本
+
 ### 2. 支持的调度类型
 
 #### Cron 表达式 (`cron`)
@@ -167,7 +169,7 @@ Agent: [调用 scheduler 工具]
 ```json
 {
   "id": "def456",
-  "name": "每日新闻",
+  "name": "每日日程",
   "enabled": true,
   "created_at": "2024-01-01T10:00:00",
   "updated_at": "2024-01-01T10:00:00",
@@ -177,11 +179,11 @@ Agent: [调用 scheduler 工具]
   },
   "action": {
     "type": "tool_call",
-    "tool_name": "bocha_search",
+    "tool_name": "read",
     "tool_params": {
-      "query": "今日新闻"
+      "file_path": "~/cow/schedule.txt"
     },
-    "result_prefix": "📰 今日新闻播报",
+    "result_prefix": "📅 今日日程",
     "receiver": "wxid_xxx",
     "receiver_name": "张三",
     "is_group": false,
@@ -234,30 +236,29 @@ Agent: [创建 cron: 0 18 * * 1-5]
 Agent: [创建 interval: 3600]
 ```
 
-### 4. 每日新闻推送（动态工具调用）
+### 4. 每日日程推送（动态工具调用）
 ```
-用户: 每天早上8点帮我搜索一下当前新闻
+用户: 每天早上8点帮我读取今日日程
 Agent: ✅ 定时任务创建成功
-       任务ID: news001
+       任务ID: schedule001
        调度: 每天 8:00
-       工具: bocha_search(query='今日新闻')
-       前缀: 📰 今日新闻播报
+       工具: read(file_path='~/cow/schedule.txt')
+       前缀: 📅 今日日程
 ```
 
-### 5. 定时天气查询（动态工具调用）
+### 5. 定时文件备份（动态工具调用）
 ```
-用户: 每天早上7点查询今天的天气
-Agent: [创建 cron: 0 7 * * *]
-       工具: bocha_search(query='今日天气')
-       前缀: 🌤️ 今日天气预报
+用户: 每天晚上11点备份工作文件
+Agent: [创建 cron: 0 23 * * *]
+       工具: bash(command='cp ~/cow/work.txt ~/cow/backup/work_$(date +%Y%m%d).txt')
+       前缀: ✅ 文件已备份
 ```
 
-### 6. 周报提醒（动态工具调用）
+### 6. 周报提醒（静态消息）
 ```
-用户: 每周五下午5点搜索本周热点
+用户: 每周五下午5点提醒我写周报
 Agent: [创建 cron: 0 17 * * 5]
-       工具: bocha_search(query='本周热点新闻')
-       前缀: 📊 本周热点回顾
+       消息: 📊 该写周报了！
 ```
 
 ### 4. 特定日期提醒
diff --git a/agent/tools/scheduler/integration.py b/agent/tools/scheduler/integration.py
index 1f345bf..8b54ccd 100644
--- a/agent/tools/scheduler/integration.py
+++ b/agent/tools/scheduler/integration.py
@@ -45,10 +45,17 @@ def init_scheduler(agent_bridge) -> bool:
                 action = task.get("action", {})
                 action_type = action.get("type")
                 
-                if action_type == "send_message":
+                if action_type == "agent_task":
+                    _execute_agent_task(task, agent_bridge)
+                elif action_type == "send_message":
+                    # Legacy support for old tasks
                     _execute_send_message(task, agent_bridge)
                 elif action_type == "tool_call":
+                    # Legacy support for old tasks
                     _execute_tool_call(task, agent_bridge)
+                elif action_type == "skill_call":
+                    # Legacy support for old tasks
+                    _execute_skill_call(task, agent_bridge)
                 else:
                     logger.warning(f"[Scheduler] Unknown action type: {action_type}")
             except Exception as e:
@@ -76,6 +83,100 @@ def get_scheduler_service():
     return _scheduler_service
 
 
+def _execute_agent_task(task: dict, agent_bridge):
+    """
+    Execute an agent_task action: run task_description through the Agent and send the reply via the task's channel
+    
+    Args:
+        task: Task dictionary; reads task["id"] and task["action"] (task_description, receiver, is_group, channel_type)
+        agent_bridge: AgentBridge instance; its agent_reply() produces the reply. Failures are logged, not raised
+    """
+    try:
+        action = task.get("action", {})
+        task_description = action.get("task_description")
+        receiver = action.get("receiver")
+        is_group = action.get("is_group", False)
+        channel_type = action.get("channel_type", "unknown")
+        
+        if not task_description:
+            logger.error(f"[Scheduler] Task {task['id']}: No task_description specified")
+            return
+        
+        if not receiver:
+            logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
+            return
+        
+        # DingTalk only gets a warning here; the task is not skipped and still runs below
+        if channel_type == "dingtalk":
+            logger.warning(f"[Scheduler] Task {task['id']}: DingTalk channel does not support scheduled messages (Stream mode limitation). Task will execute but message cannot be sent.")
+        
+        logger.info(f"[Scheduler] Task {task['id']}: Executing agent task '{task_description}'")
+        
+        # Create context for Agent; the receiver is also used as the session_id
+        context = Context(ContextType.TEXT, task_description)
+        context["receiver"] = receiver
+        context["isgroup"] = is_group
+        context["session_id"] = receiver
+        
+        # Channel-specific setup
+        if channel_type == "web":
+            import uuid
+            request_id = f"scheduler_{task['id']}_{uuid.uuid4().hex[:8]}"
+            context["request_id"] = request_id
+        elif channel_type == "feishu":
+            context["receive_id_type"] = "chat_id" if is_group else "open_id"
+            context["msg"] = None
+        elif channel_type == "dingtalk":
+            # DingTalk requires msg object, set to None for scheduled tasks
+            context["msg"] = None
+            # For a one-on-one chat, pass sender_staff_id through when the task stored one
+            if not is_group:
+                sender_staff_id = action.get("dingtalk_sender_staff_id")
+                if sender_staff_id:
+                    context["dingtalk_sender_staff_id"] = sender_staff_id
+        
+        # Use Agent to execute the task
+        # Mark this as a scheduled task execution to prevent recursive task creation
+        context["is_scheduled_task"] = True
+        
+        try:
+            reply = agent_bridge.agent_reply(task_description, context=context, on_event=None, clear_history=True)
+            
+            if reply and reply.content:
+                # Send the reply via a channel created for the stored channel_type
+                from channel.channel_factory import create_channel
+                
+                try:
+                    channel = create_channel(channel_type)
+                    if channel:
+                        # For web channel, register request_id -> receiver before sending
+                        if channel_type == "web" and hasattr(channel, 'request_to_session'):
+                            request_id = context.get("request_id")
+                            if request_id:
+                                channel.request_to_session[request_id] = receiver
+                                logger.debug(f"[Scheduler] Registered request_id {request_id} -> session {receiver}")
+                        
+                        # Send the reply
+                        channel.send(reply, context)
+                        logger.info(f"[Scheduler] Task {task['id']} executed successfully, result sent to {receiver}")
+                    else:
+                        logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
+                except Exception as e:
+                    logger.error(f"[Scheduler] Failed to send result: {e}")
+            else:
+                logger.error(f"[Scheduler] Task {task['id']}: No result from agent execution")
+                
+        except Exception as e:
+            logger.error(f"[Scheduler] Failed to execute task via Agent: {e}")
+            import traceback
+            logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
+            
+    except Exception as e:
+        logger.error(f"[Scheduler] Error in _execute_agent_task: {e}")
+        import traceback
+        logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
+
+
 def _execute_send_message(task: dict, agent_bridge):
     """
     Execute a send_message action
@@ -116,6 +217,17 @@ def _execute_send_message(task: dict, agent_bridge):
             # Feishu channel will detect this and send as new message instead of reply
             context["msg"] = None
             logger.debug(f"[Scheduler] Feishu: receive_id_type={context['receive_id_type']}, is_group={is_group}, receiver={receiver}")
+        elif channel_type == "dingtalk":
+            # DingTalk channel setup
+            context["msg"] = None
+            # 如果是单聊，需要传递 sender_staff_id
+            if not is_group:
+                sender_staff_id = action.get("dingtalk_sender_staff_id")
+                if sender_staff_id:
+                    context["dingtalk_sender_staff_id"] = sender_staff_id
+                    logger.debug(f"[Scheduler] DingTalk single chat: sender_staff_id={sender_staff_id}")
+                else:
+                    logger.warning(f"[Scheduler] Task {task['id']}: DingTalk single chat message missing sender_staff_id")
         
         # Create reply
         reply = Reply(ReplyType.TEXT, content)
@@ -156,8 +268,9 @@ def _execute_tool_call(task: dict, agent_bridge):
     """
     try:
         action = task.get("action", {})
-        tool_name = action.get("tool_name")
-        tool_params = action.get("tool_params", {})
+        # Support both old and new field names
+        tool_name = action.get("call_name") or action.get("tool_name")
+        tool_params = action.get("call_params") or action.get("tool_params", {})
         result_prefix = action.get("result_prefix", "")
         receiver = action.get("receiver")
         is_group = action.get("is_group", False)
@@ -237,6 +350,82 @@ def _execute_tool_call(task: dict, agent_bridge):
         logger.error(f"[Scheduler] Error in _execute_tool_call: {e}")
 
 
+def _execute_skill_call(task: dict, agent_bridge):
+    """
+    Execute a legacy skill_call action: ask the Agent to run the skill, then send its reply to the receiver
+    
+    Args:
+        task: Task dictionary (reads task["id"] and task["action"])
+        agent_bridge: AgentBridge instance; its agent_reply() executes the generated query
+    """
+    import traceback
+    try:
+        action = task.get("action", {})
+        # Support both old and new field names
+        skill_name = action.get("call_name") or action.get("skill_name")
+        skill_params = action.get("call_params") or action.get("skill_params") or {}
+        result_prefix = action.get("result_prefix", "")
+        receiver = action.get("receiver")
+        # Actions are stored with "is_group"; reading only "isgroup" made every group task look private
+        is_group = action.get("is_group", action.get("isgroup", False))
+        channel_type = action.get("channel_type", "unknown")
+        
+        if not skill_name:
+            logger.error(f"[Scheduler] Task {task['id']}: No skill_name specified")
+            return
+        if not receiver:
+            logger.error(f"[Scheduler] Task {task['id']}: No receiver specified")
+            return
+        logger.info(f"[Scheduler] Task {task['id']}: Executing skill '{skill_name}' with params {skill_params}")
+        # Build a natural language query for the Agent: "Use <skill> skill with k=v, ..."
+        param_str = ", ".join(f"{k}={v}" for k, v in skill_params.items())
+        query = f"Use {skill_name} skill"
+        if param_str:
+            query += f" with {param_str}"
+        
+        # Create context for Agent, including the channel-specific fields
+        context = Context(ContextType.TEXT, query)
+        context["receiver"] = receiver
+        context["isgroup"] = is_group
+        context["session_id"] = receiver
+        if channel_type == "web":
+            import uuid
+            context["request_id"] = f"scheduler_{task['id']}_{uuid.uuid4().hex[:8]}"
+        elif channel_type == "feishu":
+            context["receive_id_type"] = "chat_id" if is_group else "open_id"
+            context["msg"] = None
+        # Use Agent to execute the skill
+        try:
+            reply = agent_bridge.agent_reply(query, context=context, on_event=None, clear_history=True)
+        except Exception as e:
+            logger.error(f"[Scheduler] Failed to execute skill via Agent: {e}")
+            logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
+            return
+        if not (reply and reply.content):
+            logger.error(f"[Scheduler] Task {task['id']}: No result from skill execution")
+            return
+        if result_prefix:
+            reply = Reply(ReplyType.TEXT, f"{result_prefix}\n\n{reply.content}")
+        
+        # Deliver the result; the earlier version logged it as sent without ever sending it
+        try:
+            from channel.channel_factory import create_channel
+            channel = create_channel(channel_type)
+            if not channel:
+                logger.error(f"[Scheduler] Failed to create channel: {channel_type}")
+                return
+            # For web channel, register request_id -> receiver before sending
+            if channel_type == "web" and hasattr(channel, 'request_to_session'):
+                channel.request_to_session[context["request_id"]] = receiver
+            channel.send(reply, context)
+            logger.info(f"[Scheduler] Task {task['id']} executed: skill result sent to {receiver}")
+        except Exception as e:
+            logger.error(f"[Scheduler] Failed to send result: {e}")
+    except Exception as e:
+        logger.error(f"[Scheduler] Error in _execute_skill_call: {e}")
+        logger.error(f"[Scheduler] Traceback: {traceback.format_exc()}")
+
+
 def attach_scheduler_to_tool(tool, context: Context = None):
     """
     Attach scheduler components to a SchedulerTool instance
diff --git a/agent/tools/scheduler/scheduler_service.py b/agent/tools/scheduler/scheduler_service.py
index bc338c2..248c776 100644
--- a/agent/tools/scheduler/scheduler_service.py
+++ b/agent/tools/scheduler/scheduler_service.py
@@ -118,6 +118,34 @@ class SchedulerService:
         
         try:
             next_run = datetime.fromisoformat(next_run_str)
+            
+            # Check if task is overdue (e.g., service restart)
+            if next_run < now:
+                time_diff = (now - next_run).total_seconds()
+                
+                # If overdue by more than 5 minutes, skip this run and schedule next
+                if time_diff > 300:  # 5 minutes
+                    logger.warning(f"[Scheduler] Task {task['id']} is overdue by {int(time_diff)}s, skipping and scheduling next run")
+                    
+                    # For one-time tasks, disable them
+                    schedule = task.get("schedule", {})
+                    if schedule.get("type") == "once":
+                        self.task_store.update_task(task['id'], {
+                            "enabled": False,
+                            "last_run_at": now.isoformat()
+                        })
+                        logger.info(f"[Scheduler] One-time task {task['id']} expired, disabled")
+                        return False
+                    
+                    # For recurring tasks, calculate next run from now
+                    next_next_run = self._calculate_next_run(task, now)
+                    if next_next_run:
+                        self.task_store.update_task(task['id'], {
+                            "next_run_at": next_next_run.isoformat()
+                        })
+                        logger.info(f"[Scheduler] Rescheduled task {task['id']} to {next_next_run}")
+                    return False
+            
             return now >= next_run
         except:
             return False
diff --git a/agent/tools/scheduler/scheduler_tool.py b/agent/tools/scheduler/scheduler_tool.py
index 764711b..9d961c3 100644
--- a/agent/tools/scheduler/scheduler_tool.py
+++ b/agent/tools/scheduler/scheduler_tool.py
@@ -20,23 +20,16 @@ class SchedulerTool(BaseTool):
     
     name: str = "scheduler"
     description: str = (
-        "创建、查询和管理定时任务。支持两种任务类型：\n"
-        "1. 静态消息任务：定时发送预定义的消息\n"
-        "2. 动态工具任务：定时执行工具调用并发送结果（如搜索新闻、查询天气等）\n\n"
+        "创建、查询和管理定时任务。支持固定消息和AI任务两种类型。\n\n"
         "使用方法：\n"
-        "- 创建静态消息任务：action='create', name='任务名', message='消息内容', schedule_type='interval'/'cron'/'once', schedule_value='间隔秒数/cron表达式/时间'\n"
-        "- 创建动态工具任务：action='create', name='任务名', tool_call={'tool_name': '工具名', 'tool_params': {...}, 'result_prefix': '前缀'}, schedule_type='interval'/'cron'/'once', schedule_value='值'\n"
-        "- 查询列表：action='list'\n"
-        "- 查看详情：action='get', task_id='任务ID'\n"
-        "- 删除任务：action='delete', task_id='任务ID'\n"
-        "- 启用任务：action='enable', task_id='任务ID'\n"
-        "- 禁用任务：action='disable', task_id='任务ID'\n\n"
-        "调度类型说明：\n"
-        "- interval: 固定间隔秒数（如3600表示每小时）\n"
-        "- cron: cron表达式（如'0 9 * * *'表示每天9点，'*/10 * * * *'表示每10分钟）\n"
-        "- once: 一次性任务，ISO时间格式（如'2024-12-25T09:00:00'）\n\n"
-        "示例：每天早上8点搜索新闻\n"
-        "action='create', name='每日新闻', tool_call={'tool_name': 'bocha_search', 'tool_params': {'query': '今日新闻'}, 'result_prefix': '📰 今日新闻播报'}, schedule_type='cron', schedule_value='0 8 * * *'"
+        "- 创建：action='create', name='任务名', message/ai_task='内容', schedule_type='once/interval/cron', schedule_value='...'\n"
+        "- 查询：action='list' / action='get', task_id='任务ID'\n"
+        "- 管理：action='delete/enable/disable', task_id='任务ID'\n\n"
+        "调度类型：\n"
+        "- once: 一次性任务，支持相对时间(+5s,+10m,+1h,+1d)或ISO时间\n"
+        "- interval: 固定间隔(秒)，如3600表示每小时\n"
+        "- cron: cron表达式，如'0 8 * * *'表示每天8点\n\n"
+        "注意：'X秒后'用once+相对时间，'每X秒'用interval"
     )
     params: dict = {
         "type": "object",
@@ -56,26 +49,11 @@ class SchedulerTool(BaseTool):
             },
             "message": {
                 "type": "string",
-                "description": "要发送的静态消息内容 (用于 create 操作，与tool_call二选一)"
+                "description": "固定消息内容 (与ai_task二选一)"
             },
-            "tool_call": {
-                "type": "object",
-                "description": "要执行的工具调用 (用于 create 操作，与message二选一)",
-                "properties": {
-                    "tool_name": {
-                        "type": "string",
-                        "description": "工具名称，如 'bocha_search'"
-                    },
-                    "tool_params": {
-                        "type": "object",
-                        "description": "工具参数"
-                    },
-                    "result_prefix": {
-                        "type": "string",
-                        "description": "结果前缀，如 '今日新闻：'"
-                    }
-                },
-                "required": ["tool_name"]
+            "ai_task": {
+                "type": "string",
+                "description": "AI任务描述 (与message二选一)，如'搜索今日新闻'、'查询天气'"
             },
             "schedule_type": {
                 "type": "string",
@@ -84,12 +62,7 @@ class SchedulerTool(BaseTool):
             },
             "schedule_value": {
                 "type": "string",
-                "description": (
-                    "调度值 (用于 create 操作):\n"
-                    "- cron类型: cron表达式，如 '0 9 * * *' (每天9点)，'*/10 * * * *' (每10分钟)\n"
-                    "- interval类型: 间隔秒数，如 '3600' (每小时)，'10' (每10秒)\n"
-                    "- once类型: ISO时间，如 '2024-12-25T09:00:00'"
-                )
+                "description": "调度值: cron表达式/间隔秒数/时间(+5s,+10m,+1h或ISO格式)"
             }
         },
         "required": ["action"]
@@ -151,17 +124,20 @@ class SchedulerTool(BaseTool):
         """Create a new scheduled task"""
         name = kwargs.get("name")
         message = kwargs.get("message")
-        tool_call = kwargs.get("tool_call")
+        ai_task = kwargs.get("ai_task")
         schedule_type = kwargs.get("schedule_type")
         schedule_value = kwargs.get("schedule_value")
         
         # Validate required fields
         if not name:
             return "错误: 缺少任务名称 (name)"
-        if not message and not tool_call:
-            return "错误: 必须提供 message 或 tool_call 之一"
-        if message and tool_call:
-            return "错误: message 和 tool_call 不能同时提供，请选择其一"
+        
+        # Check that exactly one of message/ai_task is provided
+        if not message and not ai_task:
+            return "错误: 必须提供 message（固定消息）或 ai_task（AI任务）之一"
+        if message and ai_task:
+            return "错误: message 和 ai_task 只能提供其中一个"
+        
         if not schedule_type:
             return "错误: 缺少调度类型 (schedule_type)"
         if not schedule_value:
@@ -181,7 +157,7 @@ class SchedulerTool(BaseTool):
         # Create task
         task_id = str(uuid.uuid4())[:8]
         
-        # Build action based on message or tool_call
+        # Build action based on message or ai_task
         if message:
             action = {
                 "type": "send_message",
@@ -191,19 +167,22 @@ class SchedulerTool(BaseTool):
                 "is_group": context.get("isgroup", False),
                 "channel_type": self.config.get("channel_type", "unknown")
             }
-        else:  # tool_call
+        else:  # ai_task
             action = {
-                "type": "tool_call",
-                "tool_name": tool_call.get("tool_name"),
-                "tool_params": tool_call.get("tool_params", {}),
-                "result_prefix": tool_call.get("result_prefix", ""),
+                "type": "agent_task",
+                "task_description": ai_task,
                 "receiver": context.get("receiver"),
                 "receiver_name": self._get_receiver_name(context),
                 "is_group": context.get("isgroup", False),
                 "channel_type": self.config.get("channel_type", "unknown")
             }
         
-        task = {
+        # 针对钉钉单聊，额外存储 sender_staff_id
+        msg = context.kwargs.get("msg")
+        if msg and hasattr(msg, 'sender_staff_id') and not context.get("isgroup", False):
+            action["dingtalk_sender_staff_id"] = msg.sender_staff_id
+        
+        task_data = {
             "id": task_id,
             "name": name,
             "enabled": True,
@@ -214,26 +193,21 @@ class SchedulerTool(BaseTool):
         }
         
         # Calculate initial next_run_at
-        next_run = self._calculate_next_run(task)
+        next_run = self._calculate_next_run(task_data)
         if next_run:
-            task["next_run_at"] = next_run.isoformat()
+            task_data["next_run_at"] = next_run.isoformat()
         
         # Save task
-        self.task_store.add_task(task)
+        self.task_store.add_task(task_data)
         
         # Format response
         schedule_desc = self._format_schedule_description(schedule)
-        receiver_desc = task["action"]["receiver_name"] or task["action"]["receiver"]
+        receiver_desc = task_data["action"]["receiver_name"] or task_data["action"]["receiver"]
         
         if message:
-            content_desc = f"💬 消息: {message}"
+            content_desc = f"💬 固定消息: {message}"
         else:
-            tool_name = tool_call.get("tool_name")
-            tool_params_str = str(tool_call.get("tool_params", {}))
-            prefix = tool_call.get("result_prefix", "")
-            content_desc = f"🔧 工具调用: {tool_name}({tool_params_str})"
-            if prefix:
-                content_desc += f"\n📝 结果前缀: {prefix}"
+            content_desc = f"🤖 AI任务: {ai_task}"
         
         return (
             f"✅ 定时任务创建成功\n\n"
@@ -353,9 +327,38 @@ class SchedulerTool(BaseTool):
                 return {"type": "interval", "seconds": seconds}
             
             elif schedule_type == "once":
-                # Parse datetime
-                datetime.fromisoformat(schedule_value)
-                return {"type": "once", "run_at": schedule_value}
+                # Parse datetime - support both relative and absolute time
+                
+                # Check if it's relative time (e.g., "+5s", "+10m", "+1h", "+1d")
+                if schedule_value.startswith("+"):
+                    import re
+                    match = re.match(r'\+(\d+)([smhd])', schedule_value)
+                    if match:
+                        amount = int(match.group(1))
+                        unit = match.group(2)
+                        
+                        from datetime import timedelta
+                        now = datetime.now()
+                        
+                        if unit == 's':  # seconds
+                            target_time = now + timedelta(seconds=amount)
+                        elif unit == 'm':  # minutes
+                            target_time = now + timedelta(minutes=amount)
+                        elif unit == 'h':  # hours
+                            target_time = now + timedelta(hours=amount)
+                        elif unit == 'd':  # days
+                            target_time = now + timedelta(days=amount)
+                        else:
+                            return None
+                        
+                        return {"type": "once", "run_at": target_time.isoformat()}
+                    else:
+                        logger.error(f"[SchedulerTool] Invalid relative time format: {schedule_value}")
+                        return None
+                else:
+                    # Absolute time in ISO format
+                    datetime.fromisoformat(schedule_value)
+                    return {"type": "once", "run_at": schedule_value}
             
         except Exception as e:
             logger.error(f"[SchedulerTool] Invalid schedule: {e}")
diff --git a/agent/tools/send/__init__.py b/agent/tools/send/__init__.py
new file mode 100644
index 0000000..b76702a
--- /dev/null
+++ b/agent/tools/send/__init__.py
@@ -0,0 +1,3 @@
+from .send import Send
+
+__all__ = ['Send']
diff --git a/agent/tools/send/send.py b/agent/tools/send/send.py
new file mode 100644
index 0000000..a778b74
--- /dev/null
+++ b/agent/tools/send/send.py
@@ -0,0 +1,159 @@
+"""
+Send tool - Send files to the user
+"""
+
+import os
+from typing import Dict, Any
+from pathlib import Path
+
+from agent.tools.base_tool import BaseTool, ToolResult
+
+
+class Send(BaseTool):
+    """Tool that validates a file path and returns "file_to_send" metadata describing the file"""
+    
+    name: str = "send"
+    description: str = "Send a file (image, video, audio, document) to the user. Use this when the user explicitly asks to send/share a file."
+    
+    params: dict = {
+        "type": "object",
+        "properties": {
+            "path": {
+                "type": "string",
+                "description": "Path to the file to send. Can be absolute path or relative to workspace."
+            },
+            "message": {
+                "type": "string",
+                "description": "Optional message to accompany the file"
+            }
+        },
+        "required": ["path"]
+    }
+    
+    def __init__(self, config: dict = None):
+        """Store the config; relative paths are resolved against config["cwd"] (default: os.getcwd())"""
+        self.config = config or {}
+        self.cwd = self.config.get("cwd", os.getcwd())
+        # Supported file types
+        self.image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.svg', '.ico'}
+        self.video_extensions = {'.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv', '.webm', '.m4v'}
+        self.audio_extensions = {'.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.wma'}
+        self.document_extensions = {'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.txt', '.md'}
+    
+    def execute(self, args: Dict[str, Any]) -> ToolResult:
+        """
+        Validate the requested file and describe it; this method does not read or transmit the file itself
+        
+        :param args: Contains "path" (required) and an optional "message"
+        :return: ToolResult.success with a "file_to_send" metadata dict, or ToolResult.fail with an error text
+        """
+        path = (args.get("path") or "").strip()  # tolerate an explicit null path
+        message = args.get("message", "")
+        
+        if not path:
+            return ToolResult.fail("Error: path parameter is required")
+        
+        # Resolve path
+        absolute_path = self._resolve_path(path)
+        
+        # Must be an existing regular file; a directory passes os.path.exists but cannot be sent
+        if not os.path.isfile(absolute_path):
+            return ToolResult.fail(f"Error: File not found: {path}")
+        
+        # Check if readable
+        if not os.access(absolute_path, os.R_OK):
+            return ToolResult.fail(f"Error: File is not readable: {path}")
+        
+        # Get file info
+        file_ext = Path(absolute_path).suffix.lower()
+        file_size = os.path.getsize(absolute_path)
+        file_name = Path(absolute_path).name
+        
+        # Determine file type
+        if file_ext in self.image_extensions:
+            file_type = "image"
+            mime_type = self._get_image_mime_type(file_ext)
+        elif file_ext in self.video_extensions:
+            file_type = "video"
+            mime_type = self._get_video_mime_type(file_ext)
+        elif file_ext in self.audio_extensions:
+            file_type = "audio"
+            mime_type = self._get_audio_mime_type(file_ext)
+        elif file_ext in self.document_extensions:
+            file_type = "document"
+            mime_type = self._get_document_mime_type(file_ext)
+        else:
+            file_type = "file"
+            mime_type = "application/octet-stream"
+        
+        # Return file_to_send metadata
+        result = {
+            "type": "file_to_send",
+            "file_type": file_type,
+            "path": absolute_path,
+            "file_name": file_name,
+            "mime_type": mime_type,
+            "size": file_size,
+            "size_formatted": self._format_size(file_size),
+            "message": message or f"正在发送 {file_name}"
+        }
+        
+        return ToolResult.success(result)
+    
+    def _resolve_path(self, path: str) -> str:
+        """Expand "~" and return an absolute path; relative paths are joined onto self.cwd"""
+        path = os.path.expanduser(path)
+        if os.path.isabs(path):
+            return path
+        return os.path.abspath(os.path.join(self.cwd, path))
+    
+    def _get_image_mime_type(self, ext: str) -> str:
+        """Get MIME type for a lowercase image extension (falls back to image/jpeg)"""
+        mime_map = {
+            '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg',
+            '.png': 'image/png', '.gif': 'image/gif',
+            '.webp': 'image/webp', '.bmp': 'image/bmp',
+            '.svg': 'image/svg+xml', '.ico': 'image/x-icon'
+        }
+        return mime_map.get(ext, 'image/jpeg')
+    
+    def _get_video_mime_type(self, ext: str) -> str:
+        """Get MIME type for a lowercase video extension (falls back to video/mp4)"""
+        mime_map = {
+            '.mp4': 'video/mp4', '.avi': 'video/x-msvideo', '.mov': 'video/quicktime',
+            '.mkv': 'video/x-matroska', '.webm': 'video/webm', '.flv': 'video/x-flv',
+            '.wmv': 'video/x-ms-wmv', '.m4v': 'video/x-m4v'
+        }
+        return mime_map.get(ext, 'video/mp4')
+    
+    def _get_audio_mime_type(self, ext: str) -> str:
+        """Get MIME type for a lowercase audio extension (falls back to audio/mpeg)"""
+        mime_map = {
+            '.mp3': 'audio/mpeg', '.wav': 'audio/wav', '.ogg': 'audio/ogg',
+            '.m4a': 'audio/mp4', '.flac': 'audio/flac', '.aac': 'audio/aac',
+            '.wma': 'audio/x-ms-wma'
+        }
+        return mime_map.get(ext, 'audio/mpeg')
+    
+    def _get_document_mime_type(self, ext: str) -> str:
+        """Get MIME type for a lowercase document extension (falls back to application/octet-stream)"""
+        mime_map = {
+            '.pdf': 'application/pdf',
+            '.doc': 'application/msword',
+            '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+            '.xls': 'application/vnd.ms-excel',
+            '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+            '.ppt': 'application/vnd.ms-powerpoint',
+            '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+            '.txt': 'text/plain',
+            '.md': 'text/markdown'
+        }
+        return mime_map.get(ext, 'application/octet-stream')
+    
+    def _format_size(self, size_bytes: int) -> str:
+        """Format a byte count with one decimal and a B/KB/MB/GB/TB suffix (1024-based)"""
+        for unit in ['B', 'KB', 'MB', 'GB']:
+            if size_bytes < 1024.0:
+                return f"{size_bytes:.1f}{unit}"
+            size_bytes /= 1024.0
+        return f"{size_bytes:.1f}TB"
diff --git a/bridge/agent_bridge.py b/bridge/agent_bridge.py
index ebc2ca7..a9884e5 100644
--- a/bridge/agent_bridge.py
+++ b/bridge/agent_bridge.py
@@ -2,6 +2,7 @@
 Agent Bridge - Integrates Agent system with existing COW bridge
 """
 
+import os
 from typing import Optional, List
 
 from agent.protocol import Agent, LLMModel, LLMRequest
@@ -269,8 +270,11 @@ class AgentBridge:
         # Get workspace from config
         workspace_root = os.path.expanduser(conf().get("agent_workspace", "~/cow"))
 
-        # Load environment variables from workspace .env file
-        env_file = os.path.join(workspace_root, '.env')
+        # Migrate API keys from config.json to environment variables (if not already set)
+        self._migrate_config_to_env(workspace_root)
+        
+        # Load environment variables from secure .env file location
+        env_file = os.path.expanduser("~/.cow/.env")
         if os.path.exists(env_file):
             try:
                 from dotenv import load_dotenv
@@ -280,9 +284,6 @@ class AgentBridge:
                 logger.warning("[AgentBridge] python-dotenv not installed, skipping .env file loading")
             except Exception as e:
                 logger.warning(f"[AgentBridge] Failed to load .env file: {e}")
-        
-        # Migrate API keys from config.json to environment variables (if not already set)
-        self._migrate_config_to_env(workspace_root)
 
         # Initialize workspace and create template files
         from agent.prompt import ensure_workspace, load_context_files, PromptBuilder
@@ -377,7 +378,6 @@ class AgentBridge:
                 if tool_name == "env_config":
                     from agent.tools import EnvConfig
                     tool = EnvConfig({
-                        "workspace_dir": workspace_root,
                         "agent_bridge": self  # Pass self reference for hot reload
                     })
                 else:
@@ -390,12 +390,6 @@ class AgentBridge:
                         tool.cwd = file_config.get("cwd", tool.cwd if hasattr(tool, 'cwd') else None)
                         if 'memory_manager' in file_config:
                             tool.memory_manager = file_config['memory_manager']
-                    # Apply API key for bocha_search tool
-                    elif tool_name == 'bocha_search':
-                        bocha_api_key = conf().get("bocha_api_key", "")
-                        if bocha_api_key:
-                            tool.config = {"bocha_api_key": bocha_api_key}
-                            tool.api_key = bocha_api_key
                     tools.append(tool)
                     logger.debug(f"[AgentBridge] Loaded tool: {tool_name}")
             except Exception as e:
@@ -504,8 +498,11 @@ class AgentBridge:
         # Get workspace from config
         workspace_root = os.path.expanduser(conf().get("agent_workspace", "~/cow"))
 
-        # Load environment variables from workspace .env file
-        env_file = os.path.join(workspace_root, '.env')
+        # Migrate API keys from config.json to environment variables (if not already set)
+        self._migrate_config_to_env(workspace_root)
+        
+        # Load environment variables from secure .env file location
+        env_file = os.path.expanduser("~/.cow/.env")
         if os.path.exists(env_file):
             try:
                 from dotenv import load_dotenv
@@ -609,11 +606,6 @@ class AgentBridge:
                         tool.cwd = file_config.get("cwd", tool.cwd if hasattr(tool, 'cwd') else None)
                         if 'memory_manager' in file_config:
                             tool.memory_manager = file_config['memory_manager']
-                    elif tool_name == 'bocha_search':
-                        bocha_api_key = conf().get("bocha_api_key", "")
-                        if bocha_api_key:
-                            tool.config = {"bocha_api_key": bocha_api_key}
-                            tool.api_key = bocha_api_key
                     tools.append(tool)
             except Exception as e:
                 logger.warning(f"[AgentBridge] Failed to load tool {tool_name} for session {session_id}: {e}")
@@ -767,23 +759,52 @@ class AgentBridge:
             if not agent:
                 return Reply(ReplyType.ERROR, "Failed to initialize super agent")
             
-            # Attach context to scheduler tool if present
-            if context and agent.tools:
-                for tool in agent.tools:
-                    if tool.name == "scheduler":
-                        try:
-                            from agent.tools.scheduler.integration import attach_scheduler_to_tool
-                            attach_scheduler_to_tool(tool, context)
-                        except Exception as e:
-                            logger.warning(f"[AgentBridge] Failed to attach context to scheduler: {e}")
-                        break
+            # Filter tools based on context
+            original_tools = agent.tools
+            filtered_tools = original_tools
             
-            # Use agent's run_stream method
-            response = agent.run_stream(
-                user_message=query,
-                on_event=on_event,
-                clear_history=clear_history
-            )
+            # If this is a scheduled task execution, exclude scheduler tool to prevent recursion
+            if context and context.get("is_scheduled_task"):
+                filtered_tools = [tool for tool in agent.tools if tool.name != "scheduler"]
+                agent.tools = filtered_tools
+                logger.info(f"[AgentBridge] Scheduled task execution: excluded scheduler tool ({len(filtered_tools)}/{len(original_tools)} tools)")
+            else:
+                # Attach context to scheduler tool if present
+                if context and agent.tools:
+                    for tool in agent.tools:
+                        if tool.name == "scheduler":
+                            try:
+                                from agent.tools.scheduler.integration import attach_scheduler_to_tool
+                                attach_scheduler_to_tool(tool, context)
+                            except Exception as e:
+                                logger.warning(f"[AgentBridge] Failed to attach context to scheduler: {e}")
+                            break
+            
+            try:
+                # Use agent's run_stream method
+                response = agent.run_stream(
+                    user_message=query,
+                    on_event=on_event,
+                    clear_history=clear_history
+                )
+            finally:
+                # Restore original tools
+                if context and context.get("is_scheduled_task"):
+                    agent.tools = original_tools
+            
+            # Check if there are files to send (from read tool)
+            if hasattr(agent, 'stream_executor') and hasattr(agent.stream_executor, 'files_to_send'):
+                files_to_send = agent.stream_executor.files_to_send
+                if files_to_send:
+                    # Send the first file (for now, handle one file at a time)
+                    file_info = files_to_send[0]
+                    logger.info(f"[AgentBridge] Sending file: {file_info.get('path')}")
+                    
+                    # Clear files_to_send for next request
+                    agent.stream_executor.files_to_send = []
+                    
+                    # Return file reply based on file type
+                    return self._create_file_reply(file_info, response, context)
             
             return Reply(ReplyType.TEXT, response)
             
@@ -791,12 +812,53 @@ class AgentBridge:
             logger.error(f"Agent reply error: {e}")
             return Reply(ReplyType.ERROR, f"Agent error: {str(e)}")
     
+    def _create_file_reply(self, file_info: dict, text_response: str, context: Context = None) -> Reply:
+        """
+        Build the Reply used to deliver a file collected during the agent run.
+
+        Args:
+            file_info: File metadata dict; reads "file_type", "path", "file_name" and "message"
+            text_response: Text produced by the agent for the same turn (may be empty)
+            context: Context object (not used by this method)
+
+        Returns:
+            IMAGE_URL reply for images, FILE reply for documents, TEXT reply otherwise
+            (also when "path" is missing, since no file:// URL can be built without it)
+        """
+        file_type = file_info.get("file_type", "file")
+        file_path = file_info.get("path")
+
+        # Images go out as IMAGE_URL; the file:// URL is left for the channel to process
+        if file_path and file_type == "image":
+            file_url = f"file://{file_path}"
+            logger.info(f"[AgentBridge] Sending image: {file_url}")
+            reply = Reply(ReplyType.IMAGE_URL, file_url)
+            # Carry the agent text along for channels that support text + image
+            if text_response:
+                reply.text_content = text_response
+            return reply
+
+        # Documents (PDF, Excel, Word, PPT) go out as FILE with a display name
+        if file_path and file_type == "document":
+            file_url = f"file://{file_path}"
+            logger.info(f"[AgentBridge] Sending document: {file_url}")
+            reply = Reply(ReplyType.FILE, file_url)
+            # Use the basename when the metadata has no usable name (key absent, None or "")
+            reply.file_name = file_info.get("file_name") or os.path.basename(file_path)
+            return reply
+
+        # Video/audio, and entries without a path, fall back to a text notice
+        # TODO: Implement video/audio sending when channel supports it
+        message = text_response or file_info.get("message", "文件已准备")
+        message += f"\n\n[文件: {file_info.get('file_name', file_path)}]"
+        return Reply(ReplyType.TEXT, message)
+    
     def _migrate_config_to_env(self, workspace_root: str):
         """
         Migrate API keys from config.json to .env file if not already set
         
         Args:
-            workspace_root: Workspace directory path
+            workspace_root: Workspace directory path (not used, kept for compatibility)
         """
         from config import conf
         import os
@@ -810,7 +872,8 @@ class AgentBridge:
             "linkai_api_key": "LINKAI_API_KEY",
         }
         
-        env_file = os.path.join(workspace_root, '.env')
+        # Use fixed secure location for .env file
+        env_file = os.path.expanduser("~/.cow/.env")
         
         # Read existing env vars from .env file
         existing_env_vars = {}
@@ -830,19 +893,25 @@ class AgentBridge:
         for config_key, env_key in key_mapping.items():
             # Skip if already in .env file
             if env_key in existing_env_vars:
+                logger.debug(f"[AgentBridge] Skipping {env_key} - already in .env")
                 continue
             
             # Get value from config.json
             value = conf().get(config_key, "")
             if value and value.strip():  # Only migrate non-empty values
                 keys_to_migrate[env_key] = value.strip()
+                logger.debug(f"[AgentBridge] Will migrate {env_key} from config.json")
+            else:
+                logger.debug(f"[AgentBridge] Skipping {env_key} - no value in config.json")
         
         # Write new keys to .env file
         if keys_to_migrate:
             try:
-                # Ensure .env file exists
+                # Ensure ~/.cow directory and .env file exist
+                env_dir = os.path.dirname(env_file)
+                if not os.path.exists(env_dir):
+                    os.makedirs(env_dir, exist_ok=True)
                 if not os.path.exists(env_file):
-                    os.makedirs(os.path.dirname(env_file), exist_ok=True)
                     open(env_file, 'a').close()
                 
                 # Append new keys
diff --git a/channel/chat_channel.py b/channel/chat_channel.py
index 1523f67..bceaeef 100644
--- a/channel/chat_channel.py
+++ b/channel/chat_channel.py
@@ -64,15 +64,22 @@ class ChatChannel(Channel):
                         check_contain(group_name, group_name_keyword_white_list),
                     ]
                 ):
-                    group_chat_in_one_session = conf().get("group_chat_in_one_session", [])
-                    session_id = cmsg.actual_user_id
-                    if any(
-                        [
-                            group_name in group_chat_in_one_session,
-                            "ALL_GROUP" in group_chat_in_one_session,
-                        ]
-                    ):
+                    # Check global group_shared_session config first
+                    group_shared_session = conf().get("group_shared_session", True)
+                    if group_shared_session:
+                        # All users in the group share the same session
                         session_id = group_id
+                    else:
+                        # Check group-specific whitelist (legacy behavior)
+                        group_chat_in_one_session = conf().get("group_chat_in_one_session", [])
+                        session_id = cmsg.actual_user_id
+                        if any(
+                            [
+                                group_name in group_chat_in_one_session,
+                                "ALL_GROUP" in group_chat_in_one_session,
+                            ]
+                        ):
+                            session_id = group_id
                 else:
                     logger.debug(f"No need reply, groupName not in whitelist, group_name={group_name}")
                     return None
@@ -283,7 +290,98 @@ class ChatChannel(Channel):
             reply = e_context["reply"]
             if not e_context.is_pass() and reply and reply.type:
                 logger.debug("[chat_channel] ready to send reply: {}, context: {}".format(reply, context))
-                self._send(reply, context)
+                
+                # 如果是文本回复，尝试提取并发送图片
+                if reply.type == ReplyType.TEXT:
+                    self._extract_and_send_images(reply, context)
+                # 如果是图片回复但带有文本内容，先发文本再发图片
+                elif reply.type == ReplyType.IMAGE_URL and hasattr(reply, 'text_content') and reply.text_content:
+                    # 先发送文本
+                    text_reply = Reply(ReplyType.TEXT, reply.text_content)
+                    self._send(text_reply, context)
+                    # 短暂延迟后发送图片
+                    time.sleep(0.3)
+                    self._send(reply, context)
+                else:
+                    self._send(reply, context)
+    
+    def _extract_and_send_images(self, reply: Reply, context: Context):
+        """
+        Send a text reply, then send any media it references as separate messages.
+
+        Recognised references: [图片: /path/to/image.png], [视频: /path/to/video.mp4],
+        ![](url), <img src="url">, <video src="url"> and bare http(s) image/video URLs.
+        At most 5 distinct media items are sent; the text itself is sent unchanged.
+        Local paths are sent as file:// URLs; paths that do not exist are skipped with a warning.
+        A failure on one item is logged and the remaining items are still attempted.
+
+        Args:
+            reply: TEXT reply whose content is scanned for media references
+            context: Context forwarded to every _send call
+        """
+        # Non-string content cannot be scanned; it is still sent as a normal reply below
+        content = reply.content if isinstance(reply.content, str) else ""
+
+        # (regex, media type) pairs; each regex captures or matches the media location
+        patterns = [
+            (r'\[图片:\s*([^\]]+)\]', 'image'),   # [图片: /path/to/image.png]
+            (r'\[视频:\s*([^\]]+)\]', 'video'),   # [视频: /path/to/video.mp4]
+            (r'!\[.*?\]\(([^\)]+)\)', 'image'),   # ![alt](url) - treated as an image
+            (r'<img[^>]+src=["\']([^"\']+)["\']', 'image'),  # <img src="url">
+            (r'<video[^>]+src=["\']([^"\']+)["\']', 'video'),  # <video src="url">
+            (r'https?://[^\s]+\.(?:jpg|jpeg|png|gif|webp)', 'image'),  # bare image URL
+            (r'https?://[^\s]+\.(?:mp4|avi|mov|wmv|flv)', 'video'),  # bare video URL
+        ]
+
+        # De-duplicate by location, keeping first-seen order and the first media type.
+        # Captures are stripped so "[图片: /a.png ]" still resolves to the real path.
+        first_seen = {}
+        for pattern, media_type in patterns:
+            for match in re.findall(pattern, content, re.IGNORECASE):
+                location = match.strip()
+                if location:
+                    first_seen.setdefault(location, media_type)
+        # Cap the number of media messages sent for a single reply
+        media_items = list(first_seen.items())[:5]
+
+        if media_items:
+            logger.info(f"[chat_channel] Extracted {len(media_items)} media item(s) from reply")
+
+        # The text always goes out first and unmodified, whether or not media was found
+        self._send(reply, context)
+
+        # NOTE(review): local paths come from reply text; any existing file named there is
+        # sent to the chat. Consider restricting local media to an allowed directory.
+        for i, (url, media_type) in enumerate(media_items):
+            try:
+                if url.startswith(('http://', 'https://')):
+                    # Remote resource: passed through as-is
+                    target = url
+                elif os.path.exists(url):
+                    # Local file: converted to a file:// URL
+                    target = f"file://{url}"
+                else:
+                    logger.warning(f"[chat_channel] Media file not found or invalid URL: {url}")
+                    continue
+
+                # The reply type depends only on the media kind, not on where the file lives
+                if media_type == 'video':
+                    # Videos are delivered as FILE replies with a display name
+                    media_reply = Reply(ReplyType.FILE, target)
+                    media_reply.file_name = os.path.basename(url)
+                else:
+                    # Images are delivered as IMAGE_URL replies
+                    media_reply = Reply(ReplyType.IMAGE_URL, target)
+
+                # Small pause between consecutive media messages to avoid rate limits
+                if i > 0:
+                    time.sleep(0.5)
+                self._send(media_reply, context)
+                logger.info(f"[chat_channel] Sent {media_type} {i+1}/{len(media_items)}: {url[:50]}...")
+
+            except Exception as e:
+                # Best-effort delivery: one bad item must not block the remaining ones
+                logger.error(f"[chat_channel] Failed to send {media_type} {url}: {e}")
 
     def _send(self, reply: Reply, context: Context, retry_cnt=0):
         try:
diff --git a/channel/dingtalk/dingtalk_channel.py b/channel/dingtalk/dingtalk_channel.py
index 6c99e5f..67f90a3 100644
--- a/channel/dingtalk/dingtalk_channel.py
+++ b/channel/dingtalk/dingtalk_channel.py
@@ -9,6 +9,7 @@ import json
 # -*- coding=utf-8 -*-
 import logging
 import time
+import requests
 
 import dingtalk_stream
 from dingtalk_stream import AckMessage
@@ -107,16 +108,156 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
         conf()["group_name_white_list"] = ["ALL_GROUP"]
         # 单聊无需前缀
         conf()["single_chat_prefix"] = [""]
+        # Access token cache
+        self._access_token = None
+        self._access_token_expires_at = 0
+        # Robot code cache (extracted from incoming messages)
+        self._robot_code = None
 
     def startup(self):
         credential = dingtalk_stream.Credential(self.dingtalk_client_id, self.dingtalk_client_secret)
         client = dingtalk_stream.DingTalkStreamClient(credential)
         client.register_callback_handler(dingtalk_stream.chatbot.ChatbotMessage.TOPIC, self)
         client.start_forever()
+    
+    def get_access_token(self):
+        """
+        Return the enterprise internal app access_token, or None if it cannot be obtained.
+        A cached token is reused until its locally computed expiry time has passed.
+        Docs: https://open.dingtalk.com/document/orgapp/obtain-orgapp-token
+        """
+        now = time.time()
+
+        # Cache hit: the stored token is still inside its validity window
+        if self._access_token and now < self._access_token_expires_at:
+            return self._access_token
+
+        credentials = {
+            "appKey": self.dingtalk_client_id,
+            "appSecret": self.dingtalk_client_secret
+        }
+        try:
+            response = requests.post(
+                "https://api.dingtalk.com/v1.0/oauth2/accessToken",
+                headers={"Content-Type": "application/json"},
+                json=credentials,
+                timeout=10,
+            )
+            result = response.json()
+            if response.status_code != 200 or "accessToken" not in result:
+                logger.error(f"[DingTalk] Failed to get access token: {result}")
+                return None
+            self._access_token = result["accessToken"]
+            # Expire the cache 5 minutes early; expireIn falls back to 7200 s when absent
+            self._access_token_expires_at = now + result.get("expireIn", 7200) - 300
+            logger.info("[DingTalk] Access token refreshed successfully")
+            return self._access_token
+        except Exception as e:
+            logger.error(f"[DingTalk] Error getting access token: {e}")
+            return None
+    
+    def send_single_message(self, user_id: str, content: str, robot_code: str) -> bool:
+        """
+        Send a text message to one user in a private (one-to-one) chat.
+        API: https://open.dingtalk.com/document/orgapp/chatbots-send-one-on-one-chat-messages-in-batches
+        Returns True when the API answers 200 with a processQueryKey, otherwise False.
+        """
+        token = self.get_access_token()
+        if not token:
+            logger.error("[DingTalk] Failed to send single message: Access token not available.")
+            return False
+
+        if not robot_code:
+            logger.error("[DingTalk] Cannot send single message: robot_code is required")
+            return False
+
+        endpoint = "https://api.dingtalk.com/v1.0/robot/oToMessages/batchSend"
+        request_headers = {
+            "x-acs-dingtalk-access-token": token,
+            "Content-Type": "application/json"
+        }
+        # msgParam is itself a JSON-encoded string nested inside the JSON request body
+        payload = {
+            "msgParam": json.dumps({"content": content}),
+            "msgKey": "sampleText",
+            "userIds": [user_id],
+            "robotCode": robot_code
+        }
+
+        logger.info(f"[DingTalk] Sending single message to user {user_id} with robot_code {robot_code}")
+        try:
+            response = requests.post(endpoint, headers=request_headers, json=payload, timeout=10)
+            result = response.json()
+            if response.status_code != 200 or not result.get("processQueryKey"):
+                logger.error(f"[DingTalk] Failed to send single message: {result}")
+                return False
+            logger.info(f"[DingTalk] Single message sent successfully to {user_id}")
+            return True
+        except Exception as e:
+            logger.error(f"[DingTalk] Error sending single message: {e}")
+            return False
+    
+    def send_group_message(self, conversation_id: str, content: str, robot_code: str = None):
+        """
+        Proactively send a text message to a group; returns True on HTTP 200, otherwise False.
+        Docs: https://open.dingtalk.com/document/orgapp/the-robot-sends-a-group-message
+        
+        Args:
+            conversation_id: Conversation ID (openConversationId)
+            content: Message text
+            robot_code: Robot code; required despite the None default (empty -> error logged, False)
+        """
+        access_token = self.get_access_token()
+        if not access_token:
+            logger.error("[DingTalk] Cannot send group message: no access token")
+            return False
+        
+        # No fallback robot code is applied here: an empty value aborts the send
+        if not robot_code:
+            logger.error("[DingTalk] Cannot send group message: robot_code is required")
+            return False
+        
+        url = "https://api.dingtalk.com/v1.0/robot/groupMessages/send"
+        headers = {
+            "x-acs-dingtalk-access-token": access_token,
+            "Content-Type": "application/json"
+        }
+        data = {
+            "msgParam": json.dumps({"content": content}),
+            "msgKey": "sampleText",
+            "openConversationId": conversation_id,
+            "robotCode": robot_code
+        }
+        
+        try:
+            response = requests.post(url, headers=headers, json=data, timeout=10)
+            result = response.json()
+            
+            if response.status_code == 200:
+                logger.info(f"[DingTalk] Group message sent successfully to {conversation_id}")
+                return True
+            else:
+                logger.error(f"[DingTalk] Failed to send group message: {result}")
+                return False
+        except Exception as e:
+            logger.error(f"[DingTalk] Error sending group message: {e}")
+            return False
 
     async def process(self, callback: dingtalk_stream.CallbackMessage):
         try:
             incoming_message = dingtalk_stream.ChatbotMessage.from_dict(callback.data)
+            
+            # Debug: 打印完整的 event 数据
+            logger.info(f"[DingTalk] ===== Incoming Message Debug =====")
+            logger.info(f"[DingTalk] callback.data keys: {callback.data.keys() if hasattr(callback.data, 'keys') else 'N/A'}")
+            logger.info(f"[DingTalk] incoming_message attributes: {dir(incoming_message)}")
+            logger.info(f"[DingTalk] robot_code: {getattr(incoming_message, 'robot_code', 'N/A')}")
+            logger.info(f"[DingTalk] chatbot_corp_id: {getattr(incoming_message, 'chatbot_corp_id', 'N/A')}")
+            logger.info(f"[DingTalk] chatbot_user_id: {getattr(incoming_message, 'chatbot_user_id', 'N/A')}")
+            logger.info(f"[DingTalk] conversation_id: {getattr(incoming_message, 'conversation_id', 'N/A')}")
+            logger.info(f"[DingTalk] Raw callback.data: {callback.data}")
+            logger.info(f"[DingTalk] =====================================")
+            
             image_download_handler = self  # 传入方法所在的类实例
             dingtalk_msg = DingTalkMessage(incoming_message, image_download_handler)
 
@@ -174,8 +315,48 @@ class DingTalkChanel(ChatChannel, dingtalk_stream.ChatbotHandler):
 
     def send(self, reply: Reply, context: Context):
         receiver = context["receiver"]
-        isgroup = context.kwargs['msg'].is_group
-        incoming_message = context.kwargs['msg'].incoming_message
+        
+        # Check if msg exists (for scheduled tasks, msg might be None)
+        msg = context.kwargs.get('msg')
+        if msg is None:
+            # 定时任务场景：使用主动发送 API
+            is_group = context.get("isgroup", False)
+            logger.info(f"[DingTalk] Sending scheduled task message to {receiver} (is_group={is_group})")
+            
+            # 使用缓存的 robot_code 或配置的值
+            robot_code = self._robot_code or conf().get("dingtalk_robot_code")
+            logger.info(f"[DingTalk] Using robot_code: {robot_code}, cached: {self._robot_code}, config: {conf().get('dingtalk_robot_code')}")
+            
+            if not robot_code:
+                logger.error(f"[DingTalk] Cannot send scheduled task: robot_code not available. Please send at least one message to the bot first, or configure dingtalk_robot_code in config.json")
+                return
+            
+            # 根据是否群聊选择不同的 API
+            if is_group:
+                success = self.send_group_message(receiver, reply.content, robot_code)
+            else:
+                # 单聊场景：尝试从 context 中获取 dingtalk_sender_staff_id
+                sender_staff_id = context.get("dingtalk_sender_staff_id")
+                if not sender_staff_id:
+                    logger.error(f"[DingTalk] Cannot send single chat scheduled message: sender_staff_id not available in context")
+                    return
+                
+                logger.info(f"[DingTalk] Sending single message to staff_id: {sender_staff_id}")
+                success = self.send_single_message(sender_staff_id, reply.content, robot_code)
+            
+            if not success:
+                logger.error(f"[DingTalk] Failed to send scheduled task message")
+            return
+        
+        # 从正常消息中提取并缓存 robot_code
+        if hasattr(msg, 'robot_code'):
+            robot_code = msg.robot_code
+            if robot_code and robot_code != self._robot_code:
+                self._robot_code = robot_code
+                logger.info(f"[DingTalk] Cached robot_code: {robot_code}")
+        
+        isgroup = msg.is_group
+        incoming_message = msg.incoming_message
 
         if conf().get("dingtalk_card_enabled"):
             logger.info("[Dingtalk] sendMsg={}, receiver={}".format(reply, receiver))
diff --git a/channel/dingtalk/dingtalk_message.py b/channel/dingtalk/dingtalk_message.py
index c069a1b..2196a1f 100644
--- a/channel/dingtalk/dingtalk_message.py
+++ b/channel/dingtalk/dingtalk_message.py
@@ -22,6 +22,7 @@ class DingTalkMessage(ChatMessage):
         self.create_time = event.create_at
         self.image_content = event.image_content
         self.rich_text_content = event.rich_text_content
+        self.robot_code = event.robot_code  # 机器人编码
         if event.conversation_type == "1":
             self.is_group = False
         else:
diff --git a/channel/feishu/feishu_channel.py b/channel/feishu/feishu_channel.py
index e971302..0def10b 100644
--- a/channel/feishu/feishu_channel.py
+++ b/channel/feishu/feishu_channel.py
@@ -204,10 +204,36 @@ class FeiShuChanel(ChatChannel):
             # 图片上传
             reply_content = self._upload_image_url(reply.content, access_token)
             if not reply_content:
-                logger.warning("[FeiShu] upload file failed")
+                logger.warning("[FeiShu] upload image failed")
                 return
             msg_type = "image"
             content_key = "image_key"
+        elif reply.type == ReplyType.FILE:
+            # 判断是否为视频文件
+            file_path = reply.content
+            if file_path.startswith("file://"):
+                file_path = file_path[7:]
+            
+            is_video = file_path.lower().endswith(('.mp4', '.avi', '.mov', '.wmv', '.flv'))
+            
+            if is_video:
+                # 视频使用 media 类型
+                file_key = self._upload_video_url(reply.content, access_token)
+                if not file_key:
+                    logger.warning("[FeiShu] upload video failed")
+                    return
+                reply_content = file_key
+                msg_type = "media"
+                content_key = "file_key"
+            else:
+                # 其他文件使用 file 类型
+                file_key = self._upload_file_url(reply.content, access_token)
+                if not file_key:
+                    logger.warning("[FeiShu] upload file failed")
+                    return
+                reply_content = file_key
+                msg_type = "file"
+                content_key = "file_key"
         
         # Check if we can reply to an existing message (need msg_id)
         can_reply = is_group and msg and hasattr(msg, 'msg_id') and msg.msg_id
@@ -260,7 +286,34 @@ class FeiShuChanel(ChatChannel):
 
 
     def _upload_image_url(self, img_url, access_token):
-        logger.debug(f"[WX] start download image, img_url={img_url}")
+        logger.debug(f"[FeiShu] start process image, img_url={img_url}")
+        
+        # Check if it's a local file path (file:// protocol)
+        if img_url.startswith("file://"):
+            local_path = img_url[7:]  # Remove "file://" prefix
+            logger.info(f"[FeiShu] uploading local file: {local_path}")
+            
+            if not os.path.exists(local_path):
+                logger.error(f"[FeiShu] local file not found: {local_path}")
+                return None
+            
+            # Upload directly from local file
+            upload_url = "https://open.feishu.cn/open-apis/im/v1/images"
+            data = {'image_type': 'message'}
+            headers = {'Authorization': f'Bearer {access_token}'}
+            
+            with open(local_path, "rb") as file:
+                upload_response = requests.post(upload_url, files={"image": file}, data=data, headers=headers)
+                logger.info(f"[FeiShu] upload file, res={upload_response.content}")
+                
+                response_data = upload_response.json()
+                if response_data.get("code") == 0:
+                    return response_data.get("data").get("image_key")
+                else:
+                    logger.error(f"[FeiShu] upload failed: {response_data}")
+                    return None
+        
+        # Original logic for HTTP URLs
         response = requests.get(img_url)
         suffix = utils.get_path_suffix(img_url)
         temp_name = str(uuid.uuid4()) + "." + suffix
@@ -283,6 +336,207 @@ class FeiShuChanel(ChatChannel):
             os.remove(temp_name)
             return upload_response.json().get("data").get("image_key")
 
+    def _upload_video_url(self, video_url, access_token):
+        """
+        Upload video to Feishu and return file_key (for media type messages)
+        Supports:
+        - file:// URLs for local files
+        - http(s):// URLs (download then upload)
+
+        A remote video is first downloaded to a temporary file in the current
+        working directory; that file is removed again whether or not the
+        upload succeeds.
+
+        The download is read fully into memory (response.content) before it
+        is written to disk.
+
+        :param video_url: "file://<path>" for a local file, otherwise a URL
+            that is downloaded with requests.get
+        :param access_token: token sent as the Bearer Authorization header
+        :return: file_key from the upload response, or None on any failure
+        """
+        # Upload video to Feishu (use file upload API, but send as media type)
+        upload_url = "https://open.feishu.cn/open-apis/im/v1/files"
+        headers = {'Authorization': f'Bearer {access_token}'}
+        # Media type only supports mp4, so every upload is declared as mp4
+        # regardless of the file extension
+        file_type = 'mp4'
+
+        # For file:// URLs (local files), upload directly
+        if video_url.startswith("file://"):
+            local_path = video_url[7:]  # Remove file:// prefix
+            if not os.path.exists(local_path):
+                logger.error(f"[FeiShu] local video file not found: {local_path}")
+                return None
+
+            file_name = os.path.basename(local_path)
+            data = {'file_type': file_type, 'file_name': file_name}
+
+            try:
+                with open(local_path, "rb") as file:
+                    upload_response = requests.post(
+                        upload_url,
+                        files={"file": file},
+                        data=data,
+                        headers=headers,
+                        timeout=(5, 60)  # 5s connect, 60s read timeout (videos are larger)
+                    )
+                logger.info(f"[FeiShu] upload video response, status={upload_response.status_code}, res={upload_response.content}")
+
+                response_data = upload_response.json()
+                if response_data.get("code") == 0:
+                    return response_data.get("data").get("file_key")
+                logger.error(f"[FeiShu] upload video failed: {response_data}")
+                return None
+            except Exception as e:
+                logger.error(f"[FeiShu] upload video exception: {e}")
+                return None
+
+        # For HTTP URLs, download first then upload
+        temp_name = None  # path of the downloaded copy, once it exists
+        try:
+            logger.info(f"[FeiShu] Downloading video from URL: {video_url}")
+            response = requests.get(video_url, timeout=(5, 60))
+            if response.status_code != 200:
+                logger.error(f"[FeiShu] download video failed, status={response.status_code}")
+                return None
+
+            # Derive the name from the URL path only, so a query string or
+            # fragment cannot leak into the file name
+            import uuid
+            from urllib.parse import unquote, urlparse
+            file_name = os.path.basename(unquote(urlparse(video_url).path)) or "video.mp4"
+            data = {'file_type': file_type, 'file_name': file_name}
+
+            # Save to temp file
+            temp_name = str(uuid.uuid4()) + "_" + file_name
+            with open(temp_name, "wb") as file:
+                file.write(response.content)
+
+            logger.info(f"[FeiShu] Video downloaded, size={len(response.content)} bytes, uploading...")
+
+            # Upload
+            with open(temp_name, "rb") as file:
+                upload_response = requests.post(upload_url, files={"file": file}, data=data, headers=headers, timeout=(5, 60))
+            logger.info(f"[FeiShu] upload video, res={upload_response.content}")
+
+            response_data = upload_response.json()
+            if response_data.get("code") == 0:
+                return response_data.get("data").get("file_key")
+            logger.error(f"[FeiShu] upload video failed: {response_data}")
+            return None
+        except Exception as e:
+            logger.error(f"[FeiShu] upload video from URL exception: {e}")
+            return None
+        finally:
+            # Remove the temp file on every path, and only after its handle
+            # is closed: deleting a file that is still open fails on Windows
+            if temp_name and os.path.exists(temp_name):
+                os.remove(temp_name)
+
+    def _upload_file_url(self, file_url, access_token):
+        """
+        Upload file to Feishu and return file_key
+        Supports both local files (file://) and HTTP URLs
+
+        A remote file is first downloaded to a temporary file in the current
+        working directory; that file is removed again whether or not the
+        upload succeeds.
+
+        :param file_url: "file://<path>" for a local file, otherwise a URL
+            that is downloaded with requests.get
+        :param access_token: token sent as the Bearer Authorization header
+        :return: file_key from the upload response, or None on any failure
+        """
+        logger.debug(f"[FeiShu] start process file, file_url={file_url}")
+
+        # Feishu supports: opus, mp4, pdf, doc, xls, ppt, stream (other types)
+        file_type_map = {
+            '.opus': 'opus',
+            '.mp4': 'mp4',
+            '.pdf': 'pdf',
+            '.doc': 'doc', '.docx': 'doc',
+            '.xls': 'xls', '.xlsx': 'xls',
+            '.ppt': 'ppt', '.pptx': 'ppt',
+        }
+        upload_url = "https://open.feishu.cn/open-apis/im/v1/files"
+        headers = {'Authorization': f'Bearer {access_token}'}
+
+        # Check if it's a local file path (file:// protocol)
+        if file_url.startswith("file://"):
+            local_path = file_url[7:]  # Remove "file://" prefix
+            logger.info(f"[FeiShu] uploading local file: {local_path}")
+
+            if not os.path.exists(local_path):
+                logger.error(f"[FeiShu] local file not found: {local_path}")
+                return None
+
+            file_name = os.path.basename(local_path)
+            file_ext = os.path.splitext(file_name)[1].lower()
+            file_type = file_type_map.get(file_ext, 'stream')  # Default to stream for other types
+            data = {'file_type': file_type, 'file_name': file_name}
+
+            try:
+                with open(local_path, "rb") as file:
+                    upload_response = requests.post(
+                        upload_url,
+                        files={"file": file},
+                        data=data,
+                        headers=headers,
+                        timeout=(5, 30)  # 5s connect, 30s read timeout
+                    )
+                logger.info(f"[FeiShu] upload file response, status={upload_response.status_code}, res={upload_response.content}")
+
+                response_data = upload_response.json()
+                if response_data.get("code") == 0:
+                    return response_data.get("data").get("file_key")
+                logger.error(f"[FeiShu] upload file failed: {response_data}")
+                return None
+            except Exception as e:
+                logger.error(f"[FeiShu] upload file exception: {e}")
+                return None
+
+        # For HTTP URLs, download first then upload
+        temp_name = None  # path of the downloaded copy, once it exists
+        try:
+            response = requests.get(file_url, timeout=(5, 30))
+            if response.status_code != 200:
+                logger.error(f"[FeiShu] download file failed, status={response.status_code}")
+                return None
+
+            # Derive the name from the URL path only, so a query string or
+            # fragment cannot leak into the file name or its extension
+            import uuid
+            from urllib.parse import unquote, urlparse
+            file_name = os.path.basename(unquote(urlparse(file_url).path)) or "file"
+            file_ext = os.path.splitext(file_name)[1].lower()
+            file_type = file_type_map.get(file_ext, 'stream')
+            data = {'file_type': file_type, 'file_name': file_name}
+
+            # Save to temp file
+            temp_name = str(uuid.uuid4()) + "_" + file_name
+            with open(temp_name, "wb") as file:
+                file.write(response.content)
+
+            # Upload with the same timeout as the local-file branch, so a
+            # stalled connection cannot block the caller indefinitely
+            with open(temp_name, "rb") as file:
+                upload_response = requests.post(upload_url, files={"file": file}, data=data, headers=headers, timeout=(5, 30))
+            logger.info(f"[FeiShu] upload file, res={upload_response.content}")
+
+            response_data = upload_response.json()
+            if response_data.get("code") == 0:
+                return response_data.get("data").get("file_key")
+            logger.error(f"[FeiShu] upload file failed: {response_data}")
+            return None
+        except Exception as e:
+            logger.error(f"[FeiShu] upload file from URL exception: {e}")
+            return None
+        finally:
+            # Always remove the temp file, and only once its handle is closed
+            if temp_name and os.path.exists(temp_name):
+                os.remove(temp_name)
+
     def _compose_context(self, ctype: ContextType, content, **kwargs):
         context = Context(ctype, content)
         context.kwargs = kwargs
@@ -291,13 +545,18 @@ class FeiShuChanel(ChatChannel):
 
         cmsg = context["msg"]
         
-        # Set session_id based on chat type to ensure proper session isolation
+        # Set session_id based on chat type
         if cmsg.is_group:
-            # Group chat: combine user_id and group_id to create unique session per user per group
-            # This ensures:
-            # - Same user in different groups have separate conversation histories
-            # - Same user in private chat and group chat have separate histories
-            context["session_id"] = f"{cmsg.from_user_id}:{cmsg.other_user_id}"
+            # Group chat: check if group_shared_session is enabled
+            if conf().get("group_shared_session", True):
+                # All users in the group share the same session context
+                context["session_id"] = cmsg.other_user_id  # group_id
+            else:
+                # Each user has their own session within the group
+                # This ensures:
+                # - Same user in different groups have separate conversation histories
+                # - Same user in private chat and group chat have separate histories
+                context["session_id"] = f"{cmsg.from_user_id}:{cmsg.other_user_id}"
         else:
             # Private chat: use user_id only
             context["session_id"] = cmsg.from_user_id
diff --git a/channel/feishu/feishu_message.py b/channel/feishu/feishu_message.py
index e2054c1..7b6408f 100644
--- a/channel/feishu/feishu_message.py
+++ b/channel/feishu/feishu_message.py
@@ -1,10 +1,12 @@
 from bridge.context import ContextType
 from channel.chat_message import ChatMessage
 import json
+import os
 import requests
 from common.log import logger
 from common.tmp_dir import TmpDir
 from common import utils
+from config import conf
 
 
 class FeishuMessage(ChatMessage):
@@ -22,6 +24,99 @@ class FeishuMessage(ChatMessage):
             self.ctype = ContextType.TEXT
             content = json.loads(msg.get('content'))
             self.content = content.get("text").strip()
+        elif msg_type == "image":
+            # 单张图片消息，不处理和存储
+            self.ctype = ContextType.IMAGE
+            content = json.loads(msg.get("content"))
+            image_key = content.get("image_key")
+            # 仅记录图片key，不下载
+            self.content = f"[图片: {image_key}]"
+            logger.info(f"[FeiShu] Received single image message, key={image_key}, skipped download")
+        elif msg_type == "post":
+            # 富文本消息，可能包含图片、文本等多种元素
+            content = json.loads(msg.get("content"))
+            
+            # 飞书富文本消息结构：content 直接包含 title 和 content 数组
+            # 不是嵌套在 post 字段下
+            title = content.get("title", "")
+            content_list = content.get("content", [])
+            
+            logger.info(f"[FeiShu] Post message - title: '{title}', content_list length: {len(content_list)}")
+            
+            # 收集所有图片和文本
+            image_keys = []
+            text_parts = []
+            
+            if title:
+                text_parts.append(title)
+            
+            for block in content_list:
+                logger.debug(f"[FeiShu] Processing block: {block}")
+                # block 本身就是元素列表
+                if not isinstance(block, list):
+                    continue
+                    
+                for element in block:
+                    element_tag = element.get("tag")
+                    logger.debug(f"[FeiShu] Element tag: {element_tag}, element: {element}")
+                    if element_tag == "img":
+                        # 找到图片元素
+                        image_key = element.get("image_key")
+                        if image_key:
+                            image_keys.append(image_key)
+                    elif element_tag == "text":
+                        # 文本元素
+                        text_content = element.get("text", "")
+                        if text_content:
+                            text_parts.append(text_content)
+            
+            logger.info(f"[FeiShu] Parsed - images: {len(image_keys)}, text_parts: {text_parts}")
+            
+            # 富文本消息统一作为文本消息处理
+            self.ctype = ContextType.TEXT
+            
+            if image_keys:
+                # 如果包含图片，下载并在文本中引用本地路径
+                workspace_root = os.path.expanduser(conf().get("agent_workspace", "~/cow"))
+                tmp_dir = os.path.join(workspace_root, "tmp")
+                os.makedirs(tmp_dir, exist_ok=True)
+                
+                # 保存图片路径映射
+                self.image_paths = {}
+                for image_key in image_keys:
+                    image_path = os.path.join(tmp_dir, f"{image_key}.png")
+                    self.image_paths[image_key] = image_path
+                
+                def _download_images():
+                    for image_key, image_path in self.image_paths.items():
+                        url = f"https://open.feishu.cn/open-apis/im/v1/messages/{self.msg_id}/resources/{image_key}"
+                        headers = {"Authorization": "Bearer " + access_token}
+                        params = {"type": "image"}
+                        response = requests.get(url=url, headers=headers, params=params)
+                        if response.status_code == 200:
+                            with open(image_path, "wb") as f:
+                                f.write(response.content)
+                            logger.info(f"[FeiShu] Image downloaded from post message, key={image_key}, path={image_path}")
+                        else:
+                            logger.error(f"[FeiShu] Failed to download image from post, key={image_key}, status={response.status_code}")
+                
+                # 立即下载图片，不使用延迟下载
+                # 因为 TEXT 类型消息不会调用 prepare()
+                _download_images()
+                
+                # 构建消息内容：文本 + 图片路径
+                content_parts = []
+                if text_parts:
+                    content_parts.append("\n".join(text_parts).strip())
+                for image_key, image_path in self.image_paths.items():
+                    content_parts.append(f"[图片: {image_path}]")
+                
+                self.content = "\n".join(content_parts)
+                logger.info(f"[FeiShu] Received post message with {len(image_keys)} image(s) and text: {self.content}")
+            else:
+                # 纯文本富文本消息
+                self.content = "\n".join(text_parts).strip() if text_parts else "[富文本消息]"
+                logger.info(f"[FeiShu] Received post message (text only): {self.content}")
         elif msg_type == "file":
             self.ctype = ContextType.FILE
             content = json.loads(msg.get("content"))
diff --git a/config-template.json b/config-template.json
index c287fc6..ec9701a 100644
--- a/config-template.json
+++ b/config-template.json
@@ -20,9 +20,7 @@
     "Agent测试群",
     "ChatGPT测试群2"
   ],
-  "image_create_prefix": [
-    "画"
-  ],
+  "image_create_prefix": [""],
   "speech_recognition": true,
   "group_speech_recognition": false,
   "voice_reply_voice": false,
diff --git a/config.py b/config.py
index 0e8625a..6b9ec32 100644
--- a/config.py
+++ b/config.py
@@ -35,6 +35,7 @@ available_setting = {
     "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],  # 开启自动回复的群名称列表
     "group_name_keyword_white_list": [],  # 开启自动回复的群名称关键词列表
     "group_chat_in_one_session": ["ChatGPT测试群"],  # 支持会话上下文共享的群名称
+    "group_shared_session": True,  # 群聊是否共享会话上下文（所有成员共享），默认为True。False时每个用户在群内有独立会话
     "nick_name_black_list": [],  # 用户昵称黑名单
     "group_welcome_msg": "",  # 配置新人进群固定欢迎语，不配置则使用随机风格欢迎
     "trigger_by_self": False,  # 是否允许机器人触发
diff --git a/models/claudeapi/claude_api_bot.py b/models/claudeapi/claude_api_bot.py
index eaf08ae..4eb70c0 100644
--- a/models/claudeapi/claude_api_bot.py
+++ b/models/claudeapi/claude_api_bot.py
@@ -365,6 +365,7 @@ class ClaudeAPIBot(Bot, OpenAIImage):
         # Track tool use state
         tool_uses_map = {}  # {index: {id, name, input}}
         current_tool_use_index = -1
+        stop_reason = None  # Track stop reason from Claude
 
         try:
             # Make streaming HTTP request
@@ -440,6 +441,12 @@ class ClaudeAPIBot(Bot, OpenAIImage):
                                         tool_uses_map[current_tool_use_index]["input"] += delta.get("partial_json", "")
 
                             elif event_type == "message_delta":
+                                # Extract stop_reason from delta
+                                delta = event.get("delta", {})
+                                if "stop_reason" in delta:
+                                    stop_reason = delta.get("stop_reason")
+                                    logger.info(f"[Claude] Stream stop_reason: {stop_reason}")
+                                
                                 # Message complete - yield tool calls if any
                                 if tool_uses_map:
                                     for idx in sorted(tool_uses_map.keys()):
@@ -462,9 +469,13 @@ class ClaudeAPIBot(Bot, OpenAIImage):
                                                         }
                                                     }]
                                                 },
-                                                "finish_reason": None
+                                                "finish_reason": stop_reason
                                             }]
                                         }
+                            
+                            elif event_type == "message_stop":
+                                # Final event - log completion
+                                logger.debug(f"[Claude] Stream completed with stop_reason: {stop_reason}")
 
                         except json.JSONDecodeError:
                             continue
diff --git a/skills/linkai-agent/README.md b/skills/linkai-agent/README.md
new file mode 100644
index 0000000..d801ff6
--- /dev/null
+++ b/skills/linkai-agent/README.md
@@ -0,0 +1,297 @@
+# LinkAI Agent Skill
+
+这个 skill 允许你调用 LinkAI 平台上的多个应用(App)和工作流(Workflow)，通过简单的配置即可集成多个智能体能力。
+
+## 特性
+
+- ✅ **多应用支持** - 在一个配置文件中管理多个 LinkAI 应用/工作流
+- ✅ **动态加载** - skill 系统加载时自动从 `config.json` 读取应用列表
+- ✅ **自动技能描述** - 所有配置的应用会自动添加到技能描述中
+- ✅ **模型切换** - 可以为每个请求指定不同的模型
+- ✅ **知识库集成** - 支持应用绑定的知识库
+- ✅ **插件能力** - 支持应用启用的各类插件
+- ✅ **工作流执行** - 支持执行复杂的多步骤工作流
+
+## 快速开始
+
+### 1. 配置 API Key
+
+```bash
+env_config(action="set", key="LINKAI_API_KEY", value="your-linkai-api-key")
+```
+
+获取 API Key: https://link-ai.tech/console/interface
+
+### 2. 配置应用列表
+
+将 `config.json.template` 复制为 `config.json`：
+
+```bash
+cp config.json.template config.json
+```
+
+编辑 `config.json`，添加你的应用/工作流：
+
+```json
+{
+  "apps": [
+    {
+      "app_code": "G7z6vKwp",
+      "app_name": "通用助手",
+      "app_description": "通用AI助手，可以回答各类问题"
+    },
+    {
+      "app_code": "your_kb_app",
+      "app_name": "产品文档助手",
+      "app_description": "基于产品文档知识库的问答助手"
+    },
+    {
+      "app_code": "your_workflow",
+      "app_name": "数据分析工作流",
+      "app_description": "执行数据清洗、分析和可视化的完整工作流"
+    }
+  ]
+}
+```
+
+**注意：** 修改 `config.json` 后，Agent 在下次加载技能时会自动读取新配置。
+
+### 3. 调用应用
+
+```bash
+bash scripts/call.sh "G7z6vKwp" "What is artificial intelligence?"
+```
+
+## 使用示例
+
+### 基础调用
+
+```bash
+# 调用默认模型
+bash scripts/call.sh "G7z6vKwp" "解释一下量子计算"
+```
+
+### 指定模型
+
+```bash
+# 使用 GPT-4.1 模型
+bash scripts/call.sh "G7z6vKwp" "写一篇关于AI的文章" "LinkAI-4.1"
+
+# 使用 DeepSeek 模型
+bash scripts/call.sh "G7z6vKwp" "帮我写代码" "deepseek-chat"
+
+# 使用 Claude 模型
+bash scripts/call.sh "G7z6vKwp" "分析这段文本" "claude-4-sonnet"
+```
+
+### 调用工作流
+
+```bash
+# 工作流会按照配置的节点顺序执行
+bash scripts/call.sh "workflow_code" "输入数据或问题"
+```
+
+## ⚠️ 重要提示
+
+### 超时配置
+
+LinkAI 应用（特别是视频/图片生成、复杂工作流）可能需要较长时间处理。
+
+**脚本内置超时**：
+- 默认：120 秒（适合大多数场景）
+- 可通过第 5 个参数自定义：`bash scripts/call.sh <app_code> <question> "" "false" "180"`
+
+**推荐超时时间**：
+- **文本问答**：120 秒（默认）
+- **图片生成**：120-180 秒
+- **视频生成**：180-300 秒
+
+Agent 调用时会自动设置合适的超时时间。
+
+## 配置说明
+
+### config.json 字段
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `app_code` | string | 应用或工作流的唯一标识码，从 LinkAI 控制台获取 |
+| `app_name` | string | 应用名称，会显示在技能描述中 |
+| `app_description` | string | 应用功能描述，帮助 Agent 理解何时使用该应用 |
+
+### 获取 app_code
+
+1. 登录 [LinkAI 控制台](https://link-ai.tech/console)
+2. 进入「应用管理」或「工作流管理」
+3. 选择要集成的应用/工作流
+4. 在应用详情页找到 `app_code`
+
+## 支持的模型
+
+LinkAI 支持多种主流 AI 模型：
+
+**OpenAI 系列：**
+- `LinkAI-4.1` - GPT-4.1 (1000K 上下文)
+- `LinkAI-4.1-mini` - GPT-4.1 mini (1000K)
+- `LinkAI-4.1-nano` - GPT-4.1 nano (1000K)
+- `LinkAI-4o` - GPT-4o (128K)
+- `LinkAI-4o-mini` - GPT-4o mini (128K)
+
+**DeepSeek 系列：**
+- `deepseek-chat` - DeepSeek-V3 对话模型 (64K)
+- `deepseek-reasoner` - DeepSeek-R1 推理模型 (64K)
+
+**Claude 系列：**
+- `claude-4-sonnet` - Claude 4 Sonnet (200K)
+- `claude-3-7-sonnet` - Claude 3.7 (200K)
+- `claude-3-5-sonnet` - Claude 3.5 (200K)
+
+**Google 系列：**
+- `gemini-2.5-pro` - Gemini 2.5 Pro (1000K)
+- `gemini-2.0-flash` - Gemini 2.0 Flash (1000K)
+
+**国产模型：**
+- `qwen3` - 通义千问3 (128K)
+- `wenxin-4.5` - 文心一言4.5 (8K)
+- `doubao-1.5-pro-256k` - 豆包1.5 (256K)
+- `glm-4-plus` - 智谱GLM-4-Plus (4K)
+
+完整模型列表：https://link-ai.tech/console/models
+
+## 应用类型
+
+### 1. 普通应用
+
+配置了系统提示词和参数的标准对话应用，可以：
+- 设置角色和性格
+- 绑定知识库
+- 启用插件（图像识别、网页搜索、代码执行等）
+
+### 2. 知识库应用
+
+基于特定知识库的问答应用，适合：
+- 企业内部知识库
+- 产品文档问答
+- 客户支持
+
+### 3. 工作流
+
+多步骤的自动化流程，可以：
+- 串联多个处理节点
+- 条件分支
+- 循环处理
+- 调用外部 API
+
+## 响应格式
+
+### 成功响应
+
+```json
+{
+  "app_code": "G7z6vKwp",
+  "content": "人工智能（AI）是计算机科学的一个分支...",
+  "usage": {
+    "prompt_tokens": 10,
+    "completion_tokens": 150,
+    "total_tokens": 160
+  }
+}
+```
+
+### 错误响应
+
+```json
+{
+  "error": "LinkAI API error",
+  "message": "应用不存在",
+  "response": { ... }
+}
+```
+
+## 常见错误
+
+### LINKAI_API_KEY environment variable is not set
+**原因：** 未配置 API Key  
+**解决：** 使用 `env_config` 工具设置 LINKAI_API_KEY
+
+### 应用不存在 (402)
+**原因：** app_code 不正确或应用已删除  
+**解决：** 检查 app_code 是否正确，确认应用存在
+
+### 无访问权限 (403)
+**原因：** 尝试访问他人的私有应用  
+**解决：** 确保应用是公开的或你是创建者
+
+### 账号积分额度不足 (406)
+**原因：** LinkAI 账户余额不足  
+**解决：** 前往控制台充值
+
+### 内容审核不通过 (409)
+**原因：** 请求或响应包含敏感内容  
+**解决：** 修改输入内容，避免敏感词
+
+## 技术实现
+
+### 自动技能描述生成
+
+当 skill 系统加载 `linkai-agent` 时，会自动：
+1. 读取 `config.json` 中的应用列表
+2. 将每个应用的 name 和 description 动态添加到技能描述中
+3. Agent 加载时会看到完整的应用列表
+
+这是在 `agent/skills/loader.py` 中实现的特殊处理。
+
+### 工作流程
+
+```
+用户配置 config.json
+  ↓
+Agent 启动/重新加载技能
+  ↓
+SkillLoader 检测到 linkai-agent
+  ↓
+动态读取 config.json
+  ↓
+生成包含所有应用描述的 description
+  ↓
+Agent 看到所有可用应用的完整信息
+  ↓
+用户请求触发
+  ↓
+Agent 根据描述选择合适的应用
+  ↓
+调用 call.sh <app_code> <question>
+  ↓
+LinkAI API 处理并返回结果
+```
+
+## 最佳实践
+
+1. **清晰的描述** - 为每个应用写清晰、具体的描述，帮助 Agent 理解应用用途
+2. **合理分工** - 不同应用负责不同领域，避免功能重叠
+3. **无需重启** - 修改 config.json 后，Agent 下次加载技能时会自动更新
+4. **模型选择** - 根据任务复杂度选择合适的模型
+5. **知识库优化** - 为专业领域的应用绑定相关知识库
+
+## 扩展用法
+
+### 在 Agent 系统中使用
+
+当 Agent 系统加载这个 skill 时，会自动从 `config.json` 读取应用列表并生成描述：
+
+```
+Call LinkAI apps/workflows. 通用助手(G7z6vKwp: 通用AI助手，可以回答各类问题); 产品文档助手(your_kb_app: 基于产品文档知识库的问答助手); 数据分析工作流(your_workflow: 执行数据清洗、分析和可视化的完整工作流)
+```
+
+Agent 会根据用户问题自动选择最合适的应用进行调用。
+
+## 相关链接
+
+- LinkAI 平台: https://link-ai.tech
+- API 文档: https://docs.link-ai.tech
+- 控制台: https://link-ai.tech/console
+- 模型列表: https://link-ai.tech/console/models
+- 应用广场: https://link-ai.tech/square
+
+## License
+
+Part of the chatgpt-on-wechat project.
diff --git a/skills/linkai-agent/SKILL.md b/skills/linkai-agent/SKILL.md
new file mode 100644
index 0000000..364d5cc
--- /dev/null
+++ b/skills/linkai-agent/SKILL.md
@@ -0,0 +1,165 @@
+---
+name: linkai-agent
+description: Call LinkAI applications and workflows. Use bash command to execute like 'bash <base_dir>/scripts/call.sh <app_code> <question>'.
+homepage: https://link-ai.tech
+metadata:
+  emoji: 🤖
+  requires:
+    bins: ["curl"]
+    env: ["LINKAI_API_KEY"]
+  primaryEnv: "LINKAI_API_KEY"
+---
+
+# LinkAI Agent Caller
+
+Call LinkAI applications and workflows through API. Supports multiple apps/workflows configured in config.json.
+
+The available apps are dynamically loaded from `config.json` at skill loading time.
+
+## Setup
+
+This skill requires a LinkAI API key. If not configured:
+
+1. Get your API key from https://link-ai.tech/console/interface
+2. Set the key using: `env_config(action="set", key="LINKAI_API_KEY", value="your-key")`
+
+## Configuration
+
+1. Copy `config.json.template` to `config.json`
+2. Configure your apps/workflows:
+
+```json
+{
+  "apps": [
+    {
+      "app_code": "your_app_code",
+      "app_name": "App Name",
+      "app_description": "What this app does"
+    }
+  ]
+}
+```
+
+3. The skill description will be automatically updated when the agent loads this skill
+
+## Usage
+
+**Important**: Scripts are located relative to this skill's base directory.
+
+When you see this skill in `<available_skills>`, note the `<base_dir>` path.
+
+**CRITICAL**: Always use `bash` command to execute the script:
+
+```bash
+# General pattern (MUST start with bash):
+bash "<base_dir>/scripts/call.sh" "<app_code>" "<question>" [model] [stream] [timeout]
+
+# DO NOT execute the script directly like this (WRONG):
+# "<base_dir>/scripts/call.sh" ...
+
+# Parameters:
+# - app_code: LinkAI app or workflow code (required)
+# - question: User question (required)
+# - model: Override model (optional, uses app default if not specified)
+# - stream: Enable streaming (true/false, default: false)
+# - timeout: curl timeout in seconds (default: 120; pass a larger value for video/image generation)
+```
+
+**IMPORTANT - Timeout Configuration**:
+- The script has a **default timeout of 120 seconds** (suitable for most cases)
+- For complex tasks (video generation, large workflows), pass a longer timeout as the 5th parameter
+- The bash tool also needs sufficient timeout - set its `timeout` parameter accordingly
+- Example: `bash(command="bash <script> <app_code> <question> '' 'false' 180", timeout=200)`
+
+## Examples
+
+### Call an app (script uses its default 120s timeout)
+```bash
+bash(command='bash "<base_dir>/scripts/call.sh" "G7z6vKwp" "What is AI?"', timeout=130)
+```
+
+### Call an app with specific model
+```bash
+bash(command='bash "<base_dir>/scripts/call.sh" "G7z6vKwp" "Explain machine learning" "LinkAI-4.1"', timeout=130)
+```
+
+### Call a workflow with custom timeout (video generation)
+```bash
+# Pass timeout as 5th parameter to script, and set bash timeout slightly longer
+bash(command='bash "<base_dir>/scripts/call.sh" "workflow_code" "Generate a sunset video" "" "false" "180"', timeout=200)
+
+# A workflow that needs no custom timeout is called like any other app
+bash "<base_dir>/scripts/call.sh" "workflow_code" "Analyze this data: ..."
+```
+
+## Supported Models
+
+You can specify any LinkAI supported model:
+- `LinkAI-4.1` - Latest GPT-4.1 model (1000K context)
+- `LinkAI-4.1-mini` - GPT-4.1 mini (1000K context)
+- `LinkAI-4o` - GPT-4o model (128K context)
+- `LinkAI-4o-mini` - GPT-4o mini (128K context)
+- `deepseek-chat` - DeepSeek-V3 (64K context)
+- `deepseek-reasoner` - DeepSeek-R1 reasoning model
+- `claude-4-sonnet` - Claude 4 Sonnet (200K context)
+- `gemini-2.5-pro` - Gemini 2.5 Pro (1000K context)
+- And many more...
+
+Full model list: https://link-ai.tech/console/models
+
+## Response Format
+
+Success response:
+```json
+{
+  "app_code": "G7z6vKwp",
+  "content": "AI stands for Artificial Intelligence...",
+  "usage": {
+    "prompt_tokens": 10,
+    "completion_tokens": 50,
+    "total_tokens": 60
+  }
+}
+```
+
+Error response:
+```json
+{
+  "error": "Error description",
+  "message": "Detailed error message"
+}
+```
+
+## Features
+
+- ✅ **Multiple Apps**: Configure and call multiple LinkAI apps/workflows
+- ✅ **Dynamic Loading**: Apps are loaded from config.json at runtime
+- ✅ **Model Override**: Optionally specify model per request
+- ✅ **Streaming Support**: Enable streaming output
+- ✅ **Knowledge Base**: Apps can use configured knowledge bases
+- ✅ **Plugins**: Apps can use enabled plugins (image recognition, web search, etc.)
+- ✅ **Workflows**: Execute complex multi-step workflows
+
+## Notes
+
+- Each app/workflow maintains its own configuration (prompt, model, temperature, etc.)
+- Apps can have knowledge bases attached for domain-specific Q&A
+- Workflows execute from start node to end node and return final output
+- Token usage and costs depend on the model used
+- See LinkAI documentation for pricing: https://link-ai.tech/console/funds
+- The skill description is automatically generated from config.json when loaded
+
+## Troubleshooting
+
+**"LINKAI_API_KEY environment variable is not set"**
+- Use env_config tool to set the API key
+
+**"app_code is required"**
+- Make sure you're passing the app_code as the first parameter
+
+**"应用不存在" (App not found)**
+- Check that the app_code is correct
+- Ensure you have access to the app
+
+**"账号积分额度不足" (Insufficient credits)**
+- Top up your LinkAI account credits
diff --git a/skills/linkai-agent/config.json.template b/skills/linkai-agent/config.json.template
new file mode 100644
index 0000000..b22522d
--- /dev/null
+++ b/skills/linkai-agent/config.json.template
@@ -0,0 +1,14 @@
+{
+  "apps": [
+    {
+      "app_code": "your_app_code_2",
+      "app_name": "知识库助手",
+      "app_description": "基于特定领域知识库提供智能问答的知识助手"
+    },
+    {
+      "app_code": "your_workflow_code",
+      "app_name": "数据分析工作流",
+      "app_description": "用于数据分析任务的工作流程"
+    }
+  ]
+}
diff --git a/skills/linkai-agent/scripts/call.sh b/skills/linkai-agent/scripts/call.sh
new file mode 100755
index 0000000..1c262a7
--- /dev/null
+++ b/skills/linkai-agent/scripts/call.sh
@@ -0,0 +1,185 @@
+#!/usr/bin/env bash
+# LinkAI Agent Caller
+# API Docs: https://api.link-ai.tech/v1/chat/completions
+#
+# Usage: bash call.sh <app_code> <question> [model] [stream] [timeout]
+#   app_code  LinkAI app/workflow code (required)
+#   question  User message sent to the app (required)
+#   model     Optional model override; left out of the request when empty
+#   stream    "true" or "false" (default: false)
+#   timeout   curl --max-time in seconds (default: 120)
+#
+# Prints a JSON document on stdout; on failure prints {"error": ...} and exits 1.
+
+set -euo pipefail
+
+# Escape $1 for use inside a JSON string literal.
+# Covers backslash, double quote, newline, carriage return and tab; other
+# control characters are passed through unchanged.
+json_escape() {
+    local s=$1
+    s=${s//\\/\\\\}
+    s=${s//\"/\\\"}
+    s=${s//$'\n'/\\n}
+    s=${s//$'\r'/\\r}
+    s=${s//$'\t'/\\t}
+    printf '%s' "$s"
+}
+
+# Print the first JSON string value stored under key $2 in document $1.
+# The value is printed still JSON-escaped (an escaped quote does not end the
+# match), so it can be embedded in another JSON string as-is. Prints nothing
+# when the key has no string value.
+json_string_field() {
+    local doc=$1 key=$2 match
+    local pattern='"'"$key"'"[[:space:]]*:[[:space:]]*"(\\.|[^"\\])*"'
+    # "|| true": under errexit + pipefail a non-matching grep would otherwise
+    # terminate the whole script instead of yielding an empty value.
+    match=$(grep -o -E "$pattern" <<<"$doc" | head -n 1) || true
+    match=${match#*:}                         # drop the key and the colon
+    match=${match#"${match%%[![:space:]]*}"}  # drop whitespace before the value
+    match=${match#\"}
+    match=${match%\"}
+    printf '%s' "$match"
+}
+
+# Print the first unsigned integer stored under key $2 in document $1.
+json_number_field() {
+    local doc=$1 key=$2 match
+    match=$(grep -o -E "\"$key\"[[:space:]]*:[[:space:]]*[0-9]+" <<<"$doc" | head -n 1) || true
+    printf '%s' "${match##*[!0-9]}"
+}
+
+app_code="${1:-}"
+question="${2:-}"
+model="${3:-}"
+stream="${4:-false}"
+timeout="${5:-120}"  # Default 120 seconds for video/image generation
+
+if [ -z "$app_code" ]; then
+    echo '{"error": "app_code is required", "usage": "bash call.sh <app_code> <question> [model] [stream] [timeout]"}'
+    exit 1
+fi
+
+if [ -z "$question" ]; then
+    echo '{"error": "question is required", "usage": "bash call.sh <app_code> <question> [model] [stream] [timeout]"}'
+    exit 1
+fi
+
+# "stream" is spliced into the request body as a bare JSON literal, so only
+# the two boolean spellings are accepted.
+if [ "$stream" != "true" ] && [ "$stream" != "false" ]; then
+    echo '{"error": "stream must be true or false", "usage": "bash call.sh <app_code> <question> [model] [stream] [timeout]"}'
+    exit 1
+fi
+
+if [ -z "${LINKAI_API_KEY:-}" ]; then
+    echo '{"error": "LINKAI_API_KEY environment variable is not set", "help": "Use env_config to set LINKAI_API_KEY"}'
+    exit 1
+fi
+
+# API endpoint
+api_url="https://api.link-ai.tech/v1/chat/completions"
+
+# Build JSON request body. User-supplied values are JSON-escaped so quotes,
+# backslashes or newlines in them cannot corrupt the body.
+app_code_json=$(json_escape "$app_code")
+question_json=$(json_escape "$question")
+model_field=""
+if [ -n "$model" ]; then
+    model_field="\"model\": \"$(json_escape "$model")\","
+fi
+
+request_body=$(cat <<EOF
+{
+  "app_code": "$app_code_json",
+  $model_field
+  "messages": [
+    {
+      "role": "user",
+      "content": "$question_json"
+    }
+  ],
+  "stream": $stream
+}
+EOF
+)
+
+# Call LinkAI API. The call sits inside the "if" so that a curl failure reaches
+# the error report below instead of tripping errexit.
+if ! response=$(curl -sS --max-time "$timeout" \
+    -X POST \
+    -H "Authorization: Bearer $LINKAI_API_KEY" \
+    -H "Content-Type: application/json" \
+    -d "$request_body" \
+    "$api_url" 2>&1); then
+    echo "{\"error\": \"Failed to call LinkAI API\", \"details\": \"$(json_escape "$response")\"}"
+    exit 1
+fi
+
+# Simple JSON validation. A streamed reply is not required to be a single JSON
+# document, so in stream mode anything else is handed back to the caller raw.
+if [[ ! "$response" =~ ^[[:space:]]*[\{\[] ]]; then
+    if [ "$stream" = "true" ]; then
+        echo "$response"
+        exit 0
+    fi
+    echo "{\"error\": \"Invalid JSON response from API\", \"response\": \"$(json_escape "$response")\"}"
+    exit 1
+fi
+
+# Check for API error (top-level error only, not content_filter_result).
+# Here-strings are used instead of "echo | grep -q" so that grep exiting early
+# cannot turn a match into a pipefail failure.
+if grep -q '^[[:space:]]*{[[:space:]]*"error"[[:space:]]*:' <<<"$response" || grep -q '"error"[[:space:]]*:[[:space:]]*{[^}]*"code"[[:space:]]*:[[:space:]]*"[^"]*"[^}]*"message"' <<<"$response"; then
+    # Make sure it's not just content_filter_result inside choices
+    if ! grep -q '"choices"[[:space:]]*:[[:space:]]*\[' <<<"$response"; then
+        # Extract error message and code (both stay JSON-escaped)
+        error_msg=$(json_string_field "$response" "message")
+        error_code=$(json_string_field "$response" "code")
+
+        if [ -z "$error_msg" ]; then
+            error_msg="Unknown API error"
+        fi
+
+        # Provide friendly error message for content filter
+        if [ "$error_code" = "content_filter_error" ] || grep -qi "content.*filter" <<<"$error_msg"; then
+            echo "{\"error\": \"内容安全审核\", \"message\": \"您的问题或应用返回的内容触发了LinkAI的安全审核机制，请换一种方式提问或检查应用配置\", \"details\": \"$error_msg\"}"
+        else
+            echo "{\"error\": \"LinkAI API error\", \"message\": \"$error_msg\", \"code\": \"$error_code\"}"
+        fi
+        exit 1
+    fi
+fi
+
+# For non-stream mode, extract and format the response
+if [ "$stream" = "false" ]; then
+    # The extracted content stays JSON-escaped so the document printed below is
+    # valid JSON even when the reply contains quotes or line breaks.
+    content=$(json_string_field "$response" "content")
+
+    # Extract usage information
+    prompt_tokens=$(json_number_field "$response" "prompt_tokens")
+    completion_tokens=$(json_number_field "$response" "completion_tokens")
+    total_tokens=$(json_number_field "$response" "total_tokens")
+
+    if [ -n "$content" ]; then
+        cat <<EOF
+{
+  "app_code": "$app_code_json",
+  "content": "$content",
+  "usage": {
+    "prompt_tokens": ${prompt_tokens:-0},
+    "completion_tokens": ${completion_tokens:-0},
+    "total_tokens": ${total_tokens:-0}
+  }
+}
+EOF
+    else
+        # Return full response if we can't extract content
+        echo "$response"
+    fi
+else
+    # For stream mode, return raw response (caller needs to handle streaming)
+    echo "$response"
+fi
diff --git a/skills/openai-image-vision/EXAMPLE.md b/skills/openai-image-vision/EXAMPLE.md
new file mode 100644
index 0000000..295460f
--- /dev/null
+++ b/skills/openai-image-vision/EXAMPLE.md
@@ -0,0 +1,168 @@
+# OpenAI Image Vision - Usage Examples
+
+## Setup
+
+Set up your API credentials using the agent's env_config tool:
+
+```bash
+# Set your OpenAI API key
+env_config(action="set", key="OPENAI_API_KEY", value="sk-your-api-key-here")
+
+# Optional: Set custom API base URL (for proxy or compatible services)
+env_config(action="set", key="OPENAI_API_BASE", value="https://api.openai.com/v1")
+```
+
+## Example 1: Analyze a Local Image
+
+```bash
+bash scripts/vision.sh "/path/to/photo.jpg" "What's in this image?"
+```
+
+**Expected Output:**
+```json
+{
+  "model": "gpt-4.1-mini",
+  "content": "The image shows a beautiful landscape with mountains in the background and a lake in the foreground. The sky is clear with some clouds, and there are trees along the shoreline.",
+  "usage": {
+    "prompt_tokens": 1234,
+    "completion_tokens": 45,
+    "total_tokens": 1279
+  }
+}
+```
+
+## Example 2: Analyze an Image from URL
+
+```bash
+bash scripts/vision.sh "https://example.com/image.jpg" "Describe this image in detail"
+```
+
+## Example 3: Extract Text (OCR)
+
+```bash
+bash scripts/vision.sh "document.png" "Extract all text from this image"
+```
+
+**Use Case:** Extract text from screenshots, scanned documents, or photos of text.
+
+## Example 4: Identify Objects
+
+```bash
+bash scripts/vision.sh "scene.jpg" "List all objects you can identify in this image"
+```
+
+## Example 5: Analyze Colors and Composition
+
+```bash
+bash scripts/vision.sh "artwork.jpg" "Describe the color palette and composition of this image"
+```
+
+## Example 6: Count Items
+
+```bash
+bash scripts/vision.sh "crowd.jpg" "How many people are in this image?"
+```
+
+## Example 7: Use Different Models
+
+```bash
+# Use gpt-4.1-mini (default, latest mini model)
+bash scripts/vision.sh "image.jpg" "Analyze this" "gpt-4.1-mini"
+
+# Use gpt-4.1 (most capable, best for complex analysis)
+bash scripts/vision.sh "image.jpg" "Analyze this" "gpt-4.1"
+
+# Use gpt-4o-mini (previous mini model)
+bash scripts/vision.sh "image.jpg" "Analyze this" "gpt-4o-mini"
+```
+
+## Example 8: Complex Analysis
+
+```bash
+bash scripts/vision.sh "product.jpg" "Analyze this product image. Describe the product, its features, colors, and suggest what kind of marketing copy would work well for it."
+```
+
+## Example 9: Safety and Content Moderation
+
+```bash
+bash scripts/vision.sh "content.jpg" "Is there any inappropriate or unsafe content in this image?"
+```
+
+## Example 10: Technical Analysis
+
+```bash
+bash scripts/vision.sh "diagram.png" "Explain what this technical diagram represents and how it works"
+```
+
+## Integration with Agent
+
+When the agent loads this skill, it will be available in the `<available_skills>` section. The agent can use it like:
+
+```bash
+bash "<base_dir>/scripts/vision.sh" "user_uploaded_image.jpg" "What's in this image?"
+```
+
+The `<base_dir>` will be automatically provided by the skill system.
+
+## Error Handling Examples
+
+### Missing API Key
+```bash
+$ bash scripts/vision.sh "image.jpg" "What is this?"
+{"error": "OPENAI_API_KEY environment variable is not set", "help": "Visit https://platform.openai.com/api-keys to get an API key"}
+```
+
+### File Not Found
+```bash
+$ bash scripts/vision.sh "nonexistent.jpg" "What is this?"
+{"error": "Image file not found", "path": "nonexistent.jpg"}
+```
+
+### Unsupported Format
+```bash
+$ bash scripts/vision.sh "file.bmp" "What is this?"
+{"error": "Unsupported image format", "extension": "bmp", "supported": ["jpg", "jpeg", "png", "gif", "webp"]}
+```
+
+### Missing Parameters
+```bash
+$ bash scripts/vision.sh
+{"error": "Image path or URL is required", "usage": "bash vision.sh <image_path_or_url> <question> [model]"}
+```
+
+## Tips for Best Results
+
+1. **Be Specific**: Ask clear, specific questions about what you want to know
+2. **Image Quality**: Higher quality images generally produce better results
+3. **Model Selection**: 
+   - Use `gpt-4.1` for complex analysis requiring highest accuracy
+   - Use `gpt-4.1-mini` (default) for most tasks - latest mini model with good balance
+4. **Text Extraction**: For OCR tasks, ensure text is clearly visible and not too small
+5. **Multiple Aspects**: You can ask about multiple things in one question
+6. **Context**: Provide context in your question if needed (e.g., "This is a medical scan, what do you see?")
+
+## Performance Notes
+
+- **Local Files**: Automatically base64-encoded, adds ~33% size overhead
+- **URLs**: Passed directly to API, no encoding overhead
+- **Timeout**: 60 seconds for API calls
+- **Max Tokens**: 1000 tokens for responses (configurable in script)
+- **Rate Limits**: Subject to your OpenAI API plan
+
+## Supported Image Formats
+
+✅ JPEG (`.jpg`, `.jpeg`)  
+✅ PNG (`.png`)  
+✅ GIF (`.gif`)  
+✅ WebP (`.webp`)  
+
+❌ BMP, TIFF, SVG, and other formats are not supported
+
+## Cost Considerations
+
+Vision API calls cost more than text-only calls because they include image tokens. Costs vary by:
+- Model used (gpt-4.1 vs gpt-4.1-mini)
+- Image size and resolution
+- Length of response
+
+Check OpenAI's pricing page for current rates: https://openai.com/pricing
diff --git a/skills/openai-image-vision/README.md b/skills/openai-image-vision/README.md
new file mode 100644
index 0000000..1db0f42
--- /dev/null
+++ b/skills/openai-image-vision/README.md
@@ -0,0 +1,178 @@
+# OpenAI Image Vision Skill
+
+This skill enables image analysis using OpenAI's Vision API (GPT-4 Vision models).
+
+## Features
+
+- ✅ Analyze images from local files or URLs
+- ✅ Support for multiple image formats (JPEG, PNG, GIF, WebP)
+- ✅ Automatic base64 encoding for local files
+- ✅ Direct URL passing for remote images
+- ✅ Configurable model selection
+- ✅ Custom API base URL support
+- ✅ Pure bash/curl implementation (no Python dependencies)
+
+## Quick Start
+
+1. **Set up API credentials using env_config:**
+   ```bash
+   env_config(action="set", key="OPENAI_API_KEY", value="sk-your-api-key-here")
+   # Optional: custom API base
+   env_config(action="set", key="OPENAI_API_BASE", value="https://api.openai.com/v1")
+   ```
+
+2. **Analyze an image:**
+   ```bash
+   bash scripts/vision.sh "/path/to/photo.jpg" "What's in this image?"
+   ```
+
+3. **Analyze from URL:**
+   ```bash
+   bash scripts/vision.sh "https://example.com/image.jpg" "Describe this image"
+   ```
+
+4. **Choose a model (optional third argument):**
+   ```bash
+   bash scripts/vision.sh "/path/to/image.jpg" "What's in this image?" "gpt-4.1"
+   ```
+
+   When the third argument is omitted, the script uses `gpt-4.1-mini`;
+   see "Supported Models" below for the other options.
+
+## Usage Examples
+
+### Basic image analysis
+```bash
+bash scripts/vision.sh "photo.jpg" "What objects can you see?"
+```
+
+### Text extraction (OCR)
+```bash
+bash scripts/vision.sh "document.png" "Extract all text from this image"
+```
+
+### Detailed description
+```bash
+bash scripts/vision.sh "scene.jpg" "Describe this scene in detail, including colors, mood, and composition"
+```
+
+### Using different models
+```bash
+# Use gpt-4.1-mini (default, latest mini model)
+bash scripts/vision.sh "image.jpg" "Analyze this" "gpt-4.1-mini"
+
+# Use gpt-4.1 (most capable, latest model)
+bash scripts/vision.sh "image.jpg" "Analyze this" "gpt-4.1"
+
+# Use gpt-4o-mini (previous mini model)
+bash scripts/vision.sh "image.jpg" "Analyze this" "gpt-4o-mini"
+```
+
+## Environment Variables
+
+| Variable | Required | Default | Description |
+|----------|----------|---------|-------------|
+| `OPENAI_API_KEY` | Yes | - | Your OpenAI API key |
+| `OPENAI_API_BASE` | No | `https://api.openai.com/v1` | Custom API base URL |
+
+## Response Format
+
+Success response:
+```json
+{
+  "model": "gpt-4.1-mini",
+  "content": "The image shows a beautiful sunset over mountains...",
+  "usage": {
+    "prompt_tokens": 1234,
+    "completion_tokens": 567,
+    "total_tokens": 1801
+  }
+}
+```
+
+Error response:
+```json
+{
+  "error": "Error description",
+  "details": "Additional information"
+}
+```
+
+## Supported Models
+
+- `gpt-4.1-mini` (default) - Latest mini model, fast and cost-effective
+- `gpt-4.1` - Latest GPT-4 variant, most capable
+- `gpt-4o-mini` - Previous generation mini model
+- `gpt-4-turbo` - Previous generation turbo model
+
+## Supported Image Formats
+
+- JPEG (`.jpg`, `.jpeg`)
+- PNG (`.png`)
+- GIF (`.gif`)
+- WebP (`.webp`)
+
+## Technical Details
+
+- **Implementation**: Pure bash script using curl and base64
+- **Timeout**: 60 seconds for API calls
+- **Max tokens**: 1000 tokens for responses
+- **Image handling**: 
+  - Local files are automatically base64-encoded
+  - URLs are passed directly to the API
+  - MIME types are auto-detected from file extensions
+
+## Error Handling
+
+The script handles various error cases:
+- Missing required parameters
+- Missing API key
+- File not found
+- Unsupported image formats
+- API errors
+- Network timeouts
+- Invalid JSON responses
+
+## Integration with Agent System
+
+When loaded by the agent system, this skill will appear in `<available_skills>` with a `<base_dir>` path. Use it like:
+
+```bash
+bash "<base_dir>/scripts/vision.sh" "image.jpg" "What's in this image?"
+```
+
+The agent will automatically:
+- Load environment variables from `~/.cow/.env`
+- Provide the correct `<base_dir>` path
+- Handle skill discovery and registration
+
+## Notes
+
+- Images are sent to OpenAI's servers for processing
+- Large images may be automatically resized by the API
+- Rate limits depend on your OpenAI API plan
+- Token usage includes both the image and text in the prompt
+- Base64 encoding increases the size of local images by ~33%
+
+## Troubleshooting
+
+**"OPENAI_API_KEY environment variable is not set"**
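+- Set the key with the env_config tool: `env_config(action="set", key="OPENAI_API_KEY", value="your-key")`
+- When running the script outside the agent, export `OPENAI_API_KEY` in your shell first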
+
+**"Image file not found"**
+- Check the file path is correct
+- Use absolute paths or paths relative to current directory
+
+**"Unsupported image format"**
+- Only JPEG, PNG, GIF, and WebP are supported
+- Check the file extension matches the actual format
+
+**"Failed to call OpenAI API"**
+- Check your internet connection
+- Verify the API key is valid
+- Check if custom API base URL is correct
+
+## License
+
+Part of the chatgpt-on-wechat project.
diff --git a/skills/openai-image-vision/SKILL.md b/skills/openai-image-vision/SKILL.md
new file mode 100644
index 0000000..5173b0c
--- /dev/null
+++ b/skills/openai-image-vision/SKILL.md
@@ -0,0 +1,119 @@
+---
+name: openai-image-vision
+description: Analyze images using OpenAI's Vision API. Use bash command to execute the vision script like 'bash <base_dir>/scripts/vision.sh <image> <question>'. Can understand image content, objects, text, colors, and answer questions about images.
+homepage: https://platform.openai.com/docs/guides/vision
+metadata:
+  emoji: 👁️
+  requires:
+    bins: ["curl", "base64"]
+    env: ["OPENAI_API_KEY"]
+  primaryEnv: "OPENAI_API_KEY"
+---
+
+# OpenAI Image Vision
+
+Analyze images using OpenAI's GPT-4 Vision API. The model can understand visual elements including objects, shapes, colors, textures, and text within images.
+
+## Setup
+
+This skill requires an OpenAI API key. If not configured:
+
+1. Get your API key from https://platform.openai.com/api-keys
+2. Set the key using: `env_config(action="set", key="OPENAI_API_KEY", value="your-key")`
+
+Optional: Set custom API base URL (default: https://api.openai.com/v1):
+```bash
+env_config(action="set", key="OPENAI_API_BASE", value="your-base-url")
+```
+
+## Usage
+
+**Important**: Scripts are located relative to this skill's base directory.
+
+When you see this skill in `<available_skills>`, note the `<base_dir>` path.
+
+**CRITICAL**: Always use `bash` command to execute the script:
+
+```bash
+# General pattern (MUST start with bash):
+bash "<base_dir>/scripts/vision.sh" "<image_path_or_url>" "<question>" [model]
+
+# DO NOT execute the script directly like this (WRONG):
+# "<base_dir>/scripts/vision.sh" ...
+
+# Parameters:
+# - image_path_or_url: Local image file path or HTTP(S) URL (required)
+# - question: Question to ask about the image (required)
+# - model: OpenAI model to use (default: gpt-4.1-mini)
+#   Options: gpt-4.1-mini, gpt-4.1, gpt-4o-mini, gpt-4-turbo
+```
+
+## Examples
+
+### Analyze a local image
+```bash
+bash "<base_dir>/scripts/vision.sh" "/path/to/image.jpg" "What's in this image?"
+```
+
+### Analyze an image from URL
+```bash
+bash "<base_dir>/scripts/vision.sh" "https://example.com/image.jpg" "Describe this image in detail"
+```
+
+### Use specific model
+```bash
+bash "<base_dir>/scripts/vision.sh" "/path/to/photo.png" "What colors are prominent?" "gpt-4o-mini"
+```
+
+### Extract text from image
+```bash
+bash "<base_dir>/scripts/vision.sh" "/path/to/document.jpg" "Extract all text from this image"
+```
+
+### Analyze multiple aspects
+```bash
+bash "<base_dir>/scripts/vision.sh" "image.jpg" "List all objects you can see and describe the overall scene"
+```
+
+## Supported Image Formats
+
+- JPEG (.jpg, .jpeg)
+- PNG (.png)
+- GIF (.gif)
+- WebP (.webp)
+
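+**Large files**: Files larger than 1MB are downscaled to at most 800px on the longest side before upload when `sips` (macOS) or ImageMagick `convert` is available; otherwise the original file is sent. Downscaling can blur fine detail such as small text, so crop or resize the image yourself when that detail matters.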
+
+## Response Format
+
+The script returns a JSON response:
+
+```json
+{
+  "model": "gpt-4.1-mini",
+  "content": "The image shows...",
+  "usage": {
+    "prompt_tokens": 1234,
+    "completion_tokens": 567,
+    "total_tokens": 1801
+  }
+}
+```
+
+Or in case of error:
+
+```json
+{
+  "error": "Error description",
+  "details": "Additional error information"
+}
+```
+
+## Notes
+
+- **Image size**: Images are automatically resized if too large
+- **Timeout**: 60 seconds for API calls
+- **Rate limits**: Subject to your OpenAI API plan limits
+- **Privacy**: Images are sent to OpenAI's servers for processing
+- **Local files**: Automatically converted to base64 for API submission
+- **URLs**: Can be passed directly to the API without downloading
diff --git a/skills/openai-image-vision/scripts/vision.sh b/skills/openai-image-vision/scripts/vision.sh
new file mode 100755
index 0000000..65008c4
--- /dev/null
+++ b/skills/openai-image-vision/scripts/vision.sh
@@ -0,0 +1,253 @@
+#!/usr/bin/env bash
+# OpenAI Vision API wrapper
+# API Docs: https://platform.openai.com/docs/guides/vision
+#
+# Usage: bash vision.sh <image_path_or_url> <question> [model]
+#   image_path_or_url  Local image file or http(s) URL (required)
+#   question           Question to ask about the image (required)
+#   model              Model name (default: gpt-4.1-mini)
+#
+# Environment: OPENAI_API_KEY (required), OPENAI_API_BASE (optional).
+# Prints a JSON document on stdout; on failure prints {"error": ...} and exits 1.
+
+set -euo pipefail
+
+# Escape $1 for use inside a JSON string literal.
+# Covers backslash, double quote, newline, carriage return and tab; other
+# control characters are passed through unchanged.
+json_escape() {
+    local s=$1
+    s=${s//\\/\\\\}
+    s=${s//\"/\\\"}
+    s=${s//$'\n'/\\n}
+    s=${s//$'\r'/\\r}
+    s=${s//$'\t'/\\t}
+    printf '%s' "$s"
+}
+
+# Print the first JSON string value stored under key $2 in document $1.
+# The value is printed still JSON-escaped (an escaped quote does not end the
+# match), so it can be embedded in another JSON string as-is. Prints nothing
+# when the key has no string value.
+json_string_field() {
+    local doc=$1 key=$2 match
+    local pattern='"'"$key"'"[[:space:]]*:[[:space:]]*"(\\.|[^"\\])*"'
+    # "|| true": under errexit + pipefail a non-matching grep would otherwise
+    # terminate the whole script instead of yielding an empty value.
+    match=$(grep -o -E "$pattern" <<<"$doc" | head -n 1) || true
+    match=${match#*:}                         # drop the key and the colon
+    match=${match#"${match%%[![:space:]]*}"}  # drop whitespace before the value
+    match=${match#\"}
+    match=${match%\"}
+    printf '%s' "$match"
+}
+
+# Print the first unsigned integer stored under key $2 in document $1.
+json_number_field() {
+    local doc=$1 key=$2 match
+    match=$(grep -o -E "\"$key\"[[:space:]]*:[[:space:]]*[0-9]+" <<<"$doc" | head -n 1) || true
+    printf '%s' "${match##*[!0-9]}"
+}
+
+# Scratch directory for the downscaled copy of a large image; removed on exit.
+tmp_dir=""
+cleanup() {
+    if [ -n "$tmp_dir" ]; then
+        rm -rf "$tmp_dir"
+    fi
+}
+trap cleanup EXIT
+
+image_input="${1:-}"
+question="${2:-}"
+model="${3:-gpt-4.1-mini}"
+
+if [ -z "$image_input" ]; then
+    echo '{"error": "Image path or URL is required", "usage": "bash vision.sh <image_path_or_url> <question> [model]"}'
+    exit 1
+fi
+
+if [ -z "$question" ]; then
+    echo '{"error": "Question is required", "usage": "bash vision.sh <image_path_or_url> <question> [model]"}'
+    exit 1
+fi
+
+if [ -z "${OPENAI_API_KEY:-}" ]; then
+    echo '{"error": "OPENAI_API_KEY environment variable is not set", "help": "Visit https://platform.openai.com/api-keys to get an API key"}'
+    exit 1
+fi
+
+# Set API base URL (default to OpenAI's official endpoint)
+api_base="${OPENAI_API_BASE:-https://api.openai.com/v1}"
+# Remove trailing slash if present
+api_base="${api_base%/}"
+
+# JSON-escape the caller-supplied strings once; both request variants use them.
+model_json=$(json_escape "$model")
+question_json=$(json_escape "$question")
+
+# Determine if input is a URL or local file
+if [[ "$image_input" =~ ^https?:// ]]; then
+    # It's a URL - pass it through for the API to fetch
+    image_ref=$(json_escape "$image_input")
+else
+    # It's a local file - need to encode as base64
+    if [ ! -f "$image_input" ]; then
+        echo "{\"error\": \"Image file not found\", \"path\": \"$(json_escape "$image_input")\"}"
+        exit 1
+    fi
+
+    # Detect image format from the extension of the file the caller passed,
+    # before any downscaling, so unsupported formats are always rejected.
+    extension="${image_input##*.}"
+    extension_lower=$(echo "$extension" | tr '[:upper:]' '[:lower:]')
+
+    case "$extension_lower" in
+        jpg|jpeg)
+            mime_type="image/jpeg"
+            ;;
+        png)
+            mime_type="image/png"
+            ;;
+        gif)
+            mime_type="image/gif"
+            ;;
+        webp)
+            mime_type="image/webp"
+            ;;
+        *)
+            echo "{\"error\": \"Unsupported image format\", \"extension\": \"$(json_escape "$extension")\", \"supported\": [\"jpg\", \"jpeg\", \"png\", \"gif\", \"webp\"]}"
+            exit 1
+            ;;
+    esac
+
+    if ! command -v base64 &> /dev/null; then
+        echo '{"error": "base64 command not found", "help": "Please install base64 utility"}'
+        exit 1
+    fi
+
+    # Files larger than 1MB are downscaled before upload
+    file_size=$(wc -c < "$image_input" | tr -d ' ')
+    max_size=1048576  # 1MB
+    image_to_encode="$image_input"
+
+    if [ "$file_size" -gt "$max_size" ]; then
+        # A directory template ending in X's is used because only trailing X's
+        # are guaranteed to be substituted by every mktemp implementation.
+        tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/vision_compressed_XXXXXX")
+        compressed="$tmp_dir/image.jpg"
+
+        # Use sips (macOS) or convert (ImageMagick) to produce a JPEG of at most
+        # 800px on the longest side. Each tool runs inside "if" so a failed
+        # conversion falls back to the original file instead of tripping errexit.
+        if command -v sips &> /dev/null; then
+            if sips -s format jpeg -Z 800 "$image_input" --out "$compressed" &> /dev/null && [ -s "$compressed" ]; then
+                image_to_encode="$compressed"
+            fi
+        elif command -v convert &> /dev/null; then
+            if convert "$image_input" -resize 800x800\> "$compressed" 2>/dev/null && [ -s "$compressed" ]; then
+                image_to_encode="$compressed"
+            fi
+        fi
+
+        if [ "$image_to_encode" = "$compressed" ]; then
+            mime_type="image/jpeg"
+            >&2 echo "[vision.sh] Compressed large image ($(($file_size / 1024))KB) to avoid parameter limit"
+        fi
+    fi
+
+    # Encode via stdin (accepted by both GNU and BSD base64) and strip the line
+    # breaks GNU base64 inserts, which are not allowed inside a JSON string.
+    base64_image=$(base64 < "$image_to_encode" 2>/dev/null | tr -d '\r\n') || true
+
+    if [ -z "$base64_image" ]; then
+        echo "{\"error\": \"Failed to encode image to base64\", \"path\": \"$(json_escape "$image_input")\"}"
+        exit 1
+    fi
+
+    image_ref="data:$mime_type;base64,$base64_image"
+fi
+
+# Build JSON request body
+request_body=$(cat <<EOF
+{
+  "model": "$model_json",
+  "messages": [
+    {
+      "role": "user",
+      "content": [
+        {
+          "type": "text",
+          "text": "$question_json"
+        },
+        {
+          "type": "image_url",
+          "image_url": {
+            "url": "$image_ref"
+          }
+        }
+      ]
+    }
+  ],
+  "max_tokens": 1000
+}
+EOF
+)
+
+# Call OpenAI API. The body is streamed to curl on stdin because a base64 image
+# passed as a command-line argument can exceed the OS argument-size limit. The
+# call sits inside "if" so a curl failure reaches the error report below.
+if ! response=$(printf '%s' "$request_body" | curl -sS --max-time 60 \
+    -X POST \
+    -H "Authorization: Bearer $OPENAI_API_KEY" \
+    -H "Content-Type: application/json" \
+    --data-binary @- \
+    "$api_base/chat/completions" 2>&1); then
+    echo "{\"error\": \"Failed to call OpenAI API\", \"details\": \"$(json_escape "$response")\"}"
+    exit 1
+fi
+
+# Simple JSON validation - check if response starts with { or [
+if [[ ! "$response" =~ ^[[:space:]]*[\{\[] ]]; then
+    echo "{\"error\": \"Invalid JSON response from API\", \"response\": \"$(json_escape "$response")\"}"
+    exit 1
+fi
+
+# Check for API error (look for "error" field in response)
+if grep -q '"error"[[:space:]]*:[[:space:]]*{' <<<"$response"; then
+    # Extract error message if possible (kept JSON-escaped)
+    error_msg=$(json_string_field "$response" "message")
+    if [ -z "$error_msg" ]; then
+        error_msg="Unknown API error"
+    fi
+    echo "{\"error\": \"OpenAI API error\", \"message\": \"$error_msg\", \"response\": $response}"
+    exit 1
+fi
+
+# Extract the content from the response
+# The response structure is: choices[0].message.content
+# The value stays JSON-escaped so the document printed below is valid JSON.
+content=$(json_string_field "$response" "content")
+
+# Extract usage information
+prompt_tokens=$(json_number_field "$response" "prompt_tokens")
+completion_tokens=$(json_number_field "$response" "completion_tokens")
+total_tokens=$(json_number_field "$response" "total_tokens")
+
+# Build simplified response
+if [ -n "$content" ]; then
+    cat <<EOF
+{
+  "model": "$model_json",
+  "content": "$content",
+  "usage": {
+    "prompt_tokens": ${prompt_tokens:-0},
+    "completion_tokens": ${completion_tokens:-0},
+    "total_tokens": ${total_tokens:-0}
+  }
+}
+EOF
+else
+    # If we can't extract content, return the full response
+    echo "$response"
+fi