import os
import time
import re
import json
from pathlib import Path
from typing import Dict, Any, Optional, Tuple

from agent.tools.base_tool import BaseTool, ToolResult, ToolStage
from agent.models import LLMRequest
from common.log import logger


class FileSave(BaseTool):
    """Tool for saving content to files in the workspace directory.

    Runs as a post-process tool: it pulls the agent's most recent output from
    the context, asks an LLM (with heuristic fallbacks) for a file name/type,
    and writes the content under ``workspace/<team_name>/<task_dir>/``.
    """

    name = "file_save"
    description = "Save the agent's output to a file in the workspace directory. Content is automatically extracted from the agent's previous outputs."

    # Set as post-process stage tool: executes after the agent's final answer.
    stage = ToolStage.POST_PROCESS

    params = {
        "type": "object",
        "properties": {
            "file_name": {
                "type": "string",
                "description": "Optional. The name of the file to save. If not provided, a name will be generated based on the content."
            },
            "file_type": {
                "type": "string",
                "description": "Optional. The type/extension of the file (e.g., 'txt', 'md', 'py', 'java'). If not provided, it will be inferred from the content."
            },
            "extract_code": {
                "type": "boolean",
                "description": "Optional. If true, will attempt to extract code blocks from the content. Default is false."
            }
        },
        "required": []  # No required fields, as everything can be extracted from context
    }

    def __init__(self):
        # context is injected by the framework before execute() is called.
        self.context = None
        self.config = {}
        self.workspace_dir = Path("workspace")

    def execute(self, params: Dict[str, Any]) -> ToolResult:
        """Save content to a file in the workspace directory.

        :param params: Optional overrides: ``file_name``, ``file_type``,
            ``extract_code`` and (fallback only) ``task_dir``.
        :return: ToolResult with the saved file path, or a failure message.
        """
        # Content always comes from context, never from params.
        if not hasattr(self, 'context') or not self.context:
            return ToolResult.fail("Error: No context available to extract content from.")
        content = self._extract_content_from_context()
        if not content:
            return ToolResult.fail("Error: Couldn't extract content from context.")

        # Prefer the model for naming; fall back to manual/heuristic parameters.
        try:
            # Fix: original left task_dir possibly None here, which crashed the
            # Path join below; apply the same timestamp fallback as the
            # except-branch and _handle_multiple_code_blocks use.
            task_dir = self._get_task_dir_from_context() or f"task_{int(time.time())}"
            file_name, file_type, extract_code = self._get_file_params_from_model(content)
        except Exception as e:
            logger.error(f"Error determining file parameters: {str(e)}")
            task_dir = params.get("task_dir") or self._get_task_id_from_context() or f"task_{int(time.time())}"
            file_name = params.get("file_name") or self._infer_file_name(content)
            file_type = params.get("file_type") or self._infer_file_type(content)
            extract_code = params.get("extract_code", False)

        team_name = self._get_team_name_from_context() or "default_team"

        # Create directory structure workspace/<team>/<task>/.
        task_dir_path = self.workspace_dir / team_name / task_dir
        task_dir_path.mkdir(parents=True, exist_ok=True)

        if extract_code:
            # Keep the complete answer as markdown alongside the split code files.
            md_file_path = task_dir_path / f"{file_name}.md"
            with open(md_file_path, 'w', encoding='utf-8') as f:
                f.write(content)
            return self._handle_multiple_code_blocks(content)

        # Ensure file_name carries the chosen extension exactly once.
        if file_type and not file_name.endswith(f".{file_type}"):
            file_name = f"{file_name}.{file_type}"

        file_path = task_dir_path / file_name
        # Absolute path is what gets recorded in team_context for later agents.
        abs_file_path = file_path.absolute()

        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)
            # Append the saved-file note to the last agent output so downstream
            # agents can find the artifact.
            if hasattr(self.context, 'team_context'):
                self.context.team_context.agent_outputs[-1].output += f"\n\nSaved file: {abs_file_path}"
            return ToolResult.success({
                "status": "success",
                "file_path": str(file_path)  # Return relative path in result
            })
        except Exception as e:
            return ToolResult.fail(f"Error saving file: {str(e)}")

    def _handle_multiple_code_blocks(self, content: str) -> ToolResult:
        """Extract every code block from *content* and save each as its own file.

        :param content: Markdown-ish text containing one or more fenced blocks.
        :return: ToolResult listing the saved file paths, or a failure.
        """
        # Blocks are kept together with a few preceding lines so the namer can
        # spot headers like "## main.py" above the fence.
        code_blocks_with_context = self._extract_code_blocks_with_context(content)
        if not code_blocks_with_context:
            return ToolResult.fail("No code blocks found in the content.")

        task_dir = self._get_task_dir_from_context() or f"task_{int(time.time())}"
        team_name = self._get_team_name_from_context() or "default_team"
        task_dir_path = self.workspace_dir / team_name / task_dir
        task_dir_path.mkdir(parents=True, exist_ok=True)

        saved_files = []
        for index, block_with_context in enumerate(code_blocks_with_context):
            try:
                block_file_name, block_file_type = self._get_filename_for_code_block(block_with_context)
                # Fix: an empty name would have produced a path pointing at the
                # directory itself; fall back to an indexed placeholder.
                if not block_file_name:
                    block_file_name = f"code_block_{index}"
                    block_file_type = block_file_type or "txt"

                # Strip markdown fences/context so only runnable code is written.
                clean_code = self._clean_code_block(block_with_context)

                if block_file_type and not block_file_name.endswith(f".{block_file_type}"):
                    block_file_name = f"{block_file_name}.{block_file_type}"

                # Flat layout: no subdirectories under the task dir.
                file_path = task_dir_path / block_file_name
                abs_file_path = file_path.absolute()

                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(clean_code)

                saved_files.append({
                    "file_path": str(file_path),
                    "abs_file_path": str(abs_file_path),  # absolute path for internal use
                    "file_name": block_file_name,
                    "size": len(clean_code),
                    "status": "success",
                    "type": "code"
                })
            except Exception as e:
                # Best effort: one bad block must not abort the rest.
                logger.error(f"Error saving code block: {str(e)}")

        if not saved_files:
            return ToolResult.fail("Failed to save any code blocks.")

        # Record the artifacts on the last agent output (absolute paths), but
        # avoid appending twice if the note is already there.
        if hasattr(self, 'context') and self.context:
            if hasattr(self.context, 'team_context'):
                display_info = f"\n\nSaved files to {task_dir_path}:\n" + "\n".join(
                    [f"- {f['file_path']}" for f in saved_files])
                # NOTE(review): the dedup check compares relative-path text while
                # the appended text uses absolute paths — preserved as-is, but
                # the check can never match what was previously appended; verify.
                if not self.context.team_context.agent_outputs[-1].output.endswith(display_info):
                    abs_info = f"\n\nSaved files to {task_dir_path.absolute()}:\n" + "\n".join(
                        [f"- {f['abs_file_path']}" for f in saved_files])
                    self.context.team_context.agent_outputs[-1].output += abs_info

        return ToolResult.success({
            "status": "success",
            "files": [{"file_path": f["file_path"]} for f in saved_files]
        })

    def _extract_code_blocks_with_context(self, content: str) -> list:
        """Extract fenced code blocks, each bundled with a few preceding lines.

        :param content: The content to extract code blocks from.
        :return: List of strings, each holding up to 5 context lines plus one
            complete fenced block (opening fence through closing fence).
        """
        # NOTE(review): the original body of this method was lost to source
        # garbling; reconstructed to match how its result is consumed by
        # _get_filename_for_code_block (looks 5 lines above the fence) and
        # _clean_code_block (strips the fences). Confirm against callers.
        blocks = []
        lines = content.split('\n')
        i = 0
        while i < len(lines):
            if lines[i].strip().startswith('```'):
                # Find the closing bare fence for this block.
                j = i + 1
                while j < len(lines) and lines[j].strip() != '```':
                    j += 1
                context_start = max(0, i - 5)
                blocks.append('\n'.join(lines[context_start:min(j + 1, len(lines))]))
                i = j + 1
            else:
                i += 1
        return blocks

    def _get_filename_for_code_block(self, block_with_context: str) -> Tuple[str, str]:
        """Determine the file name for a code block.

        :param block_with_context: The code block with context lines.
        :return: Tuple of (file_name_without_extension, file_type); both empty
            strings when nothing could be determined.
        """
        # Define common code file extensions
        COMMON_CODE_EXTENSIONS = {
            'py', 'js', 'java', 'c', 'cpp', 'h', 'hpp', 'cs', 'go', 'rb', 'php',
            'html', 'css', 'ts', 'jsx', 'tsx', 'vue', 'sh', 'sql', 'json', 'xml',
            'yaml', 'yml', 'md', 'rs', 'swift', 'kt', 'scala', 'pl', 'r', 'lua'
        }

        # Examine only the context around the code block marker.
        lines = block_with_context.split('\n')

        # Find the opening fence that carries a language tag (```lang).
        start_marker_idx = -1
        for i, line in enumerate(lines):
            if line.strip().startswith('```') and not line.strip() == '```':
                start_marker_idx = i
                break
        if start_marker_idx == -1:
            # No code block marker found
            return "", ""

        # Extract the language from the code block marker (```py, ```java=x, ...).
        code_marker = lines[start_marker_idx].strip()
        language = ""
        if len(code_marker) > 3:
            language = code_marker[3:].strip().split('=')[0].strip()

        # Context range: 5 lines before and 2 after the marker.
        context_start = max(0, start_marker_idx - 5)
        context_end = min(len(lines), start_marker_idx + 3)
        context_lines = lines[context_start:context_end]

        # First, check for explicit file headers like "## filename.ext".
        for line in context_lines:
            header_match = re.search(r'^\s*#{1,6}\s+([a-zA-Z0-9_-]+\.[a-zA-Z0-9]+)\s*$', line)
            if header_match:
                file_name = header_match.group(1)
                file_type = os.path.splitext(file_name)[1].lstrip('.')
                if file_type in COMMON_CODE_EXTENSIONS:
                    return os.path.splitext(file_name)[0], file_type

        # Simple patterns to match explicit file names in the context.
        file_patterns = [
            # Match explicit file names in headers or text
            r'(?:file|filename)[:=\s]+[\'"]?([a-zA-Z0-9_-]+\.[a-zA-Z0-9]+)[\'"]?',
            # Match language=filename.ext in code markers
            r'language=([a-zA-Z0-9_-]+\.[a-zA-Z0-9]+)',
            # Match standalone filenames with extensions
            r'\b([a-zA-Z0-9_-]+\.(py|js|java|c|cpp|h|hpp|cs|go|rb|php|html|css|ts|jsx|tsx|vue|sh|sql|json|xml|yaml|yml|md|rs|swift|kt|scala|pl|r|lua))\b',
            # Match file paths in comments
            r'#\s*([a-zA-Z0-9_/-]+\.[a-zA-Z0-9]+)'
        ]

        for line in context_lines:
            line = line.strip()
            for pattern in file_patterns:
                matches = re.findall(pattern, line)
                if matches:
                    for match in matches:
                        if isinstance(match, tuple):
                            # Tuple match: (filename, extension).
                            file_name = match[0]
                            file_type = match[1]
                            # Skip code references like Direction.DOWN.
                            if not any(keyword in file_name for keyword in ['class.', 'enum.', 'import.']):
                                return os.path.splitext(file_name)[0], file_type
                        else:
                            # String match: full filename.
                            file_name = match
                            file_type = os.path.splitext(file_name)[1].lstrip('.')
                            if file_type in COMMON_CODE_EXTENSIONS and not any(
                                    keyword in file_name for keyword in ['class.', 'enum.', 'import.']):
                                return os.path.splitext(file_name)[0], file_type

        # No explicit file name found: ask the LLM to infer one from the code.
        code_lines = block_with_context.split('\n')
        code_preview = '\n'.join(code_lines[:20])

        model_to_use = self._resolve_model()
        if model_to_use:
            prompt = f"""Analyze the following code and determine the most appropriate file name and file type/extension.
The file name should be descriptive but concise, using snake_case (lowercase with underscores).
The file type should be a standard file extension (e.g., py, js, html, css, java).

Code preview (first 20 lines):
{code_preview}

Return your answer in JSON format with these fields:
- file_name: The suggested file name (without extension)
- file_type: The suggested file extension

JSON response:"""
            request = LLMRequest(
                messages=[{"role": "user", "content": prompt}],
                temperature=0,
                json_format=True
            )
            try:
                response = model_to_use.call(request)
                if not response.is_error:
                    # Strip any markdown fencing around the JSON answer.
                    json_content = self._clean_json_response(response.data["choices"][0]["message"]["content"])
                    result = json.loads(json_content)
                    file_name = result.get("file_name", "")
                    file_type = result.get("file_type", "")
                    if file_name and file_type:
                        return file_name, file_type
            except Exception as e:
                logger.error(f"Error using model to determine file name: {str(e)}")

        # Last resort: use the fence's language tag as the file type.
        if language and language in COMMON_CODE_EXTENSIONS:
            timestamp = int(time.time())
            return f"code_{timestamp}", language

        return "", ""

    def _resolve_model(self):
        """Return the first available LLM handle: context.model, then
        context.team_context.model, then this tool's own model, else None."""
        if hasattr(self, 'context') and self.context:
            if getattr(self.context, 'model', None):
                return self.context.model
            team_context = getattr(self.context, 'team_context', None)
            if team_context and getattr(team_context, 'model', None):
                return team_context.model
        return getattr(self, 'model', None)

    def _clean_json_response(self, text: str) -> str:
        """Clean a JSON response from the LLM by removing markdown code fences.

        :param text: Text containing JSON possibly wrapped in ``` markers.
        :return: Clean JSON string.
        """
        if text.startswith("```json"):
            text = text[7:]
        elif text.startswith("```"):
            # Skip the whole language-identifier line, whatever the language.
            first_newline = text.find('\n')
            if first_newline != -1:
                text = text[first_newline + 1:]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    def _clean_code_block(self, block_with_context: str) -> str:
        """Clean a code block by removing markdown markers and context lines.

        :param block_with_context: Code block with context lines.
        :return: Clean code ready to be written to a file.
        """
        # NOTE(review): the middle of this method was lost to source garbling;
        # reconstructed from the surviving comment ("full HTML or XML document"
        # check) and from how the result is used. Confirm against history.
        stripped = block_with_context.strip()
        # Full HTML/XML documents are kept whole, fences and all context removed
        # only if present.
        if stripped.startswith(('<!DOCTYPE', '<html', '<?xml')):
            return stripped

        # Keep only the lines between the opening and closing fences.
        cleaned_lines = []
        inside_block = False
        for line in block_with_context.split('\n'):
            if line.strip().startswith('```'):
                inside_block = not inside_block
                continue
            if inside_block:
                cleaned_lines.append(line)
        return '\n'.join(cleaned_lines)

    def _get_file_params_from_model(self, content: str) -> Tuple[str, str, bool]:
        """Ask the LLM for file parameters describing *content*.

        :param content: The content that will be saved.
        :return: Tuple of (file_name, file_type, extract_code).
        :raises RuntimeError: when no model is available (caller falls back to
            manual parameter extraction).
        """
        # NOTE(review): the head of this method (model lookup and prompt
        # opening) was lost to source garbling; reconstructed from the
        # surviving prompt tail and response handling. Confirm wording.
        model = self._resolve_model()
        if model is None:
            # Raise so execute() falls back to heuristic parameters.
            raise RuntimeError("No model available to determine file parameters")

        prompt = f"""Analyze the following content and determine the best file parameters for saving it.

Content preview:
{content[:500]}{"..." if len(content) > 500 else ""}

Respond in JSON format only with the following structure:
{{
    "is_code": true/false,  # Whether this is primarily code implementation
    "filename": "suggested_filename",  # Don't include extension, english words
    "extension": "appropriate_extension"  # Don't include the dot, e.g., "md", "py", "js"
}}
"""
        try:
            request = LLMRequest(
                messages=[{"role": "user", "content": prompt}],
                temperature=0.1,
                json_format=True
            )
            # Call the model through the standard interface.
            response = model.call(request)
            if response.is_error:
                logger.warning(f"Error from model: {response.error_message}")
                raise Exception(f"Model error: {response.error_message}")

            result = response.data["choices"][0]["message"]["content"]
            result = self._clean_json_response(result)
            params = json.loads(result)

            # For backward compatibility, return tuple format.
            file_name = params.get("filename", "output")
            # Remove dot from extension if present.
            file_type = params.get("extension", "md").lstrip(".")
            extract_code = params.get("is_code", False)
            return file_name, file_type, extract_code
        except Exception as e:
            logger.warning(f"Error getting file parameters from model: {e}")
            # Default fallback keeps the tool functional.
            return "output", "md", False

    def _get_team_name_from_context(self) -> Optional[str]:
        """Get the team name from the agent's context.

        :return: Team name or None if not found.
        """
        if hasattr(self, 'context') and self.context:
            # Prefer the team_context's name.
            if hasattr(self.context, 'team_context') and self.context.team_context:
                return self.context.team_context.name
            # Fall back to a name attribute directly on the context.
            if hasattr(self.context, 'name'):
                return self.context.name
        return None

    def _get_task_id_from_context(self) -> Optional[str]:
        """Get the task ID from the agent's context.

        :return: Task ID or None if not found.
        """
        if hasattr(self, 'context') and self.context:
            # Try the task object on the context itself.
            if hasattr(self.context, 'task') and self.context.task:
                return self.context.task.id
            # Then the team_context's task.
            if hasattr(self.context, 'team_context') and self.context.team_context:
                if hasattr(self.context.team_context, 'task') and self.context.team_context.task:
                    return self.context.team_context.task.id
        return None

    def _get_task_dir_from_context(self) -> Optional[str]:
        """Get the task directory name from the team context.

        :return: Task directory name (short name preferred, else task ID) or None.
        """
        if hasattr(self, 'context') and self.context:
            if hasattr(self.context, 'team_context') and self.context.team_context:
                if hasattr(self.context.team_context, 'task_short_name') and self.context.team_context.task_short_name:
                    return self.context.team_context.task_short_name
        # Fall back to the task ID if available.
        return self._get_task_id_from_context()

    def _extract_content_from_context(self) -> str:
        """Extract the content to save from the agent's context.

        :return: Extracted content, or "" when nothing is available.
        """
        if not hasattr(self, 'context') or not self.context:
            return ""

        # Most recent final answer on the context itself wins.
        if hasattr(self.context, 'final_answer') and self.context.final_answer:
            return self.context.final_answer

        # Then the latest output recorded on the team context.
        if hasattr(self.context, 'team_context') and self.context.team_context:
            if hasattr(self.context.team_context, 'agent_outputs') and self.context.team_context.agent_outputs:
                return self.context.team_context.agent_outputs[-1].output

        # Finally scan the action history backwards for a final answer.
        if hasattr(self.context, 'action_history') and self.context.action_history:
            for action in reversed(self.context.action_history):
                if "final_answer" in action and action["final_answer"]:
                    return action["final_answer"]
        return ""

    def _extract_code_blocks(self, content: str) -> str:
        """Extract code blocks from markdown content.

        :param content: The content to extract code blocks from.
        :return: All code-block bodies joined by blank lines, or the original
            content when no blocks are found.
        """
        code_block_pattern = r'```(?:\w+)?\n([\s\S]*?)\n```'
        code_blocks = re.findall(code_block_pattern, content)
        if code_blocks:
            return '\n\n'.join(code_blocks)
        return content  # Return original content if no code blocks found

    def _infer_file_name(self, content: str) -> str:
        """Infer a file name from the content.

        :param content: The content to analyze.
        :return: A suggested file name (without extension).
        """
        # Markdown title takes priority.
        title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
        if title_match:
            title = title_match.group(1).strip()
            return self._sanitize_filename(title)

        # Then the first class/function definition name.
        code_match = re.search(r'(class|def|function)\s+(\w+)', content)
        if code_match:
            return self._sanitize_filename(code_match.group(2))

        # Generic name based on detected content type.
        if self._is_likely_code(content):
            return "code"
        elif self._is_likely_markdown(content):
            return "document"
        elif self._is_likely_json(content):
            return "data"
        else:
            return "output"

    def _infer_file_type(self, content: str) -> str:
        """Infer the file type/extension from the content.

        :param content: The content to analyze.
        :return: A suggested file extension (without the dot).
        """
        # Check for common programming language patterns.
        if re.search(r'(import\s+[a-zA-Z0-9_]+|from\s+[a-zA-Z0-9_\.]+\s+import)', content):
            return "py"  # Python
        elif re.search(r'(public\s+class|private\s+class|protected\s+class)', content):
            return "java"  # Java
        elif re.search(r'(function\s+\w+\s*\(|const\s+\w+\s*=|let\s+\w+\s*=|var\s+\w+\s*=)', content):
            return "js"  # JavaScript
        # NOTE(review): the HTML pattern was eaten by source garbling
        # (r'()'); reconstructed as common HTML document markers — confirm.
        elif re.search(r'(<!DOCTYPE\s+html|<html|<head|<body)', content, re.IGNORECASE):
            return "html"  # HTML
        elif re.search(r'(#include\s+<\w+\.h>|int\s+main\s*\()', content):
            return "cpp"  # C/C++

        if self._is_likely_markdown(content):
            return "md"
        if self._is_likely_json(content):
            return "json"
        # Default to plain text.
        return "txt"

    def _is_likely_code(self, content: str) -> bool:
        """Check if the content is likely code."""
        # NOTE(review): the HTML-document check and the tag pattern were eaten
        # by source garbling; reconstructed from the surviving comments
        # ("HTML/XML tags", ".*?)") — confirm against history.
        if content.strip().startswith(('<!DOCTYPE', '<html', '<?xml')):
            return True
        code_patterns = [
            r'<\w+[^>]*>.*?</\w+>',        # HTML/XML tags
            r'(var|let|const)\s+\w+\s*=',  # JavaScript variable declarations
            r'#\s*\w+',                    # CSS ID selectors or Python comments
            r'\.\w+\s*\{',                 # CSS class selectors
            r'@media|@import|@font-face'   # CSS at-rules
        ]
        return any(re.search(pattern, content) for pattern in code_patterns)

    def _is_likely_markdown(self, content: str) -> bool:
        """Check if the content is likely markdown."""
        md_patterns = [
            r'^#\s+.+$',       # Headers
            r'^\*\s+.+$',      # Unordered lists
            r'^\d+\.\s+.+$',   # Ordered lists
            r'\[.+\]\(.+\)',   # Links
            r'!\[.+\]\(.+\)'   # Images
        ]
        return any(re.search(pattern, content, re.MULTILINE) for pattern in md_patterns)

    def _is_likely_json(self, content: str) -> bool:
        """Check if the content is likely JSON."""
        content = content.strip()
        if (content.startswith('{') and content.endswith('}')) or (
                content.startswith('[') and content.endswith(']')):
            try:
                json.loads(content)
                return True
            except (ValueError, TypeError):
                # Fix: was a bare except; json.loads raises ValueError
                # (JSONDecodeError) on malformed input.
                pass
        return False

    def _sanitize_filename(self, name: str) -> str:
        """Sanitize a string to be used as a filename.

        :param name: The string to sanitize.
        :return: A lowercase filename with spaces replaced and invalid
            characters removed, truncated to 50 characters.
        """
        name = name.replace(' ', '_')
        # Drop anything that is not a word char, hyphen, or dot.
        name = re.sub(r'[^\w\-\.]', '', name)
        if len(name) > 50:
            name = name[:50]
        return name.lower()

    def _process_file_path(self, file_path: str) -> Tuple[str, str]:
        """Split a file path into its base name and extension.

        :param file_path: The file path to process.
        :return: Tuple of (file_name_without_extension, file_type).
        """
        file_name = os.path.basename(file_path)
        file_type = os.path.splitext(file_name)[1].lstrip('.')
        return os.path.splitext(file_name)[0], file_type