"""
Claude Code Executor

Executes tasks using Claude Code CLI and captures execution traces.
This is the core integration point between GEPA and Claude Code.

Supports:
- Running tasks with specific skills
- Capturing full execution traces
- Timeout and error handling
- Multiple execution modes (CLI, API simulation)
"""

import json
import os
import re
import subprocess
import tempfile
import time
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Optional


@dataclass
class ExecutionTrace:
    """Complete trace of a single Claude Code execution.

    Only ``task_id`` is required; every other field has a default so a trace
    can be created up front and filled in as information becomes available.
    """

    # Identification
    task_id: str
    # ISO-8601 string taken from the local clock when the trace is created.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    # Input
    prompt: str = ""
    skill_path: str = ""

    # Execution details
    success: bool = False
    exit_code: int = -1  # -1 until a process exit code has been recorded
    execution_time: float = 0.0  # seconds

    # Claude's reasoning and decisions
    reasoning: list[str] = field(default_factory=list)
    decisions: list[dict] = field(default_factory=list)

    # Code generated
    code_blocks: list[dict] = field(default_factory=list)  # {language, content, purpose}

    # Commands executed
    commands: list[dict] = field(default_factory=list)  # {cmd, exit_code, stdout, stderr}

    # Tool calls (for API mode)
    tool_calls: list[dict] = field(default_factory=list)

    # Files created/modified
    files_created: list[dict] = field(default_factory=list)  # {path, size, type}
    files_modified: list[str] = field(default_factory=list)

    # Errors and warnings
    errors: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)

    # Raw output
    raw_stdout: str = ""
    raw_stderr: str = ""
    conversation: list[dict] = field(default_factory=list)  # Full conversation if available

    # Metrics
    tokens_used: int = 0
    api_calls: int = 0

    def to_dict(self) -> dict:
        """Convert to a dictionary for serialization.

        ``raw_stdout``, ``raw_stderr`` and ``conversation`` are not included
        in the result, so a ``to_dict``/``from_dict`` round trip resets those
        three fields to their defaults.

        Returns:
            A dict keyed by field name. List values are the trace's own list
            objects, not copies.
        """
        return {
            "task_id": self.task_id,
            "timestamp": self.timestamp,
            "prompt": self.prompt,
            "skill_path": self.skill_path,
            "success": self.success,
            "exit_code": self.exit_code,
            "execution_time": self.execution_time,
            "reasoning": self.reasoning,
            "decisions": self.decisions,
            "code_blocks": self.code_blocks,
            "commands": self.commands,
            "tool_calls": self.tool_calls,
            "files_created": self.files_created,
            "files_modified": self.files_modified,
            "errors": self.errors,
            "warnings": self.warnings,
            "tokens_used": self.tokens_used,
            "api_calls": self.api_calls
        }

    def to_reflection_text(self) -> str:
        """Render the trace as Markdown-style text suitable for LLM reflection.

        Long content is abbreviated: the prompt to 200 characters, reasoning,
        warnings and commands to their first five entries, and code to the
        first two blocks of at most 500 characters each. Code is only shown
        for unsuccessful runs.

        Returns:
            The rendered sections joined with newlines.
        """
        # Only mark the prompt as elided when something was actually cut off;
        # an unconditional "..." would tell the reader a short prompt is
        # incomplete.
        prompt_excerpt = self.prompt[:200]
        if len(self.prompt) > 200:
            prompt_excerpt += "..."

        parts = [
            f"## Task: {self.task_id}",
            f"**Prompt**: {prompt_excerpt}",
            f"**Success**: {self.success}",
            f"**Execution Time**: {self.execution_time:.2f}s"
        ]

        if self.reasoning:
            parts.append("\n### Claude's Reasoning:")
            for reason in self.reasoning[:5]:
                parts.append(f"- {reason}")

        if self.errors:
            parts.append("\n### Errors:")
            for error in self.errors:
                parts.append(f"- ❌ {error}")

        if self.warnings:
            parts.append("\n### Warnings:")
            for warning in self.warnings[:5]:
                parts.append(f"- ⚠️ {warning}")

        if self.code_blocks and not self.success:
            parts.append("\n### Code Generated (excerpt):")
            for block in self.code_blocks[:2]:
                # "or ''" tolerates entries whose value is present but None.
                parts.append(f"```{block.get('language') or ''}")
                parts.append((block.get('content') or '')[:500])
                parts.append("```")

        if self.commands:
            parts.append("\n### Commands Executed:")
            for cmd in self.commands[:5]:
                # A missing exit code is treated as a failure.
                status = "✓" if cmd.get("exit_code", 1) == 0 else "✗"
                parts.append(f"- {status} `{(cmd.get('cmd') or '')[:100]}`")
                stderr = cmd.get("stderr")
                if stderr:
                    parts.append(f"  Error: {stderr[:200]}")

        return "\n".join(parts)

    @classmethod
    def from_dict(cls, data: dict) -> "ExecutionTrace":
        """Create a trace from a dictionary.

        Keys that are not dataclass fields are ignored.

        Args:
            data: Mapping of field names to values; must contain ``task_id``.

        Raises:
            TypeError: If ``task_id`` is missing from ``data``.
        """
        return cls(**{k: v for k, v in data.items() if k in cls.__dataclass_fields__})


@dataclass
class ExecutorConfig:
    """Configuration for Claude Code executor.

    Every field has a default, so ``ExecutorConfig()`` is a usable
    configuration. Fields are grouped by the aspect of execution they affect.
    """

    # Claude Code settings
    claude_path: str = "claude"  # Path to claude CLI (first element of the command line)
    # NOTE(review): `model` and `max_tokens` are not referenced by the
    # executor code in this file — confirm whether they are consumed elsewhere.
    model: str = "claude-sonnet-4-20250514"
    max_tokens: int = 8000
    timeout: int = 300  # seconds; limit applied to each CLI invocation

    # Execution settings
    work_dir: str = "/tmp/skill-optimizer"  # Parent directory for per-task working directories
    # NOTE(review): `capture_conversation` is not referenced by the executor
    # code in this file — confirm whether it is consumed elsewhere.
    capture_conversation: bool = True
    verbose: bool = False  # When True, the command line is printed before it runs

    # Skill settings
    skill_path: Optional[str] = None  # Override skill path; used when execute() is given none

    # Output settings
    save_traces: bool = True  # Write each trace to `traces_dir` as JSON
    traces_dir: str = "./traces"


class ClaudeCodeExecutor:
    """
    Execute tasks using Claude Code CLI.

    This executor invokes the `claude` CLI with a prompt and captures
    the full execution trace including:
    - Claude's reasoning
    - Code generated
    - Commands run
    - Files created
    - Errors encountered
    """

    # File-extension -> coarse file type, used to label discovered files.
    _FILE_TYPES = {
        ".pptx": "presentation",
        ".docx": "document",
        ".xlsx": "spreadsheet",
        ".pdf": "pdf",
        ".py": "python",
        ".js": "javascript",
        ".html": "html",
        ".css": "css",
        ".json": "json",
        ".md": "markdown"
    }

    def __init__(self, config: Optional["ExecutorConfig"] = None):
        """Store the configuration and create the directories it names.

        Args:
            config: Executor settings; a default ``ExecutorConfig`` is used
                when omitted.
        """
        self.config = config or ExecutorConfig()
        os.makedirs(self.config.work_dir, exist_ok=True)

        if self.config.save_traces:
            os.makedirs(self.config.traces_dir, exist_ok=True)

    def execute(
        self,
        prompt: str,
        task_id: str = "task",
        skill_path: Optional[str] = None,
        work_dir: Optional[str] = None
    ) -> "ExecutionTrace":
        """
        Execute a task using Claude Code.

        Failures of the CLI itself (timeout, missing binary, any other
        exception raised while running or parsing) are not raised; they are
        recorded in ``trace.errors`` with ``trace.success`` set to False.

        Args:
            prompt: The task prompt to execute
            task_id: Identifier for this task
            skill_path: Path to skill to use (overrides config)
            work_dir: Working directory (overrides config)

        Returns:
            ExecutionTrace with full execution details

        Raises:
            OSError: If the working directory cannot be created or the trace
                file cannot be written.
        """
        start_time = time.time()

        # Resolve the skill once so the recorded trace and the command line
        # that is actually run always agree.
        effective_skill_path = self._resolve_skill_path(skill_path)

        trace = ExecutionTrace(
            task_id=task_id,
            prompt=prompt,
            skill_path=effective_skill_path or ""
        )

        # Prepare working directory
        task_work_dir = work_dir or os.path.join(
            self.config.work_dir,
            f"{task_id}_{int(time.time())}"
        )
        os.makedirs(task_work_dir, exist_ok=True)

        try:
            # Build command
            cmd = self._build_command(prompt, effective_skill_path, task_work_dir)

            if self.config.verbose:
                print(f"Executing: {' '.join(cmd)}")

            # Execute Claude Code
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=self.config.timeout,
                cwd=task_work_dir,
                env=self._get_env()
            )

            trace.exit_code = result.returncode
            trace.raw_stdout = result.stdout
            trace.raw_stderr = result.stderr
            trace.success = result.returncode == 0

            # Parse the output
            self._parse_output(trace, result.stdout, result.stderr)

            # Find created files
            trace.files_created = self._find_created_files(task_work_dir)

        except subprocess.TimeoutExpired:
            trace.errors.append(f"Execution timed out after {self.config.timeout}s")
            trace.success = False

        except FileNotFoundError:
            trace.errors.append(f"Claude CLI not found at: {self.config.claude_path}")
            trace.success = False

        except Exception as e:
            # Top-level boundary: any other failure is reported on the trace
            # instead of being raised to the caller.
            trace.errors.append(f"Execution error: {str(e)}")
            trace.success = False

        trace.execution_time = time.time() - start_time

        # Save trace if configured
        if self.config.save_traces:
            self._save_trace(trace)

        return trace

    def execute_batch(
        self,
        tasks: list[dict],
        skill_path: Optional[str] = None
    ) -> list["ExecutionTrace"]:
        """
        Execute multiple tasks sequentially.

        Args:
            tasks: List of task dicts with 'id' and 'prompt' keys. A task
                without 'prompt' falls back to its 'description' (or an empty
                prompt); a task without 'id' is named ``task_<position>``.
            skill_path: Skill to use for all tasks

        Returns:
            List of ExecutionTraces, in the same order as ``tasks``
        """
        return [
            self.execute(
                prompt=task.get("prompt", task.get("description", "")),
                task_id=task.get("id", f"task_{index}"),
                skill_path=skill_path
            )
            for index, task in enumerate(tasks)
        ]

    def _resolve_skill_path(self, skill_path: Optional[str]) -> Optional[str]:
        """Return the explicit skill path, or the configured one if none is given."""
        return skill_path or self.config.skill_path

    def _build_command(
        self,
        prompt: str,
        skill_path: Optional[str],
        work_dir: str
    ) -> list[str]:
        """Build the claude CLI command.

        Args:
            prompt: Prompt text passed to the CLI.
            skill_path: Skill directory to expose to the CLI; falls back to
                ``config.skill_path`` when empty.
            work_dir: Not used when building the command; the working
                directory is applied by the caller when the process starts.

        Returns:
            The argument list, starting with the CLI path.
        """
        cmd = [self.config.claude_path]

        # Add skill path if provided (explicit argument first, then config)
        skill_path = self._resolve_skill_path(skill_path)
        if skill_path:
            # Claude Code uses different flags - adjust as needed
            # This might be --skill-path, --add-dir, or similar
            cmd.extend(["--add-dir", skill_path])

        # Output format for easier parsing
        cmd.extend(["--output-format", "json"])

        # Don't open browser/editor
        cmd.append("--print")

        # The prompt
        # NOTE(review): in the Claude Code CLI `-p` appears to be the short
        # form of `--print`, which would make the flag above redundant and the
        # prompt a positional argument — confirm against the CLI reference.
        cmd.extend(["-p", prompt])

        return cmd

    def _get_env(self) -> dict:
        """Return a copy of the current environment with telemetry turned off."""
        env = os.environ.copy()

        # Add any custom environment variables
        env["CLAUDE_CODE_TELEMETRY"] = "false"

        return env

    def _parse_output(self, trace: "ExecutionTrace", stdout: str, stderr: str):
        """Parse Claude Code output to extract trace information.

        ``stdout`` is treated as structured output only when it decodes to a
        JSON object. Anything else, including valid JSON that is not an
        object (array, string, number, null), is handled by the text parser.
        """
        try:
            data = json.loads(stdout)
        except json.JSONDecodeError:
            data = None

        if isinstance(data, dict):
            self._parse_json_output(trace, data)
            return

        # Fall back to text parsing
        self._parse_text_output(trace, stdout, stderr)

    def _parse_json_output(self, trace: "ExecutionTrace", data: dict):
        """Parse JSON-formatted Claude Code output.

        Reads the optional ``messages``, ``tool_results``, ``usage`` and
        ``api_calls`` keys. Entries that do not have the expected shape are
        skipped rather than aborting the whole parse.
        """
        # Extract conversation
        if "messages" in data:
            trace.conversation = data["messages"]

            for msg in data["messages"]:
                if not isinstance(msg, dict) or msg.get("role") != "assistant":
                    continue

                content = msg.get("content", "")
                if not isinstance(content, list):
                    # Plain-string content carries no structured blocks.
                    continue

                for block in content:
                    if not isinstance(block, dict):
                        continue
                    block_type = block.get("type")
                    if block_type == "thinking":
                        # Extract reasoning from thinking blocks
                        trace.reasoning.append(block.get("thinking", ""))
                    elif block_type == "tool_use":
                        trace.tool_calls.append({
                            "tool": block.get("name"),
                            "input": block.get("input")
                        })

        # Extract tool results
        if "tool_results" in data:
            for result in data["tool_results"]:
                if not isinstance(result, dict) or result.get("tool") != "bash":
                    continue
                tool_input = result.get("input")
                command = tool_input.get("command", "") if isinstance(tool_input, dict) else ""
                trace.commands.append({
                    "cmd": command,
                    "exit_code": result.get("exit_code", 0),
                    "stdout": result.get("stdout", ""),
                    "stderr": result.get("stderr", "")
                })

        # Extract metrics
        usage = data.get("usage")
        trace.tokens_used = usage.get("total_tokens", 0) if isinstance(usage, dict) else 0
        trace.api_calls = data.get("api_calls", 1)

    def _parse_text_output(self, trace: "ExecutionTrace", stdout: str, stderr: str):
        """Parse text-formatted output.

        Uses regular-expression heuristics on ``stdout`` to collect fenced
        code blocks, command mentions and reasoning phrases, and classifies
        ``stderr`` lines into errors and warnings.
        """
        # Extract code blocks
        code_pattern = r"```(\w+)?\n(.*?)```"
        for match in re.finditer(code_pattern, stdout, re.DOTALL):
            trace.code_blocks.append({
                "language": match.group(1) or "unknown",
                "content": match.group(2).strip()
            })

        # Extract command executions
        cmd_pattern = r"(?:Running|Executing|>\s*)(?:command:?\s*)?[`'\"]?([^`'\"]+?)[`'\"]?\s*(?:\n|$)"
        for match in re.finditer(cmd_pattern, stdout, re.IGNORECASE):
            trace.commands.append({
                "cmd": match.group(1).strip(),
                "exit_code": 0  # Unknown from text parsing
            })

        # Extract reasoning (common patterns)
        reasoning_patterns = [
            r"(?:I'll|I will|Let me|First,|Then,|Now)\s+(.+?)(?:\.|$)",
            r"(?:thinking|reasoning):\s*(.+?)(?:\n|$)"
        ]

        for pattern in reasoning_patterns:
            for match in re.finditer(pattern, stdout, re.IGNORECASE):
                trace.reasoning.append(match.group(1).strip())

        # Extract errors from stderr
        if stderr:
            for line in stderr.split("\n"):
                line = line.strip()
                # Skip blank lines and DEBUG/INFO log lines.
                if not line or line.startswith(("DEBUG", "INFO")):
                    continue
                lowered = line.lower()
                if "error" in lowered:
                    trace.errors.append(line)
                elif "warning" in lowered:
                    trace.warnings.append(line)

    def _find_created_files(self, work_dir: str) -> list[dict]:
        """List the files found under the working directory.

        Every file below ``work_dir`` is reported, including any that were
        already there before the run when the caller supplied the directory.

        Returns:
            One ``{path, size, type}`` dict per file, with ``path`` relative
            to ``work_dir``.
        """
        files = []

        for root, _, filenames in os.walk(work_dir):
            for filename in filenames:
                filepath = os.path.join(root, filename)

                try:
                    size = os.stat(filepath).st_size
                except OSError:
                    # Best effort: skip files that cannot be stat'ed.
                    continue

                files.append({
                    "path": os.path.relpath(filepath, work_dir),
                    "size": size,
                    "type": self._get_file_type(filename)
                })

        return files

    def _get_file_type(self, filename: str) -> str:
        """Determine file type from extension (case-insensitive).

        Returns:
            The mapped type name, or "unknown" for unmapped or missing
            extensions.
        """
        _, ext = os.path.splitext(filename.lower())
        return self._FILE_TYPES.get(ext, "unknown")

    def _save_trace(self, trace: "ExecutionTrace"):
        """Save trace to disk as JSON in the configured traces directory.

        The file is named ``<task_id>_<timestamp>.json``. Characters that are
        path separators or otherwise unsafe in file names are replaced, so a
        task id such as ``suite/case`` still yields a single file directly
        inside the traces directory.

        Raises:
            OSError: If the file cannot be written.
        """
        safe_task_id = re.sub(r'[\\/:*?"<>|]', "_", str(trace.task_id))
        trace_file = os.path.join(
            self.config.traces_dir,
            f"{safe_task_id}_{trace.timestamp.replace(':', '-')}.json"
        )

        with open(trace_file, "w", encoding="utf-8") as f:
            json.dump(trace.to_dict(), f, indent=2)


class MockClaudeExecutor(ClaudeCodeExecutor):
    """
    Mock executor for testing without Claude Code CLI.

    Returns pre-registered traces for known task ids and a randomly
    simulated trace for everything else. The CLI is never invoked.
    """

    def __init__(self, config: Optional[ExecutorConfig] = None):
        """Initialise the base executor and start with no registered responses."""
        super().__init__(config)
        self.mock_responses: dict[str, ExecutionTrace] = {}

    def add_mock_response(self, task_id: str, trace: ExecutionTrace):
        """Add a mock response for a specific task.

        A later registration for the same ``task_id`` replaces the earlier one.
        """
        self.mock_responses[task_id] = trace

    def execute(
        self,
        prompt: str,
        task_id: str = "task",
        skill_path: Optional[str] = None,
        work_dir: Optional[str] = None
    ) -> ExecutionTrace:
        """Return mock response or generate a simulated one.

        Args:
            prompt: Recorded on the returned trace.
            task_id: Selects a registered response if one exists.
            skill_path: Recorded on the returned trace (overrides config).
            work_dir: Accepted for interface compatibility; not used.

        Returns:
            A copy of the registered trace for ``task_id`` with ``prompt``
            and ``skill_path`` filled in, or a simulated trace when nothing
            is registered.
        """
        # Local import, matching the local-import style used for `random` below.
        import copy

        if task_id in self.mock_responses:
            # Hand out a copy: writing prompt/skill_path onto the registered
            # object would alter the caller's fixture and every trace
            # previously returned for this task id.
            trace = copy.deepcopy(self.mock_responses[task_id])
            trace.prompt = prompt
            # Same precedence as the real executor: argument, then config.
            trace.skill_path = skill_path or self.config.skill_path or ""
            return trace

        # Generate simulated response
        return self._simulate_execution(prompt, task_id, skill_path)

    def _simulate_execution(
        self,
        prompt: str,
        task_id: str,
        skill_path: Optional[str]
    ) -> ExecutionTrace:
        """Simulate Claude execution based on prompt.

        The outcome is random: the run is marked successful unless a uniform
        draw falls at or below 0.2, and the execution time is drawn from
        1 to 10 seconds. Results are therefore not reproducible unless the
        caller seeds the ``random`` module.
        """
        import random

        trace = ExecutionTrace(
            task_id=task_id,
            prompt=prompt,
            # Same precedence as the real executor: argument, then config.
            skill_path=skill_path or self.config.skill_path or ""
        )

        # Simulate some reasoning
        trace.reasoning = [
            f"Analyzing the request: {prompt[:50]}...",
            "Determining the appropriate approach",
            "Generating necessary code"
        ]

        # Simulate success most of the time
        trace.success = random.random() > 0.2
        trace.exit_code = 0 if trace.success else 1
        trace.execution_time = random.uniform(1.0, 10.0)

        if not trace.success:
            trace.errors.append("Simulated error for testing")

        return trace


def create_executor(
    mode: str = "cli",
    **kwargs
) -> ClaudeCodeExecutor:
    """
    Build an executor for the requested mode.

    Args:
        mode: 'mock' selects the mock executor used for testing; any other
            value (including the default 'cli') selects the real Claude Code
            executor
        **kwargs: Forwarded as-is to ExecutorConfig

    Returns:
        Executor instance initialised with the resulting configuration
    """
    executor_cls = MockClaudeExecutor if mode == "mock" else ClaudeCodeExecutor
    return executor_cls(ExecutorConfig(**kwargs))
