#!/usr/bin/env python3
"""
Test Case Generator

Automatically generates test cases for any skill based on:
- Skill description and instructions
- Common usage patterns
- Edge cases and failure modes

Can use LLM to generate contextually relevant test cases
or use rule-based generation for quick setup.
"""

import os
import re
import sys
import yaml
from dataclasses import dataclass, field, asdict
from typing import Optional

# Put the parent of this file's directory on sys.path so the `core` package imported below resolves
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from core.skill_loader import Skill, SkillLoader


@dataclass
class GeneratedTestCase:
    """
    One auto-generated test case.

    Attributes:
        id: Identifier string for the case.
        description: Short human-readable summary of the case.
        prompt: The prompt text for the case.
        expected_outputs: List of dicts describing expected outputs
            (empty by default).
        quality_criteria: List of free-text criteria strings (empty by default).
        tags: List of tag strings (empty by default).
        complexity: Complexity label; defaults to "medium".
        weight: Numeric weight; defaults to 1.0.
    """
    # Required fields (positional order is part of the constructor signature).
    id: str
    description: str
    prompt: str
    # Optional fields; list defaults use a factory so instances never share lists.
    expected_outputs: list[dict] = field(default_factory=list)
    quality_criteria: list[str] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)
    complexity: str = "medium"
    weight: float = 1.0

    def to_dict(self) -> dict:
        """Return all fields as a plain dict, keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping


class TestCaseGenerator:
    """
    Generate test cases for any skill.

    Generation in this class is rule-based:
    - Template-based cases from ``TEMPLATES``, selected by a skill type that
      is guessed from keywords in the skill's description and instructions
    - Canned prompts used to pad the result up to the requested count
    - Regression variants derived from failed execution traces

    NOTE(review): earlier documentation also advertised LLM-based generation;
    no such code path exists in this class.
    """

    # Common test case templates for different skill types
    TEMPLATES = {
        "document": [
            ("simple", "Create a simple {doc_type} with a title and one paragraph"),
            ("structured", "Create a {doc_type} with multiple sections and headers"),
            ("formatted", "Create a {doc_type} with bullet points, bold, and italic text"),
            ("long", "Create a detailed {doc_type} with at least 5 pages of content"),
            ("edge_empty", "Create a minimal {doc_type} with just a title"),
            ("edge_unicode", "Create a {doc_type} with special characters: émojis 🎉, ñ, ü"),
        ],
        "presentation": [
            ("simple", "Create a 3-slide presentation about {topic}"),
            ("structured", "Create a presentation with title, agenda, content, and conclusion slides"),
            ("charts", "Create a presentation with a bar chart showing quarterly data"),
            ("long", "Create a 10-slide presentation covering multiple topics"),
            ("edge_overflow", "Create a presentation with a very long title that might overflow"),
            ("edge_bullets", "Create a slide with 15 bullet points"),
        ],
        "spreadsheet": [
            ("simple", "Create a spreadsheet with a simple data table"),
            ("formulas", "Create a spreadsheet with SUM and AVERAGE formulas"),
            ("charts", "Create a spreadsheet with a pie chart"),
            ("multi_sheet", "Create a spreadsheet with 3 sheets linked together"),
            ("edge_large", "Create a spreadsheet with 1000 rows of data"),
        ],
        "code": [
            ("simple", "Write a simple function that {action}"),
            ("class", "Create a class that represents a {entity}"),
            ("error_handling", "Write code with proper error handling for {scenario}"),
            ("test", "Write unit tests for the {component}"),
            ("edge_complex", "Write code that handles edge cases like empty input"),
        ],
        "generic": [
            ("simple", "Perform a basic task: {action}"),
            ("detailed", "Complete this detailed request: {detailed_action}"),
            ("multi_step", "First {step1}, then {step2}, finally {step3}"),
            ("edge_ambiguous", "{ambiguous_request}"),
            ("edge_minimal", "Do the minimum possible"),
        ]
    }

    # Ordered (skill_type, keywords) pairs; the first pair with any keyword
    # found as a substring of the skill text wins. Presentation is checked
    # first because it is the most specific.
    _TYPE_KEYWORDS = (
        ("presentation", ("pptx", "powerpoint", "presentation", "slide")),
        ("document", ("docx", "word", "document")),
        ("spreadsheet", ("xlsx", "excel", "spreadsheet")),
        ("document", ("pdf",)),
        ("code", ("code", "script", "function", "class")),
    )

    # Placeholder values used for every skill type unless overridden below.
    _DEFAULT_FILLS = {
        "doc_type": "document",
        "topic": "artificial intelligence",
        "action": "process the input",
        "detailed_action": "analyze the data and produce a report",
        "step1": "analyze the input",
        "step2": "process the data",
        "step3": "generate the output",
        "entity": "User",
        "scenario": "invalid input",
        "component": "main module",
        "ambiguous_request": "make it better"
    }

    # Per-skill-type overrides layered on top of _DEFAULT_FILLS.
    _TYPE_FILLS = {
        "presentation": {
            "doc_type": "presentation",
            "topic": "quarterly business results"
        },
        "spreadsheet": {
            "doc_type": "spreadsheet",
            "topic": "sales data"
        },
        "code": {
            "action": "calculates the factorial",
            "entity": "BankAccount"
        },
    }

    # File glob expected as output for each skill type ("generic" has none).
    _OUTPUT_PATTERNS = {
        "document": "*.docx",
        "presentation": "*.pptx",
        "spreadsheet": "*.xlsx",
        "code": "*.py",
    }

    # Extra quality criterion keyed by (skill_type, template_id).
    _EXTRA_CRITERIA = {
        ("presentation", "simple"): "Contains at least 3 slides",
        ("presentation", "charts"): "Contains a chart",
        ("document", "structured"): "Contains multiple sections",
        ("spreadsheet", "formulas"): "Contains working formulas",
    }

    # Canned prompts used to pad results once the templates are exhausted.
    _RANDOM_PROMPTS = {
        "presentation": [
            "Create a presentation about machine learning trends",
            "Make a pitch deck for a startup",
            "Create slides explaining the solar system",
            "Make a presentation for a project status update"
        ],
        "document": [
            "Write a report on market analysis",
            "Create a project proposal document",
            "Write meeting notes from a brainstorming session",
            "Create a user manual for a software product"
        ],
        "spreadsheet": [
            "Create a budget tracker spreadsheet",
            "Make a project timeline in a spreadsheet",
            "Create an inventory tracking sheet",
            "Build a grade calculator spreadsheet"
        ],
        "code": [
            "Write a function to parse JSON data",
            "Create a class for managing a todo list",
            "Write code to fetch data from an API",
            "Implement a simple calculator"
        ],
        "generic": [
            "Help me organize this information",
            "Create something useful from this data",
            "Process and summarize the input",
            "Generate a structured output"
        ]
    }

    # Clarifying notes appended to a prompt that previously failed.
    _FAILURE_NOTES = (
        "Please ensure the output is properly formatted.",
        "Make sure to handle edge cases.",
        "Validate the output before completing.",
        "Keep the content concise and well-structured."
    )

    def __init__(self, skill: Optional["Skill"] = None):
        """
        Args:
            skill: Skill to generate cases for; when omitted, only generic
                cases are produced.
        """
        self.skill = skill

    def generate(
        self,
        count: int = 10,
        include_edge_cases: bool = True,
        categories: Optional[list[str]] = None
    ) -> list["GeneratedTestCase"]:
        """
        Generate test cases for the skill.

        Template cases come first, in template order; if fewer than ``count``
        were selected, the list is padded with randomly chosen canned prompts.
        When no skill is set, only the generic templates are used and at most
        ``len(TEMPLATES["generic"])`` cases are returned.

        Args:
            count: Number of test cases to generate
            include_edge_cases: Include ``edge_*`` templates. When False they
                are skipped unless explicitly named in ``categories``.
            categories: Template ids to include. Non-edge templates not
                listed here are skipped; ``None``/empty means no filtering.

        Returns:
            List of generated test cases
        """
        if not self.skill:
            return self._generate_generic_cases(count)

        # Analyze skill to determine type, then pick the matching templates
        skill_type = self._determine_skill_type()
        templates = self.TEMPLATES.get(skill_type, self.TEMPLATES["generic"])

        test_cases = []
        for template_id, template in templates:
            if len(test_cases) >= count:
                break
            if not self._template_selected(template_id, include_edge_cases, categories):
                continue

            complexity = self._complexity_for(template_id)
            case_number = len(test_cases) + 1
            test_cases.append(GeneratedTestCase(
                id=f"gen_{case_number:03d}",
                description=f"Auto-generated: {template_id}",
                # Fill template with skill-specific content
                prompt=self._fill_template(template, skill_type),
                expected_outputs=self._get_expected_outputs(skill_type),
                quality_criteria=self._get_quality_criteria(skill_type, template_id),
                tags=[skill_type, template_id, complexity],
                complexity=complexity
            ))

        # Pad with canned prompts until the requested count is reached
        while len(test_cases) < count:
            test_cases.append(self._generate_random_case(
                len(test_cases) + 1, skill_type, include_edge_cases
            ))

        return test_cases[:count]

    @staticmethod
    def _template_selected(
        template_id: str,
        include_edge_cases: bool,
        categories: Optional[list[str]]
    ) -> bool:
        """Decide whether a template passes the category / edge-case filters."""
        if categories and template_id in categories:
            # Explicitly requested templates are always kept.
            return True
        if template_id.startswith("edge_"):
            # Edge templates ride along only when edge cases are enabled.
            return include_edge_cases
        # Unlisted non-edge templates are kept only when no filter was given.
        return not categories

    @staticmethod
    def _complexity_for(template_id: str) -> str:
        """Map a template id to its complexity label."""
        if template_id == "simple":
            return "simple"
        if template_id in ("long", "detailed", "multi_step"):
            return "complex"
        if template_id.startswith("edge_"):
            return "edge_case"
        return "medium"

    def generate_from_failures(
        self,
        failure_traces: list[dict],
        max_cases: int = 5
    ) -> list["GeneratedTestCase"]:
        """
        Generate test cases based on previous failures.

        Args:
            failure_traces: List of execution traces that failed. Each trace
                is read via its "prompt" and "errors" keys; missing or None
                values are treated as empty.
            max_cases: Maximum number of traces to turn into cases
                (only the first ``max_cases`` traces are used).

        Returns:
            Test cases targeting those failure modes
        """
        test_cases = []

        for number, trace in enumerate(failure_traces[:max(max_cases, 0)], start=1):
            # `or` guards against keys that are present but set to None,
            # which would otherwise break the slicing below.
            original_prompt = trace.get("prompt") or ""
            errors = trace.get("errors") or []

            test_cases.append(GeneratedTestCase(
                id=f"failure_variant_{number:03d}",
                description=f"Variant of failed task: {original_prompt[:50]}...",
                prompt=self._create_failure_variant(original_prompt, errors),
                tags=["failure_mode", "regression"],
                complexity="edge_case",
                quality_criteria=[
                    "Should not produce the same errors",
                    f"Original errors: {errors[:2]}"
                ]
            ))

        return test_cases

    def _determine_skill_type(self) -> str:
        """
        Determine the type of skill based on content.

        Lower-cases the skill's description and instructions and returns the
        first type in ``_TYPE_KEYWORDS`` with a keyword occurring as a
        substring (so "slides" matches "slide", and "keyword" matches "word").

        Returns:
            One of "presentation", "document", "spreadsheet", "code",
            or "generic" when no skill is set or nothing matches.
        """
        if not self.skill:
            return "generic"

        # Tolerate None text fields rather than failing on concatenation.
        description = self.skill.description or ""
        instructions = self.skill.instructions or ""
        content = f"{description} {instructions}".lower()

        for skill_type, keywords in self._TYPE_KEYWORDS:
            if any(keyword in content for keyword in keywords):
                return skill_type
        return "generic"

    def _fill_template(self, template: str, skill_type: str) -> str:
        """
        Fill template placeholders with appropriate content.

        Args:
            template: Text containing ``{placeholder}`` markers
            skill_type: Selects type-specific overrides of the default values

        Returns:
            The template with every known placeholder replaced; unknown
            placeholders are left untouched.
        """
        fills = {**self._DEFAULT_FILLS, **self._TYPE_FILLS.get(skill_type, {})}

        # Plain replace (not str.format) so unknown or stray braces in a
        # template never raise.
        result = template
        for key, value in fills.items():
            result = result.replace("{" + key + "}", value)
        return result

    def _get_expected_outputs(self, skill_type: str) -> list[dict]:
        """Get expected outputs based on skill type (empty if none is known)."""
        pattern = self._OUTPUT_PATTERNS.get(skill_type)
        if pattern is None:
            return []
        # Build a fresh list each call so callers can mutate it freely.
        return [{"type": "file", "pattern": pattern}]

    def _get_quality_criteria(
        self,
        skill_type: str,
        template_id: str
    ) -> list[str]:
        """
        Get quality criteria for a test case.

        Returns:
            The two base criteria, plus one extra criterion when the
            (skill_type, template_id) pair has an entry in ``_EXTRA_CRITERIA``.
        """
        criteria = ["Task completes successfully", "No errors in output"]
        extra = self._EXTRA_CRITERIA.get((skill_type, template_id))
        if extra is not None:
            criteria.append(extra)
        return criteria

    def _generate_random_case(
        self,
        case_id: int,
        skill_type: str,
        include_edge: bool
    ) -> "GeneratedTestCase":
        """
        Generate a random test case from the canned prompts.

        Args:
            case_id: Number used to build the case id (``gen_NNN``)
            skill_type: Selects the prompt pool; unknown types use "generic"
            include_edge: Currently unused; kept for signature compatibility

        Returns:
            A medium-complexity case tagged "random"
        """
        import random

        prompt_pool = self._RANDOM_PROMPTS.get(skill_type, self._RANDOM_PROMPTS["generic"])
        prompt = random.choice(prompt_pool)

        return GeneratedTestCase(
            id=f"gen_{case_id:03d}",
            description=f"Random case: {prompt[:40]}...",
            prompt=prompt,
            expected_outputs=self._get_expected_outputs(skill_type),
            quality_criteria=["Task completes successfully"],
            tags=[skill_type, "random"],
            complexity="medium"
        )

    def _create_failure_variant(
        self,
        original_prompt: str,
        errors: list[str]
    ) -> str:
        """
        Create a variant prompt that addresses previous failures.

        Args:
            original_prompt: The prompt that failed
            errors: Currently unused; the appended note is chosen at random

        Returns:
            The original prompt followed by a blank line and a "Note: ..." line
        """
        import random

        # Simple modification: add clarification
        modification = random.choice(self._FAILURE_NOTES)
        return f"{original_prompt}\n\nNote: {modification}"

    def save_to_yaml(
        self,
        test_cases: list["GeneratedTestCase"],
        output_path: str
    ):
        """
        Save test cases to YAML file.

        Writes a "metadata" mapping (generator name, skill name or "unknown",
        case count) and a "test_cases" list, then prints a confirmation line.

        Args:
            test_cases: Cases to serialize via their ``to_dict``
            output_path: Destination file path (overwritten if it exists)
        """
        data = {
            "metadata": {
                "generator": "skill-optimizer-test-generator",
                "skill_name": self.skill.name if self.skill else "unknown",
                "count": len(test_cases)
            },
            "test_cases": [tc.to_dict() for tc in test_cases]
        }

        # Explicit encoding keeps the output independent of the platform's
        # locale default.
        with open(output_path, "w", encoding="utf-8") as f:
            yaml.dump(data, f, default_flow_style=False, sort_keys=False)

        print(f"Saved {len(test_cases)} test cases to {output_path}")

    def _generate_generic_cases(self, count: int) -> list["GeneratedTestCase"]:
        """
        Generate generic test cases when no skill is provided.

        Returns:
            One case per generic template, in order, capped at ``count``
            (and therefore never more than the number of generic templates).
        """
        selected = self.TEMPLATES["generic"][:max(count, 0)]
        return [
            GeneratedTestCase(
                id=f"generic_{number:03d}",
                description=f"Generic test: {template_id}",
                prompt=self._fill_template(template, "generic"),
                tags=["generic", template_id],
                complexity="medium"
            )
            for number, (template_id, template) in enumerate(selected, start=1)
        ]


def main():
    """
    Command-line entry point for test case generation.

    Parses the CLI options, optionally loads a skill from ``--skill-path``,
    generates the requested number of cases, writes them to the ``--output``
    YAML file, and prints a one-line summary per case.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Generate test cases for a skill")
    cli.add_argument("--skill-path", help="Path to skill directory")
    cli.add_argument("--output", "-o", default="test_cases.yaml", help="Output file")
    cli.add_argument("--count", "-n", type=int, default=10, help="Number of test cases")
    cli.add_argument("--no-edge-cases", action="store_true", help="Exclude edge cases")
    options = cli.parse_args()

    # A skill is optional; without one the generator gets None.
    loaded_skill = None
    if options.skill_path:
        loaded_skill = SkillLoader().load(options.skill_path)
        print(f"Loaded skill: {loaded_skill.name}")

    # Build the cases, then persist them before reporting.
    case_generator = TestCaseGenerator(loaded_skill)
    wants_edge_cases = not options.no_edge_cases
    generated = case_generator.generate(
        count=options.count,
        include_edge_cases=wants_edge_cases
    )
    case_generator.save_to_yaml(generated, options.output)

    print(f"\nGenerated {len(generated)} test cases:")
    for case in generated:
        print(f"  - {case.id}: {case.description[:50]}...")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
