from pathlib import Path
from typing import List
import chromadb
from chromadb.errors import InvalidCollectionException
from pydantic_ai import Agent
from pydantic import BaseModel, Field
from pprint import pformat
from rich.progress import Progress

from knowlang.configs.config import AppConfig
from knowlang.core.types import CodeChunk
from knowlang.utils.chunking_util import format_code_summary
from knowlang.utils.fancy_log import FancyLogger
from knowlang.utils.model_provider import create_pydantic_model
from knowlang.models.embeddings import generate_embedding

# Module-level logger, named after this module; used for debug output below.
LOG = FancyLogger(__name__)

class ChunkMetadata(BaseModel):
    """Model for chunk metadata stored in ChromaDB"""

    # Path of the chunk's source file. CodeSummarizer stores it relative to
    # the configured codebase directory, using POSIX separators.
    file_path: str
    # Line span copied from the chunk's start_line / end_line.
    start_line: int
    end_line: int
    # String value of the chunk's type (CodeSummarizer passes chunk.type.value).
    type: str
    # Name carried by the chunk.
    name: str
    # Docstring text; empty string when the chunk has none.
    docstring: str = ""

class CodeSummarizer:
    """Summarize code chunks with an LLM agent and store them in ChromaDB.

    Construction builds the agent from ``config.llm`` and then opens the
    collection named in ``config.db``, creating it when it is missing.
    """

    def __init__(self, config: AppConfig):
        """Keep the configuration, then set up the agent and the database.

        Args:
            config: Application configuration; the ``llm``, ``db`` and
                ``embedding`` sections are read by this class.
        """
        self.config = config
        self._init_agent()
        self._init_db()

    def _init_agent(self):
        """Create ``self.agent`` from the LLM section of the configuration."""
        llm_config = self.config.llm
        model = create_pydantic_model(
            model_provider=llm_config.model_provider,
            model_name=llm_config.model_name,
        )

        # Written as explicit literals so the trailing space and the
        # whitespace-only line that are part of the prompt text stay visible.
        system_prompt = (
            "\n"
            "You are an expert code analyzer specializing in creating searchable and contextual code summaries. \n"
            "Your summaries will be used in a RAG system to help developers understand complex codebases.\n"
            "Focus on following points:\n"
            "1. The main purpose and functionality\n"
            "- Use precise technical terms\n"
            "- Preserve class/function/variable names exactly\n"
            "- State the primary purpose\n"
            "2. Narrow down key implementation details\n"
            "- Focus on key algorithms, patterns, or design choices\n"
            "- Highlight important method signatures and interfaces\n"
            "3. Any notable dependencies or requirements\n"
            "- Reference related classes/functions by exact name\n"
            "- List external dependencies\n"
            "- Note any inherited or implemented interfaces\n"
            "        \n"
            "Provide a clean, concise and focused summary. Don't include unnecessary nor generic details.\n"
        )

        self.agent = Agent(
            model,
            system_prompt=system_prompt,
            model_settings=llm_config.model_settings,
        )

    def _init_db(self):
        """Open the persistent ChromaDB client and bind ``self.collection``.

        The configured collection is fetched if it exists; when the lookup
        raises ``InvalidCollectionException`` a new collection is created
        with cosine distance (``hnsw:space`` = ``cosine``).
        """
        db_config = self.config.db
        collection_name = db_config.collection_name

        self.db_client = chromadb.PersistentClient(
            path=str(db_config.persist_directory)
        )

        try:
            self.collection = self.db_client.get_collection(name=collection_name)
        except InvalidCollectionException:
            LOG.debug(f"Collection {collection_name} not found, creating new collection")
            self.collection = self.db_client.create_collection(
                name=collection_name,
                metadata={"hnsw:space": "cosine"},
            )

    async def summarize_chunk(self, chunk: CodeChunk) -> str:
        """Ask the agent for a summary of one code chunk.

        Args:
            chunk: The chunk to summarize; its type, content and optional
                docstring are embedded in the prompt.

        Returns:
            The result of ``format_code_summary`` applied to the chunk's
            content and the agent's output.
        """
        # Empty when the chunk has no docstring, leaving a blank prompt line.
        docstring_line = f'Docstring: {chunk.docstring}' if chunk.docstring else ''

        # The eight-space indentation is part of the prompt text sent to the
        # agent, so it is spelled out rather than left to source layout.
        prompt = (
            "\n"
            f"        Analyze this {chunk.type.value} code chunk:\n"
            "        \n"
            f"        {chunk.content}\n"
            "        \n"
            f"        {docstring_line}\n"
            "        \n"
            "        Provide a concise summary.\n"
            "        "
        )

        result = await self.agent.run(prompt)

        location = f"{chunk.file_path}:{chunk.start_line}-{chunk.end_line}"
        LOG.debug(f"Summary for chunk {location}:\n{pformat(result.data)}")

        return format_code_summary(chunk.content, result.data)

    async def process_and_store_chunk(self, chunk: CodeChunk):
        """Summarize a chunk, embed the summary, and add it to the collection.

        The stored ID has the form ``<relative path>:<start>-<end>``, where
        the path is relative to ``config.db.codebase_directory``.

        Args:
            chunk: The chunk to summarize and store.

        Raises:
            ValueError: From ``Path.relative_to`` when ``chunk.file_path`` is
                not located under the configured codebase directory.
        """
        summary = await self.summarize_chunk(chunk)

        # Identify the chunk by its location relative to the codebase root.
        codebase_root = self.config.db.codebase_directory
        relative_path = Path(chunk.file_path).relative_to(codebase_root).as_posix()
        chunk_id = f"{relative_path}:{chunk.start_line}-{chunk.end_line}"

        # Validate the metadata through the Pydantic model before storing it.
        metadata = ChunkMetadata(
            file_path=relative_path,
            start_line=chunk.start_line,
            end_line=chunk.end_line,
            type=chunk.type.value,
            name=chunk.name,
            docstring=chunk.docstring or '',
        )

        # The summary text, not the raw code, is what gets embedded.
        embedding = generate_embedding(summary, self.config.embedding)

        self.collection.add(
            ids=[chunk_id],
            documents=[summary],
            embeddings=embedding,
            metadatas=[metadata.model_dump()],
        )

    async def process_chunks(self, chunks: List[CodeChunk]):
        """Summarize and store every chunk while showing a progress bar.

        Chunks are awaited one at a time, in list order; the bar advances
        after each chunk has been stored.

        Args:
            chunks: The chunks to process.
        """
        with Progress() as progress_bar:
            task_id = progress_bar.add_task(
                "Summarizing chunks into vector database...",
                total=len(chunks),
            )

            for chunk in chunks:
                await self.process_and_store_chunk(chunk)
                progress_bar.advance(task_id)