from langchain_community.llms import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import sys
import os
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Add project root to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATURE, MAX_TOKENS


def _ensure_cache_dir():
    """Create the model cache directory with open permissions; return None on failure."""
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            return None
    return cache_dir


def get_llm():
    """Initialize and return the language model."""
    cache_dir = _ensure_cache_dir()

    # The API key is optional here: local pipelines don't require it, but
    # gated models do. Prefer environment variables, then the config value.
    api_key = (
        os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
        or os.getenv("HF_API_KEY", "")
        or HF_API_KEY
        or ""
    )

    logger.info(f"Using model: {LLM_MODEL}")

    # Always try a local pipeline first (most reliable in Spaces)
    try:
        from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

        logger.info(f"Loading model {LLM_MODEL} as local pipeline")

        # Try loading with explicit model classes for better compatibility
        try:
            tokenizer = AutoTokenizer.from_pretrained(
                LLM_MODEL, cache_dir=cache_dir, token=api_key or None
            )
            model = AutoModelForCausalLM.from_pretrained(
                LLM_MODEL, cache_dir=cache_dir, token=api_key or None
            )

            # Create pipeline with the loaded components
            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_length=MAX_TOKENS,
                temperature=DEFAULT_TEMPERATURE
            )
            return HuggingFacePipeline(pipeline=pipe)
        except Exception as e:
            logger.warning(f"Error loading with explicit model/tokenizer: {e}")

            # Fall back to the simpler pipeline instantiation
            pipe = pipeline(
                "text-generation",
                model=LLM_MODEL,
                max_length=MAX_TOKENS,
                temperature=DEFAULT_TEMPERATURE,
                model_kwargs={"cache_dir": cache_dir}
            )
            return HuggingFacePipeline(pipeline=pipe)
    except Exception as e:
        logger.warning(f"Error creating local pipeline: {e}")

        # Last resort: a mock LLM so the app keeps responding
        from langchain_community.llms.fake import FakeListLLM

        logger.warning("Using mock LLM as fallback")
        return FakeListLLM(
            responses=[
                "I'm running in fallback mode due to model loading issues. "
                "I have limited capabilities right now.",
                "I can't access the language model currently. "
                "Please check the Space logs for more information.",
                "I'm operating with a simplified model. For better performance, "
                "try running this app locally with proper models configured."
            ]
        )


def get_embeddings():
    """Initialize and return the embeddings model."""
    cache_dir = _ensure_cache_dir()

    # Try to use local embeddings
    try:
        logger.info(f"Loading embeddings model: {EMBEDDING_MODEL}")
        return HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            cache_folder=cache_dir
        )
    except Exception as e:
        logger.warning(f"Error initializing embeddings: {e}")

        # Mock embeddings that return random vectors, used as a fallback
        from langchain_community.embeddings.fake import FakeEmbeddings

        logger.warning("Using mock embeddings as fallback")
        return FakeEmbeddings(size=384)  # Standard size for small embedding models

def get_chat_model():
    """
    Create a chat-like interface using a regular LLM.

    This is necessary because many free HF models don't have chat interfaces.
    """
    llm = get_llm()

    # Create a chat-like prompt template
    chat_template = """
Context: {context}

Chat History: {chat_history}

User: {question}
AI Assistant: """

    prompt = PromptTemplate(
        input_variables=["context", "chat_history", "question"],
        template=chat_template
    )

    # Create a chain
    return LLMChain(llm=llm, prompt=prompt)
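
# --- Usage sketch (illustrative addition, not part of the original module) ---
# A minimal smoke test, assuming app.config defines the names imported above
# and that LLM_MODEL names a small text-generation model that fits in memory.
# The chat chain takes the three prompt variables and returns a dict whose
# "text" key holds the generated answer.
if __name__ == "__main__":
    chain = get_chat_model()
    result = chain.invoke({
        "context": "LangChain can wrap a local Hugging Face pipeline.",
        "chat_history": "",
        "question": "Which model are you running?"
    })
    print(result["text"])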