try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from llama_index.llms.huggingface import HuggingFaceLLM
    import torch
except ImportError as e:
    print(f"Import error in local_llm.py: {e}")
    raise


class LocalLLM:
    def __init__(self):
        # Use a simple, reliable model that works well with LlamaIndex
        self.model_name = "microsoft/DialoGPT-small"  # Changed to smaller model
        print(f"Initializing LocalLLM with model: {self.model_name}")
        self.llm = self._create_llama_index_llm()

    def _create_llama_index_llm(self):
        """Create a LlamaIndex-compatible LLM."""
        try:
            print("Loading tokenizer...")
            tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            print("Loading model...")
            model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                low_cpu_mem_usage=True
            )

            print("Creating LlamaIndex LLM...")
            # Keep generate_kwargs minimal to avoid conflicts with LLM-level settings
            llm = HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                generate_kwargs={
                    "do_sample": True,
                    "temperature": 0.7,
                    "pad_token_id": tokenizer.eos_token_id
                },
                # Set these parameters at the LLM level instead
                max_new_tokens=256,
                device_map="auto" if torch.cuda.is_available() else None
            )
            print("LLM created successfully!")
            return llm

        except Exception as e:
            print(f"Failed to load model {self.model_name}: {str(e)}")
            # Fall back to an even simpler model
            return self._create_fallback_llm()

    def _create_fallback_llm(self):
        """Fall back to a very basic model."""
        print("Using fallback model: gpt2")
        model_name = "gpt2"
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            tokenizer.pad_token = tokenizer.eos_token
            model = AutoModelForCausalLM.from_pretrained(model_name)
            return HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                generate_kwargs={
                    "do_sample": True,
                    "temperature": 0.7,
                    "pad_token_id": tokenizer.eos_token_id
                },
                max_new_tokens=256
            )
        except Exception as e:
            print(f"Even fallback model failed: {str(e)}")
            # Return a mock LLM for testing
            return self._create_mock_llm()

    def _create_mock_llm(self):
        """Create a mock LLM for testing when real models fail to load."""
        print("Creating mock LLM for testing...")

        class MockLLM:
            def chat(self, messages, **kwargs):
                # Simple mock response exposing a `.message.content` attribute
                class MockResponse:
                    def __init__(self, text):
                        self.message = type('obj', (object,), {'content': text})
                return MockResponse("This is a mock response. The actual LLM failed to load.")

            def complete(self, prompt, **kwargs):
                # Mock completion exposing a `.text` attribute
                class MockCompletion:
                    def __init__(self, text):
                        self.text = text
                return MockCompletion("Mock completion response.")

        return MockLLM()

    def get_llm(self):
        """Return the LlamaIndex LLM instance."""
        return self.llm
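

# Minimal usage sketch (assumption: not part of the original module). It shows how
# LocalLLM would typically be exercised as a quick smoke test; the `.complete()` call
# returns an object with a `.text` attribute for both the real HuggingFaceLLM and the
# MockLLM fallback above, so this works regardless of which backend was loaded.
if __name__ == "__main__":
    local_llm = LocalLLM()
    llm = local_llm.get_llm()
    response = llm.complete("Hello, how are you?")
    print(response.text)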