"""local_llm.py — wraps a local Hugging Face model in a LlamaIndex-compatible LLM,
with a GPT-2 fallback and a mock LLM as a last resort."""

try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from llama_index.llms.huggingface import HuggingFaceLLM
    import torch
except ImportError as e:
    print(f"Import error in local_llm.py: {e}")
    raise

from types import SimpleNamespace


class LocalLLM:
    def __init__(self):
        # Use a lightweight chat-tuned model that fits on CPU or a small GPU
        self.model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
        print(f"Initializing LocalLLM with model: {self.model_name}")
        self.llm = self._create_llama_index_llm()

    def _create_llama_index_llm(self):
        try:
            print("Loading tokenizer...")
            tokenizer = AutoTokenizer.from_pretrained(self.model_name)

            print("Loading model...")
            model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                # fp16 on GPU; fp32 on CPU, where half-precision matmuls are slow or unsupported
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                low_cpu_mem_usage=True,
            )

            print("Creating LlamaIndex-compatible LLM...")
            # max_new_tokens must be a constructor argument, not a generate_kwarg:
            # HuggingFaceLLM already forwards its own max_new_tokens to generate(),
            # so duplicating it in generate_kwargs raises a TypeError at query time.
            # do_sample=True is needed for temperature to take effect. tokenizer_kwargs
            # and device_map are dropped here; they only apply when HuggingFaceLLM
            # loads the model/tokenizer by name, and ours are already loaded.
            llm = HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                context_window=2048,  # TinyLlama-1.1B-Chat's context length
                max_new_tokens=256,
                generate_kwargs={"temperature": 0.7, "do_sample": True},
            )
            print("✅ LLM created successfully!")
            return llm
        except Exception as e:
            print(f"❌ Failed to load {self.model_name}: {e}")
            return self._create_fallback_llm()

    def _create_fallback_llm(self):
        print("⚠️ Falling back to GPT-2 model")
        model_name = "gpt2"
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships without a pad token
            model = AutoModelForCausalLM.from_pretrained(model_name)
            return HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                context_window=1024,  # GPT-2's maximum position embeddings
                max_new_tokens=256,
                generate_kwargs={"temperature": 0.7, "do_sample": True},
            )
        except Exception as e:
            print(f"⚠️ Fallback model also failed: {e}")
            return self._create_mock_llm()

    def _create_mock_llm(self):
        print("⚠️ Using mock LLM")

        class MockLLM:
            """Last-resort stand-in exposing the chat/complete surface callers rely on."""

            def chat(self, messages, **kwargs):
                # Mirrors a ChatResponse: the text lives at response.message.content
                return SimpleNamespace(
                    message=SimpleNamespace(
                        content="Mock chat response: I would analyze this question and provide an answer."
                    )
                )

            def complete(self, prompt, **kwargs):
                # Mirrors a CompletionResponse: the text lives at response.text
                return SimpleNamespace(
                    text="Mock completion response: I would analyze this question and provide an answer."
                )

        return MockLLM()

    def get_llm(self):
        return self.llm
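

# --- Usage sketch ---
# A minimal smoke test of the intended call pattern, assuming the module is run
# directly; the prompt string here is illustrative. complete() is the standard
# llama_index single-turn entry point, and the mock fallback mirrors the same
# .text attribute, so this runs even when no model weights can be downloaded.
if __name__ == "__main__":
    local = LocalLLM()
    llm = local.get_llm()
    response = llm.complete("In one sentence, what is retrieval-augmented generation?")
    print(getattr(response, "text", response))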