"""local_llm.py — wraps a local Hugging Face model in a LlamaIndex-compatible LLM,
with a GPT-2 fallback and a mock LLM as a last resort."""

try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from llama_index.llms.huggingface import HuggingFaceLLM
    import torch
except ImportError as e:
    print(f"Import error in local_llm.py: {e}")
    raise

from types import SimpleNamespace


class LocalLLM:
    def __init__(self):
        # Use a lightweight chat-tuned model that fits on CPU or a small GPU
        self.model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
        print(f"Initializing LocalLLM with model: {self.model_name}")
        self.llm = self._create_llama_index_llm()

    def _create_llama_index_llm(self):
        try:
            print("Loading tokenizer...")
            tokenizer = AutoTokenizer.from_pretrained(self.model_name)

            print("Loading model...")
            model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                # fp16 on GPU; fp32 on CPU, where half-precision matmuls are slow or unsupported
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                low_cpu_mem_usage=True,
            )

            print("Creating LlamaIndex-compatible LLM...")
            # max_new_tokens must be a constructor argument, not a generate_kwarg:
            # HuggingFaceLLM already forwards its own max_new_tokens to generate(),
            # so duplicating it in generate_kwargs raises a TypeError at query time.
            # do_sample=True is needed for temperature to take effect. tokenizer_kwargs
            # and device_map are dropped here; they only apply when HuggingFaceLLM
            # loads the model/tokenizer by name, and ours are already loaded.
            llm = HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                context_window=2048,  # TinyLlama-1.1B-Chat's context length
                max_new_tokens=256,
                generate_kwargs={"temperature": 0.7, "do_sample": True},
            )
            print("✅ LLM created successfully!")
            return llm
        except Exception as e:
            print(f"❌ Failed to load {self.model_name}: {e}")
            return self._create_fallback_llm()

    def _create_fallback_llm(self):
        print("⚠️ Falling back to GPT-2 model")
        model_name = "gpt2"
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships without a pad token
            model = AutoModelForCausalLM.from_pretrained(model_name)
            return HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                context_window=1024,  # GPT-2's maximum position embeddings
                max_new_tokens=256,
                generate_kwargs={"temperature": 0.7, "do_sample": True},
            )
        except Exception as e:
            print(f"⚠️ Fallback model also failed: {e}")
            return self._create_mock_llm()

    def _create_mock_llm(self):
        print("⚠️ Using mock LLM")

        class MockLLM:
            """Last-resort stand-in exposing the chat/complete surface callers rely on."""

            def chat(self, messages, **kwargs):
                # Mirrors a ChatResponse: the text lives at response.message.content
                return SimpleNamespace(
                    message=SimpleNamespace(
                        content="Mock chat response: I would analyze this question and provide an answer."
                    )
                )

            def complete(self, prompt, **kwargs):
                # Mirrors a CompletionResponse: the text lives at response.text
                return SimpleNamespace(
                    text="Mock completion response: I would analyze this question and provide an answer."
                )

        return MockLLM()

    def get_llm(self):
        return self.llm
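

# --- Usage sketch ---
# A minimal smoke test of the intended call pattern, assuming the module is run
# directly; the prompt string here is illustrative. complete() is the standard
# llama_index single-turn entry point, and the mock fallback mirrors the same
# .text attribute, so this runs even when no model weights can be downloaded.
if __name__ == "__main__":
    local = LocalLLM()
    llm = local.get_llm()
    response = llm.complete("In one sentence, what is retrieval-augmented generation?")
    print(getattr(response, "text", response))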