try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from llama_index.llms.huggingface import HuggingFaceLLM
    import torch
except ImportError as e:
    print(f"Import error in local_llm.py: {e}")
    raise


class LocalLLM:
    def __init__(self):
        # Use a simple, reliable model that works well with LlamaIndex
        self.model_name = "microsoft/DialoGPT-small"  # Changed to smaller model
        print(f"Initializing LocalLLM with model: {self.model_name}")
        self.llm = self._create_llama_index_llm()

    def _create_llama_index_llm(self):
        """Create a LlamaIndex-compatible LLM."""
        try:
            print("Loading tokenizer...")
            tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            print("Loading model...")
            model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                low_cpu_mem_usage=True
            )

            print("Creating LlamaIndex LLM...")
            # Keep generate_kwargs minimal to avoid conflicts with LLM-level settings
            llm = HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                generate_kwargs={
                    "do_sample": True,
                    "temperature": 0.7,
                    "pad_token_id": tokenizer.eos_token_id
                },
                # Set these parameters at the LLM level instead
                max_new_tokens=256,
                device_map="auto" if torch.cuda.is_available() else None
            )
            print("LLM created successfully!")
            return llm

        except Exception as e:
            print(f"Failed to load model {self.model_name}: {str(e)}")
            # Fall back to an even simpler model
            return self._create_fallback_llm()

    def _create_fallback_llm(self):
        """Fall back to a very basic model."""
        print("Using fallback model: gpt2")
        model_name = "gpt2"
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            tokenizer.pad_token = tokenizer.eos_token
            model = AutoModelForCausalLM.from_pretrained(model_name)
            return HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                generate_kwargs={
                    "do_sample": True,
                    "temperature": 0.7,
                    "pad_token_id": tokenizer.eos_token_id
                },
                max_new_tokens=256
            )
        except Exception as e:
            print(f"Even fallback model failed: {str(e)}")
            # Return a mock LLM for testing
            return self._create_mock_llm()

    def _create_mock_llm(self):
        """Create a mock LLM for testing when real models fail to load."""
        print("Creating mock LLM for testing...")

        class MockLLM:
            def chat(self, messages, **kwargs):
                # Simple mock response exposing a `.message.content` attribute
                class MockResponse:
                    def __init__(self, text):
                        self.message = type('obj', (object,), {'content': text})
                return MockResponse("This is a mock response. The actual LLM failed to load.")

            def complete(self, prompt, **kwargs):
                # Mock completion exposing a `.text` attribute
                class MockCompletion:
                    def __init__(self, text):
                        self.text = text
                return MockCompletion("Mock completion response.")

        return MockLLM()

    def get_llm(self):
        """Return the LlamaIndex LLM instance."""
        return self.llm
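

# Minimal usage sketch (assumption: not part of the original module). It shows how
# LocalLLM would typically be exercised as a quick smoke test; the `.complete()` call
# returns an object with a `.text` attribute for both the real HuggingFaceLLM and the
# MockLLM fallback above, so this works regardless of which backend was loaded.
if __name__ == "__main__":
    local_llm = LocalLLM()
    llm = local_llm.get_llm()
    response = llm.complete("Hello, how are you?")
    print(response.text)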