try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from llama_index.llms.huggingface import HuggingFaceLLM
    import torch
except ImportError as e:
    print(f"Import error in local_llm.py: {e}")
    raise


class LocalLLM:
    def __init__(self):
        # Use a simple, reliable model that works well with LlamaIndex
        self.model_name = "microsoft/DialoGPT-small"  # Changed to smaller model
        print(f"Initializing LocalLLM with model: {self.model_name}")
        self.llm = self._create_llama_index_llm()

    def _create_llama_index_llm(self):
        """Create a LlamaIndex-compatible LLM."""
        try:
            print("Loading tokenizer...")
            tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            print("Loading model...")
            model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                low_cpu_mem_usage=True
            )

            print("Creating LlamaIndex LLM...")
            llm = HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                # Keep generate_kwargs minimal to avoid conflicts with LlamaIndex defaults
                generate_kwargs={
                    "do_sample": True,
                    "temperature": 0.7,
                    "pad_token_id": tokenizer.eos_token_id
                },
                # Set the generation length at the LLM level instead of in generate_kwargs
                max_new_tokens=256,
                device_map="auto" if torch.cuda.is_available() else None
            )
            print("LLM created successfully!")
            return llm
        except Exception as e:
            print(f"Failed to load model {self.model_name}: {str(e)}")
            # Fall back to an even simpler model
            return self._create_fallback_llm()

    def _create_fallback_llm(self):
        """Fall back to a very basic model (gpt2)."""
        print("Using fallback model: gpt2")
        model_name = "gpt2"
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            tokenizer.pad_token = tokenizer.eos_token
            model = AutoModelForCausalLM.from_pretrained(model_name)
            return HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                generate_kwargs={
                    "do_sample": True,
                    "temperature": 0.7,
                    "pad_token_id": tokenizer.eos_token_id
                },
                max_new_tokens=256
            )
        except Exception as e:
            print(f"Even fallback model failed: {str(e)}")
            # Return a mock LLM for testing
            return self._create_mock_llm()

    def _create_mock_llm(self):
        """Create a mock LLM for testing when models fail."""
        print("Creating mock LLM for testing...")

        class MockLLM:
            def chat(self, messages, **kwargs):
                # Simple mock response
                class MockResponse:
                    def __init__(self, text):
                        self.message = type('obj', (object,), {'content': text})

                return MockResponse("This is a mock response. The actual LLM failed to load.")

            def complete(self, prompt, **kwargs):
                class MockCompletion:
                    def __init__(self, text):
                        self.text = text

                return MockCompletion("Mock completion response.")

        return MockLLM()

    def get_llm(self):
        """Return the LlamaIndex LLM instance."""
        return self.llm
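

if __name__ == "__main__":
    # Usage sketch (not part of the original module): load whichever backend
    # succeeds (DialoGPT-small, the gpt2 fallback, or the mock) and run a single
    # completion. Both HuggingFaceLLM and the mock fallback expose complete(),
    # returning an object with a .text attribute.
    local_llm = LocalLLM()
    llm = local_llm.get_llm()
    response = llm.complete("Hello! Briefly introduce yourself.")
    print(getattr(response, "text", response))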