try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from llama_index.llms.huggingface import HuggingFaceLLM
    import torch
except ImportError as e:
    print(f"Import error in local_llm.py: {e}")
    raise
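
# Dependency note (added for clarity; package names inferred from the import
# paths above): these imports assume `torch`, `transformers`, and the
# `llama-index-llms-huggingface` integration package are installed.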

class LocalLLM:
    """Wraps a local HuggingFace model behind a LlamaIndex-compatible LLM,
    falling back to GPT-2 and finally to a mock LLM if loading fails."""

    def __init__(self):
        # Lightweight, chat-tuned model that exists on the HuggingFace Hub
        self.model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
        print(f"Initializing LocalLLM with model: {self.model_name}")
        self.llm = self._create_llama_index_llm()

    def _create_llama_index_llm(self):
        """Load the primary model and wrap it in a LlamaIndex HuggingFaceLLM."""
        try:
            print("Loading tokenizer...")
            tokenizer = AutoTokenizer.from_pretrained(self.model_name)

            print("Loading model...")
            model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                low_cpu_mem_usage=True
            )

            print("Creating LlamaIndex-compatible LLM...")
            llm = HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                context_window=2048,
                generate_kwargs={"temperature": 0.7, "max_new_tokens": 256},
                tokenizer_kwargs={"use_fast": True},
                device_map="auto" if torch.cuda.is_available() else None
            )
            print("✅ LLM created successfully!")
            return llm
        except Exception as e:
            print(f"❌ Failed to load {self.model_name}: {e}")
            return self._create_fallback_llm()

    def _create_fallback_llm(self):
        """Fall back to GPT-2 if the primary model cannot be loaded."""
        print("⚠️ Falling back to GPT2 model")
        model_name = "gpt2"
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            # GPT-2 has no pad token; reuse the EOS token for padding
            tokenizer.pad_token = tokenizer.eos_token
            model = AutoModelForCausalLM.from_pretrained(model_name)
            return HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                context_window=1024,  # GPT-2's maximum context length
                generate_kwargs={"temperature": 0.7, "max_new_tokens": 256},
            )
        except Exception as e:
            print(f"⚠️ Fallback model also failed: {e}")
            return self._create_mock_llm()

    def _create_mock_llm(self):
        """Last-resort stub that mimics the chat/complete interface of a real LLM."""
        print("⚠️ Using mock LLM")

        class MockLLM:
            def chat(self, messages, **kwargs):
                class MockResponse:
                    def __init__(self, text):
                        self.message = type('obj', (object,), {'content': text})
                return MockResponse("Mock chat response: I would analyze this question and provide an answer.")

            def complete(self, prompt, **kwargs):
                class MockCompletion:
                    def __init__(self, text):
                        self.text = text
                return MockCompletion("Mock completion response: I would analyze this question and provide an answer.")

        return MockLLM()

    def get_llm(self):
        return self.llm
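

# Minimal usage sketch (an assumption about how this module is exercised,
# not part of the original file): both HuggingFaceLLM and the mock fallback
# expose `.complete()`, whose result carries a `.text` attribute.
if __name__ == "__main__":
    llm = LocalLLM().get_llm()
    response = llm.complete("Say hello in one short sentence.")
    print(response.text)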