try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from llama_index.llms.huggingface import HuggingFaceLLM
    import torch
except ImportError as e:
    print(f"Import error in local_llm.py: {e}")
    raise

class LocalLLM:
    def __init__(self):
        # Use a lightweight chat-compatible model that actually exists
        self.model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
        print(f"Initializing LocalLLM with model: {self.model_name}")
        self.llm = self._create_llama_index_llm()
    def _create_llama_index_llm(self):
        try:
            print("Loading tokenizer...")
            tokenizer = AutoTokenizer.from_pretrained(self.model_name)

            print("Loading model...")
            model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                # Half precision only when a GPU is available; stay in float32 on CPU.
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                low_cpu_mem_usage=True,
            )

            print("Creating LlamaIndex-compatible LLM...")
            llm = HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                context_window=2048,
                # do_sample=True so the temperature setting actually takes effect
                generate_kwargs={"temperature": 0.7, "do_sample": True, "max_new_tokens": 256},
                tokenizer_kwargs={"use_fast": True},
                device_map="auto" if torch.cuda.is_available() else None,
            )
            print("✅ LLM created successfully!")
            return llm
        except Exception as e:
            print(f"❌ Failed to load {self.model_name}: {e}")
            return self._create_fallback_llm()
    def _create_fallback_llm(self):
        print("⚠️ Falling back to GPT-2 model")
        model_name = "gpt2"
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            # GPT-2 has no pad token; reuse EOS so padding-aware code paths work
            tokenizer.pad_token = tokenizer.eos_token
            model = AutoModelForCausalLM.from_pretrained(model_name)
            return HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                generate_kwargs={"temperature": 0.7, "do_sample": True, "max_new_tokens": 256},
            )
        except Exception as e:
            print(f"⚠️ Fallback model also failed: {e}")
            return self._create_mock_llm()
    def _create_mock_llm(self):
        print("⚠️ Using mock LLM")

        class MockLLM:
            # Minimal stand-in mirroring the response shapes callers expect:
            # chat() -> response.message.content, complete() -> response.text
            def chat(self, messages, **kwargs):
                class MockResponse:
                    def __init__(self, text):
                        self.message = type("obj", (object,), {"content": text})

                return MockResponse("Mock chat response: I would analyze this question and provide an answer.")

            def complete(self, prompt, **kwargs):
                class MockCompletion:
                    def __init__(self, text):
                        self.text = text

                return MockCompletion("Mock completion response: I would analyze this question and provide an answer.")

        return MockLLM()

    def get_llm(self):
        return self.llm
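

# Usage sketch (illustrative, not part of the module's original API beyond get_llm()):
# it assumes callers only need the LlamaIndex complete() interface, which both
# the real HuggingFaceLLM and the mock fallback expose.
if __name__ == "__main__":
    local_llm = LocalLLM()
    llm = local_llm.get_llm()
    response = llm.complete("What is retrieval-augmented generation?")
    # CompletionResponse and MockCompletion both expose .text
    print(response.text)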