from types import SimpleNamespace

try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from llama_index.llms.huggingface import HuggingFaceLLM
    import torch
except ImportError as e:
    print(f"Import error in local_llm.py: {e}")
    raise
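
# Note: this module needs the transformers, torch, and
# llama-index-llms-huggingface packages (assumed here to be pinned in a
# requirements file alongside this script).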


class LocalLLM:
    def __init__(self):
        # A lightweight chat-tuned model that is publicly available on the Hub
        self.model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
        print(f"Initializing LocalLLM with model: {self.model_name}")
        self.llm = self._create_llama_index_llm()
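
    # Fallback chain: TinyLlama → GPT-2 → in-process mock; each step runs only
    # when the previous one fails, so callers always receive a usable object.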

    def _create_llama_index_llm(self):
        try:
            print("Loading tokenizer...")
            tokenizer = AutoTokenizer.from_pretrained(self.model_name)

            print("Loading model...")
            model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                low_cpu_mem_usage=True,
            )

            print("Creating LlamaIndex-compatible LLM...")
            llm = HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                context_window=2048,
                # max_new_tokens has a dedicated parameter; keeping it out of
                # generate_kwargs avoids passing the same keyword twice
                max_new_tokens=256,
                # do_sample=True so the temperature setting actually takes effect
                generate_kwargs={"temperature": 0.7, "do_sample": True},
                tokenizer_kwargs={"use_fast": True},
                device_map="auto" if torch.cuda.is_available() else None,
            )
            print("✅ LLM created successfully!")
            return llm
        except Exception as e:
            print(f"❌ Failed to load {self.model_name}: {e}")
            return self._create_fallback_llm()

    def _create_fallback_llm(self):
        print("⚠️ Falling back to GPT2 model")
        model_name = "gpt2"
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            # GPT-2 ships without a pad token; reuse EOS so generation works
            tokenizer.pad_token = tokenizer.eos_token
            model = AutoModelForCausalLM.from_pretrained(model_name)
            return HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                # GPT-2 only supports 1024 positions; the library default is larger
                context_window=1024,
                max_new_tokens=256,
                generate_kwargs={"temperature": 0.7, "do_sample": True},
            )
        except Exception as e:
            print(f"⚠️ Fallback model also failed: {e}")
            return self._create_mock_llm()

    def _create_mock_llm(self):
        print("⚠️ Using mock LLM")

        class MockLLM:
            """Last-resort stub that mimics the chat/complete response shapes."""

            def chat(self, messages, **kwargs):
                # Callers read response.message.content, as with a real ChatResponse
                return SimpleNamespace(
                    message=SimpleNamespace(
                        content="Mock chat response: I would analyze this question and provide an answer."
                    )
                )

            def complete(self, prompt, **kwargs):
                # Callers read response.text, as with a real CompletionResponse
                return SimpleNamespace(
                    text="Mock completion response: I would analyze this question and provide an answer."
                )

        return MockLLM()

    def get_llm(self):
        return self.llm
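

# --- Usage sketch (illustrative addition, not part of the original class) ---
# A minimal smoke test, assuming this module is saved as local_llm.py with the
# dependencies above installed. Both HuggingFaceLLM and the mock fallback
# expose complete(), whose result carries a .text attribute, so this runs
# regardless of which backend was created. To make the LLM the default for
# LlamaIndex pipelines, uncomment the Settings lines (needs llama-index-core).
if __name__ == "__main__":
    llm = LocalLLM().get_llm()

    # from llama_index.core import Settings
    # Settings.llm = llm

    response = llm.complete("In one sentence, what is this module for?")
    print(response.text)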