try:
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from llama_index.llms.huggingface import HuggingFaceLLM
    import torch
except ImportError as e:
    print(f"Import error in local_llm.py: {e}")
    raise

class LocalLLM:
    def __init__(self):
        # Use a simple, reliable model that works well with LlamaIndex
        self.model_name = "microsoft/DialoGPT-small"  # Changed to smaller model
        print(f"Initializing LocalLLM with model: {self.model_name}")
        self.llm = self._create_llama_index_llm()

    def _create_llama_index_llm(self):
        """Create a LlamaIndex-compatible LLM."""
        try:
            print("Loading tokenizer...")
            tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            print("Loading model...")
            model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                low_cpu_mem_usage=True
            )

            print("Creating LlamaIndex LLM...")
            # Keep generate_kwargs minimal to avoid conflicts; token limits and
            # device placement are set at the LLM level instead.
            llm = HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                generate_kwargs={
                    "do_sample": True,
                    "temperature": 0.7,
                    "pad_token_id": tokenizer.eos_token_id
                },
                max_new_tokens=256,
                device_map="auto" if torch.cuda.is_available() else None
            )
            print("LLM created successfully!")
            return llm
        except Exception as e:
            print(f"Failed to load model {self.model_name}: {str(e)}")
            # Fall back to an even simpler model
            return self._create_fallback_llm()

    def _create_fallback_llm(self):
        """Fall back to a very basic model."""
        print("Using fallback model: gpt2")
        model_name = "gpt2"
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            tokenizer.pad_token = tokenizer.eos_token
            model = AutoModelForCausalLM.from_pretrained(model_name)
            return HuggingFaceLLM(
                model=model,
                tokenizer=tokenizer,
                generate_kwargs={
                    "do_sample": True,
                    "temperature": 0.7,
                    "pad_token_id": tokenizer.eos_token_id
                },
                max_new_tokens=256
            )
        except Exception as e:
            print(f"Even fallback model failed: {str(e)}")
            # Return a mock LLM for testing
            return self._create_mock_llm()

    def _create_mock_llm(self):
        """Create a mock LLM for testing when real models fail to load."""
        print("Creating mock LLM for testing...")

        class MockLLM:
            def chat(self, messages, **kwargs):
                # Return a canned response shaped like a chat result
                class MockResponse:
                    def __init__(self, text):
                        # Lightweight stand-in for a message object with a .content attribute
                        self.message = type('obj', (object,), {'content': text})

                return MockResponse("This is a mock response. The actual LLM failed to load.")

            def complete(self, prompt, **kwargs):
                class MockCompletion:
                    def __init__(self, text):
                        self.text = text

                return MockCompletion("Mock completion response.")

        return MockLLM()

    def get_llm(self):
        """Return the LlamaIndex LLM instance."""
        return self.llm