Spaces:

p3rc03
/

2B

Running

2B / app /core /llm.py

37-AN

Initial commit for Hugging Face Space deployment

31cd25b 3 months ago

4.02 kB

	from langchain.llms import HuggingFaceHub
	from langchain_community.llms import HuggingFaceEndpoint
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain.chains import LLMChain
	from langchain.prompts import PromptTemplate
	import sys
	import os

	# Add project root to path for imports
	sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
	from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATURE, MAX_TOKENS

	def get_llm():
	    """Initialize and return the language model.

	    Selection order:
	      1. ``HF_API_KEY`` is set  -> ``HuggingFaceHub`` for ``LLM_MODEL``.
	      2. ``HF_API_KEY`` is empty -> ``HuggingFaceEndpoint`` pointed at the
	         public inference URL for ``LLM_MODEL``.
	      3. Either constructor raises -> ``FakeListLLM`` with one canned reply.

	    Side effects:
	      * Tries to create ``/app/models`` (mode 0o777) if it does not exist.
	      * Exports ``HUGGINGFACEHUB_API_TOKEN`` when an API key is configured.

	    Returns:
	        An LLM object; this function does not propagate constructor errors,
	        it prints them and returns the mock LLM instead.
	    """
	    # Best-effort creation of the model cache directory. The path is not
	    # handed to the LLM clients below, so a failure here only warrants a
	    # warning.
	    cache_dir = "/app/models"
	    if not os.path.exists(cache_dir):
	        try:
	            os.makedirs(cache_dir, exist_ok=True)
	            os.chmod(cache_dir, 0o777)
	        except OSError as e:
	            print(f"Warning: Could not create cache directory: {e}")

	    # Both clients receive the same generation settings.
	    generation_kwargs = {
	        "temperature": DEFAULT_TEMPERATURE,
	        "max_length": MAX_TOKENS,
	    }

	    try:
	        if HF_API_KEY:
	            # Export the token only when one is configured: os.environ
	            # rejects None with a TypeError, and writing "" would clobber a
	            # token that is already present in the environment.
	            os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_API_KEY
	            llm = HuggingFaceHub(
	                huggingfacehub_api_token=HF_API_KEY,
	                repo_id=LLM_MODEL,
	                model_kwargs=generation_kwargs,
	            )
	        else:
	            # If no API key, inform the user
	            print("No Hugging Face API key found. Using a simpler approach with HuggingFaceEndpoint.")
	            llm = HuggingFaceEndpoint(
	                endpoint_url=f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
	                task="text-generation",
	                model_kwargs=generation_kwargs,
	            )
	        return llm
	    except Exception as e:
	        # Deliberately broad: any client construction failure degrades to
	        # the mock LLM rather than crashing the app.
	        print(f"Error initializing Hugging Face LLM: {e}")
	        print("Using a fallback approach with a mock LLM.")

	        # Imported lazily so the fake LLM is only loaded when needed.
	        from langchain.llms.fake import FakeListLLM
	        return FakeListLLM(
	            responses=["I'm a simple AI assistant. I can't access external knowledge right now, but I'll try to help with basic questions."]
	        )

	def get_embeddings():
	    """Build the embeddings backend, degrading to fake embeddings on failure.

	    Ensures ``/app/models`` exists (creating it with mode 0o777 when it is
	    missing) and passes it as the cache folder; if the directory cannot be
	    prepared, ``None`` is passed instead. Any error raised while constructing
	    ``HuggingFaceEmbeddings`` is printed and a ``FakeEmbeddings`` instance of
	    size 384 is returned in its place.
	    """
	    models_path = "/app/models"

	    if os.path.exists(models_path):
	        cache_folder = models_path
	    else:
	        try:
	            os.makedirs(models_path, exist_ok=True)
	            os.chmod(models_path, 0o777)
	        except Exception as err:
	            print(f"Warning: Could not create cache directory: {err}")
	            cache_folder = None
	        else:
	            cache_folder = models_path

	    # Per the original author's note, SentenceTransformers models can run
	    # locally without an API key, so no token handling happens here.
	    try:
	        embedder = HuggingFaceEmbeddings(
	            model_name=EMBEDDING_MODEL,
	            cache_folder=cache_folder,
	        )
	    except Exception as err:
	        print(f"Error initializing embeddings: {err}")

	        # Fallback: fake embeddings; 384 is the size the original author
	        # describes as standard for small embedding models.
	        from langchain.embeddings.fake import FakeEmbeddings
	        embedder = FakeEmbeddings(size=384)

	    return embedder

	def get_chat_model():
	    """
	    Wrap the plain LLM from get_llm() in a chain that mimics a chat interface.

	    The returned LLMChain uses a prompt with three input variables:
	    ``context``, ``chat_history`` and ``question``. Per the original author,
	    this exists because many free HF models don't have chat interfaces.
	    """
	    # The lines of this literal are kept exactly as in the original so the
	    # prompt text (including its leading whitespace) is unchanged.
	    prompt_text = """
	Context: {context}

	Chat History:
	{chat_history}

	User: {question}
	AI Assistant:
	"""

	    # Keyword arguments are evaluated left to right, so the LLM is still
	    # obtained before the prompt template is constructed.
	    return LLMChain(
	        llm=get_llm(),
	        prompt=PromptTemplate(
	            input_variables=["context", "chat_history", "question"],
	            template=prompt_text,
	        ),
	    )