2B / app /core /llm.py
37-AN
Initial commit for Hugging Face Space deployment
31cd25b
raw
history blame
4.02 kB
from langchain.llms import HuggingFaceHub
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import sys
import os
# Add project root to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATURE, MAX_TOKENS
def get_llm():
    """Initialize and return the language model.

    Returns:
        A ``HuggingFaceHub`` LLM for ``LLM_MODEL`` when ``HF_API_KEY`` is
        truthy, otherwise a ``HuggingFaceEndpoint`` pointed at the hosted
        inference URL for ``LLM_MODEL``. If building either one raises, the
        error is printed and a ``FakeListLLM`` with a single canned response
        is returned instead.
    """
    # Best-effort creation of the model cache directory. The path itself is
    # not used further in this function; a failure here is only reported.
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            print(f"Warning: Could not create cache directory: {e}")

    # Export the token only when there is one. os.environ accepts str values
    # only, so assigning a missing (None) key would raise TypeError before the
    # keyless branch below could run, and an empty key would clobber a token
    # that is already present in the environment.
    if HF_API_KEY:
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_API_KEY

    # Generation settings shared by both client types.
    model_kwargs = {
        "temperature": DEFAULT_TEMPERATURE,
        "max_length": MAX_TOKENS
    }

    try:
        if HF_API_KEY:
            # If we have an API key, use the HuggingFaceHub
            llm = HuggingFaceHub(
                huggingfacehub_api_token=HF_API_KEY,
                repo_id=LLM_MODEL,
                model_kwargs=model_kwargs
            )
        else:
            # If no API key, inform the user
            print("No Hugging Face API key found. Using a simpler approach with HuggingFaceEndpoint.")
            llm = HuggingFaceEndpoint(
                endpoint_url=f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
                task="text-generation",
                model_kwargs=model_kwargs
            )
        return llm
    except Exception as e:
        # Deliberate best-effort: keep the app usable with a canned reply
        # rather than failing at startup.
        print(f"Error initializing Hugging Face LLM: {e}")
        print("Using a fallback approach with a mock LLM.")
        # Imported lazily so the fake LLM is only loaded when needed.
        from langchain.llms.fake import FakeListLLM
        return FakeListLLM(
            responses=["I'm a simple AI assistant. I can't access external knowledge right now, but I'll try to help with basic questions."]
        )
def get_embeddings():
    """Build and return the embeddings model.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for ``EMBEDDING_MODEL``, caching
        under ``/app/models`` when that directory exists or can be created
        (otherwise ``cache_folder`` is ``None``). If construction raises, the
        error is printed and a ``FakeEmbeddings`` of size 384 is returned.
    """
    # Make sure the cache location exists; fall back to no explicit cache
    # folder when it cannot be created.
    models_dir = "/app/models"
    if not os.path.exists(models_dir):
        try:
            os.makedirs(models_dir, exist_ok=True)
            os.chmod(models_dir, 0o777)
        except Exception as err:
            print(f"Warning: Could not create cache directory: {err}")
            models_dir = None

    # SentenceTransformers can be used locally without an API key
    try:
        embeddings = HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            cache_folder=models_dir,
        )
    except Exception as err:
        print(f"Error initializing embeddings: {err}")
        # Mock embeddings keep callers working when the real model is
        # unavailable.
        from langchain.embeddings.fake import FakeEmbeddings
        return FakeEmbeddings(size=384)  # Standard size for small embedding models
    return embeddings
def get_chat_model():
    """Wrap the plain LLM in a chat-style prompt chain.

    Many free HF models don't have chat interfaces, so the conversation is
    rendered into a single text prompt instead.

    Returns:
        An ``LLMChain`` that combines the model from ``get_llm()`` with a
        prompt taking ``context``, ``chat_history`` and ``question``.
    """
    # Prompt layout: retrieved context, prior turns, then the new question,
    # ending on the assistant cue so the model continues from there.
    chat_template = """
Context: {context}
Chat History:
{chat_history}
User: {question}
AI Assistant:
"""
    chat_prompt = PromptTemplate(
        template=chat_template,
        input_variables=["context", "chat_history", "question"],
    )
    chain = LLMChain(llm=get_llm(), prompt=chat_prompt)
    return chain