from langchain_community.llms import HuggingFaceHub, HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import sys
import os

# Add project root to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATURE, MAX_TOKENS


def _ensure_cache_dir(path="/app/models"):
    """Create the model cache directory with open permissions; return None on failure."""
    if not os.path.exists(path):
        try:
            os.makedirs(path, exist_ok=True)
            os.chmod(path, 0o777)
        except Exception as e:
            print(f"Warning: Could not create cache directory: {e}")
            return None
    return path


def get_llm():
    """Initialize and return the language model."""
    # Pre-create the cache directory so model downloads have somewhere to land.
    _ensure_cache_dir()

    # Only export the token when a key is actually present; os.environ values
    # must be strings, so assigning None would raise a TypeError.
    if HF_API_KEY:
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_API_KEY

    # For Hugging Face Spaces, fall back to the public Inference API when no
    # API key is configured, since many free models work without authentication.
    try:
        if HF_API_KEY:
            # With an API key, use HuggingFaceHub directly.
            llm = HuggingFaceHub(
                huggingfacehub_api_token=HF_API_KEY,
                repo_id=LLM_MODEL,
                model_kwargs={
                    "temperature": DEFAULT_TEMPERATURE,
                    "max_length": MAX_TOKENS,
                },
            )
        else:
            # Without an API key, hit the public inference endpoint instead.
            print("No Hugging Face API key found. Falling back to HuggingFaceEndpoint.")
            llm = HuggingFaceEndpoint(
                endpoint_url=f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
                task="text-generation",
                model_kwargs={
                    "temperature": DEFAULT_TEMPERATURE,
                    "max_length": MAX_TOKENS,
                },
            )
        return llm
    except Exception as e:
        print(f"Error initializing Hugging Face LLM: {e}")
        print("Using a fallback approach with a mock LLM.")
        # Fall back to a canned-response mock so the rest of the app keeps working.
        from langchain_community.llms.fake import FakeListLLM
        return FakeListLLM(
            responses=[
                "I'm a simple AI assistant. I can't access external knowledge "
                "right now, but I'll try to help with basic questions."
            ]
        )


def get_embeddings():
    """Initialize and return the embeddings model."""
    cache_dir = _ensure_cache_dir()

    # SentenceTransformers models run locally and need no API key.
    try:
        return HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            cache_folder=cache_dir,
        )
    except Exception as e:
        print(f"Error initializing embeddings: {e}")
        # Fall back to random-vector embeddings so downstream code still runs.
        from langchain_community.embeddings.fake import FakeEmbeddings
        return FakeEmbeddings(size=384)  # Common dimension for small embedding models


def get_chat_model():
    """
    Create a chat-like interface using a regular LLM.

    This is necessary because many free HF models don't expose chat interfaces.
    """
    llm = get_llm()

    # Frame the conversation as a plain completion prompt so a text-generation
    # model can play the assistant role.
    chat_template = """
Context: {context}

Chat History: {chat_history}

User: {question}

AI Assistant:"""

    prompt = PromptTemplate(
        input_variables=["context", "chat_history", "question"],
        template=chat_template,
    )

    # Wrap the LLM and prompt in a chain.
    return LLMChain(llm=llm, prompt=prompt)
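

# --- Illustrative usage sketch (not part of the original module) ---
# A minimal smoke test showing how the three factories above fit together.
# Assumptions: app.config supplies real values for LLM_MODEL / EMBEDDING_MODEL,
# and LLMChain exposes invoke() returning a dict keyed by its default output
# key "text" (on older LangChain versions, chain.run(...) is the equivalent).
if __name__ == "__main__":
    chain = get_chat_model()
    result = chain.invoke({
        "context": "No documents retrieved yet.",
        "chat_history": "",
        "question": "What can you help me with?",
    })
    print(result["text"])

    # Embeddings work locally even without an HF API key.
    embeddings = get_embeddings()
    vector = embeddings.embed_query("hello world")
    print(f"Embedding dimension: {len(vector)}")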