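"""Model factory helpers for the app.

Initializes the Hugging Face LLM and embedding models used by the
application, with mock fallbacks so the app can still start when no API
key or network access is available.
"""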
from langchain_community.llms import HuggingFaceHub, HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import sys
import os

# Add project root to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATURE, MAX_TOKENS

def get_llm():
    """Initialize and return the language model."""
    # Set up cache directories with proper permissions
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            print(f"Warning: Could not create cache directory: {e}")
            cache_dir = None
            
    # Set the Hub token only when a key is configured; os.environ values
    # must be strings, so assigning None would raise a TypeError
    if HF_API_KEY:
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_API_KEY
    
    # For Hugging Face Spaces, we'll use a simpler model approach
    # that doesn't require authentication for free models
    try:
        if HF_API_KEY:
            # If we have an API key, use the HuggingFaceHub
            llm = HuggingFaceHub(
                huggingfacehub_api_token=HF_API_KEY,
                repo_id=LLM_MODEL,
                model_kwargs={
                    "temperature": DEFAULT_TEMPERATURE,
                    "max_length": MAX_TOKENS
                }
            )
        else:
            # If no API key, inform the user
            print("No Hugging Face API key found. Using a simpler approach with HuggingFaceEndpoint.")
            llm = HuggingFaceEndpoint(
                endpoint_url=f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
                task="text-generation",
                model_kwargs={
                    "temperature": DEFAULT_TEMPERATURE,
                    "max_length": MAX_TOKENS
                }
            )
        return llm
    except Exception as e:
        print(f"Error initializing Hugging Face LLM: {e}")
        print("Using a fallback approach with a mock LLM.")
        
        # Create a very simple mock LLM for fallback
        from langchain_community.llms.fake import FakeListLLM
        return FakeListLLM(
            responses=["I'm a simple AI assistant. I can't access external knowledge right now, but I'll try to help with basic questions."]
        )

def get_embeddings():
    """Initialize and return the embeddings model."""
    # Set up cache directories with proper permissions
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            print(f"Warning: Could not create cache directory: {e}")
            cache_dir = None
    
    # SentenceTransformers can be used locally without an API key
    try:
        return HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            cache_folder=cache_dir
        )
    except Exception as e:
        print(f"Error initializing embeddings: {e}")
        
        # Create mock embeddings that return random vectors for fallback
        from langchain_community.embeddings.fake import FakeEmbeddings
        return FakeEmbeddings(size=384)  # 384 matches small models like all-MiniLM-L6-v2

def get_chat_model():
    """
    Create a chat-like interface using a regular LLM.
    This is necessary because many free HF models don't have chat interfaces.
    """
    llm = get_llm()
    
    # Create a chat-like prompt template
    chat_template = """
    Context: {context}
    
    Chat History:
    {chat_history}
    
    User: {question}
    AI Assistant:
    """
    
    prompt = PromptTemplate(
        input_variables=["context", "chat_history", "question"],
        template=chat_template
    )
    
    # Create a chain
    return LLMChain(llm=llm, prompt=prompt)
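

# --- Usage sketch (illustrative, not part of the app's entry points) ---
# A minimal smoke test for the helpers above when the module is run
# directly. The context and question are placeholders; in the real app
# they come from the retriever and the user.
if __name__ == "__main__":
    embeddings = get_embeddings()
    print(f"Embedding dimension: {len(embeddings.embed_query('hello'))}")

    chain = get_chat_model()
    answer = chain.run(
        context="LangChain is a framework for building LLM-powered apps.",
        chat_history="",
        question="What is LangChain?",
    )
    print(answer)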