"""LLM and embedding factories for the app.

Loads Hugging Face models as local pipelines and falls back to mock
implementations when model loading fails (e.g. in a constrained Space).
"""
from langchain_community.llms import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import sys
import os
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Add project root to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATURE, MAX_TOKENS
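
# For reference, app.config is assumed to define values along these lines
# (illustrative placeholders, not the actual project configuration):
#   HF_API_KEY = os.getenv("HF_API_KEY", "")
#   LLM_MODEL = "distilgpt2"
#   EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
#   DEFAULT_TEMPERATURE = 0.7
#   MAX_TOKENS = 512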

def get_llm():
    """Initialize and return the language model."""
    # Set up cache directories with proper permissions
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            cache_dir = None
            
    # No API token is needed here: the model is loaded as a local pipeline below.
    logger.info(f"Using model: {LLM_MODEL}")
    
    # Always try local pipeline first (most reliable in Spaces)
    try:
        from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
        
        logger.info(f"Loading model {LLM_MODEL} as local pipeline")
        
        # Try loading with explicit model classes for better compatibility
        try:
            # Load tokenizer and model explicitly, caching into the writable directory
            tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, cache_dir=cache_dir)
            model = AutoModelForCausalLM.from_pretrained(LLM_MODEL, cache_dir=cache_dir)
            
            # Create pipeline from the loaded components; enable sampling so
            # the temperature setting actually takes effect
            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_length=MAX_TOKENS,
                do_sample=True,
                temperature=DEFAULT_TEMPERATURE
            )
            
            return HuggingFacePipeline(pipeline=pipe)
        except Exception as e:
            logger.warning(f"Error loading with explicit model/tokenizer: {e}")
            
            # Fall back to the simpler string-based pipeline instantiation
            pipe = pipeline(
                "text-generation",
                model=LLM_MODEL,
                max_length=MAX_TOKENS,
                do_sample=True,
                temperature=DEFAULT_TEMPERATURE,
                model_kwargs={"cache_dir": cache_dir}
            )
            
            return HuggingFacePipeline(pipeline=pipe)
            
    except Exception as e:
        logger.warning(f"Error creating local pipeline: {e}")
        
        # Last resort: a mock LLM so the app can still start
        from langchain_community.llms.fake import FakeListLLM
        logger.warning("Using mock LLM as fallback")
        return FakeListLLM(
            responses=[
                "I'm running in fallback mode due to model loading issues. I have limited capabilities right now.",
                "I can't access the language model currently. Please check the Space logs for more information.",
                "I'm operating with a simplified model. For better performance, try running this app locally with proper models configured."
            ]
        )

def get_embeddings():
    """Initialize and return the embeddings model."""
    # Set up cache directories with proper permissions
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            cache_dir = None
    
    # Try to use local embeddings
    try:
        logger.info(f"Loading embeddings model: {EMBEDDING_MODEL}")
        return HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            cache_folder=cache_dir
        )
    except Exception as e:
        logger.warning(f"Error initializing embeddings: {e}")
        
        # Create mock embeddings that return random vectors as a fallback
        from langchain_community.embeddings.fake import FakeEmbeddings
        logger.warning("Using mock embeddings as fallback")
        return FakeEmbeddings(size=384)  # 384 dims, matching MiniLM-class embedding models

def get_chat_model():
    """
    Create a chat-like interface using a regular LLM.
    This is necessary because many free HF models don't have chat interfaces.
    """
    llm = get_llm()
    
    # Build the prompt as a plain string so the prompt itself carries no
    # leading indentation from the source code
    chat_template = (
        "Context: {context}\n\n"
        "Chat History:\n"
        "{chat_history}\n\n"
        "User: {question}\n"
        "AI Assistant:"
    )
    
    prompt = PromptTemplate(
        input_variables=["context", "chat_history", "question"],
        template=chat_template
    )
    
    # Create a chain
    return LLMChain(llm=llm, prompt=prompt)
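
# A minimal usage sketch, not part of the original module: it assumes the
# models named in app.config are small enough to load locally. Chain.invoke
# returns a dict containing the rendered inputs plus the generated "text".
if __name__ == "__main__":
    embeddings = get_embeddings()
    vector = embeddings.embed_query("hello world")
    logger.info(f"Embedding dimension: {len(vector)}")

    chain = get_chat_model()
    result = chain.invoke({
        "context": "The user is verifying that the model stack loads.",
        "chat_history": "",
        "question": "Say hello in one short sentence."
    })
    logger.info(f"Sample reply: {result['text']}")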