from langchain_community.llms import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import sys
import os
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Add project root to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATURE, MAX_TOKENS


def _ensure_cache_dir():
    """Create the model cache directory with open permissions; return None on failure."""
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            return None
    return cache_dir


def get_llm():
    """Initialize and return the language model."""
    cache_dir = _ensure_cache_dir()

    # The API key is optional here: local pipelines don't require it, but
    # gated models do. Prefer environment variables, then the config value.
    api_key = (
        os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
        or os.getenv("HF_API_KEY", "")
        or HF_API_KEY
        or ""
    )

    logger.info(f"Using model: {LLM_MODEL}")

    # Always try a local pipeline first (most reliable in Spaces)
    try:
        from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

        logger.info(f"Loading model {LLM_MODEL} as local pipeline")

        # Try loading with explicit model classes for better compatibility
        try:
            tokenizer = AutoTokenizer.from_pretrained(
                LLM_MODEL, cache_dir=cache_dir, token=api_key or None
            )
            model = AutoModelForCausalLM.from_pretrained(
                LLM_MODEL, cache_dir=cache_dir, token=api_key or None
            )

            # Create pipeline with the loaded components
            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_length=MAX_TOKENS,
                temperature=DEFAULT_TEMPERATURE
            )
            return HuggingFacePipeline(pipeline=pipe)
        except Exception as e:
            logger.warning(f"Error loading with explicit model/tokenizer: {e}")

            # Fall back to the simpler pipeline instantiation
            pipe = pipeline(
                "text-generation",
                model=LLM_MODEL,
                max_length=MAX_TOKENS,
                temperature=DEFAULT_TEMPERATURE,
                model_kwargs={"cache_dir": cache_dir}
            )
            return HuggingFacePipeline(pipeline=pipe)
    except Exception as e:
        logger.warning(f"Error creating local pipeline: {e}")

        # Last resort: a mock LLM so the app keeps responding
        from langchain_community.llms.fake import FakeListLLM

        logger.warning("Using mock LLM as fallback")
        return FakeListLLM(
            responses=[
                "I'm running in fallback mode due to model loading issues. "
                "I have limited capabilities right now.",
                "I can't access the language model currently. "
                "Please check the Space logs for more information.",
                "I'm operating with a simplified model. For better performance, "
                "try running this app locally with proper models configured."
            ]
        )


def get_embeddings():
    """Initialize and return the embeddings model."""
    cache_dir = _ensure_cache_dir()

    # Try to use local embeddings
    try:
        logger.info(f"Loading embeddings model: {EMBEDDING_MODEL}")
        return HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            cache_folder=cache_dir
        )
    except Exception as e:
        logger.warning(f"Error initializing embeddings: {e}")

        # Mock embeddings that return random vectors, used as a fallback
        from langchain_community.embeddings.fake import FakeEmbeddings

        logger.warning("Using mock embeddings as fallback")
        return FakeEmbeddings(size=384)  # Standard size for small embedding models

def get_chat_model():
    """
    Create a chat-like interface using a regular LLM.

    This is necessary because many free HF models don't have chat interfaces.
    """
    llm = get_llm()

    # Create a chat-like prompt template
    chat_template = """
Context: {context}

Chat History: {chat_history}

User: {question}
AI Assistant: """

    prompt = PromptTemplate(
        input_variables=["context", "chat_history", "question"],
        template=chat_template
    )

    # Create a chain
    return LLMChain(llm=llm, prompt=prompt)
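
# --- Usage sketch (illustrative addition, not part of the original module) ---
# A minimal smoke test, assuming app.config defines the names imported above
# and that LLM_MODEL names a small text-generation model that fits in memory.
# The chat chain takes the three prompt variables and returns a dict whose
# "text" key holds the generated answer.
if __name__ == "__main__":
    chain = get_chat_model()
    result = chain.invoke({
        "context": "LangChain can wrap a local Hugging Face pipeline.",
        "chat_history": "",
        "question": "Which model are you running?"
    })
    print(result["text"])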