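"""Model helpers: build the LLM, embeddings, and chat chain for the app.

Prefers a local Hugging Face pipeline and falls back to mock LangChain
components so the app keeps responding when models cannot be loaded
(for example, in a constrained Spaces environment).
"""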
from langchain.llms import HuggingFaceHub
from langchain_community.llms import HuggingFaceEndpoint, HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import sys
import os
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Add project root to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATURE, MAX_TOKENS


def get_llm():
    """Initialize and return the language model."""
    # Set up cache directories with proper permissions
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            cache_dir = None
    # Don't rely on an API key in the Spaces environment; read one only if it
    # happens to be set (the local pipeline path below does not need it)
    api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") or os.getenv("HF_API_KEY", "")

    logger.info(f"Using model: {LLM_MODEL}")
    # Always try local pipeline first (most reliable in Spaces)
    try:
        from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

        logger.info(f"Loading model {LLM_MODEL} as local pipeline")

        # Try loading with more specific model classes for better compatibility
        try:
            # Load tokenizer and model explicitly
            tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
            model = AutoModelForCausalLM.from_pretrained(LLM_MODEL)

            # Create pipeline with loaded components
            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_length=MAX_TOKENS,
                temperature=DEFAULT_TEMPERATURE
            )
            return HuggingFacePipeline(pipeline=pipe)
        except Exception as e:
            logger.warning(f"Error loading with explicit model/tokenizer: {e}")

            # Fallback to simpler pipeline instantiation
            pipe = pipeline(
                "text-generation",
                model=LLM_MODEL,
                max_length=MAX_TOKENS,
                temperature=DEFAULT_TEMPERATURE
            )
            return HuggingFacePipeline(pipeline=pipe)
    except Exception as e:
        logger.warning(f"Error creating local pipeline: {e}")

        # Last resort - mock LLM for fallback
        from langchain.llms.fake import FakeListLLM

        logger.warning("Using mock LLM as fallback")
        return FakeListLLM(
            responses=[
                "I'm running in fallback mode due to model loading issues. I have limited capabilities right now.",
                "I can't access the language model currently. Please check the Space logs for more information.",
                "I'm operating with a simplified model. For better performance, try running this app locally with proper models configured."
            ]
        )


def get_embeddings():
    """Initialize and return the embeddings model."""
    # Set up cache directories with proper permissions
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            cache_dir = None

    # Try to use local embeddings
    try:
        logger.info(f"Loading embeddings model: {EMBEDDING_MODEL}")
        return HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            cache_folder=cache_dir
        )
    except Exception as e:
        logger.warning(f"Error initializing embeddings: {e}")

        # Create mock embeddings that return random vectors for fallback
        from langchain.embeddings.fake import FakeEmbeddings

        logger.warning("Using mock embeddings as fallback")
        return FakeEmbeddings(size=384)  # Standard size for small embedding models


def get_chat_model():
    """
    Create a chat-like interface using a regular LLM.

    This is necessary because many free HF models don't have chat interfaces.
    """
    llm = get_llm()

    # Create a chat-like prompt template
    chat_template = """
Context: {context}

Chat History:
{chat_history}

User: {question}
AI Assistant:
"""

    prompt = PromptTemplate(
        input_variables=["context", "chat_history", "question"],
        template=chat_template
    )

    # Create a chain
    return LLMChain(llm=llm, prompt=prompt)
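

# --- Hypothetical usage sketch (not one of the app's real entry points) ---
# Assumes app.config provides LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATURE,
# and MAX_TOKENS; if no model can be loaded, the mock fallback LLM answers instead.
if __name__ == "__main__":
    chain = get_chat_model()
    answer = chain.run(
        context="No documents retrieved yet.",
        chat_history="",
        question="Are you running the local pipeline or the fallback model?"
    )
    print(answer)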