"""Factories for the LLM, embeddings model, and chat chain, with local transformers pipelines and mock fallbacks."""
from langchain_community.llms import HuggingFaceEndpoint, HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import sys
import os
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

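# Make the directory three levels above this file importable (assumed to be the
# project root) so that `from app.config import ...` below resolves when this
# module is run directly.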
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATURE, MAX_TOKENS


def get_llm():
    """Initialize and return the language model."""
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            cache_dir = None

    api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") or os.getenv("HF_API_KEY", "")
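    # Either environment variable may hold a Hugging Face access token; it is only
    # needed for gated or private models and is forwarded to from_pretrained below.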
    logger.info(f"Using model: {LLM_MODEL}")

    try:
        from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

        logger.info(f"Loading model {LLM_MODEL} as local pipeline")

        try:
            # Load tokenizer and model explicitly so the cache directory and the
            # access token (for gated models) can be passed through.
            tokenizer = AutoTokenizer.from_pretrained(
                LLM_MODEL, cache_dir=cache_dir, token=api_key or None
            )
            model = AutoModelForCausalLM.from_pretrained(
                LLM_MODEL, cache_dir=cache_dir, token=api_key or None
            )

            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_length=MAX_TOKENS,
                temperature=DEFAULT_TEMPERATURE
            )
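            # Note: transformers applies `temperature` only when sampling is enabled
            # (do_sample=True); recent versions log a warning and decode greedily otherwise.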
            return HuggingFacePipeline(pipeline=pipe)
        except Exception as e:
            logger.warning(f"Error loading with explicit model/tokenizer: {e}")

        # Fall back to letting the pipeline resolve and download the model by name.
        pipe = pipeline(
            "text-generation",
            model=LLM_MODEL,
            max_length=MAX_TOKENS,
            temperature=DEFAULT_TEMPERATURE
        )
        return HuggingFacePipeline(pipeline=pipe)

    except Exception as e:
        logger.warning(f"Error creating local pipeline: {e}")

    # Last resort: a canned-response mock LLM so the app can still start.
    from langchain_community.llms.fake import FakeListLLM
    logger.warning("Using mock LLM as fallback")
    return FakeListLLM(
        responses=[
            "I'm running in fallback mode due to model loading issues. I have limited capabilities right now.",
            "I can't access the language model currently. Please check the Space logs for more information.",
            "I'm operating with a simplified model. For better performance, try running this app locally with proper models configured."
        ]
    )


def get_embeddings():
    """Initialize and return the embeddings model."""
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            cache_dir = None

    try:
        logger.info(f"Loading embeddings model: {EMBEDDING_MODEL}")
        return HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            cache_folder=cache_dir
        )
    except Exception as e:
        logger.warning(f"Error initializing embeddings: {e}")

    # Fall back to random placeholder embeddings so the rest of the app keeps working.
    from langchain_community.embeddings.fake import FakeEmbeddings
    logger.warning("Using mock embeddings as fallback")
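    # size=384 matches the output dimension of MiniLM-class sentence-transformers
    # models (an assumption about EMBEDDING_MODEL); adjust if a different model is configured.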
    return FakeEmbeddings(size=384)


def get_chat_model():
    """
    Create a chat-like interface using a regular LLM.
    This is necessary because many free HF models don't have chat interfaces.
    """
    llm = get_llm()

    chat_template = """
Context: {context}

Chat History:
{chat_history}

User: {question}
AI Assistant:
"""

    prompt = PromptTemplate(
        input_variables=["context", "chat_history", "question"],
        template=chat_template
    )

    return LLMChain(llm=llm, prompt=prompt)
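

# Illustrative usage sketch: exercises the factories above end to end. It assumes
# app.config points at small, publicly downloadable models; otherwise the mock
# fallbacks defined above are used and the chain still responds.
if __name__ == "__main__":
    embeddings = get_embeddings()
    print(f"Embedding dimension: {len(embeddings.embed_query('hello world'))}")

    chat_chain = get_chat_model()
    result = chat_chain.invoke({
        "context": "LangChain is a framework for building LLM-powered applications.",
        "chat_history": "",
        "question": "What is LangChain?",
    })
    print(result["text"])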