"""LLM and embedding factories for the app.

Loads Hugging Face models as local pipelines and falls back to mock
implementations when model loading fails (e.g. in a constrained Space).
"""
from langchain_community.llms import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import sys
import os
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Add project root to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATURE, MAX_TOKENS
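
# For reference, app.config is assumed to define values along these lines
# (illustrative placeholders, not the actual project configuration):
#   HF_API_KEY = os.getenv("HF_API_KEY", "")
#   LLM_MODEL = "distilgpt2"
#   EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
#   DEFAULT_TEMPERATURE = 0.7
#   MAX_TOKENS = 512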

def get_llm():
    """Initialize and return the language model."""
    # Set up cache directories with proper permissions
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            cache_dir = None
            
    # No API token is needed here: the model is loaded as a local pipeline below.
    logger.info(f"Using model: {LLM_MODEL}")
    
    # Always try local pipeline first (most reliable in Spaces)
    try:
        from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
        
        logger.info(f"Loading model {LLM_MODEL} as local pipeline")
        
        # Try loading with explicit model classes for better compatibility
        try:
            # Load tokenizer and model explicitly, caching into the writable directory
            tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, cache_dir=cache_dir)
            model = AutoModelForCausalLM.from_pretrained(LLM_MODEL, cache_dir=cache_dir)
            
            # Create pipeline from the loaded components; enable sampling so
            # the temperature setting actually takes effect
            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_length=MAX_TOKENS,
                do_sample=True,
                temperature=DEFAULT_TEMPERATURE
            )
            
            return HuggingFacePipeline(pipeline=pipe)
        except Exception as e:
            logger.warning(f"Error loading with explicit model/tokenizer: {e}")
            
            # Fall back to the simpler string-based pipeline instantiation
            pipe = pipeline(
                "text-generation",
                model=LLM_MODEL,
                max_length=MAX_TOKENS,
                do_sample=True,
                temperature=DEFAULT_TEMPERATURE,
                model_kwargs={"cache_dir": cache_dir}
            )
            
            return HuggingFacePipeline(pipeline=pipe)
            
    except Exception as e:
        logger.warning(f"Error creating local pipeline: {e}")
        
        # Last resort: a mock LLM so the app can still start
        from langchain_community.llms.fake import FakeListLLM
        logger.warning("Using mock LLM as fallback")
        return FakeListLLM(
            responses=[
                "I'm running in fallback mode due to model loading issues. I have limited capabilities right now.",
                "I can't access the language model currently. Please check the Space logs for more information.",
                "I'm operating with a simplified model. For better performance, try running this app locally with proper models configured."
            ]
        )

def get_embeddings():
    """Initialize and return the embeddings model."""
    # Set up cache directories with proper permissions
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            cache_dir = None
    
    # Try to use local embeddings
    try:
        logger.info(f"Loading embeddings model: {EMBEDDING_MODEL}")
        return HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            cache_folder=cache_dir
        )
    except Exception as e:
        logger.warning(f"Error initializing embeddings: {e}")
        
        # Create mock embeddings that return random vectors as a fallback
        from langchain_community.embeddings.fake import FakeEmbeddings
        logger.warning("Using mock embeddings as fallback")
        return FakeEmbeddings(size=384)  # 384 dims, matching MiniLM-class embedding models

def get_chat_model():
    """
    Create a chat-like interface using a regular LLM.
    This is necessary because many free HF models don't have chat interfaces.
    """
    llm = get_llm()
    
    # Build the prompt as a plain string so the prompt itself carries no
    # leading indentation from the source code
    chat_template = (
        "Context: {context}\n\n"
        "Chat History:\n"
        "{chat_history}\n\n"
        "User: {question}\n"
        "AI Assistant:"
    )
    
    prompt = PromptTemplate(
        input_variables=["context", "chat_history", "question"],
        template=chat_template
    )
    
    # Create a chain
    return LLMChain(llm=llm, prompt=prompt)
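
# A minimal usage sketch, not part of the original module: it assumes the
# models named in app.config are small enough to load locally. Chain.invoke
# returns a dict containing the rendered inputs plus the generated "text".
if __name__ == "__main__":
    embeddings = get_embeddings()
    vector = embeddings.embed_query("hello world")
    logger.info(f"Embedding dimension: {len(vector)}")

    chain = get_chat_model()
    result = chain.invoke({
        "context": "The user is verifying that the model stack loads.",
        "chat_history": "",
        "question": "Say hello in one short sentence."
    })
    logger.info(f"Sample reply: {result['text']}")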