37-AN
committed on
Commit
·
31cd25b
1
Parent(s):
8faa239
Initial commit for Hugging Face Space deployment
Browse files- Dockerfile +5 -1
- app/core/llm.py +58 -17
Dockerfile
CHANGED
@@ -30,10 +30,14 @@ COPY . .
|
|
30 |
RUN mkdir -p data/documents data/vector_db && \
|
31 |
chmod -R 777 data
|
32 |
|
33 |
-
# Set environment
|
34 |
ENV TOKENIZERS_PARALLELISM=false
|
35 |
ENV HF_HOME=/app/.cache
|
36 |
ENV XDG_CACHE_HOME=/app/.cache
|
|
|
|
|
|
|
|
|
37 |
|
38 |
# Expose the port required by Hugging Face Spaces
|
39 |
EXPOSE 7860
|
|
|
30 |
RUN mkdir -p data/documents data/vector_db && \
|
31 |
chmod -R 777 data
|
32 |
|
33 |
+
# Set environment variables
|
34 |
ENV TOKENIZERS_PARALLELISM=false
|
35 |
ENV HF_HOME=/app/.cache
|
36 |
ENV XDG_CACHE_HOME=/app/.cache
|
37 |
+
ENV HUGGINGFACEHUB_API_TOKEN=""
|
38 |
+
ENV HF_API_KEY=""
|
39 |
+
ENV LLM_MODEL="google/flan-t5-small"
|
40 |
+
ENV EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
|
41 |
|
42 |
# Expose the port required by Hugging Face Spaces
|
43 |
EXPOSE 7860
|
app/core/llm.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
from langchain.llms import HuggingFaceHub
|
|
|
2 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
3 |
from langchain.chains import LLMChain
|
4 |
from langchain.prompts import PromptTemplate
|
@@ -11,20 +12,53 @@ from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATU
|
|
11 |
|
12 |
def get_llm():
|
13 |
"""Initialize and return the language model."""
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
def get_embeddings():
|
30 |
"""Initialize and return the embeddings model."""
|
@@ -39,10 +73,17 @@ def get_embeddings():
|
|
39 |
cache_dir = None
|
40 |
|
41 |
# SentenceTransformers can be used locally without an API key
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
def get_chat_model():
|
48 |
"""
|
|
|
1 |
from langchain.llms import HuggingFaceHub
|
2 |
+
from langchain_community.llms import HuggingFaceEndpoint
|
3 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
4 |
from langchain.chains import LLMChain
|
5 |
from langchain.prompts import PromptTemplate
|
|
|
12 |
|
13 |
def get_llm():
    """Initialize and return the language model.

    Prefers an authenticated ``HuggingFaceHub`` LLM when ``HF_API_KEY`` is
    set, falls back to the public Inference API via ``HuggingFaceEndpoint``
    otherwise, and returns a ``FakeListLLM`` as a last resort so the app
    stays usable even when no backend can be reached.

    Returns:
        A LangChain LLM instance (never raises; errors fall through to the
        mock fallback).
    """
    # Set up a writable model cache with permissive permissions; Hugging Face
    # Spaces containers may run as a non-root user, hence the graceful
    # fallback to None when the directory cannot be created.
    # NOTE(review): cache_dir is not passed to any constructor below — it
    # looks like only the directory-creation side effect matters; confirm.
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            print(f"Warning: Could not create cache directory: {e}")
            cache_dir = None

    # Expose the token to libraries that read it from the environment.
    # FIX: guard against HF_API_KEY being None — os.environ values must be
    # strings, so the original unconditional assignment raised TypeError
    # whenever the config left the key unset.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_API_KEY or ""

    # For Hugging Face Spaces, we'll use a simpler model approach
    # that doesn't require authentication for free models
    try:
        if HF_API_KEY:
            # If we have an API key, use the HuggingFaceHub
            llm = HuggingFaceHub(
                huggingfacehub_api_token=HF_API_KEY,
                repo_id=LLM_MODEL,
                model_kwargs={
                    "temperature": DEFAULT_TEMPERATURE,
                    "max_length": MAX_TOKENS
                }
            )
        else:
            # If no API key, inform the user
            print("No Hugging Face API key found. Using a simpler approach with HuggingFaceEndpoint.")
            llm = HuggingFaceEndpoint(
                endpoint_url=f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
                task="text-generation",
                model_kwargs={
                    "temperature": DEFAULT_TEMPERATURE,
                    "max_length": MAX_TOKENS
                }
            )
        return llm
    except Exception as e:
        print(f"Error initializing Hugging Face LLM: {e}")
        print("Using a fallback approach with a mock LLM.")

        # Create a very simple mock LLM for fallback
        from langchain.llms.fake import FakeListLLM
        return FakeListLLM(
            responses=["I'm a simple AI assistant. I can't access external knowledge right now, but I'll try to help with basic questions."]
        )
|
62 |
|
63 |
def get_embeddings():
|
64 |
"""Initialize and return the embeddings model."""
|
|
|
73 |
cache_dir = None
|
74 |
|
75 |
# SentenceTransformers can be used locally without an API key
|
76 |
+
try:
|
77 |
+
return HuggingFaceEmbeddings(
|
78 |
+
model_name=EMBEDDING_MODEL,
|
79 |
+
cache_folder=cache_dir
|
80 |
+
)
|
81 |
+
except Exception as e:
|
82 |
+
print(f"Error initializing embeddings: {e}")
|
83 |
+
|
84 |
+
# Create mock embeddings that return random vectors for fallback
|
85 |
+
from langchain.embeddings.fake import FakeEmbeddings
|
86 |
+
return FakeEmbeddings(size=384) # Standard size for small embedding models
|
87 |
|
88 |
def get_chat_model():
|
89 |
"""
|