37-AN committed on
Commit
31cd25b
·
1 Parent(s): 8faa239

Initial commit for Hugging Face Space deployment

Browse files
Files changed (2) hide show
  1. Dockerfile +5 -1
  2. app/core/llm.py +58 -17
Dockerfile CHANGED
@@ -30,10 +30,14 @@ COPY . .
30
  RUN mkdir -p data/documents data/vector_db && \
31
  chmod -R 777 data
32
 
33
- # Set environment variable to avoid TOKENIZERS_PARALLELISM warning
34
  ENV TOKENIZERS_PARALLELISM=false
35
  ENV HF_HOME=/app/.cache
36
  ENV XDG_CACHE_HOME=/app/.cache
 
 
 
 
37
 
38
  # Expose the port required by Hugging Face Spaces
39
  EXPOSE 7860
 
30
  RUN mkdir -p data/documents data/vector_db && \
31
  chmod -R 777 data
32
 
33
+ # Set environment variables
34
  ENV TOKENIZERS_PARALLELISM=false
35
  ENV HF_HOME=/app/.cache
36
  ENV XDG_CACHE_HOME=/app/.cache
37
+ ENV HUGGINGFACEHUB_API_TOKEN=""
38
+ ENV HF_API_KEY=""
39
+ ENV LLM_MODEL="google/flan-t5-small"
40
+ ENV EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
41
 
42
  # Expose the port required by Hugging Face Spaces
43
  EXPOSE 7860
app/core/llm.py CHANGED
@@ -1,4 +1,5 @@
1
  from langchain.llms import HuggingFaceHub
 
2
  from langchain_community.embeddings import HuggingFaceEmbeddings
3
  from langchain.chains import LLMChain
4
  from langchain.prompts import PromptTemplate
@@ -11,20 +12,53 @@ from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATU
11
 
12
  def get_llm():
13
  """Initialize and return the language model."""
14
- if not HF_API_KEY:
15
- # Can still work without API key but with rate limits
16
- print("Warning: Hugging Face API key not set. Using models without authentication.")
17
-
18
- llm = HuggingFaceHub(
19
- huggingfacehub_api_token=HF_API_KEY,
20
- repo_id=LLM_MODEL,
21
- model_kwargs={
22
- "temperature": DEFAULT_TEMPERATURE,
23
- "max_length": MAX_TOKENS
24
- }
25
- )
26
 
27
- return llm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def get_embeddings():
30
  """Initialize and return the embeddings model."""
@@ -39,10 +73,17 @@ def get_embeddings():
39
  cache_dir = None
40
 
41
  # SentenceTransformers can be used locally without an API key
42
- return HuggingFaceEmbeddings(
43
- model_name=EMBEDDING_MODEL,
44
- cache_folder=cache_dir
45
- )
 
 
 
 
 
 
 
46
 
47
  def get_chat_model():
48
  """
 
1
  from langchain.llms import HuggingFaceHub
2
+ from langchain_community.llms import HuggingFaceEndpoint
3
  from langchain_community.embeddings import HuggingFaceEmbeddings
4
  from langchain.chains import LLMChain
5
  from langchain.prompts import PromptTemplate
 
12
 
13
  def get_llm():
14
  """Initialize and return the language model."""
15
+ # Set up cache directories with proper permissions
16
+ cache_dir = "/app/models"
17
+ if not os.path.exists(cache_dir):
18
+ try:
19
+ os.makedirs(cache_dir, exist_ok=True)
20
+ os.chmod(cache_dir, 0o777)
21
+ except Exception as e:
22
+ print(f"Warning: Could not create cache directory: {e}")
23
+ cache_dir = None
24
+
25
+ # Set environment variable for Hugging Face Hub
26
+ os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_API_KEY
27
 
28
+ # For Hugging Face Spaces, we'll use a simpler model approach
29
+ # that doesn't require authentication for free models
30
+ try:
31
+ if HF_API_KEY:
32
+ # If we have an API key, use the HuggingFaceHub
33
+ llm = HuggingFaceHub(
34
+ huggingfacehub_api_token=HF_API_KEY,
35
+ repo_id=LLM_MODEL,
36
+ model_kwargs={
37
+ "temperature": DEFAULT_TEMPERATURE,
38
+ "max_length": MAX_TOKENS
39
+ }
40
+ )
41
+ else:
42
+ # If no API key, inform the user
43
+ print("No Hugging Face API key found. Using a simpler approach with HuggingFaceEndpoint.")
44
+ llm = HuggingFaceEndpoint(
45
+ endpoint_url=f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
46
+ task="text-generation",
47
+ model_kwargs={
48
+ "temperature": DEFAULT_TEMPERATURE,
49
+ "max_length": MAX_TOKENS
50
+ }
51
+ )
52
+ return llm
53
+ except Exception as e:
54
+ print(f"Error initializing Hugging Face LLM: {e}")
55
+ print("Using a fallback approach with a mock LLM.")
56
+
57
+ # Create a very simple mock LLM for fallback
58
+ from langchain.llms.fake import FakeListLLM
59
+ return FakeListLLM(
60
+ responses=["I'm a simple AI assistant. I can't access external knowledge right now, but I'll try to help with basic questions."]
61
+ )
62
 
63
  def get_embeddings():
64
  """Initialize and return the embeddings model."""
 
73
  cache_dir = None
74
 
75
  # SentenceTransformers can be used locally without an API key
76
+ try:
77
+ return HuggingFaceEmbeddings(
78
+ model_name=EMBEDDING_MODEL,
79
+ cache_folder=cache_dir
80
+ )
81
+ except Exception as e:
82
+ print(f"Error initializing embeddings: {e}")
83
+
84
+ # Create mock embeddings that return random vectors for fallback
85
+ from langchain.embeddings.fake import FakeEmbeddings
86
+ return FakeEmbeddings(size=384) # Standard size for small embedding models
87
 
88
  def get_chat_model():
89
  """