37-AN committed
Commit f8ed285 · 1 parent: b725ad2

Fix 403 errors by improving model loading and error handling

Files changed (5)
  1. app/config.py +3 -2
  2. app/core/llm.py +123 -40
  3. app/core/memory.py +73 -23
  4. app/ui/streamlit_app.py +17 -2
  5. fix_403_error.py +97 -0
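
Taken together, the changes below replace single-shot model loads with fallback loops that pass the Hugging Face token when one is configured. As a condensed sketch of that pattern (model names taken from the diffs; the helper name and structure are illustrative, not code from the repo):

import os
from transformers import pipeline

def load_first_available(candidates=("gpt2", "distilgpt2")):
    """Return a text-generation pipeline for the first candidate that loads."""
    token = os.getenv("HF_API_KEY") or None  # pass a token if one is configured
    last_error = None
    for name in candidates:
        try:
            # use_auth_token mirrors the diff below; newer transformers
            # releases spell this parameter `token`
            return pipeline("text-generation", model=name,
                            use_auth_token=token, device=-1)
        except Exception as e:  # 403 / network / memory: try the next model
            last_error = e
    raise last_error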
app/config.py CHANGED
@@ -11,7 +11,8 @@ load_dotenv(dotenv_path=env_path)
 HF_API_KEY = os.getenv('HF_API_KEY', '')
 
 # LLM Configuration
-LLM_MODEL = os.getenv('LLM_MODEL', 'distilgpt2')
+# Use models that are freely accessible and don't require authentication
+LLM_MODEL = os.getenv('LLM_MODEL', 'gpt2')  # Changed from distilgpt2 to gpt2
 EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
 
 # Vector Database
@@ -45,7 +46,7 @@ def create_env_example():
 HF_API_KEY=your_huggingface_api_key_here
 
 # LLM Configuration
-LLM_MODEL=distilgpt2  # Use small model for Hugging Face Spaces
+LLM_MODEL=gpt2  # Use small model for Hugging Face Spaces
 EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 
 # Vector Database
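
Since `app/config.py` reads the model name through `os.getenv`, the new `gpt2` default stays overridable without code changes; a hypothetical override (the model name here is only an example):

import os

# Must be set before app.config is imported; load_dotenv() does not
# overwrite variables that already exist in the environment.
os.environ["LLM_MODEL"] = "EleutherAI/gpt-neo-125M"  # example value

from app.config import LLM_MODEL
print(LLM_MODEL)  # -> EleutherAI/gpt-neo-125M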
app/core/llm.py CHANGED
@@ -37,38 +37,89 @@ def get_llm():
 
         logger.info(f"Loading model {LLM_MODEL} as local pipeline")
 
-        # Try loading with more specific model classes for better compatibility
-        try:
-            # Load tokenizer and model explicitly
-            tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
-            model = AutoModelForCausalLM.from_pretrained(LLM_MODEL)
-
-            # Create pipeline with loaded components
-            pipe = pipeline(
-                "text-generation",
-                model=model,
-                tokenizer=tokenizer,
-                max_length=MAX_TOKENS,
-                temperature=DEFAULT_TEMPERATURE
-            )
-
-            return HuggingFacePipeline(pipeline=pipe)
-        except Exception as e:
-            logger.warning(f"Error loading with explicit model/tokenizer: {e}")
-
-            # Fallback to simpler pipeline instantiation
-            pipe = pipeline(
-                "text-generation",
-                model=LLM_MODEL,
-                max_length=MAX_TOKENS,
-                temperature=DEFAULT_TEMPERATURE
-            )
-
-            return HuggingFacePipeline(pipeline=pipe)
+        # Try multiple fallbacks with increasingly simpler models
+        models_to_try = [
+            LLM_MODEL,
+            "distilgpt2",  # Smaller fallback
+            "gpt2",  # Standard fallback
+            "EleutherAI/gpt-neo-125M"  # Another option
+        ]
+
+        last_error = None
+
+        for model_name in models_to_try:
+            try:
+                logger.info(f"Attempting to load model: {model_name}")
+
+                # Try with explicit loading first
+                try:
+                    # Set trust_remote_code to False to avoid security issues
+                    tokenizer = AutoTokenizer.from_pretrained(
+                        model_name,
+                        use_auth_token=api_key if api_key else None,
+                        trust_remote_code=False
+                    )
+                    model = AutoModelForCausalLM.from_pretrained(
+                        model_name,
+                        use_auth_token=api_key if api_key else None,
+                        trust_remote_code=False,
+                        low_cpu_mem_usage=True  # Help with memory issues
+                    )
+
+                    # Create pipeline with loaded components
+                    pipe = pipeline(
+                        "text-generation",
+                        model=model,
+                        tokenizer=tokenizer,
+                        max_length=MAX_TOKENS,
+                        temperature=DEFAULT_TEMPERATURE,
+                        device=-1  # Use CPU
+                    )
+
+                    logger.info(f"Successfully loaded model: {model_name}")
+                    return HuggingFacePipeline(pipeline=pipe)
+                except Exception as e:
+                    logger.warning(f"Error loading {model_name} with explicit model/tokenizer: {e}")
+                    last_error = e
+
+                    # Try direct pipeline loading
+                    pipe = pipeline(
+                        "text-generation",
+                        model=model_name,
+                        max_length=MAX_TOKENS,
+                        temperature=DEFAULT_TEMPERATURE,
+                        use_auth_token=api_key if api_key else None,
+                        device=-1  # Use CPU
+                    )
+
+                    logger.info(f"Successfully loaded model: {model_name} via direct pipeline")
+                    return HuggingFacePipeline(pipeline=pipe)
+
+            except Exception as e:
+                logger.warning(f"Error loading model {model_name}: {e}")
+                last_error = e
+                # Continue to the next model
+                continue
+
+        # If we get here, all models failed
+        logger.error(f"All models failed to load. Last error: {last_error}")
+        raise last_error
 
     except Exception as e:
         logger.warning(f"Error creating local pipeline: {e}")
 
+        # Try the HuggingFaceEndpoint as fallback
+        try:
+            logger.info("Attempting to use HuggingFaceEndpoint")
+            return HuggingFaceEndpoint(
+                repo_id="gpt2",
+                max_length=MAX_TOKENS,
+                temperature=DEFAULT_TEMPERATURE,
+                huggingfacehub_api_token=api_key
+            )
+        except Exception as endpoint_error:
+            logger.warning(f"HuggingFaceEndpoint failed: {endpoint_error}")
+
         # Last resort - mock LLM for fallback
         from langchain.llms.fake import FakeListLLM
         logger.warning("Using mock LLM as fallback")
@@ -92,20 +143,52 @@ def get_embeddings():
         logger.warning(f"Could not create cache directory: {e}")
         cache_dir = None
 
-    # Try to use local embeddings
+    # Try multiple models with fallbacks
+    embedding_models_to_try = [
+        EMBEDDING_MODEL,
+        "sentence-transformers/all-MiniLM-L6-v2",  # Standard model
+        "sentence-transformers/paraphrase-MiniLM-L3-v2",  # Smaller model
+        "sentence-transformers/paraphrase-albert-small-v2"  # Even smaller model
+    ]
+
+    api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") or os.getenv("HF_API_KEY", "")
+
+    for model_name in embedding_models_to_try:
+        # Try to use local embeddings
+        try:
+            logger.info(f"Loading embeddings model: {model_name}")
+            return HuggingFaceEmbeddings(
+                model_name=model_name,
+                cache_folder=cache_dir,
+                encode_kwargs={"normalize_embeddings": True},
+                model_kwargs={"device": "cpu"}  # Ensure using CPU
+            )
+        except Exception as e:
+            logger.warning(f"Error initializing embeddings with {model_name}: {e}")
+            # Continue to the next model
+
+    # If all models fail, try with direct transformers access
     try:
-        logger.info(f"Loading embeddings model: {EMBEDDING_MODEL}")
-        return HuggingFaceEmbeddings(
-            model_name=EMBEDDING_MODEL,
-            cache_folder=cache_dir
-        )
-    except Exception as e:
-        logger.warning(f"Error initializing embeddings: {e}")
+        from sentence_transformers import SentenceTransformer
+        logger.info("Loading embeddings with SentenceTransformer directly")
+        model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")
 
-        # Create mock embeddings that return random vectors for fallback
-        from langchain.embeddings.fake import FakeEmbeddings
-        logger.warning("Using mock embeddings as fallback")
-        return FakeEmbeddings(size=384)  # Standard size for small embedding models
+        # Create a custom embeddings class
+        class DirectEmbeddings:
+            def embed_documents(self, texts):
+                return model.encode(texts, normalize_embeddings=True).tolist()
+
+            def embed_query(self, text):
+                return model.encode(text, normalize_embeddings=True).tolist()
+
+        return DirectEmbeddings()
+    except Exception as e:
+        logger.warning(f"Error with direct SentenceTransformer: {e}")
+
+        # Create mock embeddings as last resort
+        from langchain.embeddings.fake import FakeEmbeddings
+        logger.warning("Using mock embeddings as fallback")
+        return FakeEmbeddings(size=384)  # Standard size for small embedding models
 
 def get_chat_model():
     """
app/core/memory.py CHANGED
@@ -8,6 +8,8 @@ from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from qdrant_client import QdrantClient
 from qdrant_client.models import Distance, VectorParams
+from langchain.chains.base import Chain
+from typing import Dict, List, Any
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -18,6 +20,34 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(
 from app.config import VECTOR_DB_PATH, COLLECTION_NAME
 from app.core.llm import get_llm, get_embeddings, get_chat_model
 
+class CustomRAGChain:
+    """Custom RAG chain that always returns standardized output format."""
+
+    def __init__(self, base_chain):
+        self.base_chain = base_chain
+        logger.info("CustomRAGChain initialized")
+
+    def __call__(self, inputs):
+        """Process inputs and return standardized output."""
+        try:
+            logger.info("CustomRAGChain processing query")
+            # Execute the underlying chain
+            result = self.base_chain(inputs)
+            logger.info(f"Base chain returned keys: {list(result.keys())}")
+
+            # Create standardized output
+            standardized = {
+                "answer": result.get("answer", "I couldn't generate an answer."),
+                "sources": result.get("source_documents", [])
+            }
+            return standardized
+        except Exception as e:
+            logger.error(f"Error in CustomRAGChain: {e}")
+            return {
+                "answer": f"Error processing query: {str(e)}",
+                "sources": []
+            }
+
 class MemoryManager:
     """Manages the RAG memory system using a vector database."""
 
@@ -117,38 +147,58 @@ class MemoryManager:
     def create_rag_chain(self):
         """Create a RAG chain for question answering."""
         try:
-            # Configure correct return keys to match what agent.py expects
-            logger.info("Creating ConversationalRetrievalChain")
-            chain = ConversationalRetrievalChain.from_llm(
-                llm=self.llm,
-                retriever=self.get_retriever(),
-                memory=self.memory,
-                return_source_documents=True,
-                return_generated_question=False,
-            )
+            # Create the base conversational retrieval chain
+            logger.info("Creating base ConversationalRetrievalChain")
 
-            # Create a wrapper function that normalizes the chain output format
-            def normalized_chain(inputs):
-                logger.info("Executing RAG chain with normalizer")
+            # Different approach: create a simple function instead
+            def simple_chain(query_dict):
                 try:
-                    # Execute the original chain
-                    response = chain(inputs)
-                    logger.info(f"Original chain output keys: {list(response.keys())}")
+                    # Extract the question
+                    question = query_dict.get("question", "")
+                    if not question.strip():
+                        return {
+                            "answer": "No question provided.",
+                            "sources": []
+                        }
+
+                    # Get relevant documents from the retriever
+                    retriever = self.get_retriever()
+                    relevant_docs = retriever.get_relevant_documents(question)
 
-                    # Create a normalized response
-                    normalized = {
-                        "answer": response.get("answer", "No answer generated"),
-                        "sources": response.get("source_documents", [])
+                    # Format the context from relevant documents
+                    context = "\n\n".join([doc.page_content for doc in relevant_docs])
+
+                    # Get chat history from memory
+                    chat_history = self.memory.chat_memory.messages
+                    chat_history_str = "\n".join([f"{msg.type}: {msg.content}" for msg in chat_history])
+
+                    # Create the prompt
+                    prompt = f"""You are a helpful AI assistant. Answer the following question based on the provided context.
+
+Context:
+{context}
+
+Chat History:
+{chat_history_str}
+
+Question: {question}
+Answer:"""
+
+                    # Get the answer from the LLM
+                    answer = self.llm(prompt)
+
+                    return {
+                        "answer": answer,
+                        "sources": relevant_docs
                     }
-                    return normalized
                 except Exception as e:
-                    logger.error(f"Error in normalized chain: {e}")
+                    logger.error(f"Error in simple_chain: {e}")
                     return {
-                        "answer": f"Error processing your query: {str(e)}",
+                        "answer": f"I encountered an error: {str(e)}",
                         "sources": []
                     }
 
-            return normalized_chain
+            return simple_chain
         except Exception as e:
             logger.error(f"Error creating RAG chain: {e}")
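
Note that the new `CustomRAGChain` wrapper is defined but `create_rag_chain` ultimately returns the plain `simple_chain` closure, so callers receive a callable that takes and returns dicts. A hypothetical invocation (construction details of `MemoryManager` are assumed from context):

from app.core.memory import MemoryManager

manager = MemoryManager()           # assumed default construction
chain = manager.create_rag_chain()  # returns the simple_chain closure

result = chain({"question": "What does the uploaded document cover?"})
print(result["answer"])             # LLM answer, or an error string
for doc in result["sources"]:       # retrieved Document objects
    print(doc.page_content[:80])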
app/ui/streamlit_app.py CHANGED
@@ -120,9 +120,21 @@ with st.sidebar:
                 logger.warning("Using temporary file path instead of documents directory")
                 doc_path = temp_path
 
-            # Ingest the document
+            # Ingest the document with retry logic for 403 errors
             logger.info("Ingesting document")
-            document_processor.ingest_file(temp_path, {"original_name": uploaded_file.name})
+            max_retries = 3
+
+            for attempt in range(max_retries):
+                try:
+                    document_processor.ingest_file(temp_path, {"original_name": uploaded_file.name})
+                    break
+                except Exception as e:
+                    error_str = str(e).lower()
+                    if ("403" in error_str or "forbidden" in error_str or "permission" in error_str) and attempt < max_retries - 1:
+                        logger.warning(f"Permission error ({attempt+1}/{max_retries}), retrying...")
+                        time.sleep(1.5)  # Add delay between retries
+                    else:
+                        raise  # Re-raise if not a 403 error or on last attempt
 
             # Clean up the temporary file if different from doc_path
             if temp_path != doc_path and os.path.exists(temp_path):
@@ -136,6 +148,9 @@ with st.sidebar:
         except Exception as e:
             logger.error(f"Error processing document: {str(e)}")
             st.error(f"Error processing document: {str(e)}")
+
+            if "403" in str(e) or "forbidden" in str(e).lower():
+                st.warning("This appears to be a permissions issue. Try using a different file format or using the text input option instead.")
     except Exception as e:
         logger.error(f"File uploader error: {str(e)}")
         st.error(f"File upload functionality is currently unavailable: {str(e)}")
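
One caveat: the retry loop calls `time.sleep`, and this hunk does not show a corresponding `import time` at the top of `streamlit_app.py`; if that import is missing, the handler itself would raise `NameError`. The same backoff pattern as a self-contained helper (names illustrative, not from the repo):

import time

def retry_on_permission_error(fn, max_retries=3, delay=1.5):
    """Call fn(), retrying when the error message looks like an HTTP 403."""
    for attempt in range(max_retries):
        try:
            return fn()
        except Exception as e:
            msg = str(e).lower()
            retryable = "403" in msg or "forbidden" in msg or "permission" in msg
            if retryable and attempt < max_retries - 1:
                time.sleep(delay)  # brief pause before the next attempt
            else:
                raise  # non-403 errors and the final attempt propagate

# e.g.: retry_on_permission_error(lambda: processor.ingest_file(path, meta))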
fix_403_error.py ADDED
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+"""
+Script to fix 403 errors and push changes to Hugging Face Spaces
+"""
+import os
+import subprocess
+import sys
+from getpass import getpass
+
+def fix_403_errors():
+    """Update the app to fix 403 errors and push to Hugging Face Space."""
+    print("=" * 50)
+    print("Fix 403 Errors and Push to Hugging Face")
+    print("=" * 50)
+
+    # Get credentials
+    username = input("Enter your Hugging Face username: ")
+    token = getpass("Enter your Hugging Face token: ")
+    space_name = input("Enter your Space name: ")
+
+    # Set environment variables
+    os.environ["HUGGINGFACEHUB_API_TOKEN"] = token
+    os.environ["HF_API_KEY"] = token
+
+    # Add the direct remote URL with credentials embedded
+    remote_url = f"https://{username}:{token}@huggingface.co/spaces/{username}/{space_name}"
+
+    try:
+        # Update git remotes
+        remotes = subprocess.run(["git", "remote"], capture_output=True, text=True).stdout.strip().split('\n')
+        if "hf" not in remotes:
+            subprocess.run(["git", "remote", "add", "hf", remote_url], check=True)
+        else:
+            subprocess.run(["git", "remote", "set-url", "hf", remote_url], check=True)
+
+        # Pull the latest changes first to avoid conflicts
+        try:
+            subprocess.run(["git", "pull", "hf", "main"], check=True)
+            print("Successfully pulled latest changes")
+        except subprocess.CalledProcessError:
+            print("Warning: Could not pull latest changes. Will attempt to push anyway.")
+
+        # Stage all files
+        subprocess.run(["git", "add", "."], check=True)
+
+        # Commit changes
+        try:
+            subprocess.run(["git", "commit", "-m", "Fix 403 errors by improving model loading and error handling"], check=True)
+            print("Changes committed successfully")
+        except subprocess.CalledProcessError:
+            # Check if there are changes to commit
+            status = subprocess.run(["git", "status", "--porcelain"], capture_output=True, text=True).stdout.strip()
+            if not status:
+                print("No changes to commit.")
+            else:
+                print("Error making commit. Will try to push existing commits.")
+
+        # Push to Space
+        print("Pushing to Hugging Face Space...")
+
+        # First try a normal push
+        try:
+            subprocess.run(["git", "push", "hf", "HEAD:main"], check=True)
+        except subprocess.CalledProcessError:
+            print("Normal push failed. Trying force push instead...")
+            try:
+                # Force push if normal push fails
+                subprocess.run(["git", "push", "-f", "hf", "HEAD:main"], check=True)
+            except subprocess.CalledProcessError as e:
+                print(f"Force push also failed: {e}")
+                print("Trying alternative push approach...")
+
+                # Most reliable way to push to HF Spaces
+                api_url = f"https://huggingface.co/spaces/{username}/{space_name}"
+
+                try:
+                    subprocess.run(["git", "remote", "set-url", "hf", api_url], check=True)
+                    subprocess.run(["git", "push", "-f", "--set-upstream", "hf", "HEAD:main"], check=True)
+                except subprocess.CalledProcessError as e:
+                    print(f"All push attempts failed. Final error: {e}")
+                    return False
+
+        print("\nSuccess! Your fixes have been pushed to Hugging Face Space.")
+        print(f"View your Space at: https://huggingface.co/spaces/{username}/{space_name}")
+        print("Note: It may take a few minutes for changes to appear.")
+        return True
+
+    except Exception as e:
+        print(f"Unexpected error: {e}")
+        return False
+
+if __name__ == "__main__":
+    if fix_403_errors():
+        print("403 error fixes successfully deployed!")
+    else:
+        print("Failed to deploy 403 error fixes. Please check the error messages above.")
+        sys.exit(1)
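
For clarity, the remote URL the script constructs embeds the credentials directly in the git remote; with placeholder values (not a real token) it looks like this:

# Illustration with placeholder values - never hard-code a real token
username, token, space_name = "alice", "hf_xxxx", "my-space"
remote_url = f"https://{username}:{token}@huggingface.co/spaces/{username}/{space_name}"
print(remote_url)
# -> https://alice:hf_xxxx@huggingface.co/spaces/alice/my-space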