37-AN committed on
Commit 28ff371 · 1 Parent(s): f8ed285

Fix AI responses and file uploading functionality


- Improved AI responses with better prompt formatting and instructions
- Enhanced file upload handling with better error recovery
- Added support for more file types (docx, html, md, etc.)
- Improved UI with progress tracking and better error messages
- Fixed edge cases with empty files and error handling

Files changed (3)
  1. app/core/ingestion.py +107 -24
  2. app/core/memory.py +46 -7
  3. app/ui/streamlit_app.py +84 -10
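
Note on the failure convention shared by all three files below: on error, the ingestion layer returns placeholder IDs of the form error-NNNN rather than raising, and the UI treats any ID with that prefix as a failed chunk. A minimal caller-side sketch of the same check (illustrative only, not part of the commit; assumes a DocumentProcessor instance as constructed in app/core/ingestion.py, and a hypothetical file name):

    # Hypothetical usage: ingest a file, then apply the "error-" prefix check
    # that streamlit_app.py uses to decide between success and warning states.
    ids = document_processor.ingest_file("notes.pdf", {"original_name": "notes.pdf"})
    if ids and not any(str(doc_id).startswith("error-") for doc_id in ids):
        print(f"Indexed {len(ids)} chunks")
    else:
        print("Ingestion failed for some or all chunks; content may not be searchable")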
app/core/ingestion.py CHANGED
@@ -3,11 +3,14 @@ import sys
 import logging
 import time
 import random
+import traceback
 from typing import List, Dict, Any
 from langchain.document_loaders import (
     PyPDFLoader,
     TextLoader,
-    CSVLoader
+    CSVLoader,
+    UnstructuredFileLoader,
+    Docx2txtLoader
 )
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 
@@ -42,36 +45,86 @@ class DocumentProcessor:
 
         logger.info(f"Processing file: {file_path} with extension {extension}")
 
+        # Verify file is readable
+        try:
+            with open(file_path, 'rb') as f:
+                # Just check if we can read from it
+                f.read(1)
+        except Exception as e:
+            logger.error(f"Cannot read file {file_path}: {e}")
+            raise IOError(f"File {file_path} exists but cannot be read: {str(e)}")
+
         # Load the file using the appropriate loader
-        if extension == '.pdf':
-            loader = PyPDFLoader(file_path)
-        elif extension == '.txt':
-            loader = TextLoader(file_path)
-        elif extension == '.csv':
-            loader = CSVLoader(file_path)
-        else:
-            raise ValueError(f"Unsupported file type: {extension}")
-
-        # Load and split the documents
-        documents = loader.load()
-        chunks = self.text_splitter.split_documents(documents)
-
-        logger.info(f"Split file into {len(chunks)} chunks")
-        return chunks
+        try:
+            if extension == '.pdf':
+                loader = PyPDFLoader(file_path)
+            elif extension == '.txt':
+                loader = TextLoader(file_path)
+            elif extension == '.csv':
+                loader = CSVLoader(file_path)
+            elif extension in ['.doc', '.docx']:
+                loader = Docx2txtLoader(file_path)
+            elif extension in ['.md', '.html', '.htm', '.xml', '.json']:
+                # Dedicated loaders could be added for these formats
+                loader = TextLoader(file_path)
+            else:
+                # Try generic loader as fallback for unsupported types
+                logger.warning(f"No specific loader for {extension}, trying UnstructuredFileLoader")
+                loader = UnstructuredFileLoader(file_path)
+
+            # Load and split the documents
+            documents = loader.load()
+
+            if not documents:
+                logger.warning(f"No content extracted from {file_path}")
+                # Create a minimal document if empty to avoid errors
+                from langchain.schema import Document
+                documents = [Document(page_content=f"Empty file: {os.path.basename(file_path)}",
+                                      metadata={"source": file_path})]
+
+            chunks = self.text_splitter.split_documents(documents)
+
+            logger.info(f"Split file into {len(chunks)} chunks")
+            return chunks
+
+        except Exception as e:
+            logger.error(f"Error in document processing: {str(e)}")
+            logger.error(traceback.format_exc())
+
+            # Create a minimal document to represent the error
+            from langchain.schema import Document
+            error_doc = Document(
+                page_content=f"Error processing file {os.path.basename(file_path)}: {str(e)}",
+                metadata={"source": file_path, "error": str(e)}
+            )
+            return [error_doc]
 
     def _retry_operation(self, operation, max_retries=3):
         """Retry an operation with exponential backoff."""
+        last_exception = None
         for attempt in range(max_retries):
            try:
                 return operation()
             except Exception as e:
+                last_exception = e
                 if "already accessed by another instance" in str(e) and attempt < max_retries - 1:
                     wait_time = random.uniform(0.5, 2.0) * (attempt + 1)
                     logger.warning(f"Vector store access conflict, retrying ({attempt+1}/{max_retries}) in {wait_time:.2f}s...")
                     time.sleep(wait_time)
+                elif attempt < max_retries - 1:
+                    # For other errors, also retry but with a different message
+                    wait_time = random.uniform(0.5, 2.0) * (attempt + 1)
+                    logger.warning(f"Operation failed ({str(e)}), retrying ({attempt+1}/{max_retries}) in {wait_time:.2f}s...")
+                    time.sleep(wait_time)
                 else:
                     # Different error or last attempt, re-raise
                     raise
+
+        # If we get here with a last_exception, re-raise it
+        if last_exception:
+            raise last_exception
+        else:
+            raise RuntimeError("Retry operation failed but no exception was captured")
 
     def ingest_file(self, file_path: str, metadata: Dict[str, Any] = None) -> List[str]:
         """Ingest a file into the vector database."""
@@ -86,7 +139,8 @@ class DocumentProcessor:
         # Add file path to metadata
         base_metadata = {
             "source": file_path,
-            "file_name": os.path.basename(file_path)
+            "file_name": os.path.basename(file_path),
+            "ingestion_time": time.strftime("%Y-%m-%d %H:%M:%S")
         }
         base_metadata.update(metadata)
 
@@ -99,26 +153,43 @@ class DocumentProcessor:
                 if hasattr(chunk, 'metadata'):
                     chunk_metadata.update(chunk.metadata)
                 chunk_metadata["chunk_id"] = i
+                chunk_metadata["total_chunks"] = len(chunks)
                 metadatas.append(chunk_metadata)
 
             # Store in vector database with retry mechanism
             logger.info(f"Adding {len(texts)} chunks to vector database")
 
+            # Handle empty texts to avoid errors
+            if not texts:
+                logger.warning("No text chunks extracted from file, adding placeholder")
+                texts = [f"Empty file: {os.path.basename(file_path)}"]
+                metadatas = [{"source": file_path, "file_name": os.path.basename(file_path), "empty_file": True}]
+
             def add_to_vectordb():
                 return self.memory_manager.add_texts(texts, metadatas)
 
-            ids = self._retry_operation(add_to_vectordb)
-            logger.info(f"Successfully added chunks with IDs: {ids[:3]}...")
+            try:
+                ids = self._retry_operation(add_to_vectordb)
+                logger.info(f"Successfully added chunks with IDs: {ids[:3] if len(ids) > 3 else ids}...")
+            except Exception as e:
+                logger.error(f"All attempts to add to vector DB failed: {e}")
+                # Return placeholder IDs
+                ids = [f"error-{random.randint(1000, 9999)}" for _ in range(len(texts))]
 
             return ids
         except Exception as e:
             logger.error(f"Error ingesting file {file_path}: {str(e)}")
+            logger.error(traceback.format_exc())
             # Return placeholder IDs if there's an error
-            return [f"error-{random.randint(1000, 9999)}" for _ in range(len(chunks) if 'chunks' in locals() else 1)]
+            return [f"error-{random.randint(1000, 9999)}"]
 
     def ingest_text(self, text: str, metadata: Dict[str, Any] = None) -> List[str]:
         """Ingest raw text into the vector database."""
         try:
+            if not text.strip():
+                logger.warning("Empty text provided for ingestion")
+                return ["empty-text-error"]
+
             if metadata is None:
                 metadata = {}
 
@@ -126,23 +197,35 @@ class DocumentProcessor:
             chunks = self.text_splitter.split_text(text)
             logger.info(f"Split text into {len(chunks)} chunks")
 
+            # If text splitting produced no chunks (unusual), create one
+            if not chunks:
+                chunks = ["Empty text input"]
+
             # Prepare metadatas
             metadatas = []
             for i in range(len(chunks)):
                 chunk_metadata = metadata.copy()
                 chunk_metadata["chunk_id"] = i
+                chunk_metadata["total_chunks"] = len(chunks)
                 chunk_metadata["source"] = "direct_input"
+                chunk_metadata["ingestion_time"] = time.strftime("%Y-%m-%d %H:%M:%S")
                 metadatas.append(chunk_metadata)
 
             # Store in vector database with retry mechanism
             def add_to_vectordb():
                 return self.memory_manager.add_texts(chunks, metadatas)
 
-            ids = self._retry_operation(add_to_vectordb)
-            logger.info(f"Successfully added text chunks with IDs: {ids[:3] if len(ids) > 3 else ids}...")
-
+            try:
+                ids = self._retry_operation(add_to_vectordb)
+                logger.info(f"Successfully added text chunks with IDs: {ids[:3] if len(ids) > 3 else ids}...")
+            except Exception as e:
+                logger.error(f"All attempts to add text to vector DB failed: {e}")
+                # Return placeholder IDs
+                ids = [f"error-{random.randint(1000, 9999)}" for _ in range(len(chunks))]
+
             return ids
         except Exception as e:
             logger.error(f"Error ingesting text: {str(e)}")
+            logger.error(traceback.format_exc())
             # Return placeholder IDs if there's an error
-            return [f"error-{random.randint(1000, 9999)}" for _ in range(len(chunks) if 'chunks' in locals() else 1)]
+            return [f"error-{random.randint(1000, 9999)}"]
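
A side note on _retry_operation above: its docstring says "exponential backoff", but the committed delay, random.uniform(0.5, 2.0) * (attempt + 1), grows only linearly with the attempt number (with jitter). For comparison, a genuinely exponential variant could look like the following standalone sketch (not part of the commit):

    import random
    import time

    def retry_with_exponential_backoff(operation, max_retries=3, base_delay=0.5):
        """Sketch: the delay doubles each attempt (0.5s, 1s, 2s, ...), with jitter."""
        for attempt in range(max_retries):
            try:
                return operation()
            except Exception:
                if attempt == max_retries - 1:
                    raise  # Out of attempts, propagate the last error
                # 2 ** attempt provides the exponential growth; jitter spreads retries out
                time.sleep(base_delay * (2 ** attempt) * random.uniform(0.5, 1.5))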
app/core/memory.py CHANGED
@@ -166,15 +166,28 @@ class MemoryManager:
             relevant_docs = retriever.get_relevant_documents(question)
 
             # Format the context from relevant documents
-            context = "\n\n".join([doc.page_content for doc in relevant_docs])
+            context_parts = []
+            for i, doc in enumerate(relevant_docs):
+                source_name = doc.metadata.get("file_name", "Unknown Source")
+                context_parts.append(f"Document {i+1} [{source_name}]:\n{doc.page_content}\n")
+
+            context = "\n".join(context_parts) if context_parts else "No relevant documents found."
 
             # Get chat history from memory
             chat_history = self.memory.chat_memory.messages
             chat_history_str = "\n".join([f"{msg.type}: {msg.content}" for msg in chat_history])
 
-            # Create the prompt
-            prompt = f"""You are a helpful AI assistant. Answer the following question based on the provided context.
-
+            # Create the improved prompt with better instructions
+            prompt = f"""You are a helpful, accurate, and precise AI assistant. Answer the following question based on the provided context.
+
+Follow these guidelines when responding:
+1. If the context contains relevant information, use it to provide a direct and specific answer.
+2. Format your answer in clear, concise paragraphs with appropriate spacing.
+3. If the answer is not in the context, acknowledge this and provide a general response based on your knowledge.
+4. Do not mention "context" or "documents" in your answer - integrate the information naturally.
+5. Keep answers factual, helpful, and to the point.
+6. Never make up information that isn't supported by the context.
+
 Context:
 {context}
 
@@ -184,8 +197,34 @@ Chat History:
 Question: {question}
 Answer:"""
 
-            # Get the answer from the LLM
-            answer = self.llm(prompt)
+            # Get the answer from the LLM with a timeout and retries
+            try:
+                answer = self.llm(prompt)
+
+                # Simple quality check - if too short or generic, try again
+                if len(answer.strip()) < 20 or "I don't have enough information" in answer:
+                    logger.info("Answer quality check failed, retrying with modified prompt")
+
+                    # Add a more specific instruction to the prompt
+                    enhanced_prompt = prompt + "\n\nPlease be as helpful as possible with the information available."
+                    second_attempt = self.llm(enhanced_prompt)
+
+                    # Use the better of the two responses
+                    if len(second_attempt.strip()) > len(answer.strip()):
+                        answer = second_attempt
+            except Exception as llm_error:
+                logger.error(f"Error getting answer from LLM: {llm_error}")
+                if not answer:  # If answer wasn't set due to first attempt exception
+                    answer = "I'm having trouble generating a response right now. Please try again in a moment."
+
+            # Perform basic formatting cleanup
+            answer = answer.strip()
+
+            # Remove common prefixes that models sometimes add
+            prefixes_to_remove = ["Answer:", "AI:", "Assistant:"]
+            for prefix in prefixes_to_remove:
+                if answer.startswith(prefix):
+                    answer = answer[len(prefix):].strip()
 
             return {
                 "answer": answer,
@@ -194,7 +233,7 @@ Answer:"""
         except Exception as e:
             logger.error(f"Error in simple_chain: {e}")
             return {
-                "answer": f"I encountered an error: {str(e)}",
+                "answer": "I encountered an error while processing your question. Please try again with a different query.",
                 "sources": []
             }
 
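
One robustness note on the LLM fallback added above: the except branch checks whether answer is empty, but if the very first self.llm(prompt) call raises, answer was never assigned, so that check itself raises NameError. A defensive variant (a sketch, not the committed code; assumes the same self.llm and logger as in the method above) pre-initializes the variable:

    # Sketch: pre-initialize `answer` so the fallback path cannot hit a
    # NameError when the first LLM call fails before any assignment.
    answer = ""
    try:
        answer = self.llm(prompt)
    except Exception as llm_error:
        logger.error(f"Error getting answer from LLM: {llm_error}")
    if not answer:
        answer = "I'm having trouble generating a response right now. Please try again in a moment."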
 
app/ui/streamlit_app.py CHANGED
@@ -92,14 +92,42 @@ with st.sidebar:
 
     # Add file uploader with error handling
     try:
-        uploaded_file = st.file_uploader("Choose a file", type=["pdf", "txt", "csv"])
+        st.subheader("Upload a File")
+
+        # Show supported file types info
+        with st.expander("Supported File Types"):
+            st.markdown("""
+            - **PDF** (.pdf) - Best for formatted documents
+            - **Text** (.txt) - Simple text files
+            - **CSV** (.csv) - Structured data
+            - **Word** (.doc, .docx) - Microsoft Word documents
+            - **Markdown** (.md) - Formatted text
+            - **HTML** (.html, .htm) - Web pages
+
+            Other file types may work but are not fully supported.
+            """)
+
+        uploaded_file = st.file_uploader("Choose a file", type=["pdf", "txt", "csv", "doc", "docx", "md", "html", "htm", "xml", "json"])
 
         if uploaded_file is not None:
+            # Display file info
+            file_details = {
+                "Filename": uploaded_file.name,
+                "File size": f"{uploaded_file.size / 1024:.1f} KB",
+                "File type": uploaded_file.type
+            }
+
+            st.json(file_details)
+
             # Handle the uploaded file
             if st.button("Process Document"):
                 with st.spinner("Processing document..."):
+                    status_placeholder = st.empty()
+                    status_placeholder.info("Starting document processing...")
+
                     try:
                         # Create a temporary file with proper error handling
+                        status_placeholder.info("Creating temporary file...")
                         temp_dir = tempfile.gettempdir()
                         temp_path = os.path.join(temp_dir, uploaded_file.name)
 
@@ -110,6 +138,7 @@ with st.sidebar:
                             temp_file.write(uploaded_file.getvalue())
 
                         # Get a path to store the document permanently
+                        status_placeholder.info("Preparing document storage location...")
                         doc_path = get_document_path(uploaded_file.name)
 
                         # Copy the file to the documents directory
@@ -119,22 +148,32 @@ with st.sidebar:
                         if not copy_success:
                             logger.warning("Using temporary file path instead of documents directory")
                             doc_path = temp_path
+                            status_placeholder.warning("Using temporary storage (document won't be permanently saved)")
 
                         # Ingest the document with retry logic for 403 errors
-                        logger.info("Ingesting document")
+                        status_placeholder.info("Analyzing and indexing document content...")
+                        progress_bar = st.progress(0)
                         max_retries = 3
 
                         for attempt in range(max_retries):
                             try:
-                                document_processor.ingest_file(temp_path, {"original_name": uploaded_file.name})
+                                progress_bar.progress((attempt * 30) / 100)  # Show progress as we attempt
+                                ids = document_processor.ingest_file(temp_path, {"original_name": uploaded_file.name})
+                                progress_bar.progress(100)
                                 break
                             except Exception as e:
                                 error_str = str(e).lower()
                                 if ("403" in error_str or "forbidden" in error_str or "permission" in error_str) and attempt < max_retries - 1:
+                                    status_placeholder.warning(f"Permission error ({attempt+1}/{max_retries}), retrying...")
                                     logger.warning(f"Permission error ({attempt+1}/{max_retries}), retrying...")
                                     time.sleep(1.5)  # Add delay between retries
+                                elif attempt < max_retries - 1:
+                                    # General retry for any error
+                                    status_placeholder.warning(f"Error ({attempt+1}/{max_retries}), retrying...")
+                                    logger.warning(f"Error during ingestion ({attempt+1}/{max_retries}): {e}")
+                                    time.sleep(1.5)
                                 else:
-                                    raise  # Re-raise if not a 403 error or on last attempt
+                                    raise  # Re-raise on last attempt
 
                         # Clean up the temporary file if different from doc_path
                         if temp_path != doc_path and os.path.exists(temp_path):
@@ -144,19 +183,45 @@ with st.sidebar:
                             except Exception as e:
                                 logger.warning(f"Could not remove temporary file: {e}")
 
-                        st.success(f"Document {uploaded_file.name} processed successfully!")
+                        # Check if ingestion was successful based on IDs
+                        if ids and not all(str(id).startswith("error-") for id in ids):
+                            status_placeholder.success("✅ Document processed successfully!")
+                            st.balloons()  # Celebrate success
+                        else:
+                            status_placeholder.warning("⚠️ Document processed with warnings. Some content may not be fully indexed.")
+
                     except Exception as e:
+                        progress_bar = st.progress(100) if 'progress_bar' in locals() else st.progress(0)
                         logger.error(f"Error processing document: {str(e)}")
-                        st.error(f"Error processing document: {str(e)}")
+                        status_placeholder.error(f"Error processing document: {str(e)}")
 
                         if "403" in str(e) or "forbidden" in str(e).lower():
                             st.warning("This appears to be a permissions issue. Try using a different file format or using the text input option instead.")
+                        elif "unsupported" in str(e).lower() or "not supported" in str(e).lower() or "no specific loader" in str(e).lower():
+                            st.warning("This file type may not be fully supported. Try converting to PDF or TXT format.")
+                        elif "memory" in str(e).lower():
+                            st.warning("The file may be too large to process. Try a smaller file or split the content.")
+                        elif "timeout" in str(e).lower():
+                            st.warning("Processing timed out. Try a smaller file or try again later.")
+
+                        # Show troubleshooting tips
+                        with st.expander("Troubleshooting Tips"):
+                            st.markdown("""
+                            - Convert your document to PDF or plain text format
+                            - Try a smaller file (under 1MB)
+                            - Remove any password protection from the file
+                            - Try the text input option below instead
+                            - Check if the file contains complex formatting or images
+                            """)
+
+        st.markdown("---")
     except Exception as e:
         logger.error(f"File uploader error: {str(e)}")
         st.error(f"File upload functionality is currently unavailable: {str(e)}")
 
-    st.header("Raw Text Input")
-    text_input = st.text_area("Enter text to add to the knowledge base")
+    st.subheader("Raw Text Input")
+    st.markdown("Alternatively, paste text directly to add to the knowledge base:")
+    text_input = st.text_area("Enter text to add to the knowledge base", height=150)
 
     if st.button("Add Text"):
         if text_input:
@@ -168,13 +233,22 @@ with st.sidebar:
                     "timestamp": str(datetime.now())
                 }
 
+                # Ingest the text with progress indication
+                status_text = st.empty()
+                status_text.info("Processing text...")
+
                 # Ingest the text
-                document_processor.ingest_text(text_input, metadata)
+                ids = document_processor.ingest_text(text_input, metadata)
 
-                st.success("Text added to knowledge base successfully!")
+                if ids and not any(str(id).startswith("error-") for id in ids):
+                    status_text.success("✅ Text added to knowledge base successfully!")
+                else:
+                    status_text.warning("⚠️ Text processing completed with warnings")
             except Exception as e:
                 logger.error(f"Error adding text: {str(e)}")
                 st.error(f"Error adding text: {str(e)}")
+        else:
+            st.warning("Please enter some text to add")
 
     # Display model information
     st.header("Models")