Spaces:

p3rc03
/

2B

Sleeping

App Files Files Community

37-AN committed on May 14

Commit

6c6cf17

1 Parent(s): 48a1a2b

Fix output keys error and file upload issues

Browse files

Files changed (4) hide show

app/core/agent.py +68 -30
app/core/memory.py +26 -7
app/ui/streamlit_app.py +68 -35
app/utils/helpers.py +111 -46

app/core/agent.py CHANGED Viewed

@@ -1,8 +1,13 @@
 import sys
 import os
 from typing import List, Dict, Any
 from langchain.prompts import PromptTemplate
 # Add project root to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 from app.core.memory import MemoryManager
@@ -35,39 +40,72 @@ Assistant:"""
             input_variables=["context", "chat_history", "question"],
             template=self.system_template
         )
     def query(self, question: str) -> Dict[str, Any]:
         """Process a user query and return a response."""
-        # Use the RAG chain to get an answer
-        response = self.rag_chain({"question": question})
-        # Extract the answer and source documents
-        answer = response["answer"]
-        source_docs = response["source_documents"] if "source_documents" in response else []
-        # Format source documents for display
-        sources = []
-        for doc in source_docs:
-            metadata = doc.metadata
-            sources.append({
-                "content": doc.page_content[:100] + "..." if len(doc.page_content) > 100 else doc.page_content,
-                "source": metadata.get("source", "Unknown"),
-                "file_name": metadata.get("file_name", "Unknown"),
-                "page": metadata.get("page", "N/A") if "page" in metadata else None
-            })
-        return {
-            "answer": answer,
-            "sources": sources
-        }
     def add_conversation_to_memory(self, question: str, answer: str):
         """Add a conversation exchange to the memory for future context."""
-        # Create metadata for the conversation
-        metadata = {
-            "type": "conversation",
-            "question": question
-        }
-        # Add the exchange to the vector store
-        self.memory_manager.add_texts([answer], [metadata])

 import sys
 import os
+import logging
 from typing import List, Dict, Any
 from langchain.prompts import PromptTemplate
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 # Add project root to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 from app.core.memory import MemoryManager
             input_variables=["context", "chat_history", "question"],
             template=self.system_template
         )
+        logger.info("AssistantAgent initialized successfully")
     def query(self, question: str) -> Dict[str, Any]:
         """Process a user query and return a response."""
+        try:
+            logger.info(f"Processing query: {question[:50]}...")
+            # Use the RAG chain to get an answer
+            response = self.rag_chain({"question": question})
+            # Extract the answer and source documents
+            logger.debug(f"RAG chain response keys: {response.keys()}")
+            if "answer" not in response:
+                logger.warning(f"Missing 'answer' key in response. Available keys: {response.keys()}")
+                # Create a fallback answer if the expected key is missing
+                answer = "I'm sorry, I encountered an issue processing your request. Let me try a simpler response."
+            else:
+                answer = response["answer"]
+            # Handle different variations of source document keys
+            source_docs = []
+            if "source_documents" in response:
+                source_docs = response["source_documents"]
+            elif "sources" in response:
+                source_docs = response["sources"]
+            # Format source documents for display
+            sources = []
+            for doc in source_docs:
+                metadata = getattr(doc, 'metadata', {})
+                page_content = getattr(doc, 'page_content', str(doc)[:100])
+                sources.append({
+                    "content": page_content[:100] + "..." if len(page_content) > 100 else page_content,
+                    "source": metadata.get("source", "Unknown"),
+                    "file_name": metadata.get("file_name", "Unknown"),
+                    "page": metadata.get("page", "N/A") if "page" in metadata else None
+                })
+            logger.info(f"Query processed successfully with {len(sources)} sources")
+            return {
+                "answer": answer,
+                "sources": sources
+            }
+        except Exception as e:
+            logger.error(f"Error in query method: {str(e)}")
+            # Return a graceful fallback response
+            return {
+                "answer": f"I encountered an error while processing your question. Error details: {str(e)}",
+                "sources": []
+            }
     def add_conversation_to_memory(self, question: str, answer: str):
         """Add a conversation exchange to the memory for future context."""
+        try:
+            # Create metadata for the conversation
+            metadata = {
+                "type": "conversation",
+                "question": question
+            }
+            # Add the exchange to the vector store
+            logger.info("Adding conversation to memory")
+            self.memory_manager.add_texts([answer], [metadata])
+        except Exception as e:
+            logger.error(f"Error adding conversation to memory: {str(e)}")
+            # Best-effort: the error is logged and not re-raised, since this is not critical for the user experience

app/core/memory.py CHANGED Viewed

@@ -116,13 +116,32 @@ class MemoryManager:
     def create_rag_chain(self):
         """Create a RAG chain for question answering."""
-        # Using the chat model created with the regular LLM
-        return ConversationalRetrievalChain.from_llm(
-            llm=self.llm,
-            retriever=self.get_retriever(),
-            memory=self.memory,
-            return_source_documents=True
-        )
     def add_texts(self, texts, metadatas=None):
         """Add texts to the vector store."""

     def create_rag_chain(self):
         """Create a RAG chain for question answering."""
+        try:
+            # Configure correct return keys to match what agent.py expects
+            logger.info("Creating ConversationalRetrievalChain")
+            chain = ConversationalRetrievalChain.from_llm(
+                llm=self.llm,
+                retriever=self.get_retriever(),
+                memory=self.memory,
+                return_source_documents=True,
+                return_generated_question=False,
+            )
+            return chain
+        except Exception as e:
+            logger.error(f"Error creating RAG chain: {e}")
+            # Create a mock chain as fallback
+            logger.warning("Using fallback mock chain")
+            # Create a simple function that mimics the chain's interface
+            def mock_chain(inputs):
+                logger.info(f"Mock chain received query: {inputs.get('question', '')}")
+                return {
+                    "answer": "I'm having trouble accessing the knowledge base. I can only answer general questions right now.",
+                    "source_documents": []
+                }
+            return mock_chain
     def add_texts(self, texts, metadatas=None):
         """Add texts to the vector store."""

app/ui/streamlit_app.py CHANGED Viewed

@@ -18,14 +18,14 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(
 try:
     from app.core.agent import AssistantAgent
     from app.core.ingestion import DocumentProcessor
-    from app.utils.helpers import get_document_path, format_sources, save_conversation
     from app.config import LLM_MODEL, EMBEDDING_MODEL
 except ImportError:
     # Fallback to direct imports if app is not recognized as a package
     sys.path.append(os.path.abspath('.'))
     from app.core.agent import AssistantAgent
     from app.core.ingestion import DocumentProcessor
-    from app.utils.helpers import get_document_path, format_sources, save_conversation
     from app.config import LLM_MODEL, EMBEDDING_MODEL
 # Set page config
@@ -89,33 +89,56 @@ st.title("🤗 Personal AI Assistant (Hugging Face)")
 # Create a sidebar for uploading documents and settings
 with st.sidebar:
     st.header("Upload Documents")
-    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "txt", "csv"])
-    if uploaded_file is not None:
-        # Create a temporary file
-        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{uploaded_file.name.split('.')[-1]}") as tmp:
-            tmp.write(uploaded_file.getvalue())
-            tmp_path = tmp.name
-        if st.button("Process Document"):
-            with st.spinner("Processing document..."):
-                try:
-                    # Get a path to store the document
-                    doc_path = get_document_path(uploaded_file.name)
-                    # Copy the file to the documents directory
-                    with open(doc_path, "wb") as f:
-                        f.write(uploaded_file.getvalue())
-                    # Ingest the document
-                    document_processor.ingest_file(tmp_path, {"original_name": uploaded_file.name})
-                    # Clean up the temporary file
-                    os.unlink(tmp_path)
-                    st.success(f"Document {uploaded_file.name} processed successfully!")
-                except Exception as e:
-                    st.error(f"Error processing document: {str(e)}")
     st.header("Raw Text Input")
     text_input = st.text_area("Enter text to add to the knowledge base")
@@ -135,6 +158,7 @@ with st.sidebar:
                     st.success("Text added to knowledge base successfully!")
                 except Exception as e:
                     st.error(f"Error adding text: {str(e)}")
     # Display model information
@@ -166,8 +190,9 @@ for message in st.session_state.messages:
                 sources = message["sources"]
                 if sources:
                     for i, source in enumerate(sources, 1):
-                        st.write(f"{i}. {source['file_name']}" + (f" (Page {source['page']})" if source.get('page') else ""))
-                        st.text(source['content'])
                 else:
                     st.write("No specific sources used.")
@@ -197,8 +222,9 @@ if prompt := st.chat_input("Ask a question..."):
                         else:
                             raise
-                answer = response["answer"]
-                sources = response["sources"]
                 # Display the response
                 st.write(answer)
@@ -207,13 +233,17 @@ if prompt := st.chat_input("Ask a question..."):
                 with st.expander("View Sources"):
                     if sources:
                         for i, source in enumerate(sources, 1):
-                            st.write(f"{i}. {source['file_name']}" + (f" (Page {source['page']})" if source.get('page') else ""))
-                            st.text(source['content'])
                     else:
                         st.write("No specific sources used.")
                 # Save conversation
-                save_conversation(prompt, answer, sources)
                 # Add assistant response to chat history
                 st.session_state.messages.append({
@@ -223,7 +253,10 @@ if prompt := st.chat_input("Ask a question..."):
                 })
                 # Update the agent's memory
-                agent.add_conversation_to_memory(prompt, answer)
             except Exception as e:
                 error_msg = f"Error generating response: {str(e)}"

 try:
     from app.core.agent import AssistantAgent
     from app.core.ingestion import DocumentProcessor
+    from app.utils.helpers import get_document_path, format_sources, save_conversation, copy_uploaded_file
     from app.config import LLM_MODEL, EMBEDDING_MODEL
 except ImportError:
     # Fallback to direct imports if app is not recognized as a package
     sys.path.append(os.path.abspath('.'))
     from app.core.agent import AssistantAgent
     from app.core.ingestion import DocumentProcessor
+    from app.utils.helpers import get_document_path, format_sources, save_conversation, copy_uploaded_file
     from app.config import LLM_MODEL, EMBEDDING_MODEL
 # Set page config
 # Create a sidebar for uploading documents and settings
 with st.sidebar:
     st.header("Upload Documents")
+    # Add file uploader with error handling
+    try:
+        uploaded_file = st.file_uploader("Choose a file", type=["pdf", "txt", "csv"])
+        if uploaded_file is not None:
+            # Handle the uploaded file
+            if st.button("Process Document"):
+                with st.spinner("Processing document..."):
+                    try:
+                        # Create a temporary file with proper error handling
+                        temp_dir = tempfile.gettempdir()
+                        temp_path = os.path.join(temp_dir, uploaded_file.name)
+                        logger.info(f"Saving uploaded file to temporary path: {temp_path}")
+                        # Write the file data to the temporary file
+                        with open(temp_path, "wb") as temp_file:
+                            temp_file.write(uploaded_file.getvalue())
+                        # Get a path to store the document permanently
+                        doc_path = get_document_path(uploaded_file.name)
+                        # Copy the file to the documents directory
+                        logger.info(f"Copying file to documents directory: {doc_path}")
+                        copy_success = copy_uploaded_file(temp_path, doc_path)
+                        if not copy_success:
+                            logger.warning("Using temporary file path instead of documents directory")
+                            doc_path = temp_path
+                        # Ingest the document
+                        logger.info("Ingesting document")
+                        document_processor.ingest_file(temp_path, {"original_name": uploaded_file.name})
+                        # Clean up the temporary file if different from doc_path
+                        if temp_path != doc_path and os.path.exists(temp_path):
+                            try:
+                                os.unlink(temp_path)
+                                logger.info(f"Temporary file removed: {temp_path}")
+                            except Exception as e:
+                                logger.warning(f"Could not remove temporary file: {e}")
+                        st.success(f"Document {uploaded_file.name} processed successfully!")
+                    except Exception as e:
+                        logger.error(f"Error processing document: {str(e)}")
+                        st.error(f"Error processing document: {str(e)}")
+    except Exception as e:
+        logger.error(f"File uploader error: {str(e)}")
+        st.error(f"File upload functionality is currently unavailable: {str(e)}")
     st.header("Raw Text Input")
     text_input = st.text_area("Enter text to add to the knowledge base")
                     st.success("Text added to knowledge base successfully!")
                 except Exception as e:
+                    logger.error(f"Error adding text: {str(e)}")
                     st.error(f"Error adding text: {str(e)}")
     # Display model information
                 sources = message["sources"]
                 if sources:
                     for i, source in enumerate(sources, 1):
+                        st.write(f"{i}. {source.get('file_name', 'Unknown')}" +
+                                (f" (Page {source['page']})" if source.get('page') else ""))
+                        st.text(source.get('content', 'No content available'))
                 else:
                     st.write("No specific sources used.")
                         else:
                             raise
+                # Extract answer and sources, with fallbacks if missing
+                answer = response.get("answer", "I couldn't generate a proper response.")
+                sources = response.get("sources", [])
                 # Display the response
                 st.write(answer)
                 with st.expander("View Sources"):
                     if sources:
                         for i, source in enumerate(sources, 1):
+                            st.write(f"{i}. {source.get('file_name', 'Unknown')}" +
+                                    (f" (Page {source['page']})" if source.get('page') else ""))
+                            st.text(source.get('content', 'No content available'))
                     else:
                         st.write("No specific sources used.")
                 # Save conversation
+                try:
+                    save_conversation(prompt, answer, sources)
+                except Exception as save_error:
+                    logger.error(f"Error saving conversation: {save_error}")
                 # Add assistant response to chat history
                 st.session_state.messages.append({
                 })
                 # Update the agent's memory
+                try:
+                    agent.add_conversation_to_memory(prompt, answer)
+                except Exception as memory_error:
+                    logger.error(f"Error adding to memory: {memory_error}")
             except Exception as e:
                 error_msg = f"Error generating response: {str(e)}"

app/utils/helpers.py CHANGED Viewed

@@ -1,67 +1,132 @@
 import os
 import sys
 from datetime import datetime
 from typing import List, Dict, Any
 def sanitize_filename(filename: str) -> str:
     """Sanitize a filename by removing invalid characters."""
     # Replace invalid characters with underscores
     invalid_chars = '<>:"/\\|?*'
     for char in invalid_chars:
         filename = filename.replace(char, '_')
     return filename
 def get_document_path(filename: str) -> str:
     """Get the path to store a document."""
-    # Get the documents directory
-    docs_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'data', 'documents')
-    # Create the directory if it doesn't exist
-    os.makedirs(docs_dir, exist_ok=True)
-    # Sanitize the filename
-    filename = sanitize_filename(filename)
-    # Add a timestamp to make the filename unique
-    timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
-    base, ext = os.path.splitext(filename)
-    unique_filename = f"{base}_{timestamp}{ext}"
-    return os.path.join(docs_dir, unique_filename)
 def format_sources(sources: List[Dict[str, Any]]) -> str:
     """Format source documents for display."""
-    if not sources:
-        return "No sources found."
-    formatted = []
-    for i, source in enumerate(sources, 1):
-        source_str = f"{i}. {source['file_name']} "
-        if source.get('page'):
-            source_str += f"(Page {source['page']}) "
-        formatted.append(source_str)
-    return "\n".join(formatted)
 def save_conversation(question: str, answer: str, sources: List[Dict[str, Any]]) -> str:
     """Save a conversation to a file."""
-    # Create a directory for conversations
-    conv_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'data', 'conversations')
-    os.makedirs(conv_dir, exist_ok=True)
-    # Create a filename based on the timestamp and first few words of the question
-    timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
-    question_slug = "_".join(question.split()[:5]).lower()
-    question_slug = sanitize_filename(question_slug)
-    filename = f"{timestamp}_{question_slug}.txt"
-    # Format the conversation
-    formatted_sources = format_sources(sources)
-    content = f"Question: {question}\n\nAnswer: {answer}\n\nSources:\n{formatted_sources}\n"
-    # Save the conversation
-    filepath = os.path.join(conv_dir, filename)
-    with open(filepath, 'w') as f:
-        f.write(content)
-    return filepath

 import os
 import sys
+import logging
+import shutil
 from datetime import datetime
 from typing import List, Dict, Any
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 def sanitize_filename(filename: str) -> str:
     """Sanitize a filename by removing invalid characters."""
     # Replace invalid characters with underscores
     invalid_chars = '<>:"/\\|?*'
     for char in invalid_chars:
         filename = filename.replace(char, '_')
+    # Limit filename length to avoid issues
+    if len(filename) > 200:
+        base, ext = os.path.splitext(filename)
+        filename = base[:195] + ext
     return filename
 def get_document_path(filename: str) -> str:
     """Get the path to store a document."""
+    try:
+        # Get the documents directory
+        docs_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'data', 'documents')
+        # Create the directory if it doesn't exist
+        os.makedirs(docs_dir, exist_ok=True)
+        # Try to ensure the directory has write permissions
+        try:
+            # Test file to check write permissions
+            test_file = os.path.join(docs_dir, '.test_write_access')
+            with open(test_file, 'w') as f:
+                f.write('test')
+            os.remove(test_file)
+        except Exception as e:
+            logger.warning(f"Document directory may not be writable: {e}")
+            # Try alternative location
+            docs_dir = '/tmp/documents' if os.name != 'nt' else os.path.join(os.environ.get('TEMP', 'C:\\Temp'), 'documents')
+            os.makedirs(docs_dir, exist_ok=True)
+        # Sanitize the filename
+        filename = sanitize_filename(filename)
+        # Add a timestamp to make the filename unique
+        timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
+        base, ext = os.path.splitext(filename)
+        unique_filename = f"{base}_{timestamp}{ext}"
+        filepath = os.path.join(docs_dir, unique_filename)
+        logger.info(f"Document will be stored at: {filepath}")
+        return filepath
+    except Exception as e:
+        logger.error(f"Error getting document path: {e}")
+        # Fallback to a simple path in /tmp or temp directory
+        fallback_dir = '/tmp' if os.name != 'nt' else os.environ.get('TEMP', 'C:\\Temp')
+        os.makedirs(fallback_dir, exist_ok=True)
+        return os.path.join(fallback_dir, f"doc_{datetime.now().strftime('%Y%m%d%H%M%S')}")
+def copy_uploaded_file(source_path: str, destination_path: str) -> bool:
+    """Copy an uploaded file with proper error handling."""
+    try:
+        shutil.copy2(source_path, destination_path)
+        logger.info(f"File copied from {source_path} to {destination_path}")
+        return True
+    except Exception as e:
+        logger.error(f"Error copying file: {e}")
+        # Try alternate approach
+        try:
+            with open(source_path, 'rb') as src, open(destination_path, 'wb') as dst:
+                dst.write(src.read())
+            logger.info(f"File copied using alternate method")
+            return True
+        except Exception as e2:
+            logger.error(f"All methods of copying file failed: {e2}")
+            return False
 def format_sources(sources: List[Dict[str, Any]]) -> str:
     """Format source documents for display."""
+    try:
+        if not sources:
+            return "No sources found."
+        formatted = []
+        for i, source in enumerate(sources, 1):
+            source_str = f"{i}. {source.get('file_name', 'Unknown Source')} "
+            if source.get('page'):
+                source_str += f"(Page {source['page']}) "
+            formatted.append(source_str)
+        return "\n".join(formatted)
+    except Exception as e:
+        logger.error(f"Error formatting sources: {e}")
+        return "Error displaying sources."
 def save_conversation(question: str, answer: str, sources: List[Dict[str, Any]]) -> str:
     """Save a conversation to a file."""
+    try:
+        # Create a directory for conversations
+        conv_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'data', 'conversations')
+        try:
+            os.makedirs(conv_dir, exist_ok=True)
+        except Exception as e:
+            logger.warning(f"Could not create conversation directory: {e}")
+            # Use alternative directory
+            conv_dir = '/tmp/conversations' if os.name != 'nt' else os.path.join(os.environ.get('TEMP', 'C:\\Temp'), 'conversations')
+            os.makedirs(conv_dir, exist_ok=True)
+        # Create a filename based on the timestamp and first few words of the question
+        timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
+        question_slug = "_".join((question or "empty_question").split()[:5]).lower()
+        question_slug = sanitize_filename(question_slug)
+        filename = f"{timestamp}_{question_slug}.txt"
+        # Format the conversation
+        formatted_sources = format_sources(sources)
+        content = f"Question: {question}\n\nAnswer: {answer}\n\nSources:\n{formatted_sources}\n"
+        # Save the conversation
+        filepath = os.path.join(conv_dir, filename)
+        with open(filepath, 'w') as f:
+            f.write(content)
+        logger.info(f"Conversation saved to {filepath}")
+        return filepath
+    except Exception as e:
+        logger.error(f"Error saving conversation: {e}")
+        return ""