Spaces:

p3rc03
/

2B

Running

App Files Community

37-AN committed on May 14

Commit

9f0d171

1 Parent(s): 403ced7

Fix 403 error by using local models

Browse files

Files changed (5) hide show

Dockerfile +13 -4
app.py +40 -1
app/config.py +23 -9
app/core/memory.py +84 -19
app/ui/streamlit_app.py +99 -35

Dockerfile CHANGED Viewed

@@ -27,9 +27,12 @@ RUN mkdir -p /app/models && chmod 777 /app/models
 # Copy the rest of the application
 COPY . .
-# Create necessary directories with proper permissions
-RUN mkdir -p data/documents data/vector_db && \
-    chmod -R 777 data
 # Set environment variables for cache locations
 ENV TRANSFORMERS_CACHE=/app/models
@@ -51,8 +54,14 @@ ENV MAX_TOKENS=256
 ENV CHUNK_SIZE=512
 ENV CHUNK_OVERLAP=128
 # Expose port for Hugging Face Spaces
 EXPOSE 7860
 # Run the Streamlit app on the correct port
-CMD ["streamlit", "run", "app/ui/streamlit_app.py", "--server.port=7860", "--server.address=0.0.0.0"]

 # Copy the rest of the application
 COPY . .
+# Create necessary directories with proper permissions and unique vector_db folders
+RUN mkdir -p data/documents && chmod -R 777 data/documents
+RUN mkdir -p data/vector_db && chmod -R 777 data/vector_db
+# Create multiple vector_db instances to avoid collisions
+RUN mkdir -p data/vector_db_1 data/vector_db_2 data/vector_db_3 && \
+    chmod -R 777 data/vector_db_*
 # Set environment variables for cache locations
 ENV TRANSFORMERS_CACHE=/app/models
 ENV CHUNK_SIZE=512
 ENV CHUNK_OVERLAP=128
+# Set Streamlit's server.maxMessageSize (WebSocket message limit, in MB); file upload size is controlled separately by server.maxUploadSize
+ENV STREAMLIT_SERVER_MAX_MESSAGE_SIZE=200
+# Pin Python's hash seed (0 disables hash randomization) for reproducible hashing; note this does not configure shared memory
+ENV PYTHONHASHSEED=0
 # Expose port for Hugging Face Spaces
 EXPOSE 7860
 # Run the Streamlit app on the correct port
+CMD ["streamlit", "run", "app/ui/streamlit_app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.maxUploadSize=10"]

app.py CHANGED Viewed

@@ -6,6 +6,20 @@ This file starts the Streamlit UI when deployed to Hugging Face Spaces.
 import subprocess
 import os
 import sys
 # Make sure the app directory is in the path
 # Add the current directory to the path so that 'app' is recognized as a package
@@ -14,11 +28,36 @@ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(os.path.abspath('.'))
 # Create necessary directories
 os.makedirs('data/documents', exist_ok=True)
 os.makedirs('data/vector_db', exist_ok=True)
 # Set environment variable for Python path
 os.environ['PYTHONPATH'] = os.path.abspath('.')
 # Run the Streamlit app with specific port to match huggingface-space.yml
-subprocess.run(["streamlit", "run", "app/ui/streamlit_app.py", "--server.port=7860", "--server.address=0.0.0.0"])

 import subprocess
 import os
 import sys
+import time
+import random
+import logging
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    handlers=[
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
 # Make sure the app directory is in the path
 # Add the current directory to the path so that 'app' is recognized as a package
 sys.path.append(os.path.abspath('.'))
 # Create necessary directories
+logger.info("Creating necessary directories...")
 os.makedirs('data/documents', exist_ok=True)
 os.makedirs('data/vector_db', exist_ok=True)
+# Create multiple vector database paths to help with concurrent access
+for i in range(1, 4):
+    path = f'data/vector_db_{i}'
+    os.makedirs(path, exist_ok=True)
+    # Ensure directories have proper permissions
+    try:
+        os.chmod(path, 0o777)
+    except Exception as e:
+        logger.warning(f"Could not set permissions for {path}: {e}")
 # Set environment variable for Python path
 os.environ['PYTHONPATH'] = os.path.abspath('.')
+# Add a small delay to ensure directory creation is complete
+logger.info("Starting application...")
+time.sleep(1)
 # Run the Streamlit app with specific port to match huggingface-space.yml
+# Add server.maxMessageSize to handle larger files and messages
+cmd = [
+    "streamlit", "run", "app/ui/streamlit_app.py",
+    "--server.port=7860",
+    "--server.address=0.0.0.0",
+    "--server.maxUploadSize=10",
+    "--server.maxMessageSize=200"
+]
+logger.info(f"Running command: {' '.join(cmd)}")
+subprocess.run(cmd)

app/config.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 from dotenv import load_dotenv
 from pathlib import Path
@@ -10,18 +11,31 @@ load_dotenv(dotenv_path=env_path)
 HF_API_KEY = os.getenv('HF_API_KEY', '')
 # LLM Configuration
-LLM_MODEL = os.getenv('LLM_MODEL', 'google/flan-t5-large')
 EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
 # Vector Database
-VECTOR_DB_PATH = os.getenv('VECTOR_DB_PATH', './data/vector_db')
 COLLECTION_NAME = os.getenv('COLLECTION_NAME', 'personal_assistant')
 # Application Settings
 DEFAULT_TEMPERATURE = float(os.getenv('DEFAULT_TEMPERATURE', 0.7))
-CHUNK_SIZE = int(os.getenv('CHUNK_SIZE', 1000))
-CHUNK_OVERLAP = int(os.getenv('CHUNK_OVERLAP', 200))
-MAX_TOKENS = int(os.getenv('MAX_TOKENS', 512))
 # Create a template .env file if it doesn't exist
 def create_env_example():
@@ -31,7 +45,7 @@ def create_env_example():
 HF_API_KEY=your_huggingface_api_key_here
 # LLM Configuration
-LLM_MODEL=google/flan-t5-large  # Free model with good performance
 EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 # Vector Database
@@ -40,7 +54,7 @@ COLLECTION_NAME=personal_assistant
 # Application Settings
 DEFAULT_TEMPERATURE=0.7
-CHUNK_SIZE=1000
-CHUNK_OVERLAP=200
-MAX_TOKENS=512
 """)

 import os
+import random
 from dotenv import load_dotenv
 from pathlib import Path
 HF_API_KEY = os.getenv('HF_API_KEY', '')
 # LLM Configuration
+LLM_MODEL = os.getenv('LLM_MODEL', 'distilgpt2')
 EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')
 # Vector Database
+# Determine which vector DB path to use based on deployment environment
+if os.path.exists("/app/data/vector_db_1"):
+    # We're in the Docker container, use one of the multiple DB paths
+    vector_db_options = [
+        './data/vector_db_1',
+        './data/vector_db_2',
+        './data/vector_db_3',
+    ]
+    # Choose a random DB path to reduce collision probability
+    VECTOR_DB_PATH = os.getenv('VECTOR_DB_PATH', random.choice(vector_db_options))
+else:
+    # Local development, use the standard path
+    VECTOR_DB_PATH = os.getenv('VECTOR_DB_PATH', './data/vector_db')
 COLLECTION_NAME = os.getenv('COLLECTION_NAME', 'personal_assistant')
 # Application Settings
 DEFAULT_TEMPERATURE = float(os.getenv('DEFAULT_TEMPERATURE', 0.7))
+CHUNK_SIZE = int(os.getenv('CHUNK_SIZE', 512))
+CHUNK_OVERLAP = int(os.getenv('CHUNK_OVERLAP', 128))
+MAX_TOKENS = int(os.getenv('MAX_TOKENS', 256))
 # Create a template .env file if it doesn't exist
 def create_env_example():
 HF_API_KEY=your_huggingface_api_key_here
 # LLM Configuration
+LLM_MODEL=distilgpt2  # Use small model for Hugging Face Spaces
 EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 # Vector Database
 # Application Settings
 DEFAULT_TEMPERATURE=0.7
+CHUNK_SIZE=512
+CHUNK_OVERLAP=128
+MAX_TOKENS=256
 """)

app/core/memory.py CHANGED Viewed

@@ -1,11 +1,18 @@
 import os
 import sys
 from langchain.vectorstores import Qdrant
 from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from qdrant_client import QdrantClient
 from qdrant_client.models import Distance, VectorParams
 # Add project root to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 from app.config import VECTOR_DB_PATH, COLLECTION_NAME
@@ -26,29 +33,79 @@ class MemoryManager:
         )
     def _init_qdrant_client(self):
-        """Initialize the Qdrant client."""
         os.makedirs(VECTOR_DB_PATH, exist_ok=True)
-        return QdrantClient(path=VECTOR_DB_PATH)
     def _init_vector_store(self):
         """Initialize the vector store."""
-        collections = self.client.get_collections().collections
-        collection_names = [collection.name for collection in collections]
-        # Get vector dimension from the embedding model
-        vector_size = len(self.embeddings.embed_query("test"))
-        if COLLECTION_NAME not in collection_names:
-            self.client.create_collection(
                 collection_name=COLLECTION_NAME,
-                vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
             )
-        return Qdrant(
-            client=self.client,
-            collection_name=COLLECTION_NAME,
-            embeddings=self.embeddings
-        )
     def get_retriever(self):
         """Get the retriever for RAG."""
@@ -69,8 +126,16 @@ class MemoryManager:
     def add_texts(self, texts, metadatas=None):
         """Add texts to the vector store."""
-        return self.vectorstore.add_texts(texts=texts, metadatas=metadatas)
     def similarity_search(self, query, k=5):
         """Perform a similarity search."""
-        return self.vectorstore.similarity_search(query, k=k)

 import os
 import sys
+import time
+import random
+import logging
 from langchain.vectorstores import Qdrant
 from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from qdrant_client import QdrantClient
 from qdrant_client.models import Distance, VectorParams
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 # Add project root to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 from app.config import VECTOR_DB_PATH, COLLECTION_NAME
         )
     def _init_qdrant_client(self):
+        """Initialize the Qdrant client with retry logic for concurrent access issues."""
+        # Create directory if it doesn't exist
         os.makedirs(VECTOR_DB_PATH, exist_ok=True)
+        # Add a small random delay to reduce chance of concurrent access
+        time.sleep(random.uniform(0.1, 0.5))
+        # Generate a unique path for this instance to avoid collision
+        instance_id = str(random.randint(10000, 99999))
+        unique_path = os.path.join(VECTOR_DB_PATH, f"instance_{instance_id}")
+        max_retries = 3
+        retry_count = 0
+        while retry_count < max_retries:
+            try:
+                logger.info(f"Attempting to initialize Qdrant client (attempt {retry_count+1}/{max_retries})")
+                # Try to use the unique path first
+                try:
+                    os.makedirs(unique_path, exist_ok=True)
+                    return QdrantClient(path=unique_path)
+                except Exception as e:
+                    logger.warning(f"Could not use unique path {unique_path}: {e}")
+                    # Try the main path as fallback
+                    return QdrantClient(path=VECTOR_DB_PATH)
+            except RuntimeError as e:
+                if "already accessed by another instance" in str(e):
+                    retry_count += 1
+                    wait_time = random.uniform(0.5, 2.0) * retry_count
+                    logger.warning(f"Qdrant concurrent access detected. Retrying in {wait_time:.2f} seconds...")
+                    time.sleep(wait_time)
+                else:
+                    # Different error, don't retry
+                    raise
+        # If all retries failed, try to use in-memory storage as last resort
+        logger.warning("All Qdrant client initialization attempts failed. Using in-memory mode.")
+        return QdrantClient(":memory:")
     def _init_vector_store(self):
         """Initialize the vector store."""
+        try:
+            collections = self.client.get_collections().collections
+            collection_names = [collection.name for collection in collections]
+            # Get vector dimension from the embedding model
+            vector_size = len(self.embeddings.embed_query("test"))
+            if COLLECTION_NAME not in collection_names:
+                # Create the collection with appropriate settings
+                self.client.create_collection(
+                    collection_name=COLLECTION_NAME,
+                    vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
+                )
+                logger.info(f"Created new collection: {COLLECTION_NAME}")
+            return Qdrant(
+                client=self.client,
                 collection_name=COLLECTION_NAME,
+                embeddings=self.embeddings
+            )
+        except Exception as e:
+            logger.error(f"Error initializing vector store: {e}")
+            # Create a simple in-memory fallback
+            logger.warning("Using in-memory vector store as fallback.")
+            return Qdrant.from_texts(
+                ["Hello, I am your AI assistant."],
+                self.embeddings,
+                location=":memory:",
+                collection_name=COLLECTION_NAME
             )
     def get_retriever(self):
         """Get the retriever for RAG."""
     def add_texts(self, texts, metadatas=None):
         """Add texts to the vector store."""
+        try:
+            return self.vectorstore.add_texts(texts=texts, metadatas=metadatas)
+        except Exception as e:
+            logger.error(f"Error adding texts to vector store: {e}")
+            return ["error-id-" + str(random.randint(10000, 99999))]
     def similarity_search(self, query, k=5):
         """Perform a similarity search."""
+        try:
+            return self.vectorstore.similarity_search(query, k=k)
+        except Exception as e:
+            logger.error(f"Error during similarity search: {e}")
+            return []

app/ui/streamlit_app.py CHANGED Viewed

@@ -4,6 +4,12 @@ import sys
 import tempfile
 from datetime import datetime
 from typing import List, Dict, Any
 # Add project root to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
@@ -29,15 +35,50 @@ st.set_page_config(
     layout="wide"
 )
 # Initialize session state variables
 if "messages" not in st.session_state:
     st.session_state.messages = []
-if "agent" not in st.session_state:
-    st.session_state.agent = AssistantAgent()
-if "document_processor" not in st.session_state:
-    st.session_state.document_processor = DocumentProcessor(st.session_state.agent.memory_manager)
 # App title
 st.title("🤗 Personal AI Assistant (Hugging Face)")
@@ -64,7 +105,7 @@ with st.sidebar:
                         f.write(uploaded_file.getvalue())
                     # Ingest the document
-                    st.session_state.document_processor.ingest_file(tmp_path, {"original_name": uploaded_file.name})
                     # Clean up the temporary file
                     os.unlink(tmp_path)
@@ -87,7 +128,7 @@ with st.sidebar:
                     }
                     # Ingest the text
-                    st.session_state.document_processor.ingest_text(text_input, metadata)
                     st.success("Text added to knowledge base successfully!")
                 except Exception as e:
@@ -139,34 +180,57 @@ if prompt := st.chat_input("Ask a question..."):
     # Generate response
     with st.chat_message("assistant"):
         with st.spinner("Thinking..."):
-            response = st.session_state.agent.query(prompt)
-            answer = response["answer"]
-            sources = response["sources"]
-            # Display the response
-            st.write(answer)
-            # Display sources in an expander
-            with st.expander("View Sources"):
-                if sources:
-                    for i, source in enumerate(sources, 1):
-                        st.write(f"{i}. {source['file_name']}" + (f" (Page {source['page']})" if source.get('page') else ""))
-                        st.text(source['content'])
-                else:
-                    st.write("No specific sources used.")
-            # Save conversation
-            save_conversation(prompt, answer, sources)
-            # Add assistant response to chat history
-            st.session_state.messages.append({
-                "role": "assistant",
-                "content": answer,
-                "sources": sources
-            })
-            # Update the agent's memory
-            st.session_state.agent.add_conversation_to_memory(prompt, answer)
 # Add a footer
 st.markdown("---")

 import tempfile
 from datetime import datetime
 from typing import List, Dict, Any
+import time
+import logging
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 # Add project root to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
     layout="wide"
 )
+# Function to initialize the agent safely
+@st.cache_resource
+def get_agent():
+    logger.info("Initializing AssistantAgent (should only happen once)")
+    try:
+        return AssistantAgent()
+    except Exception as e:
+        logger.error(f"Error initializing agent: {e}")
+        st.error(f"Could not initialize AI assistant: {str(e)}")
+        # Return a dummy agent as fallback
+        class DummyAgent:
+            def query(self, question):
+                return {
+                    "answer": "I'm having trouble starting up. Please try refreshing the page.",
+                    "sources": []
+                }
+            def add_conversation_to_memory(self, *args, **kwargs):
+                pass
+        return DummyAgent()
+# Function to initialize document processor safely
+@st.cache_resource
+def get_document_processor(agent):
+    logger.info("Initializing DocumentProcessor (should only happen once)")
+    try:
+        return DocumentProcessor(agent.memory_manager)
+    except Exception as e:
+        logger.error(f"Error initializing document processor: {e}")
+        st.error(f"Could not initialize document processor: {str(e)}")
+        # Return a dummy processor as fallback
+        class DummyProcessor:
+            def ingest_file(self, *args, **kwargs):
+                return ["dummy-id"]
+            def ingest_text(self, *args, **kwargs):
+                return ["dummy-id"]
+        return DummyProcessor()
 # Initialize session state variables
 if "messages" not in st.session_state:
     st.session_state.messages = []
+# Initialize agent and document processor with caching to prevent multiple instances
+agent = get_agent()
+document_processor = get_document_processor(agent)
 # App title
 st.title("🤗 Personal AI Assistant (Hugging Face)")
                         f.write(uploaded_file.getvalue())
                     # Ingest the document
+                    document_processor.ingest_file(tmp_path, {"original_name": uploaded_file.name})
                     # Clean up the temporary file
                     os.unlink(tmp_path)
                     }
                     # Ingest the text
+                    document_processor.ingest_text(text_input, metadata)
                     st.success("Text added to knowledge base successfully!")
                 except Exception as e:
     # Generate response
     with st.chat_message("assistant"):
         with st.spinner("Thinking..."):
+            try:
+                # Add retry mechanism for vector store issues
+                max_retries = 3
+                for attempt in range(max_retries):
+                    try:
+                        response = agent.query(prompt)
+                        break
+                    except Exception as e:
+                        if "already accessed by another instance" in str(e) and attempt < max_retries - 1:
+                            logger.warning(f"Vector store access conflict, retrying ({attempt+1}/{max_retries})...")
+                            time.sleep(1)  # Wait before retrying
+                        else:
+                            raise
+                answer = response["answer"]
+                sources = response["sources"]
+                # Display the response
+                st.write(answer)
+                # Display sources in an expander
+                with st.expander("View Sources"):
+                    if sources:
+                        for i, source in enumerate(sources, 1):
+                            st.write(f"{i}. {source['file_name']}" + (f" (Page {source['page']})" if source.get('page') else ""))
+                            st.text(source['content'])
+                    else:
+                        st.write("No specific sources used.")
+                # Save conversation
+                save_conversation(prompt, answer, sources)
+                # Add assistant response to chat history
+                st.session_state.messages.append({
+                    "role": "assistant",
+                    "content": answer,
+                    "sources": sources
+                })
+                # Update the agent's memory
+                agent.add_conversation_to_memory(prompt, answer)
+            except Exception as e:
+                error_msg = f"Error generating response: {str(e)}"
+                logger.error(error_msg)
+                st.error(error_msg)
+                st.session_state.messages.append({
+                    "role": "assistant",
+                    "content": "I'm sorry, I encountered an error while processing your request. Please try again or refresh the page.",
+                    "sources": []
+                })
 # Add a footer
 st.markdown("---")