Spaces:

Ahmadkhan12
/

Rag-university-act-2016

Sleeping

App Files Files Community

Ahmadkhan12 committed on Nov 24, 2024

Commit

0e65123

verified ·

1 Parent(s): afbfc0e

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -21

app.py CHANGED Viewed

@@ -7,6 +7,12 @@ import faiss
 import numpy as np
 import time
 # Function to process the uploaded PDF and save it temporarily
 def process_pdf(file):
     st.write("Processing uploaded PDF...")
@@ -39,8 +45,31 @@ def chunk_text(text, chunk_size=200):
         st.error(f"Error chunking text: {e}")
         return []
 # Main function to run the Streamlit app
 def main():
     st.title("PDF Embedding and Query System")
     # File uploader for the user to upload a PDF
@@ -57,31 +86,17 @@ def main():
             st.error("No text extracted from the PDF. Please upload a valid file.")
             return
-        # Initialize Sentence-Transformer model for embeddings
-        st.write("Loading embedding model...")
-        model = SentenceTransformer('all-MiniLM-L6-v2')
         # Chunk text into smaller sections for embedding generation
-        text_chunks = chunk_text(pdf_text, chunk_size=200)
         if not text_chunks:
-            st.error("Failed to split text into chunks. Exiting.")
-            return
-        # Generate embeddings with a progress bar
-        st.write("Generating embeddings...")
-        progress_bar = st.progress(0)
-        embeddings = []
-        for i, chunk in enumerate(text_chunks):
-            embeddings.append(model.encode(chunk, convert_to_numpy=True))
-            progress_bar.progress((i + 1) / len(text_chunks))
-        embeddings = np.array(embeddings)
-        # Build FAISS index
-        st.write("Building FAISS index...")
-        dimension = embeddings.shape[-1]
-        index = faiss.IndexFlatL2(dimension)
-        index.add(embeddings)
         # Query input field for users to enter their search queries
         query = st.text_input("Enter a query to search:")

 import numpy as np
 import time
+# Global variables for caching the model and embeddings
+model = None
+index = None
+embeddings = None
+text_chunks = []
 # Function to process the uploaded PDF and save it temporarily
 def process_pdf(file):
     st.write("Processing uploaded PDF...")
         st.error(f"Error chunking text: {e}")
         return []
+# Function to load the embedding model
+def load_model():
+    global model
+    st.write("Loading embedding model...")
+    model = SentenceTransformer('all-MiniLM-L6-v2')
+# Function to generate embeddings
+def generate_embeddings():
+    global embeddings, text_chunks, index
+    st.write("Generating embeddings...")
+    embeddings = []
+    for chunk in text_chunks:
+        embeddings.append(model.encode(chunk, convert_to_numpy=True))
+    embeddings = np.array(embeddings)
+    # Build FAISS index
+    st.write("Building FAISS index...")
+    dimension = embeddings.shape[-1]
+    index = faiss.IndexFlatL2(dimension)
+    index.add(embeddings)
 # Main function to run the Streamlit app
 def main():
+    global embeddings, text_chunks, index, model
     st.title("PDF Embedding and Query System")
     # File uploader for the user to upload a PDF
             st.error("No text extracted from the PDF. Please upload a valid file.")
             return
+        # Initialize Sentence-Transformer model and embeddings only once
+        if model is None:
+            load_model()
         # Chunk text into smaller sections for embedding generation
         if not text_chunks:
+            text_chunks = chunk_text(pdf_text, chunk_size=200)
+        # Generate embeddings only once
+        if embeddings is None:
+            generate_embeddings()
         # Query input field for users to enter their search queries
         query = st.text_input("Enter a query to search:")