Spaces:

Ahmadkhan12
/

Rag-university-act-2016

Sleeping

App Files Files Community

Ahmadkhan12 committed on Nov 24, 2024

Commit

61651bd

verified ·

1 Parent(s): b5b7646

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -16

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import time
 # Function to process the uploaded PDF and save it temporarily
 def process_pdf(file):
     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
         tmpfile.write(file.read())  # Write the uploaded file's content to the temp file
         tmpfile_path = tmpfile.name  # Get the temporary file path
@@ -16,17 +17,27 @@ def process_pdf(file):
 # Function to extract text from the PDF
 def extract_text_from_pdf(pdf_path):
-    reader = PdfReader(pdf_path)
-    text = ""
-    for page in reader.pages:
-        text += page.extract_text()
-    return text
 # Function to chunk text into smaller sections
 def chunk_text(text, chunk_size=200):
-    words = text.split()
-    chunks = [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
-    return chunks
 # Main function to run the Streamlit app
 def main():
@@ -40,16 +51,23 @@ def main():
         tmp_file_path = process_pdf(uploaded_file)
         # Extract text from the uploaded PDF
-        st.write("Extracting text from the PDF...")
         pdf_text = extract_text_from_pdf(tmp_file_path)
         # Initialize Sentence-Transformer model for embeddings
         model = SentenceTransformer('all-MiniLM-L6-v2')
         # Chunk text into smaller sections for embedding generation
-        st.write("Chunking text for embedding generation...")
         text_chunks = chunk_text(pdf_text, chunk_size=200)
         # Generate embeddings with a progress bar
         st.write("Generating embeddings...")
         progress_bar = st.progress(0)
@@ -73,9 +91,4 @@ def main():
             query_embedding = model.encode([query], convert_to_numpy=True)
             # Perform similarity search using FAISS
-            st.write("Searching...")
-            start_time = time.time()
-            D, I = index.search(query_embedding, k=5)
-            end_time = time.time()
-            # Display the results

 # Function to process the uploaded PDF and save it temporarily
 def process_pdf(file):
+    st.write("Processing uploaded PDF...")
     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
         tmpfile.write(file.read())  # Write the uploaded file's content to the temp file
         tmpfile_path = tmpfile.name  # Get the temporary file path
 # Function to extract text from the PDF
 def extract_text_from_pdf(pdf_path):
+    try:
+        st.write("Extracting text from the PDF...")
+        reader = PdfReader(pdf_path)
+        text = ""
+        for page in reader.pages:
+            text += page.extract_text()
+        return text
+    except Exception as e:
+        st.error(f"Error extracting text from PDF: {e}")
+        return ""
 # Function to chunk text into smaller sections
 def chunk_text(text, chunk_size=200):
+    try:
+        st.write("Chunking text into smaller sections...")
+        words = text.split()
+        chunks = [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
+        return chunks
+    except Exception as e:
+        st.error(f"Error chunking text: {e}")
+        return []
 # Main function to run the Streamlit app
 def main():
         tmp_file_path = process_pdf(uploaded_file)
         # Extract text from the uploaded PDF
         pdf_text = extract_text_from_pdf(tmp_file_path)
+        if not pdf_text:
+            st.error("No text extracted from the PDF. Please upload a valid file.")
+            return
         # Initialize Sentence-Transformer model for embeddings
+        st.write("Loading embedding model...")
         model = SentenceTransformer('all-MiniLM-L6-v2')
         # Chunk text into smaller sections for embedding generation
         text_chunks = chunk_text(pdf_text, chunk_size=200)
+        if not text_chunks:
+            st.error("Failed to split text into chunks. Exiting.")
+            return
         # Generate embeddings with a progress bar
         st.write("Generating embeddings...")
         progress_bar = st.progress(0)
             query_embedding = model.encode([query], convert_to_numpy=True)
             # Perform similarity search using FAISS
+            st.write("Searching.