Spaces:

samim2024
/

bsnl-chatboot

Sleeping

App Files Files Community

samim2024 committed on May 16

Commit

875ad97

verified ·

1 Parent(s): 1676c9d

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -28

app.py CHANGED Viewed

@@ -1,8 +1,8 @@
 # app.py
 import streamlit as st
 import os
 import shutil
 from io import BytesIO
 from PyPDF2 import PdfReader
 import pandas as pd
@@ -57,9 +57,12 @@ with st.sidebar:
         input_data = st.file_uploader("Upload a PDF, TXT, XLS/XLSX, or DOC/DOCX file", type=["pdf", "txt", "xls", "xlsx", "doc", "docx"])
         if st.button("Process File") and input_data is not None:
-            vector_store = process_input(input_data)
-            st.session_state.vectorstore = vector_store
-            st.success("File processed successfully. You can now ask questions.")
     # Display chat history
     st.subheader("Chat History")
@@ -133,8 +136,13 @@ def main():
             st.write("**Answer:**", answer)
 def process_input(input_data):
-    # Create uploads directory
-    os.makedirs("uploads", exist_ok=True)
     # Initialize progress bar and status
     progress_bar = st.progress(0)
@@ -143,41 +151,53 @@ def process_input(input_data):
     documents = ""
     file_name = input_data.name.lower()
-    # Step 1: Read file
     status.update(label="Reading file...")
-    progress_bar.progress(0.25)
-    if file_name.endswith(".pdf"):
-        pdf_reader = PdfReader(input_data)
-        for page in pdf_reader.pages:
-            documents += page.extract_text() or ""
-    elif file_name.endswith(".txt"):
-        documents = input_data.read().decode("utf-8")
-    elif file_name.endswith((".xls", ".xlsx")):
-        df = pd.read_excel(input_data)
-        documents = " ".join(df.astype(str).values.flatten())
-    elif file_name.endswith((".doc", ".docx")):
-        doc = Document(input_data)
-        for para in doc.paragraphs:
-            documents += para.text + "\n"
-    # Step 2: Split text
     status.update(label="Splitting text into chunks...")
-    progress_bar.progress(0.50)
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
     texts = text_splitter.split_text(documents)
-    # Step 3: Create embeddings
     status.update(label="Creating embeddings...")
-    progress_bar.progress(0.75)
     hf_embeddings = HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-mpnet-base-v2",
         model_kwargs={'device': 'cpu'}
     )
-    # Step 4: Initialize FAISS vector store
     status.update(label="Building vector store...")
     progress_bar.progress(0.90)

 # app.py
 import streamlit as st
 import os
 import shutil
+import tempfile
 from io import BytesIO
 from PyPDF2 import PdfReader
 import pandas as pd
         input_data = st.file_uploader("Upload a PDF, TXT, XLS/XLSX, or DOC/DOCX file", type=["pdf", "txt", "xls", "xlsx", "doc", "docx"])
         if st.button("Process File") and input_data is not None:
+            try:
+                vector_store = process_input(input_data)
+                st.session_state.vectorstore = vector_store
+                st.success("File processed successfully. You can now ask questions.")
+            except (PermissionError, OSError) as e:
+                st.error(f"Error processing file: {str(e)}. Check file permissions or server configuration.")
     # Display chat history
     st.subheader("Chat History")
             st.write("**Answer:**", answer)
 def process_input(input_data):
+    # Create uploads directory with proper permissions
+    try:
+        os.makedirs("uploads", exist_ok=True)
+        os.chmod("uploads", 0o777)  # Ensure write permissions
+    except PermissionError as e:
+        st.error(f"Failed to create uploads directory: {str(e)}")
+        raise
     # Initialize progress bar and status
     progress_bar = st.progress(0)
     documents = ""
     file_name = input_data.name.lower()
+    # Step 1: Save file temporarily
+    status.update(label="Saving file...")
+    progress_bar.progress(0.20)
+    with tempfile.NamedTemporaryFile(delete=False, dir="uploads", suffix=file_name) as tmp_file:
+        tmp_file.write(input_data.read())
+        tmp_file_path = tmp_file.name
+    # Step 2: Read file
     status.update(label="Reading file...")
+    progress_bar.progress(0.40)
+    try:
+        if file_name.endswith(".pdf"):
+            pdf_reader = PdfReader(tmp_file_path)
+            for page in pdf_reader.pages:
+                documents += page.extract_text() or ""
+        elif file_name.endswith(".txt"):
+            with open(tmp_file_path, "r", encoding="utf-8") as f:
+                documents = f.read()
+        elif file_name.endswith((".xls", ".xlsx")):
+            df = pd.read_excel(tmp_file_path)
+            documents = " ".join(df.astype(str).values.flatten())
+        elif file_name.endswith((".doc", ".docx")):
+            doc = Document(tmp_file_path)
+            for para in doc.paragraphs:
+                documents += para.text + "\n"
+    finally:
+        os.remove(tmp_file_path)  # Clean up temporary file
+    # Step 3: Split text
     status.update(label="Splitting text into chunks...")
+    progress_bar.progress(0.60)
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
     texts = text_splitter.split_text(documents)
+    # Step 4: Create embeddings
     status.update(label="Creating embeddings...")
+    progress_bar.progress(0.80)
     hf_embeddings = HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-mpnet-base-v2",
         model_kwargs={'device': 'cpu'}
     )
+    # Step 5: Initialize FAISS vector store
     status.update(label="Building vector store...")
     progress_bar.progress(0.90)