Spaces:

TabasumDev
/

GraniteByte

Sleeping

TabasumDev committed on Feb 22

Commit

90aae5b

verified ·

1 Parent(s): 2952b2a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -444,7 +444,6 @@
 # 🔥 Run Streamlit App
 # if __name__ == '__main__':
 #     main()
 import streamlit as st
 import os
 import re
@@ -479,24 +478,34 @@ lora_config = LoraConfig(
 model = get_peft_model(model, lora_config)
 model.eval()
-# 🛠 Function to Read & Extract Text from PDFs (With Debugging)
-def read_files(file):
-    st.write("📂 Processing uploaded file...")  # Debugging
-    file_context = ""
     try:
-        reader = PdfReader(file)
         for page in reader.pages:
             text = page.extract_text()
             if text:
                 file_context += text + "\n"
         if not file_context.strip():
             st.error("⚠️ No text found. The document might be scanned or encrypted.")
             return ""
         st.write(f"✅ Extracted {len(file_context)} characters.")  # Debugging
         return file_context.strip()
     except Exception as e:
         st.error(f"⚠️ Error reading PDF: {e}")
         return ""
@@ -595,6 +604,7 @@ if __name__ == '__main__':
 # import streamlit as st
 # from PyPDF2 import PdfReader

 # 🔥 Run Streamlit App
 # if __name__ == '__main__':
 #     main()
 import streamlit as st
 import os
 import re
 model = get_peft_model(model, lora_config)
 model.eval()
# 🛠 Function to Read & Extract Text from PDFs
def read_files(uploaded_file):
    """Extract the text of an uploaded PDF and return it stripped.

    The upload is first written to a uniquely named temporary file and then
    parsed with ``PdfReader`` from that path. The temporary file is removed
    whether or not parsing succeeds.

    Args:
        uploaded_file: Upload object exposing ``getbuffer()``
            (presumably a Streamlit ``UploadedFile`` -- confirm at call site).

    Returns:
        The concatenated text of all pages with surrounding whitespace
        stripped, or ``""`` when no text could be extracted or an error
        occurred (the error is reported through ``st.error``).
    """
    import tempfile  # local import: only this function needs it

    temp_pdf_path = None
    try:
        # 🔥 Step 1: Save the upload to disk first. A unique name keeps
        # concurrent sessions from overwriting or deleting each other's file.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
            temp_pdf_path = f.name
            f.write(uploaded_file.getbuffer())  # Save the file

        # 🔥 Step 2: Open the saved file and extract text
        st.write("📂 Processing saved PDF file...")  # Debugging
        reader = PdfReader(temp_pdf_path)
        page_texts = (page.extract_text() for page in reader.pages)
        # Pages that yield no text (None or "") are skipped.
        file_context = "".join(text + "\n" for text in page_texts if text)

        if not file_context.strip():
            st.error("⚠️ No text found. The document might be scanned or encrypted.")
            return ""
        st.write(f"✅ Extracted {len(file_context)} characters.")  # Debugging
        return file_context.strip()
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the app.
        st.error(f"⚠️ Error reading PDF: {e}")
        return ""
    finally:
        # 🔥 Step 3: Delete the temp file on every path, including failures.
        if temp_pdf_path is not None:
            try:
                os.remove(temp_pdf_path)
            except OSError:
                pass  # best-effort cleanup; never mask the real result
 # import streamlit as st
 # from PyPDF2 import PdfReader