Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -444,7 +444,6 @@
|
|
| 444 |
# 🔥 Run Streamlit App
|
| 445 |
# if __name__ == '__main__':
|
| 446 |
# main()
|
| 447 |
-
|
| 448 |
import streamlit as st
|
| 449 |
import os
|
| 450 |
import re
|
|
@@ -479,24 +478,34 @@ lora_config = LoraConfig(
|
|
| 479 |
model = get_peft_model(model, lora_config)
|
| 480 |
model.eval()
|
| 481 |
|
| 482 |
-
# 📄 Function to Read & Extract Text from PDFs
|
| 483 |
-
def read_files(
|
| 484 |
-
st.write("π Processing uploaded file...") # Debugging
|
| 485 |
-
file_context = ""
|
| 486 |
try:
|
| 487 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
for page in reader.pages:
|
| 489 |
text = page.extract_text()
|
| 490 |
if text:
|
| 491 |
file_context += text + "\n"
|
| 492 |
|
|
|
|
|
|
|
|
|
|
| 493 |
if not file_context.strip():
|
| 494 |
st.error("β οΈ No text found. The document might be scanned or encrypted.")
|
| 495 |
return ""
|
| 496 |
|
| 497 |
st.write(f"β
Extracted {len(file_context)} characters.") # Debugging
|
| 498 |
return file_context.strip()
|
| 499 |
-
|
| 500 |
except Exception as e:
|
| 501 |
st.error(f"β οΈ Error reading PDF: {e}")
|
| 502 |
return ""
|
|
@@ -595,6 +604,7 @@ if __name__ == '__main__':
|
|
| 595 |
|
| 596 |
|
| 597 |
|
|
|
|
| 598 |
# import streamlit as st
|
| 599 |
# from PyPDF2 import PdfReader
|
| 600 |
|
|
|
|
| 444 |
# 🔥 Run Streamlit App
|
| 445 |
# if __name__ == '__main__':
|
| 446 |
# main()
|
|
|
|
| 447 |
import streamlit as st
|
| 448 |
import os
|
| 449 |
import re
|
|
|
|
| 478 |
model = get_peft_model(model, lora_config)
|
| 479 |
model.eval()
|
| 480 |
|
| 481 |
+
# 📄 Function to Read & Extract Text from PDFs
|
| 482 |
+
def read_files(uploaded_file):
    """Extract the text of an uploaded PDF.

    The upload is first written to a temporary file on disk, then parsed
    with ``PdfReader``; the text of every page that yields any is
    concatenated, one page per line group. Progress and errors are reported
    through the Streamlit UI (``st.write`` / ``st.error``).

    Args:
        uploaded_file: Upload object exposing ``getbuffer()`` (presumably a
            Streamlit ``UploadedFile`` -- confirm against the caller).

    Returns:
        The extracted text with surrounding whitespace stripped, or ``""``
        when no text could be extracted or any error occurred.
    """
    import tempfile

    temp_pdf_path = None
    try:
        # Step 1: save the upload to disk first. A unique temp name per call
        # keeps concurrent sessions from overwriting or deleting each
        # other's file (a fixed name in the working directory is shared).
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            temp_pdf_path = tmp.name
            tmp.write(uploaded_file.getbuffer())

        # Step 2: open the saved file and extract text.
        # NOTE(review): the leading emoji of this message was restored from
        # mis-encoded text -- confirm against the repository copy.
        st.write("📄 Processing saved PDF file...")  # Debugging
        reader = PdfReader(temp_pdf_path)

        page_texts = []
        for page in reader.pages:
            text = page.extract_text()
            if text:  # pages without extractable text are skipped
                page_texts.append(text + "\n")
        file_context = "".join(page_texts)

        if not file_context.strip():
            st.error("⚠️ No text found. The document might be scanned or encrypted.")
            return ""

        st.write(f"✅ Extracted {len(file_context)} characters.")  # Debugging
        return file_context.strip()

    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the app, and signal "no text" to the caller.
        st.error(f"⚠️ Error reading PDF: {e}")
        return ""

    finally:
        # Step 3: always delete the temp file, including when parsing
        # failed. Cleanup is best-effort: a failed delete must not discard
        # text that was already extracted.
        if temp_pdf_path is not None:
            try:
                os.remove(temp_pdf_path)
            except OSError:
                pass
|
|
|
|
| 604 |
|
| 605 |
|
| 606 |
|
| 607 |
+
|
| 608 |
# import streamlit as st
|
| 609 |
# from PyPDF2 import PdfReader
|
| 610 |
|