Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,9 @@
|
|
1 |
# app.py
|
2 |
import streamlit as st
|
3 |
import os
|
4 |
-
import shutil
|
5 |
import tempfile
|
6 |
from io import BytesIO
|
7 |
from PyPDF2 import PdfReader
|
8 |
-
import pandas as pd
|
9 |
-
from docx import Document
|
10 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
11 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
12 |
from langchain_community.vectorstores import FAISS
|
@@ -54,7 +51,7 @@ with st.sidebar:
|
|
54 |
|
55 |
# File uploader
|
56 |
if st.session_state.authenticated:
|
57 |
-
input_data = st.file_uploader("Upload a PDF
|
58 |
|
59 |
if st.button("Process File") and input_data is not None:
|
60 |
try:
|
@@ -118,14 +115,14 @@ def main():
|
|
118 |
""", unsafe_allow_html=True)
|
119 |
|
120 |
st.title("RAG Q&A App with Mistral AI")
|
121 |
-
st.markdown("Welcome to the BSNL RAG App! Upload your
|
122 |
|
123 |
if not st.session_state.authenticated:
|
124 |
st.warning("Please authenticate with your API key in the sidebar.")
|
125 |
return
|
126 |
|
127 |
if st.session_state.vectorstore is None:
|
128 |
-
st.info("Please upload and process a PDF
|
129 |
return
|
130 |
|
131 |
query = st.text_input("Enter your question:")
|
@@ -146,38 +143,25 @@ def process_input(input_data):
|
|
146 |
|
147 |
# Initialize progress bar and status
|
148 |
progress_bar = st.progress(0)
|
149 |
-
status = st.status("Processing file...", expanded=True)
|
150 |
-
|
151 |
-
documents = ""
|
152 |
-
file_name = input_data.name.lower()
|
153 |
|
154 |
# Step 1: Save file temporarily
|
155 |
-
status.update(label="Saving file...")
|
156 |
progress_bar.progress(0.20)
|
157 |
|
158 |
-
with tempfile.NamedTemporaryFile(delete=False, dir="uploads", suffix=
|
159 |
tmp_file.write(input_data.read())
|
160 |
tmp_file_path = tmp_file.name
|
161 |
|
162 |
-
# Step 2: Read file
|
163 |
-
status.update(label="Reading file...")
|
164 |
progress_bar.progress(0.40)
|
165 |
|
166 |
try:
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
elif file_name.endswith(".txt"):
|
172 |
-
with open(tmp_file_path, "r", encoding="utf-8") as f:
|
173 |
-
documents = f.read()
|
174 |
-
elif file_name.endswith((".xls", ".xlsx")):
|
175 |
-
df = pd.read_excel(tmp_file_path)
|
176 |
-
documents = " ".join(df.astype(str).values.flatten())
|
177 |
-
elif file_name.endswith((".doc", ".docx")):
|
178 |
-
doc = Document(tmp_file_path)
|
179 |
-
for para in doc.paragraphs:
|
180 |
-
documents += para.text + "\n"
|
181 |
finally:
|
182 |
os.remove(tmp_file_path) # Clean up temporary file
|
183 |
|
|
|
1 |
# app.py
|
2 |
import streamlit as st
|
3 |
import os
|
|
|
4 |
import tempfile
|
5 |
from io import BytesIO
|
6 |
from PyPDF2 import PdfReader
|
|
|
|
|
7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
8 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
9 |
from langchain_community.vectorstores import FAISS
|
|
|
51 |
|
52 |
# File uploader
|
53 |
if st.session_state.authenticated:
|
54 |
+
input_data = st.file_uploader("Upload a PDF file", type=["pdf"])
|
55 |
|
56 |
if st.button("Process File") and input_data is not None:
|
57 |
try:
|
|
|
115 |
""", unsafe_allow_html=True)
|
116 |
|
117 |
st.title("RAG Q&A App with Mistral AI")
|
118 |
+
st.markdown("Welcome to the BSNL RAG App! Upload your PDF files and ask questions with ease.", unsafe_allow_html=True)
|
119 |
|
120 |
if not st.session_state.authenticated:
|
121 |
st.warning("Please authenticate with your API key in the sidebar.")
|
122 |
return
|
123 |
|
124 |
if st.session_state.vectorstore is None:
|
125 |
+
st.info("Please upload and process a PDF file in the sidebar.")
|
126 |
return
|
127 |
|
128 |
query = st.text_input("Enter your question:")
|
|
|
143 |
|
144 |
# Initialize progress bar and status
|
145 |
progress_bar = st.progress(0)
|
146 |
+
status = st.status("Processing PDF file...", expanded=True)
|
|
|
|
|
|
|
147 |
|
148 |
# Step 1: Save file temporarily
|
149 |
+
status.update(label="Saving PDF file...")
|
150 |
progress_bar.progress(0.20)
|
151 |
|
152 |
+
with tempfile.NamedTemporaryFile(delete=False, dir="uploads", suffix=".pdf") as tmp_file:
|
153 |
tmp_file.write(input_data.read())
|
154 |
tmp_file_path = tmp_file.name
|
155 |
|
156 |
+
# Step 2: Read PDF file
|
157 |
+
status.update(label="Reading PDF file...")
|
158 |
progress_bar.progress(0.40)
|
159 |
|
160 |
try:
|
161 |
+
pdf_reader = PdfReader(tmp_file_path)
|
162 |
+
documents = ""
|
163 |
+
for page in pdf_reader.pages:
|
164 |
+
documents += page.extract_text() or ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
finally:
|
166 |
os.remove(tmp_file_path) # Clean up temporary file
|
167 |
|