samim2024 committed on
Commit
38af0d3
·
verified ·
1 Parent(s): 875ad97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -28
app.py CHANGED
@@ -1,12 +1,9 @@
1
  # app.py
2
  import streamlit as st
3
  import os
4
- import shutil
5
  import tempfile
6
  from io import BytesIO
7
  from PyPDF2 import PdfReader
8
- import pandas as pd
9
- from docx import Document
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain_community.embeddings import HuggingFaceEmbeddings
12
  from langchain_community.vectorstores import FAISS
@@ -54,7 +51,7 @@ with st.sidebar:
54
 
55
  # File uploader
56
  if st.session_state.authenticated:
57
- input_data = st.file_uploader("Upload a PDF, TXT, XLS/XLSX, or DOC/DOCX file", type=["pdf", "txt", "xls", "xlsx", "doc", "docx"])
58
 
59
  if st.button("Process File") and input_data is not None:
60
  try:
@@ -118,14 +115,14 @@ def main():
118
  """, unsafe_allow_html=True)
119
 
120
  st.title("RAG Q&A App with Mistral AI")
121
- st.markdown("Welcome to the BSNL RAG App! Upload your PDFs, TXTs, XLS/XLSX, or DOC/DOCX files and ask questions with ease.", unsafe_allow_html=True)
122
 
123
  if not st.session_state.authenticated:
124
  st.warning("Please authenticate with your API key in the sidebar.")
125
  return
126
 
127
  if st.session_state.vectorstore is None:
128
- st.info("Please upload and process a PDF, TXT, XLS/XLSX, or DOC/DOCX file in the sidebar.")
129
  return
130
 
131
  query = st.text_input("Enter your question:")
@@ -146,38 +143,25 @@ def process_input(input_data):
146
 
147
  # Initialize progress bar and status
148
  progress_bar = st.progress(0)
149
- status = st.status("Processing file...", expanded=True)
150
-
151
- documents = ""
152
- file_name = input_data.name.lower()
153
 
154
  # Step 1: Save file temporarily
155
- status.update(label="Saving file...")
156
  progress_bar.progress(0.20)
157
 
158
- with tempfile.NamedTemporaryFile(delete=False, dir="uploads", suffix=file_name) as tmp_file:
159
  tmp_file.write(input_data.read())
160
  tmp_file_path = tmp_file.name
161
 
162
- # Step 2: Read file
163
- status.update(label="Reading file...")
164
  progress_bar.progress(0.40)
165
 
166
  try:
167
- if file_name.endswith(".pdf"):
168
- pdf_reader = PdfReader(tmp_file_path)
169
- for page in pdf_reader.pages:
170
- documents += page.extract_text() or ""
171
- elif file_name.endswith(".txt"):
172
- with open(tmp_file_path, "r", encoding="utf-8") as f:
173
- documents = f.read()
174
- elif file_name.endswith((".xls", ".xlsx")):
175
- df = pd.read_excel(tmp_file_path)
176
- documents = " ".join(df.astype(str).values.flatten())
177
- elif file_name.endswith((".doc", ".docx")):
178
- doc = Document(tmp_file_path)
179
- for para in doc.paragraphs:
180
- documents += para.text + "\n"
181
  finally:
182
  os.remove(tmp_file_path) # Clean up temporary file
183
 
 
1
  # app.py
2
  import streamlit as st
3
  import os
 
4
  import tempfile
5
  from io import BytesIO
6
  from PyPDF2 import PdfReader
 
 
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain_community.embeddings import HuggingFaceEmbeddings
9
  from langchain_community.vectorstores import FAISS
 
51
 
52
  # File uploader
53
  if st.session_state.authenticated:
54
+ input_data = st.file_uploader("Upload a PDF file", type=["pdf"])
55
 
56
  if st.button("Process File") and input_data is not None:
57
  try:
 
115
  """, unsafe_allow_html=True)
116
 
117
  st.title("RAG Q&A App with Mistral AI")
118
+ st.markdown("Welcome to the BSNL RAG App! Upload your PDF files and ask questions with ease.", unsafe_allow_html=True)
119
 
120
  if not st.session_state.authenticated:
121
  st.warning("Please authenticate with your API key in the sidebar.")
122
  return
123
 
124
  if st.session_state.vectorstore is None:
125
+ st.info("Please upload and process a PDF file in the sidebar.")
126
  return
127
 
128
  query = st.text_input("Enter your question:")
 
143
 
144
  # Initialize progress bar and status
145
  progress_bar = st.progress(0)
146
+ status = st.status("Processing PDF file...", expanded=True)
 
 
 
147
 
148
  # Step 1: Save file temporarily
149
+ status.update(label="Saving PDF file...")
150
  progress_bar.progress(0.20)
151
 
152
+ with tempfile.NamedTemporaryFile(delete=False, dir="uploads", suffix=".pdf") as tmp_file:
153
  tmp_file.write(input_data.read())
154
  tmp_file_path = tmp_file.name
155
 
156
+ # Step 2: Read PDF file
157
+ status.update(label="Reading PDF file...")
158
  progress_bar.progress(0.40)
159
 
160
  try:
161
+ pdf_reader = PdfReader(tmp_file_path)
162
+ documents = ""
163
+ for page in pdf_reader.pages:
164
+ documents += page.extract_text() or ""
 
 
 
 
 
 
 
 
 
 
165
  finally:
166
  os.remove(tmp_file_path) # Clean up temporary file
167