Ahmadkhan12 committed
Commit 0e65123 · verified · 1 Parent(s): afbfc0e

Update app.py

Files changed (1):
  1. app.py +36 -21
app.py CHANGED
@@ -7,6 +7,12 @@ import faiss
 import numpy as np
 import time
 
+# Global variables for caching the model and embeddings
+model = None
+index = None
+embeddings = None
+text_chunks = []
+
 # Function to process the uploaded PDF and save it temporarily
 def process_pdf(file):
     st.write("Processing uploaded PDF...")
@@ -39,8 +45,31 @@ def chunk_text(text, chunk_size=200):
         st.error(f"Error chunking text: {e}")
         return []
 
+# Function to load the embedding model
+def load_model():
+    global model
+    st.write("Loading embedding model...")
+    model = SentenceTransformer('all-MiniLM-L6-v2')
+
+# Function to generate embeddings
+def generate_embeddings():
+    global embeddings, text_chunks, index
+    st.write("Generating embeddings...")
+    embeddings = []
+    for chunk in text_chunks:
+        embeddings.append(model.encode(chunk, convert_to_numpy=True))
+    embeddings = np.array(embeddings)
+
+    # Build FAISS index
+    st.write("Building FAISS index...")
+    dimension = embeddings.shape[-1]
+    index = faiss.IndexFlatL2(dimension)
+    index.add(embeddings)
+
 # Main function to run the Streamlit app
 def main():
+    global embeddings, text_chunks, index, model
+
     st.title("PDF Embedding and Query System")
 
     # File uploader for the user to upload a PDF
@@ -57,31 +86,17 @@ def main():
         st.error("No text extracted from the PDF. Please upload a valid file.")
         return
 
-    # Initialize Sentence-Transformer model for embeddings
-    st.write("Loading embedding model...")
-    model = SentenceTransformer('all-MiniLM-L6-v2')
+    # Initialize Sentence-Transformer model and embeddings only once
+    if model is None:
+        load_model()
 
     # Chunk text into smaller sections for embedding generation
-    text_chunks = chunk_text(pdf_text, chunk_size=200)
-
     if not text_chunks:
-        st.error("Failed to split text into chunks. Exiting.")
-        return
+        text_chunks = chunk_text(pdf_text, chunk_size=200)
 
-    # Generate embeddings with a progress bar
-    st.write("Generating embeddings...")
-    progress_bar = st.progress(0)
-    embeddings = []
-    for i, chunk in enumerate(text_chunks):
-        embeddings.append(model.encode(chunk, convert_to_numpy=True))
-        progress_bar.progress((i + 1) / len(text_chunks))
-    embeddings = np.array(embeddings)
-
-    # Build FAISS index
-    st.write("Building FAISS index...")
-    dimension = embeddings.shape[-1]
-    index = faiss.IndexFlatL2(dimension)
-    index.add(embeddings)
+    # Generate embeddings only once
+    if embeddings is None:
+        generate_embeddings()
 
     # Query input field for users to enter their search queries
     query = st.text_input("Enter a query to search:")
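
For context, the snippet below is a minimal, self-contained sketch (not part of the commit) of the lazy-initialization pattern the diff adopts: load the SentenceTransformer model and build the FAISS index once, guarded by "is None" checks, then reuse them for queries. The helper names load_model and build_index, the sample chunks, and the query string are illustrative assumptions, not code from app.py.

import faiss
from sentence_transformers import SentenceTransformer

model = None
index = None

def load_model():
    # Load the embedding model only on first use, then reuse the cached instance.
    global model
    if model is None:
        model = SentenceTransformer('all-MiniLM-L6-v2')
    return model

def build_index(text_chunks):
    # Embed the chunks and build a FAISS L2 index only once.
    global index
    if index is None:
        embeddings = load_model().encode(text_chunks, convert_to_numpy=True)
        index = faiss.IndexFlatL2(embeddings.shape[-1])
        index.add(embeddings)
    return index

# Illustrative usage with made-up chunks and a made-up query.
chunks = ["FAISS is a library for similarity search.", "Streamlit builds data apps."]
idx = build_index(chunks)
query_vec = load_model().encode(["what is FAISS?"], convert_to_numpy=True)
distances, ids = idx.search(query_vec, 1)
print(chunks[ids[0][0]], distances[0][0])

In a Streamlit app the same "build once, reuse" effect is often achieved with Streamlit's caching decorators; the plain global-variable approach above simply mirrors what this commit does in app.py.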