Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -30,6 +30,12 @@ regulation_pdfs = {
|
|
30 |
"COPPA": "COPPA.pdf"
|
31 |
}
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
# Function to extract text from PDF
|
34 |
def extract_pdf(pdf_path):
|
35 |
try:
|
@@ -97,7 +103,7 @@ def markdown_to_html(content):
|
|
97 |
return markdown2.markdown(content)
|
98 |
|
99 |
def load_pdfs(gdpr, ferpa, coppa, additional_pdfs):
|
100 |
-
global full_pdf_content, vector_store, rag_chain
|
101 |
|
102 |
documents = []
|
103 |
full_pdf_content = ""
|
@@ -137,6 +143,7 @@ def load_pdfs(gdpr, ferpa, coppa, additional_pdfs):
|
|
137 |
print(f"Failed to extract content from uploaded PDF: {pdf_file.name}")
|
138 |
|
139 |
if not documents:
|
|
|
140 |
return "No PDFs were successfully loaded. Please check your selections and uploads."
|
141 |
|
142 |
print(f"Total documents loaded: {len(documents)}")
|
@@ -145,12 +152,13 @@ def load_pdfs(gdpr, ferpa, coppa, additional_pdfs):
|
|
145 |
vector_store = generate_embeddings(documents)
|
146 |
rag_chain = create_rag_chain(vector_store)
|
147 |
|
|
|
148 |
return f"PDFs loaded and RAG system updated successfully! Loaded {len(documents)} document chunks."
|
149 |
|
150 |
def process_query(user_query):
|
151 |
-
global rag_chain, full_pdf_content
|
152 |
|
153 |
-
if
|
154 |
return ("Please load PDFs before asking questions.",
|
155 |
"Please load PDFs before asking questions.",
|
156 |
"Please load PDFs and initialize the system before asking questions.")
|
@@ -168,11 +176,6 @@ def process_query(user_query):
|
|
168 |
|
169 |
return rag_response, gemini_resp, html_content
|
170 |
|
171 |
-
# Initialize
|
172 |
-
full_pdf_content = ""
|
173 |
-
vector_store = None
|
174 |
-
rag_chain = None
|
175 |
-
|
176 |
# Gradio interface
|
177 |
with gr.Blocks() as iface:
|
178 |
gr.Markdown("# Data Protection Team")
|
|
|
30 |
"COPPA": "COPPA.pdf"
|
31 |
}
|
32 |
|
33 |
+
# Global variables
|
34 |
+
full_pdf_content = ""
|
35 |
+
vector_store = None
|
36 |
+
rag_chain = None
|
37 |
+
pdfs_loaded = False
|
38 |
+
|
39 |
# Function to extract text from PDF
|
40 |
def extract_pdf(pdf_path):
|
41 |
try:
|
|
|
103 |
return markdown2.markdown(content)
|
104 |
|
105 |
def load_pdfs(gdpr, ferpa, coppa, additional_pdfs):
|
106 |
+
global full_pdf_content, vector_store, rag_chain, pdfs_loaded
|
107 |
|
108 |
documents = []
|
109 |
full_pdf_content = ""
|
|
|
143 |
print(f"Failed to extract content from uploaded PDF: {pdf_file.name}")
|
144 |
|
145 |
if not documents:
|
146 |
+
pdfs_loaded = False
|
147 |
return "No PDFs were successfully loaded. Please check your selections and uploads."
|
148 |
|
149 |
print(f"Total documents loaded: {len(documents)}")
|
|
|
152 |
vector_store = generate_embeddings(documents)
|
153 |
rag_chain = create_rag_chain(vector_store)
|
154 |
|
155 |
+
pdfs_loaded = True
|
156 |
return f"PDFs loaded and RAG system updated successfully! Loaded {len(documents)} document chunks."
|
157 |
|
158 |
def process_query(user_query):
|
159 |
+
global rag_chain, full_pdf_content, pdfs_loaded
|
160 |
|
161 |
+
if not pdfs_loaded:
|
162 |
return ("Please load PDFs before asking questions.",
|
163 |
"Please load PDFs before asking questions.",
|
164 |
"Please load PDFs and initialize the system before asking questions.")
|
|
|
176 |
|
177 |
return rag_response, gemini_resp, html_content
|
178 |
|
|
|
|
|
|
|
|
|
|
|
179 |
# Gradio interface
|
180 |
with gr.Blocks() as iface:
|
181 |
gr.Markdown("# Data Protection Team")
|