Spaces:

jeremierostan
/

Data_Protection_Team

Sleeping

App Files Files

xet

Community

jeremierostan committed on Jul 27, 2024

Commit

bfc5546

verified ·

1 Parent(s): 2957c8f

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -56

app.py CHANGED Viewed

@@ -36,6 +36,53 @@ vector_store = None
 rag_chain = None
 pdfs_loaded = False
 # Function to extract text from PDF
 def extract_pdf(pdf_path):
     try:
@@ -99,62 +146,7 @@ def generate_final_response(response1, response2):
     chain = prompt | openai_client
     return chain.invoke({"response1": response1, "response2": response2}).content
-def markdown_to_html(content):
-    return markdown2.markdown(content)
-def load_pdfs(gdpr, ferpa, coppa, additional_pdfs):
-    global full_pdf_content, vector_store, rag_chain, pdfs_loaded
-    documents = []
-    full_pdf_content = ""
-    # Load selected regulation PDFs
-    selected_regulations = []
-    if gdpr:
-        selected_regulations.append("GDPR")
-    if ferpa:
-        selected_regulations.append("FERPA")
-    if coppa:
-        selected_regulations.append("COPPA")
-    for regulation in selected_regulations:
-        if regulation in regulation_pdfs:
-            pdf_path = regulation_pdfs[regulation]
-            if os.path.exists(pdf_path):
-                pdf_content = extract_pdf(pdf_path)
-                if pdf_content:
-                    full_pdf_content += pdf_content + "\n\n"
-                    documents.extend(split_text(pdf_content))
-                    print(f"Loaded {regulation} PDF")
-                else:
-                    print(f"Failed to extract content from {regulation} PDF")
-            else:
-                print(f"PDF file for {regulation} not found at {pdf_path}")
-    # Load additional user-uploaded PDFs
-    if additional_pdfs is not None:
-        for pdf_file in additional_pdfs:
-            pdf_content = extract_pdf(pdf_file.name)
-            if pdf_content:
-                full_pdf_content += pdf_content + "\n\n"
-                documents.extend(split_text(pdf_content))
-                print(f"Loaded additional PDF: {pdf_file.name}")
-            else:
-                print(f"Failed to extract content from uploaded PDF: {pdf_file.name}")
-    if not documents:
-        pdfs_loaded = False
-        return "No PDFs were successfully loaded. Please check your selections and uploads."
-    print(f"Total documents loaded: {len(documents)}")
-    print(f"Total content length: {len(full_pdf_content)} characters")
-    vector_store = generate_embeddings(documents)
-    rag_chain = create_rag_chain(vector_store)
-    pdfs_loaded = True
-    return f"PDFs loaded and RAG system updated successfully! Loaded {len(documents)} document chunks."
 def process_query(user_query):
     global rag_chain, full_pdf_content, pdfs_loaded
@@ -176,6 +168,10 @@ def process_query(user_query):
     return rag_response, gemini_resp, html_content
 # Gradio interface
 with gr.Blocks() as iface:
     gr.Markdown("# Data Protection Team")

 rag_chain = None
 pdfs_loaded = False
+# Function to load the selected regulation PDFs and any additional uploaded PDFs
+def load_pdfs(selected_regulations, additional_pdfs):
+    global full_pdf_content, vector_store, rag_chain, pdfs_loaded
+    documents = []
+    full_pdf_content = ""
+    print(f"Selected regulations: {selected_regulations}")  # Debug print
+    for regulation in selected_regulations:
+        if regulation in regulation_pdfs:
+            pdf_path = regulation_pdfs[regulation]
+            if os.path.exists(pdf_path):
+                pdf_content = extract_pdf(pdf_path)
+                if pdf_content:
+                    full_pdf_content += pdf_content + "\n\n"
+                    documents.extend(split_text(pdf_content))
+                    print(f"Loaded {regulation} PDF")
+                else:
+                    print(f"Failed to extract content from {regulation} PDF")
+            else:
+                print(f"PDF file for {regulation} not found at {pdf_path}")
+    # Load additional user-uploaded PDFs
+    if additional_pdfs is not None:
+        for pdf_file in additional_pdfs:
+            pdf_content = extract_pdf(pdf_file.name)
+            if pdf_content:
+                full_pdf_content += pdf_content + "\n\n"
+                documents.extend(split_text(pdf_content))
+                print(f"Loaded additional PDF: {pdf_file.name}")
+            else:
+                print(f"Failed to extract content from uploaded PDF: {pdf_file.name}")
+    if not documents:
+        pdfs_loaded = False
+        return "No PDFs were successfully loaded. Please check your selections and uploads."
+    print(f"Total documents loaded: {len(documents)}")
+    print(f"Total content length: {len(full_pdf_content)} characters")
+    vector_store = generate_embeddings(documents)
+    rag_chain = create_rag_chain(vector_store)
+    pdfs_loaded = True
+    return f"PDFs loaded and RAG system updated successfully! Loaded {len(documents)} document chunks."
 # Function to extract text from PDF
 def extract_pdf(pdf_path):
     try:
     chain = prompt | openai_client
     return chain.invoke({"response1": response1, "response2": response2}).content
+# Function to process the query
 def process_query(user_query):
     global rag_chain, full_pdf_content, pdfs_loaded
     return rag_response, gemini_resp, html_content
+# Function to convert the final response from Markdown to HTML
+def markdown_to_html(content):
+    return markdown2.markdown(content)
 # Gradio interface
 with gr.Blocks() as iface:
     gr.Markdown("# Data Protection Team")