jeremierostan committed on
Commit
bfc5546
·
verified ·
1 Parent(s): 2957c8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -56
app.py CHANGED
@@ -36,6 +36,53 @@ vector_store = None
36
  rag_chain = None
37
  pdfs_loaded = False
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  # Function to extract text from PDF
40
  def extract_pdf(pdf_path):
41
  try:
@@ -99,62 +146,7 @@ def generate_final_response(response1, response2):
99
  chain = prompt | openai_client
100
  return chain.invoke({"response1": response1, "response2": response2}).content
101
 
102
- def markdown_to_html(content):
103
- return markdown2.markdown(content)
104
-
105
- def load_pdfs(gdpr, ferpa, coppa, additional_pdfs):
106
- global full_pdf_content, vector_store, rag_chain, pdfs_loaded
107
-
108
- documents = []
109
- full_pdf_content = ""
110
-
111
- # Load selected regulation PDFs
112
- selected_regulations = []
113
- if gdpr:
114
- selected_regulations.append("GDPR")
115
- if ferpa:
116
- selected_regulations.append("FERPA")
117
- if coppa:
118
- selected_regulations.append("COPPA")
119
-
120
- for regulation in selected_regulations:
121
- if regulation in regulation_pdfs:
122
- pdf_path = regulation_pdfs[regulation]
123
- if os.path.exists(pdf_path):
124
- pdf_content = extract_pdf(pdf_path)
125
- if pdf_content:
126
- full_pdf_content += pdf_content + "\n\n"
127
- documents.extend(split_text(pdf_content))
128
- print(f"Loaded {regulation} PDF")
129
- else:
130
- print(f"Failed to extract content from {regulation} PDF")
131
- else:
132
- print(f"PDF file for {regulation} not found at {pdf_path}")
133
-
134
- # Load additional user-uploaded PDFs
135
- if additional_pdfs is not None:
136
- for pdf_file in additional_pdfs:
137
- pdf_content = extract_pdf(pdf_file.name)
138
- if pdf_content:
139
- full_pdf_content += pdf_content + "\n\n"
140
- documents.extend(split_text(pdf_content))
141
- print(f"Loaded additional PDF: {pdf_file.name}")
142
- else:
143
- print(f"Failed to extract content from uploaded PDF: {pdf_file.name}")
144
-
145
- if not documents:
146
- pdfs_loaded = False
147
- return "No PDFs were successfully loaded. Please check your selections and uploads."
148
-
149
- print(f"Total documents loaded: {len(documents)}")
150
- print(f"Total content length: {len(full_pdf_content)} characters")
151
-
152
- vector_store = generate_embeddings(documents)
153
- rag_chain = create_rag_chain(vector_store)
154
-
155
- pdfs_loaded = True
156
- return f"PDFs loaded and RAG system updated successfully! Loaded {len(documents)} document chunks."
157
-
158
  def process_query(user_query):
159
  global rag_chain, full_pdf_content, pdfs_loaded
160
 
@@ -176,6 +168,10 @@ def process_query(user_query):
176
 
177
  return rag_response, gemini_resp, html_content
178
 
 
 
 
 
179
  # Gradio interface
180
  with gr.Blocks() as iface:
181
  gr.Markdown("# Data Protection Team")
 
36
  rag_chain = None
37
  pdfs_loaded = False
38
 
39
+ # Function to load the PDFs for the selected regulations and any additional uploaded PDFs
40
+ def load_pdfs(selected_regulations, additional_pdfs):
41
+ global full_pdf_content, vector_store, rag_chain, pdfs_loaded
42
+
43
+ documents = []
44
+ full_pdf_content = ""
45
+
46
+ print(f"Selected regulations: {selected_regulations}") # Debug print
47
+
48
+ for regulation in selected_regulations:
49
+ if regulation in regulation_pdfs:
50
+ pdf_path = regulation_pdfs[regulation]
51
+ if os.path.exists(pdf_path):
52
+ pdf_content = extract_pdf(pdf_path)
53
+ if pdf_content:
54
+ full_pdf_content += pdf_content + "\n\n"
55
+ documents.extend(split_text(pdf_content))
56
+ print(f"Loaded {regulation} PDF")
57
+ else:
58
+ print(f"Failed to extract content from {regulation} PDF")
59
+ else:
60
+ print(f"PDF file for {regulation} not found at {pdf_path}")
61
+
62
+ # Load additional user-uploaded PDFs
63
+ if additional_pdfs is not None:
64
+ for pdf_file in additional_pdfs:
65
+ pdf_content = extract_pdf(pdf_file.name)
66
+ if pdf_content:
67
+ full_pdf_content += pdf_content + "\n\n"
68
+ documents.extend(split_text(pdf_content))
69
+ print(f"Loaded additional PDF: {pdf_file.name}")
70
+ else:
71
+ print(f"Failed to extract content from uploaded PDF: {pdf_file.name}")
72
+
73
+ if not documents:
74
+ pdfs_loaded = False
75
+ return "No PDFs were successfully loaded. Please check your selections and uploads."
76
+
77
+ print(f"Total documents loaded: {len(documents)}")
78
+ print(f"Total content length: {len(full_pdf_content)} characters")
79
+
80
+ vector_store = generate_embeddings(documents)
81
+ rag_chain = create_rag_chain(vector_store)
82
+
83
+ pdfs_loaded = True
84
+ return f"PDFs loaded and RAG system updated successfully! Loaded {len(documents)} document chunks."
85
+
86
  # Function to extract text from PDF
87
  def extract_pdf(pdf_path):
88
  try:
 
146
  chain = prompt | openai_client
147
  return chain.invoke({"response1": response1, "response2": response2}).content
148
 
149
+ # Function to process the query
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  def process_query(user_query):
151
  global rag_chain, full_pdf_content, pdfs_loaded
152
 
 
168
 
169
  return rag_response, gemini_resp, html_content
170
 
171
+ # Function to convert the final response from Markdown to HTML
172
+ def markdown_to_html(content):
173
+ return markdown2.markdown(content)
174
+
175
  # Gradio interface
176
  with gr.Blocks() as iface:
177
  gr.Markdown("# Data Protection Team")