KarthickAdopleAI commited on
Commit
162607d
·
verified ·
1 Parent(s): ea9adb5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +317 -0
app.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
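+ # app.py - Streamlit GASB decision-flow app. (The Jupyter cell magic "%%writefile app.py" was removed from this line: it is notebook syntax, not Python, and makes the script fail with a SyntaxError when run.)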
2
+ import streamlit as st
3
+ from langchain_core.prompts import ChatPromptTemplate
4
+ from langchain_openai import AzureChatOpenAI
5
+ from langchain.chains import create_retrieval_chain
6
+ from langchain.chains.combine_documents import create_stuff_documents_chain
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
9
+ from langchain_openai import OpenAIEmbeddings
10
+ import pandas as pd
11
+ import io
12
+ import time
13
+
14
+ from langchain.document_loaders import UnstructuredFileLoader
15
+ from typing import List, Dict, Tuple
16
+ from langchain_openai import AzureChatOpenAI,AzureOpenAIEmbeddings
17
+ from langchain.vectorstores import FAISS
18
+ from langchain.text_splitter import CharacterTextSplitter
19
+
20
class PDFExtract:
    """Build a FAISS vector store from the content of one or more files.

    Pipeline: load every file with ``UnstructuredFileLoader``, split the
    loaded documents into chunks, embed the chunks with Azure OpenAI and
    index them in FAISS.
    """

    def __init__(self):
        pass

    def _extract_text_from_pdfs(self, file_paths: List[str]) -> list:
        """Load the given files into documents.

        Args:
            file_paths (List[str]): Paths of the files to load.

        Returns:
            list: The documents returned by each loader's ``load()`` call,
            concatenated in input order (empty when ``file_paths`` is empty).
        """
        docs = []
        for file_path in file_paths:
            loader = UnstructuredFileLoader(file_path, strategy="fast")
            docs.extend(loader.load())
        return docs

    def _split_text_into_chunks(self, text: list) -> list:
        """Split loaded documents into smaller chunks.

        Args:
            text (list): Documents as returned by
                ``_extract_text_from_pdfs`` (despite the parameter name this
                is a list of documents, not a single string).

        Returns:
            list: Chunked documents produced by ``split_documents``.
        """
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=2000,
            chunk_overlap=0,
            length_function=len,
        )
        return text_splitter.split_documents(text)

    def _create_vector_store_from_text_chunks(self, text_chunks: list) -> "FAISS":
        """Embed the chunks and index them in a FAISS vector store.

        Args:
            text_chunks (list): Chunked documents to index.

        Returns:
            FAISS: Vector store created from the chunks.
        """
        # NOTE(review): no endpoint/key is passed here, so the Azure OpenAI
        # connection settings presumably come from the environment - confirm.
        embeddings = AzureOpenAIEmbeddings(
            azure_deployment="text-embedding-3-large",
        )
        return FAISS.from_documents(documents=text_chunks, embedding=embeddings)

    def main(self, file_paths: List[str]):
        """Run the full pipeline and return the resulting vector store.

        Args:
            file_paths (List[str]): Paths of the files to index.

        Returns:
            The FAISS vector store built from the files.

        Raises:
            ValueError: If nothing could be extracted from the files, or the
                extracted content produced no chunks. Failing here gives a
                clear message instead of an obscure error from building an
                index out of an empty list.
        """
        documents = self._extract_text_from_pdfs(file_paths)
        if not documents:
            raise ValueError("No content could be extracted from the supplied file(s).")

        text_chunks = self._split_text_into_chunks(documents)
        if not text_chunks:
            raise ValueError("The supplied file(s) contain no text to index.")

        return self._create_vector_store_from_text_chunks(text_chunks)
68
# Page-level configuration (title shown in the browser tab, wide layout).
st.set_page_config(page_title="GASB Decision Flow", layout="wide")

# Stylesheet injected into the page: layout helpers, a spinner animation,
# hidden scrollbars and chat-bubble styling.
_CUSTOM_CSS = """
<style>
    .uploadfile-container {
        display: flex;
        justify-content: center;
        margin-bottom: 20px;
    }
    .chat-container {
        margin-top: 20px;
    }
    .stApp {
        max-width: 1200px;
        margin: 0 auto;
    }
    .loader {
        border: 8px solid #f3f3f3;
        border-top: 8px solid #3498db;
        border-radius: 50%;
        width: 50px;
        height: 50px;
        animation: spin 1s linear infinite;
        margin: 20px auto;
    }
    @keyframes spin {
        0% { transform: rotate(0deg); }
        100% { transform: rotate(360deg); }
    }

    /* Hide scrollbars but keep scrolling functionality */
    ::-webkit-scrollbar {
        width: 0px;
        height: 0px;
        background: transparent;
    }

    * {
        -ms-overflow-style: none;
        scrollbar-width: none;
    }

    div[data-testid="stVerticalBlock"] {
        overflow-x: hidden;
    }

    .element-container, .stTextInput, .stButton {
        overflow: visible !important;
    }

    /* Custom chat message styling */
    .user-message-container {
        display: flex;
        justify-content: flex-end;
        margin-bottom: 10px;
    }
    .st-emotion-cache-janbn0
    {
        margin-left: 3in;
    }
    .user-message {
        background-color: #2b7dfa;
        color: white;
        border-radius: 18px 18px 0 18px;
        padding: 10px 15px;
        max-width: 70%;
        text-align: right;
    }

    .assistant-message-container {
        display: flex;
        justify-content: flex-start;
        margin-bottom: 10px;
    }

    .assistant-message {
        background-color: #f1f1f1;
        color: #333;
        border-radius: 18px 18px 18px 0;
        padding: 10px 15px;
        max-width: 70%;
    }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Heading and one-line instructions.
st.title("22nd Century")
st.markdown("Upload your document and ask questions to determine GASB compliance")

# Seed the session state on the first run only; existing values are kept so
# the chat history, vector store and processed flag survive Streamlit reruns.
_SESSION_DEFAULTS = {
    "messages": [],
    "db": None,
    "file_processed": False,
}
for _state_key, _initial_value in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
168
+
169
+ # Function to process the uploaded file
170
def process_file(uploaded_file):
    """Index an uploaded PDF and return its vector store.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``; its
            ``type`` and in-memory content are read here.

    Returns:
        The vector store produced by ``PDFExtract.main``, or ``None`` when
        the upload is not a PDF (previously this case raised
        ``UnboundLocalError`` because ``db`` was never assigned).
    """
    import os
    import tempfile

    if uploaded_file.type != "application/pdf":
        st.warning("Only PDF documents can be processed at the moment.")
        return None

    with st.spinner("Processing document..."):
        # The upload only exists in memory and ``uploaded_file.name`` is just
        # a file name, not a path on disk. The loader needs a real file, so
        # spill the bytes to a temporary one first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(uploaded_file.getvalue())
            tmp_path = tmp.name
        try:
            return PDFExtract().main([tmp_path])
        finally:
            # The vector store is fully built by now; the copy is not needed.
            os.remove(tmp_path)
178
+
179
# Center the file uploader
st.markdown('<div class="uploadfile-container">', unsafe_allow_html=True)
uploaded_file = st.file_uploader(
    "Upload your contract document (PDF, Word, or Text)",
    type=["pdf", "docx", "txt"],
)
st.markdown('</div>', unsafe_allow_html=True)

# Process the file when uploaded.
if uploaded_file:
    # Name + size identify the upload that the current index was built from.
    upload_key = (uploaded_file.name, uploaded_file.size)

    if st.session_state.get("processed_file_key") != upload_key:
        # A different document was supplied: drop everything derived from the
        # previous one so the chat never answers from a stale index.
        st.session_state.db = None
        st.session_state.file_processed = False
        st.session_state.messages = []
        st.session_state.pop("custom_mode", None)
        st.session_state.pop("processed_file_key", None)

    if not st.session_state.file_processed:
        db = process_file(uploaded_file)
        # Explicit None check: do not rely on the store object's truthiness.
        if db is not None:
            st.session_state.db = db
            st.session_state.file_processed = True
            st.session_state.processed_file_key = upload_key
            st.success(f"Document '{uploaded_file.name}' processed successfully!")
191
+
192
# GASB decision flow logic
if st.session_state.file_processed:
    # Setup langchain components: retrieve from the indexed document and let
    # the chat model answer from the retrieved context.
    retriever = st.session_state.db.as_retriever()
    llm = AzureChatOpenAI(model='gpt-4o', temperature=0, max_tokens=3000)

    # NOTE(review): the wording below is kept verbatim because it drives the
    # model's answers. The perpetual-license rule ("'yes' or 'no'") looks
    # unintended - confirm with the domain owner before changing it.
    system_prompt = (
        "Use the given context to answer the question. Answer yes or no with justify the answer detailed. "
        "If you don't know the answer, say you don't know. "
        "Use three sentence maximum and keep the answer concise. "
        """'GASB Do Not Apply' sentence include in the output for the following Questions Otherwise don't include:
    Does the contract involve the use of software or capital assets? if answer is 'no' include 'GASB 87/96 Do Not Apply' in the answer.
    Is the software an insignificant component to any fixed asset in the agreement? if answer is 'yes' include 'GASB 96 Do Not Apply' in the answer.
    Is this a software that you are procuring? if answer is 'no' include 'GASB 96 Do Not Apply' in the answer.
    Is it a perpetual license/agreement? if answer is 'yes' or 'no' include 'GASB 96 Do Not Apply' in the answer.

    Lease Queries:{lease_queries} if 'yes' for all questions include 'GASB 87 Do Not Apply' in the answer.
    Does the lease explicitly transfer ownership? if answer is 'no' include 'GASB 87 Do Not Apply' in the answer.

    Must Return the Reason Why you answer yes or no.
    """
        "Context: {context}"
    )

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    chain = create_retrieval_chain(retriever, question_answer_chain)

    # Define flows
    initial_flow = [
        "Does the contract involve the use of software or capital assets?",
        "Does this contract include software?",
    ]

    software_flow = [
        "Is the software an insignificant component to any fixed asset in the agreement?",
        "Is this a software that you are procuring?",
        "Is it a perpetual license/agreement?",
    ]

    lease_flow = [
        "Is this a lease of an intangible asset?",
        "Is this a lease for supply contracts?",
        "Is this a lease of inventory?",
        "Does the lease explicitly transfer ownership?",
    ]

    # Chat container
    st.markdown('<div class="chat-container">', unsafe_allow_html=True)
    st.subheader("GASB Decision Flow Chat")

    # Replay the stored conversation (the script reruns on every interaction).
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

    def _ask(question):
        """Send one question through the chain, render and record both turns.

        Returns:
            str: The chain's answer text.
        """
        st.session_state.messages.append({"role": "user", "content": question})
        with st.chat_message("user"):
            st.write(question)

        with st.spinner("Thinking..."):
            response = chain.invoke({"input": question, 'lease_queries': lease_flow})
        answer = response['answer']

        st.session_state.messages.append({"role": "assistant", "content": answer})
        with st.chat_message("assistant"):
            st.write(answer)
        return answer

    def _run_questions(questions, pause_seconds):
        """Ask ``questions`` in order, stopping at the first "GASB" answer.

        Returns:
            tuple: ``(completed, last_answer)``; ``completed`` is False when
            an answer mentioning "GASB" ended the flow early.
        """
        last_answer = ""
        for question in questions:
            last_answer = _ask(question)
            # The prompt asks the model to emit a "GASB ... Do Not Apply"
            # sentence when a rule is ruled out; any mention ends the flow.
            if "GASB" in last_answer:
                st.info("Flow stopped due to GASB answer.")
                return False, last_answer
            time.sleep(pause_seconds)  # Small delay for better UX
        return True, last_answer

    def _is_software_contract(answer):
        """Decide whether the "include software?" answer is affirmative.

        A leading yes/no verdict wins. Only when the answer has neither does
        it fall back to the original keyword test, which on its own also
        matched negative answers such as "No, ... does not include software".
        """
        import re

        verdict = re.match(r"\W*(yes|no)\b", answer, re.IGNORECASE)
        if verdict:
            return verdict.group(1).lower() == "yes"
        return "software" in answer.lower()

    def run_gasb_flow():
        """Run the initial questions, then the software or the lease flow.

        The follow-up flow is chosen from the answer to the last initial
        question; either stage ends early on an answer mentioning "GASB".
        """
        with st.spinner("Running initial questions..."):
            completed, last_answer = _run_questions(initial_flow, 1)
            if not completed:
                return

            if _is_software_contract(last_answer):
                selected_flow = software_flow
                st.info("Continuing with software flow...")
            else:
                selected_flow = lease_flow
                st.info("Continuing with lease flow...")

            _run_questions(selected_flow, 2)

    # Offer the start button until the flow has been run once; this branch
    # already guarantees that a document has been processed.
    if 'custom_mode' not in st.session_state and st.button("Start GASB Decision Flow"):
        run_gasb_flow()
        st.session_state.custom_mode = True

    st.markdown('</div>', unsafe_allow_html=True)
else:
    st.info("Please upload a document to start the GASB decision flow")