KarthickAdopleAI committed on
Commit
7f08f2f
·
verified ·
1 Parent(s): bc4ef14

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +316 -0
app.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain_core.prompts import ChatPromptTemplate
3
+ from langchain_openai import AzureChatOpenAI
4
+ from langchain.chains import create_retrieval_chain
5
+ from langchain.chains.combine_documents import create_stuff_documents_chain
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
8
+ from langchain_openai import OpenAIEmbeddings
9
+ import pandas as pd
10
+ import io
11
+ import time
12
+
13
+ from langchain.document_loaders import UnstructuredFileLoader
14
+ from typing import List, Dict, Tuple
15
+ from langchain_openai import AzureChatOpenAI,AzureOpenAIEmbeddings
16
+ from langchain.vectorstores import FAISS
17
+ from langchain.text_splitter import CharacterTextSplitter
18
+
19
class PDFExtract:
    """Build a FAISS vector store from files on disk.

    The pipeline is: load every file with ``UnstructuredFileLoader``, split the
    loaded documents into chunks of at most 2000 characters, embed the chunks
    with Azure OpenAI embeddings and index them with FAISS.
    """

    def __init__(self):
        """Create an extractor; no configuration is stored on the instance."""
        pass

    def _extract_text_from_pdfs(self, file_paths: List[str]) -> list:
        """Load every file and collect the resulting documents.

        Args:
            file_paths (List[str]): Paths of the files to load.
        Returns:
            list: The document objects returned by each loader's ``load()``,
                concatenated in input order. These are loader documents, not
                plain strings.
        """
        docs = []
        for file_path in file_paths:
            loader = UnstructuredFileLoader(file_path, strategy="fast")
            docs.extend(loader.load())
        return docs

    def _split_text_into_chunks(self, text: list) -> list:
        """Split loaded documents into smaller chunks.

        Args:
            text (list): Documents as returned by ``_extract_text_from_pdfs``
                (the parameter name is historical; it is passed to
                ``split_documents``, so it must hold documents, not a string).
        Returns:
            list: Chunked documents; an empty list when there is no input.
        """
        if not text:
            # Nothing to split; skip building a splitter for an empty input.
            return []

        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=2000,
            chunk_overlap=0,
            length_function=len,
        )
        return text_splitter.split_documents(text)

    def _create_vector_store_from_text_chunks(self, text_chunks: list) -> "FAISS":
        """Embed the chunks and index them in a FAISS vector store.

        Args:
            text_chunks (list): Chunked documents from
                ``_split_text_into_chunks``.
        Returns:
            FAISS: Vector store created from the chunks.
        Raises:
            ValueError: If ``text_chunks`` is empty.
        """
        if not text_chunks:
            # Fail early with an actionable message instead of letting an
            # empty input surface as an opaque error inside the
            # embedding / index construction call.
            raise ValueError(
                "No text could be extracted from the document(s); "
                "cannot build a vector store."
            )

        embeddings = AzureOpenAIEmbeddings(
            azure_deployment="text-embedding-3-large",
        )
        return FAISS.from_documents(documents=text_chunks, embedding=embeddings)

    def main(self, file_paths: List[str]):
        """Run the full load -> split -> index pipeline.

        Args:
            file_paths (List[str]): Paths of the files to index.
        Returns:
            FAISS: Vector store built from the content of all files.
        Raises:
            ValueError: If no content could be extracted from the files.
        """
        text = self._extract_text_from_pdfs(file_paths)
        text_chunks = self._split_text_into_chunks(text)
        vector_store = self._create_vector_store_from_text_chunks(text_chunks)
        return vector_store
67
# Browser tab title and wide page layout.
st.set_page_config(page_title="GASB Decision Flow", layout="wide")

# Stylesheet injected into the page: uploader/chat containers, a spinner
# animation, hidden scrollbars and chat-bubble styling.
_CUSTOM_CSS = """
<style>
.uploadfile-container {
display: flex;
justify-content: center;
margin-bottom: 20px;
}
.chat-container {
margin-top: 20px;
}
.stApp {
max-width: 1200px;
margin: 0 auto;
}
.loader {
border: 8px solid #f3f3f3;
border-top: 8px solid #3498db;
border-radius: 50%;
width: 50px;
height: 50px;
animation: spin 1s linear infinite;
margin: 20px auto;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}

/* Hide scrollbars but keep scrolling functionality */
::-webkit-scrollbar {
width: 0px;
height: 0px;
background: transparent;
}

* {
-ms-overflow-style: none;
scrollbar-width: none;
}

div[data-testid="stVerticalBlock"] {
overflow-x: hidden;
}

.element-container, .stTextInput, .stButton {
overflow: visible !important;
}

/* Custom chat message styling */
.user-message-container {
display: flex;
justify-content: flex-end;
margin-bottom: 10px;
}
.st-emotion-cache-janbn0
{
margin-left: 3in;
}
.user-message {
background-color: #2b7dfa;
color: white;
border-radius: 18px 18px 0 18px;
padding: 10px 15px;
max-width: 70%;
text-align: right;
}

.assistant-message-container {
display: flex;
justify-content: flex-start;
margin-bottom: 10px;
}

.assistant-message {
background-color: #f1f1f1;
color: #333;
border-radius: 18px 18px 18px 0;
padding: 10px 15px;
max-width: 70%;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Page heading and a one-line usage hint.
st.title("22nd Century")
st.markdown("Upload your document and ask questions to determine GASB compliance")

# Seed the per-session values on first run only: the chat history, the vector
# store for the uploaded document, and the "document already indexed" flag.
for _state_key, _initial_value in (
    ("messages", []),
    ("db", None),
    ("file_processed", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
167
+
168
+ # Function to process the uploaded file
169
def process_file(uploaded_file):
    """Index an uploaded PDF and return its vector store.

    Args:
        uploaded_file: The file object returned by ``st.file_uploader``.

    Returns:
        The vector store produced by ``PDFExtract.main``, or ``None`` when the
        upload is not a PDF.
    """
    import os
    import tempfile

    if uploaded_file.type != "application/pdf":
        # The uploader also accepts other types, but only PDFs are indexed
        # here. Return None explicitly instead of falling through to an
        # unbound local variable.
        st.error("Only PDF documents can be processed at the moment.")
        return None

    with st.spinner("Processing document..."):
        # ``uploaded_file.name`` is only the client-side file name, not a path
        # on this machine, so persist the uploaded bytes to a temporary file
        # that the loader can open.
        suffix = os.path.splitext(uploaded_file.name)[1] or ".pdf"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_path = tmp_file.name

        try:
            pdfextract = PDFExtract()
            return pdfextract.main([tmp_path])
        finally:
            # The index is held in memory; the temporary copy is no longer
            # needed once indexing finished (or failed).
            os.remove(tmp_path)
177
+
178
# File uploader, emitted between the opening and closing markup of the
# "uploadfile-container" element.
st.markdown('<div class="uploadfile-container">', unsafe_allow_html=True)
uploaded_file = st.file_uploader("Upload your contract document (PDF, Word, or Text)", type=["pdf", "docx", "txt"])
st.markdown('</div>', unsafe_allow_html=True)

# Index the upload once per document. The script re-runs on every
# interaction, so the (name, size) of the last indexed upload is remembered
# and compared to detect when the user replaced the document.
if uploaded_file is not None:
    upload_signature = (uploaded_file.name, uploaded_file.size)

    if st.session_state.get("processed_upload") != upload_signature:
        # New or replaced document: drop everything derived from the previous
        # one so answers are never served from a stale index.
        st.session_state.file_processed = False
        st.session_state.db = None
        st.session_state.messages = []
        st.session_state.pop("custom_mode", None)

    if not st.session_state.file_processed:
        db = process_file(uploaded_file)
        if db is not None:
            st.session_state.db = db
            st.session_state.file_processed = True
            st.session_state.processed_upload = upload_signature
            st.success(f"Document '{uploaded_file.name}' processed successfully!")
190
+
191
# GASB decision flow: only available once a document has been indexed.
if st.session_state.file_processed:
    # Retrieval chain: retriever over the indexed document + chat model.
    retriever = st.session_state.db.as_retriever()
    llm = AzureChatOpenAI(model='gpt-4o', temperature=0, max_tokens=3000)

    # The prompt expects three variables: {lease_queries}, {context}
    # (filled by the retrieval chain) and {input} (the question).
    system_prompt = (
        "Use the given context to answer the question. Answer yes or no with justify the answer detailed. "
        "If you don't know the answer, say you don't know. "
        "Use three sentence maximum and keep the answer concise. "
        """'GASB Do Not Apply' sentence include in the output for the following Questions Otherwise don't include:
Does the contract involve the use of software or capital assets? if answer is 'no' include 'GASB 87/96 Do Not Apply' in the answer.
Is the software an insignificant component to any fixed asset in the agreement? if answer is 'yes' include 'GASB 96 Do Not Apply' in the answer.
Is this a software that you are procuring? if answer is 'no' include 'GASB 96 Do Not Apply' in the answer.
Is it a perpetual license/agreement? if answer is 'yes' or 'no' include 'GASB 96 Do Not Apply' in the answer.

Lease Queries:{lease_queries} if 'yes' for all questions include 'GASB 87 Do Not Apply' in the answer.
Does the lease explicitly transfer ownership? if answer is 'no' include 'GASB 87 Do Not Apply' in the answer.

Must Return the Reason Why you answer yes or no.
"""
        "Context: {context}"
    )

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    chain = create_retrieval_chain(retriever, question_answer_chain)

    # Question lists for each stage of the decision flow.
    initial_flow = ["Does the contract involve the use of software or capital assets?", "Does this contract include software?"]

    software_flow = [
        "Is the software an insignificant component to any fixed asset in the agreement?",
        "Is this a software that you are procuring?",
        "Is it a perpetual license/agreement?"
    ]

    lease_flow = [
        "Is this a lease of an intangible asset?",
        "Is this a lease for supply contracts?",
        "Is this a lease of inventory?",
        "Does the lease explicitly transfer ownership?"
    ]

    # Chat container
    st.markdown('<div class="chat-container">', unsafe_allow_html=True)
    st.subheader("GASB Decision Flow Chat")

    # Replay the stored conversation on every script run.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

    def _ask_and_record(question):
        """Show a question, query the chain, show and store the answer.

        Returns:
            str: The answer text returned by the retrieval chain.
        """
        st.session_state.messages.append({"role": "user", "content": question})
        with st.chat_message("user"):
            st.write(question)

        with st.spinner("Thinking..."):
            response = chain.invoke({"input": question, 'lease_queries': lease_flow})
            answer = response['answer']

        st.session_state.messages.append({"role": "assistant", "content": answer})
        with st.chat_message("assistant"):
            st.write(answer)
        return answer

    def _run_questions(questions, delay_seconds):
        """Ask questions in order, stopping at the first answer mentioning GASB.

        Args:
            questions: Questions to ask, in order.
            delay_seconds: Pause after each answer that does not stop the flow.

        Returns:
            tuple: ``(stopped, last_answer)`` where ``stopped`` is True when an
                answer contained "GASB" and ``last_answer`` is the most recent
                answer ("" when ``questions`` is empty).
        """
        answer = ""
        for question in questions:
            answer = _ask_and_record(question)
            if "GASB" in answer:
                st.info("Flow stopped due to GASB answer.")
                return True, answer
            time.sleep(delay_seconds)  # Small delay for better UX
        return False, answer

    def _is_affirmative(answer):
        """Return True when the answer starts with "yes".

        Leading whitespace, quotes and markdown emphasis are ignored so
        answers such as "**Yes**, ..." are recognised.
        """
        return answer.lstrip(" \t\r\n*_#>\"'").lower().startswith("yes")

    def run_gasb_flow():
        """Run the initial questions, then the software or lease follow-ups.

        The flow ends as soon as any answer mentions GASB. Otherwise the
        answer to the last initial question ("Does this contract include
        software?") selects the follow-up list.
        """
        with st.spinner("Running initial questions..."):
            stopped, last_answer = _run_questions(initial_flow, 1)
            if stopped:
                return

            # Branch on the yes/no verdict rather than on the word "software":
            # a negative answer ("No, the contract does not include
            # software") also contains that word and would always select the
            # software flow.
            if _is_affirmative(last_answer):
                selected_flow = software_flow
                st.info("Continuing with software flow...")
            else:
                selected_flow = lease_flow
                st.info("Continuing with lease flow...")

            _run_questions(selected_flow, 2)

    # Offer the start button until the flow has been run once this session.
    if st.session_state.file_processed and 'custom_mode' not in st.session_state:
        if st.button("Start GASB Decision Flow"):
            run_gasb_flow()
            st.session_state.custom_mode = True

    st.markdown('</div>', unsafe_allow_html=True)
else:
    st.info("Please upload a document to start the GASB decision flow")