Spaces:

ADOPLE
/

22nd_Century

Sleeping

App Files Files Community

KarthickAdopleAI committed on Feb 25

Commit

7f08f2f

verified ·

1 Parent(s): bc4ef14

Create app.py

Browse files

Files changed (1) hide show

app.py +316 -0

app.py ADDED Viewed

	@@ -0,0 +1,316 @@

+import streamlit as st
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_openai import AzureChatOpenAI
+from langchain.chains import create_retrieval_chain
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_community.vectorstores import FAISS
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_openai import OpenAIEmbeddings
+import pandas as pd
+import io
+import time
+from langchain.document_loaders import UnstructuredFileLoader
+from typing import List, Dict, Tuple
+from langchain_openai import AzureChatOpenAI,AzureOpenAIEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.text_splitter import CharacterTextSplitter
+class PDFExtract:
+    def __init__(self):
+      pass
+    def _extract_text_from_pdfs(self, file_paths: List[str]) -> List[str]:
+        """Extract text content from PDF files.
+        Args:
+            file_paths (List[str]): List of file paths.
+        Returns:
+            List[str]: Extracted text from the PDFs.
+        """
+        docs = []
+        loaders = [UnstructuredFileLoader(file_obj, strategy="fast") for file_obj in file_paths]
+        for loader in loaders:
+            docs.extend(loader.load())
+        return docs
+    def _split_text_into_chunks(self, text: str) -> List[str]:
+        """Split text into smaller chunks.
+        Args:
+            text (str): Input text to be split.
+        Returns:
+            List[str]: List of smaller text chunks.
+        """
+        text_splitter = CharacterTextSplitter(separator="\n", chunk_size=2000, chunk_overlap=0, length_function=len)
+        chunks = text_splitter.split_documents(text)
+        return chunks
+    def _create_vector_store_from_text_chunks(self, text_chunks: List[str]) -> FAISS:
+        """Create a vector store from text chunks.
+        Args:
+            text_chunks (List[str]): List of text chunks.
+        Returns:
+            FAISS: Vector store created from the text chunks.
+        """
+        embeddings = AzureOpenAIEmbeddings(
+                        azure_deployment="text-embedding-3-large",
+                    )
+        return FAISS.from_documents(documents=text_chunks, embedding=embeddings)
+    def main(self,file_paths: List[str]):
+      text = self._extract_text_from_pdfs(file_paths)
+      text_chunks = self._split_text_into_chunks(text)
+      vector_store = self._create_vector_store_from_text_chunks(text_chunks)
+      return vector_store
+# Set page configuration
+st.set_page_config(page_title="GASB Decision Flow", layout="wide")
+# Custom CSS for better UI
+st.markdown("""
+<style>
+    .uploadfile-container {
+        display: flex;
+        justify-content: center;
+        margin-bottom: 20px;
+    }
+    .chat-container {
+        margin-top: 20px;
+    }
+    .stApp {
+        max-width: 1200px;
+        margin: 0 auto;
+    }
+    .loader {
+        border: 8px solid #f3f3f3;
+        border-top: 8px solid #3498db;
+        border-radius: 50%;
+        width: 50px;
+        height: 50px;
+        animation: spin 1s linear infinite;
+        margin: 20px auto;
+    }
+    @keyframes spin {
+        0% { transform: rotate(0deg); }
+        100% { transform: rotate(360deg); }
+    }
+    /* Hide scrollbars but keep scrolling functionality */
+    ::-webkit-scrollbar {
+        width: 0px;
+        height: 0px;
+        background: transparent;
+    }
+    * {
+        -ms-overflow-style: none;
+        scrollbar-width: none;
+    }
+    div[data-testid="stVerticalBlock"] {
+        overflow-x: hidden;
+    }
+    .element-container, .stTextInput, .stButton {
+        overflow: visible !important;
+    }
+    /* Custom chat message styling */
+    .user-message-container {
+        display: flex;
+        justify-content: flex-end;
+        margin-bottom: 10px;
+    }
+    .st-emotion-cache-janbn0
+    {
+      margin-left: 3in;
+    }
+    .user-message {
+        background-color: #2b7dfa;
+        color: white;
+        border-radius: 18px 18px 0 18px;
+        padding: 10px 15px;
+        max-width: 70%;
+        text-align: right;
+    }
+    .assistant-message-container {
+        display: flex;
+        justify-content: flex-start;
+        margin-bottom: 10px;
+    }
+    .assistant-message {
+        background-color: #f1f1f1;
+        color: #333;
+        border-radius: 18px 18px 18px 0;
+        padding: 10px 15px;
+        max-width: 70%;
+    }
+</style>
+""", unsafe_allow_html=True)
+# Title and description
+st.title("22nd Century")
+st.markdown("Upload your document and ask questions to determine GASB compliance")
+# Initialize session state for chat history
+if 'messages' not in st.session_state:
+    st.session_state.messages = []
+if 'db' not in st.session_state:
+    st.session_state.db = None
+if 'file_processed' not in st.session_state:
+    st.session_state.file_processed = False
+# Function to process the uploaded file
+def process_file(uploaded_file):
+    with st.spinner("Processing document..."):
+        # Read file content
+        if uploaded_file.type == "application/pdf":
+            pdfextract = PDFExtract()
+            db = pdfextract.main([uploaded_file.name])
+        return db
+# Center the file uploader
+st.markdown('<div class="uploadfile-container">', unsafe_allow_html=True)
+uploaded_file = st.file_uploader("Upload your contract document (PDF, Word, or Text)", type=["pdf", "docx", "txt"])
+st.markdown('</div>', unsafe_allow_html=True)
+# Process the file when uploaded
+if uploaded_file and not st.session_state.file_processed:
+    db = process_file(uploaded_file)
+    if db:
+        st.session_state.db = db
+        st.session_state.file_processed = True
+        st.success(f"Document '{uploaded_file.name}' processed successfully!")
+# GASB decision flow logic
+if st.session_state.file_processed:
+    # Setup langchain components
+    retriever = st.session_state.db.as_retriever()
+    llm = AzureChatOpenAI(model='gpt-4o', temperature=0, max_tokens=3000)
+    system_prompt = (
+        "Use the given context to answer the question. Answer yes or no with justify the answer detailed. "
+        "If you don't know the answer, say you don't know. "
+        "Use three sentence maximum and keep the answer concise. "
+        """'GASB Do Not Apply' sentence include in the output for the following Questions Otherwise don't include:
+        Does the contract involve the use of software or capital assets? if answer is 'no' include 'GASB 87/96 Do Not Apply' in the answer.
+        Is the software an insignificant component to any fixed asset in the agreement? if answer is 'yes' include 'GASB 96 Do Not Apply' in the answer.
+        Is this a software that you are procuring? if answer is 'no' include 'GASB 96 Do Not Apply' in the answer.
+        Is it a perpetual license/agreement? if answer is 'yes' or 'no' include 'GASB 96 Do Not Apply' in the answer.
+        Lease Queries:{lease_queries} if 'yes' for all questions include 'GASB 87 Do Not Apply' in the answer.
+        Does the lease explicitly transfer ownership? if answer is 'no' include 'GASB 87 Do Not Apply' in the answer.
+        Must Return the Reason Why you answer yes or no.
+        """
+        "Context: {context}"
+    )
+    prompt = ChatPromptTemplate.from_messages(
+        [
+            ("system", system_prompt),
+            ("human", "{input}"),
+        ]
+    )
+    question_answer_chain = create_stuff_documents_chain(llm, prompt)
+    chain = create_retrieval_chain(retriever, question_answer_chain)
+    # Define flows
+    initial_flow = ["Does the contract involve the use of software or capital assets?", "Does this contract include software?"]
+    software_flow = [
+        "Is the software an insignificant component to any fixed asset in the agreement?",
+        "Is this a software that you are procuring?",
+        "Is it a perpetual license/agreement?"
+    ]
+    lease_flow = [
+        "Is this a lease of an intangible asset?",
+        "Is this a lease for supply contracts?",
+        "Is this a lease of inventory?",
+        "Does the lease explicitly transfer ownership?"
+    ]
+    # Chat container
+    st.markdown('<div class="chat-container">', unsafe_allow_html=True)
+    st.subheader("GASB Decision Flow Chat")
+    # Display chat messages
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.write(message["content"])
+    # Function to run the GASB decision flow
+    def run_gasb_flow():
+        with st.spinner("Running initial questions..."):
+            execute = True
+            for question in initial_flow:
+                # Add user question to chat
+                st.session_state.messages.append({"role": "user", "content": question})
+                with st.chat_message("user"):
+                    st.write(question)
+                # Get AI response
+                with st.spinner("Thinking..."):
+                    response = chain.invoke({"input": question, 'lease_queries': lease_flow})
+                    answer = response['answer']
+                # Add AI response to chat
+                st.session_state.messages.append({"role": "assistant", "content": answer})
+                with st.chat_message("assistant"):
+                    st.write(answer)
+                if "GASB" in answer:
+                    st.info("Flow stopped due to GASB answer.")
+                    execute = False
+                    break
+                time.sleep(1)  # Small delay for better UX
+            if execute:
+                if "software" in answer.lower():
+                    selected_flow = software_flow
+                    st.info("Continuing with software flow...")
+                else:
+                    selected_flow = lease_flow
+                    st.info("Continuing with lease flow...")
+                for question in selected_flow:
+                    # Add user question to chat
+                    st.session_state.messages.append({"role": "user", "content": question})
+                    with st.chat_message("user"):
+                        st.write(question)
+                    # Get AI response
+                    with st.spinner("Thinking..."):
+                        response = chain.invoke({"input": question, 'lease_queries': lease_flow})
+                        answer = response['answer']
+                    # Add AI response to chat
+                    st.session_state.messages.append({"role": "assistant", "content": answer})
+                    with st.chat_message("assistant"):
+                        st.write(answer)
+                    if "GASB" in answer:
+                        st.info("Flow stopped due to GASB answer.")
+                        break
+                    time.sleep(2)  # Small delay for better UX
+    # Custom question input
+    if st.session_state.file_processed and 'custom_mode' not in st.session_state:
+        if st.button("Start GASB Decision Flow"):
+            run_gasb_flow()
+            st.session_state.custom_mode = True
+    st.markdown('</div>', unsafe_allow_html=True)
+else:
+    st.info("Please upload a document to start the GASB decision flow")