Spaces:

scholarly360
/

contracts-summary

Runtime error

File size: 4,403 Bytes

370ba10

import streamlit as st
st.set_page_config(layout="wide")
from annotated_text import annotated_text, annotation
import fitz
import os
import chromadb
import uuid
from pathlib import Path

# Copy the value of the OPEN_API_KEY environment variable into OPENAI_API_KEY.
# Subscripting os.environ raises KeyError at startup when OPEN_API_KEY is unset.
# NOTE(review): presumably OPENAI_API_KEY is the name the OpenAI client reads — confirm.
os.environ['OPENAI_API_KEY'] = os.environ['OPEN_API_KEY']
# Page heading rendered by Streamlit.
st.title("Contracts Summary ")
import pandas as pd

from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import spacy
# Load the spaCy "en_core_web_md" English pipeline once, at import time.
# NOTE(review): `nlp` is not referenced anywhere else in the visible source —
# confirm it is still needed before paying the load cost on every start.
nlp = spacy.load("en_core_web_md")

def util_upload_file_and_return_list_docs(uploaded_files):
    """Save each uploaded file into the working directory and open it with fitz.

    Args:
        uploaded_files: Iterable of upload objects exposing ``.name`` and
            ``.getvalue()`` (the caller passes the result of
            ``st.file_uploader``).

    Returns:
        A tuple ``(list_docs, list_save_path)``: the objects returned by
        ``fitz.open`` and the ``Path`` each upload was written to, both in the
        same order as ``uploaded_files``.
    """
    list_docs = []
    list_save_path = []
    for uploaded_file in uploaded_files:
        # The file name is supplied by the client. Keep only its final path
        # component so an absolute or nested name cannot make us write
        # outside the working directory.
        safe_name = Path(uploaded_file.name).name
        save_path = Path(os.getcwd(), safe_name)
        save_path.write_bytes(uploaded_file.getvalue())
        list_docs.append(fitz.open(save_path))
        list_save_path.append(save_path)
    return list_docs, list_save_path

    
def util_get_list_page_and_passage(list_docs, list_save_path):
    """Return the concatenated page text of every document.

    Args:
        list_docs: Iterable of documents; iterating a document yields page
            objects that expose ``get_text()``.
        list_save_path: Accepted for interface compatibility; not used here.

    Returns:
        A list with one string per document, holding the text of all its
        pages joined in page order (an empty string for a document with no
        pages).
    """
    return [
        "".join(page.get_text() for page in pdf)
        for pdf in list_docs
    ]
    

    

# Module-level list of extracted document texts; reassigned inside the form
# below once files have been submitted.
documents = []


def get_summary_single_doc(text):
    """Summarise one document's text with a LangChain "refine" summarize chain.

    The text is split on newlines into chunks (``chunk_size=3000``,
    ``chunk_overlap=20``) and the chunks are passed to a summarize chain built
    with ``chain_type="refine"`` on top of the ``gpt-3.5-turbo`` chat model.

    Args:
        text: Full plain text of the document.

    Returns:
        The chain's ``output_text``, or an empty string when ``text`` is empty
        or contains only whitespace.
    """
    # Nothing to summarise (e.g. a PDF with no extractable text): return early
    # instead of invoking the chain with no input, and before paying for the
    # LangChain imports below.
    if not text or not text.strip():
        return ""

    from langchain.chains.summarize import load_summarize_chain
    from langchain.chat_models import ChatOpenAI
    from langchain.prompts import PromptTemplate
    from langchain.text_splitter import CharacterTextSplitter

    LLM_KEY = os.environ.get("OPEN_API_KEY")
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=3000,
        chunk_overlap=20,
    )
    # Wrap the raw text into Document chunks for the chain.
    texts = text_splitter.create_documents([text])

    prompt_template = """Write a concise summary of the following:
    {text}
    CONCISE SUMMARY:"""
    prompt = PromptTemplate.from_template(prompt_template)

    # Adjacent string literals are concatenated with no separator, so every
    # piece must end with an explicit space or newline; otherwise sentences
    # are glued together in the prompt sent to the model.
    refine_template = (
        "Your job is to produce a final summary with key learnings\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary "
        "(only if needed) with detailed context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "Given the new context, refine the original summary.\n"
        "If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)

    # Chat model used for both the initial and the refine prompts.
    model_name = "gpt-3.5-turbo"
    llm = ChatOpenAI(temperature=0, openai_api_key=LLM_KEY, model_name=model_name)

    refine_chain = load_summarize_chain(
        llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
    )
    refine_outputs = refine_chain({'input_documents': texts})
    return refine_outputs['output_text']
    

# Upload form: shows usage instructions, collects one or more PDF files and,
# on submit, writes a summary for each uploaded document.
with st.form("my_form"):
    multi = '''1. Download and Upload contract (PDF) .
    
    e.g. https://www.barc.gov.in/tenders/GCC-LPS.pdf
    
    e.g. https://www.montrosecounty.net/DocumentCenter/View/823/Sample-Construction-Contract
    '''
    st.markdown(multi)
    multi = '''2. Press Summary .'''
    st.markdown(multi)
    multi = '''
    ** Attempt is made for summary ** \n
    '''
    st.markdown(multi)

    list_docs = []
    list_save_path = []
    uploaded_files = st.file_uploader("Choose file(s)", accept_multiple_files=True)
    submitted = st.form_submit_button("Summary")

    if submitted:
        # With accept_multiple_files=True the uploader yields a list, which is
        # empty (not None) when nothing was selected, so test truthiness and
        # tell the user instead of silently doing nothing.
        if not uploaded_files:
            st.warning("Please upload at least one PDF file before pressing Summary.")
        else:
            list_docs, list_save_path = util_upload_file_and_return_list_docs(uploaded_files)
            documents = util_get_list_page_and_passage(list_docs, list_save_path)
            for index, item in enumerate(documents):
                st.write('Summary' + str(index+1) +  ' :: ')
                st.write(get_summary_single_doc(item))