import os
from pathlib import Path

import streamlit as st
st.set_page_config(layout="wide")

import fitz  # PyMuPDF, used to read the text of the uploaded PDFs

# Not used in the summary flow below:
import uuid
import chromadb
import pandas as pd
import spacy
from annotated_text import annotated_text, annotation
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

# The Space stores its OpenAI key under the secret name OPEN_API_KEY;
# expose it as OPENAI_API_KEY, the name the OpenAI client looks for.
os.environ['OPENAI_API_KEY'] = os.environ['OPEN_API_KEY']

st.title("Contracts Summary")

# Load the English model from SpaCy (also not used in the summary flow below).
nlp = spacy.load("en_core_web_md")
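# Sketch (optional, assumption: the key lives in the OPEN_API_KEY Space secret): a friendlier
# failure mode would be to check for the secret before the OPENAI_API_KEY mapping above, e.g.
#     if "OPEN_API_KEY" not in os.environ:
#         st.error("OPEN_API_KEY secret is not set")
#         st.stop()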
def util_upload_file_and_return_list_docs(uploaded_files):
    """Save each uploaded file to the working directory and open it with PyMuPDF."""
    list_docs = []
    list_save_path = []
    for uploaded_file in uploaded_files:
        save_path = Path(os.getcwd(), uploaded_file.name)
        with open(save_path, mode='wb') as w:
            w.write(uploaded_file.getvalue())
        docs = fitz.open(save_path)
        list_docs.append(docs)
        list_save_path.append(save_path)
    return list_docs, list_save_path
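# Note: writing to disk is not strictly required here; assuming the uploads are plain PDFs,
# PyMuPDF can also open them straight from bytes, e.g.:
#     docs = fitz.open(stream=uploaded_file.getvalue(), filetype="pdf")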
def util_get_list_page_and_passage(list_docs, list_save_path):
    """Concatenate the text of every page so each document becomes one string."""
    documents = []
    for ind_doc, docs in enumerate(list_docs):
        text = ''
        for txt_index, txt_page in enumerate(docs):
            text = text + txt_page.get_text()
        documents.append(text)
    return documents
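# Sketch (optional, not used below): if page boundaries matter later, a per-page variant
# could return a list of page texts per document instead of one flat string, e.g.:
#     def util_get_list_pages(list_docs):
#         return [[page.get_text() for page in docs] for docs in list_docs]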
documents = []
def get_summary_single_doc(text):
    """Summarize one contract's full text with LangChain's refine summarization chain."""
    from langchain.llms import OpenAI
    from langchain.chains.summarize import load_summarize_chain
    from langchain.text_splitter import CharacterTextSplitter
    from langchain.prompts import PromptTemplate
    from langchain.chat_models import ChatOpenAI
    from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

    LLM_KEY = os.environ.get("OPEN_API_KEY")

    # CharacterTextSplitter counts characters, so this yields ~3000-character chunks
    # with a small overlap between consecutive chunks.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=3000,
        chunk_overlap=20
    )
    # Create LangChain Documents from the raw text.
    texts = text_splitter.create_documents([text])

    prompt_template = """Write a concise summary of the following:
    {text}
    CONCISE SUMMARY:"""
    prompt = PromptTemplate.from_template(prompt_template)

    refine_template = (
        "Your job is to produce a final summary with key learnings.\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary "
        "(only if needed) with detailed context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "Given the new context, refine the original summary. "
        "If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)

    # Define the LLM; here we use OpenAI's gpt-3.5-turbo chat model.
    model_name = "gpt-3.5-turbo"
    llm = ChatOpenAI(temperature=0, openai_api_key=LLM_KEY, model_name=model_name)

    refine_chain = load_summarize_chain(
        llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
    )
    refine_outputs = refine_chain({'input_documents': texts})
    return refine_outputs['output_text']
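# Because return_intermediate_steps=True, the chain also returns the per-chunk refinements;
# a sketch of how they could be surfaced for inspection (not wired into the UI below):
#     for step in refine_outputs['intermediate_steps']:
#         st.write(step)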
with st.form("my_form"):
    multi = '''1. Download and upload a contract (PDF),
    e.g. https://www.barc.gov.in/tenders/GCC-LPS.pdf
    e.g. https://www.montrosecounty.net/DocumentCenter/View/823/Sample-Construction-Contract
    '''
    st.markdown(multi)
    multi = '''2. Press **Summary**.'''
    st.markdown(multi)
    multi = '''
    **A summary is attempted for each uploaded contract.**
    '''
    st.markdown(multi)

    list_docs = []
    list_save_path = []
    uploaded_files = st.file_uploader("Choose file(s)", accept_multiple_files=True)
    submitted = st.form_submit_button("Summary")
    if submitted and uploaded_files:
        list_docs, list_save_path = util_upload_file_and_return_list_docs(uploaded_files)
        documents = util_get_list_page_and_passage(list_docs, list_save_path)
        for index, item in enumerate(documents):
            st.write('Summary ' + str(index + 1) + ' :: ')
            st.write(get_summary_single_doc(item))
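# Sketch (optional): repeated submissions of the same file re-run the whole refine chain;
# if that becomes slow, get_summary_single_doc could be wrapped with Streamlit's cache
# (e.g. decorated with @st.cache_data) so identical texts are only summarized once.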