# Streamlit app: upload contract PDFs and generate LLM summaries.
# --- Imports and app-wide setup ---
import os
import uuid
from pathlib import Path

import chromadb
import fitz  # PyMuPDF
import pandas as pd
import spacy
import streamlit as st
from annotated_text import annotated_text, annotation
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.schema import Document
from langchain.vectorstores import Chroma

# set_page_config must be the first Streamlit call in the script.
st.set_page_config(layout="wide")

# LangChain/OpenAI clients read OPENAI_API_KEY, but this deployment stores the
# secret under OPEN_API_KEY — mirror it. A missing secret raises KeyError here,
# which fails fast at startup rather than mid-request.
os.environ['OPENAI_API_KEY'] = os.environ['OPEN_API_KEY']

st.title("Contracts Summary ")

# Load the English model from SpaCy (medium pipeline, includes word vectors).
nlp = spacy.load("en_core_web_md")
def util_upload_file_and_return_list_docs(uploaded_files):
    """Persist each uploaded file to the working directory and open it with PyMuPDF.

    Parameters
    ----------
    uploaded_files : iterable
        Streamlit UploadedFile objects; each exposes ``.name`` and ``.getvalue()``.

    Returns
    -------
    tuple[list, list]
        (opened fitz documents, filesystem paths the uploads were saved under).
    """
    opened_docs = []
    saved_paths = []
    for upload in uploaded_files:
        # Write the raw upload bytes next to the script so fitz can open by path.
        target = Path(os.getcwd(), upload.name)
        with open(target, mode='wb') as sink:
            sink.write(upload.getvalue())
        opened_docs.append(fitz.open(target))
        saved_paths.append(target)
    return (opened_docs, saved_paths)
def util_get_list_page_and_passage(list_docs, list_save_path):
    """Return the full plain text of each document, one string per document.

    Parameters
    ----------
    list_docs : list
        Opened document objects; each is iterable over pages exposing
        ``get_text()`` (PyMuPDF-style interface).
    list_save_path : list
        Unused; kept so existing callers' signatures keep working.

    Returns
    -------
    list[str]
        For each document, all page texts concatenated in page order.
    """
    # str.join avoids the quadratic cost of repeated `text = text + ...`
    # concatenation in the original implementation.
    return ["".join(page.get_text() for page in docs) for docs in list_docs]
# Module-level default so the render loop below sees an empty list when the
# form has not been submitted yet.
documents = []
def get_summary_single_doc(text):
    """Summarize one document's text with a LangChain 'refine' chain on gpt-3.5-turbo.

    The text is split into ~3000-character chunks; the chain summarizes the
    first chunk, then iteratively refines that summary with each later chunk.

    Parameters
    ----------
    text : str
        Full plain text of a single document.

    Returns
    -------
    str
        The final refined summary (the chain's ``output_text``).
    """
    # Local imports so the heavy LangChain stack is only loaded when a summary
    # is actually requested. (The original also imported OpenAI twice and a
    # streaming callback handler; none of those were used, so they are gone.)
    from langchain.chains.summarize import load_summarize_chain
    from langchain.chat_models import ChatOpenAI
    from langchain.prompts import PromptTemplate
    from langchain.text_splitter import CharacterTextSplitter

    llm_key = os.environ.get("OPEN_API_KEY")

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=3000,
        chunk_overlap=20,
    )
    # Create the Document chunks the refine chain iterates over.
    texts = text_splitter.create_documents([text])

    prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY:"""
    prompt = PromptTemplate.from_template(prompt_template)

    refine_template = (
        "Your job is to produce a final summary with key learnings\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary"
        "(only if needed) with detailed context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "Given the new context, refine the original summary"
        "If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)

    # temperature=0 keeps the summaries deterministic across reruns.
    llm = ChatOpenAI(temperature=0, openai_api_key=llm_key, model_name="gpt-3.5-turbo")
    refine_chain = load_summarize_chain(
        llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
    )
    refine_outputs = refine_chain({'input_documents': texts})
    return refine_outputs['output_text']
# --- UI: upload form and summary rendering ---
with st.form("my_form"):
    multi = '''1. Download and Upload contract (PDF) .
e.g. https://www.barc.gov.in/tenders/GCC-LPS.pdf
e.g. https://www.montrosecounty.net/DocumentCenter/View/823/Sample-Construction-Contract
'''
    st.markdown(multi)
    multi = '''2. Press Summary .'''
    st.markdown(multi)
    multi = '''
** Attempt is made for summary ** \n
'''
    st.markdown(multi)
    list_docs = []
    list_save_path = []
    uploaded_files = st.file_uploader("Choose file(s)", accept_multiple_files=True)
    submitted = st.form_submit_button("Summary")

# With accept_multiple_files=True, st.file_uploader returns a list (possibly
# empty), so `is not None` was always true; truthiness also skips the
# nothing-uploaded case.
if submitted and uploaded_files:
    list_docs, list_save_path = util_upload_file_and_return_list_docs(uploaded_files)
    documents = util_get_list_page_and_passage(list_docs, list_save_path)
    for index, item in enumerate(documents):
        st.write(f'Summary{index + 1} :: ')
        st.write(get_summary_single_doc(item))
|