import os
import uuid
from pathlib import Path

import streamlit as st

st.set_page_config(layout="wide")

from annotated_text import annotated_text, annotation
import fitz  # PyMuPDF, used to read the uploaded PDFs
import chromadb
import pandas as pd
import spacy
# Retrieval-related imports below are currently unused by the summary flow.
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

# The Space stores the key under the secret name OPEN_API_KEY; mirror it into
# the variable name the OpenAI client expects.
os.environ['OPENAI_API_KEY'] = os.environ['OPEN_API_KEY']

st.title("Contracts Summary")

# Load the English model from spaCy
nlp = spacy.load("en_core_web_md")
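# The spaCy model is reloaded on every Streamlit rerun. A minimal sketch of
# caching it instead (assumes Streamlit >= 1.18, which provides st.cache_resource):
#
#   @st.cache_resource
#   def load_nlp():
#       return spacy.load("en_core_web_md")
#
#   nlp = load_nlp()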
def util_upload_file_and_return_list_docs(uploaded_files):
    """Save each uploaded file to the working directory and open it with PyMuPDF.

    Returns the list of opened documents and the list of paths they were saved to.
    """
    list_docs = []
    list_save_path = []
    for uploaded_file in uploaded_files:
        save_path = Path(os.getcwd(), uploaded_file.name)
        with open(save_path, mode='wb') as w:
            w.write(uploaded_file.getvalue())
        docs = fitz.open(save_path)
        list_docs.append(docs)
        list_save_path.append(save_path)
    return (list_docs, list_save_path)
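# PyMuPDF can also open an upload directly from memory, skipping the write to
# disk; a sketch using PyMuPDF's stream interface (not what the app does above):
#
#   docs = fitz.open(stream=uploaded_file.getvalue(), filetype="pdf")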
def util_get_list_page_and_passage(list_docs, list_save_path):
    """Concatenate the text of every page of each document into one string per document."""
    documents = []
    for ind_doc, docs in enumerate(list_docs):
        text = ''
        for txt_page in docs:
            text = text + txt_page.get_text()
        documents.append(text)
    return (documents)
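# A per-page variant (a hypothetical helper, not used below) would preserve page
# boundaries, which is useful if summaries ever need to cite a page:
#
#   def util_get_pages(list_docs):
#       return [[page.get_text() for page in docs] for docs in list_docs]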
documents = []
def get_summary_single_doc(text):
    from langchain.chains.summarize import load_summarize_chain
    from langchain.text_splitter import CharacterTextSplitter
    from langchain.prompts import PromptTemplate
    from langchain.chat_models import ChatOpenAI

    LLM_KEY = os.environ.get("OPEN_API_KEY")

    # Split the raw contract text into overlapping chunks the LLM can handle.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=3000,
        chunk_overlap=20
    )
    # Create the documents from the list of texts
    texts = text_splitter.create_documents([text])

    prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY:"""
    prompt = PromptTemplate.from_template(prompt_template)

    refine_template = (
        "Your job is to produce a final summary with key learnings.\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary "
        "(only if needed) with detailed context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "Given the new context, refine the original summary. "
        "If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)

    # Define the LLM; here we are using OpenAI's ChatGPT
    model_name = "gpt-3.5-turbo"
    llm = ChatOpenAI(temperature=0, openai_api_key=LLM_KEY, model_name=model_name)

    # "refine" summarization: summarize the first chunk, then refine that
    # running summary with each subsequent chunk.
    refine_chain = load_summarize_chain(
        llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
    )
    refine_outputs = refine_chain({'input_documents': texts})
    return (refine_outputs['output_text'])
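# Because return_intermediate_steps=True, the chain's output also contains the
# partial summaries produced after each chunk; a sketch of inspecting them:
#
#   for step in refine_outputs['intermediate_steps']:
#       print(step)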
with st.form("my_form"):
    multi = '''1. Download and upload a contract (PDF),
    e.g. https://www.barc.gov.in/tenders/GCC-LPS.pdf
    e.g. https://www.montrosecounty.net/DocumentCenter/View/823/Sample-Construction-Contract
    '''
    st.markdown(multi)
    multi = '''2. Press Summary.'''
    st.markdown(multi)
    multi = '''
    **An attempt is made to summarize each uploaded contract.**
    '''
    st.markdown(multi)
    list_docs = []
    list_save_path = []
    uploaded_files = st.file_uploader("Choose file(s)", accept_multiple_files=True)
    submitted = st.form_submit_button("Summary")
    if submitted and (uploaded_files is not None):
        list_docs, list_save_path = util_upload_file_and_return_list_docs(uploaded_files)
        documents = util_get_list_page_and_passage(list_docs, list_save_path)
        for index, item in enumerate(documents):
            st.write('Summary ' + str(index + 1) + ' :: ')
            st.write(get_summary_single_doc(item))
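# Packages this script appears to need (inferred from the imports; the Space's
# actual requirements.txt is not shown): streamlit, st-annotated-text, pymupdf,
# chromadb, pandas, langchain, openai, spacy (plus the en_core_web_md model),
# rank_bm25, sentence-transformers.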