Chan-Y's picture
Update app.py
d3aa346 verified
raw
history blame
3.09 kB
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import PyPDF2
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from pathlib import Path
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
llm = HuggingFaceEndpoint(
repo_id="mistralai/Mistral-7B-Instruct-v0.3",
task="text-generation",
max_new_tokens=4096,
temperature=0.5,
do_sample=False,
)
llm_engine_hf = ChatHuggingFace(llm=llm)
def read_pdf(file_path):
pdf_reader = PyPDF2.PdfReader(file_path)
text = ""
for page in range(len(pdf_reader.pages)):
text += pdf_reader.pages[page].extract_text()
return text
def summarize(file, n_words):
# Read the content of the uploaded file
file_path = file.name
if file_path.endswith('.pdf'):
text = read_pdf(file_path)
else:
with open(file_path, 'r', encoding='utf-8') as f:
text = f.read()
template = '''
Please carefully read the following document:
<document>
{TEXT}
</document>
After reading through the document, identify the language, and pinpoint the key points and main ideas covered in the text. Organize these key points into a concise bulleted list that summarizes the essential information from the document. The summary should consist of a maximum of 10 bullet points.
Ensure that the final summary is in the language you identified from the document.
Your goal is to comprehensively capture the core content of the document while expressing each summary point succinctly. Omit minor details and focus on central themes and important facts.
'''
prompt = PromptTemplate(
template=template,
input_variables=['TEXT']
)
formatted_prompt = prompt.format(TEXT=text)
output_summary = llm_engine_hf.invoke(formatted_prompt)
return output_summary.content
def download_summary(output_text):
if output_text:
file_path = Path('summary.txt')
with open(file_path, 'w', encoding='utf-8') as f:
f.write(output_text)
return file_path
else:
return None
def create_download_file(summary_text):
file_path = download_summary(summary_text)
return str(file_path) if file_path else None
# Create the Gradio interface
with gr.Blocks() as demo:
gr.Markdown("## Document Summarizer")
with gr.Row():
with gr.Column():
file = gr.File(label="Submit a file")
with gr.Column():
output_text = gr.Textbox(label="Summary", lines=20)
submit_button = gr.Button("Summarize")
submit_button.click(summarize, inputs=[file], outputs=output_text)
def generate_file():
summary_text = output_text
file_path = download_summary(summary_text)
return file_path
download_button = gr.Button("Download Summary")
download_button.click(
fn=create_download_file,
inputs=[output_text],
outputs=gr.File()
)
# Run the Gradio app
demo.launch(share=True)