Chan-Y's picture
Update app.py
8d26000 verified
raw
history blame
3.08 kB
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import PyPDF2
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from pathlib import Path
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
llm = HuggingFaceEndpoint(
repo_id="mistralai/Mistral-7B-Instruct-v0.3",
task="text-generation",
max_new_tokens=4096,
temperature=0.5,
do_sample=False,
)
llm_engine_hf = ChatHuggingFace(llm=llm)
def read_pdf(file_path):
pdf_reader = PyPDF2.PdfReader(file_path)
text = ""
for page in range(len(pdf_reader.pages)):
text += pdf_reader.pages[page].extract_text()
return text
def summarize(file, n_words):
# Read the content of the uploaded file
file_path = file.name
if file_path.endswith('.pdf'):
text = read_pdf(file_path)
else:
with open(file_path, 'r', encoding='utf-8') as f:
text = f.read()
template = '''
Please carefully read the following document:
<document>
{TEXT}
</document>
After reading through the document, detect the language, identify the key points and main ideas covered in the text. Organize these key points into a concise bulleted list that summarizes the essential information from the document. The summary should have a maximum of 10 bullet points.
The final summary MUST be in the language you detected.
Your goal is to be comprehensive in capturing the core content of the document, while also being concise in how you express each summary point. Omit minor details and focus on the central themes and important facts.
'''
prompt = PromptTemplate(
template=template,
input_variables=['TEXT']
)
formatted_prompt = prompt.format(TEXT=text)
output_summary = llm_engine_hf.invoke(formatted_prompt)
return output_summary.content
def download_summary(output_text):
if output_text:
file_path = Path('summary.txt')
with open(file_path, 'w', encoding='utf-8') as f:
f.write(output_text)
return file_path
else:
return None
def create_download_file(summary_text):
file_path = download_summary(summary_text)
return str(file_path) if file_path else None
# Create the Gradio interface
with gr.Blocks() as demo:
gr.Markdown("## Document Summarizer")
with gr.Row():
with gr.Column():
file = gr.File(label="Submit a file")
with gr.Column():
output_text = gr.Textbox(label="Summary", lines=20)
submit_button = gr.Button("Summarize")
submit_button.click(summarize, inputs=[file], outputs=output_text)
def generate_file():
summary_text = output_text
file_path = download_summary(summary_text)
return file_path
download_button = gr.Button("Download Summary")
download_button.click(
fn=create_download_file,
inputs=[output_text],
outputs=gr.File()
)
# Run the Gradio app
demo.launch(share=True)