# Spaces: Running  (hosting-page status residue; not part of the program)
import tempfile
import warnings
from pathlib import Path

# Installed before the third-party imports so FutureWarnings raised while
# importing them are ignored too.
warnings.simplefilter(action='ignore', category=FutureWarning)

import gradio as gr
import PyPDF2
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
# Generation settings for the Mistral-7B-Instruct text-generation endpoint.
# NOTE(review): `temperature` is set while `do_sample` is False — confirm
# which of the two the serving backend actually honours.
_ENDPOINT_SETTINGS = {
    "repo_id": "mistralai/Mistral-7B-Instruct-v0.3",
    "task": "text-generation",
    "max_new_tokens": 4096,
    "temperature": 0.5,
    "do_sample": False,
}
llm = HuggingFaceEndpoint(**_ENDPOINT_SETTINGS)

# Chat-model wrapper around the endpoint; `summarize` sends its prompt
# through this object.
llm_engine_hf = ChatHuggingFace(llm=llm)
def read_pdf(file_path):
    """Return the text extracted from every page of a PDF, concatenated.

    Args:
        file_path: Location of the PDF, passed straight to
            ``PyPDF2.PdfReader``.

    Returns:
        The pages' extracted text joined in page order with no separator.
        A page that yields no text contributes an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(file_path)
    # Iterate the pages directly and join once instead of indexing by
    # position and growing a string with ``+=``. ``or ""`` keeps the join
    # from failing if a page's extraction comes back as None.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def summarize(file, n_words=None):
    """Summarize an uploaded document as a bulleted list using the LLM.

    Args:
        file: The uploaded file, given either as a filesystem path string
            or as an object exposing the path through a ``name`` attribute.
            ``None`` (nothing uploaded) yields an empty summary.
        n_words: Accepted for backward compatibility; the prompt does not
            use it. It defaults to ``None`` because the UI wires only the
            file component to this callback, so a required second argument
            made every click fail.

    Returns:
        The summary text produced by the model, or ``""`` when no file was
        supplied.
    """
    if file is None:
        return ""

    # Accept both a plain path string and a file wrapper carrying ``.name``.
    file_path = file if isinstance(file, str) else file.name

    # Compare the extension case-insensitively so ``.PDF`` uploads are not
    # mistaken for UTF-8 text files.
    if file_path.lower().endswith('.pdf'):
        text = read_pdf(file_path)
    else:
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()

    template = '''
Please carefully read the following document:
<document>
{TEXT}
</document>
After reading through the document, detect the language, identify the key points and main ideas covered in the text. Organize these key points into a concise bulleted list that summarizes the essential information from the document. The summary should have a maximum of 10 bullet points.
The final summary MUST be in the language you detected.
Your goal is to be comprehensive in capturing the core content of the document, while also being concise in how you express each summary point. Omit minor details and focus on the central themes and important facts.
'''
    prompt = PromptTemplate(
        template=template,
        input_variables=['TEXT'],
    )
    formatted_prompt = prompt.format(TEXT=text)
    output_summary = llm_engine_hf.invoke(formatted_prompt)
    return output_summary.content
def download_summary(output_text):
    """Write the summary to a fresh ``summary.txt`` and return its path.

    Args:
        output_text: Summary text to save. A falsy value (``None`` or an
            empty string) means there is nothing to save.

    Returns:
        ``pathlib.Path`` of the written UTF-8 file, or ``None`` when
        ``output_text`` is falsy.
    """
    if not output_text:
        return None

    # Each call gets its own temporary directory: a single shared
    # ``summary.txt`` in the working directory would be overwritten by
    # whichever request wrote last. The file name itself is kept so the
    # download is still called ``summary.txt``.
    file_path = Path(tempfile.mkdtemp(prefix='summary_')) / 'summary.txt'
    file_path.write_text(output_text, encoding='utf-8')
    return file_path
def create_download_file(summary_text):
    """Save the summary via ``download_summary`` and return the path as a string.

    Returns ``None`` when ``download_summary`` produced no file.
    """
    saved_path = download_summary(summary_text)
    if not saved_path:
        return None
    return str(saved_path)
# Create the Gradio interface
# NOTE(review): the layout nesting below was reconstructed from a listing
# that had lost its indentation — confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## Document Summarizer")

    with gr.Row():
        with gr.Column():
            file = gr.File(label="Submit a file")
        with gr.Column():
            output_text = gr.Textbox(label="Summary", lines=20)

    # Summarize the uploaded file and show the result in the textbox.
    submit_button = gr.Button("Summarize")
    submit_button.click(summarize, inputs=[file], outputs=output_text)

    # Listing the textbox under `inputs` hands its current text to the
    # handler, which returns the path of the file offered for download.
    download_button = gr.Button("Download Summary")
    download_button.click(
        fn=create_download_file,
        inputs=[output_text],
        outputs=gr.File(),
    )

# Run the Gradio app
demo.launch(share=True)