# https://huggingface.co/spaces/gianb/PDF_Summarized_TTS
"""Gradio app: summarize the first page of an uploaded PDF and read it aloud.

The first page of the PDF (where a research paper's abstract normally sits)
is summarized with a long-T5 summarization model, and the summary is then
synthesised to speech with the MMS English text-to-speech model.
"""

# Here are the imports
import functools
import io

import gradio as gr
import pdfplumber
import PyPDF2
import soundfile as sf
import torch
from datasets import load_dataset
from IPython.display import Audio
from pdfminer.high_level import extract_pages, extract_text
from PyPDF2 import PdfReader
from transformers import AutoModel, AutoProcessor, AutoTokenizer, pipeline

# Here is the code
SUMMARY_MODEL_ID = "pszemraj/long-t5-tglobal-base-16384-book-summary"
TTS_MODEL_ID = "facebook/mms-tts-eng"

# Token bounds passed to the summarizer; kept short to get a one-line summary.
SUMMARY_MAX_LENGTH = 13
SUMMARY_MIN_LENGTH = 10

summarization = pipeline("summarization", model=SUMMARY_MODEL_ID)


@functools.lru_cache(maxsize=1)
def _get_synthesiser():
    """Return the text-to-speech pipeline, loading it on first use only.

    Building the pipeline loads the model, so it is cached instead of being
    rebuilt for every request.
    """
    return pipeline("text-to-speech", TTS_MODEL_ID)


def _first_page_text(pdf_bytes):
    """Return the stripped text of the first page of a PDF, or "" if none.

    Args:
        pdf_bytes: Raw bytes of the PDF document.
    """
    reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
    if len(reader.pages) == 0:
        return ""
    # extract_text() can yield nothing for scanned/image-only pages.
    return (reader.pages[0].extract_text() or "").strip()


def _to_gradio_audio(tts_output):
    """Convert a text-to-speech pipeline result to ``(sample_rate, waveform)``.

    Per the transformers documentation the pipeline returns a dict (or a list
    of dicts for batched input) with an ``"audio"`` array laid out as
    ``(channels, samples)`` and a ``"sampling_rate"`` int, while Gradio
    expects ``(samples,)`` or ``(samples, channels)``.
    """
    if isinstance(tts_output, (list, tuple)):
        tts_output = tts_output[0]
    waveform = tts_output["audio"].squeeze()  # mono (1, N) -> (N,)
    if waveform.ndim == 2:
        waveform = waveform.T  # multi-channel: (channels, N) -> (N, channels)
    return tts_output["sampling_rate"], waveform


def summarize_and_speech(pdf_file):
    """Summarize the first page of a PDF and synthesise the summary as speech.

    Args:
        pdf_file: Raw bytes of the uploaded PDF, or ``None``/empty when the
            upload has been cleared (the interface runs in live mode, so the
            function is re-invoked on every input change).

    Returns:
        A ``(summary, audio)`` tuple where ``summary`` is the summary text and
        ``audio`` is ``(sample_rate, waveform)`` for the audio output, or
        ``("", None)`` when no file was provided.

    Raises:
        gr.Error: If no text can be extracted from the first page.
    """
    if not pdf_file:
        return "", None

    abstract_text = _first_page_text(pdf_file)
    if not abstract_text:
        raise gr.Error("No extractable text was found on the first page of the PDF.")

    summary = summarization(
        abstract_text,
        max_length=SUMMARY_MAX_LENGTH,
        min_length=SUMMARY_MIN_LENGTH,
    )[0]["summary_text"]

    # Use a text-to-speech model to generate audio
    tts_output = _get_synthesiser()(summary)
    return summary, _to_gradio_audio(tts_output)


iface = gr.Interface(
    fn=summarize_and_speech,
    inputs=gr.File(label="Upload PDF", type="binary"),
    outputs=[
        gr.Textbox(label="Abstract Summary:"),
        gr.Audio(type="filepath", label="Summary_Speech"),
    ],
    live=True,
    title="Abstract_Research_Paper_Summarizer",
    description="Upload a Research Paper PDF File. The model will generate a one line summary of the Abstract section and a speech audio.",
)

iface.launch()