Spaces:

gianb
/

PDF_Summarizer_and_TTS

Runtime error

File size: 1,337 Bytes

eb917b1

import gradio as gr

from transformers import pipeline
import PyPDF2
import pdfplumber 

import torch

import soundfile as sf

from IPython.display import Audio

from datasets import load_dataset

from pdfminer.high_level import extract_pages, extract_text

# Summarization pipeline shared by summarize_and_speech; it is constructed
# once, when this module is loaded.
_SUMMARY_MODEL_ID = "pszemraj/long-t5-tglobal-base-16384-book-summary"
summarization = pipeline("summarization", model=_SUMMARY_MODEL_ID)

# Lazily created text-to-speech pipeline, shared across calls so the model is
# not reloaded for every uploaded PDF.
_synthesiser = None


def _get_synthesiser():
    """Return the shared text-to-speech pipeline, building it on first use."""
    global _synthesiser
    if _synthesiser is None:
        _synthesiser = pipeline("text-to-speech", "facebook/mms-tts-eng")
    return _synthesiser


def _read_first_page_text(pdf_file):
    """Extract the text of the first page of an uploaded PDF.

    Args:
        pdf_file: Raw PDF ``bytes``, a filesystem path, or an object that
            exposes the path as ``.name``.

    Returns:
        The text of page 0, stripped of surrounding whitespace.

    Raises:
        ValueError: If the PDF has no pages or page 0 has no extractable text.
    """
    import io  # local import keeps this block self-contained

    if isinstance(pdf_file, (bytes, bytearray)):
        # The upload arrived as raw bytes; wrap them in a seekable stream.
        stream = io.BytesIO(pdf_file)
    else:
        # Either a plain path or a wrapper carrying the path in ``.name``.
        stream = open(getattr(pdf_file, "name", pdf_file), "rb")

    with stream:
        # PdfReader is the current name of the legacy PdfFileReader class.
        reader = PyPDF2.PdfReader(stream)
        if len(reader.pages) == 0:
            raise ValueError("The uploaded PDF contains no pages.")
        text = (reader.pages[0].extract_text() or "").strip()

    if not text:
        raise ValueError(
            "No extractable text was found on the first page of the PDF."
        )
    return text


def summarize_and_speech(pdf_file):
    """Summarize the first page of a PDF and synthesize the summary as speech.

    Args:
        pdf_file: The uploaded PDF: raw ``bytes``, a filesystem path, or an
            object that exposes the path as ``.name``.

    Returns:
        A ``(summary, audio)`` pair. ``summary`` is the generated summary
        text; ``audio`` is a ``(sampling_rate, samples)`` tuple, one of the
        value forms the Gradio Audio output component accepts.

    Raises:
        ValueError: If no text can be extracted from the first page.
    """
    abstract_text = _read_first_page_text(pdf_file)
    summary = summarization(abstract_text, max_length=13, min_length=10)[0]['summary_text']

    # Use a text-to-speech model to generate audio
    tts_output = _get_synthesiser()(summary)
    if isinstance(tts_output, (list, tuple)):
        # A single string normally yields one dict; tolerate a batched result.
        tts_output = tts_output[0]

    # Drop singleton axes (e.g. a leading batch axis) so the waveform is 1-D.
    samples = tts_output["audio"].squeeze()
    return summary, (tts_output["sampling_rate"], samples)

# Input component: a single PDF upload, configured with type="binary".
_pdf_upload = gr.File(label="Upload PDF", type="binary")

# Output components, listed in the order summarize_and_speech returns values:
# the summary text first, then the synthesized speech.
_summary_box = gr.Textbox(label="Abstract Summary:")
_speech_player = gr.Audio(type="filepath", label="Summary_Speech")

iface = gr.Interface(
    fn=summarize_and_speech,
    inputs=_pdf_upload,
    outputs=[_summary_box, _speech_player],
    title="Abstract_Research_Paper_Summarizer",
    description="Upload a Research Paper PDF File. The model will generate a one line summary of the Abstract section and a speech audio.",
    live=True,
)

# Start serving the interface.
iface.launch()