Spaces:
Sleeping
Sleeping
File size: 2,303 Bytes
c693e62 cdb128e c693e62 cdb128e c693e62 67d721c c693e62 d9986c3 67d721c d9986c3 67d721c d9986c3 67d721c d9986c3 67d721c d9986c3 2a10acb cdb128e 797680d 2a10acb 67d721c 2a10acb 67d721c 2a10acb 67d721c 2a10acb c693e62 67d721c c693e62 67d721c cdb128e c693e62 2a10acb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import gradio as gr
from PyPDF2 import PdfReader
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from gtts import gTTS
from io import BytesIO
import re
# Load the LED-large model for summarization
model_name = "pszemraj/led-large-book-summary"
summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)
def extract_abstract_and_summarize(pdf_file):
try:
with open(pdf_file, "rb") as file:
pdf_reader = PdfReader(file)
abstract_text = ""
for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num]
text = page.extract_text()
abstract_match = re.search(r"\bAbstract\b", text, re.IGNORECASE)
if abstract_match:
start_index = abstract_match.end()
introduction_match = re.search(r"\bIntroduction\b", text[start_index:], re.IGNORECASE)
if introduction_match:
end_index = start_index + introduction_match.start()
else:
end_index = None
abstract_text = text[start_index:end_index]
break
# Summarize the extracted abstract using the LED-large model
result = summarizer(abstract_text, max_length=256, min_length=16, length_penalty=2.0)
summary = result[0]['summary']
# Generate audio
speech = gTTS(text=summary, lang="en")
speech_bytes = BytesIO()
speech.write_to_fp(speech_bytes)
# Return individual output values
return summary, speech_bytes.getvalue(), abstract_text.strip()
except Exception as e:
raise Exception(str(e))
interface = gr.Interface(
fn=extract_abstract_and_summarize,
inputs=[gr.File(label="Upload PDF")],
outputs=[gr.Textbox(label="Summary"), gr.Audio()],
title="PDF Summarization & Audio Tool",
description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'pszemraj/led-large-book-summary' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
)
interface.launch()
|