Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from PyPDF2 import PdfReader | |
| from transformers import pipeline | |
| from gtts import gTTS | |
| from io import BytesIO | |
| import re | |
| import os | |
# Shared summarization pipeline, built once at import time and reused for every
# request. The checkpoint is pinned explicitly rather than left to the library's
# implicit per-task default, so summaries stay reproducible across transformers
# upgrades (this is the checkpoint the library has used as its default for the
# "summarization" task).
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
def extract_abstract_and_summarize(pdf_file):
    """Extract a PDF's abstract, summarize it, and render the summary as speech.

    The abstract is taken from the first page whose text contains the word
    "Abstract" (case-insensitive): everything after that word up to the next
    "Introduction" on the same page, or to the end of that page when no
    "Introduction" follows.

    Args:
        pdf_file: Filesystem path of the uploaded PDF, or an object that
            exposes that path as ``.name`` (presumably how some Gradio
            versions deliver a ``gr.File`` value -- confirm against the
            installed version).

    Returns:
        A ``(summary, audio_bytes, abstract)`` tuple: the summary text, the
        bytes gTTS produced for the spoken summary, and the stripped abstract.

    Raises:
        gr.Error: If no file was given, no abstract could be located, or any
            processing step failed. ``gr.Error`` subclasses ``Exception`` and
            the original exception is chained as ``__cause__``.
    """
    try:
        if pdf_file is None:
            raise gr.Error("Please upload a PDF file.")

        # Plain paths are used as-is; anything else is expected to carry its
        # path in ``.name`` (e.g. a temporary-file wrapper).
        if isinstance(pdf_file, (str, bytes, os.PathLike)):
            pdf_path = pdf_file
        else:
            pdf_path = getattr(pdf_file, "name", pdf_file)

        abstract_text = ""
        with open(pdf_path, "rb") as file:
            for page in PdfReader(file).pages:
                # Guard against pages that yield no extractable text.
                text = page.extract_text() or ""
                abstract_match = re.search(r"\bAbstract\b", text, re.IGNORECASE)
                if abstract_match is None:
                    continue
                start_index = abstract_match.end()
                introduction_match = re.search(
                    r"\bIntroduction\b", text[start_index:], re.IGNORECASE
                )
                # The second search ran on a slice, so shift its offset back
                # into ``text`` coordinates; None means "to end of page".
                end_index = (
                    start_index + introduction_match.start()
                    if introduction_match
                    else None
                )
                abstract_text = text[start_index:end_index]
                break  # only the first page mentioning "Abstract" is used

        if not abstract_text.strip():
            # Fail early with a clear message instead of sending empty text
            # to the model and to the speech synthesizer.
            raise gr.Error("No abstract could be found in the uploaded PDF.")

        # Summarize the extracted abstract.
        result = summarizer(
            abstract_text,
            min_length=16,
            max_length=256,
            no_repeat_ngram_size=3,
            encoder_no_repeat_ngram_size=3,
            repetition_penalty=3.5,
            num_beams=4,
            early_stopping=True,
            # Clip input that exceeds the model's maximum length rather than
            # failing inside the model (happens when no "Introduction"
            # heading bounds the abstract).
            truncation=True,
        )
        # The summarization pipeline returns a list of dicts keyed by
        # "summary_text".
        summary = result[0]["summary_text"]

        # Generate audio for the summary into an in-memory buffer.
        speech = gTTS(text=summary, lang="en")
        speech_bytes = BytesIO()
        speech.write_to_fp(speech_bytes)

        # Return individual output values.
        return summary, speech_bytes.getvalue(), abstract_text.strip()
    except gr.Error:
        # Already a user-facing error; do not wrap it a second time.
        raise
    except Exception as e:
        # Surface the message in the Gradio UI and keep the original
        # traceback attached for debugging.
        raise gr.Error(str(e)) from e
# Gradio UI: a single PDF upload as input. The handler returns three values in
# this order -- summary text, spoken-summary audio, extracted abstract -- so
# three output components are declared; with only two, the abstract was
# computed but never displayed.
interface = gr.Interface(
    fn=extract_abstract_and_summarize,
    inputs=[gr.File(label="Upload PDF")],
    outputs=[
        gr.Textbox(label="Summary"),
        gr.Audio(),
        gr.Textbox(label="Abstract"),
    ],
    title="PDF Summarization & Audio Tool",
    description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'summarizer' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs.""",
)
# Start the web server for the app.
interface.launch()