# Spaces:
# Runtime error
# Runtime error
# Notebook-only shell commands, kept as comments because `!` magics are not
# valid Python outside IPython/Colab. Install these packages beforehand.
# !pip install transformers pyPDF2 torchaudio
# !pip install pdfminer.six
# !pip install datasets sentencepiece
from google.colab import drive  # Colab-specific module
from transformers import pipeline
import PyPDF2
from pdfminer.high_level import extract_pages, extract_text
from pdfminer.layout import LTTextContainer, LTChar
# Mount Google Drive so the PDF stored there is reachable from this runtime.
drive.mount('/content/drive')
pdf_path = '/content/drive/MyDrive/Applied AI/Assessment_3/Article 11 Hidden Technical Debt in Machine Learning Systems.pdf'

# Summarisation pipeline; also used later by summarize_pdf().
summarization = pipeline('summarization', model="pszemraj/long-t5-tglobal-base-16384-book-summary")

# Read the text of the first page, which is treated as the abstract.
# The context manager closes the file even if text extraction raises.
with open(pdf_path, 'rb') as pdf_file:
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    abstract_text = pdf_reader.pages[0].extract_text()

# Length bounds are in tokens and deliberately yield a very short summary.
summary = summarization(abstract_text, max_length=13, min_length=10)[0]['summary_text']
print(summary)
# Notebook-only shell command, kept as a comment because `!` magics are not
# valid Python outside IPython/Colab.
# !pip install --upgrade transformers sentencepiece datasets[audio]
import torch
import soundfile as sf
from IPython.display import Audio
from datasets import load_dataset
# Convert the generated summary to speech.
synthesiser = pipeline("text-to-speech", "facebook/mms-tts-eng")
TTS_Output = synthesiser(summary)
print(TTS_Output.keys())
audio_key = TTS_Output["audio"]
# Play back at the rate reported by the pipeline rather than a hard-coded
# 16000, so the audio is not pitch-shifted if the model's rate differs.
# NOTE(review): indexing [0] assumes the waveform has a leading
# batch/channel axis -- confirm against the installed transformers version.
Audio(data=audio_key[0], rate=TTS_Output["sampling_rate"])
# Notebook-only shell commands, kept as comments because `!` magics are not
# valid Python outside IPython/Colab.
# !pip install gradio==2.3.6
# !pip install --upgrade typing-extensions
import gradio as gr
def summarize_pdf(pdf_path):
    """Summarise the first page of a PDF.

    Args:
        pdf_path: Filesystem path to the PDF (``str``, ``bytes`` or
            path-like), or an uploaded-file object that exposes the path of
            its temporary file as ``.name``. ``None`` means nothing has been
            uploaded.

    Returns:
        The summary text produced by the module-level ``summarization``
        pipeline, or an empty string when ``pdf_path`` is ``None``.
    """
    if pdf_path is None:
        # Nothing to summarise; avoid a TypeError from open(None).
        return ""

    # Real paths are used as-is. Anything else is treated as an upload
    # wrapper whose .name holds the path; pathlib.Path is checked first
    # because its .name is only the final path component.
    if isinstance(pdf_path, (str, bytes)) or hasattr(pdf_path, "__fspath__"):
        path = pdf_path
    else:
        path = getattr(pdf_path, "name", pdf_path)

    # The context manager closes the file even if extraction raises.
    with open(path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        abstract_text = pdf_reader.pages[0].extract_text()

    return summarization(abstract_text, max_length=13, min_length=10)[0]['summary_text']
# Gradio UI: upload a PDF and show the summary returned by summarize_pdf().
# live=True re-runs the function whenever the input changes, without a
# submit button.
iface = gr.Interface(
    fn=summarize_pdf,
    inputs="file",
    outputs="text",
    live=True,
    title="PDF Summarizer",
    description="Upload a PDF with an abstract, and the model will generate a summary.",
)