gianb's picture
Update app.py
91861aa
raw
history blame
1.88 kB
!pip install transformers pyPDF2 torchaudio
!pip install pdfminer.six
!pip install datasets sentencepiece
from google.colab import drive
from transformers import pipeline
import PyPDF2
from pdfminer.high_level import extract_pages, extract_text
from pdfminer.layout import LTTextContainer, LTChar
# Mount Google Drive so the PDF stored there is reachable from the Colab runtime.
drive.mount('/content/drive')
pdf_path = '/content/drive/MyDrive/Applied AI/Assessment_3/Article 11 Hidden Technical Debt in Machine Learning Systems.pdf'

# Summarisation pipeline; the model is loaded once here and reused below.
summarization = pipeline('summarization', model="pszemraj/long-t5-tglobal-base-16384-book-summary")

# Read the text of the first page only (presumably where the abstract lives).
# The `with` block guarantees the file handle is closed even if PDF parsing
# or text extraction raises.
with open(pdf_path, 'rb') as pdf_file:
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    abstract_text = pdf_reader.pages[0].extract_text()

# The pipeline returns a list with one dict per input; take the text of the
# first (only) result. min_length/max_length bound the generated summary.
summary = summarization(abstract_text, max_length=13, min_length=10)[0]['summary_text']
print(summary)
!pip install --upgrade transformers sentencepiece datasets[audio]
import torch
import soundfile as sf
from IPython.display import Audio
from datasets import load_dataset
# Turn the text summary into speech with the "facebook/mms-tts-eng" model.
synthesiser = pipeline("text-to-speech", "facebook/mms-tts-eng")
TTS_Output = synthesiser(summary)
# Show which fields the pipeline returned (the code below reads "audio" and
# "sampling_rate").
print(TTS_Output.keys())
audio_key = TTS_Output["audio"]
# Play the first row of the returned audio array. The playback rate comes
# from the pipeline result instead of a hard-coded 16000, so it stays correct
# if the checkpoint is swapped for one with a different sampling rate.
Audio(data=audio_key[0], rate=TTS_Output["sampling_rate"])
!pip install gradio==2.3.6
!pip install --upgrade typing-extensions
import gradio as gr
def summarize_pdf(pdf_path):
    """Summarise the first page of a PDF.

    Args:
        pdf_path: Either a filesystem path (``str``/``bytes``/``os.PathLike``)
            or an uploaded-file object exposing the on-disk location through
            a ``name`` attribute (what a Gradio ``"file"`` input is expected
            to pass -- NOTE(review): confirm against the pinned Gradio
            version).

    Returns:
        The ``summary_text`` produced by the module-level ``summarization``
        pipeline for the first page's extracted text.
    """
    import os

    # Real paths are used as-is. Anything else is assumed to be an uploaded
    # file wrapper, whose `.name` is the path to open. The PathLike check
    # comes first because pathlib.Path also has a `.name` (basename only).
    if isinstance(pdf_path, (str, bytes, os.PathLike)):
        source = pdf_path
    else:
        source = getattr(pdf_path, "name", pdf_path)

    # `with` closes the handle even when parsing or extraction raises.
    with open(source, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        abstract_text = pdf_reader.pages[0].extract_text()

    summary = summarization(abstract_text, max_length=13, min_length=10)[0]['summary_text']
    return summary
# Gradio front end: a file upload goes in, the text returned by
# summarize_pdf comes out.
iface = gr.Interface(
    title="PDF Summarizer",
    description="Upload a PDF with an abstract, and the model will generate a summary.",
    fn=summarize_pdf,
    inputs="file",
    outputs="text",
    live=True,
)