!pip install transformers pyPDF2 torchaudio !pip install pdfminer.six !pip install datasets sentencepiece from google.colab import drive from transformers import pipeline import PyPDF2 from pdfminer.high_level import extract_pages, extract_text from pdfminer.layout import LTTextContainer, LTChar drive.mount('/content/drive') pdf_path = '/content/drive/MyDrive/Applied AI/Assessment_3/Article 11 Hidden Technical Debt in Machine Learning Systems.pdf' summarization = pipeline ('summarization', model = "pszemraj/long-t5-tglobal-base-16384-book-summary") # Open the PDF file pdf_file = open(pdf_path, 'rb') pdf_reader = PyPDF2.PdfReader(pdf_file) # Extract text from the Abstract section abstract_text = pdf_reader.pages[0].extract_text() # Close the PDF file pdf_file.close() summary = summarization(abstract_text, max_length=13, min_length=10)[0]['summary_text'] print(summary) !pip install --upgrade transformers sentencepiece datasets[audio] import torch import soundfile as sf from IPython.display import Audio from datasets import load_dataset synthesiser = pipeline("text-to-speech", "facebook/mms-tts-eng") TTS_Output = synthesiser(summary) print(TTS_Output.keys()) audio_key = TTS_Output["audio"] Audio(data=audio_key[0], rate=16000) !pip install gradio==2.3.6 !pip install --upgrade typing-extensions import gradio as gr def summarize_pdf(pdf_path): pdf_file = open(pdf_path, 'rb') pdf_reader = PyPDF2.PdfReader(pdf_file) abstract_text = pdf_reader.pages[0].extract_text() summary = summarization(abstract_text, max_length=13, min_length=10)[0]['summary_text'] pdf_file.close() return summary iface = gr.Interface( fn=summarize_pdf, inputs= "file", outputs="text", live=True, title="PDF Summarizer", description="Upload a PDF with an abstract, and the model will generate a summary." )