# https://huggingface.co/spaces/azsalihu/AbstractSummary_To_Audio

# Imports
import PyPDF2
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from gtts import gTTS
from io import BytesIO

# Text extraction function: read the first page of the uploaded PDF,
# which is where the abstract is assumed to appear
def extract_text(article):
    pdfReader = PyPDF2.PdfReader(article)
    pageObj = pdfReader.pages[0]
    return pageObj.extract_text()

# Summarization function
def summarize_abstract(text):
    # Take the sentences that follow the "Abstract" heading;
    # fall back to the opening sentences if no "Abstract" marker is found
    sentences = text.split(". ")
    start, end = 0, 6
    for i, sentence in enumerate(sentences):
        if "Abstract" in sentence:
            start = i + 1
            end = start + 6
            break
    abstract = ". ".join(sentences[start:end + 1])

    # Load the summarization model and its tokenizer
    tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-base-book-summary")
    model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/led-base-book-summary")

    # Tokenize the abstract
    inputs = tokenizer(abstract, max_length=1024, return_tensors="pt", truncation=True)

    # Generate the summary
    summary_ids = model.generate(
        inputs["input_ids"],
        max_length=50,
        min_length=30,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        do_sample=True,
        early_stopping=False,
    )

    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Drop any trailing sentence fragment after the last full stop
    if "." in summary:
        summary = summary[:summary.rindex(".") + 1]

    return summary
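
# A rough illustration of the input shape summarize_abstract expects
# (hypothetical text, not from a real paper); uncomment for a quick local check:
#
#   sample = ("Some Paper Title. Abstract. We study a problem. We propose a method. "
#             "Experiments show improvements. We discuss limitations. Code is released.")
#   print(summarize_abstract(sample))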

# Abstract-to-audio function
def abstract_to_audio(text):
    # Synthesize speech with gTTS and return the MP3 bytes for the gr.Audio output
    tts = gTTS(text, lang='en')
    buffer = BytesIO()
    tts.write_to_fp(buffer)
    buffer.seek(0)
    return buffer.read()
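
# Note: gr.Audio also accepts a filepath; if the installed Gradio version does not
# handle raw bytes, saving with tts.save("summary.mp3") and returning that path is
# an alternative.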

# Combine text extraction, summarization, and text-to-audio into one pipeline
def abstract_audio(article):
    text = extract_text(article)
    summary = summarize_abstract(text) 
    audio = abstract_to_audio(summary)
    return summary, audio
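
# End-to-end check without the UI (hypothetical filename; assumes the PDF sits
# next to this script):
#
#   summary, audio_bytes = abstract_audio("paper.pdf")
#   print(summary)
#   with open("summary.mp3", "wb") as f:
#       f.write(audio_bytes)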

# Gradio components
pdf_input = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()

# Building the Gradio interface
myApp = gr.Interface(
    fn=abstract_audio,
    inputs=pdf_input,
    outputs=[summary_text, audio_summary],
    title="Summary of Abstract to Audio",
    description="An app that summarizes the abstract of an article/journal paper and gives you audio of the summary",
    examples=["NIPS-2015-hidden-technical-debt-in-machine-learning-systems-Paper.pdf"],
)

myApp.launch()