File size: 2,658 Bytes
a582605
2753e83
a582605
 
2753e83
 
a582605
 
d740958
4563676
 
a582605
 
 
1647f6a
a582605
 
 
 
9447b9b
 
a582605
 
 
120d786
a582605
9447b9b
 
 
5f6f8b5
9447b9b
bc1bad1
9e0447a
9447b9b
 
23fa73c
cc6af15
23fa73c
 
9447b9b
 
23fa73c
120d786
 
fb7fc01
23fa73c
 
9447b9b
eca9970
5ef8eae
2d7e30f
 
 
9447b9b
 
a582605
 
4563676
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a582605
4c3923d
a582605
 
 
5e46dcf
a582605
 
37c818f
5e46dcf
 
a582605
 
4c3923d
a582605
5025ff4
9ce669c
2d4f77c
c2e2b8a
 
 
a582605
 
4c3923d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import gradio as gr
import torch
import PyPDF2
from transformers import pipeline 
import numpy
import scipy
from gtts import gTTS
from io import BytesIO
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import VitsTokenizer, VitsModel


def extract_text(pdf_file):
    """Return the text of the first page of *pdf_file*.

    Only page 0 is read, since the abstract this app summarizes
    appears on the first page of a paper.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    first_page = reader.pages[0]
    return first_page.extract_text()

def summarize_text(text):
    """Summarize the abstract section of *text* with an LED summarization model.

    The abstract is taken as the ~7 sentences following the first sentence
    that contains the word "Abstract". If no such heading is found, the
    first sentences of the document are used instead (the original code
    raised NameError in that case because `start`/`end` were never bound).

    Returns the generated summary, trimmed to end at its last full stop.
    """
    sentences = text.split(". ")

    # Locate the abstract; default to the start of the document when no
    # "Abstract" heading is present.
    start, end = 0, 6
    for i, sentence in enumerate(sentences):
        if "Abstract" in sentence:
            start = i + 1
            end = start + 6
            break

    # Extract abstract
    abstract = ". ".join(sentences[start:end + 1])

    # Load summarization model & tokenizer (LED fine-tuned for summaries).
    tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-base-book-summary")
    model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/led-base-book-summary")

    # Tokenize abstract (truncated to the model's 1024-token window).
    inputs = tokenizer(abstract,
                       max_length=1024,
                       return_tensors="pt",
                       truncation=True)

    # Generate a short (21-30 token) beam-search summary.
    summary_ids = model.generate(inputs['input_ids'],
                                 num_beams=3,
                                 max_length=30,
                                 min_length=21,
                                 do_sample=False,
                                 early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Drop any trailing partial sentence after the last period.
    # (str.rindex raises rather than returning -1, so no sentinel check
    # is needed once we know a '.' is present.)
    if '.' in summary:
        summary = summary[:summary.rindex('.') + 1]

    return summary

def text_to_audio(text):
    """Synthesize *text* into a speech waveform using Meta's MMS English TTS.

    Returns a 1-D torch tensor of audio samples (the model's native sample
    rate — presumably 16 kHz for facebook/mms-tts-eng; confirm against
    `model.config.sampling_rate` if playback pitch is wrong).
    """
    # NOTE(review): a previous gTTS-based implementation was left here
    # commented out; removed as dead code.
    tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
    model = VitsModel.from_pretrained("facebook/mms-tts-eng")

    inputs = tokenizer([text], return_tensors="pt")

    # Inference only — no autograd bookkeeping needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Batch of one: return the single waveform.
    return outputs.waveform[0]

def audio_pdf(pdf_file):
    """Full pipeline: PDF file -> first-page text -> summary -> speech audio.

    Returns a (summary, audio) pair for the Gradio outputs.
    """
    summary = summarize_text(extract_text(pdf_file))
    return summary, text_to_audio(summary)

# Gradio components: a file upload in, the summary text and its audio out.
inputs = gr.File() 
summary_text = gr.Text()
audio_summary = gr.Audio()


# Wire the pipeline into a simple web UI. The example PDFs are expected to
# sit next to this script — TODO confirm they are shipped with the app.
iface = gr.Interface(
    fn=audio_pdf,
    inputs=inputs,
    outputs=[summary_text,audio_summary],
    title="PDF Audio Summarizer 📻",
    description="App that converts an abstract into audio",
    examples=["Attention_is_all_you_need.pdf", 
              "ImageNet_Classification.pdf"
             ]
)

# Start the local Gradio server (blocks until interrupted).
iface.launch()