eHemink committed
Commit 59c6c87 · 1 Parent(s): b8b1d65

Delete app.py

Files changed (1)
  1. app.py +0 -69
app.py DELETED
@@ -1,69 +0,0 @@
- #imports
- !pip install PyPDF2
- import PyPDF2
- import re
- !pip install transformers
- import transformers
- from transformers import pipeline
- !pip install git+https://github.com/suno-ai/bark.git
- from bark import SAMPLE_RATE, generate_audio, preload_models
- from scipy.io.wavfile import write as write_wav
- from IPython.display import Audio
-
- def abstract_to_audio(insert_pdf):
-     # Extracting the abstract text from the article pdf
-     def extract_abstract(pdf_file):
-         # Open the PDF file in read-binary mode
-         with open(pdf_file, 'rb') as file:
-             # Create a PDF reader object
-             pdf_reader = PyPDF2.PdfReader(file)
-
-             # Initialize an empty string to store abstract content
-             abstract_text = ''
-
-             # Loop through each page in the PDF
-             for page_num in range(len(pdf_reader.pages)):
-                 # Get the text from the current page
-                 page = pdf_reader.pages[page_num]
-                 text = page.extract_text()
-
-                 # Use regular expression to find the "Abstract" section
-                 abstract_match = re.search(r'\bAbstract\b', text, re.IGNORECASE)
-                 if abstract_match:
-                     # Get the text after the "Abstract" heading until the next section, indicated by "Introduction" heading
-                     start_index = abstract_match.end()
-                     next_section_match = re.search(r'\bIntroduction\b', text[start_index:])
-                     if next_section_match:
-                         end_index = start_index + next_section_match.start()
-                         abstract_text = text[start_index:end_index]
-                     else:
-                         # If no next section found, extract text till the end
-                         abstract_text = text[start_index:]
-                     break  # Exit loop once abstract is found
-
-         return abstract_text.strip()
-
-
-     abstract = extract_abstract(insert_pdf)
-
-     # Creating a summarization pipeline
-     model = "lidiya/bart-large-xsum-samsum"
-     pipeline1 = pipeline(task = "summarization", model = model)
-
-     # Summarizing the extracted abstract
-     summarized = pipeline1(abstract)
-     print(summarized[0]['summary_text'])
-     tss_prompt = summarized[0]['summary_text']
-
-     # Generate audio file that speaks the generated sentence using Bark
-     # download and load all models
-     preload_models()
-
-     # generate audio from text
-     text_prompt = tss_prompt
-     audio_array = generate_audio(text_prompt)
-
-     # play text in notebook
-     return Audio(audio_array, rate=SAMPLE_RATE)
-
- my_app = gr.Interface(fn=abstract_to_audio, inputs='file', outputs='audio')
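
As written, the deleted script would not run as a standalone Gradio app: it relies on notebook-only !pip install lines, references gr without ever importing gradio, never launches the interface, and returns an IPython.display.Audio object that Gradio's audio output cannot use (the imported write_wav is never called either). The sketch below shows one way the same pipeline could be wired up as a runnable app. It is an illustration, not the original author's code: it assumes PyPDF2, transformers, gradio, and the Bark package are installed, that gr.File(type="filepath") passes a file path string to the function, and that a (sample_rate, waveform) tuple is an acceptable return value for gr.Audio.

# Minimal sketch (not the original file): same pipeline as the deleted app.py,
# with the missing gradio import, a Gradio-friendly return value, and a launch call.
import re

import gradio as gr
import PyPDF2
from bark import SAMPLE_RATE, generate_audio, preload_models
from transformers import pipeline


def extract_abstract(pdf_path):
    """Return the text between an 'Abstract' heading and the next 'Introduction' heading."""
    with open(pdf_path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        for page in reader.pages:
            text = page.extract_text() or ""
            match = re.search(r"\bAbstract\b", text, re.IGNORECASE)
            if match:
                start = match.end()
                end_match = re.search(r"\bIntroduction\b", text[start:])
                end = start + end_match.start() if end_match else len(text)
                return text[start:end].strip()
    return ""


# Load the summarizer and the Bark models once at startup instead of on every request.
summarizer = pipeline(task="summarization", model="lidiya/bart-large-xsum-samsum")
preload_models()  # download and cache the Bark models


def abstract_to_audio(pdf_file):
    abstract = extract_abstract(pdf_file)  # assumes gr.File(type="filepath") passes a path string
    summary = summarizer(abstract)[0]["summary_text"]
    audio_array = generate_audio(summary)  # Bark returns a numpy waveform
    return SAMPLE_RATE, audio_array        # (sample_rate, waveform) tuple for gr.Audio


my_app = gr.Interface(
    fn=abstract_to_audio,
    inputs=gr.File(type="filepath"),
    outputs=gr.Audio(),
)

if __name__ == "__main__":
    my_app.launch()

Creating the summarization pipeline and preloading the Bark models at module level, rather than inside the request handler as the deleted version did, avoids reloading them on every call.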