eHemink committed on
Commit b8b1d65 · 1 Parent(s): 3d0b487

Create app.py

Files changed (1)
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
+ # Imports
+ # Note: the original !pip install lines are not valid inside a script;
+ # the packages below are assumed to be installed separately (see the note
+ # after the diff).
+ import re
+
+ import gradio as gr
+ import PyPDF2
+ from transformers import pipeline
+ from bark import SAMPLE_RATE, generate_audio, preload_models
+
+
+ def abstract_to_audio(insert_pdf):
+     # Extracting the abstract text from the article pdf
+     def extract_abstract(pdf_file):
+         # Open the PDF file in read-binary mode
+         with open(pdf_file, 'rb') as file:
+             # Create a PDF reader object
+             pdf_reader = PyPDF2.PdfReader(file)
+
+             # Initialize an empty string to store the abstract content
+             abstract_text = ''
+
+             # Loop through each page in the PDF
+             for page_num in range(len(pdf_reader.pages)):
+                 # Get the text from the current page
+                 page = pdf_reader.pages[page_num]
+                 text = page.extract_text()
+
+                 # Use a regular expression to find the "Abstract" section
+                 abstract_match = re.search(r'\bAbstract\b', text, re.IGNORECASE)
+                 if abstract_match:
+                     # Take the text after the "Abstract" heading until the next
+                     # section, indicated by an "Introduction" heading
+                     start_index = abstract_match.end()
+                     next_section_match = re.search(r'\bIntroduction\b', text[start_index:])
+                     if next_section_match:
+                         end_index = start_index + next_section_match.start()
+                         abstract_text = text[start_index:end_index]
+                     else:
+                         # If no next section is found, take the text to the end of the page
+                         abstract_text = text[start_index:]
+                     break  # Exit the loop once the abstract is found
+
+         return abstract_text.strip()
+
+     # Depending on the Gradio version, a 'file' input passes a filepath string
+     # or a file object with a .name attribute
+     pdf_path = insert_pdf if isinstance(insert_pdf, str) else insert_pdf.name
+     abstract = extract_abstract(pdf_path)
+
+     # Creating a summarization pipeline
+     model = "lidiya/bart-large-xsum-samsum"
+     pipeline1 = pipeline(task="summarization", model=model)
+
+     # Summarizing the extracted abstract
+     summarized = pipeline1(abstract)
+     print(summarized[0]['summary_text'])
+     tts_prompt = summarized[0]['summary_text']
+
+     # Generate audio that speaks the summary using Bark:
+     # download and load all Bark models, then synthesize the text
+     preload_models()
+     audio_array = generate_audio(tts_prompt)
+
+     # Return the waveform as (sample_rate, data), the form Gradio's audio output expects
+     return (SAMPLE_RATE, audio_array)
+
+
+ my_app = gr.Interface(fn=abstract_to_audio, inputs='file', outputs='audio')
+ my_app.launch()
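
Because the inline !pip install commands were dropped from app.py, the packages it imports have to be provided some other way; on a Hugging Face Space that is usually a requirements.txt next to app.py. A minimal sketch, assuming only the packages the script imports and no version pins:

    PyPDF2
    transformers
    gradio
    git+https://github.com/suno-ai/bark.git

The summarization pipeline also needs torch at runtime; if it is not already pulled in by the packages above, it would need to be listed as well.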