Spaces:

emirhanbilgic
/

read-my-pdf-outloud

Running

App Files Files Community

emirhanbilgic committed on Aug 11, 2024

Commit

e666162

verified ·

1 Parent(s): fc99598

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -34

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import gradio as gr
 import torch
 from transformers import MarianTokenizer, MarianMTModel
@@ -7,8 +8,6 @@ from PyPDF2 import PdfReader
 import re
 import textwrap
 import soundfile as sf
-import numpy as np
-import tempfile
 # Device configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -37,7 +36,7 @@ def split_text_into_sentences(text):
     return [sentence.strip() for sentence in sentences if sentence.strip()]
 # Translation function
-@gr.GPU(duration=120)
 def translate(source_text, source_lang, target_lang, batch_size=16):
     model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
@@ -66,7 +65,7 @@ def preprocess(text):
     return text
 # Function to generate audio for a single sentence
-@gr.GPU(duration=120)
 def generate_single_wav_from_text(sentence, description):
     set_seed(SEED)
     inputs = tts_tokenizer(description.strip(), return_tensors="pt").to(device)
@@ -79,6 +78,7 @@ def generate_single_wav_from_text(sentence, description):
     audio_arr = generation.cpu().numpy().squeeze()
     return SAMPLE_RATE, audio_arr
 # Gradio Interface
 with gr.Blocks() as demo:
     with gr.Row():
@@ -91,55 +91,34 @@ with gr.Blocks() as demo:
                                      value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
             run_button = gr.Button("Generate Audio", variant="primary")
         with gr.Column():
-            audio_output = gr.Gallery(label="Generated Audio Clips")
             markdown_output = gr.Markdown()
     def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
-        # Extract and process text from PDF
-        print("Extracting text from PDF...")
         text = pdf_to_text(pdf_input.name)
-        print(f"Extracted text: {text[:100]}...")  # Display the first 100 characters for a quick preview
-        # Perform translation if enabled
         if translate_checkbox:
-            print("Translating text...")
             text = translate(text, source_lang, target_lang)
-            print(f"Translated text: {text[:100]}...")  # Display the first 100 characters for a quick preview
         sentences = split_text_into_sentences(text)
-        all_audio = []
         all_text = ""
         for sentence in sentences:
-            print(f"Processing sentence: {sentence[:50]}...")  # Display the first 50 characters for a quick preview
             sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
-            # Save audio to a temporary file and accumulate it in the list
-            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
-                sf.write(f.name, audio_arr, sample_rate)
-                all_audio.append(f.name)
             all_text += f"**Sentence**: {sentence}\n\n"
-            # Yield the accumulated results
-            yield all_audio, all_text
-        print("Processing complete.")
     def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
-        # Stream outputs to Gradio interface
         for audio_data, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
             yield audio_data, markdown_text
-    def handle_translation_toggle(translate_checkbox):
-        if translate_checkbox:
-            return gr.update(visible=True), gr.update(visible=True)
-        else:
-            return gr.update(visible=False), gr.update(visible=False)
     translate_checkbox.change(fn=handle_translation_toggle, inputs=translate_checkbox, outputs=[source_lang, target_lang])
     source_lang.change(fn=lambda lang: gr.update(choices={"en": ["de", "fr", "tr"], "tr": ["en"], "de": ["en", "fr"], "fr": ["en", "de"]}.get(lang, [])), inputs=source_lang, outputs=target_lang)
-    run_button.click(run_pipeline, inputs=[pdf_input, translate_checkbox, source_lang, target_lang, description], outputs=[audio_output, markdown_output])
 demo.queue()
-demo.launch(share=True)

+import spaces
 import gradio as gr
 import torch
 from transformers import MarianTokenizer, MarianMTModel
 import re
 import textwrap
 import soundfile as sf
 # Device configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     return [sentence.strip() for sentence in sentences if sentence.strip()]
 # Translation function
+@spaces.GPU(duration=120)
 def translate(source_text, source_lang, target_lang, batch_size=16):
     model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
     return text
 # Function to generate audio for a single sentence
+@spaces.GPU(duration=120)
 def generate_single_wav_from_text(sentence, description):
     set_seed(SEED)
     inputs = tts_tokenizer(description.strip(), return_tensors="pt").to(device)
     audio_arr = generation.cpu().numpy().squeeze()
     return SAMPLE_RATE, audio_arr
 # Gradio Interface
 with gr.Blocks() as demo:
     with gr.Row():
                                      value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
             run_button = gr.Button("Generate Audio", variant="primary")
         with gr.Column():
+            audio_gallery = gr.Gallery(label="Generated Audios", item_type="audio")
             markdown_output = gr.Markdown()
     def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
         text = pdf_to_text(pdf_input.name)
         if translate_checkbox:
             text = translate(text, source_lang, target_lang)
         sentences = split_text_into_sentences(text)
+        all_audio_paths = []
         all_text = ""
         for sentence in sentences:
             sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
+            # Create temporary audio file
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmpfile:
+                sf.write(tmpfile, audio_arr, sample_rate)
+                all_audio_paths.append(tmpfile.name)
             all_text += f"**Sentence**: {sentence}\n\n"
+            yield all_audio_paths, all_text
     def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
         for audio_data, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
             yield audio_data, markdown_text
     translate_checkbox.change(fn=handle_translation_toggle, inputs=translate_checkbox, outputs=[source_lang, target_lang])
     source_lang.change(fn=lambda lang: gr.update(choices={"en": ["de", "fr", "tr"], "tr": ["en"], "de": ["en", "fr"], "fr": ["en", "de"]}.get(lang, [])), inputs=source_lang, outputs=target_lang)
+    run_button.click(run_pipeline, inputs=[pdf_input, translate_checkbox, source_lang, target_lang, description], outputs=[audio_gallery, markdown_output])
 demo.queue()
+demo.launch(share=True)