Spaces:

emirhanbilgic
/

read-my-pdf-outloud

Running

App Files Community

emirhanbilgic committed on Aug 11, 2024

Commit

fc99598

verified ·

1 Parent(s): 1558a57

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -11

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import spaces
 import gradio as gr
 import torch
 from transformers import MarianTokenizer, MarianMTModel
@@ -8,6 +7,8 @@ from PyPDF2 import PdfReader
 import re
 import textwrap
 import soundfile as sf
 # Device configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -36,7 +37,7 @@ def split_text_into_sentences(text):
     return [sentence.strip() for sentence in sentences if sentence.strip()]
 # Translation function
-@spaces.GPU(duration=120)
 def translate(source_text, source_lang, target_lang, batch_size=16):
     model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
@@ -65,7 +66,7 @@ def preprocess(text):
     return text
 # Function to generate audio for a single sentence
-@spaces.GPU(duration=120)
 def generate_single_wav_from_text(sentence, description):
     set_seed(SEED)
     inputs = tts_tokenizer(description.strip(), return_tensors="pt").to(device)
@@ -90,7 +91,7 @@ with gr.Blocks() as demo:
                                      value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
             run_button = gr.Button("Generate Audio", variant="primary")
         with gr.Column():
-            audio_output = gr.Audio(label="Generated Audio")
             markdown_output = gr.Markdown()
     def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
@@ -108,23 +109,27 @@ with gr.Blocks() as demo:
         sentences = split_text_into_sentences(text)
         all_audio = []
         all_text = ""
         for sentence in sentences:
             print(f"Processing sentence: {sentence[:50]}...")  # Display the first 50 characters for a quick preview
             sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
-            all_audio.append((sample_rate, audio_arr))
             all_text += f"**Sentence**: {sentence}\n\n"
             # Yield the accumulated results
-            yield all_audio.copy(), all_text  # Use .copy() to avoid mutation issues
         print("Processing complete.")
     def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
         # Stream outputs to Gradio interface
         for audio_data, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
-            # Display all accumulated audio files and markdown text
-            yield [gr.Audio.update(value=(sample_rate, audio_arr)) for sample_rate, audio_arr in audio_data], markdown_text
     def handle_translation_toggle(translate_checkbox):
         if translate_checkbox:

 import gradio as gr
 import torch
 from transformers import MarianTokenizer, MarianMTModel
 import re
 import textwrap
 import soundfile as sf
+import numpy as np
+import tempfile
 # Device configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     return [sentence.strip() for sentence in sentences if sentence.strip()]
 # Translation function
+@gr.GPU(duration=120)
 def translate(source_text, source_lang, target_lang, batch_size=16):
     model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
     return text
 # Function to generate audio for a single sentence
+@gr.GPU(duration=120)
 def generate_single_wav_from_text(sentence, description):
     set_seed(SEED)
     inputs = tts_tokenizer(description.strip(), return_tensors="pt").to(device)
                                      value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
             run_button = gr.Button("Generate Audio", variant="primary")
         with gr.Column():
+            audio_output = gr.Gallery(label="Generated Audio Clips")
             markdown_output = gr.Markdown()
     def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
         sentences = split_text_into_sentences(text)
         all_audio = []
         all_text = ""
         for sentence in sentences:
             print(f"Processing sentence: {sentence[:50]}...")  # Display the first 50 characters for a quick preview
             sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
+            # Save audio to a temporary file and accumulate it in the list
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+                sf.write(f.name, audio_arr, sample_rate)
+                all_audio.append(f.name)
             all_text += f"**Sentence**: {sentence}\n\n"
             # Yield the accumulated results
+            yield all_audio, all_text
         print("Processing complete.")
     def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
         # Stream outputs to Gradio interface
         for audio_data, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
+            yield audio_data, markdown_text
     def handle_translation_toggle(translate_checkbox):
         if translate_checkbox: