emirhanbilgic committed on
Commit
fc99598
·
verified ·
1 Parent(s): 1558a57

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import spaces
2
  import gradio as gr
3
  import torch
4
  from transformers import MarianTokenizer, MarianMTModel
@@ -8,6 +7,8 @@ from PyPDF2 import PdfReader
8
  import re
9
  import textwrap
10
  import soundfile as sf
 
 
11
 
12
  # Device configuration
13
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -36,7 +37,7 @@ def split_text_into_sentences(text):
36
  return [sentence.strip() for sentence in sentences if sentence.strip()]
37
 
38
  # Translation function
39
- @spaces.GPU(duration=120)
40
  def translate(source_text, source_lang, target_lang, batch_size=16):
41
  model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
42
 
@@ -65,7 +66,7 @@ def preprocess(text):
65
  return text
66
 
67
  # Function to generate audio for a single sentence
68
- @spaces.GPU(duration=120)
69
  def generate_single_wav_from_text(sentence, description):
70
  set_seed(SEED)
71
  inputs = tts_tokenizer(description.strip(), return_tensors="pt").to(device)
@@ -90,7 +91,7 @@ with gr.Blocks() as demo:
90
  value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
91
  run_button = gr.Button("Generate Audio", variant="primary")
92
  with gr.Column():
93
- audio_output = gr.Audio(label="Generated Audio")
94
  markdown_output = gr.Markdown()
95
 
96
  def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
@@ -108,23 +109,27 @@ with gr.Blocks() as demo:
108
  sentences = split_text_into_sentences(text)
109
  all_audio = []
110
  all_text = ""
111
-
112
  for sentence in sentences:
113
  print(f"Processing sentence: {sentence[:50]}...") # Display the first 50 characters for a quick preview
114
  sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
115
- all_audio.append((sample_rate, audio_arr))
 
 
 
 
 
116
  all_text += f"**Sentence**: {sentence}\n\n"
117
-
118
  # Yield the accumulated results
119
- yield all_audio.copy(), all_text # Use .copy() to avoid mutation issues
120
-
121
  print("Processing complete.")
122
 
123
  def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
124
  # Stream outputs to Gradio interface
125
  for audio_data, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
126
- # Display all accumulated audio files and markdown text
127
- yield [gr.Audio.update(value=(sample_rate, audio_arr)) for sample_rate, audio_arr in audio_data], markdown_text
128
 
129
  def handle_translation_toggle(translate_checkbox):
130
  if translate_checkbox:
 
 
1
  import gradio as gr
2
  import torch
3
  from transformers import MarianTokenizer, MarianMTModel
 
7
  import re
8
  import textwrap
9
  import soundfile as sf
10
+ import numpy as np
11
+ import tempfile
12
 
13
  # Device configuration
14
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
37
  return [sentence.strip() for sentence in sentences if sentence.strip()]
38
 
39
  # Translation function
40
+ @gr.GPU(duration=120)
41
  def translate(source_text, source_lang, target_lang, batch_size=16):
42
  model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
43
 
 
66
  return text
67
 
68
  # Function to generate audio for a single sentence
69
+ @gr.GPU(duration=120)
70
  def generate_single_wav_from_text(sentence, description):
71
  set_seed(SEED)
72
  inputs = tts_tokenizer(description.strip(), return_tensors="pt").to(device)
 
91
  value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
92
  run_button = gr.Button("Generate Audio", variant="primary")
93
  with gr.Column():
94
+ audio_output = gr.Gallery(label="Generated Audio Clips")
95
  markdown_output = gr.Markdown()
96
 
97
  def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
 
109
  sentences = split_text_into_sentences(text)
110
  all_audio = []
111
  all_text = ""
112
+
113
  for sentence in sentences:
114
  print(f"Processing sentence: {sentence[:50]}...") # Display the first 50 characters for a quick preview
115
  sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
116
+
117
+ # Save audio to a temporary file and accumulate it in the list
118
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
119
+ sf.write(f.name, audio_arr, sample_rate)
120
+ all_audio.append(f.name)
121
+
122
  all_text += f"**Sentence**: {sentence}\n\n"
123
+
124
  # Yield the accumulated results
125
+ yield all_audio, all_text
126
+
127
  print("Processing complete.")
128
 
129
  def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
130
  # Stream outputs to Gradio interface
131
  for audio_data, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
132
+ yield audio_data, markdown_text
 
133
 
134
  def handle_translation_toggle(translate_checkbox):
135
  if translate_checkbox: