emirhanbilgic committed on
Commit
e666162
·
verified ·
1 Parent(s): fc99598

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -34
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import torch
3
  from transformers import MarianTokenizer, MarianMTModel
@@ -7,8 +8,6 @@ from PyPDF2 import PdfReader
7
  import re
8
  import textwrap
9
  import soundfile as sf
10
- import numpy as np
11
- import tempfile
12
 
13
  # Device configuration
14
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -37,7 +36,7 @@ def split_text_into_sentences(text):
37
  return [sentence.strip() for sentence in sentences if sentence.strip()]
38
 
39
  # Translation function
40
- @gr.GPU(duration=120)
41
  def translate(source_text, source_lang, target_lang, batch_size=16):
42
  model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
43
 
@@ -66,7 +65,7 @@ def preprocess(text):
66
  return text
67
 
68
  # Function to generate audio for a single sentence
69
- @gr.GPU(duration=120)
70
  def generate_single_wav_from_text(sentence, description):
71
  set_seed(SEED)
72
  inputs = tts_tokenizer(description.strip(), return_tensors="pt").to(device)
@@ -79,6 +78,7 @@ def generate_single_wav_from_text(sentence, description):
79
  audio_arr = generation.cpu().numpy().squeeze()
80
  return SAMPLE_RATE, audio_arr
81
 
 
82
  # Gradio Interface
83
  with gr.Blocks() as demo:
84
  with gr.Row():
@@ -91,55 +91,34 @@ with gr.Blocks() as demo:
91
  value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
92
  run_button = gr.Button("Generate Audio", variant="primary")
93
  with gr.Column():
94
- audio_output = gr.Gallery(label="Generated Audio Clips")
95
  markdown_output = gr.Markdown()
96
 
97
  def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
98
- # Extract and process text from PDF
99
- print("Extracting text from PDF...")
100
  text = pdf_to_text(pdf_input.name)
101
- print(f"Extracted text: {text[:100]}...") # Display the first 100 characters for a quick preview
102
-
103
- # Perform translation if enabled
104
  if translate_checkbox:
105
- print("Translating text...")
106
  text = translate(text, source_lang, target_lang)
107
- print(f"Translated text: {text[:100]}...") # Display the first 100 characters for a quick preview
108
 
109
  sentences = split_text_into_sentences(text)
110
- all_audio = []
111
  all_text = ""
112
 
113
  for sentence in sentences:
114
- print(f"Processing sentence: {sentence[:50]}...") # Display the first 50 characters for a quick preview
115
  sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
116
-
117
- # Save audio to a temporary file and accumulate it in the list
118
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
119
- sf.write(f.name, audio_arr, sample_rate)
120
- all_audio.append(f.name)
121
-
122
  all_text += f"**Sentence**: {sentence}\n\n"
123
-
124
- # Yield the accumulated results
125
- yield all_audio, all_text
126
-
127
- print("Processing complete.")
128
 
129
  def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
130
- # Stream outputs to Gradio interface
131
  for audio_data, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
132
  yield audio_data, markdown_text
133
 
134
- def handle_translation_toggle(translate_checkbox):
135
- if translate_checkbox:
136
- return gr.update(visible=True), gr.update(visible=True)
137
- else:
138
- return gr.update(visible=False), gr.update(visible=False)
139
-
140
  translate_checkbox.change(fn=handle_translation_toggle, inputs=translate_checkbox, outputs=[source_lang, target_lang])
141
  source_lang.change(fn=lambda lang: gr.update(choices={"en": ["de", "fr", "tr"], "tr": ["en"], "de": ["en", "fr"], "fr": ["en", "de"]}.get(lang, [])), inputs=source_lang, outputs=target_lang)
142
- run_button.click(run_pipeline, inputs=[pdf_input, translate_checkbox, source_lang, target_lang, description], outputs=[audio_output, markdown_output])
143
 
144
  demo.queue()
145
- demo.launch(share=True)
 
1
+ import spaces
2
  import gradio as gr
3
  import torch
4
  from transformers import MarianTokenizer, MarianMTModel
 
8
  import re
9
  import textwrap
10
  import soundfile as sf
 
 
11
 
12
  # Device configuration
13
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
36
  return [sentence.strip() for sentence in sentences if sentence.strip()]
37
 
38
  # Translation function
39
+ @spaces.GPU(duration=120)
40
  def translate(source_text, source_lang, target_lang, batch_size=16):
41
  model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
42
 
 
65
  return text
66
 
67
  # Function to generate audio for a single sentence
68
+ @spaces.GPU(duration=120)
69
  def generate_single_wav_from_text(sentence, description):
70
  set_seed(SEED)
71
  inputs = tts_tokenizer(description.strip(), return_tensors="pt").to(device)
 
78
  audio_arr = generation.cpu().numpy().squeeze()
79
  return SAMPLE_RATE, audio_arr
80
 
81
+
82
  # Gradio Interface
83
  with gr.Blocks() as demo:
84
  with gr.Row():
 
91
  value="Old man voice. Monotone voice tune from an old man, with a very close recording that almost has no background noise.")
92
  run_button = gr.Button("Generate Audio", variant="primary")
93
  with gr.Column():
94
+ audio_gallery = gr.Gallery(label="Generated Audios", item_type="audio")
95
  markdown_output = gr.Markdown()
96
 
97
  def handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
 
 
98
  text = pdf_to_text(pdf_input.name)
 
 
 
99
  if translate_checkbox:
 
100
  text = translate(text, source_lang, target_lang)
 
101
 
102
  sentences = split_text_into_sentences(text)
103
+ all_audio_paths = []
104
  all_text = ""
105
 
106
  for sentence in sentences:
 
107
  sample_rate, audio_arr = generate_single_wav_from_text(sentence, description)
108
+ # Create temporary audio file
109
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmpfile:
110
+ sf.write(tmpfile, audio_arr, sample_rate)
111
+ all_audio_paths.append(tmpfile.name)
 
 
112
  all_text += f"**Sentence**: {sentence}\n\n"
113
+ yield all_audio_paths, all_text
 
 
 
 
114
 
115
  def run_pipeline(pdf_input, translate_checkbox, source_lang, target_lang, description):
 
116
  for audio_data, markdown_text in handle_process(pdf_input, translate_checkbox, source_lang, target_lang, description):
117
  yield audio_data, markdown_text
118
 
 
 
 
 
 
 
119
  translate_checkbox.change(fn=handle_translation_toggle, inputs=translate_checkbox, outputs=[source_lang, target_lang])
120
  source_lang.change(fn=lambda lang: gr.update(choices={"en": ["de", "fr", "tr"], "tr": ["en"], "de": ["en", "fr"], "fr": ["en", "de"]}.get(lang, [])), inputs=source_lang, outputs=target_lang)
121
+ run_button.click(run_pipeline, inputs=[pdf_input, translate_checkbox, source_lang, target_lang, description], outputs=[audio_gallery, markdown_output])
122
 
123
  demo.queue()
124
+ demo.launch(share=True)