ovieyra21 committed on
Commit
27e8b08
·
verified ·
1 Parent(s): 011aec2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -10
app.py CHANGED
@@ -1,16 +1,14 @@
1
  import gradio as gr
2
  import torch
3
  from datasets import load_dataset
4
- from transformers import pipeline, SpeechT5Processor, SpeechT5HifiGan, SpeechT5ForTextToSpeech
5
 
6
  model_id = "ovieyra21/es_speecht5_tts_mabama" # update with your model id
7
- # pipe = pipeline("automatic-speech-recognition", model=model_id)
8
  model = SpeechT5ForTextToSpeech.from_pretrained(model_id)
9
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
10
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
11
  speaker_embeddings = torch.tensor(embeddings_dataset[7440]["xvector"]).unsqueeze(0)
12
 
13
- # checkpoint = "microsoft/speecht5_tts"
14
  processor = SpeechT5Processor.from_pretrained(model_id)
15
 
16
  replacements = [
@@ -27,7 +25,6 @@ replacements = [
27
  ("ü", "u"),
28
  ]
29
 
30
-
31
  title = "Text-to-Speech"
32
  description = """
33
  Demo for text-to-speech translation in French. Demo uses [Sandiago21/speecht5_finetuned_facebook_voxpopuli_french](https://huggingface.co/Sandiago21/speecht5_finetuned_facebook_voxpopuli_french) checkpoint, which is based on Microsoft's
@@ -35,7 +32,6 @@ Demo for text-to-speech translation in French. Demo uses [Sandiago21/speecht5_fi
35
  ![Text-to-Speech (TTS)"](https://geekflare.com/wp-content/uploads/2021/07/texttospeech-1200x385.png "Diagram of Text-to-Speech (TTS)")
36
  """
37
 
38
-
39
  def cleanup_text(text):
40
  for src, dst in replacements:
41
  text = text.replace(src, dst)
@@ -44,16 +40,14 @@ def cleanup_text(text):
44
  def synthesize_speech(text):
45
  text = cleanup_text(text)
46
  inputs = processor(text=text, return_tensors="pt")
47
-
48
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
49
-
50
- return gr.Audio.update(value=(16000, speech.cpu().numpy()))
51
 
52
  syntesize_speech_gradio = gr.Interface(
53
  synthesize_speech,
54
- inputs = gr.Textbox(label="Text", placeholder="Type something here..."),
55
  outputs=gr.Audio(),
56
  examples=["Probando audio"],
57
  title=title,
58
  description=description,
59
- ).launch()
 
1
  import gradio as gr
2
  import torch
3
  from datasets import load_dataset
4
+ from transformers import SpeechT5Processor, SpeechT5HifiGan, SpeechT5ForTextToSpeech
5
 
6
  model_id = "ovieyra21/es_speecht5_tts_mabama" # update with your model id
 
7
  model = SpeechT5ForTextToSpeech.from_pretrained(model_id)
8
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
9
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
10
  speaker_embeddings = torch.tensor(embeddings_dataset[7440]["xvector"]).unsqueeze(0)
11
 
 
12
  processor = SpeechT5Processor.from_pretrained(model_id)
13
 
14
  replacements = [
 
25
  ("ü", "u"),
26
  ]
27
 
 
28
  title = "Text-to-Speech"
29
  description = """
30
  Demo for text-to-speech translation in French. Demo uses [Sandiago21/speecht5_finetuned_facebook_voxpopuli_french](https://huggingface.co/Sandiago21/speecht5_finetuned_facebook_voxpopuli_french) checkpoint, which is based on Microsoft's
 
32
  ![Text-to-Speech (TTS)"](https://geekflare.com/wp-content/uploads/2021/07/texttospeech-1200x385.png "Diagram of Text-to-Speech (TTS)")
33
  """
34
 
 
35
  def cleanup_text(text):
36
  for src, dst in replacements:
37
  text = text.replace(src, dst)
 
40
  def synthesize_speech(text):
41
  text = cleanup_text(text)
42
  inputs = processor(text=text, return_tensors="pt")
 
43
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
44
+ return (16000, speech.cpu().numpy()) # Devuelve el audio directamente
 
45
 
46
  syntesize_speech_gradio = gr.Interface(
47
  synthesize_speech,
48
+ inputs=gr.Textbox(label="Text", placeholder="Type something here..."),
49
  outputs=gr.Audio(),
50
  examples=["Probando audio"],
51
  title=title,
52
  description=description,
53
+ ).launch()