wifix199 committed on
Commit
e67e9cb
·
verified ·
1 Parent(s): 4357623

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +53 -0
  2. requirements.txt +19 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import torch
4
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
5
+
6
# One Hugging Face checkpoint id feeds both the text processor and the TTS
# model; the HiFi-GAN vocoder is loaded from its own checkpoint.
checkpoint = "microsoft/speecht5_tts"
processor = SpeechT5Processor.from_pretrained(checkpoint)
model = SpeechT5ForTextToSpeech.from_pretrained(checkpoint)
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# Voice name -> path of the .npy file holding that speaker's embedding.
speaker_embeddings = dict(
    male="speaker/cmu_us_bdl_arctic-wav-arctic_a0009.npy",
    female="speaker/cmu_us_slt_arctic-wav-arctic_a0508.npy",
)
17
+
18
# Function to generate speech
def text_to_speech(text, gender):
    """Synthesize speech for ``text`` using the voice selected by ``gender``.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only text
            yields an empty clip without invoking the model.
        gender: Key into the module-level ``speaker_embeddings`` mapping
            ("male" or "female"). ``None`` falls back to "male" so that a
            request made without a voice selection still produces audio.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``sample_rate`` is 16000
        and ``samples`` is an int16 NumPy array.

    Raises:
        KeyError: If ``gender`` is neither ``None`` nor a key of
            ``speaker_embeddings``.
    """
    if not text or not text.strip():
        return (16000, np.zeros(0).astype(np.int16))

    if gender is None:
        gender = "male"

    inputs = processor(text=text, return_tensors="pt")

    # Truncate the token ids to the longest sequence the model accepts.
    input_ids = inputs["input_ids"][..., :model.config.max_text_positions]

    # Load the stored embedding for the chosen voice and add a batch axis.
    speaker_embedding = np.load(speaker_embeddings[gender])
    speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)

    speech = model.generate_speech(input_ids, speaker_embedding, vocoder=vocoder)

    # Clamp to [-1, 1] before scaling: a sample outside that range would
    # otherwise wrap around when cast to int16.
    waveform = np.clip(speech.numpy(), -1.0, 1.0)
    return (16000, (waveform * 32767).astype(np.int16))
39
+
40
# Create the Gradio interface: a text box and a voice selector in, audio out.
iface = gr.Interface(
    fn=text_to_speech,
    inputs=[
        gr.Textbox(label="Enter Text"),
        # Preselect a voice: with no default, submitting before choosing an
        # option would pass None as `gender` to the callback.
        gr.Radio(["male", "female"], value="male", label="Select Voice Gender"),
    ],
    outputs=gr.Audio(label="Generated Speech"),
    title="Text-to-Speech Bot",
    description="Enter text and select a voice gender to generate speech.",
)

# Launch the interface
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ transformers
4
+ soundfile
5
+ librosa
6
+ TTS
7
+ playsound
8
+ gtts
9
+ pyttsx3
10
+ torchaudio
11
+ soundfile
12
+ git+https://github.com/huggingface/transformers.git
13
+ torch
14
+ torchaudio
15
+ sentencepiece
16
+ soundfile
17
+ samplerate
18
+ librosa
19
+ resampy