Sanjayraju30 committed on
Commit
e51e8e6
·
verified ·
1 Parent(s): 75e39be

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -0
app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import numpy as np
4
+ import os
5
+ import io
6
+ import base64
7
+ from kokoro import KModel, KPipeline
8
+
9
# Detect once whether a CUDA device can be used.
CUDA_AVAILABLE = torch.cuda.is_available()

# Build the shared model, move it to the selected device, and put it in eval mode.
model = KModel()
model = model.to('cuda' if CUDA_AVAILABLE else 'cpu')
model = model.eval()

# One pipeline per language code; only 'a' (English) is configured here.
# NOTE(review): model=False presumably keeps the pipeline from loading its own
# model, since the shared `model` above is called directly -- confirm.
pipelines = {
    lang_code: KPipeline(lang_code=lang_code, model=False)
    for lang_code in ('a',)
}

# Override the lexicon entry so "kokoro" gets a custom pronunciation.
pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
20
+
21
def text_to_audio(text, speed=1.0):
    """Convert text to audio using the Kokoro model.

    The pipeline is iterated to completion: every segment it yields is
    synthesized and the pieces are joined into a single waveform, so longer
    inputs are not cut off after their first segment.

    Args:
        text: The text to convert to speech.
        speed: Speech speed multiplier (0.5-2.0, where 1.0 is normal speed).

    Returns:
        A tuple of (sample_rate, audio_array) with a 24000 Hz sample rate,
        or None when ``text`` is empty/blank or the pipeline yields nothing.

    Raises:
        gr.Error: If the model fails while generating audio for a segment.
    """
    # Nothing to synthesize for missing, empty, or whitespace-only input.
    if not text or not text.strip():
        return None

    pipeline = pipelines['a']  # Use English pipeline
    voice = "af_heart"  # Default voice (US English, female, Heart)

    pack = pipeline.load_voice(voice)

    segments = []
    for _, ps, _ in pipeline(text, voice, speed):
        # The voice pack is indexed by phoneme-sequence length.
        ref_s = pack[len(ps) - 1]

        try:
            audio = model(ps, ref_s, speed)
        except Exception as e:
            # Surface the failure in the Gradio UI, keeping the original cause.
            raise gr.Error(f"Error generating audio: {str(e)}") from e

        # detach/cpu make the conversion safe whatever device the tensor is on;
        # reshape(-1) guarantees a 1-D array so the segments can be concatenated.
        segments.append(audio.detach().cpu().numpy().reshape(-1))

    if not segments:
        return None

    # Return the joined audio with 24kHz sample rate
    return 24000, np.concatenate(segments)
53
+
54
def text_to_audio_b64(text, speed=1.0):
    """Synthesize speech for ``text`` and return it as a base64 WAV string.

    Args:
        text: The text to convert to speech
        speed: Speech speed multiplier (0.5-2.0, where 1.0 is normal speed)

    Returns:
        The WAV file contents encoded as a base64 string, or None when
        ``text_to_audio`` produces no audio.
    """
    import soundfile as sf

    synthesized = text_to_audio(text, speed)
    if synthesized is None:
        return None

    rate, samples = synthesized

    # Write the WAV into an in-memory buffer instead of a file on disk.
    buffer = io.BytesIO()
    sf.write(buffer, samples, rate, format='WAV')

    # Encode the complete buffer contents as base64 text.
    encoded = base64.b64encode(buffer.getvalue())
    return encoded.decode('utf-8')
80
+
81
+ # Create Gradio interface
82
# Layout: text and speed inputs on the left, generated audio on the right,
# followed by usage tips and a collapsed section describing MCP access.
with gr.Blocks(title="Kokoro Text-to-Audio MCP") as app:
    gr.Markdown("# 🎵 Kokoro Text-to-Audio MCP")
    gr.Markdown("Convert text to speech using the Kokoro-82M model")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter your text",
                placeholder="Type something to convert to audio...",
                lines=5
            )
            speed_slider = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Speech Speed"
            )
            submit_btn = gr.Button("Generate Audio")

        with gr.Column():
            audio_output = gr.Audio(label="Generated Audio", type="numpy")

    # Clicking the button runs text_to_audio on the two inputs.
    submit_btn.click(
        fn=text_to_audio,
        inputs=[text_input, speed_slider],
        outputs=[audio_output]
    )

    gr.Markdown("### Usage Tips")
    gr.Markdown("- Adjust the speed slider to modify the pace of speech")

    # Add section about MCP support
    with gr.Accordion("MCP Support (for LLMs)", open=False):
        # The example URL uses the same placeholder host that the closing
        # sentence tells the reader to replace, so the two stay consistent.
        gr.Markdown("""
        ### MCP Support

        This app supports the Model Context Protocol (MCP), allowing Large Language Models like Claude Desktop to use it as a tool.

        To use this app with an MCP client, add the following configuration:

        ```json
        {
          "mcpServers": {
            "kokoroTTS": {
              "url": "https://your-app-url.hf.space/gradio_api/mcp/sse"
            }
          }
        }
        ```

        Replace `your-app-url.hf.space` with your actual Hugging Face Space URL.
        """)
135
+
136
+ # Launch the app with MCP support
137
if __name__ == "__main__":
    # MCP is on by default, matching the previous always-enabled launch.
    # Setting GRADIO_MCP_SERVER to anything other than a truthy value
    # (e.g. "false" or "0") launches the app without the MCP server.
    enable_mcp = os.environ.get('GRADIO_MCP_SERVER', 'True').strip().lower() in (
        'true', '1', 't', 'yes', 'y', 'on'
    )

    app.launch(mcp_server=enable_mcp)