amks7 committed on
Commit
315aef2
·
verified ·
1 Parent(s): 007c694

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -88
app.py CHANGED
@@ -1,88 +1,88 @@
1
- import gradio as gr
2
- from google import genai
3
- from google.genai import types
4
- import wave
5
- import os
6
- from dotenv import load_dotenv
7
-
8
- # Load API key
9
- load_dotenv()
10
- GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
11
- client = genai.Client(api_key=GOOGLE_API_KEY)
12
-
13
- # Save audio from PCM to WAV
14
- def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
15
- with wave.open(filename, "wb") as wf:
16
- wf.setnchannels(channels)
17
- wf.setsampwidth(sample_width)
18
- wf.setframerate(rate)
19
- wf.writeframes(pcm)
20
-
21
- # Gemini TTS generation function
22
- def generate_speech(text, voice):
23
- try:
24
- response = client.models.generate_content(
25
- model="gemini-2.5-flash-preview-tts",
26
- contents=text,
27
- config=types.GenerateContentConfig(
28
- response_modalities=["AUDIO"],
29
- speech_config=types.SpeechConfig(
30
- voice_config=types.VoiceConfig(
31
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
32
- voice_name=voice
33
- )
34
- )
35
- )
36
- )
37
- )
38
-
39
- audio_data = response.candidates[0].content.parts[0].inline_data.data
40
- output_path = "output.wav"
41
- wave_file(output_path, audio_data)
42
- return output_path, output_path, "Speech generated successfully."
43
-
44
- except Exception as e:
45
- return None, None, f"Error: {str(e)}"
46
-
47
- # Gradio app using Blocks
48
- with gr.Blocks(title="Gemini TTS Demo") as demo:
49
- gr.Markdown("## Google Gemini Text-to-Speech")
50
- gr.Markdown("Enter text below, choose a voice, and listen to the generated speech.")
51
-
52
- with gr.Row():
53
- text_input = gr.Textbox(
54
- lines=3,
55
- label="Enter Text",
56
- placeholder="Example: Welcome to the world of AI."
57
- )
58
- voice_input = gr.Dropdown(
59
- choices=["Kore", "Wes"],
60
- value="Kore",
61
- label="Select Voice"
62
- )
63
-
64
- with gr.Row():
65
- generate_btn = gr.Button("Generate Speech", variant="primary")
66
-
67
- with gr.Row():
68
- audio_output = gr.Audio(label="Generated Audio")
69
- file_output = gr.File(label="Download Audio File")
70
- status_output = gr.Textbox(label="Status", interactive=False)
71
-
72
- examples = gr.Examples(
73
- examples=[
74
- ["Good morning! Hope you have a great day ahead.", "Kore"],
75
- ["Welcome to the future of AI voice generation.", "Wes"],
76
- ["Your appointment is scheduled for 3 PM on Monday.", "Kore"],
77
- ["This is a demo of Google's Gemini text-to-speech feature.", "Wes"],
78
- ],
79
- inputs=[text_input, voice_input],
80
- )
81
-
82
- generate_btn.click(
83
- fn=generate_speech,
84
- inputs=[text_input, voice_input],
85
- outputs=[audio_output, file_output, status_output],
86
- )
87
-
88
- demo.launch()
 
1
+ import gradio as gr
2
+ from google import genai
3
+ from google.genai import types
4
+ import wave
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
+ # Load API key
9
+ load_dotenv()
10
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
11
+ client = genai.Client(api_key=GOOGLE_API_KEY)
12
+
13
+ # Save audio from PCM to WAV
14
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM audio bytes to a WAV file.

    Args:
        filename: Destination path (``str`` or path-like object) or an
            already-open binary file object.
        pcm: Raw PCM frames as a bytes-like object.
        channels: Number of audio channels written to the WAV header.
        rate: Sample rate in Hz written to the WAV header.
        sample_width: Bytes per sample written to the WAV header.
    """
    # Normalise path-like objects (e.g. pathlib.Path) to a plain string so the
    # wave module opens the file itself; on interpreters where wave.open()
    # only recognises ``str`` as a path, a Path would otherwise be mistaken
    # for a file object.
    if isinstance(filename, os.PathLike):
        filename = os.fspath(filename)

    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(pcm)
20
+
21
+ # Gemini TTS generation function
22
def generate_speech(text, voice):
    """Generate speech audio for ``text`` with the given prebuilt voice.

    Args:
        text: The text to synthesise.
        voice: Name of the prebuilt voice passed to the TTS model.

    Returns:
        A 3-tuple ``(audio_path, file_path, status_message)``. On success both
        paths point at the same freshly written WAV file; on any failure both
        are ``None`` and the status message starts with ``"Error: "``.
    """
    # Local import keeps this function self-contained.
    import tempfile

    # Reject empty input up front instead of spending an API call on it.
    if not text or not text.strip():
        return None, None, "Error: Please enter some text to convert to speech."

    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=text,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name=voice
                        )
                    )
                ),
            ),
        )

        # A response without candidates/parts/inline data would otherwise
        # surface as an opaque IndexError/AttributeError message.
        try:
            audio_data = response.candidates[0].content.parts[0].inline_data.data
        except (AttributeError, IndexError, TypeError):
            audio_data = None
        if not audio_data:
            return None, None, "Error: The model returned no audio data."

        # Use a unique file per request: a fixed "output.wav" would be
        # overwritten when two requests are processed at the same time.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        wave_file(output_path, audio_data)
        return output_path, output_path, "Speech generated successfully."

    except Exception as e:
        # UI boundary: report the failure in the status box rather than crash.
        return None, None, f"Error: {str(e)}"
46
+
47
+ # Gradio app using Blocks
48
# Prebuilt voice names offered in the dropdown. Every voice used in the
# examples below must appear in this list, otherwise clicking an example
# selects a value the dropdown does not offer.
VOICE_CHOICES = [
    "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda", "Orus", "Aoede",
    "Callirrhoe", "Autonoe", "Enceladus", "Iapetus", "Umbriel", "Algieba",
    "Despina", "Erinome", "Algenib", "Rasalgethi", "Laomedeia", "Achernar",
    "Alnilam", "Schedar", "Gacrux", "Pulcherrima", "Achird", "Zubenelgenubi",
    "Vindemiatrix", "Sadachbia", "Sadaltager", "Sulafat",
]

# NOTE(review): the source indentation was lost, so the grouping of
# components into rows below is reconstructed -- confirm against the intended
# layout.
with gr.Blocks(title="Gemini TTS Demo") as demo:
    gr.Markdown("## Google Gemini Text-to-Speech")
    gr.Markdown("Enter text below, choose a voice, and listen to the generated speech.")

    # Inputs: the text to speak and the voice to use.
    with gr.Row():
        text_input = gr.Textbox(
            lines=3,
            label="Enter Text",
            placeholder="Example: Welcome to the world of AI."
        )
        voice_input = gr.Dropdown(
            choices=VOICE_CHOICES,
            value="Kore",
            label="Select Voice"
        )

    with gr.Row():
        generate_btn = gr.Button("Generate Speech", variant="primary")

    # Outputs: playable audio, downloadable file, and a status message.
    with gr.Row():
        audio_output = gr.Audio(label="Generated Audio")
        file_output = gr.File(label="Download Audio File")
        status_output = gr.Textbox(label="Status", interactive=False)

    # Clickable examples that fill both inputs. The former "Wes" entries were
    # replaced with "Puck" because "Wes" is not in VOICE_CHOICES.
    examples = gr.Examples(
        examples=[
            ["Good morning! Hope you have a great day ahead.", "Kore"],
            ["Welcome to the future of AI voice generation.", "Puck"],
            ["Your appointment is scheduled for 3 PM on Monday.", "Kore"],
            ["This is a demo of Google's Gemini text-to-speech feature.", "Puck"],
        ],
        inputs=[text_input, voice_input],
    )

    # generate_speech returns (audio path, file path, status message), which
    # map one-to-one onto the three output components.
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_input],
        outputs=[audio_output, file_output, status_output],
    )

demo.launch()