File size: 1,576 Bytes
b86e718
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Third-party dependencies (in Colab, install first with:
#   !pip install openai-whisper gradio gtts groq
# The original try/except re-imported the same modules in both branches,
# which accomplished nothing and used a bare `except:` — removed.)
import os
import tempfile

import whisper
import gradio as gr
from gtts import gTTS
from groq import Groq

# Load the Whisper speech-to-text model once at startup.
# "base" trades accuracy for load time / speed; larger checkpoints exist.
whisper_model = whisper.load_model("base")

# Groq API key: read from the environment instead of hard-coding a secret
# in source (the previous literal key was committed to the file — a leaked
# credential; rotate it).  Set GROQ_API_KEY before running.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
client = Groq(api_key=GROQ_API_KEY)

# Core logic
def voice_chat(audio_path):
    # Step 1: Transcribe audio
    result = whisper_model.transcribe(audio_path)
    user_text = result["text"]

    # Step 2: Groq LLM response
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": user_text}],
        model="llama3-8b-8192",
    )
    bot_reply = response.choices[0].message.content

    # Step 3: Text to speech using gTTS
    tts = gTTS(bot_reply)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
        tts.save(f.name)
        audio_response_path = f.name

    return user_text, bot_reply, audio_response_path

# Gradio interface
# Wire the pipeline into a Gradio UI: microphone in; transcript, reply
# text, and spoken audio out.  live=True re-runs on each new recording.
_output_components = [
    gr.Text(label="πŸ“ Transcribed Input"),
    gr.Text(label="πŸ€– LLM Reply"),
    gr.Audio(label="πŸ”‰ Spoken Reply", type="filepath"),
]
iface = gr.Interface(
    fn=voice_chat,
    inputs=gr.Microphone(label="🎀 Speak your question", type="filepath"),
    outputs=_output_components,
    title="πŸ—£οΈ Real-Time Voice-to-Voice Chatbot (Whisper + Groq + gTTS)",
    live=True,
)

# Launch the web app only when executed as a script.
if __name__ == "__main__":
    iface.launch()