File size: 3,403 Bytes
ee6e7eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import streamlit as st
import requests
import os
import tempfile
import io
import soundfile as sf

# Hugging Face API key, read from the environment
# (make sure to add it in your HF Space secrets).
HF_API_KEY = os.getenv("HF_API_KEY")
# Only attach an Authorization header when a key is actually configured:
# formatting a missing key would send the literal token "Bearer None".
HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}

# Hosted-inference endpoints used by the three request helpers in this file:
# speech recognition, chat generation and speech synthesis, in that order.
# NOTE(review): the chat model id carries no version suffix - confirm that this
# repository id actually resolves on the Hugging Face Hub.
(
    HF_SPEECH_TO_TEXT_API,
    HF_CHATBOT_API,
    HF_TEXT_TO_SPEECH_API,
) = (
    "https://api-inference.huggingface.co/models/openai/whisper-small",
    "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct",
    "https://api-inference.huggingface.co/models/facebook/mms-tts-eng",
)

# Chinese zodiac coaches: each avatar name maps to the personality blurb that
# is displayed under the selector and inserted into the chatbot prompt.
# Insertion order is the order the names appear in the selector.
avatars = dict(
    (
        ("Rat", "Clever and resourceful. Loves puzzles and word games."),
        ("Ox", "Patient and wise. Encourages steady learning."),
        ("Tiger", "Brave and adventurous. Loves storytelling!"),
        ("Rabbit", "Gentle and kind. Makes learning fun and safe."),
        ("Dragon", "Bold and confident. Loves to challenge students!"),
        ("Snake", "Calm and analytical. Gives insightful explanations."),
        ("Horse", "Energetic and playful. Encourages fast-paced learning!"),
        ("Goat", "Creative and artistic. Uses imagination in learning."),
        ("Monkey", "Curious and mischievous. Always has fun facts!"),
        ("Rooster", "Diligent and disciplined. Helps with structured learning."),
        ("Dog", "Loyal and friendly. Encourages confidence."),
        ("Pig", "Easygoing and supportive. Encourages relaxed learning."),
    )
)

# Function to process speech-to-text
def speech_to_text(audio_path):
    """Transcribe the audio file at *audio_path* via the speech-to-text endpoint.

    Args:
        audio_path: Path of an audio file that can be opened in binary mode.

    Returns:
        The transcript string from the reply's "text" field, or
        "Could not transcribe audio." when the request fails, the reply is not
        JSON, or the reply carries no "text" field (e.g. an error payload).

    Raises:
        OSError: If the audio file cannot be opened or read.
    """
    fallback = "Could not transcribe audio."

    with open(audio_path, "rb") as f:
        audio_bytes = f.read()

    try:
        # The hosted inference endpoint takes the raw audio bytes as the
        # request body; a multipart form upload (files=...) is not decoded.
        response = requests.post(
            HF_SPEECH_TO_TEXT_API,
            headers=HEADERS,
            data=audio_bytes,
            timeout=60,  # without a timeout a stalled connection blocks forever
        )
        result = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON.
        return fallback

    # Error replies are dicts without "text"; anything else is unexpected.
    if isinstance(result, dict):
        return result.get("text", fallback)
    return fallback

# Function to get chatbot response
def chatbot_response(user_input, avatar):
    """Ask the chat model to answer *user_input* in the voice of *avatar*.

    Args:
        user_input: The student's utterance (plain text).
        avatar: Avatar name; looked up in ``avatars`` for its personality,
            falling back to "Friendly and supportive." for unknown names.

    Returns:
        The generated reply with the echoed prompt removed, or
        "I'm here to help!" when the request fails, the reply cannot be
        parsed, or the model produced no text.
    """
    fallback = "I'm here to help!"
    personality = avatars.get(avatar, "Friendly and supportive.")
    prompt = (
        f"You are {avatar}, an AI speaking coach. You are {personality}.\n"
        "Help the student practice English in a fun and engaging way.\n"
        f"User: {user_input}\n{avatar}:"
    )

    try:
        response = requests.post(
            HF_CHATBOT_API,
            headers=HEADERS,
            json={"inputs": prompt},
            timeout=60,  # without a timeout a stalled connection blocks forever
        )
        result = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON.
        return fallback

    # Text-generation replies arrive as a list of candidates, while error
    # replies are a plain dict; normalise both to a single dict.
    if isinstance(result, list):
        result = result[0] if result else {}
    if not isinstance(result, dict):
        return fallback

    generated = result.get("generated_text")
    if not isinstance(generated, str):
        return fallback

    # The model may echo the prompt in front of its continuation; keep only
    # the newly generated part so the coach does not read the prompt aloud.
    if generated.startswith(prompt):
        generated = generated[len(prompt):]
    generated = generated.strip()
    return generated or fallback

# Function for text-to-speech
def text_to_speech(text):
    """Send *text* to the text-to-speech endpoint and return the reply body.

    The body is returned as raw bytes exactly as received; the HTTP status is
    not inspected, so an error reply's body is returned unchanged as well.
    """
    return requests.post(
        HF_TEXT_TO_SPEECH_API,
        headers=HEADERS,
        json={"inputs": text},
    ).content

# Streamlit UI
st.title("🎙️ AI Speaking Coach - Talking Pals")
st.write("Choose an avatar and start speaking!")

# Select Avatar
avatar = st.selectbox("Pick your speaking coach:", list(avatars.keys()))
st.write(f"**{avatar}** - {avatars[avatar]}")

# Upload or record audio
audio_file = st.file_uploader("Record or upload your voice", type=["wav", "mp3"])
show_text = st.checkbox("Show conversation text")

if audio_file:
    # Keep the upload's own extension (the uploader accepts both wav and mp3)
    # instead of labelling every temp file ".wav".
    suffix = os.path.splitext(audio_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmpfile:
        tmpfile.write(audio_file.getvalue())
        tmpfile_path = tmpfile.name

    try:
        # Play the upload back, then transcribe it from the same temp file.
        st.audio(tmpfile_path)
        user_text = speech_to_text(tmpfile_path)
    finally:
        # delete=False means nothing removes the file automatically; without
        # this every rerun of the script would leave another temp file behind.
        try:
            os.remove(tmpfile_path)
        except OSError:
            pass  # best effort: a failed cleanup must not break the page

    if show_text:
        st.write(f"**You:** {user_text}")

    ai_reply = chatbot_response(user_text, avatar)
    if show_text:
        st.write(f"**{avatar}:** {ai_reply}")

    # Speak the coach's reply: wrap the synthesized bytes for the audio player.
    speech_audio = text_to_speech(ai_reply)
    audio_bytes = io.BytesIO(speech_audio)
    st.audio(audio_bytes, format="audio/wav")