# MyTalkingPal — app.py
# Streamlit app: pick a Chinese-zodiac "speaking coach" avatar, upload speech,
# and converse via Hugging Face inference APIs (STT -> chat -> TTS).
import streamlit as st
import requests
import os
import tempfile
import io
import soundfile as sf
# Hugging Face API Key (Make sure to add it in your HF Space secrets)
# NOTE(review): if HF_API_KEY is unset, HEADERS sends "Bearer None" and every
# API call below will fail with 401 — verify the secret is configured.
HF_API_KEY = os.getenv("HF_API_KEY")
HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"}
# Hugging Face Model APIs (hosted inference endpoints)
HF_SPEECH_TO_TEXT_API = "https://api-inference.huggingface.co/models/openai/whisper-small"
HF_CHATBOT_API = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct"
HF_TEXT_TO_SPEECH_API = "https://api-inference.huggingface.co/models/facebook/mms-tts-eng"
# Chinese Zodiac Avatars with Personalities.
# Keys are the selectable coach names shown in the UI; values are personality
# blurbs that are both displayed to the user and injected into the chatbot
# prompt (see chatbot_response).
avatars = {
    "Rat": "Clever and resourceful. Loves puzzles and word games.",
    "Ox": "Patient and wise. Encourages steady learning.",
    "Tiger": "Brave and adventurous. Loves storytelling!",
    "Rabbit": "Gentle and kind. Makes learning fun and safe.",
    "Dragon": "Bold and confident. Loves to challenge students!",
    "Snake": "Calm and analytical. Gives insightful explanations.",
    "Horse": "Energetic and playful. Encourages fast-paced learning!",
    "Goat": "Creative and artistic. Uses imagination in learning.",
    "Monkey": "Curious and mischievous. Always has fun facts!",
    "Rooster": "Diligent and disciplined. Helps with structured learning.",
    "Dog": "Loyal and friendly. Encourages confidence.",
    "Pig": "Easygoing and supportive. Encourages relaxed learning."
}
# Function to process speech-to-text
def speech_to_text(audio_path):
    """Transcribe a local audio file with the HF Whisper inference API.

    Parameters:
        audio_path: path to an audio file on disk (wav/mp3).

    Returns:
        The transcribed text, or a fallback message if the request fails
        or the API returns an error payload.
    """
    # The HF inference API for audio models expects the raw audio bytes as
    # the request body — a multipart "files" upload is not understood and
    # yields an error response.
    with open(audio_path, "rb") as f:
        audio_bytes = f.read()
    response = requests.post(HF_SPEECH_TO_TEXT_API, headers=HEADERS, data=audio_bytes)
    try:
        result = response.json()
    except ValueError:
        # Non-JSON body (e.g. HTML error page) — nothing to transcribe.
        return "Could not transcribe audio."
    # Success payload is a dict with a "text" key; error payloads are dicts
    # with an "error" key, which falls through to the default.
    if isinstance(result, dict):
        return result.get("text", "Could not transcribe audio.")
    return "Could not transcribe audio."
# Function to get chatbot response
def chatbot_response(user_input, avatar):
    """Ask the HF-hosted chat model for a coach reply in the avatar's voice.

    Parameters:
        user_input: the student's (transcribed) message.
        avatar: key into the module-level ``avatars`` dict; unknown names
            fall back to a generic friendly personality.

    Returns:
        The model's generated reply, or a fallback message on any API error.
    """
    personality = avatars.get(avatar, "Friendly and supportive.")
    prompt = (
        f"You are {avatar}, an AI speaking coach. You are {personality}.\n"
        "Help the student practice English in a fun and engaging way.\n"
        f"User: {user_input}\n{avatar}:"
    )
    payload = {"inputs": prompt}
    response = requests.post(HF_CHATBOT_API, headers=HEADERS, json=payload)
    try:
        result = response.json()
    except ValueError:
        return "I'm here to help!"
    # Text-generation endpoints return a LIST of {"generated_text": ...}
    # on success — calling .get() directly on response.json() raised
    # AttributeError on every successful call. Error payloads are dicts.
    if isinstance(result, list) and result:
        first = result[0]
        if isinstance(first, dict):
            return first.get("generated_text", "I'm here to help!")
    if isinstance(result, dict):
        return result.get("generated_text", "I'm here to help!")
    return "I'm here to help!"
# Function for text-to-speech
def text_to_speech(text):
    """Synthesize speech for *text* with the HF MMS-TTS inference API.

    Parameters:
        text: the reply string to vocalize.

    Returns:
        Raw audio bytes as returned by the API.

    Raises:
        requests.HTTPError: if the API responds with an error status —
            previously the JSON/HTML error body was silently returned and
            fed to st.audio as if it were audio data.
    """
    payload = {"inputs": text}
    response = requests.post(HF_TEXT_TO_SPEECH_API, headers=HEADERS, json=payload)
    response.raise_for_status()
    return response.content
# Streamlit UI — runs top to bottom on every interaction (Streamlit rerun model).
st.title("🎙️ AI Speaking Coach - Talking Pals")
st.write("Choose an avatar and start speaking!")
# Select Avatar: the chosen key drives both the displayed blurb and the
# personality injected into the chatbot prompt.
avatar = st.selectbox("Pick your speaking coach:", list(avatars.keys()))
st.write(f"**{avatar}** - {avatars[avatar]}")
# Upload or record audio
audio_file = st.file_uploader("Record or upload your voice", type=["wav", "mp3"])
show_text = st.checkbox("Show conversation text")
if audio_file:
    # Persist the upload to a temp file so speech_to_text can re-read it by
    # path. delete=False keeps the file alive after the with-block.
    # NOTE(review): the temp file is never removed — cleanup TODO.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
        tmpfile.write(audio_file.getvalue())
        tmpfile_path = tmpfile.name
    # Play back the user's own recording.
    st.audio(tmpfile_path)
    # Pipeline: transcribe -> chat reply -> synthesized speech.
    user_text = speech_to_text(tmpfile_path)
    if show_text:
        st.write(f"**You:** {user_text}")
    ai_reply = chatbot_response(user_text, avatar)
    if show_text:
        st.write(f"**{avatar}:** {ai_reply}")
    speech_audio = text_to_speech(ai_reply)
    audio_bytes = io.BytesIO(speech_audio)
    st.audio(audio_bytes, format="audio/wav")