# Language Learning Companion — Streamlit app pairing Chinese-zodiac avatar
# personas with Whisper speech-to-text, Mistral response generation, and
# Coqui TTS speech output.
import logging
import os

import av
import numpy as np
import streamlit as st
import streamlit.components.v1 as components
import torch
import whisper
from scipy.io import wavfile
from streamlit_webrtc import webrtc_streamer, WebRtcMode, AudioProcessorBase
from transformers import AutoModelForCausalLM, AutoTokenizer
from TTS.api import TTS
# Configure logging | |
logging.basicConfig( | |
level=logging.INFO, | |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', | |
handlers=[ | |
logging.FileHandler("language_companion.log"), | |
logging.StreamHandler() | |
] | |
) | |
logger = logging.getLogger(__name__) | |
# Persona definitions for the twelve Chinese-zodiac learning companions.
# Each entry maps an animal key to a dict with a fixed schema:
#   name                  - display name shown on the carousel card
#   description           - one-line blurb shown to the learner
#   teaching_style        - metaphor injected into the LLM prompt
#   learning_approach     - pedagogy line injected into the LLM prompt
#   motivation_techniques - encouragement strategies (not read by any code
#                           visible in this file - TODO confirm intended use)
#   voice_sample          - sentence synthesized for the "Preview Voice" button
#   image                 - filename under images/ used by the carousel
AVATAR_PERSONAS = {
    "Rat": {
        "name": "Puzzle Master Rat",
        "description": "I love solving word puzzles and making learning fun!",
        "teaching_style": "Learning is like solving a fun game",
        "learning_approach": "I break big words into small, easy pieces",
        "motivation_techniques": [
            "Turn hard words into exciting challenges",
            "Make learning feel like play"
        ],
        "voice_sample": "Let's solve this language puzzle together!",
        "image": "rat.png"
    },
    "Ox": {
        "name": "Professor Steady Ox",
        "description": "I help you learn English step by step, slowly and carefully",
        "teaching_style": "Learning is like building a big tower, one block at a time",
        "learning_approach": "We go slow and make sure you understand everything",
        "motivation_techniques": [
            "Break big lessons into small, easy parts",
            "Celebrate every little success"
        ],
        "voice_sample": "We will build your English skills step by step, carefully and steadily.",
        "image": "ox.png"
    },
    "Tiger": {
        "name": "Adventure Coach Tiger",
        "description": "Learning English is an exciting adventure!",
        "teaching_style": "Every English lesson is a fun mission",
        "learning_approach": "We learn by trying new things and having fun",
        "motivation_techniques": [
            "Make learning feel like a game",
            "Cheer you on with lots of energy"
        ],
        "voice_sample": "Are you ready for an exciting English language adventure?",
        "image": "tiger.png"
    },
    "Rabbit": {
        "name": "Storyteller Rabbit",
        "description": "I love telling stories that help you learn English",
        "teaching_style": "Learning through fun and friendly stories",
        "learning_approach": "Words become magic when they tell a story",
        "motivation_techniques": [
            "Use funny and interesting stories",
            "Help you imagine new words"
        ],
        "voice_sample": "Let me tell you a story that will help you learn English.",
        "image": "rabbit.png"
    },
    "Dragon": {
        "name": "Smart Dragon",
        "description": "I help you understand English like a language explorer",
        "teaching_style": "Learning is like discovering a new world",
        "learning_approach": "We look at words like they are treasure maps",
        "motivation_techniques": [
            "Make learning feel like an exciting discovery",
            "Explain things in a clear way"
        ],
        "voice_sample": "Let us explore the world of English together!",
        "image": "dragon.png"
    },
    "Snake": {
        "name": "Wise Snake",
        "description": "I help you understand English slowly and carefully",
        "teaching_style": "Learning is like solving a gentle puzzle",
        "learning_approach": "We think about words and their meanings",
        "motivation_techniques": [
            "Take time to understand each word",
            "Think about how words connect"
        ],
        "voice_sample": "Let's understand language together, step by step.",
        "image": "snake.png"
    },
    "Horse": {
        "name": "Energetic Coach Horse",
        "description": "Let's speak English and have fun!",
        "teaching_style": "Learning is an active, exciting game",
        "learning_approach": "We learn by speaking and playing",
        "motivation_techniques": [
            "Speak English with lots of energy",
            "Make learning feel like a fun activity"
        ],
        "voice_sample": "Come on, let's speak English and have fun doing it!",
        "image": "horse.png"
    },
    "Goat": {
        "name": "Creative Goat",
        "description": "I help you draw pictures with English words",
        "teaching_style": "Learning is like creating colorful art",
        "learning_approach": "We use imagination to learn words",
        "motivation_techniques": [
            "Make words feel like colorful pictures",
            "Use creativity to remember"
        ],
        "voice_sample": "Let's paint beautiful pictures with our English words!",
        # NOTE(review): key is "Goat" but the asset is sheep.png - confirm
        # this filename mismatch is intentional.
        "image": "sheep.png"
    },
    "Monkey": {
        "name": "Playful Monkey",
        "description": "Learning English is the most fun game!",
        "teaching_style": "Every lesson is a funny, exciting game",
        "learning_approach": "We laugh and learn at the same time",
        "motivation_techniques": [
            "Turn learning into a funny game",
            "Make English feel like play"
        ],
        "voice_sample": "Learning English is the most fun game we'll play today!",
        "image": "monkey.png"
    },
    "Rooster": {
        "name": "Pronunciation Rooster",
        "description": "I help you speak English clearly and correctly",
        "teaching_style": "Learning is about saying words just right",
        "learning_approach": "We practice saying words perfectly",
        "motivation_techniques": [
            "Practice saying words clearly",
            "Make pronunciation feel like a fun challenge"
        ],
        "voice_sample": "Listen carefully and repeat after me, with perfect pronunciation!",
        "image": "rooster.png"
    },
    "Dog": {
        "name": "Friendly Dog",
        "description": "I'm always here to help you learn English",
        "teaching_style": "Learning is about being kind and patient",
        "learning_approach": "We learn together, step by step",
        "motivation_techniques": [
            "Cheer you on with lots of love",
            "Make you feel confident"
        ],
        "voice_sample": "You're doing great! Keep practicing your English.",
        "image": "dog.png"
    },
    "Pig": {
        "name": "Calm Pig",
        "description": "Let's learn English together, nice and easy",
        "teaching_style": "Learning is relaxed and comfortable",
        "learning_approach": "We take our time and enjoy learning",
        "motivation_techniques": [
            "Make learning feel relaxed",
            "No stress, just fun"
        ],
        "voice_sample": "Let's learn English together, nice and easy.",
        "image": "pig.png"
    }
}
class CharacterSelector:
    """Builds the avatar-selection carousel UI and synthesizes voice previews.

    The Swiper carousel is rendered as raw HTML inside a Streamlit component;
    voice previews are generated on demand with the Coqui YourTTS model.
    """

    def __init__(self):
        # The multilingual TTS model is heavyweight to load; create one
        # CharacterSelector per session and reuse it for all previews.
        self.tts_model = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts")

    def create_avatar_carousel(self):
        """Return an HTML string rendering a Swiper carousel of all avatars.

        One slide per entry in AVATAR_PERSONAS, each with the avatar image,
        name, description, and a voice-preview button that posts a message
        to the parent window.

        NOTE(review): the <img> tags use relative paths under images/; inside
        the sandboxed component iframe these will not resolve against the
        Streamlit server's static files - confirm the images actually load.
        """
        carousel_html = """
        <link rel="stylesheet" href="https://unpkg.com/swiper/swiper-bundle.min.css">
        <script src="https://unpkg.com/swiper/swiper-bundle.min.js"></script>
        <style>
            .swiper-container {
                width: 100%;
                height: 500px;
            }
            .swiper-slide {
                text-align: center;
                background: #f4f4f4;
                display: flex;
                flex-direction: column;
                align-items: center;
                justify-content: center;
                border-radius: 18px;
                padding: 20px;
            }
            .avatar-image {
                width: 250px;
                height: 250px;
                object-fit: cover;
                border-radius: 50%;
                margin-bottom: 15px;
            }
            .voice-preview-btn {
                margin-top: 10px;
                padding: 10px;
                background-color: #4CAF50;
                color: white;
                border: none;
                border-radius: 5px;
                cursor: pointer;
            }
        </style>
        <div class="swiper-container">
            <div class="swiper-wrapper">
        """
        # One slide per persona; data-avatar carries the dict key so the JS
        # side can report which avatar was chosen or previewed.
        for avatar_key, avatar_info in AVATAR_PERSONAS.items():
            carousel_html += f"""
            <div class="swiper-slide" data-avatar="{avatar_key}">
                <img src="images/{avatar_info['image']}" class="avatar-image" alt="{avatar_info['name']}">
                <h3>{avatar_info['name']}</h3>
                <p>{avatar_info['description']}</p>
                <button class="voice-preview-btn" onclick="previewVoice('{avatar_key}')">
                    Preview Voice
                </button>
            </div>
            """
        carousel_html += """
            </div>
            <div class="swiper-pagination"></div>
            <div class="swiper-button-prev"></div>
            <div class="swiper-button-next"></div>
        </div>
        <script>
            var swiper = new Swiper('.swiper-container', {
                slidesPerView: 'auto',
                centeredSlides: true,
                spaceBetween: 30,
                pagination: {
                    el: '.swiper-pagination',
                    clickable: true
                },
                navigation: {
                    nextEl: '.swiper-button-next',
                    prevEl: '.swiper-button-prev',
                }
            });
            function previewVoice(avatarKey) {
                window.parent.postMessage({
                    type: 'previewVoice',
                    avatarKey: avatarKey
                }, '*');
            }
            function selectAvatar(avatarKey) {
                window.parent.postMessage({
                    type: 'avatarSelected',
                    avatarKey: avatarKey
                }, '*');
            }
        </script>
        """
        return carousel_html

    def generate_voice_preview(self, avatar_key):
        """Synthesize the avatar's sample sentence to "<key>_preview.wav".

        Returns the generated file path, or None if the key is unknown or
        synthesis fails.
        """
        avatar = AVATAR_PERSONAS.get(avatar_key)
        if avatar is None:
            # Fix: an unknown key previously raised KeyError before the
            # try block, crashing the caller instead of degrading gracefully.
            logger.error(f"Unknown avatar key for voice preview: {avatar_key}")
            return None
        try:
            self.tts_model.tts_to_file(
                text=avatar['voice_sample'],
                file_path=f"{avatar_key}_preview.wav"
            )
            return f"{avatar_key}_preview.wav"
        except Exception as e:
            # Use the module-level logger (was logging.error) for consistency
            # with the rest of the file.
            logger.error(f"Voice preview error for {avatar_key}: {e}")
            return None
def main():
    """First-draft entry point: renders the Swiper avatar carousel with
    voice-preview buttons and reacts to selections via query params.

    NOTE(review): this module defines ``main`` three times; this version is
    shadowed by the later definitions.  The ``if __name__`` guard that used
    to follow this function was removed because it executed *before*
    ``LanguageLearningCompanion`` (defined further down the file) existed
    and raised NameError once an avatar was selected; only the guard at the
    end of the file should run.
    """
    st.title("Language Learning Companion")
    # Character Selection Carousel
    character_selector = CharacterSelector()
    carousel_html = character_selector.create_avatar_carousel()
    components.html(carousel_html, height=600, scrolling=True)

    # Bridge iframe postMessage events back toward Streamlit.
    # NOTE(review): 'streamlit:setComponentValue' messages do not populate
    # query params, so the experimental_get_query_params reads below likely
    # never see these values - confirm the intended communication channel.
    components.html("""
    <script>
    window.addEventListener('message', function(event) {
        if (event.data.type === 'avatarSelected') {
            window.parent.postMessage({
                type: 'streamlit:setComponentValue',
                key: 'selected_avatar',
                value: event.data.avatarKey
            }, '*');
        }
        if (event.data.type === 'previewVoice') {
            window.parent.postMessage({
                type: 'streamlit:setComponentValue',
                key: 'preview_voice',
                value: event.data.avatarKey
            }, '*');
        }
    });
    </script>
    """, height=0)

    # st.experimental_get_query_params is deprecated in newer Streamlit
    # (replaced by st.query_params); kept as-is for the pinned version.
    preview_avatar = st.experimental_get_query_params().get('preview_voice', [None])[0]
    if preview_avatar:
        preview_audio = character_selector.generate_voice_preview(preview_avatar)
        if preview_audio:
            st.audio(preview_audio, format='audio/wav')

    selected_avatar = st.experimental_get_query_params().get('selected_avatar', [None])[0]
    if selected_avatar:
        # Initialize Language Learning Companion with selected avatar
        companion = LanguageLearningCompanion(selected_avatar)
        # Rest of the conversation interface...
class AudioProcessor(AudioProcessorBase):
    """streamlit-webrtc audio processor that buffers microphone samples and,
    once ``recording_duration`` seconds have accumulated, runs the full
    STT -> LLM -> TTS pipeline through the supplied companion object."""

    def __init__(self, companion):
        # Companion must provide transcribe_audio / generate_response /
        # text_to_speech; duck-typed, so any such object works.
        self.companion = companion
        self.audio_buffer = []        # accumulated float32 samples
        self.sample_rate = 16000      # assumed capture rate - TODO confirm
        self.recording_duration = 5   # seconds of audio per processing batch

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        """Accumulate one audio frame; process the buffer when it is full.

        Always returns the incoming frame unchanged (passthrough).
        """
        audio_input = frame.to_ndarray(format="f32")
        # NOTE(review): PyAV usually returns (channels, samples); indexing
        # [:, 0] would take one sample per channel rather than the first
        # channel - verify which axis is which for this format.
        if len(audio_input.shape) > 1:
            audio_input = audio_input[:, 0]
        self.audio_buffer.extend(audio_input)

        needed = self.sample_rate * self.recording_duration
        if len(self.audio_buffer) >= needed:
            try:
                audio_array = np.array(self.audio_buffer[:needed], dtype=np.float32)
                # Fix: the original called sf.write() but soundfile was never
                # imported (NameError at runtime); scipy.io.wavfile is used
                # instead (note its argument order: path, rate, data).
                wavfile.write('temp_recording.wav', self.sample_rate, audio_array)

                transcription = self.companion.transcribe_audio('temp_recording.wav')
                if transcription:
                    response = self.companion.generate_response(transcription)
                    if response:
                        # Side effect: writes response.wav for the UI to play.
                        self.companion.text_to_speech(response)
                        # NOTE(review): mutating st.session_state from the
                        # WebRTC worker thread may not trigger a UI rerun -
                        # confirm this propagates to the page.
                        st.session_state.transcription = transcription
                        st.session_state.response = response

                # Keep samples beyond the processed window instead of
                # discarding the whole buffer (the original dropped them).
                del self.audio_buffer[:needed]
            except Exception as e:
                # Use the module-level logger (was logging.error).
                logger.error(f"Audio processing error: {e}")
                st.error("Error processing audio")
                self.audio_buffer = []
        return frame
def main():
    """Second-draft entry point: selectbox avatar choice plus a WebRTC
    microphone stream processed live by AudioProcessor.

    NOTE(review): ``main`` is defined three times in this module; this
    version is shadowed by the final definition.  The ``if __name__`` guard
    that used to follow it was removed - it ran unconditionally before
    ``LanguageLearningCompanion`` (defined later in the file) existed and
    raised NameError on every startup; only the file's final guard remains.
    """
    st.title("Language Learning Companion")

    # Conversation state must survive Streamlit reruns, so it lives in
    # session_state rather than in local variables.
    if 'transcription' not in st.session_state:
        st.session_state.transcription = ""
    if 'response' not in st.session_state:
        st.session_state.response = ""

    # Avatar Selection
    avatar_selection = st.selectbox(
        "Choose Your Learning Buddy",
        list(AVATAR_PERSONAS.keys())
    )

    # Initialize Companion
    companion = LanguageLearningCompanion(avatar_selection)

    # WebRTC audio capture; the factory binds the current companion into a
    # fresh processor on every (re)connect.
    ctx = webrtc_streamer(
        key="language-learning",
        mode=WebRtcMode.SENDRECV,
        audio_processor_factory=lambda: AudioProcessor(companion),
        media_stream_constraints={
            "audio": True,
            "video": False
        }
    )

    # Display Conversation
    if st.session_state.transcription:
        st.subheader("Your Speech")
        st.write(st.session_state.transcription)
    if st.session_state.response:
        st.subheader("Companion's Response")
        st.write(st.session_state.response)
        # Play the synthesized reply if the TTS step produced one.
        if os.path.exists('response.wav'):
            st.audio('response.wav', format='audio/wav')
class LanguageLearningCompanion:
    """Core pipeline: speech-to-text (Whisper) -> persona-conditioned reply
    (Mistral-7B-Instruct) -> text-to-speech (Coqui YourTTS)."""

    def __init__(self, avatar_name):
        try:
            # 8-bit quantized Mistral to reduce memory; device_map="auto"
            # lets accelerate place layers on available devices.
            self.model = AutoModelForCausalLM.from_pretrained(
                "mistralai/Mistral-7B-Instruct-v0.1",
                load_in_8bit=True,
                device_map="auto"
            )
            self.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
            # Speech models: Whisper "base" for STT, YourTTS for synthesis.
            self.whisper_model = whisper.load_model("base")
            self.tts_model = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts")
            # Unknown avatar names silently fall back to the "Rat" persona.
            self.avatar = AVATAR_PERSONAS.get(avatar_name, AVATAR_PERSONAS["Rat"])
            logger.info(f"Initialized Language Learning Companion with {avatar_name}")
        except Exception as e:
            # NOTE(review): on failure the instance is left half-initialized
            # (attributes missing) yet construction still "succeeds"; later
            # method calls will raise AttributeError.  Consider re-raising.
            logger.error(f"Model initialization error: {e}")
            st.error("Error loading models. Please try again later.")

    def transcribe_audio(self, audio_path):
        """Transcribe audio using Whisper.

        Returns the transcript text, or None on failure (error shown in UI).
        """
        try:
            result = self.whisper_model.transcribe(audio_path)
            logger.info(f"Successfully transcribed audio: {result['text']}")
            return result['text']
        except Exception as e:
            logger.error(f"Transcription error: {e}")
            st.error("Error transcribing audio.")
            return None

    def generate_response(self, user_input):
        """Generate educational response using Mistral.

        The persona fields steer tone; the numbered instructions steer
        content.  Returns the decoded model output, or None on failure.
        """
        try:
            prompt = f"""
            Avatar: {self.avatar['name']}
            Teaching Style: {self.avatar['teaching_style']}
            Learning Approach: {self.avatar['learning_approach']}
            User Input: {user_input}
            Generate an encouraging, educational response that:
            1. Addresses the user's input
            2. Provides gentle language learning guidance
            3. Maintains an engaging, child-friendly tone
            """
            inputs = self.tokenizer(prompt, return_tensors="pt")
            # NOTE(review): max_length counts prompt tokens too, and decoding
            # outputs[0] echoes the prompt back into the response - callers
            # may want max_new_tokens and to slice off the prompt.  Confirm.
            outputs = self.model.generate(**inputs, max_length=200)
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            logger.info(f"Generated response for input: {user_input}")
            return response
        except Exception as e:
            logger.error(f"Response generation error: {e}")
            st.error("Error generating response.")
            return None

    def text_to_speech(self, text):
        """Convert text to speech.

        Writes the synthesized audio to "response.wav" in the working
        directory and returns that path, or None on failure.
        """
        try:
            self.tts_model.tts_to_file(text=text, file_path="response.wav")
            logger.info("Successfully converted text to speech")
            return "response.wav"
        except Exception as e:
            logger.error(f"Text-to-speech conversion error: {e}")
            st.error("Error converting text to speech.")
            return None
def main():
    """Render the companion UI: avatar picker, live audio stream, and a
    placeholder transcribe-and-respond action.

    This is the last of the module's three ``main`` definitions and the one
    the ``__main__`` guard below actually invokes.
    """
    st.title("Language Learning Companion")

    # Let the learner pick which zodiac persona will coach them.
    chosen_buddy = st.selectbox(
        "Choose Your Learning Buddy",
        list(AVATAR_PERSONAS.keys()),
    )
    companion = LanguageLearningCompanion(chosen_buddy)

    # Audio-only WebRTC capture; video is explicitly disabled.
    stream_ctx = webrtc_streamer(
        key="language-companion",
        mode=WebRtcMode.SENDRECV,
        media_stream_constraints={"audio": True, "video": False},
    )

    # Placeholder action: real audio capture/processing is not wired up yet.
    if st.button("Transcribe and Respond") and stream_ctx.audio_receiver:
        try:
            st.warning("Audio processing not fully implemented in this version")
        except Exception as e:
            logger.error(f"WebRTC audio processing error: {e}")
            st.error("Error processing audio.")


if __name__ == "__main__":
    main()