Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import streamlit.components.v1 as components | |
| import torch | |
| import numpy as np | |
| import av | |
| import logging | |
| import os | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import whisper | |
| from TTS.api import TTS | |
| from streamlit_webrtc import webrtc_streamer, WebRtcMode, AudioProcessorBase | |
# Logging setup: INFO-and-above records are sent both to a log file in the
# working directory and to the console stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler("language_companion.log"),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by the rest of this file.
logger = logging.getLogger(__name__)
# Avatar personas, keyed by zodiac animal.
#
# Every persona carries the same seven fields:
#   name                  - display name shown to the learner
#   description           - short first-person introduction
#   teaching_style        - one-line teaching philosophy
#   learning_approach     - one-line description of how lessons are run
#   motivation_techniques - list of short motivational strategies
#   voice_sample          - sentence synthesised for the voice preview
#   image                 - avatar image file name
AVATAR_PERSONAS = {
    "Rat": dict(
        name="Puzzle Master Rat",
        description="I love solving word puzzles and making learning fun!",
        teaching_style="Learning is like solving a fun game",
        learning_approach="I break big words into small, easy pieces",
        motivation_techniques=[
            "Turn hard words into exciting challenges",
            "Make learning feel like play",
        ],
        voice_sample="Let's solve this language puzzle together!",
        image="rat.png",
    ),
    "Ox": dict(
        name="Professor Steady Ox",
        description="I help you learn English step by step, slowly and carefully",
        teaching_style="Learning is like building a big tower, one block at a time",
        learning_approach="We go slow and make sure you understand everything",
        motivation_techniques=[
            "Break big lessons into small, easy parts",
            "Celebrate every little success",
        ],
        voice_sample="We will build your English skills step by step, carefully and steadily.",
        image="ox.png",
    ),
    "Tiger": dict(
        name="Adventure Coach Tiger",
        description="Learning English is an exciting adventure!",
        teaching_style="Every English lesson is a fun mission",
        learning_approach="We learn by trying new things and having fun",
        motivation_techniques=[
            "Make learning feel like a game",
            "Cheer you on with lots of energy",
        ],
        voice_sample="Are you ready for an exciting English language adventure?",
        image="tiger.png",
    ),
    "Rabbit": dict(
        name="Storyteller Rabbit",
        description="I love telling stories that help you learn English",
        teaching_style="Learning through fun and friendly stories",
        learning_approach="Words become magic when they tell a story",
        motivation_techniques=[
            "Use funny and interesting stories",
            "Help you imagine new words",
        ],
        voice_sample="Let me tell you a story that will help you learn English.",
        image="rabbit.png",
    ),
    "Dragon": dict(
        name="Smart Dragon",
        description="I help you understand English like a language explorer",
        teaching_style="Learning is like discovering a new world",
        learning_approach="We look at words like they are treasure maps",
        motivation_techniques=[
            "Make learning feel like an exciting discovery",
            "Explain things in a clear way",
        ],
        voice_sample="Let us explore the world of English together!",
        image="dragon.png",
    ),
    "Snake": dict(
        name="Wise Snake",
        description="I help you understand English slowly and carefully",
        teaching_style="Learning is like solving a gentle puzzle",
        learning_approach="We think about words and their meanings",
        motivation_techniques=[
            "Take time to understand each word",
            "Think about how words connect",
        ],
        voice_sample="Let's understand language together, step by step.",
        image="snake.png",
    ),
    "Horse": dict(
        name="Energetic Coach Horse",
        description="Let's speak English and have fun!",
        teaching_style="Learning is an active, exciting game",
        learning_approach="We learn by speaking and playing",
        motivation_techniques=[
            "Speak English with lots of energy",
            "Make learning feel like a fun activity",
        ],
        voice_sample="Come on, let's speak English and have fun doing it!",
        image="horse.png",
    ),
    # NOTE(review): the Goat persona points at "sheep.png" - presumably the
    # asset is really named that way; confirm against the images directory.
    "Goat": dict(
        name="Creative Goat",
        description="I help you draw pictures with English words",
        teaching_style="Learning is like creating colorful art",
        learning_approach="We use imagination to learn words",
        motivation_techniques=[
            "Make words feel like colorful pictures",
            "Use creativity to remember",
        ],
        voice_sample="Let's paint beautiful pictures with our English words!",
        image="sheep.png",
    ),
    "Monkey": dict(
        name="Playful Monkey",
        description="Learning English is the most fun game!",
        teaching_style="Every lesson is a funny, exciting game",
        learning_approach="We laugh and learn at the same time",
        motivation_techniques=[
            "Turn learning into a funny game",
            "Make English feel like play",
        ],
        voice_sample="Learning English is the most fun game we'll play today!",
        image="monkey.png",
    ),
    "Rooster": dict(
        name="Pronunciation Rooster",
        description="I help you speak English clearly and correctly",
        teaching_style="Learning is about saying words just right",
        learning_approach="We practice saying words perfectly",
        motivation_techniques=[
            "Practice saying words clearly",
            "Make pronunciation feel like a fun challenge",
        ],
        voice_sample="Listen carefully and repeat after me, with perfect pronunciation!",
        image="rooster.png",
    ),
    "Dog": dict(
        name="Friendly Dog",
        description="I'm always here to help you learn English",
        teaching_style="Learning is about being kind and patient",
        learning_approach="We learn together, step by step",
        motivation_techniques=[
            "Cheer you on with lots of love",
            "Make you feel confident",
        ],
        voice_sample="You're doing great! Keep practicing your English.",
        image="dog.png",
    ),
    "Pig": dict(
        name="Calm Pig",
        description="Let's learn English together, nice and easy",
        teaching_style="Learning is relaxed and comfortable",
        learning_approach="We take our time and enjoy learning",
        motivation_techniques=[
            "Make learning feel relaxed",
            "No stress, just fun",
        ],
        voice_sample="Let's learn English together, nice and easy.",
        image="pig.png",
    ),
}
class CharacterSelector:
    """Build the avatar carousel markup and synthesise avatar voice previews."""

    def __init__(self):
        # Coqui TTS model used to speak each persona's ``voice_sample``.
        self.tts_model = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts")

    def create_avatar_carousel(self):
        """
        Create a Swiper-like avatar carousel using HTML, CSS, and JavaScript.

        Returns:
            A single HTML string with one slide per entry of
            ``AVATAR_PERSONAS`` plus the Swiper initialisation script.
        """
        # NOTE(review): the Swiper assets are loaded from unpkg without a
        # pinned version; confirm the markup still matches the release that
        # URL currently resolves to.
        header_html = """
<link rel="stylesheet" href="https://unpkg.com/swiper/swiper-bundle.min.css">
<script src="https://unpkg.com/swiper/swiper-bundle.min.js"></script>
<style>
.swiper-container {
    width: 100%;
    height: 500px;
}
.swiper-slide {
    text-align: center;
    background: #f4f4f4;
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    border-radius: 18px;
    padding: 20px;
}
.avatar-image {
    width: 250px;
    height: 250px;
    object-fit: cover;
    border-radius: 50%;
    margin-bottom: 15px;
}
.voice-preview-btn {
    margin-top: 10px;
    padding: 10px;
    background-color: #4CAF50;
    color: white;
    border: none;
    border-radius: 5px;
    cursor: pointer;
}
</style>
<div class="swiper-container">
<div class="swiper-wrapper">
"""

        # One slide per persona.
        # NOTE(review): the relative "images/..." URLs are resolved by the
        # browser inside the component iframe - confirm they are actually
        # served there.
        slides = [
            f"""
<div class="swiper-slide" data-avatar="{avatar_key}">
<img src="images/{avatar_info['image']}" class="avatar-image" alt="{avatar_info['name']}">
<h3>{avatar_info['name']}</h3>
<p>{avatar_info['description']}</p>
<button class="voice-preview-btn" onclick="previewVoice('{avatar_key}')">
Preview Voice
</button>
</div>
"""
            for avatar_key, avatar_info in AVATAR_PERSONAS.items()
        ]

        # NOTE(review): selectAvatar() is defined below but no element in the
        # generated markup calls it, so 'avatarSelected' is never posted.
        footer_html = """
</div>
<div class="swiper-pagination"></div>
<div class="swiper-button-prev"></div>
<div class="swiper-button-next"></div>
</div>
<script>
var swiper = new Swiper('.swiper-container', {
    slidesPerView: 'auto',
    centeredSlides: true,
    spaceBetween: 30,
    pagination: {
        el: '.swiper-pagination',
        clickable: true
    },
    navigation: {
        nextEl: '.swiper-button-next',
        prevEl: '.swiper-button-prev',
    }
});
function previewVoice(avatarKey) {
    window.parent.postMessage({
        type: 'previewVoice',
        avatarKey: avatarKey
    }, '*');
}
function selectAvatar(avatarKey) {
    window.parent.postMessage({
        type: 'avatarSelected',
        avatarKey: avatarKey
    }, '*');
}
</script>
"""
        # Join once instead of growing the string with repeated "+=".
        return "".join([header_html, *slides, footer_html])

    def _voice_kwargs(self):
        """Return the speaker/language keyword arguments the model needs.

        Multi-speaker / multilingual Coqui models need an explicit speaker
        and language for synthesis; single-voice models get an empty dict.
        """
        tts = self.tts_model
        kwargs = {}
        speakers = getattr(tts, "speakers", None)
        if getattr(tts, "is_multi_speaker", False) and speakers:
            kwargs["speaker"] = speakers[0]
        languages = getattr(tts, "languages", None)
        if getattr(tts, "is_multi_lingual", False) and languages:
            # The personas speak English; fall back to the model's first
            # language if it does not list "en".
            kwargs["language"] = "en" if "en" in languages else languages[0]
        return kwargs

    def generate_voice_preview(self, avatar_key):
        """Generate voice preview for a specific avatar.

        Args:
            avatar_key: Key into ``AVATAR_PERSONAS`` (e.g. ``"Rat"``).

        Returns:
            Path of the written ``<avatar_key>_preview.wav`` file, or
            ``None`` when the key is unknown or synthesis fails.
        """
        # The key may come from outside the app (the caller reads it from the
        # URL), so an unknown value must not raise - and only known keys are
        # ever used to build the output file name.
        avatar = AVATAR_PERSONAS.get(avatar_key)
        if avatar is None:
            logger.warning("Voice preview requested for unknown avatar: %r", avatar_key)
            return None

        output_path = f"{avatar_key}_preview.wav"
        try:
            self.tts_model.tts_to_file(
                text=avatar['voice_sample'],
                file_path=output_path,
                **self._voice_kwargs(),
            )
        except Exception as e:
            logger.error(f"Voice preview error for {avatar_key}: {e}")
            return None
        return output_path
def _get_query_param(name):
    """Return a single value of URL query parameter *name*, or ``None``.

    Uses ``st.query_params`` when the installed Streamlit provides it and
    falls back to the older ``st.experimental_get_query_params`` otherwise.
    """
    if hasattr(st, "query_params"):
        return st.query_params.get(name)
    return st.experimental_get_query_params().get(name, [None])[0]


def main():
    """Render the avatar carousel and react to preview/selection requests.

    The preview and selection requests are read from the ``preview_voice``
    and ``selected_avatar`` URL query parameters.
    """
    st.title("Language Learning Companion")

    # Character Selection Carousel
    character_selector = CharacterSelector()
    components.html(
        character_selector.create_avatar_carousel(),
        height=600,
        scrolling=True,
    )

    # JavaScript communication for avatar selection and voice preview.
    # NOTE(review): this script only re-posts messages to the parent window;
    # nothing visible here writes the query parameters read below - confirm
    # how 'preview_voice' / 'selected_avatar' are meant to reach the URL.
    components.html("""
<script>
window.addEventListener('message', function(event) {
    if (event.data.type === 'avatarSelected') {
        window.parent.postMessage({
            type: 'streamlit:setComponentValue',
            key: 'selected_avatar',
            value: event.data.avatarKey
        }, '*');
    }
    if (event.data.type === 'previewVoice') {
        window.parent.postMessage({
            type: 'streamlit:setComponentValue',
            key: 'preview_voice',
            value: event.data.avatarKey
        }, '*');
    }
});
</script>
""", height=0)

    # Handle Voice Preview. The value comes straight from the URL, so only
    # known persona keys are passed on to the synthesiser.
    preview_avatar = _get_query_param('preview_voice')
    if preview_avatar in AVATAR_PERSONAS:
        preview_audio = character_selector.generate_voice_preview(preview_avatar)
        if preview_audio:
            st.audio(preview_audio, format='audio/wav')
    elif preview_avatar:
        logger.warning("Ignoring voice preview for unknown avatar: %r", preview_avatar)

    # Selected Avatar Handling
    selected_avatar = _get_query_param('selected_avatar')
    if selected_avatar:
        # Initialize Language Learning Companion with selected avatar
        companion = LanguageLearningCompanion(selected_avatar)
        # Rest of the conversation interface...


# NOTE(review): this guard runs as soon as the interpreter reaches it, i.e.
# before the definitions further down the file (including
# LanguageLearningCompanion) exist, so the selected-avatar branch above would
# raise NameError from here. Consider keeping a single guard at the very end
# of the file.
if __name__ == "__main__":
    main()
class AudioProcessor(AudioProcessorBase):
    """Buffer incoming audio and run it through the companion in windows.

    Each received frame is reduced to mono float samples and appended to a
    buffer. Once ``recording_duration`` seconds are buffered, the window is
    written to ``temp_recording.wav``, transcribed, answered and spoken by
    the companion, and the buffer is cleared.
    """

    def __init__(self, companion):
        # Object providing transcribe_audio / generate_response / text_to_speech.
        self.companion = companion
        # Mono samples (Python floats) collected since the last processed window.
        self.audio_buffer = []
        # Default rate; replaced by the rate reported by incoming frames.
        self.sample_rate = 16000
        self.recording_duration = 5  # 5 seconds of audio before processing

    @staticmethod
    def _mono_float_samples(frame):
        """Return the first channel of *frame* as a 1-D float32 array.

        ``to_ndarray`` yields one row per channel for planar formats and a
        single interleaved row for packed formats; both layouts are handled.
        Integer PCM is scaled into the [-1.0, 1.0) range.
        """
        samples = frame.to_ndarray()
        if samples.ndim == 2:
            if samples.shape[0] > 1:
                # Planar layout: each row is one channel.
                samples = samples[0]
            else:
                # Packed layout: channels are interleaved in a single row.
                samples = samples[0]
                per_channel = frame.samples
                channels = samples.size // per_channel if per_channel else 1
                if channels > 1:
                    samples = samples[::channels]

        if samples.dtype.kind == "u":
            # Unsigned PCM is centred on the middle of its range.
            centre = (float(np.iinfo(samples.dtype).max) + 1.0) / 2.0
            return (samples.astype(np.float32) - centre) / centre
        if samples.dtype.kind == "i":
            full_scale = float(np.iinfo(samples.dtype).max) + 1.0
            return samples.astype(np.float32) / full_scale
        return samples.astype(np.float32)

    @staticmethod
    def _write_wav(path, samples, sample_rate):
        """Write mono float samples in [-1, 1] to *path* as 16-bit PCM WAV."""
        import wave  # stdlib; imported here so the block is self-contained

        pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")
        with wave.open(path, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(int(sample_rate))
            wav_file.writeframes(pcm.tobytes())

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        """Accumulate *frame*; process a window once enough audio is buffered.

        The frame itself is always returned unchanged.
        """
        # Track the real capture rate so the WAV header matches the samples.
        self.sample_rate = frame.sample_rate or self.sample_rate
        self.audio_buffer.extend(self._mono_float_samples(frame).tolist())

        window = int(self.sample_rate * self.recording_duration)
        if len(self.audio_buffer) < window:
            return frame

        # NOTE(review): everything below runs synchronously inside recv and
        # touches Streamlit state from whatever thread calls recv - confirm
        # that st.session_state / st.error are usable from that context.
        try:
            clip = np.asarray(self.audio_buffer[:window], dtype=np.float32)
            self._write_wav('temp_recording.wav', clip, self.sample_rate)

            # Transcribe
            transcription = self.companion.transcribe_audio('temp_recording.wav')

            # Generate response
            if transcription:
                response = self.companion.generate_response(transcription)

                # Text to Speech (the companion writes the audio file itself)
                if response:
                    self.companion.text_to_speech(response)

                # Display results
                st.session_state.transcription = transcription
                st.session_state.response = response
        except Exception as e:
            logger.error(f"Audio processing error: {e}")
            st.error("Error processing audio")
        finally:
            # Start a fresh window whether or not processing succeeded.
            self.audio_buffer = []

        return frame
def main():
    """Run the voice conversation page: pick an avatar, stream audio, show results.

    The transcription and response shown here are read from
    ``st.session_state``, where ``AudioProcessor`` stores them.
    """
    st.title("Language Learning Companion")

    # Initialize session state for storing conversation
    for state_key in ('transcription', 'response'):
        if state_key not in st.session_state:
            st.session_state[state_key] = ""

    # Avatar Selection
    avatar_selection = st.selectbox(
        "Choose Your Learning Buddy",
        list(AVATAR_PERSONAS.keys())
    )

    # Initialize Companion
    companion = LanguageLearningCompanion(avatar_selection)

    # WebRTC Audio Capture with Custom Processor
    webrtc_streamer(
        key="language-learning",
        mode=WebRtcMode.SENDRECV,
        audio_processor_factory=lambda: AudioProcessor(companion),
        media_stream_constraints={
            "audio": True,
            "video": False
        }
    )

    # Display Conversation
    if st.session_state.transcription:
        st.subheader("Your Speech")
        st.write(st.session_state.transcription)

    if st.session_state.response:
        st.subheader("Companion's Response")
        st.write(st.session_state.response)

        # Optional: Play response audio
        if os.path.exists('response.wav'):
            st.audio('response.wav', format='audio/wav')


# The `if __name__ == "__main__": main()` guard that used to follow this
# function has been removed: it ran before LanguageLearningCompanion (defined
# further down the file) existed, so the call above could only fail with
# NameError and stop the script. The guard at the end of the file remains the
# entry point.
class LanguageLearningCompanion:
    """Speech-in / speech-out tutor that answers in the voice of an avatar.

    Bundles a Mistral instruct model (text generation), Whisper
    (transcription) and a Coqui TTS model (speech synthesis) together with
    the persona selected from ``AVATAR_PERSONAS``.
    """

    def __init__(self, avatar_name):
        """Select the persona and load the models.

        Args:
            avatar_name: Key into ``AVATAR_PERSONAS``; unknown names fall
                back to the "Rat" persona.

        Loading failures are logged and reported through ``st.error``; the
        model attributes then stay ``None`` and the other methods return
        ``None`` instead of raising.
        """
        # Avatar Selection happens first: it cannot fail for the fallback
        # key, and it keeps ``self.avatar`` defined even if loading fails.
        self.avatar = AVATAR_PERSONAS.get(avatar_name, AVATAR_PERSONAS["Rat"])
        self.model = None
        self.tokenizer = None
        self.whisper_model = None
        self.tts_model = None

        # NOTE(review): the models are loaded on every construction - confirm
        # whether the callers should share one cached instance instead.
        try:
            # Model Initialization with Quantization
            self.model = AutoModelForCausalLM.from_pretrained(
                "mistralai/Mistral-7B-Instruct-v0.1",
                load_in_8bit=True,
                device_map="auto"
            )
            self.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

            # Speech Models
            self.whisper_model = whisper.load_model("base")
            self.tts_model = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts")

            logger.info(f"Initialized Language Learning Companion with {avatar_name}")
        except Exception as e:
            logger.error(f"Model initialization error: {e}")
            st.error("Error loading models. Please try again later.")

    def transcribe_audio(self, audio_path):
        """Transcribe audio using Whisper.

        Args:
            audio_path: Path of the audio file to transcribe.

        Returns:
            The transcribed text, or ``None`` if transcription fails.
        """
        try:
            result = self.whisper_model.transcribe(audio_path)
            logger.info(f"Successfully transcribed audio: {result['text']}")
            return result['text']
        except Exception as e:
            logger.error(f"Transcription error: {e}")
            st.error("Error transcribing audio.")
            return None

    def generate_response(self, user_input):
        """Generate educational response using Mistral.

        Args:
            user_input: The learner's (transcribed) utterance.

        Returns:
            Only the newly generated text (the prompt is not echoed back),
            or ``None`` if generation fails.
        """
        try:
            prompt = "\n".join([
                "",
                f"Avatar: {self.avatar['name']}",
                f"Teaching Style: {self.avatar['teaching_style']}",
                f"Learning Approach: {self.avatar['learning_approach']}",
                f"User Input: {user_input}",
                "Generate an encouraging, educational response that:",
                "1. Addresses the user's input",
                "2. Provides gentle language learning guidance",
                "3. Maintains an engaging, child-friendly tone",
                "",
            ])
            # The model is placed by device_map="auto"; the inputs must live
            # on the same device as the model.
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
            # max_new_tokens bounds the reply itself; max_length would also
            # count the prompt tokens and could leave no room for an answer.
            outputs = self.model.generate(**inputs, max_new_tokens=200)
            # The output sequence starts with the prompt tokens - drop them so
            # only the reply is returned (and later spoken).
            prompt_length = inputs["input_ids"].shape[1]
            response = self.tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            ).strip()
            logger.info(f"Generated response for input: {user_input}")
            return response
        except Exception as e:
            logger.error(f"Response generation error: {e}")
            st.error("Error generating response.")
            return None

    def _voice_kwargs(self):
        """Return the speaker/language keyword arguments the TTS model needs.

        Multi-speaker / multilingual Coqui models need an explicit speaker
        and language for synthesis; single-voice models get an empty dict.
        """
        tts = self.tts_model
        kwargs = {}
        speakers = getattr(tts, "speakers", None)
        if getattr(tts, "is_multi_speaker", False) and speakers:
            kwargs["speaker"] = speakers[0]
        languages = getattr(tts, "languages", None)
        if getattr(tts, "is_multi_lingual", False) and languages:
            # The tutor speaks English; fall back to the model's first
            # language if it does not list "en".
            kwargs["language"] = "en" if "en" in languages else languages[0]
        return kwargs

    def text_to_speech(self, text):
        """Convert text to speech.

        Args:
            text: The text to synthesise.

        Returns:
            ``"response.wav"`` once the file has been written, or ``None``
            if synthesis fails.
        """
        try:
            self.tts_model.tts_to_file(
                text=text,
                file_path="response.wav",
                **self._voice_kwargs(),
            )
            logger.info("Successfully converted text to speech")
            return "response.wav"
        except Exception as e:
            logger.error(f"Text-to-speech conversion error: {e}")
            st.error("Error converting text to speech.")
            return None
def main():
    """Run the page: pick an avatar, start the audio stream, offer transcription.

    Audio processing is still a placeholder here; the button only reports
    the current state to the user.
    """
    st.title("Language Learning Companion")

    # Avatar Selection
    avatar_selection = st.selectbox(
        "Choose Your Learning Buddy",
        list(AVATAR_PERSONAS.keys())
    )

    # Initialize Companion
    companion = LanguageLearningCompanion(avatar_selection)

    # WebRTC Audio Capture
    webrtc_ctx = webrtc_streamer(
        key="language-companion",
        mode=WebRtcMode.SENDRECV,
        media_stream_constraints={"audio": True, "video": False}
    )

    if st.button("Transcribe and Respond"):
        if webrtc_ctx.audio_receiver:
            try:
                # Placeholder for audio processing
                # In a real implementation, you'd capture and process the audio
                st.warning("Audio processing not fully implemented in this version")
            except Exception as e:
                logger.error(f"WebRTC audio processing error: {e}")
                st.error("Error processing audio.")
        else:
            # Previously the click was silently ignored in this case.
            # NOTE(review): confirm that `audio_receiver` is populated at all
            # in SENDRECV mode; if not, this branch is the only one reachable.
            st.info("No audio has been captured yet.")


if __name__ == "__main__":
    main()