import logging
import os
import wave

import av
import numpy as np
import streamlit as st
import streamlit.components.v1 as components
import torch
import whisper
from streamlit_webrtc import AudioProcessorBase, WebRtcMode, webrtc_streamer
from transformers import AutoModelForCausalLM, AutoTokenizer
from TTS.api import TTS


# Application-wide logging: every record goes both to a log file in the
# working directory and to the console stream.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
_log_handlers = [
    logging.FileHandler("language_companion.log"),
    logging.StreamHandler(),
]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_log_handlers)

# Module-level logger shared by the classes and entry points in this file.
logger = logging.getLogger(__name__)


def _persona(name, description, teaching_style, learning_approach,
             motivation_techniques, voice_sample, image):
    """Assemble one persona record with a fixed key order."""
    return {
        "name": name,
        "description": description,
        "teaching_style": teaching_style,
        "learning_approach": learning_approach,
        "motivation_techniques": motivation_techniques,
        "voice_sample": voice_sample,
        "image": image,
    }


# Persona catalogue keyed by avatar id. Each record carries the display name
# and description, the teaching-style text, a sentence used for voice
# previews, and the image file name for the avatar.
AVATAR_PERSONAS = {
    "Rat": _persona(
        name="Puzzle Master Rat",
        description="I love solving word puzzles and making learning fun!",
        teaching_style="Learning is like solving a fun game",
        learning_approach="I break big words into small, easy pieces",
        motivation_techniques=[
            "Turn hard words into exciting challenges",
            "Make learning feel like play",
        ],
        voice_sample="Let's solve this language puzzle together!",
        image="rat.png",
    ),
    "Ox": _persona(
        name="Professor Steady Ox",
        description="I help you learn English step by step, slowly and carefully",
        teaching_style="Learning is like building a big tower, one block at a time",
        learning_approach="We go slow and make sure you understand everything",
        motivation_techniques=[
            "Break big lessons into small, easy parts",
            "Celebrate every little success",
        ],
        voice_sample="We will build your English skills step by step, carefully and steadily.",
        image="ox.png",
    ),
    "Tiger": _persona(
        name="Adventure Coach Tiger",
        description="Learning English is an exciting adventure!",
        teaching_style="Every English lesson is a fun mission",
        learning_approach="We learn by trying new things and having fun",
        motivation_techniques=[
            "Make learning feel like a game",
            "Cheer you on with lots of energy",
        ],
        voice_sample="Are you ready for an exciting English language adventure?",
        image="tiger.png",
    ),
    "Rabbit": _persona(
        name="Storyteller Rabbit",
        description="I love telling stories that help you learn English",
        teaching_style="Learning through fun and friendly stories",
        learning_approach="Words become magic when they tell a story",
        motivation_techniques=[
            "Use funny and interesting stories",
            "Help you imagine new words",
        ],
        voice_sample="Let me tell you a story that will help you learn English.",
        image="rabbit.png",
    ),
    "Dragon": _persona(
        name="Smart Dragon",
        description="I help you understand English like a language explorer",
        teaching_style="Learning is like discovering a new world",
        learning_approach="We look at words like they are treasure maps",
        motivation_techniques=[
            "Make learning feel like an exciting discovery",
            "Explain things in a clear way",
        ],
        voice_sample="Let us explore the world of English together!",
        image="dragon.png",
    ),
    "Snake": _persona(
        name="Wise Snake",
        description="I help you understand English slowly and carefully",
        teaching_style="Learning is like solving a gentle puzzle",
        learning_approach="We think about words and their meanings",
        motivation_techniques=[
            "Take time to understand each word",
            "Think about how words connect",
        ],
        voice_sample="Let's understand language together, step by step.",
        image="snake.png",
    ),
    "Horse": _persona(
        name="Energetic Coach Horse",
        description="Let's speak English and have fun!",
        teaching_style="Learning is an active, exciting game",
        learning_approach="We learn by speaking and playing",
        motivation_techniques=[
            "Speak English with lots of energy",
            "Make learning feel like a fun activity",
        ],
        voice_sample="Come on, let's speak English and have fun doing it!",
        image="horse.png",
    ),
    # NOTE(review): the Goat persona points at "sheep.png" -- confirm that is
    # the intended asset name.
    "Goat": _persona(
        name="Creative Goat",
        description="I help you draw pictures with English words",
        teaching_style="Learning is like creating colorful art",
        learning_approach="We use imagination to learn words",
        motivation_techniques=[
            "Make words feel like colorful pictures",
            "Use creativity to remember",
        ],
        voice_sample="Let's paint beautiful pictures with our English words!",
        image="sheep.png",
    ),
    "Monkey": _persona(
        name="Playful Monkey",
        description="Learning English is the most fun game!",
        teaching_style="Every lesson is a funny, exciting game",
        learning_approach="We laugh and learn at the same time",
        motivation_techniques=[
            "Turn learning into a funny game",
            "Make English feel like play",
        ],
        voice_sample="Learning English is the most fun game we'll play today!",
        image="monkey.png",
    ),
    "Rooster": _persona(
        name="Pronunciation Rooster",
        description="I help you speak English clearly and correctly",
        teaching_style="Learning is about saying words just right",
        learning_approach="We practice saying words perfectly",
        motivation_techniques=[
            "Practice saying words clearly",
            "Make pronunciation feel like a fun challenge",
        ],
        voice_sample="Listen carefully and repeat after me, with perfect pronunciation!",
        image="rooster.png",
    ),
    "Dog": _persona(
        name="Friendly Dog",
        description="I'm always here to help you learn English",
        teaching_style="Learning is about being kind and patient",
        learning_approach="We learn together, step by step",
        motivation_techniques=[
            "Cheer you on with lots of love",
            "Make you feel confident",
        ],
        voice_sample="You're doing great! Keep practicing your English.",
        image="dog.png",
    ),
    "Pig": _persona(
        name="Calm Pig",
        description="Let's learn English together, nice and easy",
        teaching_style="Learning is relaxed and comfortable",
        learning_approach="We take our time and enjoy learning",
        motivation_techniques=[
            "Make learning feel relaxed",
            "No stress, just fun",
        ],
        voice_sample="Let's learn English together, nice and easy.",
        image="pig.png",
    ),
}


class CharacterSelector:
    """Build the avatar-picker carousel and synthesize short voice previews.

    The carousel is plain HTML/CSS/JS generated from ``AVATAR_PERSONAS``;
    previews are written to WAV files by the TTS model loaded in ``__init__``.
    """

    # Static markup emitted before the per-avatar slides: Swiper assets, the
    # carousel CSS and the opening container tags.
    # NOTE(review): the unpkg URLs carry no version, so the Swiper release
    # that gets served can change -- confirm the ``swiper-container`` class is
    # what that release expects.
    _CAROUSEL_HEAD = """
        <link rel="stylesheet" href="https://unpkg.com/swiper/swiper-bundle.min.css">
        <script src="https://unpkg.com/swiper/swiper-bundle.min.js"></script>

        <style>
        .swiper-container {
            width: 100%;
            height: 500px;
        }
        .swiper-slide {
            text-align: center;
            background: #f4f4f4;
            display: flex;
            flex-direction: column;
            align-items: center;
            justify-content: center;
            border-radius: 18px;
            padding: 20px;
        }
        .avatar-image {
            width: 250px;
            height: 250px;
            object-fit: cover;
            border-radius: 50%;
            margin-bottom: 15px;
        }
        .voice-preview-btn {
            margin-top: 10px;
            padding: 10px;
            background-color: #4CAF50;
            color: white;
            border: none;
            border-radius: 5px;
            cursor: pointer;
        }
        </style>

        <div class="swiper-container">
            <div class="swiper-wrapper">
        """

    # Static markup emitted after the slides: closing tags, Swiper controls
    # and the script that posts button clicks to the parent window.
    _CAROUSEL_TAIL = """
            </div>
            <div class="swiper-pagination"></div>
            <div class="swiper-button-prev"></div>
            <div class="swiper-button-next"></div>
        </div>

        <script>
        var swiper = new Swiper('.swiper-container', {
            slidesPerView: 'auto',
            centeredSlides: true,
            spaceBetween: 30,
            pagination: {
                el: '.swiper-pagination',
                clickable: true
            },
            navigation: {
                nextEl: '.swiper-button-next',
                prevEl: '.swiper-button-prev',
            }
        });

        function previewVoice(avatarKey) {
            window.parent.postMessage({
                type: 'previewVoice',
                avatarKey: avatarKey
            }, '*');
        }

        function selectAvatar(avatarKey) {
            window.parent.postMessage({
                type: 'avatarSelected',
                avatarKey: avatarKey
            }, '*');
        }
        </script>
        """

    def __init__(self):
        self.tts_model = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts")

    @staticmethod
    def _render_slide(avatar_key, avatar_info):
        """Return the markup for one avatar slide.

        Values are interpolated without escaping; they come from the
        module-level ``AVATAR_PERSONAS`` constant.
        """
        # The second button is the only caller of the ``selectAvatar`` script
        # function; without it an 'avatarSelected' message is never posted.
        # NOTE(review): the image path is relative ("images/...") -- confirm it
        # resolves from wherever this markup is embedded.
        return f"""
                <div class="swiper-slide" data-avatar="{avatar_key}">
                    <img src="images/{avatar_info['image']}" class="avatar-image" alt="{avatar_info['name']}">
                    <h3>{avatar_info['name']}</h3>
                    <p>{avatar_info['description']}</p>
                    <button class="voice-preview-btn" onclick="previewVoice('{avatar_key}')">
                        Preview Voice
                    </button>
                    <button class="voice-preview-btn" onclick="selectAvatar('{avatar_key}')">
                        Choose This Buddy
                    </button>
                </div>
            """

    def create_avatar_carousel(self):
        """Create a Swiper-based avatar carousel using HTML, CSS, and JavaScript.

        Returns:
            The complete markup as a single string, one slide per entry in
            ``AVATAR_PERSONAS`` (in dictionary order).
        """
        slides = "".join(
            self._render_slide(avatar_key, avatar_info)
            for avatar_key, avatar_info in AVATAR_PERSONAS.items()
        )
        return self._CAROUSEL_HEAD + slides + self._CAROUSEL_TAIL

    def _voice_kwargs(self):
        """Return the speaker/language keyword arguments for synthesis.

        A speaker is passed when the loaded model lists speakers, and a
        language when it lists languages (preferring ``"en"``); otherwise the
        corresponding argument is omitted.
        """
        kwargs = {}
        speakers = getattr(self.tts_model, "speakers", None)
        if speakers:
            kwargs["speaker"] = speakers[0]
        languages = getattr(self.tts_model, "languages", None)
        if languages:
            kwargs["language"] = "en" if "en" in languages else languages[0]
        return kwargs

    def generate_voice_preview(self, avatar_key):
        """Generate voice preview for a specific avatar.

        Args:
            avatar_key: Key of the avatar in ``AVATAR_PERSONAS``.

        Returns:
            Path of the generated WAV file, or ``None`` when the avatar is
            unknown or synthesis fails (the failure is logged).
        """
        avatar = AVATAR_PERSONAS.get(avatar_key)
        if avatar is None:
            # Reject unknown keys up front: the key is also used to build the
            # output file name below.
            logger.error("Voice preview requested for unknown avatar: %r", avatar_key)
            return None

        output_path = f"{avatar_key}_preview.wav"
        try:
            self.tts_model.tts_to_file(
                text=avatar['voice_sample'],
                file_path=output_path,
                **self._voice_kwargs(),
            )
        except Exception as e:
            logger.error(f"Voice preview error for {avatar_key}: {e}")
            return None
        return output_path


def main():
    """Render the avatar carousel and act on avatar choices in the URL.

    Two query parameters are read: ``preview_voice`` (avatar whose voice
    sample is synthesized and played) and ``selected_avatar`` (avatar for
    which a ``LanguageLearningCompanion`` is created).
    """
    st.title("Language Learning Companion")

    character_selector = CharacterSelector()
    carousel_html = character_selector.create_avatar_carousel()
    components.html(carousel_html, height=600, scrolling=True)

    # Zero-height helper frame that listens for the carousel's messages and
    # re-posts them as 'streamlit:setComponentValue'.
    # NOTE(review): nothing in this function writes the query parameters read
    # below; presumably this bridge is expected to -- confirm it actually
    # updates them.
    components.html("""
        <script>
        window.addEventListener('message', function(event) {
            if (event.data.type === 'avatarSelected') {
                window.parent.postMessage({
                    type: 'streamlit:setComponentValue',
                    key: 'selected_avatar',
                    value: event.data.avatarKey
                }, '*');
            }
            if (event.data.type === 'previewVoice') {
                window.parent.postMessage({
                    type: 'streamlit:setComponentValue',
                    key: 'preview_voice',
                    value: event.data.avatarKey
                }, '*');
            }
        });
        </script>
        """, height=0)

    def query_value(name):
        """Return a single value of query parameter *name*, or None if absent."""
        # Prefer the current query-parameter API when this Streamlit build
        # has it; fall back to the older experimental getter otherwise.
        if hasattr(st, "query_params"):
            return st.query_params.get(name)
        values = st.experimental_get_query_params().get(name)
        return values[0] if values else None

    preview_avatar = query_value('preview_voice')
    if preview_avatar in AVATAR_PERSONAS:
        preview_audio = character_selector.generate_voice_preview(preview_avatar)
        if preview_audio:
            st.audio(preview_audio, format='audio/wav')
    elif preview_avatar:
        # The value comes from the URL, so it may name no known avatar; skip
        # it instead of failing on the persona lookup.
        logger.warning("Ignoring preview request for unknown avatar: %r", preview_avatar)

    selected_avatar = query_value('selected_avatar')
    if selected_avatar:
        # NOTE(review): the companion is created but not used further here.
        companion = LanguageLearningCompanion(selected_avatar)


# NOTE(review): this guard runs before LanguageLearningCompanion, which is
# defined further down this file, exists -- reaching the constructor call in
# main() at this point would raise NameError. Confirm the intended layout.
if __name__ == "__main__":
    main()


class AudioProcessor(AudioProcessorBase):
    """Collect incoming audio and run the companion pipeline on fixed-length chunks.

    Each frame is reduced to mono float samples and buffered. Once
    ``recording_duration`` seconds have accumulated, the chunk is written to
    ``temp_recording.wav``, transcribed, answered and spoken via ``companion``.
    """

    def __init__(self, companion):
        """Store the companion used for transcription, replies and speech."""
        self.companion = companion
        # Mono float32 arrays waiting to be processed, one per received frame.
        self.audio_buffer = []
        # Fallback rate; replaced by the rate reported on incoming frames.
        self.sample_rate = 16000
        # Seconds of audio collected before a chunk is processed.
        self.recording_duration = 5
        # Running total of samples held in ``audio_buffer``.
        self._buffered_samples = 0

    @staticmethod
    def _frame_to_mono(frame):
        """Return the first channel of *frame* as float32 samples in [-1, 1]."""
        # Called without arguments: the frame's own sample format decides the
        # dtype, and the scaling below normalises integer formats.
        raw = frame.to_ndarray()
        if raw.ndim > 1:
            if frame.format.is_planar:
                # assumes planar frames are shaped (channels, samples) -- TODO confirm
                raw = raw[0]
            else:
                # assumes packed frames hold one row of interleaved samples -- TODO confirm
                raw = raw.reshape(-1, max(1, frame.layout.nb_channels))[:, 0]
        if np.issubdtype(raw.dtype, np.integer):
            limits = np.iinfo(raw.dtype)
            half_range = (int(limits.max) - int(limits.min) + 1) / 2
            midpoint = (int(limits.max) + int(limits.min) + 1) / 2
            raw = (raw.astype(np.float64) - midpoint) / half_range
        return raw.astype(np.float32)

    @staticmethod
    def _write_wav(path, samples, sample_rate):
        """Write mono float samples in [-1, 1] to *path* as 16-bit PCM WAV."""
        pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype("<i2")
        with wave.open(path, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(int(sample_rate))
            wav_file.writeframes(pcm.tobytes())

    def _process_chunk(self, samples):
        """Transcribe one chunk, generate a reply and speak it."""
        self._write_wav('temp_recording.wav', samples, self.sample_rate)

        transcription = self.companion.transcribe_audio('temp_recording.wav')
        if not transcription:
            return

        response = self.companion.generate_response(transcription)
        if not response:
            return

        self.companion.text_to_speech(response)

        # NOTE(review): recv() is presumably invoked off the Streamlit script
        # thread, in which case these session-state writes may not reach the
        # page -- confirm against the streamlit-webrtc documentation.
        st.session_state.transcription = transcription
        st.session_state.response = response

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        """Buffer *frame* and process a chunk once enough audio has arrived.

        Args:
            frame: Incoming audio frame.

        Returns:
            The same frame, unmodified. Processing errors are logged and the
            buffer is cleared; they are not raised to the caller.
        """
        try:
            mono = self._frame_to_mono(frame)
        except Exception as e:
            logger.error(f"Audio processing error: {e}")
            return frame

        frame_rate = getattr(frame, "sample_rate", None)
        if frame_rate:
            # Use the stream's real rate so the WAV plays back at true speed.
            self.sample_rate = int(frame_rate)

        self.audio_buffer.append(mono)
        self._buffered_samples += mono.size

        chunk_size = int(self.sample_rate * self.recording_duration)
        if self._buffered_samples >= chunk_size:
            samples = np.concatenate(self.audio_buffer)[:chunk_size]
            # Clear before processing so a failure cannot leave stale audio.
            self.audio_buffer = []
            self._buffered_samples = 0
            try:
                self._process_chunk(samples)
            except Exception as e:
                logger.error(f"Audio processing error: {e}")
                st.error("Error processing audio")

        return frame


def main():
    """Run the live-audio variant: pick an avatar, stream audio, show results.

    The page shows the latest transcription and reply stored in
    ``st.session_state`` and plays ``response.wav`` when that file exists.
    """
    st.title("Language Learning Companion")

    for state_key in ('transcription', 'response'):
        if state_key not in st.session_state:
            st.session_state[state_key] = ""

    avatar_selection = st.selectbox(
        "Choose Your Learning Buddy",
        list(AVATAR_PERSONAS.keys())
    )

    # Streamlit re-runs this function on every interaction. Keep the companion
    # in session state so its models are reloaded only when the avatar changes.
    companion = st.session_state.get("companion")
    if companion is None or st.session_state.get("companion_avatar") != avatar_selection:
        companion = LanguageLearningCompanion(avatar_selection)
        # The constructor reports load failures instead of raising; only keep
        # an instance whose TTS model (loaded last) is present, so a failed
        # load is retried on the next run.
        if getattr(companion, "tts_model", None) is not None:
            st.session_state["companion"] = companion
            st.session_state["companion_avatar"] = avatar_selection

    ctx = webrtc_streamer(
        key="language-learning",
        mode=WebRtcMode.SENDRECV,
        audio_processor_factory=lambda: AudioProcessor(companion),
        media_stream_constraints={
            "audio": True,
            "video": False
        }
    )

    if st.session_state.transcription:
        st.subheader("Your Speech")
        st.write(st.session_state.transcription)

    if st.session_state.response:
        st.subheader("Companion's Response")
        st.write(st.session_state.response)

    if os.path.exists('response.wav'):
        st.audio('response.wav', format='audio/wav')


# NOTE(review): this guard runs before LanguageLearningCompanion, which is
# defined further down this file, exists -- main() would raise NameError when
# it reaches the constructor call. Confirm the intended layout.
if __name__ == "__main__":
    main()


class LanguageLearningCompanion:
    """Speech-in, speech-out tutor: Whisper transcription, Mistral replies, TTS output.

    Every public method reports failures through the module logger and
    ``st.error`` and returns ``None`` instead of raising.
    """

    def __init__(self, avatar_name):
        """Load the models and select the persona for *avatar_name*.

        Unknown avatar names fall back to the "Rat" persona. Loading errors
        are logged and shown via ``st.error``; the affected attributes then
        stay ``None``.
        """
        # Define every attribute up front so a failed load leaves ``None``
        # behind instead of a missing attribute.
        self.avatar = AVATAR_PERSONAS.get(avatar_name, AVATAR_PERSONAS["Rat"])
        self.model = None
        self.tokenizer = None
        self.whisper_model = None
        self.tts_model = None

        try:
            self.model = AutoModelForCausalLM.from_pretrained(
                "mistralai/Mistral-7B-Instruct-v0.1",
                load_in_8bit=True,
                device_map="auto"
            )
            self.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

            self.whisper_model = whisper.load_model("base")
            self.tts_model = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts")

            logger.info(f"Initialized Language Learning Companion with {avatar_name}")

        except Exception as e:
            logger.error(f"Model initialization error: {e}")
            st.error("Error loading models. Please try again later.")

    def transcribe_audio(self, audio_path):
        """Transcribe audio using Whisper.

        Args:
            audio_path: Path of the audio file to transcribe.

        Returns:
            The transcribed text, or ``None`` on failure.
        """
        try:
            result = self.whisper_model.transcribe(audio_path)
            logger.info(f"Successfully transcribed audio: {result['text']}")
            return result['text']
        except Exception as e:
            logger.error(f"Transcription error: {e}")
            st.error("Error transcribing audio.")
            return None

    def generate_response(self, user_input):
        """Generate educational response using Mistral.

        Args:
            user_input: Text the learner said.

        Returns:
            Only the newly generated text (the prompt is not included), or
            ``None`` on failure.
        """
        try:
            prompt = f"""
            Avatar: {self.avatar['name']}
            Teaching Style: {self.avatar['teaching_style']}
            Learning Approach: {self.avatar['learning_approach']}

            User Input: {user_input}

            Generate an encouraging, educational response that:
            1. Addresses the user's input
            2. Provides gentle language learning guidance
            3. Maintains an engaging, child-friendly tone
            """

            # Keep the inputs on the same device as the model.
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
            prompt_length = inputs["input_ids"].shape[-1]

            # Bound the reply itself; a total-length cap would also count the
            # prompt tokens and can leave little or no room for an answer.
            outputs = self.model.generate(**inputs, max_new_tokens=200)

            # The generated sequence starts with the prompt tokens; decode only
            # what follows so the prompt is not echoed back.
            response = self.tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            ).strip()

            logger.info(f"Generated response for input: {user_input}")
            return response

        except Exception as e:
            logger.error(f"Response generation error: {e}")
            st.error("Error generating response.")
            return None

    def _voice_kwargs(self):
        """Return the speaker/language keyword arguments for synthesis.

        A speaker is passed when the loaded model lists speakers, and a
        language when it lists languages (preferring ``"en"``); otherwise the
        corresponding argument is omitted.
        """
        kwargs = {}
        speakers = getattr(self.tts_model, "speakers", None)
        if speakers:
            kwargs["speaker"] = speakers[0]
        languages = getattr(self.tts_model, "languages", None)
        if languages:
            kwargs["language"] = "en" if "en" in languages else languages[0]
        return kwargs

    def text_to_speech(self, text):
        """Convert text to speech.

        Args:
            text: Text to synthesize.

        Returns:
            ``"response.wav"`` on success, or ``None`` on failure.
        """
        try:
            # NOTE(review): the output name is fixed, so every call overwrites
            # the same file -- confirm that is acceptable.
            self.tts_model.tts_to_file(
                text=text,
                file_path="response.wav",
                **self._voice_kwargs(),
            )
            logger.info("Successfully converted text to speech")
            return "response.wav"
        except Exception as e:
            logger.error(f"Text-to-speech conversion error: {e}")
            st.error("Error converting text to speech.")
            return None


def main():
    """Run the button-driven variant: pick an avatar, stream audio, respond on request.

    Audio processing is not implemented in this variant; the button only
    reports that fact, or that no audio receiver is available.
    """
    st.title("Language Learning Companion")

    avatar_selection = st.selectbox(
        "Choose Your Learning Buddy",
        list(AVATAR_PERSONAS.keys())
    )

    # Streamlit re-runs this function on every interaction. Keep the companion
    # in session state so its models are reloaded only when the avatar changes.
    companion = st.session_state.get("companion")
    if companion is None or st.session_state.get("companion_avatar") != avatar_selection:
        companion = LanguageLearningCompanion(avatar_selection)
        # The constructor reports load failures instead of raising; only keep
        # an instance whose TTS model (loaded last) is present, so a failed
        # load is retried on the next run.
        if getattr(companion, "tts_model", None) is not None:
            st.session_state["companion"] = companion
            st.session_state["companion_avatar"] = avatar_selection

    webrtc_ctx = webrtc_streamer(
        key="language-companion",
        mode=WebRtcMode.SENDRECV,
        media_stream_constraints={"audio": True, "video": False}
    )

    if st.button("Transcribe and Respond"):
        if webrtc_ctx.audio_receiver:
            try:
                # TODO: pull audio from the receiver and run it through the
                # companion (transcribe -> respond -> speak).
                st.warning("Audio processing not fully implemented in this version")
            except Exception as e:
                logger.error(f"WebRTC audio processing error: {e}")
                st.error("Error processing audio.")
        else:
            # Give feedback instead of silently ignoring the click.
            # NOTE(review): confirm an audio receiver is ever provided in
            # SENDRECV mode; otherwise this branch is always taken.
            st.warning("No audio stream is available yet. Start the stream and try again.")


if __name__ == "__main__":
    main()