Spaces:

simonraj
/

OralCoachZeroGPU

Runtime error

App Files Files Community

simonraj committed on May 18, 2024

Commit

2e5e5ac

verified ·

1 Parent(s): 01a50df

Update app.py

Browse files

Files changed (1) hide show

app.py +174 -87

app.py CHANGED Viewed

@@ -1,146 +1,216 @@
 import gradio as gr
 import os
 import thinkingframes
 import soundfile as sf
 import numpy as np
 import logging
 from dotenv import load_dotenv
 from policy import user_acceptance_policy
 from styles import theme
-from thinkingframes import generate_prompt, strategy_options, questions
 from utils import get_image_html, collect_student_info
-from database_functions import add_user_privacy, add_submission
 from tab_teachers_dashboard import create_teachers_dashboard_tab
 from config import CLASS_OPTIONS
-import spaces
-import edge_tts
 import tempfile
-import requests  # Ensure this import is included for making API requests
-# Load environment variables
-load_dotenv()
-# Whisper API settings
-API_URL = "https://api-inference.huggingface.co/models/whisper-large"
-headers = {"Authorization": f"Bearer {os.getenv('HF_AUTH_TOKEN')}"}
-def whisper_query(filename):
-    with open(filename, "rb") as f:
-        data = f.read()
-    response = requests.post(API_URL, headers=headers, data=data)
-    return response.json()
 # For maintaining user session (to keep track of userID)
 user_state = gr.State(value="")
-# Load the Meta-Llama-3-8B model from Hugging Face
-llm = gr.load("meta-llama/Meta-Llama-3-8B", src="models")
 image_path = "picturePerformance.jpg"
 img_html = get_image_html(image_path)
-@spaces.GPU(duration=120)
 def transcribe(audio_path):
-    response = whisper_query(audio_path)
-    if "text" in response:
-        return response["text"]
-    else:
-        raise ValueError("Transcription failed.")
 @spaces.GPU(duration=120)
-def generate_feedback(user_id, question_choice, strategy_choice, message, feedback_level):
-    current_question_index = questions.index(question_choice)
-    strategy, explanation = strategy_options[strategy_choice]
     conversation = [{
         "role": "system",
-        "content": thinkingframes.generate_system_message(current_question_index, feedback_level)
     }, {
         "role": "user",
         "content": message
     }]
-    feedback = llm(conversation)[0]["generated_text"]
-    questionNo = current_question_index + 1
-    add_submission(user_id, message, feedback, int(0), "", questionNo)
-    return feedback
-@spaces.GPU(duration=60)
-def generate_audio_feedback(feedback_buffer):
-    communicate = edge_tts.Communicate(feedback_buffer)
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-        tmp_path = tmp_file.name
-        asyncio.run(communicate.save(tmp_path))
-    return tmp_path
-def predict(question_choice, strategy_choice, feedback_level, audio):
-    current_audio_output = None
     if audio is None:
-        return [("Oral Coach ⚡ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", "No audio data received. Please try again.")], current_audio_output
     sample_rate, audio_data = audio
     if audio_data is None or len(audio_data) == 0:
-        return [("Oral Coach ⚡ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", "No audio data received. Please try again.")], current_audio_output
     audio_path = "audio.wav"
     if not isinstance(audio_data, np.ndarray):
         raise ValueError("audio_data must be a numpy array")
     sf.write(audio_path, audio_data, sample_rate)
-    chat_history = [("Oral Coach ⚡ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", "Transcribing your audio, please listen to your oral response while waiting ...")]
     try:
-        student_response = transcribe(audio_path)
         if not student_response.strip():
-            return [("Oral Coach ⚡ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", "Transcription failed. Please try again or seek assistance.")], current_audio_output
-        chat_history.append(("Student", student_response))
-        chat_history.append(("Oral Coach ⚡ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", "Transcription complete. Generating feedback. Please continue listening to your oral response while waiting ..."))
-        feedback = generate_feedback(int(user_state.value), question_choice, strategy_choice, student_response, feedback_level)
-        chat_history.append(("Oral Coach ⚡ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", feedback))
-        audio_output_path = generate_audio_feedback(feedback)
-        current_audio_output = (24000, audio_output_path)
-        return chat_history, current_audio_output
     except Exception as e:
         logging.error(f"An error occurred: {str(e)}", exc_info=True)
-        return [("Oral Coach ⚡ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", "An error occurred. Please try again or seek assistance.")], current_audio_output
-def toggle_oral_coach_visibility(class_name, index_no, policy_checked):
-    if not policy_checked:
-        return "Please agree to the Things to Note When using the Oral Coach ⚡ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡ before submitting.", gr.update(visible=False)
-    user_id, message = add_user_privacy(class_name, index_no)
-    if "Error" in message:
-        return message, gr.update(visible=False)
-    user_state.value = user_id
-    return message, gr.update(visible=True)
-with gr.Blocks(title="Oral Coach powered by ZeroGPU⚡ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡ and Meta AI 🦙 (LLama3)", theme=theme, css="footer {visibility: hidden}textbox{resize:none}") as demo:
-    with gr.Tab("Oral Coach ⚡ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡"):
         gr.Markdown("## Student Information")
         class_name = gr.Dropdown(label="Class", choices=CLASS_OPTIONS)
         index_no = gr.Dropdown(label="Index No", choices=[f"{i:02}" for i in range(1, 46)])
         policy_text = gr.Markdown(user_acceptance_policy)
-        policy_checkbox = gr.Checkbox(label="I have read and agree to the Things to Note When using the Oral Coach ⚡ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", value=False)
         submit_info_btn = gr.Button("Submit Info")
         info_output = gr.Text()
         with gr.Column(visible=False) as oral_coach_content:
-            gr.Markdown("## Powered by Hugging Face")
-            gr.Markdown(img_html)
             with gr.Row():
                 with gr.Column(scale=1):
                     gr.Markdown("### Step 1: Choose a Question")
@@ -157,21 +227,38 @@ with gr.Blocks(title="Oral Coach powered by ZeroGPU⚡ϞϞ(๑⚈ ․̫ ⚈๑)
                     submit_answer_btn = gr.Button("Submit Oral Response")
                     gr.Markdown("### Step 5: Review your personalised feedback")
-                    feedback_output = gr.Chatbot(label="Feedback", scale=4, height=700, show_label=True)
-                    audio_output = gr.Audio(type="numpy", label="Audio Playback", format="wav", autoplay="True")
-                    submit_answer_btn.click(
-                        predict,
-                        inputs=[question_choice, strategy_choice, feedback_level, audio_input],
-                        outputs=[feedback_output, audio_output]
                     )
         submit_info_btn.click(
             toggle_oral_coach_visibility,
             inputs=[class_name, index_no, policy_checkbox],
             outputs=[info_output, oral_coach_content]
         )
     create_teachers_dashboard_tab()
 demo.queue(max_size=20)

+# app.py
 import gradio as gr
+import asyncio
 import os
 import thinkingframes
 import soundfile as sf
 import numpy as np
 import logging
+from huggingface_hub import InferenceClient
+from streaming_stt_nemo import Model
+import edge_tts
 from dotenv import load_dotenv
 from policy import user_acceptance_policy
 from styles import theme
+from thinkingframes import generate_prompt, strategy_options
 from utils import get_image_html, collect_student_info
+from database_functions import add_submission
 from tab_teachers_dashboard import create_teachers_dashboard_tab
 from config import CLASS_OPTIONS
+from concurrent.futures import ThreadPoolExecutor
 import tempfile
+import spaces
+# Load CSS from external file
+with open('styles.css', 'r') as file:
+    css = file.read()
 # For maintaining user session (to keep track of userID)
 user_state = gr.State(value="")
+load_dotenv()
+client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+default_lang = "en"
+engines = {default_lang: Model(default_lang)}
 image_path = "picturePerformance.jpg"
 img_html = get_image_html(image_path)
+# Create a thread pool executor
+executor = ThreadPoolExecutor()
+# Transcription function using streaming_stt_nemo
 def transcribe(audio_path):
+    lang = "en"
+    model = engines[lang]
+    with open(audio_path, "rb") as audio_file:
+        text = model.stt_file(audio_file)[0]
+    return text
+# Inference function using Hugging Face InferenceClient
 @spaces.GPU(duration=120)
+def model(text):
+    system_instructions = "[SYSTEM] You are CrucialCoach, an AI-powered conversational coach. Guide the user through challenging workplace situations using the principles from 'Crucial Conversations'. Ask one question at a time and provide step-by-step guidance.\n\n[USER]"
+    generate_kwargs = dict(
+        temperature=0.7,
+        max_new_tokens=512,
+        top_p=0.95,
+        repetition_penalty=1,
+        do_sample=True,
+        seed=42,
+    )
+    formatted_prompt = system_instructions + text + "[CrucialCoach]"
+    stream = client.text_generation(
+        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    output = ""
+    for response in stream:
+        if not response.token.text == "</s>":
+            output += response.token.text
+    return output
+# Text-to-Speech function using edge_tts
+async def generate_audio_feedback(feedback_text):
+    communicate = edge_tts.Communicate(feedback_text)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+        tmp_path = tmp_file.name
+        await communicate.save(tmp_path)
+    return tmp_path
+# Generating feedback for the Oral Coach
+async def generate_feedback(user_id, question_choice, strategy_choice, message, feedback_level):
+    current_question_index = thinkingframes.questions.index(question_choice)
+    strategy, explanation = thinkingframes.strategy_options[strategy_choice]
     conversation = [{
         "role": "system",
+        "content": f"You are an expert Primary 6 English Language Teacher in a Singapore Primary school, "
+                   f"directly guiding a Primary 6 student in Singapore in their oral responses. "
+                   f"Format the feedback in Markdown so that it can be easily read. "
+                   f"Address the student directly in the second person in your feedback. "
+                   f"The student is answering the question: '{thinkingframes.questions[current_question_index]}'. "
+                   f"For Question 1, consider the picture description: '{thinkingframes.description}'. "
+                   f"For Questions 2 and 3, the picture is not relevant, so the student should not refer to it in their response. "
+                   f"Analyze the student's response using the following step-by-step approach: "
+                   f"1. Evaluate the response against the {strategy} thinking frame. "
+                   f"2. Assess how well the student's response addresses each criteria of the {strategy} thinking frame: "
+                   f"   - Assign emoticon scores based on how well the student comprehensively covered each criteria: "
+                   f"     - 😊😊😊 (three smiling faces) for a good coverage "
+                   f"     - 😊😊 (two smiling faces) for an average coverage "
+                   f"     - 😊 (one smiling face) for a poor coverage "
+                   f"   - Provide a clear, direct, and concise explanation of how well the answer addresses each criteria. "
+                   f"   - Identify specific areas for improvement in students responses, and provide targeted suggestions for improvement. "
+                   f"3. Identify overall strengths and areas for improvement in the student's response using the {strategy} to format and provide targeted areas for improvement. "
+                   f"4. Provide specific feedback on grammar, vocabulary, and sentence structure. "
+                   f"   Suggest age-appropriate enhancements that are one level higher than the student's current response. "
+                   f"5. Conclude with follow-up questions for reflection. "
+                   f"If the student's response deviates from the question, provide clear and concise feedback to help them refocus and try again. "
+                   f"Ensure that the vocabulary and sentence structure recommendations are achievable for Primary 6 students in Singapore. "
+                   f"Example Feedback Structure for Each Criteria: "
+                   f"Criteria: [Criteria Name] "
+                   f"Score: [Smiling emoticons] "
+                   f"Explanation: [Clear, direct, and concise explanation of how well the answer addresses the criteria. Identify specific areas for improvement, and provide targeted suggestions for improvement.] "
+                   f"{thinkingframes.generate_prompt(feedback_level)}"
     }, {
         "role": "user",
         "content": message
     }]
+    response = model(conversation)
+    chat_history = []  # Initialize chat history outside the loop
+    full_feedback = ""  # Accumulate the entire feedback message
+    try:
+        for chunk in response:
+            if chunk.choices[0].delta and chunk.choices[0].delta.content:
+                feedback_chunk = chunk.choices[0].delta.content
+                yield feedback_chunk  # Yield each feedback chunk as it is generated
+                await asyncio.sleep(0)
+    except Exception as e:
+        logging.error(f"An error occurred during feedback generation: {str(e)}")
+    questionNo = current_question_index + 1
+    add_submission(user_id, message, full_feedback, int(0), "", questionNo)
+# Function to predict and handle the entire workflow
+async def predict(question_choice, strategy_choice, feedback_level, audio):
+    current_audio_output = None  # Initialize current_audio_output to None
+    final_feedback = ""  # Store only the assistant's feedback
     if audio is None:
+        yield [("Oral Coach  ⚡ ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", "No audio data received. Please try again.")], current_audio_output
+        return
     sample_rate, audio_data = audio
     if audio_data is None or len(audio_data) == 0:
+        yield [("Oral Coach  ⚡ ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", "No audio data received. Please try again.")], current_audio_output
+        return
     audio_path = "audio.wav"
     if not isinstance(audio_data, np.ndarray):
         raise ValueError("audio_data must be a numpy array")
     sf.write(audio_path, audio_data, sample_rate)
+    chat_history = [("Oral Coach  ⚡ ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", "Transcribing your audio, please listen to your oral response while waiting ...")]
+    yield chat_history, current_audio_output
     try:
+        transcription_future = executor.submit(transcribe, audio_path)
+        student_response = await asyncio.wrap_future(transcription_future)
         if not student_response.strip():
+            yield [("Oral Coach  ⚡ ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", "Transcription failed. Please try again or seek assistance.")], current_audio_output
+            return
+        chat_history.append(("Student", student_response))  # Add student's transcript
+        yield chat_history, current_audio_output
+        chat_history.append(("Oral Coach  ⚡ ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", "Transcription complete. Generating feedback. Please continue listening to your oral response while waiting ..."))
+        yield chat_history, current_audio_output
+        moderation_response = client.moderations.create(input=student_response)
+        flagged = any(result.flagged for result in moderation_response.results)
+        if flagged:
+            moderated_message = "The message has been flagged. Please see your teacher to clarify."
+            questionNo = thinkingframes.questions.index(question_choice) + 1
+            add_submission(int(user_state.value), moderated_message, "", int(0), "", questionNo)
+            yield chat_history, current_audio_output
+            return
+        accumulated_feedback = ""  # Variable to store the accumulated feedback
+        async for feedback_chunk in generate_feedback(int(user_state.value), question_choice, strategy_choice, student_response, feedback_level):
+            accumulated_feedback += feedback_chunk  # Accumulate the feedback chunks
+            if chat_history and chat_history[-1][0] == "Oral Coach  ⚡ ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡":
+                chat_history[-1] = ("Oral Coach  ⚡ ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", accumulated_feedback)  # Update the last message in chat_history
+            else:
+                chat_history.append(("Oral Coach  ⚡ ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", accumulated_feedback))  # Append a new message to chat_history
+            yield chat_history, current_audio_output  # Yield the updated chat_history and current_audio_output
+        feedback_buffer = accumulated_feedback  # Use the accumulated feedback for TTS
+        audio_task = asyncio.create_task(generate_audio_feedback(feedback_buffer))
+        current_audio_output = await audio_task  # Store audio output
+        yield chat_history, current_audio_output  # Yield the final chat_history and current_audio_output
     except Exception as e:
         logging.error(f"An error occurred: {str(e)}", exc_info=True)
+        yield [("Oral Coach  ⚡ ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", "An error occurred. Please try again or seek assistance.")], current_audio_output
+with gr.Blocks(title="Oral Coach powered by ZeroGPU⚡ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡ and Meta AI 🦙 (LLama3)", theme=theme, css="footer {visibility: hidden}textbox{resize:none}") as demo:
+    with gr.Tab("Oral Coach  ⚡ ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡"):
         gr.Markdown("## Student Information")
         class_name = gr.Dropdown(label="Class", choices=CLASS_OPTIONS)
         index_no = gr.Dropdown(label="Index No", choices=[f"{i:02}" for i in range(1, 46)])
         policy_text = gr.Markdown(user_acceptance_policy)
+        policy_checkbox = gr.Checkbox(label="I have read and agree to the Things to Note When using the Oral Coach  ⚡ ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡", value=False)
         submit_info_btn = gr.Button("Submit Info")
         info_output = gr.Text()
         with gr.Column(visible=False) as oral_coach_content:
+            gr.Markdown("## English Language Oral Coach  ⚡ ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡")
+            gr.Markdown(img_html)  # Display the image
             with gr.Row():
                 with gr.Column(scale=1):
                     gr.Markdown("### Step 1: Choose a Question")
                     submit_answer_btn = gr.Button("Submit Oral Response")
                     gr.Markdown("### Step 5: Review your personalised feedback")
+                    feedback_output = gr.Chatbot(
+                        label="Feedback",
+                        scale=4,
+                        height=700,
+                        show_label=True
                     )
+                    audio_output = gr.Audio(type="numpy", label="Audio Playback", format="wav", autoplay=True)
+            submit_answer_btn.click(
+                predict,
+                inputs=[question_choice, strategy_choice, feedback_level, audio_input],
+                outputs=[feedback_output, audio_output],
+                api_name="predict"
+            )
+        def toggle_oral_coach_visibility(class_name, index_no, policy_checked):
+            if not policy_checked:
+                return "Please agree to the Things to Note When using the Oral Coach  ⚡ ϞϞ(๑⚈ ․̫ ⚈๑)∩ ⚡ before submitting.", gr.update(visible=False)
+            validation_passed, message, userid = collect_student_info(class_name, index_no)
+            if not validation_passed:
+                return message, gr.update(visible=False)
+            user_state.value = userid
+            return message, gr.update(visible=True)
         submit_info_btn.click(
             toggle_oral_coach_visibility,
             inputs=[class_name, index_no, policy_checkbox],
             outputs=[info_output, oral_coach_content]
         )
+    # Define other tabs like Teacher's Dashboard
     create_teachers_dashboard_tab()
 demo.queue(max_size=20)