Spaces:

mskov
/

Misophonia_Trigger_Detection

Runtime error

File size: 5,060 Bytes

import os
os.system("pip install git+https://github.com/openai/whisper.git")
import whisper
import evaluate
from evaluate.utils import launch_gradio_widget
import gradio as gr
import torch
import pandas as pd
import random
import classify
from whisper.model import Whisper
from whisper.tokenizer import get_tokenizer
from transformers import AutoModelForSequenceClassification, pipeline, WhisperTokenizer, RobertaForSequenceClassification, RobertaTokenizer, AutoTokenizer


# pull in emotion detection
# --- Add element for specification
# pull in text classification
# --- Add custom labels
# --- Associate labels with radio elements
# add logic to initiate a mock notification when a trigger is detected
# pull in misophonia-specific model

# Empty module-level dict; going by its name, meant to hold loaded models.
model_cache: dict = {}


# Class names to score, grouped by category. Static for now, but it would be
# best to have the user select from multiple categories and enter their own.
class_options: dict = {
    "misophonia": [
        "chewing",
        "breathing",
        "mouthsounds",
        "popping",
        "sneezing",
        "yawning",
        "smacking",
        "sniffling",
        "panting",
    ],
}

# Hugging Face speech-recognition pipeline for the openai/whisper-large
# checkpoint, constructed once when the module is imported.
# NOTE(review): no live call to `pipe` is visible in this file -- confirm it
# is still needed before paying for the model load at import time.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large",
)



# Sensitivity level -> minimum score needed to trigger an intervention.
_SLIDER_THRESHOLDS = {1: 0.98, 2: 0.88, 3: 0.77, 4: 0.66, 5: 0.55}


def slider_logic(slider):
    """Translate a sensitivity level (1-5) into a score threshold.

    Level 1 maps to the strictest threshold (0.98) and level 5 to the most
    permissive one (0.55).

    Args:
        slider: The sensitivity level. Integers and integral floats are
            both accepted (``3`` and ``3.0`` are the same level).

    Returns:
        Always a float, so callers can compare scores against it directly.
        Numeric values outside 1-5 are clamped to the nearest level;
        non-numeric values (e.g. ``None``) fall back to the strictest
        threshold so the tool only intervenes in extreme cases.
    """
    strictest = _SLIDER_THRESHOLDS[1]
    try:
        level = int(round(float(slider)))
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric strings, NaN and infinities end up here.
        return strictest
    level = min(max(level, 1), 5)
    return _SLIDER_THRESHOLDS[level]

# Gradio click handler: takes the uploaded audio file, the selected sounds and the slider value
def classify_toxicity(audio_file, selected_sounds, slider):
    """Score an audio file against the misophonia trigger classes.

    Args:
        audio_file: Audio input handed to ``classify.calculate_audio_features``.
        selected_sounds: Class names the user wants to be alerted about.
            ``None`` or an empty selection simply produces no affirmation.
        slider: Sensitivity level, converted to a threshold by
            ``slider_logic``.

    Returns:
        A ``(scores, affirmation)`` tuple. ``scores`` maps every class name
        to its softmax score. ``affirmation`` is a string from
        ``positive_affirmations()`` when at least one selected class scores
        above the threshold, otherwise ``""``.
    """
    threshold = slider_logic(slider)

    # Load the Whisper model once and reuse it on later calls.
    model_name = "large"
    model = model_cache.get(model_name)
    if model is None:
        model = model_cache[model_name] = whisper.load_model(model_name)

    # Use the configured names as-is: joining them with "," and splitting
    # again would append an empty-string class name to the list.
    classify_anxiety = "misophonia"
    class_names = list(class_options.get(classify_anxiety, []))
    print("class names ", class_names, "classify_anxiety ", classify_anxiety)

    # The first parameter of get_tokenizer is the `multilingual` flag, not a
    # model name; "large" is a multilingual checkpoint.
    tokenizer = get_tokenizer(multilingual=True)

    # Per-class log-probabilities computed without any audio input ...
    internal_lm_average_logprobs = classify.calculate_internal_lm_average_logprobs(
        model=model,
        class_names=class_names,
        tokenizer=tokenizer,
    )
    # ... and the same computed with the audio features.
    audio_features = classify.calculate_audio_features(audio_file, model)
    average_logprobs = classify.calculate_average_logprobs(
        model=model,
        audio_features=audio_features,
        class_names=class_names,
        tokenizer=tokenizer,
    )
    average_logprobs -= internal_lm_average_logprobs
    scores = average_logprobs.softmax(-1).tolist()

    class_score_dict = dict(zip(class_names, scores))

    # Guard against a non-numeric threshold so that an unrecognised slider
    # value cannot crash the comparison below.
    can_compare = isinstance(threshold, (int, float))

    # Start empty so the return value is defined even when nothing is
    # selected, and keep the first affirmation instead of letting a later,
    # lower-scoring selection overwrite it.
    affirm = ""
    for selected_class_name in selected_sounds or []:
        score = class_score_dict.get(selected_class_name)
        if score is not None and can_compare and score > threshold:
            print(f"Threshold exceeded for class '{selected_class_name}': Score = {score:.4f}")
            if not affirm:
                affirm = positive_affirmations()

    return class_score_dict, affirm
    
def positive_affirmations():
    """Return one affirmation string chosen at random from a fixed set."""
    return random.choice(
        (
            "I have survived my anxiety before and I will survive again now",
            "I am not in danger; I am just uncomfortable; this too will pass",
            "I forgive and release the past and look forward to the future",
            "I can't control what other people say but I can control my breathing and my response",
        )
    )
    
# UI layout: trigger selection and sensitivity, audio upload, then results.
with gr.Blocks() as iface:
    with gr.Column():
        # Take the choices from class_options so the checkbox labels always
        # match the class names that classify_toxicity scores.
        miso_sounds = gr.CheckboxGroup(list(class_options["misophonia"]))
        sense_slider = gr.Slider(minimum=1, maximum=5, step=1.0, label="How readily do you want the tool to intervene? 1 = in extreme cases and 5 = at every opportunity")
    with gr.Column():
        aud_input = gr.Audio(source="upload", type="filepath", label="Upload Audio File")
        # The button text is the `value` argument (first positional), not `label`.
        submit_btn = gr.Button("Run")
    with gr.Column():
        out_class = gr.Label()
        out_text = gr.Textbox()
    # classify_toxicity returns (scores, affirmation), matching the two outputs.
    submit_btn.click(fn=classify_toxicity, inputs=[aud_input, miso_sounds, sense_slider], outputs=[out_class, out_text])


iface.launch()