import os

os.system("pip install git+https://github.com/openai/whisper.git")

import whisper
import evaluate
from evaluate.utils import launch_gradio_widget
import gradio as gr
import torch
import pandas as pd
import random
import classify
from whisper.model import Whisper
from whisper.tokenizer import get_tokenizer
from transformers import AutoModelForSequenceClassification, pipeline, WhisperTokenizer, RobertaForSequenceClassification, RobertaTokenizer, AutoTokenizer

# pull in emotion detection
# --- Add element for specification
# pull in text classification
# --- Add custom labels
# --- Associate labels with radio elements
# add logic to initiate mock notification when detected
# pull in misophonia-specific model

model_cache = {}

# Static classes for now, but it would be best to have the user select from
# multiple sets, and to enter their own.
class_options = {
    "misophonia": ["chewing", "breathing", "mouthsounds", "popping", "sneezing", "yawning", "smacking", "sniffling", "panting"]
}

pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large")


def slider_logic(slider):
    # Map the 1-5 sensitivity slider to a score threshold:
    # 1 intervenes only on near-certain detections, 5 on weak ones.
    if slider == 1:
        threshold = 0.98
    elif slider == 2:
        threshold = 0.88
    elif slider == 3:
        threshold = 0.77
    elif slider == 4:
        threshold = 0.66
    elif slider == 5:
        threshold = 0.55
    else:
        threshold = 0.77  # fall back to the middle setting
    return threshold


# Create a Gradio interface with audio file and text inputs
def classify_toxicity(audio_file, selected_sounds, slider):
    # Transcribe the audio file using Whisper ASR
    # transcribed_text = pipe(audio_file)["text"]

    threshold = slider_logic(slider)
    model = whisper.load_model("large")
    # model = model_cache[model_name]
    # class_names = classify_anxiety.split(",")

    classify_anxiety = "misophonia"
    class_names = class_options.get(classify_anxiety, [])
    print("class names", class_names, "classify_anxiety", classify_anxiety)

    tokenizer = get_tokenizer("large")
    # tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-large")

    internal_lm_average_logprobs = classify.calculate_internal_lm_average_logprobs(
        model=model,
        class_names=class_names,
        # class_names=classify_anxiety,
        tokenizer=tokenizer,
    )
    audio_features = classify.calculate_audio_features(audio_file, model)
    average_logprobs = classify.calculate_average_logprobs(
        model=model,
        audio_features=audio_features,
        class_names=class_names,
        tokenizer=tokenizer,
    )
    # Subtract the text-only (internal LM) log-probs so scores reflect the audio evidence.
    average_logprobs -= internal_lm_average_logprobs
    scores = average_logprobs.softmax(-1).tolist()

    class_score_dict = {class_name: score for class_name, score in zip(class_names, scores)}

    # Toggle the calming gif when any user-selected sound exceeds the
    # threshold (the "mock notification" noted above).
    show_calm_image = False
    for selected_class_name in selected_sounds:
        if selected_class_name in class_score_dict:
            score = class_score_dict[selected_class_name]
            if score > threshold:
                print(f"Threshold exceeded for class '{selected_class_name}': Score = {score:.4f}")
                show_calm_image = True
    '''
    for class_name, score in class_score_dict.items():
        if score > threshold:
            print(f"Threshold exceeded for class '{class_name}': Score = {score:.4f}")
    '''
    # miso_label_dict = {label: score for label, score in classify_anxiety[0].items()}
    return class_score_dict, gr.update(visible=show_calm_image)


def positive_affirmations():
    affirmations = [
        "I have survived my anxiety before and I will survive again now",
        "I am not in danger; I am just uncomfortable; this too will pass",
        "I forgive and release the past and look forward to the future",
        "I can't control what other people say but I can control my breathing and my response",
    ]
    selected_affirm = random.choice(affirmations)
    return selected_affirm


with gr.Blocks() as iface:
    with gr.Column():
        miso_sounds = gr.CheckboxGroup(["chewing", "breathing", "mouthsounds", "popping", "sneezing", "yawning", "smacking", "sniffling", "panting"])
        sense_slider = gr.Slider(minimum=1, maximum=5, step=1.0, label="How readily do you want the tool to intervene? 1 = in extreme cases and 5 = at every opportunity")
    with gr.Column():
        aud_input = gr.Audio(source="upload", type="filepath", label="Upload Audio File")
        submit_btn = gr.Button("Run")
    with gr.Column():
        # out_val = gr.Textbox()
        out_class = gr.Label()
        # Hidden until classify_toxicity reports a threshold breach.
        calm_image = gr.Image(value="./hrv-breathing.gif", visible=False)
    submit_btn.click(fn=classify_toxicity, inputs=[aud_input, miso_sounds, sense_slider], outputs=[out_class, calm_image])

iface.launch()
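
# A hypothetical direct call for quick local testing, bypassing the Gradio UI.
# It assumes a local audio file named "sample.wav" (a placeholder, not part of
# this repo) and that the bundled `classify` module is importable:
#
#   scores, calm_update = classify_toxicity("sample.wav", ["chewing", "sniffling"], slider=3)
#   print(scores)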