# Spaces: Runtime error (status text captured along with this listing; not part of the program)
import os | |
os.system("pip install git+https://github.com/openai/whisper.git") | |
import whisper | |
import evaluate | |
from evaluate.utils import launch_gradio_widget | |
import gradio as gr | |
import torch | |
import pandas as pd | |
import random | |
import classify | |
from whisper.model import Whisper | |
from whisper.tokenizer import get_tokenizer | |
from transformers import AutoModelForSequenceClassification, pipeline, WhisperTokenizer, RobertaForSequenceClassification, RobertaTokenizer, AutoTokenizer | |
# pull in emotion detection | |
# --- Add element for specification | |
# pull in text classification | |
# --- Add custom labels | |
# --- Associate labels with radio elements | |
# add logic to initiate mock notification when detected
# pull in misophonia-specific model | |
# Module-level store for loaded models (presumably keyed by model name — confirm).
model_cache = dict()

# Static classes for now, but it would be best to have the user select from
# multiple sets, and to enter their own.
# Maps a category name to the list of sound classes scored for that category.
class_options = {
    "misophonia": [
        "chewing",
        "breathing",
        "mouthsounds",
        "popping",
        "sneezing",
        "yawning",
        "smacking",
        "sniffling",
        "panting",
    ],
}

# Hugging Face speech-recognition pipeline, built at import time.
# NOTE(review): no live code in view calls `pipe` — confirm whether this
# import-time model load is still needed.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large",
)
# Sensitivity level (1-5) -> minimum class score that triggers an alert.
# Level 1 uses the strictest cut-off, level 5 the most permissive one.
_SENSITIVITY_THRESHOLDS = {1: 0.98, 2: 0.88, 3: 0.77, 4: 0.66, 5: 0.55}

# Fallback for unrecognised levels. The caller compares softmax scores
# (which never exceed 1.0) with `score > threshold`, so 1.0 means
# "never trigger" while still being a valid number to compare against.
_NEVER_TRIGGER_THRESHOLD = 1.0


def slider_logic(slider):
    """Translate a sensitivity slider position into a score threshold.

    Args:
        slider: Slider position. Values equal to 1-5 (ints or whole floats
            such as ``3.0``) select a threshold; anything else falls back.

    Returns:
        float: The minimum score that counts as "exceeded" for the given
        level, or ``1.0`` for an unrecognised value. The fallback used to be
        an empty list, which made the caller's ``score > threshold``
        comparison raise ``TypeError``.
    """
    try:
        return _SENSITIVITY_THRESHOLDS[slider]
    except (KeyError, TypeError):
        # KeyError: value outside 1-5; TypeError: unhashable input.
        return _NEVER_TRIGGER_THRESHOLD
# Classify an uploaded audio file against the selected sound classes (the Gradio interface itself is built further below)
def classify_toxicity(audio_file, selected_sounds, slider):
    """Score an audio clip against the misophonia sound classes.

    Args:
        audio_file: Audio input forwarded to ``classify.calculate_audio_features``
            (the UI supplies a file path).
        selected_sounds: Class names the user wants to be alerted about.
            ``None`` or an empty selection produces no alerts.
        slider: Sensitivity level, converted to a score threshold by
            ``slider_logic``.

    Returns:
        tuple: ``(scores, alert_text)`` where ``scores`` maps every class name
        to its softmax score and ``alert_text`` holds one line per selected
        class whose score exceeds the threshold (empty string when none do).
    """
    threshold = slider_logic(slider)

    # Loading the checkpoint on every request is slow; keep it in the
    # module-level cache after the first call.
    model_name = "large"
    model = model_cache.get(model_name)
    if model is None:
        model = whisper.load_model(model_name)
        model_cache[model_name] = model

    classify_anxiety = "misophonia"
    # Use the configured list directly. Joining with "," and splitting again
    # left a trailing empty class name that was scored like a real class.
    class_names = list(class_options.get(classify_anxiety, []))
    print("class names ", class_names, "classify_anxiety ", classify_anxiety)

    # NOTE(review): in openai-whisper the first parameter of get_tokenizer
    # appears to be a `multilingual` flag, so this string would only be used
    # for its truthiness — confirm against the installed version.
    tokenizer = get_tokenizer("large")

    internal_lm_average_logprobs = classify.calculate_internal_lm_average_logprobs(
        model=model,
        class_names=class_names,
        tokenizer=tokenizer,
    )
    audio_features = classify.calculate_audio_features(audio_file, model)
    average_logprobs = classify.calculate_average_logprobs(
        model=model,
        audio_features=audio_features,
        class_names=class_names,
        tokenizer=tokenizer,
    )
    # Subtract the audio-independent log-probabilities before normalising.
    average_logprobs -= internal_lm_average_logprobs
    scores = average_logprobs.softmax(-1).tolist()
    class_score_dict = dict(zip(class_names, scores))

    # Collect every alert instead of keeping only the last loop iteration's
    # value; this also guarantees the text exists when nothing is selected.
    alerts = []
    for selected_class_name in selected_sounds or []:
        score = class_score_dict.get(selected_class_name)
        if score is not None and score > threshold:
            message = f"Threshold exceeded for class '{selected_class_name}': Score = {score:.4f}"
            print(message)
            alerts.append(message)
    affirm = "\n".join(alerts)

    return class_score_dict, affirm
def positive_affirmations():
    """Return one affirmation chosen at random from a fixed set of four."""
    return random.choice(
        (
            "I have survived my anxiety before and I will survive again now",
            "I am not in danger; I am just uncomfortable; this too will pass",
            "I forgive and release the past and look forward to the future",
            "I can't control what other people say but I can control my breathing and my response",
        )
    )
# Build the UI: sound selection and sensitivity, audio upload, then results.
with gr.Blocks() as iface:
    with gr.Column():
        # Take the choices from class_options so the checkboxes cannot drift
        # from the class names the classifier actually scores.
        miso_sounds = gr.CheckboxGroup(list(class_options["misophonia"]))
        sense_slider = gr.Slider(minimum=1, maximum=5, step=1.0, label="How readily do you want the tool to intervene? 1 = in extreme cases and 5 = at every opportunity")
    with gr.Column():
        aud_input = gr.Audio(source="upload", type="filepath", label="Upload Audio File")
        # Button text is set through `value`; `label` is not a documented
        # Button parameter.
        submit_btn = gr.Button(value="Run")
    with gr.Column():
        # Per-class scores, and the alert text for classes over the threshold.
        out_class = gr.Label()
        out_text = gr.Textbox()
    # The two return values of classify_toxicity fill out_class and out_text.
    submit_btn.click(fn=classify_toxicity, inputs=[aud_input, miso_sounds, sense_slider], outputs=[out_class, out_text])
iface.launch()