# NOTE(review): the lines that were here were non-code residue from a
# Hugging Face Spaces page scrape ("Spaces:", "Runtime error", file size,
# commit hashes, and a line-number gutter). They were not Python and broke
# the module at import; converted to this comment so the file parses.
import os
# NOTE(review): installing a dependency at import time via os.system is
# fragile and a supply-chain risk; prefer pinning whisper in the Space's
# requirements.txt so the environment is reproducible.
os.system("pip install git+https://github.com/openai/whisper.git")
import whisper
import evaluate
from evaluate.utils import launch_gradio_widget
import gradio as gr
import torch
import pandas as pd
import random
import classify  # local module: Whisper-based zero-shot audio classification helpers
from whisper.model import Whisper
from whisper.tokenizer import get_tokenizer
from transformers import AutoModelForSequenceClassification, pipeline, WhisperTokenizer, RobertaForSequenceClassification, RobertaTokenizer, AutoTokenizer
# pull in emotion detection
# --- Add element for specification
# pull in text classification
# --- Add custom labels
# --- Associate labels with radio elements
# add logic to initiate mock notification when detected
# pull in misophonia-specific model

# Cache for loaded models; currently unpopulated — presumably intended to
# avoid reloading Whisper per request (TODO: confirm and wire up).
model_cache = {}

# Maps raw emotion-model labels to display names. Not referenced in this
# file's visible code — kept for a planned emotion-detection feature.
emo_dict = {
    'sad': 'Sad',
    'hap': 'Happy',
    'ang': 'Anger',
    'neu': 'Neutral'
}

# Static classes for now, but it would be best to have the user select from
# multiple options, and to enter their own.
class_options = {
    "misophonia": ["chewing", "breathing", "mouthsounds", "popping", "sneezing", "yawning", "smacking", "sniffling", "panting"]
}

# Whisper ASR pipeline used for transcription inside classify_toxicity.
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large")
def slider_logic(slider):
    """Map the 1-5 sensitivity slider position to a score threshold.

    Higher slider settings mean the tool should intervene more readily,
    so they map to *lower* probability thresholds.

    Args:
        slider: Slider position, expected to be 1-5 (int, or a whole
            float such as 3.0 as delivered by the Gradio slider).

    Returns:
        float: The classification threshold; 0.0 for out-of-range values.
        (The original returned an empty list `[]` in that case, which was
        a type bug — every other path returned a float.)
    """
    thresholds = {1: 0.98, 2: 0.88, 3: 0.77, 4: 0.66, 5: 0.55}
    # Whole floats (1.0, 2.0, ...) hash equal to their int keys, so a
    # plain dict lookup handles both int and float slider values.
    return thresholds.get(slider, 0.0)
# Gradio callback: audio file + sensitivity slider -> per-class scores.
def classify_toxicity(audio_file, slider):
    """Score an uploaded audio clip against the misophonia trigger classes.

    Args:
        audio_file: Filesystem path to the uploaded audio, or None.
        slider: Sensitivity setting (1-5) from the Gradio slider.

    Returns:
        dict: Mapping of class name -> softmax probability, suitable for
        display in a ``gr.Label`` component.
    """
    # Transcribe with the Whisper ASR pipeline when audio is provided.
    # (The original fell back to an undefined name `text_input` here,
    # which raised NameError whenever audio_file was None.)
    if audio_file is not None:
        transcribed_text = pipe(audio_file)["text"]
    else:
        transcribed_text = ""
    # Derived from the slider but not yet applied to the scores below —
    # TODO: use it to gate which classes are reported.
    threshold = slider_logic(slider)

    model = whisper.load_model("large")
    classify_anxiety = "misophonia"
    # Use the class list directly. The original joined the names with a
    # trailing comma and re-split, which appended a spurious empty-string
    # class name to class_names.
    class_names = class_options.get(classify_anxiety, [])
    print("class names ", class_names, "classify_anxiety ", classify_anxiety)

    tokenizer = get_tokenizer("large")
    # Subtract the internal LM prior so the scores reflect the audio
    # evidence rather than the text prior of each class name.
    internal_lm_average_logprobs = classify.calculate_internal_lm_average_logprobs(
        model=model,
        class_names=class_names,
        tokenizer=tokenizer,
    )
    audio_features = classify.calculate_audio_features(audio_file, model)
    average_logprobs = classify.calculate_average_logprobs(
        model=model,
        audio_features=audio_features,
        class_names=class_names,
        tokenizer=tokenizer,
    )
    average_logprobs -= internal_lm_average_logprobs
    scores = average_logprobs.softmax(-1).tolist()
    return dict(zip(class_names, scores))
def positive_affirmations():
    """Return one calming affirmation, chosen uniformly at random."""
    affirmations = (
        "I have survived my anxiety before and I will survive again now",
        "I am not in danger; I am just uncomfortable; this too will pass",
        "I forgive and release the past and look forward to the future",
        "I can't control what other people say but I can control my breathing and my response",
    )
    return random.choice(affirmations)
# Build the Gradio UI: sensitivity slider + audio upload -> class scores.
with gr.Blocks() as iface:
    # Session-level state placeholder; not yet read by any callback.
    show_state = gr.State([])
    with gr.Column():
        sense_slider = gr.Slider(minimum=1, maximum=5, step=1.0, label="How readily do you want the tool to intervene? 1 = in extreme cases and 5 = at every opportunity")
    with gr.Column():
        aud_input = gr.Audio(source="upload", type="filepath", label="Upload Audio File")
        # Button text is its first positional arg (`value`); the original
        # passed label="Run", which is not how Button text is set.
        submit_btn = gr.Button("Run")
    with gr.Column():
        out_class = gr.Label()
    submit_btn.click(fn=classify_toxicity, inputs=[aud_input, sense_slider], outputs=out_class)

# (Removed a stray trailing "|" scrape artifact that made this line a
# syntax error in the original file.)
iface.launch()