FalconTheBerd committed
Commit fd761f2 · 1 Parent(s): d8e7e56

initcommit

Files changed (3)
  1. README.md +1 -3
  2. app.py +35 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -7,6 +7,4 @@ sdk: gradio
 sdk_version: 5.35.0
 app_file: app.py
 pinned: false
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+---
app.py ADDED
@@ -0,0 +1,35 @@
+import torch
+import torchaudio
+from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
+import torch.nn.functional as F
+import gradio as gr
+
+# Load the pretrained audio emotion model and its feature extractor
+model_id = "Hatman/audio-emotion-detection"
+extractor = AutoFeatureExtractor.from_pretrained(model_id)
+model = AutoModelForAudioClassification.from_pretrained(model_id)
+
+def classify_emotion(audio):
+    waveform, sr = torchaudio.load(audio)
+
+    # Downmix multi-channel audio to mono
+    if waveform.shape[0] > 1:
+        waveform = waveform.mean(dim=0, keepdim=True)
+
+    # Resample to the 16 kHz rate the model expects
+    if sr != 16000:
+        waveform = torchaudio.transforms.Resample(sr, 16000)(waveform)
+
+    # Inference
+    inputs = extractor(waveform.squeeze(0), sampling_rate=16000, return_tensors="pt")
+    with torch.no_grad():
+        logits = model(**inputs).logits
+    probs = F.softmax(logits, dim=-1)[0]
+
+    # Return {label: probability} for every class
+    return {model.config.id2label[i]: float(probs[i]) for i in range(len(probs))}
+
+gr.Interface(fn=classify_emotion,
+             inputs=gr.Audio(type="filepath", label="Upload or record audio"),
+             outputs=gr.Label(num_top_classes=None),
+             title="Speech Emotion Detection").launch()
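A minimal sketch of how the classify_emotion function above could be smoke-tested outside the Gradio UI; the path sample.wav is a hypothetical placeholder for any short speech clip that torchaudio can read, and the label names depend on the model's own config.

# Quick local check of classify_emotion (assumes the model and function above are already loaded).
# "sample.wav" is a placeholder path, not a file shipped with this commit.
scores = classify_emotion("sample.wav")

# Print labels sorted by predicted probability, highest first.
for label, prob in sorted(scores.items(), key=lambda kv: kv[1], reverse=True):
    print(f"{label}: {prob:.3f}")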
requirements.txt ADDED
@@ -0,0 +1,5 @@
+torch
+torchaudio
+transformers
+gradio
+soundfile