Commit 5d47fc0
Parent(s): 1b2ab2d

update examples

Files changed:
- app.py +1 -1
- data/heart.wav +0 -0
- src/__pycache__/__init__.cpython-39.pyc +0 -0
- src/__pycache__/modeling_outputs.cpython-39.pyc +0 -0
- src/__pycache__/models.cpython-39.pyc +0 -0
- src/collator.py +0 -58
- src/trainer.py +0 -62
app.py
CHANGED
@@ -33,7 +33,7 @@ inputs = gr.inputs.Audio(label="Input Audio", type="filepath", source="upload")
 outputs = gr.outputs.Label(type="confidences", label = "Output Scores")
 title = "Wav2Vec2 Speech Emotion Recognition"
 description = "This is a demo of the Wav2Vec2 Speech Emotion Recognition model. Upload a .wav file (preferably small) and the top emotions predicted will be displayed."
-examples = ['data/
+examples = ['data/heart.wav', 'data/happy26.wav', 'data/jm24.wav', 'data/newton.wav', 'data/speeding.wav']
 article = "<a href = 'https://github.com/m3hrdadfi/soxan'> Wav2Vec2 Speech Classification Github Repository"
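For context, the variables touched by this hunk are the building blocks of a gr.Interface. Below is a minimal sketch of how they plug together under the legacy gr.inputs/gr.outputs API that app.py uses; the predict function and its scores are placeholders, since the Space's real classifier is outside this hunk.

import gradio as gr

# Placeholder classifier -- app.py's actual prediction function is not shown in this hunk.
def predict(audio_path):
    """Return {emotion: confidence} scores for the uploaded .wav file."""
    return {"happy": 0.7, "sad": 0.2, "angry": 0.1}

inputs = gr.inputs.Audio(label="Input Audio", type="filepath", source="upload")
outputs = gr.outputs.Label(type="confidences", label="Output Scores")
examples = ['data/heart.wav', 'data/happy26.wav', 'data/jm24.wav',
            'data/newton.wav', 'data/speeding.wav']

gr.Interface(fn=predict, inputs=inputs, outputs=outputs,
             title="Wav2Vec2 Speech Emotion Recognition",
             examples=examples).launch()

The example paths resolve relative to the app's working directory, which is why this commit adds data/heart.wav alongside the updated examples list.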
data/heart.wav
ADDED
Binary file (12.1 kB)

src/__pycache__/__init__.cpython-39.pyc
CHANGED
Binary files a/src/__pycache__/__init__.cpython-39.pyc and b/src/__pycache__/__init__.cpython-39.pyc differ

src/__pycache__/modeling_outputs.cpython-39.pyc
CHANGED
Binary files a/src/__pycache__/modeling_outputs.cpython-39.pyc and b/src/__pycache__/modeling_outputs.cpython-39.pyc differ

src/__pycache__/models.cpython-39.pyc
CHANGED
Binary files a/src/__pycache__/models.cpython-39.pyc and b/src/__pycache__/models.cpython-39.pyc differ
src/collator.py
DELETED
@@ -1,58 +0,0 @@
-from dataclasses import dataclass
-from typing import Dict, List, Optional, Union
-import torch
-
-import transformers
-from transformers import Wav2Vec2Processor, Wav2Vec2FeatureExtractor
-
-
-@dataclass
-class DataCollatorCTCWithPadding:
-    """
-    Data collator that will dynamically pad the inputs received.
-    Args:
-        feature_extractor (:class:`~transformers.Wav2Vec2FeatureExtractor`)
-            The feature_extractor used for processing the data.
-        padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
-            Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
-            among:
-            * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
-              sequence is provided).
-            * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
-              maximum acceptable input length for the model if that argument is not provided.
-            * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
-              different lengths).
-        max_length (:obj:`int`, `optional`):
-            Maximum length of the ``input_values`` of the returned list and optionally padding length (see above).
-        max_length_labels (:obj:`int`, `optional`):
-            Maximum length of the ``labels`` returned list and optionally padding length (see above).
-        pad_to_multiple_of (:obj:`int`, `optional`):
-            If set, will pad the sequence to a multiple of the provided value.
-            This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
-            7.5 (Volta).
-    """
-
-    feature_extractor: Wav2Vec2FeatureExtractor
-    padding: Union[bool, str] = True
-    max_length: Optional[int] = None
-    max_length_labels: Optional[int] = None
-    pad_to_multiple_of: Optional[int] = None
-    pad_to_multiple_of_labels: Optional[int] = None
-
-    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
-        input_features = [{"input_values": feature["input_values"]} for feature in features]
-        label_features = [feature["labels"] for feature in features]
-
-        d_type = torch.long if isinstance(label_features[0], int) else torch.float
-
-        batch = self.feature_extractor.pad(
-            input_features,
-            padding=self.padding,
-            max_length=self.max_length,
-            pad_to_multiple_of=self.pad_to_multiple_of,
-            return_tensors="pt",
-        )
-
-        batch["labels"] = torch.tensor(label_features, dtype=d_type)
-
-        return batch
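For reference, a minimal sketch of how this now-deleted collator pads a batch; the checkpoint name and the item shapes are assumptions for illustration, not values taken from this repo.

from transformers import Wav2Vec2FeatureExtractor

# Assumed checkpoint; the Space's actual base model is not named in this commit.
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/wav2vec2-base")
collator = DataCollatorCTCWithPadding(feature_extractor=feature_extractor, padding=True)

# Items are assumed to carry raw waveform samples plus an integer class label.
features = [
    {"input_values": [0.01, -0.02, 0.03, 0.04], "labels": 0},
    {"input_values": [0.05, 0.06], "labels": 2},
]
batch = collator(features)
# batch["input_values"]: FloatTensor padded to the longest sequence in the batch
# batch["labels"]: tensor([0, 2]) with dtype torch.long, because the labels are ints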
src/trainer.py
DELETED
@@ -1,62 +0,0 @@
-from typing import Any, Dict, Union
-
-import torch
-from packaging import version
-from torch import nn
-
-from transformers import (
-    Trainer,
-    is_apex_available,
-)
-
-if is_apex_available():
-    from apex import amp
-
-if version.parse(torch.__version__) >= version.parse("1.6"):
-    _is_native_amp_available = True
-    from torch.cuda.amp import autocast
-
-
-class CTCTrainer(Trainer):
-    def training_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]]) -> torch.Tensor:
-        """
-        Perform a training step on a batch of inputs.
-
-        Subclass and override to inject custom behavior.
-
-        Args:
-            model (:obj:`nn.Module`):
-                The model to train.
-            inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`):
-                The inputs and targets of the model.
-
-                The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
-                argument :obj:`labels`. Check your model's documentation for all accepted arguments.
-
-        Return:
-            :obj:`torch.Tensor`: The tensor with training loss on this batch.
-        """
-
-        model.train()
-        inputs = self._prepare_inputs(inputs)
-
-        if self.use_amp:
-            with autocast():
-                loss = self.compute_loss(model, inputs)
-        else:
-            loss = self.compute_loss(model, inputs)
-
-        if self.args.gradient_accumulation_steps > 1:
-            loss = loss / self.args.gradient_accumulation_steps
-
-        if self.use_amp:
-            self.scaler.scale(loss).backward()
-        elif self.use_apex:
-            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
-                scaled_loss.backward()
-        elif self.deepspeed:
-            self.deepspeed.backward(loss)
-        else:
-            loss.backward()
-
-        return loss.detach()
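Likewise, a minimal sketch of how CTCTrainer and the collator above would be wired together. The checkpoint, the stand-in Wav2Vec2ForSequenceClassification head (the repo's own classification model presumably lives in src/models.py, judging by models.cpython-39.pyc above), and the toy dataset are all illustrative assumptions; note the override also relies on Trainer attributes such as use_amp and scaler that exist only in older transformers releases.

from transformers import (
    TrainingArguments,
    Wav2Vec2FeatureExtractor,
    Wav2Vec2ForSequenceClassification,  # stand-in head; not the repo's own model class
)

feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/wav2vec2-base")
model = Wav2Vec2ForSequenceClassification.from_pretrained("facebook/wav2vec2-base", num_labels=4)

# Tiny in-memory dataset, just to show the expected item shape.
train_dataset = [
    {"input_values": [0.0] * 16000, "labels": 0},
    {"input_values": [0.1] * 8000, "labels": 1},
]

trainer = CTCTrainer(
    model=model,
    args=TrainingArguments(output_dir="./out", per_device_train_batch_size=2),
    data_collator=DataCollatorCTCWithPadding(feature_extractor=feature_extractor),
    train_dataset=train_dataset,
    tokenizer=feature_extractor,  # older Trainer versions accept the feature extractor here
)
trainer.train()  # each step dispatches through the overridden training_step above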