Spaces:
Running
Running
update to use bfloat16
Browse files
src/audiobox_aesthetics/export_model_to_hf.py
CHANGED
@@ -51,6 +51,8 @@ if __name__ == "__main__":
|
|
51 |
}
|
52 |
for axis in target_transform.keys()
|
53 |
}
|
|
|
|
|
54 |
|
55 |
model = AudioBoxAesthetics(
|
56 |
sample_rate=16_000, target_transform=target_transform, **model_cfg
|
|
|
51 |
}
|
52 |
for axis in target_transform.keys()
|
53 |
}
|
54 |
+
# force precision to be bfloat16 to match infer class
|
55 |
+
model_cfg["precision"] = "bf16"
|
56 |
|
57 |
model = AudioBoxAesthetics(
|
58 |
sample_rate=16_000, target_transform=target_transform, **model_cfg
|
src/audiobox_aesthetics/inference.py
CHANGED
@@ -10,7 +10,7 @@ from audiobox_aesthetics.infer import make_inference_batch
|
|
10 |
from pydantic import BaseModel
|
11 |
import torchaudio
|
12 |
|
13 |
-
from pydantic import …  [line truncated in extraction]
|
14 |
from typing import Optional, List
|
15 |
import json
|
16 |
|
@@ -67,7 +67,7 @@ class AudioBoxAesthetics(
|
|
67 |
proj_dropout: float = 0.0,
|
68 |
nth_layer: int = 13,
|
69 |
use_weighted_layer_sum: bool = True,
|
70 |
-
precision: str = "…  [line truncated in extraction]
|
71 |
normalize_embed: bool = True,
|
72 |
output_dim: int = 1,
|
73 |
target_transform: dict = None,
|
|
|
10 |
from pydantic import BaseModel
|
11 |
import torchaudio
|
12 |
|
13 |
+
from pydantic import Field
|
14 |
from typing import Optional, List
|
15 |
import json
|
16 |
|
|
|
67 |
proj_dropout: float = 0.0,
|
68 |
nth_layer: int = 13,
|
69 |
use_weighted_layer_sum: bool = True,
|
70 |
+
precision: str = "bf16",
|
71 |
normalize_embed: bool = True,
|
72 |
output_dim: int = 1,
|
73 |
target_transform: dict = None,
|
test/test_inference.py
CHANGED
@@ -35,13 +35,14 @@ def test_inference_load_from_jsonl():
|
|
35 |
model = AudioBoxAesthetics.from_pretrained(model_name)
|
36 |
model.eval()
|
37 |
|
|
|
38 |
predictions = model.predict_from_files(audio_file_list)
|
39 |
|
40 |
single_pred = predictions[0]
|
41 |
-
assert single_pred["CE"] == cli_results[…  [line truncated in extraction]
|
42 |
-
assert single_pred["CU"] == cli_results[…  [line truncated in extraction]
|
43 |
-
assert single_pred["PC"] == cli_results[…  [line truncated in extraction]
|
44 |
-
assert single_pred["PQ"] == cli_results[…  [line truncated in extraction]
|
45 |
|
46 |
|
47 |
def test_inference_twice_on_same_audio_yields_same_result():
|
|
|
35 |
model = AudioBoxAesthetics.from_pretrained(model_name)
|
36 |
model.eval()
|
37 |
|
38 |
+
audio_path = audio_file_list.files[0].path
|
39 |
predictions = model.predict_from_files(audio_file_list)
|
40 |
|
41 |
single_pred = predictions[0]
|
42 |
+
assert single_pred["CE"] == cli_results[audio_path]["CE"]
|
43 |
+
assert single_pred["CU"] == cli_results[audio_path]["CU"]
|
44 |
+
assert single_pred["PC"] == cli_results[audio_path]["PC"]
|
45 |
+
assert single_pred["PQ"] == cli_results[audio_path]["PQ"]
|
46 |
|
47 |
|
48 |
def test_inference_twice_on_same_audio_yields_same_result():
|