Spaces:
Running
Running
update to use bfloat16
Browse files
src/audiobox_aesthetics/export_model_to_hf.py
CHANGED
@@ -51,6 +51,8 @@ if __name__ == "__main__":
|
|
51 |
}
|
52 |
for axis in target_transform.keys()
|
53 |
}
|
|
|
|
|
54 |
|
55 |
model = AudioBoxAesthetics(
|
56 |
sample_rate=16_000, target_transform=target_transform, **model_cfg
|
|
|
51 |
}
|
52 |
for axis in target_transform.keys()
|
53 |
}
|
54 |
+
# force precision to be bfloat16 to match infer class
|
55 |
+
model_cfg["precision"] = "bf16"
|
56 |
|
57 |
model = AudioBoxAesthetics(
|
58 |
sample_rate=16_000, target_transform=target_transform, **model_cfg
|
src/audiobox_aesthetics/inference.py
CHANGED
@@ -10,7 +10,7 @@ from audiobox_aesthetics.infer import make_inference_batch
|
|
10 |
from pydantic import BaseModel
|
11 |
import torchaudio
|
12 |
|
13 |
-
from pydantic import …  [line truncated in extraction]
|
14 |
from typing import Optional, List
|
15 |
import json
|
16 |
|
@@ -67,7 +67,7 @@ class AudioBoxAesthetics(
|
|
67 |
proj_dropout: float = 0.0,
|
68 |
nth_layer: int = 13,
|
69 |
use_weighted_layer_sum: bool = True,
|
70 |
-
precision: str = "…  [line truncated in extraction]
|
71 |
normalize_embed: bool = True,
|
72 |
output_dim: int = 1,
|
73 |
target_transform: dict = None,
|
|
|
10 |
from pydantic import BaseModel
|
11 |
import torchaudio
|
12 |
|
13 |
+
from pydantic import Field
|
14 |
from typing import Optional, List
|
15 |
import json
|
16 |
|
|
|
67 |
proj_dropout: float = 0.0,
|
68 |
nth_layer: int = 13,
|
69 |
use_weighted_layer_sum: bool = True,
|
70 |
+
precision: str = "bf16",
|
71 |
normalize_embed: bool = True,
|
72 |
output_dim: int = 1,
|
73 |
target_transform: dict = None,
|
test/test_inference.py
CHANGED
@@ -35,13 +35,14 @@ def test_inference_load_from_jsonl():
|
|
35 |
model = AudioBoxAesthetics.from_pretrained(model_name)
|
36 |
model.eval()
|
37 |
|
|
|
38 |
predictions = model.predict_from_files(audio_file_list)
|
39 |
|
40 |
single_pred = predictions[0]
|
41 |
-
assert single_pred["CE"] == cli_results[…  [line truncated in extraction]
|
42 |
-
assert single_pred["CU"] == cli_results[…  [line truncated in extraction]
|
43 |
-
assert single_pred["PC"] == cli_results[…  [line truncated in extraction]
|
44 |
-
assert single_pred["PQ"] == cli_results[…  [line truncated in extraction]
|
45 |
|
46 |
|
47 |
def test_inference_twice_on_same_audio_yields_same_result():
|
|
|
35 |
model = AudioBoxAesthetics.from_pretrained(model_name)
|
36 |
model.eval()
|
37 |
|
38 |
+
audio_path = audio_file_list.files[0].path
|
39 |
predictions = model.predict_from_files(audio_file_list)
|
40 |
|
41 |
single_pred = predictions[0]
|
42 |
+
assert single_pred["CE"] == cli_results[audio_path]["CE"]
|
43 |
+
assert single_pred["CU"] == cli_results[audio_path]["CU"]
|
44 |
+
assert single_pred["PC"] == cli_results[audio_path]["PC"]
|
45 |
+
assert single_pred["PQ"] == cli_results[audio_path]["PQ"]
|
46 |
|
47 |
|
48 |
def test_inference_twice_on_same_audio_yields_same_result():
|