thunnai committed on
Commit
1cff9a5
·
1 Parent(s): e118d8c

update to use bfloat16

Browse files
src/audiobox_aesthetics/export_model_to_hf.py CHANGED
@@ -51,6 +51,8 @@ if __name__ == "__main__":
51
  }
52
  for axis in target_transform.keys()
53
  }
 
 
54
 
55
  model = AudioBoxAesthetics(
56
  sample_rate=16_000, target_transform=target_transform, **model_cfg
 
51
  }
52
  for axis in target_transform.keys()
53
  }
54
+ # force precision to be bfloat16 to match infer class
55
+ model_cfg["precision"] = "bf16"
56
 
57
  model = AudioBoxAesthetics(
58
  sample_rate=16_000, target_transform=target_transform, **model_cfg
src/audiobox_aesthetics/inference.py CHANGED
@@ -10,7 +10,7 @@ from audiobox_aesthetics.infer import make_inference_batch
10
  from pydantic import BaseModel
11
  import torchaudio
12
 
13
- from pydantic import BaseModel, Field
14
  from typing import Optional, List
15
  import json
16
 
@@ -67,7 +67,7 @@ class AudioBoxAesthetics(
67
  proj_dropout: float = 0.0,
68
  nth_layer: int = 13,
69
  use_weighted_layer_sum: bool = True,
70
- precision: str = "32",
71
  normalize_embed: bool = True,
72
  output_dim: int = 1,
73
  target_transform: dict = None,
 
10
  from pydantic import BaseModel
11
  import torchaudio
12
 
13
+ from pydantic import Field
14
  from typing import Optional, List
15
  import json
16
 
 
67
  proj_dropout: float = 0.0,
68
  nth_layer: int = 13,
69
  use_weighted_layer_sum: bool = True,
70
+ precision: str = "bf16",
71
  normalize_embed: bool = True,
72
  output_dim: int = 1,
73
  target_transform: dict = None,
test/test_inference.py CHANGED
@@ -35,13 +35,14 @@ def test_inference_load_from_jsonl():
35
  model = AudioBoxAesthetics.from_pretrained(model_name)
36
  model.eval()
37
 
 
38
  predictions = model.predict_from_files(audio_file_list)
39
 
40
  single_pred = predictions[0]
41
- assert single_pred["CE"] == cli_results[audio_file_list.files[0].path]["CE"]
42
- assert single_pred["CU"] == cli_results[audio_file_list.files[0].path]["CU"]
43
- assert single_pred["PC"] == cli_results[audio_file_list.files[0].path]["PC"]
44
- assert single_pred["PQ"] == cli_results[audio_file_list.files[0].path]["PQ"]
45
 
46
 
47
  def test_inference_twice_on_same_audio_yields_same_result():
 
35
  model = AudioBoxAesthetics.from_pretrained(model_name)
36
  model.eval()
37
 
38
+ audio_path = audio_file_list.files[0].path
39
  predictions = model.predict_from_files(audio_file_list)
40
 
41
  single_pred = predictions[0]
42
+ assert single_pred["CE"] == cli_results[audio_path]["CE"]
43
+ assert single_pred["CU"] == cli_results[audio_path]["CU"]
44
+ assert single_pred["PC"] == cli_results[audio_path]["PC"]
45
+ assert single_pred["PQ"] == cli_results[audio_path]["PQ"]
46
 
47
 
48
  def test_inference_twice_on_same_audio_yields_same_result():