Spaces:
Running
Running
File size: 2,652 Bytes
169a7c1 807d1d8 169a7c1 807d1d8 169a7c1 807d1d8 169a7c1 1cff9a5 169a7c1 1cff9a5 169a7c1 807d1d8 169a7c1 807d1d8 169a7c1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
from audiobox_aesthetics.inference import AudioBoxAesthetics, AudioFileList, AudioFile
# Identifier handed to AudioBoxAesthetics.from_pretrained() by every test below.
model_name = "thunnai/audiobox-aesthetics"

# Reference scores cached from running the CLI, keyed by audio file path.
# The tests compare fresh model predictions against these values.
cli_results = {
    "sample_audio/libritts_spk-84.wav": {
        "CE": 6.1027421951293945,
        "CU": 6.3574299812316895,
        "PC": 1.7401179075241089,
        "PQ": 6.733065128326416,
    },
}
def test_inference():
    """Check ``predict_from_files`` on a single ``AudioFile`` against the cached CLI scores.

    The first prediction returned for the sample file must match the entry in
    ``cli_results`` for the same path on all four score keys.
    """
    # Local import keeps this test self-contained; math is only needed for
    # the tolerant float comparison below.
    import math

    audio_path = "sample_audio/libritts_spk-84.wav"
    audio_file = AudioFile(path=audio_path)

    model = AudioBoxAesthetics.from_pretrained(model_name)
    model.eval()

    predictions = model.predict_from_files(audio_file)
    single_pred = predictions[0]
    print(single_pred)

    expected = cli_results[audio_path]
    # The reference values were cached from a separate CLI run. Bit-exact
    # float equality is brittle because results may differ in the last digits
    # between machines or library versions, so compare with a small relative
    # tolerance instead.
    for key in ("CE", "CU", "PC", "PQ"):
        assert math.isclose(single_pred[key], expected[key], rel_tol=1e-4), (
            f"{key}: got {single_pred[key]!r}, expected {expected[key]!r}"
        )
def test_inference_load_from_jsonl():
    """Check ``predict_from_files`` on an ``AudioFileList`` loaded from a JSONL manifest.

    The path of the first file in ``sample_audio/test.jsonl`` is used to look
    up the reference scores, so that path must be a key of ``cli_results``.
    """
    # Local import keeps this test self-contained; math is only needed for
    # the tolerant float comparison below.
    import math

    audio_file_list = AudioFileList.from_jsonl("sample_audio/test.jsonl")

    model = AudioBoxAesthetics.from_pretrained(model_name)
    model.eval()

    audio_path = audio_file_list.files[0].path
    predictions = model.predict_from_files(audio_file_list)
    single_pred = predictions[0]

    expected = cli_results[audio_path]
    # The reference values were cached from a separate CLI run. Bit-exact
    # float equality is brittle because results may differ in the last digits
    # between machines or library versions, so compare with a small relative
    # tolerance instead.
    for key in ("CE", "CU", "PC", "PQ"):
        assert math.isclose(single_pred[key], expected[key], rel_tol=1e-4), (
            f"{key}: got {single_pred[key]!r}, expected {expected[key]!r}"
        )
def test_inference_twice_on_same_audio_yields_same_result():
    """Run prediction twice on the same file and require identical scores.

    Both runs use the same model instance and the same ``AudioFile``; the
    first prediction of each run must be exactly equal on every score key.
    """
    sample = AudioFile(path="sample_audio/libritts_spk-84.wav")

    scorer = AudioBoxAesthetics.from_pretrained(model_name)
    scorer.eval()

    first_run = model_output_a = scorer.predict_from_files(sample)[0]
    second_run = scorer.predict_from_files(sample)[0]

    # Exact equality is intentional: this test is about repeatability of the
    # same model on the same input, not about matching cached reference values.
    for key in ("CE", "CU", "PC", "PQ"):
        assert first_run[key] == second_run[key]
def test_loading_from_wav():
    """Check the ``load_audio`` + ``predict_from_wavs`` path against the cached CLI scores.

    The sample file is loaded through the model's own ``load_audio`` and the
    result is passed to ``predict_from_wavs``; the first prediction must match
    the ``cli_results`` entry for the same path on all four score keys.
    """
    # Local import keeps this test self-contained; math is only needed for
    # the tolerant float comparison below.
    import math

    audio_path = "sample_audio/libritts_spk-84.wav"

    model = AudioBoxAesthetics.from_pretrained(model_name)
    model.eval()

    wav = model.load_audio(audio_path)
    predictions = model.predict_from_wavs(wav)
    single_pred = predictions[0]

    expected = cli_results[audio_path]
    # The reference values were cached from a separate CLI run. Bit-exact
    # float equality is brittle because results may differ in the last digits
    # between machines or library versions, so compare with a small relative
    # tolerance instead.
    for key in ("CE", "CU", "PC", "PQ"):
        assert math.isclose(single_pred[key], expected[key], rel_tol=1e-4), (
            f"{key}: got {single_pred[key]!r}, expected {expected[key]!r}"
        )
|