File size: 2,652 Bytes
169a7c1
 
 
 
 
 
 
 
 
 
 
 
807d1d8
 
169a7c1
 
 
 
807d1d8
169a7c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
807d1d8
169a7c1
 
1cff9a5
169a7c1
 
 
1cff9a5
 
 
 
169a7c1
 
 
 
807d1d8
169a7c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
807d1d8
169a7c1
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from audiobox_aesthetics.inference import AudioBoxAesthetics, AudioFileList, AudioFile

# Reference scores cached from running the CLI on the sample audio.
# Outer key: audio file path; inner dict: score name -> cached float value.
# The tests below look up entries here by the path of the file they score.
cli_results = {
    "sample_audio/libritts_spk-84.wav": {
        "CE": 6.1027421951293945,
        "CU": 6.3574299812316895,
        "PC": 1.7401179075241089,
        "PQ": 6.733065128326416,
    },
}

# Identifier handed to AudioBoxAesthetics.from_pretrained by the tests below.
model_name = "thunnai/audiobox-aesthetics"


def test_inference():
    """Score a single ``AudioFile`` and compare it with the cached CLI scores.

    The model is loaded via ``from_pretrained`` and switched to eval mode, then
    ``predict_from_files`` is called with one ``AudioFile``. The first
    prediction must agree with ``cli_results`` on every score key.

    Scores are compared with a small relative tolerance rather than ``==``:
    exact float equality on model outputs can break between machines or
    library builds even when the model behaves correctly.
    """
    import math  # function-scope import keeps this test self-contained

    audio_path = "sample_audio/libritts_spk-84.wav"
    audio_file = AudioFile(path=audio_path)
    model = AudioBoxAesthetics.from_pretrained(model_name)
    model.eval()

    predictions = model.predict_from_files(audio_file)
    single_pred = predictions[0]

    # Left in on purpose: pytest shows captured stdout when the test fails.
    print(single_pred)

    expected = cli_results[audio_path]
    for key in ("CE", "CU", "PC", "PQ"):
        assert math.isclose(single_pred[key], expected[key], rel_tol=1e-4), (
            f"{key}: got {single_pred[key]!r}, expected {expected[key]!r}"
        )


def test_inference_load_from_jsonl():
    """Score files listed in a JSONL manifest and check the first result.

    ``AudioFileList.from_jsonl`` builds the file list from
    ``sample_audio/test.jsonl``; the path of its first entry is used to look
    up the expected scores in ``cli_results``. The first prediction returned
    by ``predict_from_files`` must agree with those scores on every key.

    Scores are compared with a small relative tolerance rather than ``==``:
    exact float equality on model outputs can break between machines or
    library builds even when the model behaves correctly.
    """
    import math  # function-scope import keeps this test self-contained

    audio_file_list = AudioFileList.from_jsonl("sample_audio/test.jsonl")
    model = AudioBoxAesthetics.from_pretrained(model_name)
    model.eval()

    audio_path = audio_file_list.files[0].path
    predictions = model.predict_from_files(audio_file_list)

    single_pred = predictions[0]
    expected = cli_results[audio_path]
    for key in ("CE", "CU", "PC", "PQ"):
        assert math.isclose(single_pred[key], expected[key], rel_tol=1e-4), (
            f"{key}: got {single_pred[key]!r}, expected {expected[key]!r}"
        )


def test_inference_twice_on_same_audio_yields_same_result():
    """Run prediction twice on one file and require identical scores.

    Exact equality is intentional here: both runs use the same model instance
    and the same input, so this checks that repeated inference is repeatable.
    """
    sample = AudioFile(path="sample_audio/libritts_spk-84.wav")

    model = AudioBoxAesthetics.from_pretrained(model_name)
    model.eval()

    # Two back-to-back calls with the same input; keep the first prediction
    # from each call.
    first_run = model.predict_from_files(sample)
    second_run = model.predict_from_files(sample)
    first, second = first_run[0], second_run[0]

    for key in ("CE", "CU", "PC", "PQ"):
        assert first[key] == second[key]


def test_loading_from_wav():
    """Load audio through the model, score it, and compare with CLI scores.

    The audio is obtained from ``model.load_audio`` and passed to
    ``predict_from_wavs``. The first prediction must agree with the cached
    ``cli_results`` entry for the same path on every score key.

    Scores are compared with a small relative tolerance rather than ``==``:
    exact float equality on model outputs can break between machines or
    library builds even when the model behaves correctly.
    """
    import math  # function-scope import keeps this test self-contained

    audio_path = "sample_audio/libritts_spk-84.wav"
    model = AudioBoxAesthetics.from_pretrained(model_name)
    model.eval()

    wav = model.load_audio(audio_path)
    predictions = model.predict_from_wavs(wav)

    single_pred = predictions[0]
    expected = cli_results[audio_path]
    for key in ("CE", "CU", "PC", "PQ"):
        assert math.isclose(single_pred[key], expected[key], rel_tol=1e-4), (
            f"{key}: got {single_pred[key]!r}, expected {expected[key]!r}"
        )