File size: 7,611 Bytes
5a29263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import base64
import struct
import pytest
from openai import OpenAI
from utils import *

# Module-level server handle shared by every test in this file. The autouse
# `create_server` fixture below rebinds it to a fresh preset object.
# NOTE(review): `ServerPreset` is presumably provided by the `utils` star-import — confirm.
server = ServerPreset.bert_bge_small()

# Absolute tolerance used by the tests when comparing floating-point values
# (vector norms and element-wise embedding differences).
EPSILON = 1e-3

@pytest.fixture(scope="module", autouse=True)
def create_server():
    """Rebind the module-level `server` to a new `ServerPreset.bert_bge_small()`.

    The fixture is declared module-scoped and autouse, so pytest applies it to
    this module without any test having to request it by name.
    """
    global server
    server = ServerPreset.bert_bge_small()


def test_embedding_single():
    """Embed one string through /v1/embeddings with pooling set to 'last'.

    Expects HTTP 200, exactly one entry in `data`, an 'embedding' vector with
    more than one component, and a squared L2 norm within EPSILON of 1.
    """
    global server
    server.pooling = 'last'
    server.start()
    payload = {"input": "I believe the meaning of life is"}
    res = server.make_request("POST", "/v1/embeddings", data=payload)
    assert res.status_code == 200

    entries = res.body['data']
    assert len(entries) == 1
    first = entries[0]
    assert 'embedding' in first
    vector = first['embedding']
    assert len(vector) > 1

    # make sure embedding vector is normalized (squared norm close to 1)
    squared_norm = sum(component ** 2 for component in vector)
    assert abs(squared_norm - 1) < EPSILON


def test_embedding_multiple():
    """Embed a list of four strings through /v1/embeddings with pooling 'last'.

    Expects HTTP 200 and one entry per prompt, each carrying an 'embedding'
    vector with more than one component.
    """
    global server
    server.pooling = 'last'
    server.start()
    prompts = [
        "I believe the meaning of life is",
        "Write a joke about AI from a very long prompt which will not be truncated",
        "This is a test",
        "This is another test",
    ]
    res = server.make_request("POST", "/v1/embeddings", data={"input": prompts})
    assert res.status_code == 200

    entries = res.body['data']
    assert len(entries) == 4
    for entry in entries:
        assert 'embedding' in entry
        assert len(entry['embedding']) > 1


@pytest.mark.parametrize(
    "input,is_multi_prompt",
    [
        # do not crash on empty input
        ("", False),
        # single prompt
        ("string", False),
        ([12, 34, 56], False),
        ([12, 34, "string", 56, 78], False),
        # multiple prompts
        (["string1", "string2"], True),
        (["string1", [12, 34, 56]], True),
        ([[12, 34, 56], [12, 34, 56]], True),
        ([[12, 34, 56], [12, "string", 34, 56]], True),
    ]
)
def test_embedding_mixed_input(input, is_multi_prompt: bool):
    """Send strings, integer lists and mixtures of both as the `input` field.

    Args:
        input: the value posted as "input" (string, list of ints/strings, or
            list of such prompts).
        is_multi_prompt: when True the response must hold one entry per
            element of `input`; otherwise only the first entry is inspected.
    """
    global server
    server.start()
    res = server.make_request("POST", "/v1/embeddings", data={"input": input})
    assert res.status_code == 200
    entries = res.body['data']

    if not is_multi_prompt:
        # single prompt: only the first returned entry is checked
        first = entries[0]
        assert 'embedding' in first
        assert len(first['embedding']) > 1
        return

    # multiple prompts: one embedding per prompt
    assert len(entries) == len(input)
    for entry in entries:
        assert 'embedding' in entry
        assert len(entry['embedding']) > 1


def test_embedding_pooling_none():
    """Query /embeddings with pooling 'none' and inspect the per-token output.

    Expects HTTP 200, a list body whose first element has an 'embedding' made
    of 5 vectors (3 text tokens + 2 special), none of which has a squared
    norm within EPSILON of 1.
    """
    global server
    server.pooling = 'none'
    server.start()
    payload = {"input": "hello hello hello"}
    res = server.make_request("POST", "/embeddings", data=payload)
    assert res.status_code == 200

    first = res.body[0]
    assert 'embedding' in first
    token_vectors = first['embedding']
    assert len(token_vectors) == 5  # 3 text tokens + 2 special

    # make sure embedding vector is not normalized
    for token_vector in token_vectors:
        squared_norm = sum(value ** 2 for value in token_vector)
        assert abs(squared_norm - 1) > EPSILON


def test_embedding_pooling_none_oai():
    """Check that /v1/embeddings rejects requests when pooling is 'none'.

    Expects HTTP 400 and an "error" key in the response body.
    """
    global server
    server.pooling = 'none'
    server.start()
    payload = {"input": "hello hello hello"}
    res = server.make_request("POST", "/v1/embeddings", data=payload)

    # /v1/embeddings does not support pooling type 'none'
    assert res.status_code == 400
    assert "error" in res.body


def test_embedding_openai_library_single():
    """Embed one string using the `OpenAI` client pointed at the test server.

    Expects a single result whose embedding has more than one component.
    """
    global server
    server.pooling = 'last'
    server.start()
    base_url = f"http://{server.server_host}:{server.server_port}/v1"
    client = OpenAI(api_key="dummy", base_url=base_url)
    res = client.embeddings.create(
        model="text-embedding-3-small",
        input="I believe the meaning of life is",
    )
    results = res.data
    assert len(results) == 1
    assert len(results[0].embedding) > 1


def test_embedding_openai_library_multiple():
    """Embed four strings using the `OpenAI` client pointed at the test server.

    Expects four results, each with an embedding of more than one component.
    """
    global server
    server.pooling = 'last'
    server.start()
    base_url = f"http://{server.server_host}:{server.server_port}/v1"
    client = OpenAI(api_key="dummy", base_url=base_url)
    prompts = [
        "I believe the meaning of life is",
        "Write a joke about AI from a very long prompt which will not be truncated",
        "This is a test",
        "This is another test",
    ]
    res = client.embeddings.create(model="text-embedding-3-small", input=prompts)
    results = res.data
    assert len(results) == 4
    for item in results:
        assert len(item.embedding) > 1


def test_embedding_error_prompt_too_long():
    """Post a prompt built from 512 repetitions of a short phrase.

    Expects a non-200 status and an error message containing "too large".
    """
    global server
    server.pooling = 'last'
    server.start()
    oversized_prompt = "This is a test " * 512
    res = server.make_request("POST", "/v1/embeddings", data={"input": oversized_prompt})
    assert res.status_code != 200
    error_message = res.body["error"]["message"]
    assert "too large" in error_message


def test_same_prompt_give_same_result():
    """Embed the same prompt five times in one request and compare the results.

    Expects HTTP 200, five entries, and every embedding to match the first one
    both in length and element-wise within EPSILON.
    """
    server.pooling = 'last'
    server.start()
    prompt = "I believe the meaning of life is"
    res = server.make_request("POST", "/v1/embeddings", data={
        "input": [prompt] * 5,
    })
    assert res.status_code == 200
    entries = res.body['data']
    assert len(entries) == 5

    # the first embedding is the reference every other one is compared against
    reference = entries[0]['embedding']
    for entry in entries[1:]:
        candidate = entry['embedding']
        # zip() stops at the shorter sequence, so a length mismatch would
        # otherwise go unnoticed by the element-wise comparison below
        assert len(candidate) == len(reference)
        for x, y in zip(reference, candidate):
            assert abs(x - y) < EPSILON


@pytest.mark.parametrize(
    "content,n_tokens",
    [
        ("I believe the meaning of life is", 9),
        ("This is a test", 6),
    ]
)
def test_embedding_usage_single(content, n_tokens):
    """Check the `usage` block reported for a single-prompt request.

    Args:
        content: the prompt posted as "input".
        n_tokens: the expected value of usage.prompt_tokens.

    Expects prompt_tokens to equal total_tokens and to equal `n_tokens`.
    """
    global server
    server.start()
    res = server.make_request("POST", "/v1/embeddings", data={"input": content})
    assert res.status_code == 200
    usage = res.body['usage']
    assert usage['prompt_tokens'] == usage['total_tokens']
    assert usage['prompt_tokens'] == n_tokens


def test_embedding_usage_multiple():
    """Check the `usage` block reported for a request with two identical prompts.

    Expects prompt_tokens to equal total_tokens and to equal 2 * 9.
    """
    global server
    server.start()
    prompt = "I believe the meaning of life is"
    res = server.make_request("POST", "/v1/embeddings", data={"input": [prompt, prompt]})
    assert res.status_code == 200
    usage = res.body['usage']
    assert usage['prompt_tokens'] == usage['total_tokens']
    assert usage['prompt_tokens'] == 2 * 9


def test_embedding_openai_library_base64():
    """Compare the base64-encoded embedding with the default-format one.

    Requests the same prompt twice through `server.make_request` (once in the
    default format, once with "encoding_format": "base64"), decodes the base64
    string into 4-byte floats and expects the same length and element-wise
    agreement within EPSILON.
    """
    server.start()
    test_input = "Test base64 embedding output"
    endpoint = "/v1/embeddings"

    # get embedding in default format
    res = server.make_request("POST", endpoint, data={"input": test_input})
    assert res.status_code == 200
    vec0 = res.body["data"][0]["embedding"]

    # get embedding in base64 format
    base64_request = {"input": test_input, "encoding_format": "base64"}
    res = server.make_request("POST", endpoint, data=base64_request)
    assert res.status_code == 200
    assert "data" in res.body
    entries = res.body["data"]
    assert len(entries) == 1

    entry = entries[0]
    assert "embedding" in entry
    encoded = entry["embedding"]
    assert isinstance(encoded, str)

    # Verify embedding is valid base64
    raw = base64.b64decode(encoded)
    # Verify decoded data can be converted back to float array
    n_floats = len(raw) // 4  # 4 bytes per float
    floats = struct.unpack(f'{n_floats}f', raw)
    assert len(floats) > 0
    assert all(isinstance(value, float) for value in floats)
    assert len(floats) == len(vec0)

    # make sure the decoded data is the same as the original
    for decoded_value, reference_value in zip(floats, vec0):
        assert abs(decoded_value - reference_value) < EPSILON