import os
import time
from pathlib import Path

import numpy as np
import requests

from concrete.ml.deployment import FHEModelClient

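# Client side of an FHE deployment: quantize and encrypt inputs locally with
# Concrete ML, send them to a remote endpoint for evaluation on encrypted
# data, then decrypt and dequantize the returned predictions.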

def to_json(python_object):
    if isinstance(python_object, bytes):
        return {"__class__": "bytes", "__value__": list(python_object)}
    raise TypeError(repr(python_object) + " is not JSON serializable")


def from_json(python_object):
    if "__class__" in python_object:
        return bytes(python_object["__value__"])
    return python_object

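# The two helpers above round-trip bytes through JSON, e.g.
#   blob = json.dumps({"data": b"\x00\x01"}, default=to_json)
#   restored = json.loads(blob, object_hook=from_json)
# (requires `import json`); they are only needed when the endpoint exchanges
# JSON rather than raw bytes.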

# TODO: put the right link `API_URL` for your entry point
API_URL = "https://yw1dgyuig6ff5pft.us-east-1.aws.endpoints.huggingface.cloud"
# HF_TOKEN must be set in the environment to authenticate against the endpoint
headers = {
    "Authorization": "Bearer " + os.environ["HF_TOKEN"],
    "Content-Type": "application/octet-stream",
}


def query(payload):
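    # POST to the endpoint; a custom inference handler on the server side is
    # assumed to parse these fields and return the serialized FHE result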
    response = requests.post(API_URL, headers=headers, data=payload)
    return response.json()


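# Directory with the client-side artifacts saved at deployment time
# (assumed to contain the client.zip produced by Concrete ML's FHEModelDev)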
path_to_model = Path("compiled_model")

# Decision-tree in FHE: fetch the evaluation data
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

features, classes = fetch_openml(data_id=44, as_frame=False, cache=True, return_X_y=True)
classes = classes.astype(np.int64)

_, X_test, _, Y_test = train_test_split(
    features,
    classes,
    test_size=0.15,
    random_state=42,
)

NB_SAMPLES = 2
X_test = X_test[:NB_SAMPLES]
Y_test = Y_test[:NB_SAMPLES]

# Recover parameters for client side
fhemodel_client = FHEModelClient(path_to_model)

# Generate the keys
fhemodel_client.generate_private_and_evaluation_keys()
evaluation_keys = fhemodel_client.get_serialized_evaluation_keys()
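# Evaluation keys are safe to share with the server: they allow computing on
# encrypted data but not decrypting it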

# Test the handler
nb_good = 0
nb_samples = len(X_test)
verbose = False
time_start = time.time()
duration = 0
is_first = True

for i in range(nb_samples):

    # Quantize the input and encrypt it
    encrypted_inputs = fhemodel_client.quantize_encrypt_serialize(X_test[i].reshape(1, -1))

    # print(f"Size of encrypted input {sys.getsizeof(encrypted_inputs)}")
    # print(f"Size of keys {sys.getsizeof(evaluation_keys)}")

    # Prepare the payload, including the evaluation keys which are needed
    # server side; "inputs" is a placeholder, and the custom handler is
    # expected to read the two raw-bytes fields below (wrap them with
    # to_json(...) instead if the handler exchanges JSON)
    payload = {
        "inputs": "fake",
        "encrypted_inputs": encrypted_inputs,
        "evaluation_keys": evaluation_keys,
    }

    print(f"{payload=}")

    # Run the inference on HF servers, accumulating the time spent in queries
    duration -= time.time()
    encrypted_prediction = query(payload)
    duration += time.time()

    if verbose:
        print(f"{encrypted_prediction=}")

    # The endpoint is assumed to wrap its binary result with to_json; convert
    # it back to raw bytes before decryption
    encrypted_prediction = from_json(encrypted_prediction)

    if is_first:
        is_first = False
        # sys.getsizeof would only measure the dict container, so report the
        # serialized fields directly
        print(f"Size of the encrypted input: {len(encrypted_inputs)} bytes")
        print(f"Size of the evaluation keys: {len(evaluation_keys)} bytes")

    # Decrypt the result and dequantize
    prediction_proba = fhemodel_client.deserialize_decrypt_dequantize(encrypted_prediction)[0]
    prediction = np.argmax(prediction_proba)

    print(f"for {i}-th input, {prediction=} with expected {Y_test[i]}")

    # Measure accuracy
    nb_good += Y_test[i] == prediction

print(f"Accuracy on {nb_samples} samples is {nb_good * 1. / nb_samples}")
print(f"Total time: {time.time() - time_start} seconds")
print(f"Duration in inferences: {duration} seconds")
print(f"Duration per inference: {duration / nb_samples} seconds")