binoua committed on
Commit 35199db · 1 Parent(s): 0010ef2

Upload 8 files


From https://huggingface.co/zama-fhe/concrete-ml-template-alpha/tree/main, with a decision tree (DT) for spam detection

README.md CHANGED
@@ -1,3 +1,47 @@
  ---
  license: apache-2.0
  ---
+
+ # Template for Concrete ML
+
+ Concrete ML is Zama's open-source privacy-preserving ML package, based on fully homomorphic encryption (FHE). We refer the reader to fhe.org or Zama's website for more information on FHE.
+
+ This directory is used:
+ - by ML practitioners, to create Concrete ML FHE-friendly models and make them available to HF users
+ - by companies, institutions or individuals, to deploy those models over HF inference endpoints
+ - by developers, to use these endpoints to build privacy-preserving ML applications
+
+ ## Creating models and making them available on HF
+
+ This is quite easy. Fork this template (you may use this experimental tool https://huggingface.co/spaces/huggingface-projects/repo_duplicator for that), and then:
+ - install everything with: `pip install -r requirements.txt`
+ - edit `creating_models.py`, and fill in the part between "# BEGIN: insert your ML task here" and "# END: insert your ML task here"
+ - run the python file: `python creating_models.py`
+
+ At the end, if the script is successful, your compiled model is ready in `compiled_model`. You can then commit and push your repository (in particular `compiled_model`, `handler.py`, `play_with_endpoint.py` and `requirements.txt`, but you can include the other files as well).
+
+ We recommend tagging your compiled Concrete ML repository with the `Concrete ML FHE friendly` tag, so that people can find it easily.
+
+ ## Deploying a compiled model on HF inference endpoint
+
+ If you find a `Concrete ML FHE friendly` repository that you would like to deploy, it is very easy:
+ - click the 'Deploy' button in the HF interface
+ - choose "Inference endpoints"
+ - choose the right model repository
+ - (the remaining options are standard HF endpoint settings; we refer you to their documentation for more information)
+ - click 'Create endpoint'
+
+ Your model should now be deployed, after a few seconds of installation.
+
+ ## Using HF entry points on privacy-preserving models
+
+ Now, this is the final step: using the endpoint. You should:
+ - if your inference endpoint is private, set an environment variable HF_TOKEN with your HF token
+ - edit `play_with_endpoint.py`
+ - replace `API_URL` with your endpoint URL
+ - replace the part between "# BEGIN: replace this part with your privacy-preserving application" and "# END: replace this part with your privacy-preserving application" with your application
+
+ Finally, you'll be able to launch your application with `python play_with_endpoint.py`.
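For readers following the "Creating models" instructions above, the sketch below illustrates the kind of code that goes between the "# BEGIN: insert your ML task here" and "# END: insert your ML task here" markers: train any Concrete ML model and expose it as `model_dev` so the template can compile and save it. This is a minimal, hypothetical fill-in on synthetic data (the dataset and hyperparameters are assumptions, not part of this commit); the actual `creating_models.py` committed here, shown further down, trains a decision tree on the spambase dataset instead.

```python
# Minimal illustrative fill-in (not the version committed here): train a Concrete ML
# model on synthetic data and name it `model_dev` for compile_and_make_it_deployable.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from concrete.ml.sklearn import DecisionTreeClassifier as ConcreteDecisionTreeClassifier

# Synthetic binary-classification data, standing in for a real task
x, y = make_classification(n_samples=1000, n_features=30, class_sep=2, random_state=42)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# n_bits controls the quantization precision used when compiling to FHE
model_dev = ConcreteDecisionTreeClassifier(max_depth=5, n_bits=6)
model_dev = model_dev.fit(x_train, y_train)
```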
compiled_model/client.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79b0f9fd9accd2e11a36cd784ffaec57bf3278a941263a68a1843e5998440539
+ size 104290
compiled_model/server.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:90443a9eeab45aa664e58e21b92a3b636a41e55a4e702cdc04e569c6308b70ef
+ size 2710
compiled_model/versions.json ADDED
@@ -0,0 +1 @@
+ {"concrete-python": "2.5.0rc1", "concrete-ml": "1.3.0", "python": "3.9.15"}
creating_models.py ADDED
@@ -0,0 +1,80 @@
+ import shutil
+ from pathlib import Path
+
+ from concrete.ml.deployment import FHEModelDev
+
+
+ def compile_and_make_it_deployable(model_dev, X_train):
+
+     path_to_model = Path("compiled_model")
+
+     # Compile into FHE
+     model_dev.compile(X_train)
+
+     # Saving the model
+     shutil.rmtree(path_to_model, ignore_errors=True)
+     fhemodel_dev = FHEModelDev(path_to_model, model_dev)
+     fhemodel_dev.save(via_mlir=True)
+
+
+ # This is the spam classifier. Taken from https://github.com/zama-ai/concrete-ml/blob/main/docs/advanced_examples/DecisionTreeClassifier.ipynb
+ import numpy
+ from sklearn.datasets import fetch_openml
+ from sklearn.model_selection import train_test_split
+
+ features, classes = fetch_openml(data_id=44, as_frame=False, cache=True, return_X_y=True)
+ classes = classes.astype(numpy.int64)
+
+ x_train, x_test, y_train, y_test = train_test_split(
+     features,
+     classes,
+     test_size=0.15,
+     random_state=42,
+ )
+
+ # Find the best hyperparameters with cross-validation
+ from sklearn.model_selection import GridSearchCV
+ from concrete.ml.sklearn import DecisionTreeClassifier as ConcreteDecisionTreeClassifier
+
+ # List of hyperparameters to tune
+ param_grid = {
+     "max_features": [None],
+     "min_samples_leaf": [10],
+     "min_samples_split": [100],
+     "max_depth": [None],
+ }
+
+ grid_search = GridSearchCV(
+     ConcreteDecisionTreeClassifier(),
+     param_grid,
+     cv=10,
+     scoring="average_precision",
+     error_score="raise",
+     n_jobs=1,
+ )
+
+ gs_results = grid_search.fit(x_train, y_train)
+ print("Best hyperparameters:", gs_results.best_params_)
+ print("Best score:", gs_results.best_score_)
+
+ # Build the model with the best hyperparameters
+ model_dev = ConcreteDecisionTreeClassifier(
+     max_features=gs_results.best_params_["max_features"],
+     min_samples_leaf=gs_results.best_params_["min_samples_leaf"],
+     min_samples_split=gs_results.best_params_["min_samples_split"],
+     max_depth=gs_results.best_params_["max_depth"],
+     n_bits=6,
+ )
+ model_dev = model_dev.fit(x_train, y_train)
+
+ # Compute the average precision on the test set
+ from sklearn.metrics import average_precision_score
+
+ # pylint: disable=no-member
+ y_pred_concrete = model_dev.predict_proba(x_test)[:, 1]
+ concrete_average_precision = average_precision_score(y_test, y_pred_concrete)
+
+ print(f"Concrete average precision score: {concrete_average_precision:0.2f}")
+
+ compile_and_make_it_deployable(model_dev, x_train)
+ print("Your model is ready to be deployed.")
handler.py ADDED
@@ -0,0 +1,41 @@
+ from typing import Dict, List, Any
+ import numpy as np
+ from concrete.ml.deployment import FHEModelServer
+
+
+ def from_json(python_object):
+     if "__class__" in python_object:
+         return bytes(python_object["__value__"])
+
+
+ def to_json(python_object):
+     if isinstance(python_object, bytes):
+         return {"__class__": "bytes", "__value__": list(python_object)}
+     raise TypeError(repr(python_object) + " is not JSON serializable")
+
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+
+         # For the server side
+         self.fhemodel_server = FHEModelServer(path + "/compiled_model")
+
+     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         data args:
+             encrypted_inputs (:obj:`dict`): JSON-encoded encrypted input
+             evaluation_keys (:obj:`dict`): JSON-encoded serialized evaluation keys
+         Return:
+             A :obj:`dict`: the JSON-encoded encrypted prediction, serialized and returned
+         """
+
+         # Get the encrypted inputs
+         encrypted_inputs = from_json(data.pop("encrypted_inputs", data))
+
+         # Get the evaluation keys
+         evaluation_keys = from_json(data.pop("evaluation_keys", data))
+
+         # Run the Concrete ML prediction on encrypted data
+         encrypted_prediction = self.fhemodel_server.run(encrypted_inputs, evaluation_keys)
+
+         return to_json(encrypted_prediction)
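The handler above expects a JSON body carrying `encrypted_inputs` and `evaluation_keys`, each wrapped through the bytes-to-list `to_json` scheme. Below is a minimal sketch of exercising the handler locally before deploying it; the `to_json`/`from_json` helpers are the same as in `play_with_endpoint.py`, while the random `sample` and its 57-feature shape (the spambase feature count) are assumptions for illustration only.

```python
# Local smoke test of EndpointHandler (illustrative, not part of this commit)
import numpy as np

from concrete.ml.deployment import FHEModelClient
from handler import EndpointHandler  # assumes handler.py is on the import path


def to_json(python_object):
    if isinstance(python_object, bytes):
        return {"__class__": "bytes", "__value__": list(python_object)}
    raise TypeError(repr(python_object) + " is not JSON serializable")


def from_json(python_object):
    if "__class__" in python_object:
        return bytes(python_object["__value__"])


# Client side: keys and one encrypted input (shape must match the model's features)
client = FHEModelClient("compiled_model")
client.generate_private_and_evaluation_keys()
sample = np.random.rand(1, 57)  # assumption: 57 features, as in the spambase dataset

payload = {
    "encrypted_inputs": to_json(client.quantize_encrypt_serialize(sample)),
    "evaluation_keys": to_json(client.get_serialized_evaluation_keys()),
}

# Server side: the handler returns the JSON-wrapped encrypted prediction
handler = EndpointHandler(path=".")
encrypted_prediction = handler(payload)

# Client side: decrypt and dequantize the result
prediction_proba = client.deserialize_decrypt_dequantize(from_json(encrypted_prediction))[0]
print("Decrypted prediction:", prediction_proba)
```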
play_with_endpoint.py ADDED
@@ -0,0 +1,97 @@
+ import numpy as np
+ import time
+ import os, sys
+
+ from pathlib import Path
+
+ from sklearn.datasets import make_classification
+ from sklearn.model_selection import train_test_split
+
+ from concrete.ml.deployment import FHEModelClient
+
+ import requests
+
+
+ def to_json(python_object):
+     if isinstance(python_object, bytes):
+         return {"__class__": "bytes", "__value__": list(python_object)}
+     raise TypeError(repr(python_object) + " is not JSON serializable")
+
+
+ def from_json(python_object):
+     if "__class__" in python_object:
+         return bytes(python_object["__value__"])
+
+
+ # TODO: put the right link `API_URL` for your entry point
+ API_URL = "https://puqif7goarh132kl.us-east-1.aws.endpoints.huggingface.cloud"
+ headers = {
+     "Authorization": "Bearer " + os.environ.get("HF_TOKEN"),
+     "Content-Type": "application/json",
+ }
+
+
+ def query(payload):
+     response = requests.post(API_URL, headers=headers, json=payload)
+     return response.json()
+
+
+ path_to_model = Path("compiled_model")
+
+ # BEGIN: replace this part with your privacy-preserving application
+ x, y = make_classification(n_samples=1000, class_sep=2, n_features=30, random_state=42)
+ _, X_test, _, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
+
+ # Recover parameters for the client side
+ fhemodel_client = FHEModelClient(path_to_model)
+
+ # Generate the keys
+ fhemodel_client.generate_private_and_evaluation_keys()
+ evaluation_keys = fhemodel_client.get_serialized_evaluation_keys()
+
+ # Test the handler
+ nb_good = 0
+ nb_samples = len(X_test)
+ verbose = False
+ time_start = time.time()
+ duration = 0
+ is_first = True
+
+ for i in range(nb_samples):
+
+     # Quantize the input and encrypt it
+     encrypted_inputs = fhemodel_client.quantize_encrypt_serialize([X_test[i]])
+
+     # Prepare the payload, including the evaluation keys which are needed server side
+     payload = {
+         "inputs": "fake",
+         "encrypted_inputs": to_json(encrypted_inputs),
+         "evaluation_keys": to_json(evaluation_keys),
+     }
+
+     # Run the inference on HF servers
+     duration -= time.time()
+     encrypted_prediction = query(payload)
+     duration += time.time()
+
+     encrypted_prediction = from_json(encrypted_prediction)
+
+     if is_first:
+         is_first = False
+         print(f"Size of the payload: {sys.getsizeof(payload)} bytes")
+
+     # Decrypt the result and dequantize
+     prediction_proba = fhemodel_client.deserialize_decrypt_dequantize(encrypted_prediction)[0]
+     prediction = np.argmax(prediction_proba)
+
+     if verbose or True:
+         print(f"for {i}-th input, {prediction=} with expected {y_test[i]}")
+
+     # Measure accuracy
+     nb_good += y_test[i] == prediction
+
+ print(f"Accuracy on {nb_samples} samples is {nb_good * 1. / nb_samples}")
+ print(f"Total time: {time.time() - time_start} seconds")
+ print(f"Duration in inferences: {duration} seconds")
+ print(f"Duration per inference: {duration / nb_samples} seconds")
+ # END: replace this part with your privacy-preserving application
requirements.txt ADDED
@@ -0,0 +1 @@
+ concrete-ml==1.3.0