Spaces:
Runtime error
Runtime error
| import os | |
| import pandas as pd | |
| import numpy | |
| import pickle | |
| import pefile | |
| import sklearn.ensemble as ek | |
| from sklearn.feature_selection import SelectFromModel | |
| import joblib | |
| from sklearn.tree import DecisionTreeClassifier | |
| from sklearn.metrics import confusion_matrix | |
| from sklearn import svm | |
| import sklearn.metrics as metrics | |
| from sklearn.model_selection import train_test_split | |
| import pdb | |
| from tqdm import tqdm | |
# Load the dataset; the CSV file uses "|" as its field separator.
dataset = pd.read_csv("data.csv", sep="|")
# NOTE: the feature matrix X is built further down from an explicit list of
# column names. The former `X = dataset.drop([...]).values` here was
# overwritten before ever being used, so it is no longer computed.
# Names of the dataset columns used as model inputs, split into groups by
# name prefix purely for readability. The concatenation order below defines
# the column order of X, so it must not be changed.
_header_columns = [
    "Machine",
    "SizeOfOptionalHeader",
    "Characteristics",
    "MajorLinkerVersion",
    "MinorLinkerVersion",
    "SizeOfCode",
    "SizeOfInitializedData",
    "SizeOfUninitializedData",
    "AddressOfEntryPoint",
    "BaseOfCode",
    "BaseOfData",
    "ImageBase",
    "SectionAlignment",
    "FileAlignment",
    "MajorOperatingSystemVersion",
    "MinorOperatingSystemVersion",
    "MajorImageVersion",
    "MinorImageVersion",
    "MajorSubsystemVersion",
    "MinorSubsystemVersion",
    "SizeOfImage",
    "SizeOfHeaders",
    "CheckSum",
    "Subsystem",
    "DllCharacteristics",
    "SizeOfStackReserve",
    "SizeOfStackCommit",
    "SizeOfHeapReserve",
    "SizeOfHeapCommit",
    "LoaderFlags",
    "NumberOfRvaAndSizes",
]
_section_columns = [
    "SectionsNb",
    "SectionsMeanEntropy",
    "SectionsMinEntropy",
    "SectionsMaxEntropy",
    "SectionsMeanRawsize",
    "SectionsMinRawsize",
    # "SectionsMaxRawsize" was commented out of the original list and stays
    # excluded here.
    "SectionsMeanVirtualsize",
    "SectionsMinVirtualsize",
    "SectionMaxVirtualsize",
]
_import_export_columns = [
    "ImportsNbDLL",
    "ImportsNb",
    "ImportsNbOrdinal",
    "ExportNb",
]
_resource_columns = [
    "ResourcesNb",
    "ResourcesMeanEntropy",
    "ResourcesMinEntropy",
    "ResourcesMaxEntropy",
    "ResourcesMeanSize",
    "ResourcesMinSize",
    "ResourcesMaxSize",
]
_trailing_columns = [
    "LoadConfigurationSize",
    "VersionInformationSize",
]
# Must stay a list: pandas treats a list indexer as "select these columns".
ugly = (
    _header_columns
    + _section_columns
    + _import_export_columns
    + _resource_columns
    + _trailing_columns
)

# Target variable: the "legitimate" column.
y = dataset["legitimate"].values
# Feature matrix: one column per name in `ugly`, in that order.
_feature_frame = dataset[ugly]
X = _feature_frame.values
# Fit an ExtraTrees model on the first 1000 rows only and use its feature
# importances to let SelectFromModel (default threshold) pick the columns.
extratrees = ek.ExtraTreesClassifier().fit(X[:1000], y[:1000])
selector = SelectFromModel(extratrees, prefit=True)
X_new = selector.transform(X)
nbfeatures = X_new.shape[1]

# Split the reduced data: 29% of the rows are held out for testing,
# stratified on the label.
X_train, X_test, y_train, y_test = train_test_split(
    X_new, y, test_size=0.29, stratify=y
)

# Print the `nbfeatures` most important columns, most important first.
# X was built from `ugly`, so importance indices map to names in `ugly`
# (not to positions in `dataset.columns`, which contains extra columns).
importances = extratrees.feature_importances_
index = numpy.argsort(importances)[::-1][:nbfeatures]
for rank, column in enumerate(index, start=1):
    print(f"{rank}. feature {ugly[column]} ({importances[column]:f})")

# Names of the columns actually kept by the selector, in the same order as
# the columns of X_new / X_train, so they line up with what the classifiers
# are trained on.
features = [
    name for name, kept in zip(ugly, selector.get_support()) if kept
]
# Candidate classifiers, keyed by a display name.
model = {
    "DecisionTree": DecisionTreeClassifier(max_depth=10),
    "RandomForest": ek.RandomForestClassifier(n_estimators=50),
}

# Train every candidate on the training split and record its score on the
# held-out test split.
results = {}
for algo, clf in model.items():
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    print(f"{algo} : {score} ")
    results[algo] = score

# Keep the candidate with the highest test score.
winner = max(results, key=results.get)
print("Using", winner, "for classification, with", len(features), "features.")

# Persist the winning classifier and the list of feature names.
joblib.dump(model[winner], "classifier.pkl")
# Use a context manager so the file is flushed and closed deterministically.
with open("features.pkl", "wb") as features_file:
    pickle.dump(features, features_file)
# Helpers from the project-local `fhe_utils` module (not visible here; the
# step descriptions below are inferred from the helper and method names).
from fhe_utils import (
    client_server_interaction,
    copy_directory,
    setup_client,
    setup_network,
    train_zama,
)

# Build the FHE development model from the training split and create the
# network object that links the dev, server and client sides.
model_dev_fhe = train_zama(X_train, y_train)
network, _ = setup_network(model_dev_fhe)

# Copy the dev directory to "fhe_model". A failed copy is only reported;
# the workflow continues either way.
dev_dir_path = network.dev_dir.name
copied, error_message = copy_directory(dev_dir_path, destination="fhe_model")
if not copied:
    print(f"Error copying directory: {error_message}")

# Dev side: hand the model to the server and the specs to the client.
network.dev_send_model_to_server()
network.dev_send_clientspecs_and_modelspecs_to_client()

# Client side: set up the client, then send its evaluation keys to the server.
client_dir_path = network.client_dir.name
fhemodel_client, serialized_evaluation_keys = setup_client(
    network, client_dir_path
)
print(f"Evaluation keys size: {len(serialized_evaluation_keys)} B")
network.client_send_evaluation_key_to_server(serialized_evaluation_keys)

# Run the client/server round trip on the first 100 test rows.
fhe_sample = X_test[:100]
decrypted_predictions, execution_time = client_server_interaction(
    network, fhemodel_client, fhe_sample
)