HakHan committed
Commit c6a41ac · verified · 1 Parent(s): 07caa33

Upload folder using huggingface_hub

Files changed (42)
  1. .gitattributes +0 -34
  2. README.md +11 -3
  3. SafeSwitch_Llama-3.1-8B-Instruct/direct_prober/args.json +25 -0
  4. SafeSwitch_Llama-3.1-8B-Instruct/direct_prober/model_weights.pth +3 -0
  5. SafeSwitch_Llama-3.1-8B-Instruct/direct_prober/result.json +7 -0
  6. SafeSwitch_Llama-3.1-8B-Instruct/refusal_head.pth +3 -0
  7. SafeSwitch_Llama-3.1-8B-Instruct/stage1_prober/args.json +25 -0
  8. SafeSwitch_Llama-3.1-8B-Instruct/stage1_prober/model_weights.pth +3 -0
  9. SafeSwitch_Llama-3.1-8B-Instruct/stage1_prober/result.json +7 -0
  10. SafeSwitch_Llama-3.1-8B-Instruct/stage2_prober/args.json +25 -0
  11. SafeSwitch_Llama-3.1-8B-Instruct/stage2_prober/model_weights.pth +3 -0
  12. SafeSwitch_Llama-3.1-8B-Instruct/stage2_prober/result.json +7 -0
  13. SafeSwitch_Ministral-8B-Instruct-2410/direct_prober/args.json +23 -0
  14. SafeSwitch_Ministral-8B-Instruct-2410/direct_prober/model_weights.pth +3 -0
  15. SafeSwitch_Ministral-8B-Instruct-2410/direct_prober/result.json +7 -0
  16. SafeSwitch_Ministral-8B-Instruct-2410/refusal_head.pth +3 -0
  17. SafeSwitch_Ministral-8B-Instruct-2410/stage1_prober/args.json +23 -0
  18. SafeSwitch_Ministral-8B-Instruct-2410/stage1_prober/model_weights.pth +3 -0
  19. SafeSwitch_Ministral-8B-Instruct-2410/stage1_prober/result.json +7 -0
  20. SafeSwitch_Ministral-8B-Instruct-2410/stage2_prober/args.json +23 -0
  21. SafeSwitch_Ministral-8B-Instruct-2410/stage2_prober/model_weights.pth +3 -0
  22. SafeSwitch_Ministral-8B-Instruct-2410/stage2_prober/result.json +7 -0
  23. SafeSwitch_Yi-1.5-9B-Chat/direct_prober/args.json +25 -0
  24. SafeSwitch_Yi-1.5-9B-Chat/direct_prober/model_weights.pth +3 -0
  25. SafeSwitch_Yi-1.5-9B-Chat/direct_prober/result.json +7 -0
  26. SafeSwitch_Yi-1.5-9B-Chat/refusal_head.pth +3 -0
  27. SafeSwitch_Yi-1.5-9B-Chat/stage1_prober/args.json +25 -0
  28. SafeSwitch_Yi-1.5-9B-Chat/stage1_prober/model_weights.pth +3 -0
  29. SafeSwitch_Yi-1.5-9B-Chat/stage1_prober/result.json +7 -0
  30. SafeSwitch_Yi-1.5-9B-Chat/stage2_prober/args.json +25 -0
  31. SafeSwitch_Yi-1.5-9B-Chat/stage2_prober/model_weights.pth +3 -0
  32. SafeSwitch_Yi-1.5-9B-Chat/stage2_prober/result.json +7 -0
  33. SwfeSwitch_Qwen2.5-7B-Instruct/direct_prober/args.json +25 -0
  34. SwfeSwitch_Qwen2.5-7B-Instruct/direct_prober/model_weights.pth +3 -0
  35. SwfeSwitch_Qwen2.5-7B-Instruct/direct_prober/result.json +7 -0
  36. SwfeSwitch_Qwen2.5-7B-Instruct/refusal_head.pth +3 -0
  37. SwfeSwitch_Qwen2.5-7B-Instruct/stage1_prober/args.json +25 -0
  38. SwfeSwitch_Qwen2.5-7B-Instruct/stage1_prober/model_weights.pth +3 -0
  39. SwfeSwitch_Qwen2.5-7B-Instruct/stage1_prober/result.json +7 -0
  40. SwfeSwitch_Qwen2.5-7B-Instruct/stage2_prober/args.json +25 -0
  41. SwfeSwitch_Qwen2.5-7B-Instruct/stage2_prober/model_weights.pth +3 -0
  42. SwfeSwitch_Qwen2.5-7B-Instruct/stage2_prober/result.json +7 -0
.gitattributes CHANGED
@@ -1,35 +1 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
  *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,11 @@
- ---
- license: mit
- ---
+ Refer to our [code repo](https://github.com/Hanpx20/SafeSwitch) for usage.
+
+ `refusal_head.pth`: the refusal head.
+
+ `direct_prober/`: the direct prober from the last layer.
+
+ `stage1_prober/`: the prober that predicts unsafe inputs from the last-layer tokens.
+
+ `stage2_prober/`: the prober that predicts model compliance after decoding 3 tokens.
+
+ All probers are 2-layer MLPs with an intermediate size of 64.
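For orientation, below is a minimal sketch of how one of these prober checkpoints might be inspected with PyTorch. It assumes `model_weights.pth` stores a plain state dict; the actual prober class and loading code are in the linked SafeSwitch repo.

```python
# Minimal inspection sketch (not the official SafeSwitch loader): assumes each
# model_weights.pth stores a plain PyTorch state dict. See the linked code repo
# for the actual prober class and loading code.
import torch

state = torch.load(
    "SafeSwitch_Llama-3.1-8B-Instruct/stage1_prober/model_weights.pth",
    map_location="cpu",
)
for name, tensor in state.items():  # print parameter names and shapes
    print(name, tuple(tensor.shape))
```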
SafeSwitch_Llama-3.1-8B-Instruct/direct_prober/args.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "epochs": 20,
+ "batch_size": 8,
+ "learning_rate": 1e-05,
+ "base_dir": "/shared/nas2/ph16/toxic/outputs/classifier",
+ "data_dir": "/shared/nas2/ph16/toxic/outputs/states/Llama-3.1-8B-Instruct",
+ "job_name": "Llama-3.1-8B-Instruct_both/layer32",
+ "ckpt": "",
+ "gpu": "",
+ "wandb": false,
+ "overwrite": true,
+ "hidden_sizes": [
+ "64"
+ ],
+ "random_seed": 42,
+ "token_rule": "last",
+ "label": "both",
+ "n_decode": 0,
+ "layer_id": 32,
+ "num_classification": 2,
+ "neg_weight": 1.0,
+ "lens_path": "",
+ "llm": "Llama-3.1-8B-Instruct",
+ "use_lens": false
+ }
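To make the fields above concrete, the sketch below builds the prober shape they imply (`hidden_sizes: ["64"]`, `num_classification: 2`) on top of the base model's layer-32 hidden states. `build_prober` is a hypothetical helper, not part of the SafeSwitch repo, and the ReLU activation is an assumption.

```python
# Hedged sketch: construct the MLP implied by this args.json. The helper name
# and the activation are illustrative; the real training code is in the SafeSwitch repo.
import torch.nn as nn

def build_prober(input_dim: int, hidden_sizes: list[int], num_classes: int) -> nn.Sequential:
    layers, prev = [], input_dim
    for width in hidden_sizes:  # a single hidden layer of width 64 here
        layers += [nn.Linear(prev, width), nn.ReLU()]
        prev = width
    layers.append(nn.Linear(prev, num_classes))
    return nn.Sequential(*layers)

# layer_id=32, token_rule="last": probe the final layer's last-token hidden state
prober = build_prober(input_dim=4096, hidden_sizes=[64], num_classes=2)
```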
SafeSwitch_Llama-3.1-8B-Instruct/direct_prober/model_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:42f144e6ef937fab8e46fdc975576e6d9bbcff8c3bf6bd0511c8b601df20ff49
+ size 1051584
SafeSwitch_Llama-3.1-8B-Instruct/direct_prober/result.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "F1": 85.435,
+ "acc": 93.227,
+ "positive_rate": 23.955,
+ "prec": 88.105,
+ "recall": 82.922
+ }
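As a quick worked check (illustrative only), the reported F1 is consistent with the reported precision and recall via F1 = 2PR / (P + R):

```python
# Sanity check of the reported metrics above: F1 = 2*P*R / (P + R)
prec, recall = 88.105, 82.922
f1 = 2 * prec * recall / (prec + recall)
print(round(f1, 3))  # -> 85.435, matching the "F1" field above
```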
SafeSwitch_Llama-3.1-8B-Instruct/refusal_head.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d6c9ce8c3b81b2f8ce3154ab1c7170f2b5c19e2afd598e29e46cd58be54b93f7
+ size 2101347548
SafeSwitch_Llama-3.1-8B-Instruct/stage1_prober/args.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "epochs": 20,
+ "batch_size": 8,
+ "learning_rate": 1e-05,
+ "base_dir": "/shared/nas2/ph16/toxic/outputs/classifier",
+ "data_dir": "/shared/nas2/ph16/toxic/outputs/states/Llama-3.1-8B-Instruct",
+ "job_name": "Llama-3.1-8B-Instruct_multi_safety/token0",
+ "ckpt": "",
+ "gpu": "",
+ "wandb": false,
+ "overwrite": true,
+ "hidden_sizes": [
+ "64"
+ ],
+ "random_seed": 42,
+ "token_rule": "multi",
+ "label": "safety",
+ "n_decode": 0,
+ "layer_id": 32,
+ "num_classification": 2,
+ "neg_weight": 1.0,
+ "lens_path": "",
+ "llm": "Llama-3.1-8B-Instruct",
+ "use_lens": false
+ }
SafeSwitch_Llama-3.1-8B-Instruct/stage1_prober/model_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a4da13c0df8f8d46b66a0118adfd6a687a1bf68112f42184fef105bf4992d16
+ size 1051584
SafeSwitch_Llama-3.1-8B-Instruct/stage1_prober/result.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "F1": 95.022,
+ "acc": 95.909,
+ "positive_rate": 40.909,
+ "prec": 94.604,
+ "recall": 95.444
+ }
SafeSwitch_Llama-3.1-8B-Instruct/stage2_prober/args.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "epochs": 20,
+ "batch_size": 8,
+ "learning_rate": 1e-05,
+ "base_dir": "/shared/nas2/ph16/toxic/outputs/classifier",
+ "data_dir": "/shared/nas2/ph16/toxic/outputs/states/Llama-3.1-8B-Instruct",
+ "job_name": "Llama-3.1-8B-Instruct_multi_response/token3",
+ "ckpt": "",
+ "gpu": "",
+ "wandb": false,
+ "overwrite": true,
+ "hidden_sizes": [
+ "64"
+ ],
+ "random_seed": 42,
+ "token_rule": "multi",
+ "label": "response",
+ "n_decode": 3,
+ "layer_id": 32,
+ "num_classification": 2,
+ "neg_weight": 1.0,
+ "lens_path": "",
+ "llm": "Llama-3.1-8B-Instruct",
+ "use_lens": false
+ }
SafeSwitch_Llama-3.1-8B-Instruct/stage2_prober/model_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:67bcd47033d8c1a75cc4221bfd9c55fc49d6f2a3867f06ada0f885cc519b330c
+ size 1051584
SafeSwitch_Llama-3.1-8B-Instruct/stage2_prober/result.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "F1": 98.369,
+ "acc": 97.409,
+ "positive_rate": 79.091,
+ "prec": 97.949,
+ "recall": 98.793
+ }
SafeSwitch_Ministral-8B-Instruct-2410/direct_prober/args.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "epochs": 20,
+ "batch_size": 8,
+ "learning_rate": 1e-05,
+ "base_dir": "/shared/nas2/ph16/toxic/outputs/classifier",
+ "data_dir": "/shared/nas2/ph16/toxic/outputs/states/Ministral-8B-Instruct-2410",
+ "job_name": "Ministral-8B-Instruct-2410_both/layer36",
+ "ckpt": "",
+ "gpu": "",
+ "wandb": false,
+ "overwrite": true,
+ "hidden_sizes": [
+ "64"
+ ],
+ "random_seed": 42,
+ "token_rule": "last",
+ "label": "both",
+ "n_decode": 0,
+ "layer_id": 36,
+ "num_classification": 2,
+ "neg_weight": 1.0,
+ "llm": "Ministral-8B-Instruct-2410"
+ }
SafeSwitch_Ministral-8B-Instruct-2410/direct_prober/model_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:04d59fa05b5f560a4fc2e0ef659622b811a7dfd0af9396849899fd16c2dbc322
+ size 1051584
SafeSwitch_Ministral-8B-Instruct-2410/direct_prober/result.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "F1": 88.587,
+ "acc": 92.364,
+ "positive_rate": 32.773,
+ "prec": 86.818,
+ "recall": 90.43
+ }
SafeSwitch_Ministral-8B-Instruct-2410/refusal_head.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:118e733c3423193c1e2023ca87b3518c4bc1231a13cdaee576905bd45004ffbb
+ size 2147484892
SafeSwitch_Ministral-8B-Instruct-2410/stage1_prober/args.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "epochs": 20,
+ "batch_size": 8,
+ "learning_rate": 1e-05,
+ "base_dir": "/shared/nas2/ph16/toxic/outputs/classifier",
+ "data_dir": "/shared/nas2/ph16/toxic/outputs/states/Ministral-8B-Instruct-2410",
+ "job_name": "Ministral-8B-Instruct-2410_multi_safety/token0",
+ "ckpt": "",
+ "gpu": "",
+ "wandb": false,
+ "overwrite": true,
+ "hidden_sizes": [
+ "64"
+ ],
+ "random_seed": 42,
+ "token_rule": "multi",
+ "label": "safety",
+ "n_decode": 0,
+ "layer_id": -1,
+ "num_classification": 2,
+ "neg_weight": 1.0,
+ "llm": "Ministral-8B-Instruct-2410"
+ }
SafeSwitch_Ministral-8B-Instruct-2410/stage1_prober/model_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db2325ec00a6fd983b39aebe698690c46cee6b6c1a52c9e24a7beff060a7d264
+ size 1051584
SafeSwitch_Ministral-8B-Instruct-2410/stage1_prober/result.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "F1": 94.8,
+ "acc": 95.682,
+ "positive_rate": 40.909,
+ "prec": 93.42,
+ "recall": 96.222
+ }
SafeSwitch_Ministral-8B-Instruct-2410/stage2_prober/args.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "epochs": 20,
+ "batch_size": 8,
+ "learning_rate": 1e-05,
+ "base_dir": "/shared/nas2/ph16/toxic/outputs/classifier",
+ "data_dir": "/shared/nas2/ph16/toxic/outputs/states/Ministral-8B-Instruct-2410",
+ "job_name": "Ministral-8B-Instruct-2410_multi_response/token3",
+ "ckpt": "",
+ "gpu": "",
+ "wandb": false,
+ "overwrite": true,
+ "hidden_sizes": [
+ "64"
+ ],
+ "random_seed": 42,
+ "token_rule": "multi",
+ "label": "response",
+ "n_decode": 3,
+ "layer_id": -1,
+ "num_classification": 2,
+ "neg_weight": 1.0,
+ "llm": "Ministral-8B-Instruct-2410"
+ }
SafeSwitch_Ministral-8B-Instruct-2410/stage2_prober/model_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b0926c89c23db5ed38f107b3525ee025d374ec4b28643e44f686303c88640ba3
+ size 1051584
SafeSwitch_Ministral-8B-Instruct-2410/stage2_prober/result.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "F1": 97.665,
+ "acc": 95.818,
+ "positive_rate": 89.409,
+ "prec": 97.516,
+ "recall": 97.814
+ }
SafeSwitch_Yi-1.5-9B-Chat/direct_prober/args.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "epochs": 20,
+ "batch_size": 8,
+ "learning_rate": 1e-05,
+ "base_dir": "/shared/nas2/ph16/toxic/outputs/classifier",
+ "data_dir": "/shared/nas2/ph16/toxic/outputs/states/Yi-1.5-9B-Chat",
+ "job_name": "Yi-1.5-9B-Chat_both/layer48",
+ "ckpt": "",
+ "gpu": "",
+ "wandb": false,
+ "overwrite": true,
+ "hidden_sizes": [
+ "64"
+ ],
+ "random_seed": 42,
+ "token_rule": "last",
+ "label": "both",
+ "n_decode": 0,
+ "layer_id": 48,
+ "num_classification": 2,
+ "neg_weight": 1.0,
+ "lens_path": "",
+ "llm": "Yi-1.5-9B-Chat",
+ "use_lens": false
+ }
SafeSwitch_Yi-1.5-9B-Chat/direct_prober/model_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a05dda4f63e650a835fd3caff30b3647cf12f7023661083e7b10157d1beab011
+ size 1051584
SafeSwitch_Yi-1.5-9B-Chat/direct_prober/result.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "F1": 86.595,
+ "acc": 92.091,
+ "positive_rate": 29.364,
+ "prec": 86.196,
+ "recall": 86.997
+ }
SafeSwitch_Yi-1.5-9B-Chat/refusal_head.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b175f425c26d02ebce55add355e3f4bb29aa08c73488bbb74add6c222f9bb0fa
+ size 1048577244
SafeSwitch_Yi-1.5-9B-Chat/stage1_prober/args.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "epochs": 20,
+ "batch_size": 8,
+ "learning_rate": 1e-05,
+ "base_dir": "/shared/nas2/ph16/toxic/outputs/classifier",
+ "data_dir": "/shared/nas2/ph16/toxic/outputs/states/Yi-1.5-9B-Chat",
+ "job_name": "Yi-1.5-9B-Chat_multi_safety/token0",
+ "ckpt": "",
+ "gpu": "",
+ "wandb": false,
+ "overwrite": true,
+ "hidden_sizes": [
+ "64"
+ ],
+ "random_seed": 42,
+ "token_rule": "multi",
+ "label": "safety",
+ "n_decode": 0,
+ "layer_id": 48,
+ "num_classification": 2,
+ "neg_weight": 1.0,
+ "lens_path": "",
+ "llm": "Yi-1.5-9B-Chat",
+ "use_lens": false
+ }
SafeSwitch_Yi-1.5-9B-Chat/stage1_prober/model_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:efdbbc95fc9425a92eaf683e0eeb97bd75d683b2a1a4d77a361b89451e313411
+ size 1051584
SafeSwitch_Yi-1.5-9B-Chat/stage1_prober/result.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "F1": 92.536,
+ "acc": 93.818,
+ "positive_rate": 40.909,
+ "prec": 91.432,
+ "recall": 93.667
+ }
SafeSwitch_Yi-1.5-9B-Chat/stage2_prober/args.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "epochs": 20,
+ "batch_size": 8,
+ "learning_rate": 1e-05,
+ "base_dir": "/shared/nas2/ph16/toxic/outputs/classifier",
+ "data_dir": "/shared/nas2/ph16/toxic/outputs/states/Yi-1.5-9B-Chat",
+ "job_name": "Yi-1.5-9B-Chat_multi_response/token3",
+ "ckpt": "",
+ "gpu": "",
+ "wandb": false,
+ "overwrite": true,
+ "hidden_sizes": [
+ "64"
+ ],
+ "random_seed": 42,
+ "token_rule": "multi",
+ "label": "response",
+ "n_decode": 3,
+ "layer_id": 48,
+ "num_classification": 2,
+ "neg_weight": 1.0,
+ "lens_path": "",
+ "llm": "Yi-1.5-9B-Chat",
+ "use_lens": false
+ }
SafeSwitch_Yi-1.5-9B-Chat/stage2_prober/model_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a9036a833d33997443441616015a0264514aeb452050cd86168587c838d8a7ca
+ size 1051584
SafeSwitch_Yi-1.5-9B-Chat/stage2_prober/result.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "F1": 98.24,
+ "acc": 96.909,
+ "positive_rate": 86.682,
+ "prec": 96.985,
+ "recall": 99.528
+ }
SwfeSwitch_Qwen2.5-7B-Instruct/direct_prober/args.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "epochs": 20,
+ "batch_size": 8,
+ "learning_rate": 1e-05,
+ "base_dir": "/shared/nas2/ph16/toxic/outputs/classifier",
+ "data_dir": "/shared/nas2/ph16/toxic/outputs/states/Qwen2.5-7B-Instruct",
+ "job_name": "Qwen2.5-7B-Instruct_both/layer28",
+ "ckpt": "",
+ "gpu": "",
+ "wandb": false,
+ "overwrite": true,
+ "hidden_sizes": [
+ "64"
+ ],
+ "random_seed": 42,
+ "token_rule": "last",
+ "label": "both",
+ "n_decode": 0,
+ "layer_id": 28,
+ "num_classification": 2,
+ "neg_weight": 1.0,
+ "lens_path": "",
+ "llm": "Qwen2.5-7B-Instruct",
+ "use_lens": false
+ }
SwfeSwitch_Qwen2.5-7B-Instruct/direct_prober/model_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8d6bb732f71e5b6fe8f0aed289d73a76422c5bdafcb2f6a82b581b60a62aae38
+ size 920512
SwfeSwitch_Qwen2.5-7B-Instruct/direct_prober/result.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "F1": 86.651,
+ "acc": 92.045,
+ "positive_rate": 29.727,
+ "prec": 86.454,
+ "recall": 86.85
+ }
SwfeSwitch_Qwen2.5-7B-Instruct/refusal_head.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a4869de880199a48ce421a9a95166f9b2f295c3a864e397d4ef73ef36fe57899
+ size 2179990748
SwfeSwitch_Qwen2.5-7B-Instruct/stage1_prober/args.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "epochs": 20,
+ "batch_size": 8,
+ "learning_rate": 1e-05,
+ "base_dir": "/shared/nas2/ph16/toxic/outputs/classifier",
+ "data_dir": "/shared/nas2/ph16/toxic/outputs/states/Qwen2.5-7B-Instruct",
+ "job_name": "Qwen2.5-7B-Instruct_multi_safety/token0",
+ "ckpt": "",
+ "gpu": "",
+ "wandb": false,
+ "overwrite": true,
+ "hidden_sizes": [
+ "64"
+ ],
+ "random_seed": 42,
+ "token_rule": "multi",
+ "label": "safety",
+ "n_decode": 0,
+ "layer_id": 28,
+ "num_classification": 2,
+ "neg_weight": 1.0,
+ "lens_path": "",
+ "llm": "Qwen2.5-7B-Instruct",
+ "use_lens": false
+ }
SwfeSwitch_Qwen2.5-7B-Instruct/stage1_prober/model_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:67754809618102057da44cc98d3bb98b8639e827108089f7d779e64da74a04b1
+ size 920512
SwfeSwitch_Qwen2.5-7B-Instruct/stage1_prober/result.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "F1": 93.475,
+ "acc": 94.682,
+ "positive_rate": 40.909,
+ "prec": 93.841,
+ "recall": 93.111
+ }
SwfeSwitch_Qwen2.5-7B-Instruct/stage2_prober/args.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "epochs": 20,
+ "batch_size": 8,
+ "learning_rate": 1e-05,
+ "base_dir": "/shared/nas2/ph16/toxic/outputs/classifier",
+ "data_dir": "/shared/nas2/ph16/toxic/outputs/states/Qwen2.5-7B-Instruct",
+ "job_name": "Qwen2.5-7B-Instruct_multi_response/token3",
+ "ckpt": "",
+ "gpu": "",
+ "wandb": false,
+ "overwrite": true,
+ "hidden_sizes": [
+ "64"
+ ],
+ "random_seed": 42,
+ "token_rule": "multi",
+ "label": "response",
+ "n_decode": 3,
+ "layer_id": 28,
+ "num_classification": 2,
+ "neg_weight": 1.0,
+ "lens_path": "",
+ "llm": "Qwen2.5-7B-Instruct",
+ "use_lens": false
+ }
SwfeSwitch_Qwen2.5-7B-Instruct/stage2_prober/model_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f150487f92aae5d1c33188efd6ebb91fd7d1bb3cc5567d5ce1a60fd129fdbdf5
+ size 920512
SwfeSwitch_Qwen2.5-7B-Instruct/stage2_prober/result.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "F1": 96.885,
+ "acc": 94.636,
+ "positive_rate": 85.0,
+ "prec": 95.673,
+ "recall": 98.128
+ }