josh-oo committed on
Commit
04e0baf
·
verified ·
1 Parent(s): 490b2df

2025-02-14_specter2_contrastive_fold_0

Browse files
Files changed (2) hide show
  1. config.json +57 -15
  2. model.safetensors +2 -2
config.json CHANGED
@@ -1,28 +1,70 @@
1
  {
2
- "_name_or_path": "BAAI/bge-m3-retromae",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "architectures": [
4
  "CustomModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 0,
8
  "classifier_dropout": null,
9
- "eos_token_id": 2,
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
- "hidden_size": 1024,
13
  "initializer_range": 0.02,
14
- "intermediate_size": 4096,
15
- "layer_norm_eps": 1e-05,
16
- "max_position_embeddings": 8194,
17
- "model_type": "xlm-roberta",
18
- "num_attention_heads": 16,
19
- "num_hidden_layers": 24,
20
- "output_past": true,
21
- "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
- "torch_dtype": "bfloat16",
 
24
  "transformers_version": "4.47.1",
25
- "type_vocab_size": 1,
26
  "use_cache": true,
27
- "vocab_size": 250008
28
  }
 
1
  {
2
+ "_name_or_path": "specter2",
3
+ "adapters": {
4
+ "adapters": {
5
+ "specter2": "9076f36a74755ac4"
6
+ },
7
+ "config_map": {
8
+ "9076f36a74755ac4": {
9
+ "adapter_residual_before_ln": false,
10
+ "cross_adapter": false,
11
+ "dropout": 0.0,
12
+ "factorized_phm_W": true,
13
+ "factorized_phm_rule": false,
14
+ "hypercomplex_nonlinearity": "glorot-uniform",
15
+ "init_weights": "bert",
16
+ "inv_adapter": null,
17
+ "inv_adapter_reduction_factor": null,
18
+ "is_parallel": false,
19
+ "learn_phm": true,
20
+ "leave_out": [],
21
+ "ln_after": false,
22
+ "ln_before": false,
23
+ "mh_adapter": false,
24
+ "non_linearity": "relu",
25
+ "original_ln_after": true,
26
+ "original_ln_before": true,
27
+ "output_adapter": true,
28
+ "phm_bias": true,
29
+ "phm_c_init": "normal",
30
+ "phm_dim": 4,
31
+ "phm_init_range": 0.0001,
32
+ "phm_layer": false,
33
+ "phm_rank": 1,
34
+ "reduction_factor": 16,
35
+ "residual_before_ln": true,
36
+ "scaling": 1.0,
37
+ "shared_W_phm": false,
38
+ "shared_phm_rule": true,
39
+ "stochastic_depth": 0.0,
40
+ "use_gating": false
41
+ }
42
+ },
43
+ "fusion_config_map": {},
44
+ "fusion_name_map": {},
45
+ "fusions": {}
46
+ },
47
  "architectures": [
48
  "CustomModel"
49
  ],
50
  "attention_probs_dropout_prob": 0.1,
 
51
  "classifier_dropout": null,
 
52
  "hidden_act": "gelu",
53
  "hidden_dropout_prob": 0.1,
54
+ "hidden_size": 768,
55
  "initializer_range": 0.02,
56
+ "intermediate_size": 3072,
57
+ "layer_norm_eps": 1e-12,
58
+ "max_position_embeddings": 512,
59
+ "model_type": "bert",
60
+ "num_attention_heads": 12,
61
+ "num_hidden_layers": 12,
62
+ "pad_token_id": 0,
 
63
  "position_embedding_type": "absolute",
64
+ "prediction_heads": {},
65
+ "torch_dtype": "float32",
66
  "transformers_version": "4.47.1",
67
+ "type_vocab_size": 2,
68
  "use_cache": true,
69
+ "vocab_size": 31096
70
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a4882ad5e660fe4e35f1b87e3a66ae4036f79873aa5a4b7437013e1e7a65014
3
- size 1222432592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81651fc08ba418def3d6134995e255709b2239e8c5ce2a52fbceb19196637e5f
3
+ size 571522000