NicoNico6 committed on
Commit
1aa9dc5
·
1 Parent(s): 37aa9cd
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. config.json +74 -0
  2. model-00001-of-00088.safetensors +3 -0
  3. model-00002-of-00088.safetensors +3 -0
  4. model-00003-of-00088.safetensors +3 -0
  5. model-00004-of-00088.safetensors +3 -0
  6. model-00005-of-00088.safetensors +3 -0
  7. model-00006-of-00088.safetensors +3 -0
  8. model-00007-of-00088.safetensors +3 -0
  9. model-00008-of-00088.safetensors +3 -0
  10. model-00009-of-00088.safetensors +3 -0
  11. model-00010-of-00088.safetensors +3 -0
  12. model-00011-of-00088.safetensors +3 -0
  13. model-00012-of-00088.safetensors +3 -0
  14. model-00013-of-00088.safetensors +3 -0
  15. model-00014-of-00088.safetensors +3 -0
  16. model-00015-of-00088.safetensors +3 -0
  17. model-00016-of-00088.safetensors +3 -0
  18. model-00017-of-00088.safetensors +3 -0
  19. model-00018-of-00088.safetensors +3 -0
  20. model-00019-of-00088.safetensors +3 -0
  21. model-00020-of-00088.safetensors +3 -0
  22. model-00021-of-00088.safetensors +3 -0
  23. model-00022-of-00088.safetensors +3 -0
  24. model-00023-of-00088.safetensors +3 -0
  25. model-00024-of-00088.safetensors +3 -0
  26. model-00025-of-00088.safetensors +3 -0
  27. model-00026-of-00088.safetensors +3 -0
  28. model-00027-of-00088.safetensors +3 -0
  29. model-00028-of-00088.safetensors +3 -0
  30. model-00029-of-00088.safetensors +3 -0
  31. model-00030-of-00088.safetensors +3 -0
  32. model-00031-of-00088.safetensors +3 -0
  33. model-00032-of-00088.safetensors +3 -0
  34. model-00033-of-00088.safetensors +3 -0
  35. model-00034-of-00088.safetensors +3 -0
  36. model-00035-of-00088.safetensors +3 -0
  37. model-00036-of-00088.safetensors +3 -0
  38. model-00037-of-00088.safetensors +3 -0
  39. model-00038-of-00088.safetensors +3 -0
  40. model-00039-of-00088.safetensors +3 -0
  41. model-00040-of-00088.safetensors +3 -0
  42. model-00041-of-00088.safetensors +3 -0
  43. model-00042-of-00088.safetensors +3 -0
  44. model-00043-of-00088.safetensors +3 -0
  45. model-00044-of-00088.safetensors +3 -0
  46. model-00045-of-00088.safetensors +3 -0
  47. model-00046-of-00088.safetensors +3 -0
  48. model-00047-of-00088.safetensors +3 -0
  49. model-00048-of-00088.safetensors +3 -0
  50. model-00049-of-00088.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_deepseek.DeepseekV3Config",
9
+ "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
+ },
12
+ "aux_loss_alpha": 0.001,
13
+ "bos_token_id": 0,
14
+ "eos_token_id": 1,
15
+ "ep_size": 1,
16
+ "first_k_dense_replace": 3,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 7168,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 18432,
21
+ "kv_lora_rank": 512,
22
+ "max_position_embeddings": 163840,
23
+ "model_type": "deepseek_v3",
24
+ "moe_intermediate_size": 2048,
25
+ "moe_layer_freq": 1,
26
+ "n_group": 8,
27
+ "n_routed_experts": 256,
28
+ "n_shared_experts": 1,
29
+ "norm_topk_prob": true,
30
+ "num_attention_heads": 128,
31
+ "num_experts_per_tok": 8,
32
+ "num_hidden_layers": 61,
33
+ "num_key_value_heads": 128,
34
+ "num_nextn_predict_layers": 1,
35
+ "pretraining_tp": 1,
36
+ "q_lora_rank": 1536,
37
+ "qk_nope_head_dim": 128,
38
+ "qk_rope_head_dim": 64,
39
+ "quantization": {
40
+ "group_size": 128,
41
+ "bits": 4
42
+ },
43
+ "quantization_config": {
44
+ "activation_scheme": "dynamic",
45
+ "modules_to_not_convert": null,
46
+ "quant_method": "fp8",
47
+ "weight_block_size": [
48
+ 128,
49
+ 128
50
+ ]
51
+ },
52
+ "rms_norm_eps": 1e-06,
53
+ "rope_scaling": {
54
+ "beta_fast": 32,
55
+ "beta_slow": 1,
56
+ "factor": 40,
57
+ "mscale": 1.0,
58
+ "mscale_all_dim": 1.0,
59
+ "original_max_position_embeddings": 4096,
60
+ "type": "yarn"
61
+ },
62
+ "rope_theta": 10000,
63
+ "routed_scaling_factor": 2.5,
64
+ "scoring_func": "sigmoid",
65
+ "seq_aux": true,
66
+ "tie_word_embeddings": false,
67
+ "topk_group": 4,
68
+ "topk_method": "noaux_tc",
69
+ "torch_dtype": "float32",
70
+ "transformers_version": "4.49.0",
71
+ "use_cache": true,
72
+ "v_head_dim": 128,
73
+ "vocab_size": 129280
74
+ }
model-00001-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff8abae0cf5c5e8cb5ae56f66e8618b679723b76baf79291d1071179cd7d5d85
3
+ size 4880186243
model-00002-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c02a6b7a5ce7efbe68d09735c1efc70b209a9ed6c551d7ab78ad1f3b1d80ac59
3
+ size 4119679627
model-00003-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f73693b16d54db79027d07407cbb2504904cd644c711d50b1e401e606e12c55
3
+ size 3992978192
model-00004-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57d4f1942e584cb925570a687ee295631ccf0a943b9317c6d050d47f7b2bd7f5
3
+ size 4119679767
model-00005-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e4efe9e988d39f26d52e9b990931f6b6c9e811aa71e3fff6328bd19e020b8d4
3
+ size 4119679673
model-00006-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b79f9ad48d50c265489b7fea31588f4992ccc60f1182f6c4bd1b71dbcb64a218
3
+ size 3992978186
model-00007-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5737f95c3e77d32c401ecc20b5c7a7ca0004dce709892c6fc9e8f0d61dd2b3f6
3
+ size 4119679725
model-00008-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d96734f674f3ab9f2942eaea23eea086d16ea0b646912cb86f1b40e476103ac
3
+ size 4119679679
model-00009-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd4bd48fb78b9de748c2495756e14510aeb8351a3bc4f899a1c126605ee88ab2
3
+ size 3992978190
model-00010-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c14e910b50ddd75e835744e9291f5880b742264aa6a8097f6982b0cfba041d97
3
+ size 4119679761
model-00011-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27876cde343c35b13ca0feb6d4e9de5ac6a9230f11787edc30f43bfe1e12f18f
3
+ size 4119679676
model-00012-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c37b7b83fffbd66400e12ff15c75797ea8fc5862cd10575ac0cd89b89d21ab5d
3
+ size 3992978196
model-00013-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95e55a5ba6bd226b66df23f1f29e082ea5ec2376addad051cff7a865aaeb21f8
3
+ size 4119679815
model-00014-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fbb9b3f029c6bee085eb0662b72e4c1d67d69b3593b25a5beb1136b2056cb08
3
+ size 4119679653
model-00015-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5714e7f8a2617b9b7ef00168e14884b1f13e147683254d945ef47ac6c4ab60f
3
+ size 3992978196
model-00016-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b38913a64b449a1093919c83a08f11172d7fc1e12fd6fd195b17e73b6ea35aea
3
+ size 4119679715
model-00017-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8cc16eed3ae21ebfa6d0c8eff1541e12f142f63491b376c2b1089b5882a34e0
3
+ size 4119679725
model-00018-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fac4242d875be2902085131158e7646c5c4991fbd704e65d61f671c112edebf
3
+ size 3992978196
model-00019-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cb64217a170939705b2f7701df0c6a9d4c184324acdd2335ca1936c7c3af28e
3
+ size 4119679751
model-00020-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f16f9377f7935c46b828154ff8311e271a502b289ce6dfa264234036439a4fe8
3
+ size 4119679697
model-00021-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39c13f05349f7c19bdbc1322316cff473b46326138bd0e50ef4b0beff5bbb984
3
+ size 3992978198
model-00022-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:252e5c510965527daee2236abdc6fbeeae1dd0516b709a3e5bbd783f346e0b53
3
+ size 4119679815
model-00023-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5151c3fc6870ab5bbf7a791a6e25e95718fc4bbcd1ca885c85b27166563dfdee
3
+ size 4119679721
model-00024-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e01e847edbf3d0742cd31330e7c2b09ce1758064be8414cca85842f9780c100
3
+ size 3992978196
model-00025-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56d5ab569ed48fec236cc7073cf1f6ce745577eb5a25d17dd42058dda71e16eb
3
+ size 4119679809
model-00026-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7412f494885274cd34b13f7196132402e28daaee2ab85f1627dc4bd77ea6718b
3
+ size 4119679719
model-00027-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fee4b6aaa3cd3d26f9e37e169db9ee604ead0e62688c60bd7ba7f9f01a5d3a84
3
+ size 3992978196
model-00028-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e0fd34310d7ff6a5681f412f2bd7978d37dc0fed4716dd93dc52d46a1642ab6
3
+ size 4119679819
model-00029-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:508c6bac6505b237742a0ec1d33512120e5c7d36cf574941edd6869f3c4e0f69
3
+ size 4119679679
model-00030-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:330e1d79345a3db8490debc613b3e96268a5d4d6f500b92c06fdea4e1fa3aee4
3
+ size 3992978198
model-00031-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:344c5a8b341eddcd7434a297229a06460fa0294989c6a586b37b24439d55121b
3
+ size 4119679815
model-00032-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5727f3f54d6f3fcfa632018a5f0ad7f90ee2c207f4719c0cbc5b35970e944270
3
+ size 4119679731
model-00033-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58a78fe6411fb962ea54bfd27c1fc68eea1dce0186f319fda3498f231c519c43
3
+ size 3992978194
model-00034-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f67a663204fe22ee56b6a3778ce4304c66807a111c13580a4b5e7151e5ca250
3
+ size 4119679819
model-00035-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e91e799dc2b58364fabca2ab5be3d716aac625e56550b414ed8c7209daad461
3
+ size 4119679725
model-00036-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e164107de0ca785b002c2dace5f31a14973f37b36387c52e3539aa5a5e524af1
3
+ size 3992978198
model-00037-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ac9abc2f7d862ec2d0fa44d106bc35afc05215654369baf997f57d7de941c18
3
+ size 4119679785
model-00038-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:632f31716bffae0714196c8c33115bd02e3fe5e9315fef51858d504fef5cfc3a
3
+ size 4119679679
model-00039-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c208760ed5cf9a86c3be5e077502894767faa858d9975ca64c0827cc7e2f229c
3
+ size 3992978194
model-00040-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6143a6b747c91aa2b7dd4e91d9f5b57945df656590374d4e0885d0c9d1f7ad74
3
+ size 4119679815
model-00041-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f617822fa06ce988b2e79d013b91e14b6729cc739157616456ebbc74010bf31f
3
+ size 4119679735
model-00042-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ed6493afee732759b4c001066a9ad79d016c9e7bcf395b9691fca48c408afc6
3
+ size 3992978198
model-00043-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a400509ad4324ce743fa5feac0880702def9cbf5398de259c1a82c22c65e0fd7
3
+ size 4119679813
model-00044-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6efe678bcb8928eaeed8fdbd1c35b07efbcb723f0345dff8670f2167886d7698
3
+ size 4119679703
model-00045-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:026404c144794a9aa8203d830b535f1c51562c773c10afd673000e0815a8a708
3
+ size 3992978194
model-00046-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f15ee1a0911cf193dfa558bc3fa14b706099371a18c261997f87afe5bcfa4973
3
+ size 4119679813
model-00047-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67bde6a209819e8c14c71e2f8c74b1d9f0e241bffed0878924cfbaac48bffc6d
3
+ size 4119679699
model-00048-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d60ccad7043593f7f4f490735a05a36085f74a886a829ad957f233d8ee4b9d2
3
+ size 3992978198
model-00049-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f05adcb0e543dc1f173272409c238d57f1b92dcdba24e23c49b04891e90509b8
3
+ size 4119679767