diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..d42fae903e9fa07f3e8edb0db00a8d905ba49560 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 DeepSeek + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json b/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d2e3a69315af15dba078a32ea04fd67a3d220689 --- /dev/null +++ b/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:223f8c7e295b02d403dcf3d0ebd17d7b7a0aae140dd075923ec5e60bd12cdd69 +size 1477 diff --git a/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx b/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5c123f227f60b30751063533876a2d2915de1c4c --- /dev/null +++ b/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44d2180d888cc778b11c2594738d8431b1c8d8418dfe4694662b8ccabd89028c +size 212761 diff --git a/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data b/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data new file mode 100644 index 0000000000000000000000000000000000000000..8fc404932d9b1f6846011151cd4a62278b30d0d6 --- /dev/null +++ b/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cbb55e5348955e7d7596a861e4e99f6264bbdeba38b914ec4d47eb84dfabffa +size 6859931648 diff --git a/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json b/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..55004e9eddc866f8c7786057b895e52f619cecfd --- /dev/null +++ b/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59cda48bbe8bab9d61ffb410e6e3c07b6d98bff73cee7c88ff8b51f95f21ab1c +size 485 diff --git a/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json b/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..4f47952a1bdada713048f5e45fd3b52ad615fb34 --- /dev/null +++ b/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91915040cfac999d8c55f4b5bc6e67367c065e3a7a4e4b9438ce1f256addd86 +size 17209530 diff --git a/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json b/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..84584b8fd3b1c3415b29d84945c3acdd73213adb --- /dev/null +++ b/deepseek-r1-distill-llama-8B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dd0191e04767e4bb08ded463e8cfc118ec5089c442a38e0c8221bff425f2e7b +size 52921 diff --git a/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/genai_config.json b/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/genai_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e2101221f714fb288daa01edc73015f872897bc4 --- /dev/null +++ b/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/genai_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fce7201d5299760db0a212953ea963f35f7d5ebf171a11d1b75efb5f080af147 +size 1525 diff --git a/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/model.onnx b/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e49285dbf56c623a8ec2acb8b4f68143e5cdf721 --- /dev/null +++ b/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e9499a79a6204f3276dea9a5d8d9a35123036af938b0117b8a1ca9f857cb13a +size 284037 diff --git a/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/model.onnx.data b/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/model.onnx.data new file mode 100644 index 0000000000000000000000000000000000000000..f52dc41459f9e69ae32fe28bc1bc595b0b4631a5 --- /dev/null +++ b/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/model.onnx.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e8fcf924a0ae90140c0099f6c55d5aa8a8259810185afc2e2c33b0a527c488f +size 5306130432 diff --git a/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/special_tokens_map.json b/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..55004e9eddc866f8c7786057b895e52f619cecfd --- /dev/null +++ b/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59cda48bbe8bab9d61ffb410e6e3c07b6d98bff73cee7c88ff8b51f95f21ab1c +size 485 diff --git a/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/tokenizer.json b/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..4f47952a1bdada713048f5e45fd3b52ad615fb34 --- /dev/null +++ b/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91915040cfac999d8c55f4b5bc6e67367c065e3a7a4e4b9438ce1f256addd86 +size 17209530 diff --git a/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/tokenizer_config.json b/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..84584b8fd3b1c3415b29d84945c3acdd73213adb --- /dev/null +++ b/deepseek-r1-distill-llama-8B/gpu/gpu-int4-rtn-block-32/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dd0191e04767e4bb08ded463e8cfc118ec5089c442a38e0c8221bff425f2e7b +size 52921 diff --git a/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json b/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0a83664728d087f3fa4a8f1e3eafc015ac171be2 --- /dev/null +++ b/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc87bc52132e57d152c937c72394cdfb9d202ee3a9ea1a1c2769588f1e59dc5f +size 1477 diff --git a/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx b/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4562e39f090c9b5858de3e31d38b787720d043a1 --- /dev/null +++ b/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:905c518a61d91d39bc624d17edefcf9c4dedba0e9dae4a1949c3af8ea53fc851 +size 196782 diff --git a/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data b/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data new file mode 100644 index 0000000000000000000000000000000000000000..7bb2a295059a64b639059ac365947a5a63705b54 --- /dev/null +++ b/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31729113e4cb78c0ca74137eb5c1ff3911c24d5097c795eba4a018fe981a8a47 +size 1965914112 diff --git a/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json b/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..55004e9eddc866f8c7786057b895e52f619cecfd --- /dev/null +++ b/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59cda48bbe8bab9d61ffb410e6e3c07b6d98bff73cee7c88ff8b51f95f21ab1c +size 485 diff --git a/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json b/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json b/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b77d56d6f345933190886b76d1eca4796ee8a91e --- /dev/null +++ b/deepseek-r1-distill-qwen-1.5B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b869a935677e8f9dc3896cb69982de89843c1bbff27194eb83542c0e3f82babc +size 6754 diff --git a/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/genai_config.json b/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/genai_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e9da361687f2a160cad752510c1263693ae35f48 --- /dev/null +++ b/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/genai_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0b37e684b7809280ea6f94304d95e7cce3df3e93d5f148db79b897e7cc893c6 +size 1525 diff --git a/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/model.onnx b/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b24ea12092df934806b368115df8bc16bc564af4 --- /dev/null +++ b/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63196cbdd6340502f90a07cea6b07447208d0bfa87f29921d56eb900f01e87cf +size 281689 diff --git a/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/model.onnx.data b/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/model.onnx.data new file mode 100644 index 0000000000000000000000000000000000000000..accf1fd04aad4cada0ecc0169dcee6bc6c528b02 --- /dev/null +++ b/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/model.onnx.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28262bc2d208413b3dd3189df4aa238c098dcc6e0dc1de7e0c37745fd2a2f5df +size 1368849408 diff --git a/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/special_tokens_map.json b/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..55004e9eddc866f8c7786057b895e52f619cecfd --- /dev/null +++ b/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59cda48bbe8bab9d61ffb410e6e3c07b6d98bff73cee7c88ff8b51f95f21ab1c +size 485 diff --git a/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/tokenizer.json b/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/tokenizer_config.json b/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b77d56d6f345933190886b76d1eca4796ee8a91e --- /dev/null +++ b/deepseek-r1-distill-qwen-1.5B/gpu/gpu-int4-rtn-block-32/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b869a935677e8f9dc3896cb69982de89843c1bbff27194eb83542c0e3f82babc +size 6754 diff --git a/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json b/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json new file mode 100644 index 0000000000000000000000000000000000000000..23a49b4498338a31f555ed517d2c8b544cad8a9c --- /dev/null +++ b/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94acba632247c6733b9a01c7fad5ed377a2be2435576d87c19e2e3c1d3c8f7ee +size 1477 diff --git a/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx b/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a027be94d3c76af162d2c90cbb3780f0709d32d8 --- /dev/null +++ b/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9240308a8a7893e729f904084919c18c6fbed160c26c660fc8c563b7bc97520 +size 336374 diff --git a/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data b/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data new file mode 100644 index 0000000000000000000000000000000000000000..fc3e85c2d07ef2bcbdd2026bc64dd2dc59961532 --- /dev/null +++ b/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:590668a880e7255a6883bc0b119a95721a662c6d61639339d5169c7f7778ea15 +size 11928883200 diff --git a/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json b/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..55004e9eddc866f8c7786057b895e52f619cecfd --- /dev/null +++ b/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59cda48bbe8bab9d61ffb410e6e3c07b6d98bff73cee7c88ff8b51f95f21ab1c +size 485 diff --git a/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json b/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json b/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b77d56d6f345933190886b76d1eca4796ee8a91e --- /dev/null +++ b/deepseek-r1-distill-qwen-14B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b869a935677e8f9dc3896cb69982de89843c1bbff27194eb83542c0e3f82babc +size 6754 diff --git a/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/genai_config.json b/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/genai_config.json new file mode 100644 index 0000000000000000000000000000000000000000..757465173e5b45aa513f8ae0c5c6e0c3f53d85e8 --- /dev/null +++ b/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/genai_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf801e1dc0924a3d7bd47ecf41be2c35e525ffd3b474e002e51d0f9eff7b3e4e +size 1525 diff --git a/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/model.onnx b/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..ea47519d8d044ab50295fc2cdbd7e468dadb251a --- /dev/null +++ b/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fb7f7b9ab54f552551f273e799433217316cf6a71d1bde95a64bccb5997df8b +size 482575 diff --git a/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/model.onnx.data b/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/model.onnx.data new file mode 100644 index 0000000000000000000000000000000000000000..ad94c69df84d0dc0c6a0fbe4ea2b2a6dd1c69f7b --- /dev/null +++ b/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/model.onnx.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b95f91e948ad458bdc2a1e0697fd3e8a84c54b2f9bad0617a40094ae6972bfbf +size 9462097920 diff --git a/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/special_tokens_map.json b/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..55004e9eddc866f8c7786057b895e52f619cecfd --- /dev/null +++ b/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59cda48bbe8bab9d61ffb410e6e3c07b6d98bff73cee7c88ff8b51f95f21ab1c +size 485 diff --git a/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/tokenizer.json b/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/tokenizer_config.json b/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b77d56d6f345933190886b76d1eca4796ee8a91e --- /dev/null +++ b/deepseek-r1-distill-qwen-14B/gpu/gpu-int4-rtn-block-32/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b869a935677e8f9dc3896cb69982de89843c1bbff27194eb83542c0e3f82babc +size 6754 diff --git a/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json b/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c387811f6788fe9ff817852099d0fd7662ba0a7 --- /dev/null +++ b/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8f5900c8ba518e7d73fddfe903cd5b1f9e7b50095d9c7f6c7d4e05045e7ed75 +size 1477 diff --git a/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx b/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..2d693b1354eac044ded1cf3f45a6cec28f371e90 --- /dev/null +++ b/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc13f1a64185e05e8b9b7bbc333b221720d37c948c1406e188f44980237a51b8 +size 197432 diff --git a/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data b/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data new file mode 100644 index 0000000000000000000000000000000000000000..632411e501ba3ff5ff48a0e141f4eb4c33809dd3 --- /dev/null +++ b/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c77a01ab0da245da13925b0f92f5f12d76d30fe5b91cf6a2fade666172bcad73 +size 6667360256 diff --git a/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json b/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..55004e9eddc866f8c7786057b895e52f619cecfd --- /dev/null +++ b/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59cda48bbe8bab9d61ffb410e6e3c07b6d98bff73cee7c88ff8b51f95f21ab1c +size 485 diff --git a/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json b/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json b/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b77d56d6f345933190886b76d1eca4796ee8a91e --- /dev/null +++ b/deepseek-r1-distill-qwen-7B/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b869a935677e8f9dc3896cb69982de89843c1bbff27194eb83542c0e3f82babc +size 6754 diff --git a/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/genai_config.json b/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/genai_config.json new file mode 100644 index 0000000000000000000000000000000000000000..66250ba9b0223fac9d9a88dc3332154ceeab0fef --- /dev/null +++ b/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/genai_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0966761d8b5da58788742cc4b441663ce87b21db3186865812f14ecd15cad7af +size 1525 diff --git a/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/model.onnx b/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b0fecc45112411d8e9f27fc20331404c77a9ccb9 --- /dev/null +++ b/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6964501f47433e067091a911260174dedacf2dd6fd372873f428701d9d74cdb1 +size 282885 diff --git a/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/model.onnx.data b/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/model.onnx.data new file mode 100644 index 0000000000000000000000000000000000000000..a304e0ddaa630042627a2c24bb8794b92a8fffcc --- /dev/null +++ b/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/model.onnx.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:787cfa017ea3caf3e0b255eecdf0e6b0e189cec19b1452b26dc14f2551c6c55f +size 5101251584 diff --git a/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/special_tokens_map.json b/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..55004e9eddc866f8c7786057b895e52f619cecfd --- /dev/null +++ b/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59cda48bbe8bab9d61ffb410e6e3c07b6d98bff73cee7c88ff8b51f95f21ab1c +size 485 diff --git a/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/tokenizer.json b/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/tokenizer_config.json b/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b77d56d6f345933190886b76d1eca4796ee8a91e --- /dev/null +++ b/deepseek-r1-distill-qwen-7B/gpu/gpu-int4-rtn-block-32/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b869a935677e8f9dc3896cb69982de89843c1bbff27194eb83542c0e3f82babc +size 6754