cgus committed
Commit 7e71351 · verified · 1 parent: 4b2298a

Upload 9 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,131 @@
+ ---
+ license: mit
+ library_name: transformers
+ datasets:
+ - PrimeIntellect/verifiable-coding-problems
+ - likaixin/TACO-verified
+ - livecodebench/code_generation_lite
+ language:
+ - en
+ base_model:
+ - deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
+ pipeline_tag: text-generation
+ ---
+
+ <div align="center">
+ <span style="font-family: default; font-size: 1.5em;">DeepCoder-14B-Preview</span>
+ <div>
+ 🚀 Democratizing Reinforcement Learning for LLMs (RLLM) 🌟
+ </div>
+ </div>
+ <br>
+ <div align="center" style="line-height: 1;">
+ <a href="https://github.com/agentica-project/rllm" style="margin: 2px;">
+ <img alt="Code" src="https://img.shields.io/badge/RLLM-000000?style=for-the-badge&logo=github&logoColor=000&logoColor=white" style="display: inline-block; vertical-align: middle;"/>
+ </a>
+ <a href="https://pretty-radio-b75.notion.site/DeepCoder-A-Fully-Open-Source-14B-Coder-at-O3-mini-Level-1cf81902c14680b3bee5eb349a512a51" target="_blank" style="margin: 2px;">
+ <img alt="Blog" src="https://img.shields.io/badge/Notion-%23000000.svg?style=for-the-badge&logo=notion&logoColor=white" style="display: inline-block; vertical-align: middle;"/>
+ </a>
+ <a href="https://x.com/Agentica_" style="margin: 2px;">
+ <img alt="X.ai" src="https://img.shields.io/badge/Agentica-white?style=for-the-badge&logo=X&logoColor=000&color=000&labelColor=white" style="display: inline-block; vertical-align: middle;"/>
+ </a>
+ <a href="https://huggingface.co/agentica-org" style="margin: 2px;">
+ <img alt="Hugging Face" src="https://img.shields.io/badge/Agentica-fcd022?style=for-the-badge&logo=huggingface&logoColor=000&labelColor" style="display: inline-block; vertical-align: middle;"/>
+ </a>
+ <a href="https://www.together.ai" style="margin: 2px;">
+ <img alt="Together AI" src="https://img.shields.io/badge/-Together_AI%20-white?style=for-the-badge&logo=data%3Aimage%2Fpng%3Bbase64%2CiVBORw0KGgoAAAANSUhEUgAAAUAAAAFACAMAAAD6TlWYAAAC7lBMVEUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8AAAAPb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8Pb%2F8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADIBDt6AAAA%2BnRSTlMAAiQEKgcdKQwiHBMUzrtSUEmjhmZGH96yv8n1ey7nL3y1U%2FZfCaIo1WFg1NrcsHYrA2%2Fv80J%2BMeilnpefqKw%2B64%2BQlSbYZGVnBGkCV%2BxW8XJube6WJ9kZF9bSzBALRynPQfLhIjvwyBEAXOTLp3o%2FJA9Y9%2F7%2F9FEKDhIVFo4GHkVzjGz8icrHzY39iHR1i0M8Jj14LLZUvb7DxMXGoQEFeQcgSBOHaPvm4uOdRLMMqcDTLbcII0sNuVn4TKaRd6RKIeDd37Svra6xuLpaW17lXUAlHh8WGxUPIS4JGQoFECMsBg4gFwsRJRIrCC0oAycaFC8NMDIzMRgBsVt9rwAAD25JREFUeNrs3QVzG0kWB%2FA3ikHhZeYwk3LMbF7GcBasOGw9hb3MzLyKw8zMzMx2rsokhySNY2mmR1N4xXV3a7sHuzWu%2BX2Ef3XPG%2Br3wOVyuVwul8vlcrlcLpfL5XK5dOlXOHTIvLnb27Xd%2FasBvrt9A%2B7r1bbdTTffcmuXwhzgTYwk6q%2BHr2RWlcclRYqXV2VeCV%2Bvr4mIkCJKZ83uc9NLC0fMD%2BD%2FCswfMfLtzh%2FeelsJcKJW19SG66KSTP6fLEXrwrU11Srw5Z8zbuzePcUBbFyg%2BPY7Pv%2Bs0A%2Bsid7ayiqFNEWp8iS9Ir%2F0Cl957bkRAaQLFLz15sBBfpbpJc7FJKKFFGuV4JJh6N573g6idr7vP%2F8iC9iI1NZJRDupLnlRBbaW3XjTfQHUJ3D8d68MBtsJiTNRold5uEYAdibkHgqiESMefGi9zfFVeCRihOS5LLJafV99XYxGddgwabKt8SmEyEQ%2FmRDlSoUA9gsNvKMDmhE8MC4L7OFtSYmPFmFlAmzm%2F9tfH0Oz8v6yFmxQ3SpOiY8eYTwjHew0%2BB9%2FD6B5ga4dLd%2FHQus0SnzaIrzWWgDb9P19MVqjw01dwFLpYYVYQymLgD1Kjj6J1umaHwLLqJfpy0%2FHIryqgg2mvetDKxXMnQMWEa9LxEpSqxZguS%2B%2BfA%2Bt9cZBi7ZxeqVMX376FqEnAtbyv7ISrTfspB%2FM82bq3r70BNMSYKV%2Bo4rQDiPzc8Csy1Fih%2BhVsE7o0cfQHnn%2FygJz6uNEJtaTSfy8ChYpnelDuxQ8HAIT1LOS8fwoCSq1FiVYcs%2FdaJ%2FgNhMJqrWKqfwoCSYtSTA08260U%2FBh47v4LDU%2F%2FgnmPOJDexX86ycwpp6yf80neB7M8o96DO2Wl2%2Bw%2FlLrh%2FlKYroW31qE9ht5EgzwRs3nR00wmgBTVq1EFtp2Ad0imdbkR0kwLQImTP8S2eg9B3QSKwkbHhPPxSUzAsjGe3P1luLrMmGklQpGjfIhKwU6C8llibBJUCaS4UKy6klkp0cX0CE9zcr8KAlei4Ahy36PLHXuBJqpYcJSmQBG3LIJWerQETS7qhCWlHowoMvfka2Va0Gjaus3MGUTp4NuWY8ja3%2FuB9q0IqydBt1eeQxZ%2B9MfQRNvnLAWT%2BiuIEuRvT9MBg3UlkQmbMmkUgB9cjsge8EbQIMLCmFPuQy6DPoGeVi9HqgED5EJazL5VAQ9Nm5CHjq0B6oKhZCUX4LrNyAfSycDhVBJZMKeTK4IoN26IPJRsAQoEhLhQ7kAmoV%2Bjbwspt0LniF8yKRMBa1%2B%2BSvkZVFfaFIkSngpvwha%2FQL56QNNqiX8%2FBs0mnMX8vPtBGiCWEf4iYmgzey7kZ8Rw6EJXonwo9SANn9GnuZCE84RnlqBJm3aIk8vFUKjxBjhKbMFaDHQhzy9%2BAI06pJEeJIS%2FGuwBn1M1WD%2BdXjNauSrdwk0Qq0kfHlUoFs7Evnq9TI0orqK8BVN1%2FIcvAn56vAKNCK
hEDruz8NjkbdXOV4CKZJA1W8M8vbjT9CwMOGtDKjmjEbefpgCDRLqCB33p7kvipC3kc83UkOihLdohF5DfMjbiBf43UZTSPQq8vobyNsbudCgyzLhTT4PNK8hpmoZPkv4awU0y5G%2F1%2Fj90WG%2BDK9ATNX7mDDh71OgWYn83RHi9yRMkQY0I5G%2FOydDA4RPCX9RoMlD%2Fu6a0mCAMcJfHGh8yN%2BwqdAAMZPwJwFNB%2BRv5TRoQIs0wp%2FiiAB7TG%2B2Abor0L0GmiO5VdicuHsfaE7UfRIxJ80Rz8Kdnfss7L6NoShz8vvAWsLfOUe8kZ7o5DfSm1Pgm8gnTv4msqoIzXC%2FyrUZjWa434XdPxOoRZjiHjTD%2FTcGNm9Cg9y%2Fs9z%2FAymi1e4fqqZ4VPcfaQZnlQYGkacXP3H6X%2FrT2qIZ7jkR%2BAvy9L5jTyq5Z%2BUolBpHnNYc5PDTmubrsHtemOeJ9aJmcWI9tAV5%2BQ29Z4Kc%2Bj0TYHOQVwl5pVl07YD1h9EMt28MHOHUueihZtK5CArvRB4OTWkuvbNgYjGyF5wEGlQ4oXsbrF%2BK7O2fDBoIPPoHegQndLAc14w6WELot8jaX5pVD1Xo8iSy1WM8nzbcFMZbcf%2BLcR%2Fp7qBZayf0kYZly5GlzpOd3Mmcfy%2F9rl1AhwjTXvoXwaATDKc55Dp6mgP%2FeSLvZ4E%2B55wwTwSmr0Y2Djp6og3%2FmUrDhqbuTKWLYMqQ42i%2FkcNTdqpXeQ2Y4z82AO2Wl8txrpz5AkLRr38Q7TUiOydlJxueBfNCYzugnYKvOn62JkXpA3YmGPy8xPnTXanzhYP27d8PSvjPFzafH0Wov12VJC87ZSdcS2dVsEy%2FE8fRDgtznTFj3Tz%2FrT3QesOGO2bKv3mrVr%2BH1nrjjqFgiUilTGRr8%2FNEwHLTZ%2FisLR9vzgGLiOckYiWpVQuwQcmonmidZ3JDYBn1chohslXL79pVFWzh%2F2L5JrRG8fahYKlIWCHWUMoiYJtl%2F3wygOYFunabDBYTWmtdhJTlVy%2BAjfxPPP4YmpW3dTzYID0jTo%2BQEl88Ix1sFlqytAOacfe%2Bk1lgD29LxXiEMiFKZUIF%2By3L%2F6YYjSpu134w2EaouEKPsNH4rlwWgI0JEzcE0Qjfl19NAVsJFR6JGCF5LovAzrId2%2B8LoD6BBT8OGQy2E2rCUaJXebhGALZC9z%2FwUhC18%2F0wc1UWsBFJ1klEOymWvKgCe%2F7CW999xxdAusCI0R99PMgP7IiJczFJY3qtEiLw8tOckw88uKs40FR4xXuWzvzjVD%2BwJnqTlVUKaYpS5Ul6ReCsdOeOmVveKgq%2Bh%2F%2FvveCiu7Zvmz2rFDhRq2tqw7GoJJP%2FJ0vRWFmyplqF1NBv0KmTJz7fumX1d889%2B8yTzzz73Ldfbtm6bdS48RNygDcx3Xu1NqPMUxdLS7uWlhar85RlJK9600VIOf6c0mWDpj391NNtBg0uyfFDSlEF8T%2Ft3eFyqjwTwPGNiKq9eq%2BtqiCeoxZVEcRW4mK%2Bvc%2F5%2Bk7bBSDZOJPfFfwHWkEMG%2B%2BfXChwHMdxHMdxHMdxHMdxHMdxHIeV4yiR%2FyOUS6tHfBxP88Vse74N%2F7mdt7PF%2FHT8EFakbYg0XupvMZ%2Fddt%2F%2Ber27zebFX%2BXSfpQfD%2BMLsX7iMp4fc460%2BfgiqbSD1jSCGH1WXAV1v32OhOm0O1Yh9aUR0sNUYnVyekjBEH9eL%2B2mIY2gilmGdWXvhTKQNnpvkDYrBJgjNluJTchtIDSnBY3TNgLMUEGvbL4Qvhco3WkPbOS%2FNAEGjMay1bsEMjyCJsewXVo5HoFuH5P2b7OsJh9a0har1mn3tmkElXTzPlU%2FUd2nDfnTKH53b%2FTN%2FI7TZp2l7X3QZNPlO6X9jb1pJwUa5J8SuyQ%2Fc2vTFjl0zu%2F8vfrH2O8obdx52jaFjmmZ7HAdQQeOVw1pwxF0StNskd0GWtvsUIfsBB3SNt3m%2FgUtva1402jEfCXm%2BUBLjWkHBZ2gJ3zxHcG51JhWdnQENc%2BYk3O2vz%2F6CEJrBqYcyi9o6E172hJaMjJn876BRjYG0k7QiqFJr7tRo7SdgbSsgBaMzRoe%2BlCbfzWTlkILxqZdj%2FPaaWM0Y%2BtBUwbnrT8%2BoaZPY2kLBc2Ynfi%2FgVo2BtNO0JDRPSf6PtTgm0y7pNCI2KNJewWVqZnZNAH1md93J4HKEsNpb1Abw85P%2FQ%2Bo6GNoOs2H%2BgZo2gQqWqBpA6iNY%2Fe7EVRyXNm%2FMR%2FP%2FotjBRWokCFtK6AOrh1AA6ggkBxpG6hFnImzzLUFKNv2uOec5Q9Qw3kO7N%2BgmT7LjB81asuU1hNQXSyRhyyAULClxVDdHh%2FI4YEzIMzY0vZQWZQhlyyFX6V8aasIqnoinwP86oB8nlBRfkM%2Btxx%2BIaZWpNGf03zkCH4xYk0r7PiuTljALz6R0wQqya%2FI6ZrTHy78acS%2FCSd5hB8dmdNGdlyDCQfiGmz7dVhtkddWWZvWU0D72CGv3Qf84O%2BFP40Wl8irLOAHBXtaDLQDoq0fgnPk9gTaHrnt4Qcz5Bba8T2OcBPwLUGnWXAnmGbILfP5Lm%2BELLX3WSp9v3q0IC0GytcDuT1O8K2TBWlLq58kEJfhOfJbACVEfhN7z20IlDPy2xM3WIymQBkiv57i%2ByZM6ANlh%2FymAr6hpshvB5QVoqW3q%2BKK%2FO5AkchvmMM38iHyk0ApkV%2Ffg294feRXugPoDiCr0n0GtiPdVbid%2BwvfB4op8svcN5F2%2Bu67cDvTV34aM0F%2B4Ss%2FDzzYcW4JSwse%2Byav%2FETa4t9ERhakBS%2F9q5wFaRH%2F6kDaNbf3d2EPXuAyvLd30UQItCdyO9i7bOf5EquzYnvTgpdeH8iflvlAUz3kZf8KVcs%2FBJ%2F2rl1cQxWFvUvhR8xpBVThDfnvAu28SR16UMkEOS3sfdQxgGri0tp%2Fk0Lac39l6T%2FKLbd2AfLVg4rW9t7rPy24BtOiFXJZRda%2BTL%2F6A1Wp0N7BBHu2tFBBZUGJPGRs7QPfMrB9cBExnIV7pM1ZQA0nrvFA9qYlUEc%2B5R9QZddYrymdxn%2Bey5O9g%2BUSqEf0rB3SJ7YMaT0BNRUMEywLa9NkDHWpdzRtYO9413cFtaUXw6NyL76VA4abj%2BL%2BMjys%2BcvaEdePJTQhxmhSKGqkhWjSWEAj0cXagfWpybRdBA0lpbktExJrN5oo36ApNUFTJqpm2gJNGShozOuhGT3P2rSzBy1EfSMbF%2FVTqC01lBZBK%2FHK2q2zisxA2iqGlhKpf%2FO2pGHaXXuafOPfGZKMLJeMO0MSaXNoTz1LvRtYPhXftqlE2lpBB9Sa
yOQ6fgDqqTXtk07jzKSPH00dpL60tbJ9h%2Bb2%2BzODWt7tSKM34tZhlUBrSaYn7Q06Ffc1bKXfj6EDhQ1ptOhcP5OI7EXQibTXedo5gs55gxK7VE68ztImstu0gQcaqGSH%2BOjqHF8S1WXapcO03ZsCPaLxA7tRhhF0Kg1L7MZjHIE24os%2B05X%2B%2FL6ErWm7pQCd0ndJdxKN93cfNPDf763T5CwFzVTcK%2BnOXxrLXqE0pRXbtmmxAv3EaUp3%2Ftg4PQlL0x7TRIAZeXIusYnyfMo1p50apyU5mCOCcIV1rcJA2J9mivqzvpZYXXldR8pQWlQ77Y8CBnk8GFYLlcNBnJtNmwwlVlH%2Bl%2BYBG69Yn7Py98Ksty48lrQemXY2kEZRfvAMr5l84P97yOwaPgNfWZq2NpZG86JgPhlP%2B9ldlo9S3rP%2BdDyZB5FnRdqygzTHcRzHcRzHcRzHcRzHcZz%2FAbyvLkVmYcs9AAAAAElFTkSuQmCC&link=https%3A%2F%2Fwww.together.ai" style="display: inline-block; vertical-align: middle;"/>
+ </a>
+ </div>
+
+ ## DeepCoder Overview
+ DeepCoder-14B-Preview is a code reasoning LLM fine-tuned from DeepSeek-R1-Distill-Qwen-14B using distributed reinforcement learning (RL) to scale to long context lengths. The model achieves 60.6% Pass@1 accuracy on LiveCodeBench v5 (8/1/24-2/1/25), an 8% improvement over the base model (53%), and reaches performance comparable to OpenAI's o3-mini with just 14B parameters.
+
+ <div style="margin: 0 auto;">
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/654037be97949fd2304aab7f/r3-vzkItOCrMf1qldW0Mj.png" style="width: 100%;" />
+ </div>
+
+ ## Data
+ Our training dataset consists of approximately 24K unique problem-test pairs compiled from:
+ - TACO-Verified
+ - PrimeIntellect SYNTHETIC-1
+ - LiveCodeBench v5 (5/1/23-7/31/24)
+
+ ## Training Recipe
+
+ Our training recipe relies on an improved version of GRPO (GRPO+) and on iterative context lengthening, a technique introduced in DeepScaleR.
+
+ ### GRPO+
+
+ We enhance the original GRPO algorithm with insights from DAPO to enable more stable training (an illustrative loss sketch follows this list):
+
+ - **Offline Difficulty Filtering:** DAPO employs online dynamic sampling, discarding both entirely correct and entirely incorrect samples on the fly. While this helps maintain a more stable effective batch size, it introduces significant runtime overhead due to rejection sampling. Instead, we perform offline difficulty filtering on a subset of coding problems to ensure the training dataset remains within a suitable difficulty range.
+ - **No Entropy Loss:** We observed that including an entropy loss term often led to instability, with entropy growing exponentially and ultimately collapsing training. To mitigate this, we eliminate the entropy loss entirely.
+ - **No KL Loss:** Eliminating the KL loss frees the LLM from being constrained to the trust region of the original SFT model. This removal also obviates the need to compute log probabilities for the reference policy, thereby accelerating training.
+ - **Overlong Filtering (from DAPO):** To preserve long-context reasoning, we mask the loss for truncated sequences. This technique enables DeepCoder to generalize to 64K-context inference despite being trained with a 32K context.
+ - **Clip High (from DAPO):** By increasing the upper bound in GRPO/PPO's surrogate loss, we encourage more exploration and more stable entropy.
+
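+ Below is a minimal, illustrative PyTorch sketch of a GRPO+-style surrogate loss reflecting the points above. It is not our actual training code (which lives in our Verl fork); the tensor layout, the clipping bounds `eps_low`/`eps_high`, and the mask handling are assumptions made for exposition only:
+
+ ```python
+ import torch
+
+ def grpo_plus_loss(logp_new, logp_old, advantages, loss_mask,
+                    eps_low=0.2, eps_high=0.28):
+     """Illustrative GRPO+-style loss; shapes and bounds are assumptions.
+
+     logp_new:   [B, T] token log-probs under the current policy
+     logp_old:   [B, T] token log-probs under the rollout policy
+     advantages: [B]    group-normalized advantage per sampled response
+     loss_mask:  [B, T] 1 for valid tokens; 0 for padding and for every
+                 token of truncated sequences ("overlong filtering")
+     """
+     ratio = torch.exp(logp_new - logp_old)  # per-token importance ratio
+     adv = advantages.unsqueeze(-1)          # broadcast over tokens
+     # "Clip High": the upper bound (1 + eps_high) exceeds the symmetric
+     # PPO bound, encouraging exploration on positively rewarded tokens.
+     clipped = torch.clamp(ratio, 1.0 - eps_low, 1.0 + eps_high)
+     surrogate = torch.minimum(ratio * adv, clipped * adv)
+     # Per the recipe above, no KL penalty or entropy bonus is added.
+     return -(surrogate * loss_mask).sum() / loss_mask.sum().clamp(min=1)
+ ```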
+ ### Iterative Context Lengthening
+
+ Our original `DeepScaleR-1.5B-Preview` scaled long-context training from 8K→16K→24K, achieving 33%→38%→43% on AIME, respectively. Similarly, `DeepCoder-14B-Preview` is trained at 16K→32K, achieving 54%→58% on LiveCodeBench (v5). When evaluated at a 64K context, it successfully generalizes to the longer context, reaching 60.6%.
+
+ DeepCoder generalizes better to long contexts than the base distilled model thanks to DAPO's overlong filtering. However, its longer responses are often truncated when the max length is capped at 16K, which can lower its scores.
+
+ LiveCodeBench v5 Pass@1 (%) by evaluation context length:
+
+ | **Model** | **16K** | **32K** | **64K** |
+ | --- | --- | --- | --- |
+ | **DeepCoder-14B-Preview** | 45.6 | 57.9 | 60.6 |
+ | **DeepSeek-R1-Distill-Qwen-14B** | 50.2 | 53.0 | 53.0 |
+
+ A more detailed description of the training recipe can be found in our [blog post](https://pretty-radio-b75.notion.site/DeepCoder-A-Fully-Open-Source-14B-Coder-at-O3-mini-Level-1cf81902c14680b3bee5eb349a512a51).
+
+ ## Evaluation
+
+ We evaluate `DeepCoder-14B-Preview` on various coding benchmarks, including LiveCodeBench (LCB v5), Codeforces, and HumanEval+.
+
+ | **Model** | LCB (v5) (8/1/24-2/1/25) | Codeforces Rating | Codeforces Percentile | HumanEval+ |
+ | --- | --- | --- | --- | --- |
+ | **DeepCoder-14B-Preview (ours)** | ***60.6*** | ***1936*** | ***95.3*** | ***92.6*** |
+ | **DeepSeek-R1-Distill-Qwen-14B** | 53.0 | 1791 | 92.7 | 92.0 |
+ | **O1-2024-12-17 (Low)** | 59.5 | **1991** | **96.1** | 90.8 |
+ | **O3-Mini-2025-1-31 (Low)** | **60.9** | 1918 | 94.9 | 92.6 |
+ | **O1-Preview** | 42.7 | 1658 | 88.5 | 89.0 |
+ | **DeepSeek-R1** | 62.8 | 1948 | 95.4 | 92.6 |
+ | **Llama-4-Behemoth** | 49.4 | - | - | - |
+
+ ## Serving DeepCoder
+ Our model can be served using popular high-performance inference systems:
+ - vLLM
+ - Hugging Face Text Generation Inference (TGI)
+ - SGLang
+ - TensorRT-LLM
+
+ All these systems support the OpenAI Chat Completions API format.
+
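+ As an illustrative example, a model served behind any of these systems can be queried with the standard `openai` Python client. The vLLM launch command, local address, and port below are assumptions, and the sampling settings follow the usage recommendations in the next section:
+
+ ```python
+ # First launch an OpenAI-compatible server, e.g. with vLLM (illustrative):
+ #   vllm serve agentica-org/DeepCoder-14B-Preview --max-model-len 65536
+ from openai import OpenAI
+
+ client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
+
+ response = client.chat.completions.create(
+     model="agentica-org/DeepCoder-14B-Preview",
+     # No system prompt: the task goes entirely in the user message.
+     messages=[{"role": "user", "content":
+                "Write a Python function that checks whether a string is a palindrome."}],
+     temperature=0.6,
+     top_p=0.95,
+     max_tokens=64000,
+ )
+ print(response.choices[0].message.content)
+ ```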
+ ### Usage Recommendations
+ Our usage recommendations are similar to those for the R1 and R1-Distill series:
+ 1. Avoid adding a system prompt; all instructions should be contained within the user prompt.
+ 2. `temperature = 0.6`
+ 3. `top_p = 0.95`
+ 4. This model performs best with `max_tokens` set to at least `64000`.
+
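+ The same settings apply when running the model directly with `transformers`. A minimal, illustrative sketch for the original checkpoint follows (the exl2 weights uploaded in this commit instead require an ExLlamaV2-compatible loader):
+
+ ```python
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ model_id = "agentica-org/DeepCoder-14B-Preview"
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id, torch_dtype=torch.bfloat16, device_map="auto")
+
+ # Recommendation 1: no system prompt; the task is the user turn.
+ messages = [{"role": "user", "content": "Implement binary search in Python."}]
+ inputs = tokenizer.apply_chat_template(
+     messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+
+ outputs = model.generate(
+     inputs,
+     do_sample=True,
+     temperature=0.6,        # recommendation 2
+     top_p=0.95,             # recommendation 3
+     max_new_tokens=64000,   # recommendation 4: large budget for long reasoning
+ )
+ print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
+ ```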
+ ## License
+ This project is released under the MIT License, reflecting our commitment to open and accessible AI development.
+ We believe in democratizing AI technology by making our work freely available for anyone to use, modify, and build upon.
+ This permissive license ensures that researchers, developers, and enthusiasts worldwide can leverage and extend our work without restrictions, fostering innovation and collaboration in the AI community.
+
+ ## Acknowledgement
+ - Our training experiments are powered by our heavily modified fork of [Verl](https://github.com/agentica-project/verl), an open-source post-training library.
+ - Our model is trained on top of [`DeepSeek-R1-Distill-Qwen-14B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B).
+ - Our work is done as part of [Berkeley Sky Computing Lab](https://skycomputing.berkeley.edu/) and [Berkeley AI Research](https://bair.berkeley.edu/).
+
+ ## Citation
+ ```bibtex
+ @misc{deepcoder2025,
+   title={DeepCoder: A Fully Open-Source 14B Coder at O3-mini Level},
+   author={Michael Luo and Sijun Tan and Roy Huang and Ameen Patel and Alpay Ariyak and Qingyang Wu and Xiaoxiang Shi and Rachel Xin and Colin Cai and Maurice Weber and Ce Zhang and Li Erran Li and Raluca Ada Popa and Ion Stoica},
+   howpublished={\url{https://pretty-radio-b75.notion.site/DeepCoder-A-Fully-Open-Source-14B-Coder-at-O3-mini-Level-1cf81902c14680b3bee5eb349a512a51}},
+   note={Notion Blog},
+   year={2025}
+ }
+ ```
config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "agentica-org/DeepCoder-14B-Preview",
+   "architectures": [
+     "Qwen2ForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 151646,
+   "eos_token_id": 151643,
+   "hidden_act": "silu",
+   "hidden_size": 5120,
+   "initializer_range": 0.02,
+   "intermediate_size": 13824,
+   "max_position_embeddings": 131072,
+   "max_window_layers": 48,
+   "model_type": "qwen2",
+   "num_attention_heads": 40,
+   "num_hidden_layers": 48,
+   "num_key_value_heads": 8,
+   "pad_token_id": 151643,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 1000000.0,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.47.1",
+   "use_cache": true,
+   "use_sliding_window": false,
+   "vocab_size": 152064,
+   "quantization_config": {
+     "quant_method": "exl2",
+     "version": "0.2.8",
+     "bits": 4.0,
+     "head_bits": 6,
+     "calibration": {
+       "rows": 115,
+       "length": 2048,
+       "dataset": "(default)"
+     }
+   }
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 151646,
+   "do_sample": true,
+   "eos_token_id": 151643,
+   "temperature": 0.6,
+   "top_p": 0.95,
+   "max_tokens": 64000,
+   "transformers_version": "4.47.1"
+ }
output-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d14114db16848d19cb9647bddf944448299b4a65fbe8b7fefd3c8f8f3c008e66
+ size 8167376620
output-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89211cae67c0a81252a8e1d9b6b701fd1660f0281b758fdd23fa9a47e3bdae33
+ size 613122144
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<|begin▁of▁sentence|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|end▁of▁sentence|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|end▁of▁sentence|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
together-ai-branding-lightOnDark.png ADDED
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
+ size 11422778
tokenizer_config.json ADDED
@@ -0,0 +1,195 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "151643": {
+       "content": "<|end▁of▁sentence|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151644": {
+       "content": "<|User|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151645": {
+       "content": "<|Assistant|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151646": {
+       "content": "<|begin▁of▁sentence|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151647": {
+       "content": "<|EOT|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151648": {
+       "content": "<think>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151649": {
+       "content": "</think>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151650": {
+       "content": "<|quad_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151651": {
+       "content": "<|quad_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151652": {
+       "content": "<|vision_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151653": {
+       "content": "<|vision_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151654": {
+       "content": "<|vision_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151655": {
+       "content": "<|image_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151656": {
+       "content": "<|video_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151657": {
+       "content": "<tool_call>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151658": {
+       "content": "</tool_call>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151659": {
+       "content": "<|fim_prefix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151660": {
+       "content": "<|fim_middle|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151661": {
+       "content": "<|fim_suffix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151662": {
+       "content": "<|fim_pad|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151663": {
+       "content": "<|repo_name|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "151664": {
+       "content": "<|file_sep|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     }
+   },
+   "bos_token": "<|begin▁of▁sentence|>",
+   "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|end▁of▁sentence|>",
+   "extra_special_tokens": {},
+   "legacy": true,
+   "model_max_length": 16384,
+   "pad_token": "<|end▁of▁sentence|>",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": null,
+   "use_default_system_prompt": false
+ }