Add CodeLlama-70b-Instruct-hf (#752)
* Add CodeLlama-70b-Instruct-hf
* add comment to reduce
* Added missing newlines to prompt format for codellama 70b
* remove extra space
* stop tokens
* Remove source newline
* fix preprompt
* fix prompt one last time
* add news
* shorter text
* fix link & remove old tokens
---------
Co-authored-by: Mishig Davaadorj <[email protected]>
- .env +2 -2
- .env.template +9 -12
- PROMPTS.md +6 -0
- src/routes/conversation/[id]/+server.ts +12 -2
.env
CHANGED

@@ -99,9 +99,9 @@ PUBLIC_SHARE_PREFIX=#https://hf.co/chat
 PUBLIC_GOOGLE_ANALYTICS_ID=#G-XXXXXXXX / Leave empty to disable
 PUBLIC_ANNOUNCEMENT_BANNERS=`[
   {
-    "title": "Llama
+    "title": "Code Llama 70B is live! 🦙",
     "linkTitle": "Announcement",
-    "linkHref": "https://
+    "linkHref": "https://ai.meta.com/blog/code-llama-large-language-model-coding/"
   }
 ]`
.env.template
CHANGED

@@ -89,16 +89,12 @@ MODELS=`[
     }
   },
   {
-    "name": "codellama/CodeLlama-
-    "displayName": "codellama/CodeLlama-
-    "description": "Code Llama, a state of the art code model from Meta.",
-    "websiteUrl": "https://
-    "
-    "
-    "assistantMessageToken": "",
-    "assistantMessageEndToken": " </s><s>[INST] ",
-    "preprompt": " ",
-    "chatPromptTemplate" : "<s>[INST] <<SYS>>\n{{preprompt}}\n<</SYS>>\n\n{{#each messages}}{{#ifUser}}{{content}} [/INST] {{/ifUser}}{{#ifAssistant}}{{content}} </s><s>[INST] {{/ifAssistant}}{{/each}}",
+    "name": "codellama/CodeLlama-70b-Instruct-hf",
+    "displayName": "codellama/CodeLlama-70b-Instruct-hf",
+    "description": "Code Llama, a state of the art code model from Meta. Now in 70B!",
+    "websiteUrl": "https://ai.meta.com/blog/code-llama-large-language-model-coding/",
+    "preprompt": "",
+    "chatPromptTemplate" : "<s>{{#if @root.preprompt}}Source: system\n\n {{@root.preprompt}} <step> {{/if}}{{#each messages}}{{#ifUser}}Source: user\n\n {{content}} <step> {{/ifUser}}{{#ifAssistant}}Source: assistant\n\n {{content}} <step> {{/ifAssistant}}{{/each}}Source: assistant\nDestination: user\n\n ",
     "promptExamples": [
       {
         "title": "Fibonacci in Python",

@@ -118,7 +114,7 @@ MODELS=`[
     "top_k": 50,
     "truncate": 4096,
     "max_new_tokens": 4096,
-    "stop": ["
+    "stop": ["<step>", " <step>", " <step> "],
     }
   },
   {

@@ -217,7 +213,8 @@ OLD_MODELS=`[
   {"name":"HuggingFaceH4/zephyr-7b-alpha"},
   {"name":"openchat/openchat_3.5"},
   {"name":"openchat/openchat-3.5-1210"},
-  {"name": "tiiuae/falcon-180B-chat"}
+  {"name": "tiiuae/falcon-180B-chat"},
+  {"name": "codellama/CodeLlama-34b-Instruct-hf"}
 ]`

 TASK_MODEL='mistralai/Mistral-7B-Instruct-v0.1'
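For context on the new `chatPromptTemplate`: Code Llama 70B Instruct uses a `Source:` / `Destination:` turn format with `<step>` as the turn separator, and the template above emits exactly that. Below is a minimal TypeScript sketch of how such a template renders, assuming Handlebars with simplified `ifUser`/`ifAssistant` block helpers standing in for the ones chat-ui registers; the `Message` shape is illustrative, not chat-ui's actual type.

```ts
import Handlebars from "handlebars";

// Illustrative message shape; chat-ui's real Message type carries more fields.
type Message = { from: "user" | "assistant"; content: string };

// Minimal stand-ins for the ifUser/ifAssistant block helpers the template uses.
Handlebars.registerHelper("ifUser", function (this: Message, options: Handlebars.HelperOptions) {
	return this.from === "user" ? options.fn(this) : "";
});
Handlebars.registerHelper("ifAssistant", function (this: Message, options: Handlebars.HelperOptions) {
	return this.from === "assistant" ? options.fn(this) : "";
});

// The CodeLlama 70B template from the diff (after JSON parsing, \n is a real newline).
const render = Handlebars.compile(
	"<s>{{#if @root.preprompt}}Source: system\n\n {{@root.preprompt}} <step> {{/if}}" +
		"{{#each messages}}{{#ifUser}}Source: user\n\n {{content}} <step> {{/ifUser}}" +
		"{{#ifAssistant}}Source: assistant\n\n {{content}} <step> {{/ifAssistant}}{{/each}}" +
		"Source: assistant\nDestination: user\n\n ",
	{ noEscape: true }
);

const prompt = render({
	preprompt: "You are a helpful coding assistant.",
	messages: [{ from: "user", content: "Write fibonacci in Python." }] as Message[],
});
```

The rendered prompt ends with `Destination: user\n\n ` including a trailing space, so the model generates directly from there until it emits `<step>`, which is presumably why the `stop` list includes whitespace-padded variants of the token.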
PROMPTS.md
CHANGED

@@ -55,3 +55,9 @@ System: {{preprompt}}\nUser:{{#each messages}}{{#ifUser}}{{content}}\nFalcon:{{/
 ```env
 {{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}}
 ```
+
+## CodeLlama 70B
+
+```env
+<s>{{#if @root.preprompt}}Source: system\n\n {{@root.preprompt}} <step> {{/if}}{{#each messages}}{{#ifUser}}Source: user\n\n {{content}} <step> {{/ifUser}}{{#ifAssistant}}Source: assistant\n\n {{content}} <step> {{/ifAssistant}}{{/each}}Source: assistant\nDestination: user\n\n 
+```
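To make the whitespace explicit, this is what the documented template renders for a hypothetical single-turn conversation (shown as a TypeScript string so the `\n` escapes and the trailing space are visible; the preprompt and question are made up for illustration):

```ts
// Hypothetical one-turn rendering of the CodeLlama 70B template above.
const rendered =
	"<s>Source: system\n\n You are a helpful coding assistant. <step> " +
	"Source: user\n\n Write fibonacci in Python. <step> " +
	"Source: assistant\nDestination: user\n\n "; // the trailing space is intentional
```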
src/routes/conversation/[id]/+server.ts
CHANGED

@@ -310,13 +310,23 @@ export async function POST({ request, locals, params, getClientAddress }) {
 			}
 		}
 	} else {
+		let interrupted = !output.token.special;
 		// add output.generated text to the last message
+		// strip end tokens from the output.generated_text
+		const text = (model.parameters.stop ?? []).reduce((acc: string, curr: string) => {
+			if (acc.endsWith(curr)) {
+				interrupted = false;
+				return acc.slice(0, acc.length - curr.length);
+			}
+			return acc;
+		}, output.generated_text.trimEnd());
+
 		messages = [
 			...messages.slice(0, -1),
 			{
 				...messages[messages.length - 1],
-				content: previousContent +
-				interrupted
+				content: previousContent + text,
+				interrupted, // if its a special token it finished on its own, else it was interrupted
 				updates,
 				updatedAt: new Date(),
 			},
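The new reduce trims any matched stop sequence off the end of `output.generated_text` and, when one is found, clears `interrupted`, since the model stopped on its own rather than being cut off. A standalone sketch of the same logic (the function name and return shape are illustrative, not chat-ui's API):

```ts
// Sketch of the stop-sequence stripping added above; illustrative names.
function stripStopTokens(
	generatedText: string,
	stopSequences: string[],
	endedOnSpecialToken: boolean
): { text: string; interrupted: boolean } {
	// start from the token flag, as the server code does with output.token.special
	let interrupted = !endedOnSpecialToken;
	const text = stopSequences.reduce((acc, curr) => {
		if (acc.endsWith(curr)) {
			interrupted = false; // a stop sequence means a natural finish
			return acc.slice(0, acc.length - curr.length);
		}
		return acc;
	}, generatedText.trimEnd());
	return { text, interrupted };
}

// With CodeLlama 70B's stop tokens, a completion ending in "<step>" is
// trimmed and no longer counted as interrupted:
stripStopTokens("print(fib(10))<step>", ["<step>", " <step>", " <step> "], false);
// -> { text: "print(fib(10))", interrupted: false }
```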