Update app.py
app.py CHANGED
@@ -10,7 +10,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
-MODEL_ID = "
+MODEL_ID = "bigcode/starcoder"
 CHAT_TEMPLATE = "ChatML"
 MODEL_NAME = MODEL_ID.split("/")[-1]
 CONTEXT_LENGTH = 1300
@@ -64,11 +64,6 @@ def predict(message, history, system_prompt, temperature, max_new_tokens, top_k,
     yield "".join(outputs)
 
 
-def handle_retry(history, retry_data: gr.RetryData):
-    new_history = history[:retry_data.index]
-    previous_prompt = history[retry_data.index]['content']
-    yield from respond(previous_prompt, new_history)
-
 
 # Load model
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -128,5 +123,4 @@ gr.ChatInterface(
         scale=1,
         show_copy_button=True
     )
-    #chatbot.retry(handle_retry, chatbot, [chatbot]),
 ).queue().launch()
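The removed handle_retry and the commented-out chatbot.retry(...) wiring belong together: the handler truncates the conversation at the retried turn and regenerates from the original prompt. If that behaviour is wanted again, a minimal standalone sketch of the same pattern follows; it assumes a messages-format gr.Chatbot, and the respond function here is only a stub standing in for the app's streaming generator.

# Minimal sketch of the retry wiring this commit drops (not part of the commit).
import gradio as gr

def respond(prompt, history):
    # Stub: yield one updated message list instead of streaming tokens.
    yield history + [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": f"(regenerated reply to: {prompt})"},
    ]

def handle_retry(history, retry_data: gr.RetryData):
    # retry_data.index points at the retried turn: keep everything before it
    # and regenerate from the original prompt.
    new_history = history[:retry_data.index]
    previous_prompt = history[retry_data.index]["content"]
    yield from respond(previous_prompt, new_history)

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    chatbot.retry(handle_retry, chatbot, [chatbot])

demo.launch()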
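For context, the load step that follows the flash-attn install and the MODEL_ID block is only hinted at by the "# Load model" context line and the BitsAndBytesConfig import. A plausible sketch is below; the 4-bit settings and the attn_implementation flag are assumptions, not values taken from this commit.

# Hypothetical load step consistent with the imports above; the quantization
# values and attention backend are assumptions, not read from app.py.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_ID = "bigcode/starcoder"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                          # assumed
    bnb_4bit_compute_dtype=torch.bfloat16,      # assumed
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=quantization_config,
    attn_implementation="flash_attention_2",    # relies on the flash-attn install above
)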