Update app.py
app.py CHANGED
@@ -10,7 +10,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
-MODEL_ID = "
+MODEL_ID = "bigcode/starcoder"
 CHAT_TEMPLATE = "ChatML"
 MODEL_NAME = MODEL_ID.split("/")[-1]
 CONTEXT_LENGTH = 1300
@@ -64,11 +64,6 @@ def predict(message, history, system_prompt, temperature, max_new_tokens, top_k,
     yield "".join(outputs)
 
 
-def handle_retry(history, retry_data: gr.RetryData):
-    new_history = history[:retry_data.index]
-    previous_prompt = history[retry_data.index]['content']
-    yield from respond(previous_prompt, new_history)
-
 
 # Load model
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -128,5 +123,4 @@ gr.ChatInterface(
         scale=1,
         show_copy_button=True
     )
-    #chatbot.retry(handle_retry, chatbot, [chatbot]),
 ).queue().launch()
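The removed handle_retry and the commented-out chatbot.retry(...) wiring belong together: the handler truncates the conversation at the retried turn and regenerates from the original prompt. If that behaviour is wanted again, a minimal standalone sketch of the same pattern follows; it assumes a messages-format gr.Chatbot, and the respond function here is only a stub standing in for the app's streaming generator.

# Minimal sketch of the retry wiring this commit drops (not part of the commit).
import gradio as gr

def respond(prompt, history):
    # Stub: yield one updated message list instead of streaming tokens.
    yield history + [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": f"(regenerated reply to: {prompt})"},
    ]

def handle_retry(history, retry_data: gr.RetryData):
    # retry_data.index points at the retried turn: keep everything before it
    # and regenerate from the original prompt.
    new_history = history[:retry_data.index]
    previous_prompt = history[retry_data.index]["content"]
    yield from respond(previous_prompt, new_history)

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    chatbot.retry(handle_retry, chatbot, [chatbot])

demo.launch()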
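For context, the load step that follows the flash-attn install and the MODEL_ID block is only hinted at by the "# Load model" context line and the BitsAndBytesConfig import. A plausible sketch is below; the 4-bit settings and the attn_implementation flag are assumptions, not values taken from this commit.

# Hypothetical load step consistent with the imports above; the quantization
# values and attention backend are assumptions, not read from app.py.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_ID = "bigcode/starcoder"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                          # assumed
    bnb_4bit_compute_dtype=torch.bfloat16,      # assumed
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=quantization_config,
    attn_implementation="flash_attention_2",    # relies on the flash-attn install above
)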