MegaTronX committed on
Commit
94bce2f
·
verified ·
1 Parent(s): 87f34b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -7
app.py CHANGED
@@ -10,7 +10,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
10
 
11
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
12
 
13
- MODEL_ID = "infly/OpenCoder-8B-Instruct"
14
  CHAT_TEMPLATE = "ChatML"
15
  MODEL_NAME = MODEL_ID.split("/")[-1]
16
  CONTEXT_LENGTH = 1300
@@ -64,11 +64,6 @@ def predict(message, history, system_prompt, temperature, max_new_tokens, top_k,
64
  yield "".join(outputs)
65
 
66
 
67
- def handle_retry(history, retry_data: gr.RetryData):
68
- new_history = history[:retry_data.index]
69
- previous_prompt = history[retry_data.index]['content']
70
- yield from respond(previous_prompt, new_history)
71
-
72
 
73
  # Load model
74
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -128,5 +123,4 @@ gr.ChatInterface(
128
  scale=1,
129
  show_copy_button=True
130
  )
131
- #chatbot.retry(handle_retry, chatbot, [chatbot]),
132
  ).queue().launch()
 
10
 
11
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
12
 
13
+ MODEL_ID = "bigcode/starcoder"
14
  CHAT_TEMPLATE = "ChatML"
15
  MODEL_NAME = MODEL_ID.split("/")[-1]
16
  CONTEXT_LENGTH = 1300
 
64
  yield "".join(outputs)
65
 
66
 
 
 
 
 
 
67
 
68
  # Load model
69
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
123
  scale=1,
124
  show_copy_button=True
125
  )
 
126
  ).queue().launch()