Update app.py
app.py (CHANGED)
@@ -5,12 +5,16 @@ import os
 import gradio as gr
 import sentencepiece
 
+model_id = "01-ai/Yi-34B-200K"
 
 os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:126'
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load the model and tokenizer using transformers
-model = AutoModelForCausalLM.from_pretrained("01-ai/Yi-34B-200K", device_map="auto", torch_dtype="auto", trust_remote_code=True)
 tokenizer = YiTokenizer(vocab_file="./tokenizer.model")
+model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
+model = model.to(torch.bfloat16)
+model = model.to(device)
 
 def run(message, chat_history, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800):
     prompt = get_prompt(message, chat_history)
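For reference, here is the model-loading state this hunk leaves app.py in, written out as a self-contained sketch with the imports the snippet relies on. The tokenizer line is omitted because YiTokenizer is a custom class defined elsewhere in this repo; torch and transformers are assumed to be installed.

import os

import torch
from transformers import AutoModelForCausalLM

# Hub identifier of the checkpoint to load.
model_id = "01-ai/Yi-34B-200K"

# Cap the CUDA allocator's split size to reduce memory fragmentation
# when loading a large checkpoint.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:126'

# Prefer the GPU when one is visible, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the weights, then cast to bfloat16 and move to the target
# device in two explicit steps.
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
model = model.to(torch.bfloat16)
model = model.to(device)

Note that the removed line achieved a similar result in a single call via torch_dtype="auto" and device_map="auto"; the explicit .to(...) form trades that convenience for direct control over dtype and placement, typically at the cost of first materializing the weights in full precision.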