Update app.py
app.py CHANGED

```diff
@@ -3,7 +3,7 @@ import torch
 import os
 import gradio as gr
 import sentencepiece
-from tokenization_yi import YiTokenizer
+# from tokenization_yi import YiTokenizer
 
 
 from transformers import AutoModelForCausalLM, GPTQConfig, AutoTokenizer, AutoModelForCausalLM
@@ -16,9 +16,13 @@ from tokenization_yi import YiTokenizer
 os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
 model_id = "TheBloke/Yi-34B-200K-Llamafied-GPTQ"
 
-
-tokenizer = YiTokenizer.from_pretrained("./")
-model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto", trust_remote_code=True, quantization_config=gptq_config)
+
+tokenizer = AutoTokenizer.from_pretrained("larryvrh/Yi-34B-200K-Llamafied")
+model = AutoModelForCausalLM.from_pretrained("larryvrh/Yi-34B-200K-Llamafied", device_map="auto", torch_dtype="bfloat16", trust_remote_code=True)
+
+# gptq_config = GPTQConfig(bits=4, exllama_config={"version": 2})
+# tokenizer = YiTokenizer.from_pretrained("./")
+# model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto", trust_remote_code=True, quantization_config=gptq_config)
 
 def predict(message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800):
     prompt = message.strip()
```
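
The commit switches from the GPTQ-quantized checkpoint to the unquantized Llamafied weights loaded in bfloat16. For reference, here is the disabled GPTQ path reassembled from the commented-out lines above as a minimal runnable sketch; using AutoTokenizer instead of the repo-local YiTokenizer is an assumption (the original loaded it from "./"):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

model_id = "TheBloke/Yi-34B-200K-Llamafied-GPTQ"

# 4-bit GPTQ weights decoded with the ExLlama v2 kernels.
gptq_config = GPTQConfig(bits=4, exllama_config={"version": 2})

# Assumption: the quantized repo ships a usable tokenizer; the original code
# instead loaded YiTokenizer from the working directory.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
    trust_remote_code=True,
    quantization_config=gptq_config,
)
```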
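
The diff truncates predict after its first line. Below is a minimal sketch of how its sampling parameters would typically feed transformers' generate API; do_sample=True and the decode-only-new-tokens handling are assumptions, not the file's actual body:

```python
def predict(message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800):
    prompt = message.strip()
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,  # note: 3.5 is far above the usual 0.7-1.0 range
        top_p=top_p,
        top_k=top_k,
        do_sample=True,  # sampling assumed; temperature/top_p/top_k are inert under greedy decoding
    )
    # Return only the newly generated continuation, not the echoed prompt.
    return tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
```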
|