Spaces:

schuler
/

experimental-kphi-3-micro-4k-instruct-gradio-autoloader

Sleeping

App Files Community

schuler committed on Dec 3, 2024

Commit

2185001

·

verified ·

1 Parent(s): 1929afc

Update app.py

Files changed (1) hide show

app.py +5 -3

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 import os, sys
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, pipeline
 import torch
 import spaces
 import psutil
@@ -13,9 +14,10 @@ REPO_NAME = 'schuler/experimental-JP47D21-KPhi-3-micro-4k-instruct'
 # How to cache?
 @spaces.GPU()
 def load_model(repo_name):
-    tokenizer = AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)
-    generator_conf = GenerationConfig.from_pretrained(repo_name)
-    model = AutoModelForCausalLM.from_pretrained(repo_name, trust_remote_code=True, torch_dtype=torch.bfloat16, attn_implementation="eager")
     # model.to('cuda')
     return tokenizer, generator_conf, model

 import os, sys
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, pipeline
+from transformers import LlamaTokenizer
 import torch
 import spaces
 import psutil
 # How to cache?
 @spaces.GPU()
 def load_model(repo_name):
+    # tokenizer = AutoTokenizer.from_pretrained(REPO_NAME, trust_remote_code=True)
+    tokenizer = LlamaTokenizer.from_pretrained(REPO_NAME, trust_remote_code=True)
+    generator_conf = GenerationConfig.from_pretrained(REPO_NAME)
+    model = AutoModelForCausalLM.from_pretrained(REPO_NAME, trust_remote_code=True, torch_dtype=torch.bfloat16, attn_implementation="eager")
     # model.to('cuda')
     return tokenizer, generator_conf, model