Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,6 @@ import spaces
|
|
7 |
import torch
|
8 |
from huggingface_hub import InferenceClient
|
9 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
10 |
-
from peft import AutoPeftModelForCausalLM
|
11 |
|
12 |
MAX_MAX_NEW_TOKENS = 512
|
13 |
DEFAULT_MAX_NEW_TOKENS = 512
|
@@ -20,7 +19,7 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
|
20 |
if torch.cuda.is_available():
|
21 |
#model_id = "Qwen/Qwen2.5-7B-Instruct"
|
22 |
model_id = "BenBranyon/sumbot7b"
|
23 |
-
model =
|
24 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
25 |
tokenizer.use_default_system_prompt = False
|
26 |
|
|
|
7 |
import torch
|
8 |
from huggingface_hub import InferenceClient
|
9 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
|
|
10 |
|
11 |
MAX_MAX_NEW_TOKENS = 512
|
12 |
DEFAULT_MAX_NEW_TOKENS = 512
|
|
|
19 |
if torch.cuda.is_available():
|
20 |
#model_id = "Qwen/Qwen2.5-7B-Instruct"
|
21 |
model_id = "BenBranyon/sumbot7b"
|
22 |
+
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
|
23 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
24 |
tokenizer.use_default_system_prompt = False
|
25 |
|