Spaces:
Sleeping
Sleeping
app.py
CHANGED
@@ -9,9 +9,9 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
|
9 |
from datasets import load_dataset
|
10 |
|
11 |
huggingface_hub.login(os.getenv('HF_TOKEN'))
|
12 |
-
peft_model_id = "debisoft/DeepSeek-R1-Distill-Qwen-7B-thinking-function_calling-quant-V0"
|
13 |
#peft_model_id = "debisoft/Qwen2.5-VL-7B-Instruct-thinking-function_calling-quant-V0"
|
14 |
-
|
15 |
|
16 |
bnb_config = BitsAndBytesConfig(
|
17 |
load_in_4bit=True,
|
@@ -25,8 +25,8 @@ cuda_device = torch.device("cuda")
|
|
25 |
cpu_device = torch.device("cpu")
|
26 |
|
27 |
config = PeftConfig.from_pretrained(peft_model_id)
|
28 |
-
#model =
|
29 |
-
model =
|
30 |
quantization_config=bnb_config,
|
31 |
device_map="auto",
|
32 |
)
|
@@ -52,7 +52,7 @@ def sentience_check():
|
|
52 |
|
53 |
with torch.no_grad():
|
54 |
outputs = peft_model.generate(
|
55 |
-
**inputs, max_new_tokens=
|
56 |
)
|
57 |
|
58 |
#peft_model.to(cpu_device)
|
|
|
9 |
from datasets import load_dataset
|
10 |
|
11 |
huggingface_hub.login(os.getenv('HF_TOKEN'))
|
12 |
+
#peft_model_id = "debisoft/DeepSeek-R1-Distill-Qwen-7B-thinking-function_calling-quant-V0"
|
13 |
#peft_model_id = "debisoft/Qwen2.5-VL-7B-Instruct-thinking-function_calling-quant-V0"
|
14 |
+
peft_model_id = "debisoft/Qwen2.5-VL-3B-Instruct-thinking-function_calling-V0"
|
15 |
|
16 |
bnb_config = BitsAndBytesConfig(
|
17 |
load_in_4bit=True,
|
|
|
25 |
cpu_device = torch.device("cpu")
|
26 |
|
27 |
config = PeftConfig.from_pretrained(peft_model_id)
|
28 |
+
#model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,
|
29 |
+
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(config.base_model_name_or_path,
|
30 |
quantization_config=bnb_config,
|
31 |
device_map="auto",
|
32 |
)
|
|
|
52 |
|
53 |
with torch.no_grad():
|
54 |
outputs = peft_model.generate(
|
55 |
+
**inputs, max_new_tokens=1024, pad_token_id = tokenizer.eos_token_id
|
56 |
)
|
57 |
|
58 |
#peft_model.to(cpu_device)
|