debisoft committed on
Commit
a4ad80b
·
1 Parent(s): 3ace9c8
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -9,9 +9,9 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
9
  from datasets import load_dataset
10
 
11
  huggingface_hub.login(os.getenv('HF_TOKEN'))
12
- peft_model_id = "debisoft/DeepSeek-R1-Distill-Qwen-7B-thinking-function_calling-quant-V0"
13
  #peft_model_id = "debisoft/Qwen2.5-VL-7B-Instruct-thinking-function_calling-quant-V0"
14
- #peft_model_id = "debisoft/Qwen2.5-VL-3B-Instruct-thinking-function_calling-V0"
15
 
16
  bnb_config = BitsAndBytesConfig(
17
  load_in_4bit=True,
@@ -25,8 +25,8 @@ cuda_device = torch.device("cuda")
25
  cpu_device = torch.device("cpu")
26
 
27
  config = PeftConfig.from_pretrained(peft_model_id)
28
- #model = Qwen2_5_VLForConditionalGeneration.from_pretrained(config.base_model_name_or_path,
29
- model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,
30
  quantization_config=bnb_config,
31
  device_map="auto",
32
  )
@@ -52,7 +52,7 @@ def sentience_check():
52
 
53
  with torch.no_grad():
54
  outputs = peft_model.generate(
55
- **inputs, max_new_tokens=500, pad_token_id = tokenizer.eos_token_id
56
  )
57
 
58
  #peft_model.to(cpu_device)
 
9
  from datasets import load_dataset
10
 
11
  huggingface_hub.login(os.getenv('HF_TOKEN'))
12
+ #peft_model_id = "debisoft/DeepSeek-R1-Distill-Qwen-7B-thinking-function_calling-quant-V0"
13
  #peft_model_id = "debisoft/Qwen2.5-VL-7B-Instruct-thinking-function_calling-quant-V0"
14
+ peft_model_id = "debisoft/Qwen2.5-VL-3B-Instruct-thinking-function_calling-V0"
15
 
16
  bnb_config = BitsAndBytesConfig(
17
  load_in_4bit=True,
 
25
  cpu_device = torch.device("cpu")
26
 
27
  config = PeftConfig.from_pretrained(peft_model_id)
28
+ #model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,
29
+ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(config.base_model_name_or_path,
30
  quantization_config=bnb_config,
31
  device_map="auto",
32
  )
 
52
 
53
  with torch.no_grad():
54
  outputs = peft_model.generate(
55
+ **inputs, max_new_tokens=1024, pad_token_id = tokenizer.eos_token_id
56
  )
57
 
58
  #peft_model.to(cpu_device)