ehristoforu committed
Commit 2ea7af4 · verified · 1 Parent(s): 1e4ed2e

Update app.py

Files changed (1)
  1. app.py +7 -6
app.py CHANGED
@@ -22,17 +22,18 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 HF_TOKEN = os.getenv("HF_TOKEN")
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-model_name = "estrogen/c4ai-command-r7b-12-2024"
-
-model = Llama3ForCausalLM.from_pretrained(
+model_name = "Qwen/Qwen2.5-1.5B-Instruct"
+'''
+model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.float16,
     trust_remote_code=True
 )
+'''
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-#peft_model = AutoPeftModelForCausalLM.from_pretrained("ehristoforu/think-lora-qwen-r64")
-#merged_model = peft_model.merge_and_unload()
+peft_model = AutoPeftModelForCausalLM.from_pretrained("ehristoforu/fd-lora-64x128", torch_dtype=torch.float16, trust_remote_code=True)
+merged_model = peft_model.merge_and_unload()
 #merged_model.save_pretrained("./coolqwen")
 #model.save_pretrained("./coolqwen")
 #tokenizer.save_pretrained("./coolqwen")
@@ -81,7 +82,7 @@ def generate(
         num_beams=1,
         repetition_penalty=repetition_penalty,
     )
-    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t = Thread(target=merged_model.generate, kwargs=generate_kwargs)
     t.start()
 
     outputs = []
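
For context, the first hunk switches the Space from loading a base checkpoint directly to building the serving model from a LoRA adapter. Below is a minimal sketch of that load-and-merge pattern, assuming the adapter id from the diff and the standard peft/transformers APIs; the Space's real app.py may differ in details such as device placement.

# Minimal sketch of the adapter-based loading path introduced by this commit.
# The adapter id "ehristoforu/fd-lora-64x128" and the base model id
# "Qwen/Qwen2.5-1.5B-Instruct" come from the diff; dtype and everything else
# here is an assumption, not the Space's exact code.
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

adapter_id = "ehristoforu/fd-lora-64x128"

# AutoPeftModelForCausalLM reads the adapter config, loads the base model it was
# trained on, and attaches the LoRA weights in one call.
peft_model = AutoPeftModelForCausalLM.from_pretrained(
    adapter_id,
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# merge_and_unload() folds the LoRA deltas into the base weights and returns a
# plain transformers model, so inference pays no adapter overhead.
merged_model = peft_model.merge_and_unload()

# The tokenizer is still loaded from the base model id, as in app.py.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")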
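
The second hunk only retargets the worker thread from model.generate to merged_model.generate. The surrounding streaming loop is not visible in the diff; the sketch below reconstructs the usual transformers TextIteratorStreamer pattern that such a Thread call typically belongs to, so the streamer, the sampling arguments, and the helper name stream_reply are assumptions, not lines from app.py.

# Sketch of the background-thread streaming pattern around
# `t = Thread(target=merged_model.generate, kwargs=generate_kwargs)`.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(merged_model, tokenizer, prompt: str, max_new_tokens: int = 256):
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(merged_model.device)

    # skip_prompt=True so only newly generated tokens are yielded to the caller.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        num_beams=1,
        repetition_penalty=1.2,
    )

    # generate() runs in a worker thread and pushes decoded text into the streamer;
    # the main thread drains it incrementally, which is what a Gradio app streams to the UI.
    t = Thread(target=merged_model.generate, kwargs=generate_kwargs)
    t.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)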