Lahiru Menikdiwela committed on
Commit 638094e · 1 Parent(s): 4a4e906

load model in 4-bit and bfloat16 compute type with pipeline output check

Files changed (2):
  1. model.py +2 -1
  2. summarizer.py +1 -1
model.py CHANGED
@@ -25,7 +25,8 @@ def get_local_model(model_name_or_path:str)->pipeline:
     )
     model = AutoModelForCausalLM.from_pretrained(
         model_name_or_path,
-        torch_dtype=torch.float32,
+        torch_dtype=torch.bfloat16,
+        load_in_4bit = True,
         token = hf_token
     )
     pipe = pipeline(
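
For reference, a minimal sketch of what the new arguments do (not this repo's exact code): weights are stored in 4-bit while matmuls run in bfloat16. Recent transformers releases deprecate passing load_in_4bit directly to from_pretrained in favor of a BitsAndBytesConfig, so the sketch uses that path; the model id and token values below are placeholders.

    import torch
    from transformers import (
        AutoModelForCausalLM,
        AutoTokenizer,
        BitsAndBytesConfig,
        pipeline,
    )

    model_name_or_path = "some-org/some-causal-lm"  # placeholder, not from this commit
    hf_token = "hf_..."                             # placeholder token

    # Quantization settings equivalent to load_in_4bit=True plus a
    # bfloat16 compute dtype.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        quantization_config=bnb_config,
        token=hf_token,
    )
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

Note that 4-bit loading also requires the bitsandbytes and accelerate packages and a supported GPU.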
summarizer.py CHANGED
@@ -44,7 +44,7 @@ def summarizer_summarize(model_type,tokenizer, base_summarizer, text:str,summari
     elif model_type == "local":
         pipe = base_summarizer
         start = time.time()
-        summary = pipe(text_to_summarize)[0]['summary_text']
+        summary = pipe(text_to_summarize)[0]
         end = time.time()
         print(f"Summary generation took {round((end-start),2)}s.")
         return summary,round((end-start),2)
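
The change above returns the raw first element of the pipeline output instead of indexing ['summary_text']. That key only exists for summarization pipelines; a text-generation pipeline, as built in model.py, keys its output with 'generated_text', so the old indexing would raise a KeyError there. A hedged sketch of a defensive extraction (the extract_summary helper is hypothetical, not part of this repo):

    def extract_summary(pipe_output):
        # Hypothetical helper: pull the text out of a transformers
        # pipeline result regardless of task type.
        first = pipe_output[0]
        if isinstance(first, dict):
            # Summarization pipelines key results as 'summary_text';
            # text-generation pipelines use 'generated_text'.
            for key in ("summary_text", "generated_text"):
                if key in first:
                    return first[key]
        return first  # fall back to the raw element

    # Usage, mirroring the call site in summarizer.py:
    # summary = extract_summary(pipe(text_to_summarize))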