Lahiru Menikdiwela committed on
Commit 638094e · 1 Parent(s): 4a4e906

load model in 4-bit and bfloat16 compute type with pipeline output check

Files changed (2):
  1. model.py +2 -1
  2. summarizer.py +1 -1
model.py CHANGED
@@ -25,7 +25,8 @@ def get_local_model(model_name_or_path:str)->pipeline:
     )
     model = AutoModelForCausalLM.from_pretrained(
         model_name_or_path,
-        torch_dtype=torch.float32,
+        torch_dtype=torch.bfloat16,
+        load_in_4bit = True,
         token = hf_token
     )
     pipe = pipeline(
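
For reference, a minimal sketch of what the new arguments do (not this repo's exact code): weights are stored in 4-bit while matmuls run in bfloat16. Recent transformers releases deprecate passing load_in_4bit directly to from_pretrained in favor of a BitsAndBytesConfig, so the sketch uses that path; the model id and token values below are placeholders.

    import torch
    from transformers import (
        AutoModelForCausalLM,
        AutoTokenizer,
        BitsAndBytesConfig,
        pipeline,
    )

    model_name_or_path = "some-org/some-causal-lm"  # placeholder, not from this commit
    hf_token = "hf_..."                             # placeholder token

    # Quantization settings equivalent to load_in_4bit=True plus a
    # bfloat16 compute dtype.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        quantization_config=bnb_config,
        token=hf_token,
    )
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

Note that 4-bit loading also requires the bitsandbytes and accelerate packages and a supported GPU.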
summarizer.py CHANGED
@@ -44,7 +44,7 @@ def summarizer_summarize(model_type,tokenizer, base_summarizer, text:str,summari
     elif model_type == "local":
         pipe = base_summarizer
         start = time.time()
-        summary = pipe(text_to_summarize)[0]['summary_text']
+        summary = pipe(text_to_summarize)[0]
         end = time.time()
         print(f"Summary generation took {round((end-start),2)}s.")
         return summary,round((end-start),2)
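
The change above returns the raw first element of the pipeline output instead of indexing ['summary_text']. That key only exists for summarization pipelines; a text-generation pipeline, as built in model.py, keys its output with 'generated_text', so the old indexing would raise a KeyError there. A hedged sketch of a defensive extraction (the extract_summary helper is hypothetical, not part of this repo):

    def extract_summary(pipe_output):
        # Hypothetical helper: pull the text out of a transformers
        # pipeline result regardless of task type.
        first = pipe_output[0]
        if isinstance(first, dict):
            # Summarization pipelines key results as 'summary_text';
            # text-generation pipelines use 'generated_text'.
            for key in ("summary_text", "generated_text"):
                if key in first:
                    return first[key]
        return first  # fall back to the raw element

    # Usage, mirroring the call site in summarizer.py:
    # summary = extract_summary(pipe(text_to_summarize))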