schuler committed (verified)
Commit 9110504 · Parent(s): c6c83ca

Update README.md

Files changed (1)
  1. README.md +7 -3
README.md CHANGED
@@ -36,6 +36,10 @@ The following table shows LaMini training results with the baseline and the opti
 
 ## Usage:
 ```
+!pip install -q -U transformers
+!pip install -q -U accelerate
+!pip install -q -U flash-attn --no-build-isolation
+
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, pipeline
 from transformers import LlamaTokenizer
 import torch
@@ -45,8 +49,8 @@ REPO_NAME = 'schuler/experimental-JP47D55C'
 def load_model(local_repo_name):
   tokenizer = LlamaTokenizer.from_pretrained(local_repo_name, trust_remote_code=True)
   generator_conf = GenerationConfig.from_pretrained(local_repo_name)
-  model = AutoModelForCausalLM.from_pretrained(local_repo_name, trust_remote_code=True, torch_dtype=torch.bfloat16, attn_implementation="eager")
-  # model.to('cuda')
+  model = AutoModelForCausalLM.from_pretrained(local_repo_name, trust_remote_code=True, attn_implementation="flash_attention_2", torch_dtype=torch.float16)
+  model.to('cuda')
   return tokenizer, generator_conf, model
 
 tokenizer, generator_conf, model = load_model(REPO_NAME)
@@ -57,7 +61,7 @@ except Exception as e:
   global_error = f"Failed to load model: {str(e)}"
 
 def PrintTest(str):
-  print(generator(str, max_new_tokens=256, do_sample=True, top_p=0.25, repetition_penalty=1.2))
+  print(generator(str, max_new_tokens=256, do_sample=True, top_p=0.5, repetition_penalty=1.2))
 
 PrintTest(f"<|user|>\nHello\n<|end|>\n<|assistant|>\n")
 PrintTest(f"<|user|>Hello\n<|end|><|assistant|>")
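For readers trying the updated snippet: the hunks above elide the lines that wrap `load_model` in a try/except and create the `generator` pipeline, so the minimal end-to-end sketch below fills those in as assumptions. The `pipeline("text-generation", ...)` call and the try/except wiring are inferred from the `pipeline` import, the `generator` name, and the `except Exception as e:` hunk context; they are not part of this commit.

```
# Post-commit usage sketch. The try/except wiring and the pipeline
# construction are assumptions inferred from the diff context.
from transformers import AutoModelForCausalLM, GenerationConfig, pipeline
from transformers import LlamaTokenizer
import torch

REPO_NAME = 'schuler/experimental-JP47D55C'

def load_model(local_repo_name):
    tokenizer = LlamaTokenizer.from_pretrained(local_repo_name, trust_remote_code=True)
    # Loaded for completeness, mirroring the README; not used below.
    generator_conf = GenerationConfig.from_pretrained(local_repo_name)
    # flash_attention_2 needs the flash-attn package, a supported CUDA GPU,
    # and a half-precision dtype (fp16 here; bf16 before this commit).
    model = AutoModelForCausalLM.from_pretrained(
        local_repo_name,
        trust_remote_code=True,
        attn_implementation="flash_attention_2",
        torch_dtype=torch.float16,
    )
    model.to('cuda')
    return tokenizer, generator_conf, model

global_error = None
try:
    tokenizer, generator_conf, model = load_model(REPO_NAME)
    # Assumed: the elided lines build a text-generation pipeline named `generator`.
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
except Exception as e:
    global_error = f"Failed to load model: {str(e)}"

def PrintTest(prompt):  # parameter renamed from `str` to avoid shadowing the builtin
    print(generator(prompt, max_new_tokens=256, do_sample=True, top_p=0.5, repetition_penalty=1.2))

PrintTest(f"<|user|>\nHello\n<|end|>\n<|assistant|>\n")
PrintTest(f"<|user|>Hello\n<|end|><|assistant|>")
```

Net effect of the commit: the loader moves from eager attention in bf16 (with the `model.to('cuda')` call commented out, so it could run on CPU) to FlashAttention-2 in fp16 pinned to CUDA, which is why the `flash-attn` install line is added, and sampling widens from `top_p=0.25` to `top_p=0.5`.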