eralFlare committed · Commit 6843232 (verified) · 1 parent: 50bcb93

Update README.md

Files changed (1):
  1. README.md (+4 -5)
README.md CHANGED
@@ -18,7 +18,8 @@ license_link: https://ai.google.dev/gemma/terms
 ---
 
 # Gemma Model Card
-This model card is copied from the original [google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it) with edits to the code snippets on how to run this auto-gptq quantized version of the model. This auto-gptq quantized version of the model had only been tested to work on cuda GPU.
+This model card is copied from the original [google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it) with edits to the code snippets on how to run this auto-gptq quantized version of the model.
+This auto-gptq quantized version of the model had only been tested to work on cuda GPU. This quantized model utilise approximately 2.6GB of VRAM.
 
 **Model Page**: [Gemma](https://ai.google.dev/gemma/docs)
 
@@ -67,7 +68,7 @@ model = AutoModelForCausalLM.from_pretrained("eralFlare/gemma-2b-it", device_map
 input_text = "Write me a poem about Machine Learning."
 input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
 
-outputs = model.generate(**input_ids)
+outputs = model.generate(**input_ids, max_new_tokens=1024)
 print(tokenizer.decode(outputs[0]))
 ```
 
@@ -84,14 +85,12 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import transformers
 import torch
 
-model_id = "gg-hf/gemma-2b-it"
-dtype = torch.bfloat16
+model_id = "eralFlare/gemma-2b-it"
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="cuda",
-    torch_dtype=dtype,
 )
 
 chat = [
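
For reference, a minimal sketch of how the updated README snippets read once this commit is applied, combined into one script. The chat-template portion (including the example user message) and the requirement for the optimum and auto-gptq packages are assumptions carried over from the original gemma-2b-it model card and transformers' GPTQ integration, not lines shown in this diff.

```python
# Sketch of the post-commit snippets combined; assumes a CUDA GPU and that the
# GPTQ checkpoint loads via transformers' GPTQ integration (optimum + auto-gptq installed).
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "eralFlare/gemma-2b-it"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",  # quantized weights occupy roughly 2.6 GB of VRAM per the updated card
)

# Plain-prompt generation, as in the first updated snippet
input_text = "Write me a poem about Machine Learning."
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
outputs = model.generate(**input_ids, max_new_tokens=1024)
print(tokenizer.decode(outputs[0]))

# Chat-template generation; the message below is illustrative, taken from the original card
chat = [
    {"role": "user", "content": "Write a hello world program"},
]
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=1024)
print(tokenizer.decode(outputs[0]))
```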