Update README.md
README.md (CHANGED)
````diff
@@ -18,7 +18,8 @@ license_link: https://ai.google.dev/gemma/terms
 ---
 
 # Gemma Model Card
-This model card is copied from the original [google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it) with edits to the code snippets on how to run this auto-gptq quantized version of the model.
+This model card is copied from the original [google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it) with edits to the code snippets on how to run this auto-gptq quantized version of the model.
+This auto-gptq quantized version of the model has only been tested on a CUDA GPU. The quantized model uses approximately 2.6 GB of VRAM.
 
 **Model Page**: [Gemma](https://ai.google.dev/gemma/docs)
 
@@ -67,7 +68,7 @@ model = AutoModelForCausalLM.from_pretrained("eralFlare/gemma-2b-it", device_map
 input_text = "Write me a poem about Machine Learning."
 input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
 
-outputs = model.generate(**input_ids)
+outputs = model.generate(**input_ids, max_new_tokens=1024)
 print(tokenizer.decode(outputs[0]))
 ```
 
@@ -84,14 +85,12 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import transformers
 import torch
 
-model_id = "
-dtype = torch.bfloat16
+model_id = "eralFlare/gemma-2b-it"
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="cuda",
-    torch_dtype=dtype,
 )
 
 chat = [
````
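For reference, here is how the snippet reads once both changes are applied, as a minimal runnable sketch rather than an official example from the card. It assumes a CUDA GPU and that `transformers` has its GPTQ loading backend available (the `optimum` and `auto-gptq` packages), which is generally needed to load an auto-gptq checkpoint.

```python
# Minimal sketch of the updated snippet after this change; not part of the card itself.
# Assumes a CUDA GPU and the GPTQ loading backend (optimum + auto-gptq) installed.
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "eralFlare/gemma-2b-it"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",  # place the quantized weights directly on the GPU
)

input_text = "Write me a poem about Machine Learning."
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

# max_new_tokens bounds the generation length, as added in this commit
outputs = model.generate(**input_ids, max_new_tokens=1024)
print(tokenizer.decode(outputs[0]))
```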
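The added note cites roughly 2.6 GB of VRAM. A quick way to sanity-check that figure on your own hardware is to read PyTorch's peak-allocation counter after loading the model; the exact number will vary with driver, CUDA version, and allocator overhead, so treat this as an indicative sketch only.

```python
# Indicative VRAM check for the "~2.6 GB" note above; numbers vary by setup.
import torch
from transformers import AutoModelForCausalLM

torch.cuda.reset_peak_memory_stats()

model = AutoModelForCausalLM.from_pretrained(
    "eralFlare/gemma-2b-it",
    device_map="cuda",
)

peak_gib = torch.cuda.max_memory_allocated() / 1024**3
print(f"Peak VRAM allocated while loading: {peak_gib:.2f} GiB")
```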