Update README.md
README.md CHANGED
@@ -18,7 +18,8 @@ license_link: https://ai.google.dev/gemma/terms
 ---
 
 # Gemma Model Card
-This model card is copied from the original [google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it) with edits to the code snippets on how to run this auto-gptq quantized version of the model.
+This model card is copied from the original [google/gemma-2b-it](https://huggingface.co/google/gemma-2b-it) with edits to the code snippets on how to run this auto-gptq quantized version of the model.
+This auto-gptq quantized version of the model has only been tested on a CUDA GPU. The quantized model uses approximately 2.6GB of VRAM.
 
 **Model Page**: [Gemma](https://ai.google.dev/gemma/docs)
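The ~2.6GB VRAM figure added above is easy to sanity-check. A minimal sketch (not part of the card) that measures peak GPU allocation after loading, assuming a CUDA device and that `auto-gptq` and `optimum` are installed so transformers can load the GPTQ weights:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "eralFlare/gemma-2b-it"

torch.cuda.reset_peak_memory_stats()
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda")

# Peak VRAM allocated by this process while loading, in GB;
# the card reports roughly 2.6GB.
print(f"peak VRAM: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GB")
```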
@@ -67,7 +68,7 @@ model = AutoModelForCausalLM.from_pretrained("eralFlare/gemma-2b-it", device_map
 input_text = "Write me a poem about Machine Learning."
 input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
 
-outputs = model.generate(**input_ids)
+outputs = model.generate(**input_ids, max_new_tokens=1024)
 print(tokenizer.decode(outputs[0]))
 ```
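For context on the change above: without an explicit cap, `generate()` falls back to the generation config's default length (`max_length=20` in transformers, unless the checkpoint overrides it), which would cut the poem off after a few tokens; `max_new_tokens=1024` leaves room for a complete response.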
@@ -84,14 +85,12 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import transformers
 import torch
 
-model_id = "google/gemma-2b-it"
-dtype = torch.bfloat16
+model_id = "eralFlare/gemma-2b-it"
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="cuda",
-    torch_dtype=dtype,
 )
 
 chat = [
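The hunk cuts off at `chat = [`. For reference, a minimal sketch of how the chat-template flow typically continues with the standard transformers API (the message content is illustrative, not taken from this diff):

```python
chat = [
    # Illustrative single user turn; any prompt works here.
    {"role": "user", "content": "Write a hello world program"},
]

# Wrap the messages in Gemma's turn markers and append the
# generation prompt so the model answers as the assistant.
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

# The template already contains <bos>, so skip adding special tokens again.
inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0]))
```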