Update README.md
README.md (CHANGED)

````diff
@@ -18,12 +18,13 @@ This model is an int4 model with group_size 128 of [THUDM/chatglm2-6b](https://h
 ```python
 ##pip install auto-gptq[triton]
 ##pip install triton==2.2.0
-from transformers import
+from transformers import AutoModel, AutoTokenizer
 quantized_model_dir = "Intel/chatglm2-6b-int4-inc"
-model =
-
-
-
+model = AutoModel.from_pretrained(quantized_model_dir,
+                                  device_map="auto",
+                                  trust_remote_code=False,
+                                  )
+
 tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, use_fast=True)
 print(tokenizer.decode(model.generate(**tokenizer("There is a girl who likes adventure,", return_tensors="pt").to(model.device),max_new_tokens=50)[0]))
 ```
````
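
For copy-paste convenience, the lines added by this hunk assemble into the snippet below. It is reproduced from the new revision of the README, with only comments and a `prompt`/`inputs` split of the final one-liner added here; argument choices such as `trust_remote_code=False` are the README's own and are not independently verified.

```python
##pip install auto-gptq[triton]
##pip install triton==2.2.0
from transformers import AutoModel, AutoTokenizer

# Load the INT4 (group_size 128) checkpoint and its tokenizer from the Hub.
quantized_model_dir = "Intel/chatglm2-6b-int4-inc"
model = AutoModel.from_pretrained(quantized_model_dir,
                                  device_map="auto",
                                  trust_remote_code=False,  # value as set in the updated README
                                  )
tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, use_fast=True)

# Encode a short prompt, generate up to 50 new tokens, and print the decoded text.
prompt = "There is a girl who likes adventure,"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0]))
```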

````diff
@@ -35,27 +36,11 @@ print(tokenizer.decode(model.generate(**tokenizer("There is a girl who likes adv
 Install [lm-eval-harness](https://github.com/EleutherAI/lm-evaluation-harness.git) from source, we used the git id 96d185fa6232a5ab685ba7c43e45d1dbb3bb906d

 ```bash
-lm_eval --model hf --model_args pretrained="Intel/
+lm_eval --model hf --model_args pretrained="Intel/chatglm2-6b-int4-inc",autogptq=True,gptq_use_triton=True --device cuda:0 --tasks lambada_openai,hellaswag,piqa,winogrande,truthfulqa_mc1,openbookqa,boolq,rte,arc_easy,arc_challenge,mmlu --batch_size 32
 ```



-| Metric         | BF16   | INT4   |
-| -------------- | ------ | ------ |
-| Avg.           | 0.6647 | 0.6621 |
-| mmlu           | 0.5906 | 0.5872 |
-| lambada_openai | 0.7141 | 0.7141 |
-| hellaswag      | 0.6602 | 0.6557 |
-| winogrande     | 0.7395 | 0.7364 |
-| piqa           | 0.8052 | 0.8047 |
-| truthfulqa_mc1 | 0.5251 | 0.5153 |
-| openbookqa     | 0.3600 | 0.3420 |
-| boolq          | 0.8535 | 0.8541 |
-| rte            | 0.7040 | 0.7148 |
-| arc_easy       | 0.8161 | 0.8165 |
-| arc_challenge  | 0.5435 | 0.5435 |
-
-

 ### Reproduce the model

````
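
The hunk above pins lm-evaluation-harness to git id 96d185fa6232a5ab685ba7c43e45d1dbb3bb906d but leaves the source install itself implicit. The sketch below is not from the README; it assumes the project's standard clone-and-editable-install workflow.

```bash
# Fetch the harness and check out the commit id referenced in the README.
git clone https://github.com/EleutherAI/lm-evaluation-harness.git
cd lm-evaluation-harness
git checkout 96d185fa6232a5ab685ba7c43e45d1dbb3bb906d

# Editable install so the `lm_eval` entry point used above is on PATH
# (assumption: the project's standard pip-based source install).
pip install -e .
```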