alexmarques committed on
Commit
17984c5
·
verified ·
1 Parent(s): dc44c57

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -2
README.md CHANGED
@@ -195,7 +195,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
195
  </td>
196
  </tr>
197
  <tr>
198
- <td>TruthfulQA (0-shot)
199
  </td>
200
  <td>54.04
201
  </td>
@@ -247,6 +247,7 @@ lm_eval \
247
  --model vllm \
248
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-70B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=4 \
249
  --tasks gsm8k_cot_llama_3.1_instruct \
 
250
  --apply_chat_template \
251
  --num_fewshot 8 \
252
  --batch_size auto
@@ -272,7 +273,7 @@ lm_eval \
272
  --batch_size auto
273
  ```
274
 
275
- #### Hellaswag
276
  ```
277
  lm_eval \
278
  --model vllm \
 
195
  </td>
196
  </tr>
197
  <tr>
198
+ <td>TruthfulQA (0-shot, mc2)
199
  </td>
200
  <td>54.04
201
  </td>
 
247
  --model vllm \
248
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-70B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=4 \
249
  --tasks gsm8k_cot_llama_3.1_instruct \
250
+ --fewshot_as_multiturn \
251
  --apply_chat_template \
252
  --num_fewshot 8 \
253
  --batch_size auto
 
273
  --batch_size auto
274
  ```
275
 
276
+ #### TruthfulQA
277
  ```
278
  lm_eval \
279
  --model vllm \