future-xy committed · Commit 85e30d4
Parent(s): f0ad559
fix generation bugs
src/backend/huggingface_generate_until.py CHANGED

@@ -28,7 +28,10 @@ class HFLMwithChatTemplate(HFLMWithMeasurement):
             messages = [
                 {"role": "user", "content": f"{input_string}"},
             ]
-            updated_string = self.tokenizer.apply_chat_template(messages, tokenize=False)
+            if "dbrx-instruct" in self.model.name_or_path:
+                updated_string = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+            else:
+                updated_string = self.tokenizer.apply_chat_template(messages, tokenize=False)
             updated_strings.append(updated_string)
         strings = updated_strings[:]
     except:
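For context, `add_generation_prompt=True` asks the tokenizer's chat template to append the assistant-turn header, so the model starts a reply instead of continuing the user turn; the commit special-cases dbrx-instruct, presumably because its template only emits that header when asked. A minimal sketch of the difference, assuming the standard transformers tokenizer API (the model name is just an example):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("databricks/dbrx-instruct")  # example model
messages = [{"role": "user", "content": "Hello"}]

# Renders the conversation only; the string ends right after the user turn.
plain = tok.apply_chat_template(messages, tokenize=False)

# Also appends the assistant-turn header, so generation begins a fresh reply.
prompted = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

print(plain)
print(prompted)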
src/backend/manage_requests.py CHANGED

@@ -42,6 +42,9 @@ class EvalRequest:
             # A GPTQ model does not need dtype to be specified,
             # it will be inferred from the config
             pass
+        elif self.precision == "8bit":
+            model_args += ",load_in_8bit=True"
+            model_args += ",trust_remote_code=True"
         else:
             raise Exception(f"Unknown precision {self.precision}.")
         return model_args
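The string built here is the usual lm-eval-harness style comma-separated key=value list, which the backend later splits into constructor kwargs. A rough sketch of that round trip under this assumption (the checkpoint name and the parsing one-liner are illustrative, not the repo's code):

# Hypothetical starting value, mirroring the string-building style above.
model_args = "pretrained=databricks/dbrx-instruct"
precision = "8bit"

if precision == "8bit":
    model_args += ",load_in_8bit=True"
    model_args += ",trust_remote_code=True"

# A harness-style parse of the string back into keyword arguments.
kwargs = dict(kv.split("=", 1) for kv in model_args.split(","))
print(kwargs)
# {'pretrained': 'databricks/dbrx-instruct', 'load_in_8bit': 'True', 'trust_remote_code': 'True'}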
src/backend/run_eval_suite.py CHANGED

@@ -48,7 +48,7 @@ def run_evaluation(
     )
     # hf-chat is implemented to use apply_chat_template
     results = evaluator.simple_evaluate(
-        model=eval_request.inference_framework, # "hf-chat"
+        model=eval_request.inference_framework, # "hf-chat", "moe-infinity"
         model_args=eval_request.get_model_args(),
         tasks=task_names,
         num_fewshot=num_fewshot,
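The updated comment clarifies that `model` names a registered backend ("hf-chat" or "moe-infinity" in this repo) rather than a checkpoint; the checkpoint travels inside `model_args`. A hypothetical invocation, assuming the lm-evaluation-harness evaluator module used above (the import path and values are assumptions):

from lm_eval import evaluator  # assumed import, matching the call above

results = evaluator.simple_evaluate(
    model="hf-chat",  # backend name, not a checkpoint path
    model_args="pretrained=databricks/dbrx-instruct,load_in_8bit=True,trust_remote_code=True",
    tasks=["selfcheckgpt"],  # example task from this repo
    num_fewshot=0,
)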
src/backend/tasks/selfcheckgpt/task.py CHANGED

@@ -23,13 +23,14 @@ class SelfCheckGPT(ConfigurableTask):
     def __init__(self):
         super().__init__(config={"metadata": {"version": self.VERSION}})
         # these end tokens are hard coded because of the current limitaion of the llm-eval.
-        self.generation_kwargs = {"until": ["\n\n", "<unk>", "<|im_end|>", "</s>", "<|endoftext|>"], "max_length": 512}
+        # self.generation_kwargs = {"until": ["\n\n", "<unk>", "<|im_end|>", "</s>", "<|endoftext|>"], "max_length": 512}
+        self.generation_kwargs = {"until": ["<im_end>"], "max_length": 1024}
         self.generation_kwargs_sampling_number = 5  # the number of sampling for self-consistence
         self.generation_kwargs_sampling = {
             "temperature": 0.99,
             "do_sample": True,
-            "until": ["
-            "max_length":
+            "until": ["<im_end>", "</s>"],
+            "max_length": 1024,
         }

         self.selfcheckgpt_type = os.environ.get("SELFCHECKGPTTYPE", "SelfCheckNLI")
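The "until" entries are stop strings: the generated text is cut at the first occurrence of any of them, and "max_length" caps the generation budget. A small sketch of the truncation semantics assumed here (the helper name is ours, not the harness's):

def cut_at_stop_strings(text: str, until: list[str]) -> str:
    # Truncate at the earliest occurrence of any stop string.
    for stop in until:
        idx = text.find(stop)
        if idx != -1:
            text = text[:idx]
    return text

sample = "Paris is the capital of France.<im_end> trailing junk"
print(cut_at_stop_strings(sample, ["<im_end>", "</s>"]))
# -> "Paris is the capital of France."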