Spaces:
Sleeping
Sleeping
File size: 1,879 Bytes
39e6ae5 956b1a1 39e6ae5 956b1a1 39e6ae5 e838258 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# pip install "distilabel[hf-transformers] @ git+https://github.com/argilla-io/distilabel.git@develop"
# pip install flash-attn --no-build-isolation
# huggingface-cli login
import time
from distilabel.pipeline import Pipeline
from distilabel.steps import KeepColumns, LoadHubDataset
from distilabel.steps.tasks import PrometheusEval
from distilabel.llms import TransformersLLM
if __name__ == "__main__":
    # Wall-clock reference point; the elapsed time is reported once the run ends.
    start_time = time.time()

    # Plain-data settings, gathered up front so the pipeline wiring below
    # reads as a sequence of steps rather than nested literals.
    column_renames = {"prompt": "instruction", "completion": "generation"}
    retained_columns = [
        "instruction",
        "generation",
        "feedback",
        "result",
        "model_name",
    ]
    sampling_options = {
        "max_new_tokens": 1024,
        "temperature": 0.7,
    }

    with Pipeline(name="prometheus") as pipeline:
        # Source step: reads the "test" split of the Hub dataset and renames
        # its prompt/completion columns to instruction/generation.
        source_step = LoadHubDataset(
            name="load_dataset",
            repo_id="HuggingFaceH4/instruction-dataset",
            split="test",
            output_mappings=column_renames,
        )

        # Evaluator model, given an explicit chat template that joins the
        # first two messages inside a single [INST] ... [/INST] block.
        judge_llm = TransformersLLM(
            model="prometheus-eval/prometheus-7b-v2.0",
            chat_template="[INST] {{ messages[0]['content'] }}\n{{ messages[1]['content'] }}[/INST]",
        )

        # Evaluation step: absolute mode, "factual-validity" rubric, no
        # reference answer, one ungrouped generation per input.
        judge_step = PrometheusEval(
            name="task",
            llm=judge_llm,
            mode="absolute",
            rubric="factual-validity",
            reference=False,
            num_generations=1,
            group_generations=False,
        )

        # Final step: restricts the output to the listed columns.
        trim_step = KeepColumns(
            name="keep_columns",
            columns=retained_columns,
        )

        # Wire the steps in order: load -> evaluate -> trim.
        source_step >> judge_step >> trim_step  # type: ignore

    # Runtime parameters are keyed by step name; only the evaluator's LLM
    # generation kwargs are overridden here.
    runtime_parameters = {
        judge_step.name: {  # type: ignore
            "llm": {
                "generation_kwargs": sampling_options,
            },
        },
    }
    distiset = pipeline.run(parameters=runtime_parameters)

    elapsed_seconds = time.time() - start_time
    print("--- %s seconds ---" % elapsed_seconds)

    # Publish only when the run actually returned something.
    if distiset is not None:
        distiset.push_to_hub("instruction-dataset-prometheus")
|