Spaces:
Runtime error
Runtime error
Refactor
Browse files
- app.py +94 -93
- evaluation.py +1 -1
app.py
CHANGED
|
@@ -446,9 +446,9 @@ with st.form(key="form"):
|
|
| 446 |
elif len(selected_models) == 0:
|
| 447 |
st.warning("⚠️ No models were selected for evaluation! Please select at least one model and try again.")
|
| 448 |
elif len(selected_models) > 10:
|
| 449 |
-
st.warning("Only 10 models can be evaluated at once. Please select fewer models
|
| 450 |
else:
|
| 451 |
-
# Filter out
|
| 452 |
selected_models = filter_evaluated_models(
|
| 453 |
selected_models,
|
| 454 |
selected_task,
|
|
@@ -458,102 +458,103 @@ with st.form(key="form"):
|
|
| 458 |
selected_metrics,
|
| 459 |
)
|
| 460 |
print("INFO -- Selected models after filter:", selected_models)
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
},
|
| 479 |
-
"evaluation": {"metrics": selected_metrics, "models": selected_models, "hf_username": hf_username},
|
| 480 |
-
},
|
| 481 |
-
}
|
| 482 |
-
print(f"INFO -- Payload: {project_payload}")
|
| 483 |
-
project_json_resp = http_post(
|
| 484 |
-
path="/projects/create",
|
| 485 |
-
payload=project_payload,
|
| 486 |
-
token=HF_TOKEN,
|
| 487 |
-
domain=AUTOTRAIN_BACKEND_API,
|
| 488 |
-
).json()
|
| 489 |
-
print(f"INFO -- Project creation response: {project_json_resp}")
|
| 490 |
-
|
| 491 |
-
if project_json_resp["created"]:
|
| 492 |
-
data_payload = {
|
| 493 |
-
"split": 4, # use "auto" split choice in AutoTrain
|
| 494 |
-
"col_mapping": col_mapping,
|
| 495 |
-
"load_config": {"max_size_bytes": 0, "shuffle": False},
|
| 496 |
}
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
|
|
|
| 500 |
token=HF_TOKEN,
|
| 501 |
domain=AUTOTRAIN_BACKEND_API,
|
| 502 |
-
params={
|
| 503 |
-
"type": "dataset",
|
| 504 |
-
"config_name": selected_config,
|
| 505 |
-
"split_name": selected_split,
|
| 506 |
-
},
|
| 507 |
).json()
|
| 508 |
-
print(f"INFO --
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
token=HF_TOKEN,
|
| 513 |
domain=AUTOTRAIN_BACKEND_API,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
).json()
|
| 515 |
-
print(f"INFO --
|
| 516 |
-
if
|
| 517 |
-
|
| 518 |
-
"
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
to
|
| 541 |
-
|
| 542 |
-
[
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 446 |
elif len(selected_models) == 0:
|
| 447 |
st.warning("⚠️ No models were selected for evaluation! Please select at least one model and try again.")
|
| 448 |
elif len(selected_models) > 10:
|
| 449 |
+
st.warning("Only 10 models can be evaluated at once. Please select fewer models and try again.")
|
| 450 |
else:
|
| 451 |
+
# Filter out previously evaluated models
|
| 452 |
selected_models = filter_evaluated_models(
|
| 453 |
selected_models,
|
| 454 |
selected_task,
|
|
|
|
| 458 |
selected_metrics,
|
| 459 |
)
|
| 460 |
print("INFO -- Selected models after filter:", selected_models)
|
| 461 |
+
if len(selected_models) > 0:
|
| 462 |
+
project_id = str(uuid.uuid4())[:8]
|
| 463 |
+
project_payload = {
|
| 464 |
+
"username": AUTOTRAIN_USERNAME,
|
| 465 |
+
"proj_name": f"eval-project-{project_id}",
|
| 466 |
+
"task": TASK_TO_ID[selected_task],
|
| 467 |
+
"config": {
|
| 468 |
+
"language": AUTOTRAIN_TASK_TO_LANG[selected_task]
|
| 469 |
+
if selected_task in AUTOTRAIN_TASK_TO_LANG
|
| 470 |
+
else "en",
|
| 471 |
+
"max_models": 5,
|
| 472 |
+
"instance": {
|
| 473 |
+
"provider": "aws",
|
| 474 |
+
"instance_type": "ml.g4dn.4xlarge",
|
| 475 |
+
"max_runtime_seconds": 172800,
|
| 476 |
+
"num_instances": 1,
|
| 477 |
+
"disk_size_gb": 150,
|
| 478 |
+
},
|
| 479 |
+
"evaluation": {
|
| 480 |
+
"metrics": selected_metrics,
|
| 481 |
+
"models": selected_models,
|
| 482 |
+
"hf_username": hf_username,
|
| 483 |
+
},
|
| 484 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
}
|
| 486 |
+
print(f"INFO -- Payload: {project_payload}")
|
| 487 |
+
project_json_resp = http_post(
|
| 488 |
+
path="/projects/create",
|
| 489 |
+
payload=project_payload,
|
| 490 |
token=HF_TOKEN,
|
| 491 |
domain=AUTOTRAIN_BACKEND_API,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
).json()
|
| 493 |
+
print(f"INFO -- Project creation response: {project_json_resp}")
|
| 494 |
+
|
| 495 |
+
if project_json_resp["created"]:
|
| 496 |
+
data_payload = {
|
| 497 |
+
"split": 4, # use "auto" split choice in AutoTrain
|
| 498 |
+
"col_mapping": col_mapping,
|
| 499 |
+
"load_config": {"max_size_bytes": 0, "shuffle": False},
|
| 500 |
+
}
|
| 501 |
+
data_json_resp = http_post(
|
| 502 |
+
path=f"/projects/{project_json_resp['id']}/data/{selected_dataset}",
|
| 503 |
+
payload=data_payload,
|
| 504 |
token=HF_TOKEN,
|
| 505 |
domain=AUTOTRAIN_BACKEND_API,
|
| 506 |
+
params={
|
| 507 |
+
"type": "dataset",
|
| 508 |
+
"config_name": selected_config,
|
| 509 |
+
"split_name": selected_split,
|
| 510 |
+
},
|
| 511 |
).json()
|
| 512 |
+
print(f"INFO -- Dataset creation response: {data_json_resp}")
|
| 513 |
+
if data_json_resp["download_status"] == 1:
|
| 514 |
+
train_json_resp = http_get(
|
| 515 |
+
path=f"/projects/{project_json_resp['id']}/data/start_process",
|
| 516 |
+
token=HF_TOKEN,
|
| 517 |
+
domain=AUTOTRAIN_BACKEND_API,
|
| 518 |
+
).json()
|
| 519 |
+
print(f"INFO -- AutoTrain job response: {train_json_resp}")
|
| 520 |
+
if train_json_resp["success"]:
|
| 521 |
+
train_eval_index = {
|
| 522 |
+
"train-eval-index": [
|
| 523 |
+
{
|
| 524 |
+
"config": selected_config,
|
| 525 |
+
"task": AUTOTRAIN_TASK_TO_HUB_TASK[selected_task],
|
| 526 |
+
"task_id": selected_task,
|
| 527 |
+
"splits": {"eval_split": selected_split},
|
| 528 |
+
"col_mapping": col_mapping,
|
| 529 |
+
}
|
| 530 |
+
]
|
| 531 |
+
}
|
| 532 |
+
selected_metadata = yaml.dump(train_eval_index, sort_keys=False)
|
| 533 |
+
dataset_card_url = get_dataset_card_url(selected_dataset)
|
| 534 |
+
st.success("✅ Successfully submitted evaluation job!")
|
| 535 |
+
st.markdown(
|
| 536 |
+
f"""
|
| 537 |
+
Evaluation can take up to 1 hour to complete, so grab a ☕️ or 🍵 while you wait:
|
| 538 |
+
|
| 539 |
+
* π A [Hub pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions) with the evaluation results will be opened for each model you selected. Check your email for notifications.
|
| 540 |
+
* π Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) to view the results from your submission once the Hub pull request is merged.
|
| 541 |
+
* 🥱 Tired of configuring evaluations? Add the following metadata to the [dataset card]({dataset_card_url}) to enable 1-click evaluations:
|
| 542 |
+
""" # noqa
|
| 543 |
+
)
|
| 544 |
+
st.markdown(
|
| 545 |
+
f"""
|
| 546 |
+
```yaml
|
| 547 |
+
{selected_metadata}
|
| 548 |
+
"""
|
| 549 |
+
)
|
| 550 |
+
print("INFO -- Pushing evaluation job logs to the Hub")
|
| 551 |
+
evaluation_log = {}
|
| 552 |
+
evaluation_log["payload"] = project_payload
|
| 553 |
+
evaluation_log["project_creation_response"] = project_json_resp
|
| 554 |
+
evaluation_log["dataset_creation_response"] = data_json_resp
|
| 555 |
+
evaluation_log["autotrain_job_response"] = train_json_resp
|
| 556 |
+
commit_evaluation_log(evaluation_log, hf_access_token=HF_TOKEN)
|
| 557 |
+
else:
|
| 558 |
+
st.error("π Oh no, there was an error submitting your evaluation job!")
|
| 559 |
+
else:
|
| 560 |
+
st.warning("⚠️ No models left to evaluate! Please select other models and try again.")
|
evaluation.py
CHANGED
|
@@ -6,7 +6,7 @@ from huggingface_hub import DatasetFilter, HfApi
|
|
| 6 |
from huggingface_hub.hf_api import DatasetInfo
|
| 7 |
|
| 8 |
|
| 9 |
-
@dataclass(frozen=True, eq=True
|
| 10 |
class EvaluationInfo:
|
| 11 |
task: str
|
| 12 |
model: str
|
|
|
|
| 6 |
from huggingface_hub.hf_api import DatasetInfo
|
| 7 |
|
| 8 |
|
| 9 |
+
@dataclass(frozen=True, eq=True)
|
| 10 |
class EvaluationInfo:
|
| 11 |
task: str
|
| 12 |
model: str
|