import json
import os
import random
import tempfile
from datetime import datetime

import gradio as gr
from codecarbon import EmissionsTracker
from datasets import load_dataset
from dotenv import load_dotenv
from huggingface_hub import HfApi
from sklearn.metrics import accuracy_score

# Load environment variables from a local .env file (useful for local development)
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN_TEXT")
if not HF_TOKEN:
    print("Warning: HF_TOKEN_TEXT not found in environment variables. Submissions will not work.")

# allow_multiple_runs lets the tracker be restarted on every click of the evaluate button
tracker = EmissionsTracker(allow_multiple_runs=True)
def get_space_info():
    """Return (username, space_url) for the current Space, or local fallbacks."""
    space_name = os.getenv("SPACE_ID", "")
    if space_name:
        try:
            username = space_name.split("/")[0]
            space_url = f"https://huggingface.co/spaces/{space_name}"
            return username, space_url
        except Exception as e:
            print(f"Error getting space info: {e}")
    return "local-user", "local-development"
def clean_emissions_data(emissions_data):
    """Remove unwanted fields from emissions data"""
    data_dict = emissions_data.__dict__
    fields_to_remove = ['timestamp', 'project_name', 'experiment_id', 'latitude', 'longitude']
    return {k: v for k, v in data_dict.items() if k not in fields_to_remove}
def evaluate():
    """Run the baseline on the held-out test set and measure the energy cost of inference."""
    username, space_url = get_space_info()

    # Track energy consumption and emissions for the inference pass only
    tracker.start()
    tracker.start_task("inference")

    # Random baseline: replace these two lines with your own model's inference
    true_labels = test_dataset["label"]
    predictions = [random.randint(0, 7) for _ in range(len(true_labels))]

    accuracy = accuracy_score(true_labels, predictions)

    emissions_data = tracker.stop_task()

    results = {
        "username": username,
        "space_url": space_url,
        "submission_timestamp": datetime.now().isoformat(),
        "accuracy": float(accuracy),
        "energy_consumed_wh": emissions_data.energy_consumed * 1000,  # codecarbon reports kWh
        "emissions_gco2eq": emissions_data.emissions * 1000,  # codecarbon reports kg CO2eq
        "emissions_data": clean_emissions_data(emissions_data)
    }

    # Order matches the outputs of evaluate_btn.click below
    return [
        accuracy,
        emissions_data.emissions * 1000,
        emissions_data.energy_consumed * 1000,
        json.dumps(results, indent=2)
    ]
def submit_results(results_json):
    """Upload the evaluation results to the public leaderboard dataset."""
    if not results_json:
        return gr.Warning("No results to submit")

    if not HF_TOKEN:
        return gr.Warning("HF_TOKEN_TEXT not found. Please set up your Hugging Face token.")

    results_str = json.dumps(results_json)

    # Write the results to a temporary file so they can be uploaded to the Hub
    with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f:
        f.write(results_str)
        temp_path = f.name

    # One file per submission, named after the user and the submission time
    api = HfApi(token=HF_TOKEN)
    path_in_repo = f"submissions/{results_json['username']}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    api.upload_file(
        path_or_fileobj=temp_path,
        path_in_repo=path_in_repo,
        repo_id="frugal-ai-challenge/public-leaderboard-text",
        repo_type="dataset",
        token=HF_TOKEN
    )

    os.unlink(temp_path)

    return gr.Info("Results submitted successfully to the leaderboard!")
# Mapping from the dataset's string labels to integer class ids
LABEL_MAPPING = {
    "0_not_relevant": 0,
    "1_not_happening": 1,
    "2_not_human": 2,
    "3_not_bad": 3,
    "4_solutions_harmful_unnecessary": 4,
    "5_science_unreliable": 5,
    "6_proponents_biased": 6,
    "7_fossil_fuels_needed": 7
}
# Human-readable description of each class id
LABEL_DESCRIPTIONS = {
    0: "No relevant claim detected",
    1: "Global warming is not happening",
    2: "Not caused by humans",
    3: "Not bad or beneficial",
    4: "Solutions harmful/unnecessary",
    5: "Science is unreliable",
    6: "Proponents are biased",
    7: "Fossil fuels are needed"
}
print("Loading dataset...") |
|
dataset = load_dataset("QuotaClimat/frugalaichallenge-text-train") |
|
|
|
|
|
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]}) |
|
|
|
|
|
train_test = dataset["train"].train_test_split(test_size=0.2, seed=42) |
|
train_dataset = train_test["train"] |
|
test_dataset = train_test["test"] |
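# NOTE: test_size and seed are assumed fixed across submissions so that every
# model is scored on the same held-out split; changing them would make
# leaderboard accuracies incomparable.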
print("\nFirst 5 rows of test set:") |
|
for i, example in enumerate(test_dataset.select(range(5))): |
|
print(f"\nExample {i+1}:") |
|
print(f"Text: {example['quote'][:100]}...") |
|
print(f"Label: {example['label']} - {LABEL_DESCRIPTIONS[example['label']]}") |
|
|
|
|
with gr.Blocks() as demo:
    gr.Markdown("""
    # Frugal AI Challenge - Text task - Submission portal
    ## Climate Disinformation Classification
    """)
    with gr.Tabs():

        with gr.Tab("Instructions"):

            gr.Markdown("""
            To submit your results, please follow the steps below:

            ## Prepare your model submission
            1. Clone the space of this portal on your own Hugging Face account.
            2. Modify the ``evaluate`` function to replace the random baseline with your own model loading and inference, inside the tracked inference pass (see the sketch after this list).
            3. If needed, add any required dependencies to the requirements of your space.
            4. Write your model card in the ``modelcard.md`` file.
            5. Deploy your space and verify that it works.
            6. (Optional) You can change the Space hardware to use a GPU directly on Hugging Face.
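
            For example, here is a minimal sketch of step 2 (``model`` is a hypothetical object with a ``predict`` method; adapt it to however your model runs inference):

            ```python
            tracker.start()
            tracker.start_task("inference")
            true_labels = test_dataset["label"]
            # Your model's inference replaces the random baseline here
            predictions = model.predict(test_dataset["quote"])
            emissions_data = tracker.stop_task()
            ```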
            ## Submit your model to the leaderboard in the ``Model Submission`` tab
            7. Step 1 - Evaluate model: click the button to evaluate your model. This will run your model, compute the accuracy on the test set (20% of the train set), and track the energy consumption and emissions.
            8. Step 2 - Submit to leaderboard: click the button to submit your results to the leaderboard. This will upload the results to the leaderboard dataset and update the leaderboard.
            9. You can see the leaderboard at https://huggingface.co/datasets/frugal-ai-challenge/public-leaderboard-text
            """)
with gr.Tab("Model Submission"): |
|
gr.Markdown("## Random Baseline Model") |
|
|
|
with gr.Row(): |
|
with gr.Column(scale=1): |
|
evaluate_btn = gr.Button("1. Evaluate model", variant="secondary") |
|
with gr.Column(scale=1): |
|
submit_btn = gr.Button("2. Submit to leaderboard", variant="primary", size="lg") |
|
|
|
with gr.Row(): |
|
accuracy_output = gr.Number(label="Accuracy", precision=4) |
|
emissions_output = gr.Number(label="Emissions (gCO2eq)", precision=12) |
|
energy_output = gr.Number(label="Energy Consumed (Wh)", precision=12) |
|
|
|
with gr.Row(): |
|
results_json = gr.JSON(label="Detailed Results", visible=True) |
|
|
|
evaluate_btn.click( |
|
evaluate, |
|
inputs=None, |
|
outputs=[accuracy_output, emissions_output, energy_output, results_json] |
|
) |
|
|
|
submit_btn.click( |
|
submit_results, |
|
inputs=[results_json], |
|
outputs=None |
|
) |
|
|
with gr.Tab("Model Card"): |
|
with open("modelcard.md", "r") as f: |
|
model_card_content = f.read() |
|
gr.Markdown(model_card_content) |
|
|
|
if __name__ == "__main__": |
|
demo.launch() |