# ISCO-08 hierarchical accuracy measure

In [18]:
import os

import evaluate

# Where to load the metric module from. The default is the Hugging Face Hub id so
# the notebook runs on any machine; set the ISCO_HAM_PATH environment variable to
# a local checkout directory to exercise a development copy instead.
HAM_SOURCE = os.getenv("ISCO_HAM_PATH", "danieldux/isco_hierarchical_accuracy")

ham = evaluate.load(HAM_SOURCE)
print(ham.description)

ISCO CSV file downloaded
Weighted ISCO hierarchy dictionary created as isco_hierarchy

The ISCO-08 Hierarchical Accuracy Measure is an implementation of the measure described in [Functional Annotation of Genes Using Hierarchical Text Categorization](https://www.researchgate.net/publication/44046343_Functional_Annotation_of_Genes_Using_Hierarchical_Text_Categorization) (Kiritchenko, Svetlana and Famili, Fazel. 2005) and adapted for the ISCO-08 classification scheme by the International Labour Organization.

The measure rewards more precise classifications that correctly identify an occupation's placement down to the specific Unit group level and applies penalties for misclassifications based on the hierarchical distance between the correct and assigned categories.




In [3]:
# Small smoke test: five gold ISCO unit-group codes and five predicted codes,
# one exact match followed by predictions that share progressively shorter
# prefixes with their reference.
references = "1111 1112 1113 1114 1120".split()
predictions = "1111 1113 1120 1211 2111".split()

# Echo both lists before scoring so the output is self-explanatory.
for heading, codes in (("References", references), ("Predictions", predictions)):
    print(f"{heading}: {codes}")

print(ham.compute(references=references, predictions=predictions))

References: ['1111', '1112', '1113', '1114', '1120']
Predictions: ['1111', '1113', '1120', '1211', '2111']
Accuracy: 0.2, Hierarchical Precision: 0.5, Hierarchical Recall: 0.7777777777777778, Hierarchical F-measure: 0.6086956521739131
{'accuracy': 0.2, 'hierarchical_precision': 0.5, 'hierarchical_recall': 0.7777777777777778, 'hierarchical_fmeasure': 0.6086956521739131}


In [16]:
# Compute all test cases and print the results
from tests import test_cases

# Walk through every bundled test case, numbering them from 1, and print the
# inputs followed by the metric result for each.
for test_number, test_case in enumerate(test_cases, start=1):
    references = test_case["references"]
    predictions = test_case["predictions"]
    print(f"TEST CASE #{test_number}")
    print(f"References: {references}")
    print(f"Predictions: {predictions}")
    print(ham.compute(references=references, predictions=predictions))
    print()  # blank line between test cases

TEST CASE #1
References: ['1111', '1111', '1111', '1111', '1111', '1111', '1111', '1111', '1111', '1111']
Predictions: ['1111', '1112', '1120', '1211', '1311', '2111', '111', '11', '1', '9999']
Accuracy: 0.1, Hierarchical Precision: 0.2222222222222222, Hierarchical Recall: 1.0, Hierarchical F-measure: 0.3636363636363636
{'accuracy': 0.1, 'hierarchical_precision': 0.2222222222222222, 'hierarchical_recall': 1.0, 'hierarchical_fmeasure': 0.3636363636363636}

TEST CASE #2
References: ['1111']
Predictions: ['1111']
Accuracy: 1.0, Hierarchical Precision: 1.0, Hierarchical Recall: 1.0, Hierarchical F-measure: 1.0
{'accuracy': 1.0, 'hierarchical_precision': 1.0, 'hierarchical_recall': 1.0, 'hierarchical_fmeasure': 1.0}

TEST CASE #3
References: ['1111']
Predictions: ['1112']
Accuracy: 0.0, Hierarchical Precision: 0.75, Hierarchical Recall: 0.75, Hierarchical F-measure: 0.75
{'accuracy': 0.0, 'hierarchical_precision': 0.75, 'hierarchical_recall': 0.75, 'hierarchical_fmeasure': 0.75}

TEST CASE 

# Model evaluation using the test split of the dataset

In [17]:
import os
from datasets import load_dataset
from transformers import pipeline
import evaluate
import json

# Ensure that the HF_TOKEN environment variable is set
# Identifiers and sampling parameters used by this evaluation.
DATASET_ID = "ICILS/multilingual_parental_occupations"
MODEL_ID = "ICILS/XLM-R-ISCO"
SHUFFLE_SEED = 42
SUBSET_SIZE = 100

# The token is passed to both load_dataset and pipeline below. Fail early when it
# is missing; an empty value is rejected as well as an unset one.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable is not set.")

# Load the test split once, then derive the fixed-seed 100-example subset from
# it instead of loading the same split a second time.
test_data = load_dataset(DATASET_ID, split="test", token=hf_token)
test_data_subset = test_data.shuffle(seed=SHUFFLE_SEED).select(range(SUBSET_SIZE))

# Initialize the text-classification pipeline for the ISCO model.
pipe = pipeline("text-classification", model=MODEL_ID, token=hf_token)

# Define the mapping from ISCO_CODE_TITLE to ISCO codes
def extract_isco_code(isco_code_title: str):
    """Return the leading whitespace-delimited token of a label string.

    Labels look like "7412 Electrical Mechanics and Fitters"; only the first
    token ("7412") is needed for the evaluation.

    Raises:
        IndexError: if the string is empty or contains only whitespace.
    """
    # Only the first token is needed, so stop splitting after one separator.
    tokens = isco_code_title.split(maxsplit=1)
    return tokens[0]

# Evaluate the model
# Run the classifier on every test example, collecting the predicted code and
# the reference code side by side.
predictions, references = [], []
for row in test_data:
    # The pipeline result is indexed with [0] to take its first entry, whose
    # "label" field is reduced to the bare code by extract_isco_code.
    top_candidate = pipe(row["JOB_DUTIES"])[0]
    predictions.append(extract_isco_code(top_candidate["label"]))

    # The reference code is stored under the "ISCO" key.
    references.append(row["ISCO"])

# Load the hierarchical accuracy measure and score the collected labels.
hierarchical_accuracy = evaluate.load("danieldux/isco_hierarchical_accuracy")
results = hierarchical_accuracy.compute(
    predictions=predictions, references=references
)

# Persist the metric values as JSON in the working directory.
with open("isco_results.json", "w") as results_file:
    json.dump(results, results_file)

print("Evaluation results saved to isco_results.json")

ISCO CSV file downloaded
Weighted ISCO hierarchy dictionary created
{'1111': {'111': 0.75, '11': 0.5, '1': 0.25}, '1112': {'111': 0.75, '11': 0.5, '1': 0.25}, '1113': {'111': 0.75, '11': 0.5, '1': 0.25}, '1114': {'111': 0.75, '11': 0.5, '1': 0.25}, '1120': {'112': 0.75, '11': 0.5, '1': 0.25}, '1211': {'121': 0.75, '12': 0.5, '1': 0.25}, '1212': {'121': 0.75, '12': 0.5, '1': 0.25}, '1213': {'121': 0.75, '12': 0.5, '1': 0.25}, '1219': {'121': 0.75, '12': 0.5, '1': 0.25}, '1221': {'122': 0.75, '12': 0.5, '1': 0.25}, '1222': {'122': 0.75, '12': 0.5, '1': 0.25}, '1223': {'122': 0.75, '12': 0.5, '1': 0.25}, '1311': {'131': 0.75, '13': 0.5, '1': 0.25}, '1312': {'131': 0.75, '13': 0.5, '1': 0.25}, '1321': {'132': 0.75, '13': 0.5, '1': 0.25}, '1322': {'132': 0.75, '13': 0.5, '1': 0.25}, '1323': {'132': 0.75, '13': 0.5, '1': 0.25}, '1324': {'132': 0.75, '13': 0.5, '1': 0.25}, '1330': {'133': 0.75, '13': 0.5, '1': 0.25}, '1341': {'134': 0.75, '13': 0.5, '1': 0.25}, '1342': {'134': 0.75, '13': 0.5