# ISCO-08 hierarchical accuracy measure

In [1]:
import os
from pathlib import Path

import evaluate

# Where to load the metric from. Resolution order:
#   1. the ISCO_METRIC_PATH environment variable, when set;
#   2. the original local checkout, when it exists on this machine;
#   3. the Hub id that the evaluation cells further down pass to evaluate.load
#      (NOTE(review): assumed to be the same metric as the local checkout — confirm).
LOCAL_METRIC_PATH = "/home/dux/workspace/1-IEA_RnD/isco_hierarchical_accuracy"
HUB_METRIC_ID = "danieldux/isco_hierarchical_accuracy"

metric_location = os.environ.get("ISCO_METRIC_PATH") or (
    LOCAL_METRIC_PATH if Path(LOCAL_METRIC_PATH).exists() else HUB_METRIC_ID
)

ham = evaluate.load(metric_location)
print(ham.description)

ISCO CSV file downloaded
Weighted ISCO hierarchy dictionary created as isco_hierarchy

The ISCO-08 Hierarchical Accuracy Measure is an implementation of the measure described in [Functional Annotation of Genes Using Hierarchical Text Categorization](https://www.researchgate.net/publication/44046343_Functional_Annotation_of_Genes_Using_Hierarchical_Text_Categorization) (Kiritchenko, Svetlana and Famili, Fazel. 2005) and adapted for the ISCO-08 classification scheme by the International Labour Organization.

The measure rewards more precise classifications that correctly identify an occupation's placement down to the specific Unit group level and applies penalties for misclassifications based on the hierarchical distance between the correct and assigned categories.




In [2]:
# Small worked example: the first pair is an exact match, the last pair already
# differs in the first digit of the code.
references = ["1111", "1112", "1113", "1114", "1120"]
predictions = ["1111", "1113", "1120", "1211", "2111"]

print(f"References: {references}")
print(f"Predictions: {predictions}")

# Keep the scores in a variable so they can be inspected after printing
demo_scores = ham.compute(references=references, predictions=predictions)
print(demo_scores)

References: ['1111', '1112', '1113', '1114', '1120']
Predictions: ['1111', '1113', '1120', '1211', '2111']
Accuracy: 0.2, Hierarchical Precision: 0.5, Hierarchical Recall: 0.7777777777777778, Hierarchical F-measure: 0.6086956521739131
{'accuracy': 0.2, 'hierarchical_precision': 0.5, 'hierarchical_recall': 0.7777777777777778, 'hierarchical_fmeasure': 0.6086956521739131}


In [16]:
# Run the metric over every bundled test case and print the inputs next to the scores
from tests import test_cases

# enumerate(..., start=1) numbers the cases, replacing the hand-maintained counter
for test_number, test_case in enumerate(test_cases, start=1):
    references = test_case["references"]
    predictions = test_case["predictions"]
    print(f"TEST CASE #{test_number}")
    print(f"References: {references}")
    print(f"Predictions: {predictions}")
    print(ham.compute(references=references, predictions=predictions))
    print()

TEST CASE #1
References: ['1111', '1111', '1111', '1111', '1111', '1111', '1111', '1111', '1111', '1111']
Predictions: ['1111', '1112', '1120', '1211', '1311', '2111', '111', '11', '1', '9999']
Accuracy: 0.1, Hierarchical Precision: 0.2222222222222222, Hierarchical Recall: 1.0, Hierarchical F-measure: 0.3636363636363636
{'accuracy': 0.1, 'hierarchical_precision': 0.2222222222222222, 'hierarchical_recall': 1.0, 'hierarchical_fmeasure': 0.3636363636363636}

TEST CASE #2
References: ['1111']
Predictions: ['1111']
Accuracy: 1.0, Hierarchical Precision: 1.0, Hierarchical Recall: 1.0, Hierarchical F-measure: 1.0
{'accuracy': 1.0, 'hierarchical_precision': 1.0, 'hierarchical_recall': 1.0, 'hierarchical_fmeasure': 1.0}

TEST CASE #3
References: ['1111']
Predictions: ['1112']
Accuracy: 0.0, Hierarchical Precision: 0.75, Hierarchical Recall: 0.75, Hierarchical F-measure: 0.75
{'accuracy': 0.0, 'hierarchical_precision': 0.75, 'hierarchical_recall': 0.75, 'hierarchical_fmeasure': 0.75}

TEST CASE 

# Model evaluation using the test and validation splits of the dataset

In [None]:
import os
from datasets import load_dataset
from transformers import pipeline
import evaluate
import json

# Ensure that the HF_TOKEN environment variable is set (an empty value is rejected too)
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN environment variable is not set.")

DATASET_ID = "ICILS/multilingual_parental_occupations"

# Load each split exactly once
test_data = load_dataset(DATASET_ID, split="test", token=hf_token)
validation_data = load_dataset(DATASET_ID, split="validation", token=hf_token)

# Fixed-seed sample of 100 test examples, derived from the split loaded above
# instead of requesting the same split a second time
test_data_subset = test_data.shuffle(seed=42).select(range(100))

# Initialize the pipeline
pipe = pipeline("text-classification", model="ICILS/XLM-R-ISCO", token=hf_token)

# Helper that maps an ISCO_CODE_TITLE label to its ISCO code
def extract_isco_code(isco_code_title: str):
    """Return the first whitespace-delimited token of ``isco_code_title``.

    Labels look like "7412 Electrical Mechanics and Fitters"; only the leading
    code ("7412") is needed for the evaluation.

    Raises:
        IndexError: if the string contains no non-whitespace characters.
    """
    # Only the first token matters, so stop splitting after it
    tokens = isco_code_title.split(maxsplit=1)
    return tokens[0]

# Load the hierarchical accuracy measure, identified by its repository-style id
ISCO_METRIC_ID = "danieldux/isco_hierarchical_accuracy"
hierarchical_accuracy = evaluate.load(ISCO_METRIC_ID)

## Test set

In [2]:
# Score the classifier on every example of the test split
predictions, references = [], []
for row in test_data:
    # The first element of the pipeline output carries the predicted label;
    # the text to classify is stored under the "JOB_DUTIES" key
    top_candidate = pipe(row["JOB_DUTIES"])[0]
    predictions.append(extract_isco_code(top_candidate["label"]))

    # The reference code is stored under the "ISCO" key
    references.append(row["ISCO"])

# Compare predicted and reference codes with the hierarchical measure
test_results = hierarchical_accuracy.compute(predictions=predictions, references=references)

# Persist the scores as JSON
with open("isco_test_results.json", "w") as results_file:
    json.dump(test_results, results_file)

print("Evaluation results saved to isco_test_results.json")

Accuracy: 0.8611914401388086, Hierarchical Precision: 0.989010989010989, Hierarchical Recall: 0.9836065573770492, Hierarchical F-measure: 0.9863013698630136
Evaluation results saved to isco_test_results.json


## Validation set

In [78]:
# Score the classifier on every example of the validation split
predictions, references = [], []
for row in validation_data:
    # The first element of the pipeline output carries the predicted label;
    # the text to classify is stored under the "JOB_DUTIES" key
    top_candidate = pipe(row["JOB_DUTIES"])[0]
    predictions.append(extract_isco_code(top_candidate["label"]))

    # The reference code is stored under the "ISCO" key
    references.append(row["ISCO"])

# Compare predicted and reference codes with the hierarchical measure
validation_results = hierarchical_accuracy.compute(predictions=predictions, references=references)

# Persist the scores as JSON
with open("isco_validation_results.json", "w") as results_file:
    json.dump(validation_results, results_file)

print("Evaluation results saved to isco_validation_results.json")

Accuracy: 0.8576800694243564, Hierarchical Precision: 0.9757462686567164, Hierarchical Recall: 0.9812382739212008, Hierarchical F-measure: 0.9784845650140319
Evaluation results saved to isco_validation_results.json


# Inter-rater agreement

In [70]:
import pandas as pd

# Alternative path kept for reference (commented out):
# icils_isco_int_ml = "/datasets/isco-data/processed/2018/icils_2018_isco_ml.parquet"
icils_isco_int_ml = "gs://isco-data-asia-southeast1/processed/2018/icils_2018_isco_ml.parquet"

# Keep only the columns used in the agreement analysis
agreement_columns = ['JOB', 'DUTIES', 'ISCO', 'ISCO_REL', 'LANGUAGE']
icils_df = pd.read_parquet(icils_isco_int_ml)[agreement_columns]

# Retain the rows in which both ISCO and ISCO_REL are present
isco_rel_df = icils_df.dropna(subset=['ISCO', 'ISCO_REL'])

# One group per value of the LANGUAGE column
grouped_df = isco_rel_df.groupby('LANGUAGE')

In [None]:

# Column layout of the per-language summary table
result_columns = ['Language', 'Accuracy', 'Hierarchical Precision', 'Hierarchical Recall', 'Hierarchical F1']

# Collect one record per language and build the DataFrame once after the loop;
# concatenating onto a growing frame inside the loop re-copies the table on every iteration
language_records = []

# Iterate over each language group
for language, group in grouped_df:
    references = group['ISCO'].tolist()
    predictions = group['ISCO_REL'].tolist()

    # ISCO is treated as the reference coding and ISCO_REL as the prediction
    rel_result = hierarchical_accuracy.compute(references=references, predictions=predictions)

    language_records.append({
        'Language': language,
        'Accuracy': rel_result['accuracy'],
        'Hierarchical Precision': rel_result['hierarchical_precision'],
        'Hierarchical Recall': rel_result['hierarchical_recall'],
        'Hierarchical F1': rel_result['hierarchical_fmeasure'],
    })

    # Print the result
    print(f"Language: {language}")
    print(f"Result: {rel_result}")
    print()

results_df = pd.DataFrame(language_records, columns=result_columns)

# Unweighted mean over languages (every language counts equally, regardless of its sample size)
average_accuracy = results_df['Accuracy'].mean()
average_hierarchical_precision = results_df['Hierarchical Precision'].mean()
average_hierarchical_recall = results_df['Hierarchical Recall'].mean()
average_hierarchical_f1 = results_df['Hierarchical F1'].mean()

# Append the averages as a final "Average" row
average_row = ['Average', average_accuracy, average_hierarchical_precision, average_hierarchical_recall, average_hierarchical_f1]
results_df.loc[len(results_df)] = average_row

results_df.to_csv('language_results.csv', index=False)

In [None]:
# Rows on which the two codings agree (ISCO equals ISCO_REL)
agreement_mask = isco_rel_df['ISCO'].eq(isco_rel_df['ISCO_REL'])
isco_rel_df_same = isco_rel_df.loc[agreement_mask]

isco_rel_df_same

In [None]:
# Rows on which the two codings disagree (ISCO differs from ISCO_REL)
disagreement_mask = isco_rel_df['ISCO'].ne(isco_rel_df['ISCO_REL'])
isco_rel_df_diff = isco_rel_df.loc[disagreement_mask]

isco_rel_df_diff

In [64]:
# Materialize the ISCO and ISCO_REL columns as two plain lists, one per coder
coder1, coder2 = (list(isco_rel_df[column]) for column in ('ISCO', 'ISCO_REL'))

In [None]:
# Hierarchical agreement between the two coders: coder1 serves as the reference,
# coder2 as the prediction
reliability_results = hierarchical_accuracy.compute(references=coder1, predictions=coder2)

# Persist the scores as JSON
with open("isco_rel_results.json", "w") as results_file:
    json.dump(reliability_results, results_file)

print("Evaluation results saved to isco_rel_results.json")

## Giskard model testing

In [1]:
import numpy as np
import pandas as pd
from scipy.special import softmax
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from giskard import Dataset, Model, scan, testing, GiskardClient, Suite

In [3]:
# Identifiers shared by the Giskard cells
MODEL_NAME = "ICILS/XLM-R-ISCO"
TEXT_COLUMN = "JOB_DUTIES"
TARGET_COLUMN = "ISCO_CODE_TITLE"

# Tokenizer and classifier are both resolved from the same model name
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Label <-> class-id mappings as stored in the model configuration
label2id: dict = model.config.label2id
id2label: dict = model.config.id2label

# First 500 rows of the test split as a DataFrame, with both code columns cast to strings
raw_data = (
    load_dataset("ICILS/multilingual_parental_occupations", split="test")
    .to_pandas()
    .iloc[:500]
    .astype({"ISCO": str, "ISCO_REL": str})
)

raw_data

Unnamed: 0,IDSTUD,JOB_DUTIES,ISCO,ISCO_REL,ISCO_TITLE,ISCO_CODE_TITLE,COUNTRY,LANGUAGE
0,10670109,forældre 1: Han arbejder som med-chef sammen...,7412,,Electrical Mechanics and Fitters,7412 Electrical Mechanics and Fitters,DNK,da
1,10130106,asistente de parbulo y basica. ayudaba en la e...,5312,5312,Teachers' Aides,5312 Teachers' Aides,CHL,es
2,10740120,trabajaba en el campo como capatas. aveces cui...,6121,,Livestock and Dairy Producers,6121 Livestock and Dairy Producers,URY,es
3,10170109,gas abastible. vende gas abastible,9621,5243,"Messengers, Package Deliverers and Luggage Por...","9621 Messengers, Package Deliverers and Luggag...",CHL,es
4,11480109,jordbruk. sår potatis tar upp potatis plogar h...,6111,6111,Field Crop and Vegetable Growers,6111 Field Crop and Vegetable Growers,FIN,sv
...,...,...,...,...,...,...,...,...
495,11780107,acountent mannager|she mannages calls for jobs...,1211,9998,Finance Managers,1211 Finance Managers,AUS,en
496,10850104,geometra/muratore. proggetta case e le restaura,3112,3112,Civil Engineering Technicians,3112 Civil Engineering Technicians,ITA,it
497,11460111,fa parte della misericordia. Trasporta i malat...,3258,3258,Ambulance Workers,3258 Ambulance Workers,ITA,it
498,10340111,사회복지사. 회사에서 복지원 관리,2635,2635,Social Work and Counselling Professionals,2635 Social Work and Counselling Professionals,KOR,ko


In [4]:
# Wrap the raw DataFrame (before any pre-processing) together with its ground-truth column
DATASET_DISPLAY_NAME = "ISCO-08 Parental Occupation Corpus"  # optional label for the wrapper

giskard_dataset = Dataset(df=raw_data, target=TARGET_COLUMN, name=DATASET_DISPLAY_NAME)

def prediction_function(df: pd.DataFrame) -> np.ndarray:
    """Return class probabilities for the texts in ``df[TEXT_COLUMN]``.

    The texts are tokenized as one padded batch, passed through ``model`` and the
    resulting logits are turned into probabilities with a row-wise softmax.

    Args:
        df: DataFrame that contains the ``TEXT_COLUMN`` column.

    Returns:
        Array with one row per input row and one column per logit.
    """
    # Local import: torch is not among this notebook's visible imports
    import torch

    texts = list(df[TEXT_COLUMN])
    # truncation=True caps each text at the tokenizer's maximum length so that
    # over-long inputs are shortened instead of being passed on at full length
    encoded_input = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    # Inference only: skip gradient tracking to save memory
    with torch.no_grad():
        output = model(**encoded_input)
    return softmax(output["logits"].detach().numpy(), axis=1)


# Column i of the probabilities returned by prediction_function belongs to class id i,
# so list the labels by ascending class id rather than relying on the insertion
# order of label2id. key=int also copes with ids stored as numeric strings.
ordered_labels = [id2label[class_id] for class_id in sorted(id2label, key=int)]

giskard_model = Model(
    model=prediction_function,  # A prediction function that encapsulates all the data pre-processing steps
    model_type="classification",  # Either regression, classification or text_generation.
    name="XLM-R ISCO",  # Optional
    classification_labels=ordered_labels,  # Their order MUST be identical to the prediction_function's output columns
    feature_names=[TEXT_COLUMN],  # Default: all columns of your dataset
)

2024-03-15 01:07:06,923 pid:166193 MainThread giskard.datasets.base INFO     Your 'pandas.DataFrame' is successfully wrapped by Giskard's 'Dataset' wrapper class.
2024-03-15 01:07:06,925 pid:166193 MainThread giskard.models.automodel INFO     Your 'prediction_function' is successfully wrapped by Giskard's 'PredictionFunctionModel' wrapper class.




In [5]:
# Run the Giskard scan on the wrapped model and dataset; the returned object is
# kept in `results` so that the next cell can display and export it.
results = scan(giskard_model, giskard_dataset)

2024-03-15 01:07:10,228 pid:166193 MainThread giskard.datasets.base INFO     Casting dataframe columns from {'JOB_DUTIES': 'object'} to {'JOB_DUTIES': 'object'}


2024-03-15 01:07:12,838 pid:166193 MainThread giskard.utils.logging_utils INFO     Predicted dataset with shape (10, 8) executed in 0:00:02.617399
2024-03-15 01:07:12,848 pid:166193 MainThread giskard.datasets.base INFO     Casting dataframe columns from {'JOB_DUTIES': 'object'} to {'JOB_DUTIES': 'object'}
2024-03-15 01:07:13,007 pid:166193 MainThread giskard.utils.logging_utils INFO     Predicted dataset with shape (1, 8) executed in 0:00:00.166843
2024-03-15 01:07:13,015 pid:166193 MainThread giskard.datasets.base INFO     Casting dataframe columns from {'JOB_DUTIES': 'object'} to {'JOB_DUTIES': 'object'}
2024-03-15 01:07:13,017 pid:166193 MainThread giskard.utils.logging_utils INFO     Predicted dataset with shape (10, 8) executed in 0:00:00.009517
2024-03-15 01:07:13,029 pid:166193 MainThread giskard.datasets.base INFO     Casting dataframe columns from {'JOB_DUTIES': 'object'} to {'JOB_DUTIES': 'object'}


: 

In [None]:
# Show the scan results inline. `results` must already exist in the kernel;
# the NameError recorded below this cell shows it was not defined when the cell last ran.
display(results)

# Save it to a file
results.to_html("scan_report.html")

NameError: name 'results' is not defined

In [2]:
import giskard
from datasets import load_dataset

# Load the test split of the occupations dataset.
dataset = load_dataset("ICILS/multilingual_parental_occupations", split="test")

# Replace this with your own data & model creation.
# df = giskard.demo.titanic_df()
# NOTE(review): `dataset` is passed on exactly as load_dataset returned it (no .to_pandas()
# call as in the earlier Giskard cell), while the comment below speaks of a Pandas DataFrame — confirm.
df = dataset
# NOTE(review): the preprocessing function and the model come from Giskard's Titanic demo,
# not from the ISCO classifier — presumably leftover template code; verify before relying on this cell.
demo_data_preprocessing_function, demo_sklearn_model = giskard.demo.titanic_pipeline()

# Wrap your Pandas DataFrame
giskard_dataset = giskard.Dataset(df=df,
                                  target="ISCO_CODE_TITLE",
                                  name="ISCO-08 Parental Occupation Corpus",
                                  cat_columns=['LANGUAGE', 'COUNTRY'])

# Wrap your model
def prediction_function(df):
    """Preprocess ``df`` with the demo preprocessing function and return the demo model's ``predict_proba`` output."""
    preprocessed_df = demo_data_preprocessing_function(df)
    return demo_sklearn_model.predict_proba(preprocessed_df)

# NOTE(review): the model name and feature_names below are Titanic columns, whereas the
# dataset wrapped above is the ISCO corpus — the two do not appear to belong together; confirm.
giskard_model = giskard.Model(model=prediction_function,
                              model_type="classification",
                              name="Titanic model",
                              classification_labels=demo_sklearn_model.classes_,
                              feature_names=['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked'])

# Then apply the scan
results = giskard.scan(giskard_model, giskard_dataset)


# Create a Giskard client
# NOTE(review): `key` still holds the placeholder text; a real key should be supplied
# at runtime (e.g. from an environment variable) rather than written into the notebook.
client = giskard.GiskardClient(
    url="https://danieldux-giskard.hf.space",  # URL of your Giskard instance
    key="<Generate your API Key on the Giskard Hub settings page first>")


# Upload an automatically created test suite to the current project ✉️
# The test suite is generated from the scan results and uploaded under the "xlmr_isco" project key.
results.generate_test_suite("Test suite created by scan").upload(client, "xlmr_isco")


GiskardError: No details or messages available.