update submit
Browse files — src/submission/submit.py (+3 -3)
src/submission/submit.py
CHANGED
@@ -70,7 +70,7 @@ def get_top_prediction(text, tokenizer, model):
|
|
70 |
return top_option
|
71 |
|
72 |
@spaces.GPU(duration=120)
|
73 |
-
def evaluate_model_accuracy_by_subject(model_name, num_questions_per_subject=30):
|
74 |
try:
|
75 |
# Load the model and tokenizer
|
76 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
@@ -87,7 +87,7 @@ def evaluate_model_accuracy_by_subject(model_name, num_questions_per_subject=30)
|
|
87 |
|
88 |
# Load your custom MMMLU dataset from HuggingFace
|
89 |
dataset = load_dataset("Omartificial-Intelligence-Space/Arabic_Openai_MMMLU")
|
90 |
-
dataset = dataset['test']
|
91 |
|
92 |
# Filter out excluded subjects
|
93 |
dataset = dataset.filter(lambda x: x['Subject'] not in excluded_subjects)
|
@@ -234,7 +234,7 @@ def add_new_eval(
|
|
234 |
|
235 |
# Now, perform the evaluation
|
236 |
try:
|
237 |
-
overall_accuracy, subject_results = evaluate_model_accuracy_by_subject(model, num_questions_per_subject=
|
238 |
if isinstance(overall_accuracy, str) and overall_accuracy.startswith("Error"):
|
239 |
return styled_error(overall_accuracy)
|
240 |
except Exception as e:
|
|
|
70 |
return top_option
|
71 |
|
72 |
@spaces.GPU(duration=120)
|
73 |
+
def evaluate_model_accuracy_by_subject(model_name, num_questions_per_subject=100):
|
74 |
try:
|
75 |
# Load the model and tokenizer
|
76 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
|
|
87 |
|
88 |
# Load your custom MMMLU dataset from HuggingFace
|
89 |
dataset = load_dataset("Omartificial-Intelligence-Space/Arabic_Openai_MMMLU")
|
90 |
+
dataset = dataset['test'[:100]]
|
91 |
|
92 |
# Filter out excluded subjects
|
93 |
dataset = dataset.filter(lambda x: x['Subject'] not in excluded_subjects)
|
|
|
234 |
|
235 |
# Now, perform the evaluation
|
236 |
try:
|
237 |
+
overall_accuracy, subject_results = evaluate_model_accuracy_by_subject(model, num_questions_per_subject=100)
|
238 |
if isinstance(overall_accuracy, str) and overall_accuracy.startswith("Error"):
|
239 |
return styled_error(overall_accuracy)
|
240 |
except Exception as e:
|