Omartificial-Intelligence-Space committed on
Commit
9d42a0b
·
verified ·
1 Parent(s): 22c3520

update submit

Browse files
Files changed (1) hide show
  1. src/submission/submit.py +3 -3
src/submission/submit.py CHANGED
@@ -70,7 +70,7 @@ def get_top_prediction(text, tokenizer, model):
70
  return top_option
71
 
72
  @spaces.GPU(duration=120)
73
- def evaluate_model_accuracy_by_subject(model_name, num_questions_per_subject=30):
74
  try:
75
  # Load the model and tokenizer
76
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
@@ -87,7 +87,7 @@ def evaluate_model_accuracy_by_subject(model_name, num_questions_per_subject=30)
87
 
88
  # Load your custom MMMLU dataset from HuggingFace
89
  dataset = load_dataset("Omartificial-Intelligence-Space/Arabic_Openai_MMMLU")
90
- dataset = dataset['test']
91
 
92
  # Filter out excluded subjects
93
  dataset = dataset.filter(lambda x: x['Subject'] not in excluded_subjects)
@@ -234,7 +234,7 @@ def add_new_eval(
234
 
235
  # Now, perform the evaluation
236
  try:
237
- overall_accuracy, subject_results = evaluate_model_accuracy_by_subject(model, num_questions_per_subject=30)
238
  if isinstance(overall_accuracy, str) and overall_accuracy.startswith("Error"):
239
  return styled_error(overall_accuracy)
240
  except Exception as e:
 
70
  return top_option
71
 
72
  @spaces.GPU(duration=120)
73
+ def evaluate_model_accuracy_by_subject(model_name, num_questions_per_subject=100):
74
  try:
75
  # Load the model and tokenizer
76
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
87
 
88
  # Load your custom MMMLU dataset from HuggingFace
89
  dataset = load_dataset("Omartificial-Intelligence-Space/Arabic_Openai_MMMLU")
90
+ dataset = dataset['test'[:100]]
91
 
92
  # Filter out excluded subjects
93
  dataset = dataset.filter(lambda x: x['Subject'] not in excluded_subjects)
 
234
 
235
  # Now, perform the evaluation
236
  try:
237
+ overall_accuracy, subject_results = evaluate_model_accuracy_by_subject(model, num_questions_per_subject=100)
238
  if isinstance(overall_accuracy, str) and overall_accuracy.startswith("Error"):
239
  return styled_error(overall_accuracy)
240
  except Exception as e: