from dataclasses import dataclass, make_dataclass
from enum import Enum
import pandas as pd
# changes to be made here
from src.about import HarnessTasks, OpenEndedColumns, MedSafetyColumns, MedicalSummarizationColumns, ACIColumns, SOAPColumns, ClosedEndedArabicColumns
from src.envs import PRIVATE_REPO
import json
import gradio as gr
def fields(raw_class):
    """Return the class-level attribute values of raw_class, skipping dunder entries."""
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
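# Example: for a class C defining a = 1 and b = 2, fields(C) returns [1, 2] --
# the attribute *values* in definition order, unlike dataclasses.fields().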
# These classes define the user-facing column names,
# so that a modification only needs to be made here
# rather than throughout the code.
@dataclass
class ColumnContent:
    # changes to be made here
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    invariant: bool = True
    never_hidden: bool = False
    dataset_task_col: bool = False
    open_ended_col: bool = False
    med_safety_col: bool = False
    medical_summarization_col: bool = False
    aci_col: bool = False
    soap_col: bool = False
    closed_ended_arabic_col: bool = False
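# Example (hypothetical column, for illustration only): a score shown exclusively
# in the open-ended tab would be declared as
#   ColumnContent("My Score", "number", True, open_ended_col=True, invariant=False)
# invariant=False keeps it out of the other tabs' column selections (see the
# *_COLS lists near the end of this file).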
## Leaderboard columns
auto_eval_column_dict = []
auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, True)])
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average", "number", True, False, dataset_task_col=True, closed_ended_arabic_col=True, invariant=False)])
auto_eval_column_dict.append(["overall", ColumnContent, ColumnContent("Overall Score", "number", True, False, medical_summarization_col=True, aci_col=True, soap_col=True, invariant=False)])
for task in HarnessTasks:
    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True, False, dataset_task_col=True, invariant=False)])
for column in OpenEndedColumns:
    auto_eval_column_dict.append([column.name, ColumnContent, ColumnContent(column.value.col_name, "number", True, False, open_ended_col=True, invariant=False)])
# changes to be made here
for column in MedSafetyColumns:
    if column.value.col_name in ("95% CI", "Harmfulness Score"):
        auto_eval_column_dict.append([column.name, ColumnContent, ColumnContent(column.value.col_name, "number", True, False, med_safety_col=True, invariant=False)])
    else:
        auto_eval_column_dict.append([column.name, ColumnContent, ColumnContent(column.value.col_name, "number", False, False, med_safety_col=True, invariant=False)])
for column in MedicalSummarizationColumns:
    auto_eval_column_dict.append([column.name, ColumnContent, ColumnContent(column.value.col_name, "number", True, False, medical_summarization_col=True, invariant=False)])
for column in ACIColumns:
    auto_eval_column_dict.append([column.name, ColumnContent, ColumnContent(column.value.col_name, "number", True, False, aci_col=True, invariant=False)])
for column in SOAPColumns:
    auto_eval_column_dict.append([column.name, ColumnContent, ColumnContent(column.value.col_name, "number", True, False, soap_col=True, invariant=False)])
# if PRIVATE_REPO:
for column in ClosedEndedArabicColumns:
    auto_eval_column_dict.append([column.name, ColumnContent, ColumnContent(column.value.col_name, "number", True, False, closed_ended_arabic_col=True, invariant=False)])
auto_eval_column_dict.append(["is_domain_specific", ColumnContent, ColumnContent("Is Domain Specific", "bool", False)])
auto_eval_column_dict.append(["use_chat_template", ColumnContent, ColumnContent("Uses Chat Template", "bool", False)])
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False)])
# auto_eval_column_dict.append(["backbone", ColumnContent, ColumnContent("Base Model", "str", False)])
auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub β€οΈ", "number", False)])
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False, True)])
# auto_eval_column_dict.append(["display_result", ColumnContent, ColumnContent("Display Result", "bool", False, True)])
auto_eval_column_dict.append(["date", ColumnContent, ColumnContent("Submission Date", "str", False)])
# We use make_dataclass to dynamically fill in the score columns from the task enums
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
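# Each entry becomes a class attribute holding its ColumnContent default, e.g.
# AutoEvalColumn.model.name == "Model" and AutoEvalColumn.average.type == "number".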
## For the queue columns in the submission tab
# changes to be made here
@dataclass(frozen=True)
class EvalQueueColumn:  # Queue column
    model = ColumnContent("model", "markdown", True)
    revision = ColumnContent("revision", "str", True)
    private = ColumnContent("private", "bool", True)
    model_type = ColumnContent("model_type", "str", True)
    precision = ColumnContent("precision", "str", True)
    weight_type = ColumnContent("weight_type", "str", True)
    closed_ended_status = ColumnContent("closed_ended_status", "str", True)
    open_ended_status = ColumnContent("open_ended_status", "str", True)
    med_safety_status = ColumnContent("med_safety_status", "str", True)
    medical_summarization_status = ColumnContent("medical_summarization_status", "str", True)
    note_generation_status = ColumnContent("note_generation_status", "str", True)
    if PRIVATE_REPO:
        closed_ended_arabic_status = ColumnContent("closed_ended_arabic_status", "str", True)
## All the model information that we might need
@dataclass
class ModelDetails:
    name: str
    display_name: str = ""
    symbol: str = ""  # emoji
class ModelType(Enum):
    # ZEROSHOT = ModelDetails(name="zero-shot", symbol="⚫")
    # FINETUNED = ModelDetails(name="fine-tuned", symbol="⚪")
    PT = ModelDetails(name="pretrained", symbol="🟢")
    # FT = ModelDetails(name="fine-tuned", symbol="🔶")
    # DS = ModelDetails(name="domain-specific", symbol="🏥")
    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
    RL = ModelDetails(name="preference-tuned", symbol="🟦")
    Unknown = ModelDetails(name="", symbol="?")

    def to_str(self, separator=" "):
        return f"{self.value.symbol}{separator}{self.value.name}"

    @staticmethod
    def from_str(type):
        # if "zero-shot" in type or "⚫" in type:
        #     return ModelType.ZEROSHOT
        # if "fine-tuned" in type or "⚪" in type:
        #     return ModelType.FINETUNED
        # if "fine-tuned" in type or "🔶" in type:
        #     return ModelType.FT
        if "pretrained" in type or "🟢" in type:
            return ModelType.PT
        if "preference-tuned" in type or "🟦" in type:
            return ModelType.RL
        if "instruction-tuned" in type or "⭕" in type:
            return ModelType.IFT
        # if "domain-specific" in type or "🏥" in type:
        #     return ModelType.DS
        return ModelType.Unknown
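# Example round trip: ModelType.PT.to_str() == "🟢 pretrained", and
# ModelType.from_str("🟢 pretrained") is ModelType.PT.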
class ModelArch(Enum):
    Encoder = ModelDetails("Encoder")
    Decoder = ModelDetails("Decoder")
    GLiNEREncoder = ModelDetails("GLiNER Encoder")
    Unknown = ModelDetails(name="Other", symbol="?")

    def to_str(self, separator=" "):
        return f"{self.value.name}"

    @staticmethod
    def from_str(type):
        if type == "Encoder":
            return ModelArch.Encoder
        if type == "Decoder":
            return ModelArch.Decoder
        if type == "GLiNER Encoder":
            return ModelArch.GLiNEREncoder
        # if "unknown" in type:
        #     return ModelArch.Unknown
        return ModelArch.Unknown
class WeightType(Enum):
    Adapter = ModelDetails("Adapter")
    Original = ModelDetails("Original")
    Delta = ModelDetails("Delta")
    Unknown = ModelDetails("?")

    @staticmethod
    def from_str(wt):
        if "original" in wt.lower():
            return WeightType.Original
        if "adapter" in wt.lower():
            return WeightType.Adapter
        if "delta" in wt.lower():
            return WeightType.Delta
        return WeightType.Unknown
class Precision(Enum):
    auto = ModelDetails("auto")
    float16 = ModelDetails("float16")
    bfloat16 = ModelDetails("bfloat16")
    float32 = ModelDetails("float32")
    # qt_8bit = ModelDetails("8bit")
    # qt_4bit = ModelDetails("4bit")
    # qt_GPTQ = ModelDetails("GPTQ")
    Unknown = ModelDetails("?")

    @staticmethod
    def from_str(precision):
        if precision in ["auto"]:
            return Precision.auto
        if precision in ["torch.float16", "float16"]:
            return Precision.float16
        if precision in ["torch.bfloat16", "bfloat16"]:
            return Precision.bfloat16
        if precision in ["float32"]:
            return Precision.float32
        # if precision in ["8bit"]:
        #     return Precision.qt_8bit
        # if precision in ["4bit"]:
        #     return Precision.qt_4bit
        # if precision in ["GPTQ", "None"]:
        #     return Precision.qt_GPTQ
        return Precision.Unknown
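# Example: Precision.from_str("torch.float16") is Precision.float16; any
# unrecognized string falls back to Precision.Unknown.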
class PromptTemplateName(Enum):
    UniversalNERTemplate = "universal_ner"
    LLMHTMLHighlightedSpansTemplate = "llm_html_highlighted_spans"
    LLMHTMLHighlightedSpansTemplateV1 = "llm_html_highlighted_spans_v1"
    LLamaNERTemplate = "llama_70B_ner"
    # MixtralNERTemplate = "mixtral_ner_v0.3"
class EvaluationMetrics(Enum):
    SpanBased = "Span Based"
    TokenBased = "Token Based"
# Column selection
# changes to be made here
DATASET_COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden and (c.dataset_task_col or c.invariant)]
OPEN_ENDED_COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden and (c.open_ended_col or c.invariant)]
MED_SAFETY_COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden and (c.med_safety_col or c.invariant)]
MEDICAL_SUMMARIZATION_COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden and (c.medical_summarization_col or c.invariant)]
ACI_COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden and (c.aci_col or c.invariant)]
SOAP_COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden and (c.soap_col or c.invariant)]
# if PRIVATE_REPO:
CLOSED_ENDED_ARABIC_COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden and (c.closed_ended_arabic_col or c.invariant)]
# CROSS_EXAMINATION_COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden and (c.cross_examination_col or c.invariant)]
# DATASET_COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.open_ended_col and not c.med_safety_col and not c.cross_examination_col]
# OPEN_ENDED_COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.dataset_task_col and not c.med_safety_col and not c.cross_examination_col]
# MED_SAFETY_COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.open_ended_col and not c.dataset_task_col and not c.cross_examination_col]
# CROSS_EXAMINATION_COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.open_ended_col and not c.med_safety_col and not c.dataset_task_col]
TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
# changes to be made here
DATASET_BENCHMARK_COLS = [t.value.col_name for t in HarnessTasks]
OPEN_ENDED_BENCHMARK_COLS = [t.value.col_name for t in OpenEndedColumns]
MED_SAFETY_BENCHMARK_COLS = [t.value.col_name for t in MedSafetyColumns]
MEDICAL_SUMMARIZATION_BENCHMARK_COLS = [t.value.col_name for t in MedicalSummarizationColumns]
ACI_BENCHMARK_COLS = [t.value.col_name for t in ACIColumns]
SOAP_BENCHMARK_COLS = [t.value.col_name for t in SOAPColumns]
# if PRIVATE_REPO:
CLOSED_ENDED_ARABIC_BENCHMARK_COLS = [t.value.col_name for t in ClosedEndedArabicColumns]
# CROSS_EXAMINATION_BENCHMARK_COLS = [t.value.col_name for t in CrossExaminationTasks]
NUMERIC_INTERVALS = {
    "?": pd.Interval(-100, 0, closed="right"),
    "~1.5": pd.Interval(0, 2, closed="right"),
    "~3": pd.Interval(2, 4, closed="right"),
    "~7": pd.Interval(4, 9, closed="right"),
    "~13": pd.Interval(9, 20, closed="right"),
    "~35": pd.Interval(20, 45, closed="right"),
    "~60": pd.Interval(45, 70, closed="right"),
    "70+": pd.Interval(70, 10000, closed="right"),
}
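# Illustrative helper (an assumption for this sketch, not referenced elsewhere):
# maps a parameter count in billions to its size bucket above.
def _params_to_size_label(params: float) -> str:
    for label, interval in NUMERIC_INTERVALS.items():
        if params in interval:  # pd.Interval supports membership tests
            return label
    return "?"  # counts outside every interval (e.g. > 10000) fall through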
def render_generation_templates(task: str, generation_type: str):
    with open("src/display/templates/system_prompts.json", "r") as f:
        system_prompt = json.load(f)[f"{task}+_+{generation_type}"]
    with open(f"src/display/templates/{task}+_+{generation_type}.jinja", "r") as f:
        user_prompt = f.read()
    system_prompt_textbox = gr.Textbox(
        value=system_prompt,
        label="System Prompt",
        lines=2,
        elem_id=f"system-prompt-textbox-{task}-{generation_type}",
        show_copy_button=True,
    )
    user_prompt_textbox = gr.Textbox(
        value=user_prompt,
        label="User Prompt",
        lines=15,
        elem_id=f"user-prompt-textbox-{task}-{generation_type}",
        show_copy_button=True,
    )
    return system_prompt_textbox, user_prompt_textbox
    # return None, None
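# Illustrative usage (hypothetical task / generation_type values; the real keys
# live in src/display/templates/system_prompts.json):
#   with gr.Blocks():
#       sys_box, user_box = render_generation_templates("soap", "summarization")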