tathagataraha committed on
Commit
3c09632
·
1 Parent(s): faceee1

[MODIFY] Documentation

Browse files
Files changed (3) hide show
  1. app.py +3 -0
  2. src/about.py +6 -0
  3. src/leaderboard/read_evals.py +7 -0
app.py CHANGED
@@ -11,6 +11,7 @@ from src.about import (
11
  EVALUATION_QUEUE_TEXT,
12
  INTRODUCTION_TEXT,
13
  LLM_BENCHMARKS_TEXT_1,
 
14
  # EVALUATION_EXAMPLE_IMG,
15
  # LLM_BENCHMARKS_TEXT_2,
16
  # ENTITY_DISTRIBUTION_IMG,
@@ -580,6 +581,7 @@ with demo:
580
  )
581
 
582
  with gr.TabItem("πŸ… Medical Summarization", elem_id="llm-benchmark-tab-table", id=3):
 
583
  with gr.Row():
584
  with gr.Column():
585
  with gr.Row():
@@ -689,6 +691,7 @@ with demo:
689
  queue=True,
690
  )
691
  with gr.TabItem("πŸ… Note generation", elem_id="llm-benchmark-tab-table", id=4):
 
692
  with gr.Tabs(elem_classes="tab-buttons2") as tabs:
693
  with gr.TabItem("ACI Bench", elem_id="llm-benchmark-tab-table2", id=0):
694
  with gr.Row():
 
11
  EVALUATION_QUEUE_TEXT,
12
  INTRODUCTION_TEXT,
13
  LLM_BENCHMARKS_TEXT_1,
14
+ CROSS_EVALUATION_METRICS,
15
  # EVALUATION_EXAMPLE_IMG,
16
  # LLM_BENCHMARKS_TEXT_2,
17
  # ENTITY_DISTRIBUTION_IMG,
 
581
  )
582
 
583
  with gr.TabItem("πŸ… Medical Summarization", elem_id="llm-benchmark-tab-table", id=3):
584
+ gr.Markdown(CROSS_EVALUATION_METRICS, elem_classes="markdown-text")
585
  with gr.Row():
586
  with gr.Column():
587
  with gr.Row():
 
691
  queue=True,
692
  )
693
  with gr.TabItem("πŸ… Note generation", elem_id="llm-benchmark-tab-table", id=4):
694
+ gr.Markdown(CROSS_EVALUATION_METRICS, elem_classes="markdown-text")
695
  with gr.Tabs(elem_classes="tab-buttons2") as tabs:
696
  with gr.TabItem("ACI Bench", elem_id="llm-benchmark-tab-table2", id=0):
697
  with gr.Row():
src/about.py CHANGED
@@ -175,6 +175,12 @@ Select this option if your model uses a chat template. The chat template will be
175
  Upon successful submission of your request, your model's result would be updated on the leaderboard within 5 working days!
176
  """
177
 
 
 
 
 
 
 
178
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
179
  CITATION_BUTTON_TEXT = r"""
180
  @misc{kanithi2024mediccomprehensiveframeworkevaluating,
 
175
  Upon successful submission of your request, your model's result would be updated on the leaderboard within 5 working days!
176
  """
177
 
178
+ CROSS_EVALUATION_METRICS = """
179
+ - **Coverage**: Measures how thoroughly the summary covers the original document. A higher score means the summary includes more details from the original.
180
+ - **Conformity**: Also called the non-contradiction score, this checks if the summary avoids contradicting the original document. A higher score means the summary aligns better with the original.
181
+ - **Consistency**: Measures the level of non-hallucination, or how much the summary sticks to the facts in the document. A higher score means the summary is more factual and accurate.
182
+ - **Conciseness**: Measures how brief the summary is. A higher score means the summary is more concise. A negative score means the summary is longer than the original document.
183
+ """
184
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
185
  CITATION_BUTTON_TEXT = r"""
186
  @misc{kanithi2024mediccomprehensiveframeworkevaluating,
src/leaderboard/read_evals.py CHANGED
@@ -161,6 +161,13 @@ class EvalResult:
161
  continue
162
  mean_acc = np.mean(accs) # * 100.0
163
  soap_results[task.benchmark] = mean_acc
 
 
 
 
 
 
 
164
  # types_results = {}
165
  # for clinical_type in ClinicalTypes:
166
  # clinical_type = clinical_type.value
 
161
  continue
162
  mean_acc = np.mean(accs) # * 100.0
163
  soap_results[task.benchmark] = mean_acc
164
+ if harness_results == {} or open_ended_results == {} or med_safety_results == {} or medical_summarization_results == {} or aci_results == {} or soap_results == {}:
165
+ harness_results = {}
166
+ open_ended_results = {}
167
+ med_safety_results = {}
168
+ medical_summarization_results = {}
169
+ aci_results = {}
170
+ soap_results = {}
171
  # types_results = {}
172
  # for clinical_type in ClinicalTypes:
173
  # clinical_type = clinical_type.value