Lisa Dunlap committed on
Commit
6334bb3
·
1 Parent(s): fb9b9d1

updated help page

Browse files
stringsight/dashboard/app.py CHANGED
@@ -341,11 +341,11 @@ def create_app() -> gr.Blocks:
341
  with gr.Group(visible=False, elem_id="help-panel") as help_panel:
342
  help_md = gr.Markdown(
343
  """
344
- **Overview**: Compare model quality metrics and view model cards with top behavior clusters. Use Filter Controls to refine and switch between Plot/Table.
345
 
346
- **View Clusters**: Explore clusters interactively. Use the search field in this tab to filter cluster labels; optional tag filter appears when available.
347
 
348
- **View Examples**: Inspect individual examples with rich conversation rendering. Filter by prompt/model/cluster; adjust max examples and formatting options.
349
  """
350
  )
351
  help_close_btn = gr.Button("Close", variant="secondary", elem_id="help-close-btn")
 
341
  with gr.Group(visible=False, elem_id="help-panel") as help_panel:
342
  help_md = gr.Markdown(
343
  """
344
+ **Overview**: Compare model metrics and see which behaviors are more distinctive of each model.
345
 
346
+ **View Clusters**: Explore the individual behaviors seen in each property cluster. To view the example for a behavior, copy its property description into the search on the examples tab.
347
 
348
+ **View Examples**: Inspect individual examples and localize each example to the property cluster that it belongs to.
349
  """
350
  )
351
  help_close_btn = gr.Button("Close", variant="secondary", elem_id="help-close-btn")
stringsight/dashboard/utils.py CHANGED
@@ -1706,6 +1706,7 @@ def get_example_data(
1706
  "response": response_val,
1707
  "property_description": row.get("property_description", "N/A"),
1708
  "score": row.get("score", "N/A"),
 
1709
  "fine_cluster_id": fine_cluster_id,
1710
  "fine_cluster_label": fine_cluster_label,
1711
  "coarse_cluster_id": coarse_cluster_id,
@@ -1850,26 +1851,7 @@ def format_examples_display(examples: List[Dict[str, Any]],
1850
  )
1851
 
1852
  # Score display for summary (only for non-side-by-side or when not shown in side-by-side)
1853
- score_badge = ""
1854
- if not example.get('is_side_by_side', False) and example['score'] != 'N/A':
1855
- try:
1856
- score_val = float(example['score'])
1857
- score_color = '#28a745' if score_val >= 0 else '#dc3545'
1858
- score_badge = f"""
1859
- <span style="
1860
- background: {score_color};
1861
- color: white;
1862
- padding: 4px 8px;
1863
- border-radius: 12px;
1864
- font-size: 12px;
1865
- font-weight: bold;
1866
- margin-left: 10px;
1867
- ">
1868
- Score: {score_val:.3f}
1869
- </span>
1870
- """
1871
- except:
1872
- pass
1873
 
1874
  # Create short preview of prompt for summary
1875
  prompt_preview = example['prompt'][:80] + "..." if len(example['prompt']) > 80 else example['prompt']
@@ -1878,30 +1860,48 @@ def format_examples_display(examples: List[Dict[str, Any]],
1878
  # First example is expanded by default
1879
  open_attr = "open" if i == 1 else ""
1880
 
1881
- # Build top-of-card score section (above conversation) if score exists
1882
- score_section_html = ""
1883
- raw_score = example.get('score')
1884
- numeric_score: float | None = None
1885
- if isinstance(raw_score, (int, float)):
1886
- numeric_score = float(raw_score)
1887
- elif isinstance(raw_score, str):
1888
- # Accept simple numeric strings without try/except
1889
- if re.match(r"^[+-]?\d+(?:\.\d+)?$", raw_score.strip() or ""):
1890
- numeric_score = float(raw_score)
1891
- # Avoid duplicating score display for side-by-side, which renders its own score section
1892
- if numeric_score is not None and not example.get('is_side_by_side', False):
1893
- color_bg = '#dcfce7' if numeric_score >= 0 else '#fee2e2'
1894
- color_fg = '#166534' if numeric_score >= 0 else '#991b1b'
1895
- score_chip = (
1896
- f"<span style=\"display:inline-block; padding:4px 10px; border-radius:999px; "
1897
- f"background:{color_bg}; color:{color_fg}; font-weight:600; font-size:12px; "
1898
- f"border:1px solid rgba(0,0,0,0.05);\">Score: {numeric_score:.3f}</span>"
1899
- )
1900
- score_section_html = (
1901
- f"<div style=\"margin: 0 0 12px 0; display:flex; align-items:center; flex-wrap:wrap; gap:8px;\">"
1902
- f"{score_chip}"
1903
- f"</div>"
1904
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1905
 
1906
  html_out += f"""
1907
  <details {open_attr} style="border: 1px solid #dee2e6; border-radius: 8px; margin-bottom: 15px; background: white; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
@@ -1932,6 +1932,7 @@ def format_examples_display(examples: List[Dict[str, Any]],
1932
  <span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#f3f4f6; border:1px solid #e5e7eb;">ID: {html.escape(str(example['id']))}</span>
1933
  <span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#f3f4f6; border:1px solid #e5e7eb;">Model: {html.escape(str(example['model']))}</span>
1934
  {tag_badge}
 
1935
  {(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#ecfdf5; color:#047857; border:1px solid #bbf7d0;">Category: {html.escape(str(example["category"]))}</span>' if example["category"] not in [None, "N/A", "None", "", "null"] and str(example["category"]).strip() != "" else '')}
1936
  {(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#eff6ff; color:#1d4ed8; border:1px solid #dbeafe;">Type: {html.escape(str(example["type"]))}</span>' if example["type"] not in [None, "N/A", "None", "", "null"] and str(example["type"]).strip() != "" else '')}
1937
  {(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#fff7ed; color:#c2410c; border:1px solid #fed7aa;">Impact: {html.escape(str(example["impact"]))}</span>' if example["impact"] not in [None, "N/A", "None", "", "null"] and str(example["impact"]).strip() != "" else '')}
@@ -1956,8 +1957,6 @@ def format_examples_display(examples: List[Dict[str, Any]],
1956
  example.get("evidence") not in [None, "N/A", "None", "", "null"] and str(example.get("evidence", "")).strip() != "",
1957
  ]) else ''}
1958
 
1959
- {score_section_html}
1960
-
1961
  <div style="margin-bottom: 15px;">
1962
  <div style="border-radius: 6px; font-size: 15px; line-height: 1.5;">
1963
  {conversation_html}
 
1706
  "response": response_val,
1707
  "property_description": row.get("property_description", "N/A"),
1708
  "score": row.get("score", "N/A"),
1709
+ "scores": row.get("scores", None),
1710
  "fine_cluster_id": fine_cluster_id,
1711
  "fine_cluster_label": fine_cluster_label,
1712
  "coarse_cluster_id": coarse_cluster_id,
 
1851
  )
1852
 
1853
  # Score display for summary (only for non-side-by-side or when not shown in side-by-side)
1854
+ # (Removed) score_badge in header; scores will be shown in badges row instead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1855
 
1856
  # Create short preview of prompt for summary
1857
  prompt_preview = example['prompt'][:80] + "..." if len(example['prompt']) > 80 else example['prompt']
 
1860
  # First example is expanded by default
1861
  open_attr = "open" if i == 1 else ""
1862
 
1863
+ # Build score chips for badges row (show scalar score and/or scores dict), excluding side-by-side
1864
+ score_chips_html = ""
1865
+ if not example.get('is_side_by_side', False):
1866
+ chips: list[str] = []
1867
+ # Scalar score
1868
+ raw_score = example.get('score')
1869
+ if isinstance(raw_score, (int, float)):
1870
+ sv = float(raw_score)
1871
+ bg = '#ecfdf5' if sv >= 0 else '#fee2e2'
1872
+ fg = '#047857' if sv >= 0 else '#991b1b'
1873
+ chips.append(
1874
+ f"<span style=\"display:inline-block; padding:2px 8px; border-radius:999px; background:{bg}; color:{fg}; border:1px solid #e5e7eb;\">Score: {sv:.3f}</span>"
1875
+ )
1876
+ elif isinstance(raw_score, str):
1877
+ s = raw_score.strip() if raw_score is not None else ""
1878
+ if s and re.match(r"^[+-]?\d+(?:\.\d+)?$", s):
1879
+ sv = float(s)
1880
+ bg = '#ecfdf5' if sv >= 0 else '#fee2e2'
1881
+ fg = '#047857' if sv >= 0 else '#991b1b'
1882
+ chips.append(
1883
+ f"<span style=\"display:inline-block; padding:2px 8px; border-radius:999px; background:{bg}; color:{fg}; border:1px solid #e5e7eb;\">Score: {sv:.3f}</span>"
1884
+ )
1885
+ # Scores dict
1886
+ raw_scores = example.get('scores')
1887
+ if isinstance(raw_scores, dict):
1888
+ for k, v in raw_scores.items():
1889
+ label = str(k)
1890
+ val: Optional[float] = None
1891
+ if isinstance(v, (int, float)):
1892
+ val = float(v)
1893
+ elif isinstance(v, str):
1894
+ vs = v.strip()
1895
+ if vs and re.match(r"^[+-]?\d+(?:\.\d+)?$", vs):
1896
+ val = float(vs)
1897
+ if val is not None:
1898
+ bg = '#ecfdf5' if val >= 0 else '#fee2e2'
1899
+ fg = '#047857' if val >= 0 else '#991b1b'
1900
+ chips.append(
1901
+ f"<span style=\"display:inline-block; padding:2px 8px; border-radius:999px; background:{bg}; color:{fg}; border:1px solid #e5e7eb;\">{html.escape(label)}: {val:.3f}</span>"
1902
+ )
1903
+ if chips:
1904
+ score_chips_html = "".join(chips)
1905
 
1906
  html_out += f"""
1907
  <details {open_attr} style="border: 1px solid #dee2e6; border-radius: 8px; margin-bottom: 15px; background: white; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
 
1932
  <span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#f3f4f6; border:1px solid #e5e7eb;">ID: {html.escape(str(example['id']))}</span>
1933
  <span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#f3f4f6; border:1px solid #e5e7eb;">Model: {html.escape(str(example['model']))}</span>
1934
  {tag_badge}
1935
+ {score_chips_html}
1936
  {(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#ecfdf5; color:#047857; border:1px solid #bbf7d0;">Category: {html.escape(str(example["category"]))}</span>' if example["category"] not in [None, "N/A", "None", "", "null"] and str(example["category"]).strip() != "" else '')}
1937
  {(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#eff6ff; color:#1d4ed8; border:1px solid #dbeafe;">Type: {html.escape(str(example["type"]))}</span>' if example["type"] not in [None, "N/A", "None", "", "null"] and str(example["type"]).strip() != "" else '')}
1938
  {(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#fff7ed; color:#c2410c; border:1px solid #fed7aa;">Impact: {html.escape(str(example["impact"]))}</span>' if example["impact"] not in [None, "N/A", "None", "", "null"] and str(example["impact"]).strip() != "" else '')}
 
1957
  example.get("evidence") not in [None, "N/A", "None", "", "null"] and str(example.get("evidence", "")).strip() != "",
1958
  ]) else ''}
1959
 
 
 
1960
  <div style="margin-bottom: 15px;">
1961
  <div style="border-radius: 6px; font-size: 15px; line-height: 1.5;">
1962
  {conversation_html}