Spaces:
Running
Running
Lisa Dunlap
committed on
Commit
·
6334bb3
1
Parent(s):
fb9b9d1
updated help page
Browse files- stringsight/dashboard/app.py +3 -3
- stringsight/dashboard/utils.py +45 -46
stringsight/dashboard/app.py
CHANGED
@@ -341,11 +341,11 @@ def create_app() -> gr.Blocks:
|
|
341 |
with gr.Group(visible=False, elem_id="help-panel") as help_panel:
|
342 |
help_md = gr.Markdown(
|
343 |
"""
|
344 |
-
**Overview**: Compare model
|
345 |
|
346 |
-
**View Clusters**: Explore
|
347 |
|
348 |
-
**View Examples**: Inspect individual examples
|
349 |
"""
|
350 |
)
|
351 |
help_close_btn = gr.Button("Close", variant="secondary", elem_id="help-close-btn")
|
|
|
341 |
with gr.Group(visible=False, elem_id="help-panel") as help_panel:
|
342 |
help_md = gr.Markdown(
|
343 |
"""
|
344 |
+
**Overview**: Compare model metrics and what behaviors are more unique to each model.
|
345 |
|
346 |
+
**View Clusters**: Explore the individual behaviors seen in each property cluster. To view an example of a behavior, copy its property description into the search box on the View Examples tab.
|
347 |
|
348 |
+
**View Examples**: Inspect individual examples and localize each example to the property cluster that it belongs to.
|
349 |
"""
|
350 |
)
|
351 |
help_close_btn = gr.Button("Close", variant="secondary", elem_id="help-close-btn")
|
stringsight/dashboard/utils.py
CHANGED
@@ -1706,6 +1706,7 @@ def get_example_data(
|
|
1706 |
"response": response_val,
|
1707 |
"property_description": row.get("property_description", "N/A"),
|
1708 |
"score": row.get("score", "N/A"),
|
|
|
1709 |
"fine_cluster_id": fine_cluster_id,
|
1710 |
"fine_cluster_label": fine_cluster_label,
|
1711 |
"coarse_cluster_id": coarse_cluster_id,
|
@@ -1850,26 +1851,7 @@ def format_examples_display(examples: List[Dict[str, Any]],
|
|
1850 |
)
|
1851 |
|
1852 |
# Score display for summary (only for non-side-by-side or when not shown in side-by-side)
|
1853 |
-
score_badge
|
1854 |
-
if not example.get('is_side_by_side', False) and example['score'] != 'N/A':
|
1855 |
-
try:
|
1856 |
-
score_val = float(example['score'])
|
1857 |
-
score_color = '#28a745' if score_val >= 0 else '#dc3545'
|
1858 |
-
score_badge = f"""
|
1859 |
-
<span style="
|
1860 |
-
background: {score_color};
|
1861 |
-
color: white;
|
1862 |
-
padding: 4px 8px;
|
1863 |
-
border-radius: 12px;
|
1864 |
-
font-size: 12px;
|
1865 |
-
font-weight: bold;
|
1866 |
-
margin-left: 10px;
|
1867 |
-
">
|
1868 |
-
Score: {score_val:.3f}
|
1869 |
-
</span>
|
1870 |
-
"""
|
1871 |
-
except:
|
1872 |
-
pass
|
1873 |
|
1874 |
# Create short preview of prompt for summary
|
1875 |
prompt_preview = example['prompt'][:80] + "..." if len(example['prompt']) > 80 else example['prompt']
|
@@ -1878,30 +1860,48 @@ def format_examples_display(examples: List[Dict[str, Any]],
|
|
1878 |
# First example is expanded by default
|
1879 |
open_attr = "open" if i == 1 else ""
|
1880 |
|
1881 |
-
# Build
|
1882 |
-
|
1883 |
-
|
1884 |
-
|
1885 |
-
|
1886 |
-
|
1887 |
-
|
1888 |
-
|
1889 |
-
|
1890 |
-
|
1891 |
-
|
1892 |
-
|
1893 |
-
|
1894 |
-
|
1895 |
-
|
1896 |
-
|
1897 |
-
|
1898 |
-
|
1899 |
-
|
1900 |
-
|
1901 |
-
|
1902 |
-
|
1903 |
-
|
1904 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1905 |
|
1906 |
html_out += f"""
|
1907 |
<details {open_attr} style="border: 1px solid #dee2e6; border-radius: 8px; margin-bottom: 15px; background: white; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
|
@@ -1932,6 +1932,7 @@ def format_examples_display(examples: List[Dict[str, Any]],
|
|
1932 |
<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#f3f4f6; border:1px solid #e5e7eb;">ID: {html.escape(str(example['id']))}</span>
|
1933 |
<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#f3f4f6; border:1px solid #e5e7eb;">Model: {html.escape(str(example['model']))}</span>
|
1934 |
{tag_badge}
|
|
|
1935 |
{(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#ecfdf5; color:#047857; border:1px solid #bbf7d0;">Category: {html.escape(str(example["category"]))}</span>' if example["category"] not in [None, "N/A", "None", "", "null"] and str(example["category"]).strip() != "" else '')}
|
1936 |
{(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#eff6ff; color:#1d4ed8; border:1px solid #dbeafe;">Type: {html.escape(str(example["type"]))}</span>' if example["type"] not in [None, "N/A", "None", "", "null"] and str(example["type"]).strip() != "" else '')}
|
1937 |
{(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#fff7ed; color:#c2410c; border:1px solid #fed7aa;">Impact: {html.escape(str(example["impact"]))}</span>' if example["impact"] not in [None, "N/A", "None", "", "null"] and str(example["impact"]).strip() != "" else '')}
|
@@ -1956,8 +1957,6 @@ def format_examples_display(examples: List[Dict[str, Any]],
|
|
1956 |
example.get("evidence") not in [None, "N/A", "None", "", "null"] and str(example.get("evidence", "")).strip() != "",
|
1957 |
]) else ''}
|
1958 |
|
1959 |
-
{score_section_html}
|
1960 |
-
|
1961 |
<div style="margin-bottom: 15px;">
|
1962 |
<div style="border-radius: 6px; font-size: 15px; line-height: 1.5;">
|
1963 |
{conversation_html}
|
|
|
1706 |
"response": response_val,
|
1707 |
"property_description": row.get("property_description", "N/A"),
|
1708 |
"score": row.get("score", "N/A"),
|
1709 |
+
"scores": row.get("scores", None),
|
1710 |
"fine_cluster_id": fine_cluster_id,
|
1711 |
"fine_cluster_label": fine_cluster_label,
|
1712 |
"coarse_cluster_id": coarse_cluster_id,
|
|
|
1851 |
)
|
1852 |
|
1853 |
# Score display for summary (only for non-side-by-side or when not shown in side-by-side)
|
1854 |
+
# (Removed) score_badge in header; scores will be shown in badges row instead
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1855 |
|
1856 |
# Create short preview of prompt for summary
|
1857 |
prompt_preview = example['prompt'][:80] + "..." if len(example['prompt']) > 80 else example['prompt']
|
|
|
1860 |
# First example is expanded by default
|
1861 |
open_attr = "open" if i == 1 else ""
|
1862 |
|
1863 |
+
# Build score chips for badges row (show scalar score and/or scores dict), excluding side-by-side
|
1864 |
+
score_chips_html = ""
|
1865 |
+
if not example.get('is_side_by_side', False):
|
1866 |
+
chips: list[str] = []
|
1867 |
+
# Scalar score
|
1868 |
+
raw_score = example.get('score')
|
1869 |
+
if isinstance(raw_score, (int, float)):
|
1870 |
+
sv = float(raw_score)
|
1871 |
+
bg = '#ecfdf5' if sv >= 0 else '#fee2e2'
|
1872 |
+
fg = '#047857' if sv >= 0 else '#991b1b'
|
1873 |
+
chips.append(
|
1874 |
+
f"<span style=\"display:inline-block; padding:2px 8px; border-radius:999px; background:{bg}; color:{fg}; border:1px solid #e5e7eb;\">Score: {sv:.3f}</span>"
|
1875 |
+
)
|
1876 |
+
elif isinstance(raw_score, str):
|
1877 |
+
s = raw_score.strip() if raw_score is not None else ""
|
1878 |
+
if s and re.match(r"^[+-]?\d+(?:\.\d+)?$", s):
|
1879 |
+
sv = float(s)
|
1880 |
+
bg = '#ecfdf5' if sv >= 0 else '#fee2e2'
|
1881 |
+
fg = '#047857' if sv >= 0 else '#991b1b'
|
1882 |
+
chips.append(
|
1883 |
+
f"<span style=\"display:inline-block; padding:2px 8px; border-radius:999px; background:{bg}; color:{fg}; border:1px solid #e5e7eb;\">Score: {sv:.3f}</span>"
|
1884 |
+
)
|
1885 |
+
# Scores dict
|
1886 |
+
raw_scores = example.get('scores')
|
1887 |
+
if isinstance(raw_scores, dict):
|
1888 |
+
for k, v in raw_scores.items():
|
1889 |
+
label = str(k)
|
1890 |
+
val: Optional[float] = None
|
1891 |
+
if isinstance(v, (int, float)):
|
1892 |
+
val = float(v)
|
1893 |
+
elif isinstance(v, str):
|
1894 |
+
vs = v.strip()
|
1895 |
+
if vs and re.match(r"^[+-]?\d+(?:\.\d+)?$", vs):
|
1896 |
+
val = float(vs)
|
1897 |
+
if val is not None:
|
1898 |
+
bg = '#ecfdf5' if val >= 0 else '#fee2e2'
|
1899 |
+
fg = '#047857' if val >= 0 else '#991b1b'
|
1900 |
+
chips.append(
|
1901 |
+
f"<span style=\"display:inline-block; padding:2px 8px; border-radius:999px; background:{bg}; color:{fg}; border:1px solid #e5e7eb;\">{html.escape(label)}: {val:.3f}</span>"
|
1902 |
+
)
|
1903 |
+
if chips:
|
1904 |
+
score_chips_html = "".join(chips)
|
1905 |
|
1906 |
html_out += f"""
|
1907 |
<details {open_attr} style="border: 1px solid #dee2e6; border-radius: 8px; margin-bottom: 15px; background: white; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
|
|
|
1932 |
<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#f3f4f6; border:1px solid #e5e7eb;">ID: {html.escape(str(example['id']))}</span>
|
1933 |
<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#f3f4f6; border:1px solid #e5e7eb;">Model: {html.escape(str(example['model']))}</span>
|
1934 |
{tag_badge}
|
1935 |
+
{score_chips_html}
|
1936 |
{(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#ecfdf5; color:#047857; border:1px solid #bbf7d0;">Category: {html.escape(str(example["category"]))}</span>' if example["category"] not in [None, "N/A", "None", "", "null"] and str(example["category"]).strip() != "" else '')}
|
1937 |
{(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#eff6ff; color:#1d4ed8; border:1px solid #dbeafe;">Type: {html.escape(str(example["type"]))}</span>' if example["type"] not in [None, "N/A", "None", "", "null"] and str(example["type"]).strip() != "" else '')}
|
1938 |
{(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#fff7ed; color:#c2410c; border:1px solid #fed7aa;">Impact: {html.escape(str(example["impact"]))}</span>' if example["impact"] not in [None, "N/A", "None", "", "null"] and str(example["impact"]).strip() != "" else '')}
|
|
|
1957 |
example.get("evidence") not in [None, "N/A", "None", "", "null"] and str(example.get("evidence", "")).strip() != "",
|
1958 |
]) else ''}
|
1959 |
|
|
|
|
|
1960 |
<div style="margin-bottom: 15px;">
|
1961 |
<div style="border-radius: 6px; font-size: 15px; line-height: 1.5;">
|
1962 |
{conversation_html}
|