nlp-qual-space

Sleeping

App Files Files Community

maxspad committed on Jan 17, 2023

Commit

1bb3e7f

1 Parent(s): d69173c

prep for final revisions for paper

Browse files

Files changed (3) hide show

TODO.md +20 -0
fullreport.py +43 -12
plots.py +14 -1

TODO.md ADDED Viewed

	@@ -0,0 +1,20 @@

+- Level of detail = description of context and performance
+- Suggestion for "improvement" given
+- "written" feedback
+- Link
+- How specific was the evaluator in describing the behavior? => exact language of Q1
+- get rid of overall model just use component model
+- get rid of true comments
+- Medical education requires high-quality *written* feedback, but evaluating these *supervisor narrative comments* is time-consuming. The QuAL score has validity evidence for measuring the quality of short comments in this context. We developed an NLP/ML-powered tool to assess written comment quality via the QuAL score with high accuracy. Try it for yourself!
+- include confidence bar
+- change out to formal descriptions
+# Paper
+- zotero refs
+- methods comments
+- flowchart figure
+- final word clouds
+- add within-1 accuracy numbers

fullreport.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import streamlit as st
 import altair as alt
 import pandas as pd
-from plots import altair_gauge
 md_about_qual = '''
 The Quality of Assessment for Learning (QuAL) score measures three
@@ -27,19 +28,49 @@ class NQDFullReport(object):
         # with st.expander('About the QuAL Score', True):
         st.markdown(md_about_qual)
-        st.subheader('Your Level of Detail')
-        gauge = altair_gauge(self.results['q1']['label'], 3, 'Level of Detail')
         c1, c2 = st.columns(2)
         with c1:
             st.altair_chart(gauge, use_container_width=True)
         with c2:
-            # st.write(self.results)
-            bar_df = (pd.DataFrame(self.results['q1']['scores'])
-                        .reset_index()
-                        .rename(columns={'index': 'Rating', 0: 'Score'}))
-            bar = alt.Chart(bar_df).mark_bar().encode(
-                x='Rating:O', y='Score',
-                color=alt.Color('Rating', scale=alt.Scale(scheme='redyellowgreen'), legend=None)
-            ).properties(height=225, title='Prediction Scores')
             st.altair_chart(bar, use_container_width=True)

 import streamlit as st
 import altair as alt
 import pandas as pd
+from plots import altair_gauge, pred_bar_chart
+import streamlit.components.v1 as components
 md_about_qual = '''
 The Quality of Assessment for Learning (QuAL) score measures three
         # with st.expander('About the QuAL Score', True):
         st.markdown(md_about_qual)
+        st.subheader('Level of Detail')
         c1, c2 = st.columns(2)
         with c1:
+            gauge = altair_gauge(self.results['q1']['label'], 3, 'Level of Detail')
+            gauge_html = gauge.to_html()
+            # components.html(gauge_html, height=225, width=334)
             st.altair_chart(gauge, use_container_width=True)
         with c2:
+            bar = pred_bar_chart(self.results['q1']['scores'])
+            st.altair_chart(bar, use_container_width=True)
+        st.subheader('Suggestion for Improvement')
+        c1, c2 = st.columns(2)
+        with c1:
+            q2lab = self.results['q2i']['label']
+            st.markdown('#### Suggestion Given')
+            if q2lab == 0:
+                md_str = '# ✅ Yes'
+            else:
+                md_str = '# ❌ No'
+            st.markdown(md_str)
+            # st.metric('Suggestion Given', (md_str),
+                # help='Did the evaluator give a suggestion for improvement?')
+            gauge = altair_gauge(self.results['q2i']['label'], 1, 'Suggestion for Improvement')
+            # st.altair_chart(gauge, use_container_width=True)
+        with c2:
+            bar = pred_bar_chart(self.results['q2i']['scores'], binary_labels={0: 'Yes', 1: 'No'})
+            st.altair_chart(bar, use_container_width=True)
+        st.subheader('Suggestion Linking')
+        c1, c2 = st.columns(2)
+        with c1:
+            q2lab = self.results['q3i']['label']
+            st.markdown('#### Suggestion Linked')
+            if q2lab == 0:
+                md_str = '# ✅ Yes'
+            else:
+                md_str = '# ❌ No'
+            st.markdown(md_str)
+            # st.metric('Suggestion Given', (md_str),
+                # help='Did the evaluator give a suggestion for improvement?')
+            gauge = altair_gauge(self.results['q3i']['label'], 1, 'Suggestion for Improvement')
+            # st.altair_chart(gauge, use_container_width=True)
+        with c2:
+            bar = pred_bar_chart(self.results['q3i']['scores'], binary_labels={0: 'Yes', 1: 'No'})
             st.altair_chart(bar, use_container_width=True)

plots.py CHANGED Viewed

@@ -16,4 +16,17 @@ def get_color(score, max_score):
     cmap = cm.get_cmap('RdYlGn')
     color = cmap(score / float(max_score))
     color = f'rgba({int(color[0]*256)}, {int(color[1]*256)}, {int(color[2]*256)}, {int(color[3]*256)})'
-    return color

     cmap = cm.get_cmap('RdYlGn')
     color = cmap(score / float(max_score))
     color = f'rgba({int(color[0]*256)}, {int(color[1]*256)}, {int(color[2]*256)}, {int(color[3]*256)})'
+    return color
+def pred_bar_chart(scores, binary_labels=None):
+    bar_df = (pd.DataFrame(scores)
+                .reset_index()
+                .rename(columns={'index': 'Rating', 0: 'Score'}))
+    if binary_labels:
+        bar_df['Rating'].replace(binary_labels, inplace=True)
+    bar = alt.Chart(bar_df).mark_bar().encode(
+        x='Rating:O', y='Score',
+        color=alt.Color('Rating', scale=alt.Scale(scheme='redyellowgreen'), legend=None)
+    ).properties(height=225, title='Prediction Scores')
+    bar.to_html()
+    return bar