app.py CHANGED
@@ -133,9 +133,11 @@ def main():
     # col1, col2, col3 = st.columns(3, gap="medium")
     col1, col2, col3 = st.columns([2, 2, 1], gap="medium")
     sentiment = col1.slider(
-        "Sentiment
+        "Sentiment (the larger the more positive)",
+        -steer_range, steer_range, 3.0, steer_interval)
     detoxification = col2.slider(
-        "Detoxification Strength
+        "Detoxification Strength (the larger the less toxic)",
+        -steer_range, steer_range, 0.0,
         steer_interval)
     max_length = col3.number_input("Max length", 50, 300, 50, 50)
     col1, col2, col3, _ = st.columns(4)
@@ -144,15 +146,16 @@ def main():
     if "output" not in st.session_state:
         st.session_state.output = ""
     if col1.button("Steer and generate!", type="primary"):
-
-
-        st.session_state.
-
-
-
-
-
-
+        with st.spinner("Generating..."):
+            steer_values = [detoxification, 0, sentiment, 0]
+            st.session_state.output = model.generate(
+                st.session_state.prompt,
+                steer_values,
+                seed=None if randomness else 0,
+                min_length=0,
+                max_length=max_length,
+                do_sample=True,
+            )
     analyzed_text = \
         st.text_area("Generated text:", st.session_state.output, height=200)

@@ -176,46 +179,51 @@ def main():
         [2, 0],
         ["#ff7f0e", "#1f77b4"],
     ):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        with st.spinner(f"Analyzing {name}..."):
+            col.subheader(name)
+            # classification
+            col.markdown(
+                "##### Dimension-Wise Classification Distribution")
+            _, dist_list, _ = model.steer_analysis(
+                analyzed_text,
+                dim, -steer_range, steer_range,
+                bins=2*int(steer_range)+1,
+            )
+            dist_list = np.array(dist_list)
+            col.bar_chart(
+                pd.DataFrame(
+                    {
+                        "Value": dist_list[:, 0],
+                        "Probability": dist_list[:, 1],
+                    }
+                ), x="Value", y="Probability",
+                color=color,
+            )
+
+            # key tokens
+            pos_steer, neg_steer = np.zeros((2, 4))
+            pos_steer[dim] = 1
+            neg_steer[dim] = -1
+            _, token_evidence = model.evidence_words(
+                analyzed_text,
+                [pos_steer, neg_steer],
+            )
+            tokens = tokenizer(analyzed_text).input_ids
+            tokens = [f"{i:3d}: {tokenizer.decode([t])}"
+                      for i, t in enumerate(tokens)]
+            col.markdown("##### Token's Evidence Score in the Dimension")
+            col.write("The polarity of the token's evidence score "
+                      "which aligns with sliding bar directions."
+                      )
+            col.bar_chart(
+                pd.DataFrame(
+                    {
+                        "Token": tokens[1:],
+                        "Evidence": token_evidence,
+                    }
+                ), x="Token", y="Evidence",
+                horizontal=True, color=color,
+            )

     st.divider()
     st.divider()
@@ -234,7 +242,8 @@ def main():
         ["Sentiment", "Detoxification"],
     )
     dim = 2 if dimension == "Sentiment" else 0
-
+    with st.spinner("Analyzing..."):
+        word_embedding_space_analysis(model, tokenizer, dim)


 if __name__ == "__main__":
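The Streamlit pattern this change applies is: keep the generated text in st.session_state so it survives reruns triggered by widget interaction, and wrap the slow model call in st.spinner so the UI shows progress while it runs. Below is a minimal, self-contained sketch of that pattern only; fake_generate is a hypothetical stand-in for the Space's model.generate, and the widget labels and steer values are illustrative.

import time

import streamlit as st


def fake_generate(prompt: str, steer_values: list) -> str:
    # Hypothetical stand-in for model.generate; any slow call fits here.
    time.sleep(2)
    return f"(steered with {steer_values}) {prompt} ..."


# Persist output across Streamlit reruns.
if "output" not in st.session_state:
    st.session_state.output = ""

prompt = st.text_input("Prompt", "The movie was")
if st.button("Steer and generate!", type="primary"):
    with st.spinner("Generating..."):  # spinner is shown while this block runs
        st.session_state.output = fake_generate(prompt, [0.0, 0, 3.0, 0])

st.text_area("Generated text:", st.session_state.output, height=200)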