Spaces:
Running
Running
word embeddings
Browse files
app.py
CHANGED
|
@@ -36,7 +36,7 @@ def word_embedding_space_analysis(
|
|
| 36 |
S, V, D = torch.linalg.svd(matrix)
|
| 37 |
|
| 38 |
data = []
|
| 39 |
-
top =
|
| 40 |
select_words = 20
|
| 41 |
n_dim = 10
|
| 42 |
for _i in range(n_dim):
|
|
@@ -54,15 +54,16 @@ def word_embedding_space_analysis(
|
|
| 54 |
word = word[1:]
|
| 55 |
if word.lower() in nltk.corpus.words.words():
|
| 56 |
output.append(word)
|
| 57 |
-
return output
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
| 63 |
return pd.DataFrame(
|
| 64 |
data,
|
| 65 |
-
columns=["
|
| 66 |
index=[f"Dim#{_i}" for _i in range(n_dim)],
|
| 67 |
)
|
| 68 |
|
|
@@ -196,7 +197,7 @@ def main():
|
|
| 196 |
# Analysing the sentence
|
| 197 |
st.divider()
|
| 198 |
st.divider()
|
| 199 |
-
st.subheader("LM-Steer Converts
|
| 200 |
'''
|
| 201 |
LM-Steer also serves as a probe for analyzing the text. It can be used to
|
| 202 |
analyze the sentiment and detoxification of the text. Now, we proceed and
|
|
@@ -267,14 +268,8 @@ def main():
|
|
| 267 |
embeddings: what word dimensions contribute to or contrast to a specific
|
| 268 |
style. This analysis can be used to understand the word embedding space
|
| 269 |
and how it steers the model's generation.
|
| 270 |
-
|
| 271 |
-
Note that due to the bidirectional nature of the embedding spaces, in each
|
| 272 |
-
dimension, sometimes only one side of the word embeddings contributes
|
| 273 |
-
(has an impact on the style), while the other side, (resulting in negative
|
| 274 |
-
logits) has a negligible impact on the style. The table below shows both
|
| 275 |
-
sides of the word embeddings in each dimension.
|
| 276 |
'''
|
| 277 |
-
for dimension in ["
|
| 278 |
f'##### {dimension} Word Dimensions'
|
| 279 |
dim = 2 if dimension == "Sentiment" else 0
|
| 280 |
analysis_result = word_embedding_space_analysis(
|
|
|
|
| 36 |
S, V, D = torch.linalg.svd(matrix)
|
| 37 |
|
| 38 |
data = []
|
| 39 |
+
top = 50
|
| 40 |
select_words = 20
|
| 41 |
n_dim = 10
|
| 42 |
for _i in range(n_dim):
|
|
|
|
| 54 |
word = word[1:]
|
| 55 |
if word.lower() in nltk.corpus.words.words():
|
| 56 |
output.append(word)
|
| 57 |
+
return output
|
| 58 |
|
| 59 |
+
left_tokens = filter_words(left_tokens)
|
| 60 |
+
right_tokens = filter_words(right_tokens)
|
| 61 |
+
if len(left_tokens) < len(right_tokens):
|
| 62 |
+
left_tokens = right_tokens
|
| 63 |
+
data.append(", ".join(left_tokens[:select_words]))
|
| 64 |
return pd.DataFrame(
|
| 65 |
data,
|
| 66 |
+
columns=["Words Contributing to the Style"],
|
| 67 |
index=[f"Dim#{_i}" for _i in range(n_dim)],
|
| 68 |
)
|
| 69 |
|
|
|
|
| 197 |
# Analysing the sentence
|
| 198 |
st.divider()
|
| 199 |
st.divider()
|
| 200 |
+
st.subheader("LM-Steer Converts Any LM Into A Text Analyzer")
|
| 201 |
'''
|
| 202 |
LM-Steer also serves as a probe for analyzing the text. It can be used to
|
| 203 |
analyze the sentiment and detoxification of the text. Now, we proceed and
|
|
|
|
| 268 |
embeddings: what word dimensions contribute to or contrast to a specific
|
| 269 |
style. This analysis can be used to understand the word embedding space
|
| 270 |
and how it steers the model's generation.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
'''
|
| 272 |
+
for dimension in ["Detoxification", "Sentiment"]:
|
| 273 |
f'##### {dimension} Word Dimensions'
|
| 274 |
dim = 2 if dimension == "Sentiment" else 0
|
| 275 |
analysis_result = word_embedding_space_analysis(
|