Spaces:

taka-yamakoshi
/

tokenizer-demo

Running

taka-yamakoshi committed on Jul 2, 2022

Commit

f70863b

1 Parent(s): 6353e7e

fix bugs

Files changed (1) hide show

app.py CHANGED Viewed

@@ -55,6 +55,7 @@ if __name__=='__main__':
             sents[f'sent_{sent_id+1}'] = sentence
             if len(sentence)>0:
                 input_sent = tokenizer(sentence)['input_ids']
                 decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
                 num_tokens[f'sent_{sent_id+1}'] = len(decoded_sent)
@@ -63,7 +64,7 @@ if __name__=='__main__':
                 #for word_col,word in zip(word_cols,decoded_sent):
                     #with word_col:
                         #st.write(word)
-                st.write('   '.join(input_sent))
                 st.write('   '.join(decoded_sent))
                 st.markdown(f"<p style='text-align: center; color: black; font-family:Arial; font-size:20px;'>{len(decoded_sent)} tokens </p>", unsafe_allow_html=True)

             sents[f'sent_{sent_id+1}'] = sentence
             if len(sentence)>0:
                 input_sent = tokenizer(sentence)['input_ids']
+                encoded_sent = [str(token) for token in input_sent]
                 decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
                 num_tokens[f'sent_{sent_id+1}'] = len(decoded_sent)
                 #for word_col,word in zip(word_cols,decoded_sent):
                     #with word_col:
                         #st.write(word)
+                st.write('   '.join(encoded_sent))
                 st.write('   '.join(decoded_sent))
                 st.markdown(f"<p style='text-align: center; color: black; font-family:Arial; font-size:20px;'>{len(decoded_sent)} tokens </p>", unsafe_allow_html=True)