tamang0000
committed on
Commit
·
e805282
1
Parent(s):
4d1fdaf
changed words
Browse files
app.py
CHANGED
@@ -106,9 +106,10 @@ def generate_split_token_table(text):
|
|
106 |
with gr.Blocks() as sutra_token_count:
|
107 |
gr.Markdown(
|
108 |
"""
|
109 |
-
#
|
110 |
## Tokenize paragraphs in multiple languages and compare token counts.
|
111 |
-
Space inspired from [SUTRA](https://huggingface.co/spaces/TWO/sutra-tokenizer-comparison
|
|
|
112 |
""")
|
113 |
textbox = gr.Textbox(label="Input Text")
|
114 |
submit_button = gr.Button("Submit")
|
@@ -136,9 +137,10 @@ def generate_tokens_table(text):
|
|
136 |
with gr.Blocks() as sutra_tokenize:
|
137 |
gr.Markdown(
|
138 |
"""
|
139 |
-
#
|
140 |
## Tokenize a sentence with various tokenizers and inspect how it's broken down.
|
141 |
Space inspired from [SUTRA](https://huggingface.co/spaces/TWO/sutra-tokenizer-comparison)
|
|
|
142 |
""")
|
143 |
textbox = gr.Textbox(label="Input Text")
|
144 |
submit_button = gr.Button("Submit")
|
|
|
106 |
with gr.Blocks() as sutra_token_count:
|
107 |
gr.Markdown(
|
108 |
"""
|
109 |
+
# Assamese Tokenizer Specs & Stats.
|
110 |
## Tokenize paragraphs in multiple languages and compare token counts.
|
111 |
+
Space inspired from [SUTRA](https://huggingface.co/spaces/TWO/sutra-tokenizer-comparison)
|
112 |
+
Number of Tokens (Lesser the better)
|
113 |
""")
|
114 |
textbox = gr.Textbox(label="Input Text")
|
115 |
submit_button = gr.Button("Submit")
|
|
|
137 |
with gr.Blocks() as sutra_tokenize:
|
138 |
gr.Markdown(
|
139 |
"""
|
140 |
+
# Assamese Tokenizer Sentence Inspector.
|
141 |
## Tokenize a sentence with various tokenizers and inspect how it's broken down.
|
142 |
Space inspired from [SUTRA](https://huggingface.co/spaces/TWO/sutra-tokenizer-comparison)
|
143 |
+
Number of Tokens (Lesser the better)
|
144 |
""")
|
145 |
textbox = gr.Textbox(label="Input Text")
|
146 |
submit_button = gr.Button("Submit")
|