byt5-tokenizer-utils

Build error

d1ef committed on Oct 31, 2023

Commit

a1ad15c

1 Parent(s): bd1449f

add files

Files changed (3) hide show

README.md CHANGED Viewed

@@ -1,13 +1,12 @@
 ---
 title: Tokenizer Utils
-emoji: 📚
-colorFrom: indigo
-colorTo: indigo
 sdk: gradio
-sdk_version: 3.50.2
 app_file: app.py
 pinned: false
-license: mit
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Tokenizer Utils
+emoji: 🏆
+colorFrom: blue
+colorTo: blue
 sdk: gradio
+sdk_version: 3.48.0
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

+import gradio as gr
+from transformers import T5Tokenizer
+import json
+# Choices offered in the "ACTIONS" dropdown. translate() dispatches on the
+# position of each entry (ACTIONS[0] .. ACTIONS[3]), so the order matters.
+# The last label also carries a usage hint: its input is parsed as JSON.
+ACTIONS = ["text2ids", "text2tokens", "ids2tokens", "tokens2ids and JSON requires double quotes"]
+def translate(model, action, inputs):
+    tokenizer = T5Tokenizer.from_pretrained(model)
+    vocab_size = tokenizer.vocab_size
+    len_tokenizer = len(tokenizer)
+    input = inputs
+    output = ''
+    if action == ACTIONS[0]:
+      input_ids = tokenizer(input)['input_ids']
+      output = input_ids
+    if action == ACTIONS[1]:
+      input_ids = tokenizer(input)['input_ids']
+      tokens = tokenizer.convert_ids_to_tokens(input_ids)
+      output = tokens
+    if action == ACTIONS[2]:
+      list = json.loads(input)
+      tokens = tokenizer.convert_ids_to_tokens(list)
+      output = tokens
+    if action == ACTIONS[3]:
+      list = json.loads(input)
+      tokens = tokenizer.convert_tokens_to_ids(list)
+      output = tokens
+    return f'{output}\n\n\n\nother infos:\nvocab_size: {vocab_size}\nlen(tokenizer): {len_tokenizer}'
+demo = gr.Interface(
+    fn=translate,
+    inputs=[
+        gr.components.Textbox(label="MODEL NAME, eg: t5-small"),
+        gr.components.Dropdown(label="ACTIONS", choices=ACTIONS),
+        gr.components.Textbox(label="INPUTS"),
+    ],
+    outputs=["text"],
+    cache_examples=False,
+    title="Test T5Tokenizer",
+    description="▁Test, ▁To, ken, izer, s, ▁happily, !, </s>"
+)
+demo.launch(debug=True)

requirements.txt ADDED Viewed

+transformers==4.34.0
+sentencepiece==0.1.99
+numpy==1.24.2