d1ef committed on
Commit
a1ad15c
·
1 Parent(s): bd1449f
Files changed (3) hide show
  1. README.md +4 -5
  2. app.py +46 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,13 +1,12 @@
1
  ---
2
  title: Tokenizer Utils
3
- emoji: 📚
4
- colorFrom: indigo
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 3.50.2
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Tokenizer Utils
3
+ emoji: 🏆
4
+ colorFrom: blue
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 3.48.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import T5Tokenizer
3
+ import json
4
+
5
# Labels offered in the ACTIONS dropdown. translate() selects the conversion
# by each label's position in this list, so the order is significant.
ACTIONS = [
    "text2ids",
    "text2tokens",
    "ids2tokens",
    "tokens2ids and JSON requires double quotes",
]
6
+
7
def translate(model, action, inputs):
    """Run one tokenizer conversion and return the result as display text.

    Args:
        model: Name or path handed to ``T5Tokenizer.from_pretrained``,
            e.g. ``"t5-small"``.
        action: One of the labels in ``ACTIONS``; selects the conversion.
        inputs: Plain text for the two text2* actions, or JSON text (normally
            an array of ids or of tokens) for ids2tokens / tokens2ids.

    Returns:
        A string made of the conversion result followed by the tokenizer's
        ``vocab_size`` and ``len(tokenizer)``. The result part is empty when
        ``action`` matches none of the ``ACTIONS`` labels.

    Raises:
        gr.Error: If a JSON-based action is given text that is not valid
            JSON. Gradio shows the message to the user instead of a bare
            "Error".
    """
    tokenizer = T5Tokenizer.from_pretrained(model)
    vocab_size = tokenizer.vocab_size
    len_tokenizer = len(tokenizer)

    output = ''
    if action == ACTIONS[0]:
        # text -> ids
        output = tokenizer(inputs)['input_ids']
    elif action == ACTIONS[1]:
        # text -> ids -> tokens
        input_ids = tokenizer(inputs)['input_ids']
        output = tokenizer.convert_ids_to_tokens(input_ids)
    elif action in (ACTIONS[2], ACTIONS[3]):
        # Both remaining actions take JSON typed by the user, so a parse
        # failure is an expected condition and gets a readable message.
        try:
            items = json.loads(inputs)
        except json.JSONDecodeError as err:
            raise gr.Error(
                f'INPUTS is not valid JSON ({err}). '
                'Use a JSON array with double-quoted strings.'
            ) from err
        if action == ACTIONS[2]:
            output = tokenizer.convert_ids_to_tokens(items)
        else:
            output = tokenizer.convert_tokens_to_ids(items)

    return f'{output}\n\n\n\nother infos:\nvocab_size: {vocab_size}\nlen(tokenizer): {len_tokenizer}'
32
+
33
# Input widgets, listed in the positional order of translate()'s parameters:
# model name, action label, raw input text.
_model_box = gr.components.Textbox(label="MODEL NAME, eg: t5-small")
_action_menu = gr.components.Dropdown(label="ACTIONS", choices=ACTIONS)
_inputs_box = gr.components.Textbox(label="INPUTS")

# Single-function UI: three inputs in, one text output.
demo = gr.Interface(
    fn=translate,
    inputs=[_model_box, _action_menu, _inputs_box],
    outputs=["text"],
    cache_examples=False,
    title="Test T5Tokenizer",
    description="▁Test, ▁To, ken, izer, s, ▁happily, !, </s>",
)

demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ transformers==4.34.0
2
+ sentencepiece==0.1.99
3
+ numpy==1.24.2