"""Gradio demo for converting between text, token ids and tokens with ByT5Tokenizer."""

import functools
import json

import gradio as gr
from transformers import ByT5Tokenizer

# Choices offered in the "ACTIONS" dropdown. translate() dispatches on the
# position of the selected entry in this list, so the order is significant.
# The label of the fourth entry doubles as a usage hint for the UI.
ACTIONS = [
    "text2ids",
    "text2tokens",
    "ids2tokens",
    "tokens2ids and JSON requires double quotes",
    "ids2text",
]


@functools.lru_cache(maxsize=8)
def _load_tokenizer(model):
    """Return the ByT5Tokenizer for *model*, loading it once per model name.

    Memoised so that repeated UI requests for the same model name do not call
    ``from_pretrained`` again. A failed load raises and is not cached.
    """
    return ByT5Tokenizer.from_pretrained(model)


def translate(model, action, inputs):
    """Run one tokenizer conversion and return a human-readable report.

    Args:
        model: Name passed to ``ByT5Tokenizer.from_pretrained``.
        action: One of the entries of ``ACTIONS``. Any other value (for
            example when nothing is selected) performs no conversion and
            reports an empty result.
        inputs: Raw text for ``ACTIONS[0]`` and ``ACTIONS[1]``; for the other
            actions a JSON document that is parsed with ``json.loads`` and
            handed to the tokenizer (ids or tokens, depending on the action).

    Returns:
        A string with the conversion result, followed by its JSON encoding,
        the tokenizer's ``vocab_size`` and ``len(tokenizer)``.

    Raises:
        json.JSONDecodeError: If ``inputs`` is not valid JSON for one of the
            JSON-consuming actions.
    """
    tokenizer = _load_tokenizer(model)
    vocab_size = tokenizer.vocab_size
    len_tokenizer = len(tokenizer)

    # Default result when the action matches none of the known choices.
    output = ''
    if action == ACTIONS[0]:
        # text -> ids
        output = tokenizer(inputs)['input_ids']
    elif action == ACTIONS[1]:
        # text -> ids -> tokens
        output = tokenizer.convert_ids_to_tokens(tokenizer(inputs)['input_ids'])
    elif action == ACTIONS[2]:
        # JSON ids -> tokens
        output = tokenizer.convert_ids_to_tokens(json.loads(inputs))
    elif action == ACTIONS[3]:
        # JSON tokens -> ids
        output = tokenizer.convert_tokens_to_ids(json.loads(inputs))
    elif action == ACTIONS[4]:
        # JSON ids -> text
        output = tokenizer.decode(json.loads(inputs))

    return f'{output}\n\n\n\nother infos:\njson:{json.dumps(output)} \nvocab_size: {vocab_size}\nlen(tokenizer): {len_tokenizer}'


# NOTE(review): the title and description mention "T5Tokenizer" while the code
# loads ByT5Tokenizer; the strings are left untouched here - confirm intent.
demo = gr.Interface(
    fn=translate,
    inputs=[
        gr.components.Textbox(label="MODEL NAME, eg: google/byt5-small", value="google/byt5-small"),
        gr.components.Dropdown(label="ACTIONS", choices=ACTIONS),
        gr.components.Textbox(label="INPUTS"),
    ],
    outputs=["text"],
    cache_examples=False,
    title="Test T5Tokenizer",
    description="▁Test, ▁To, ken, izer, s, ▁happily, !, ",
)

demo.launch(debug=True)