Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import ByT5Tokenizer | |
import json | |
# Labels shown in the "ACTIONS" dropdown. translate() identifies the requested
# operation by comparing the selected label against these entries by position,
# so the order of the first four items matters.
ACTIONS = ["text2ids", "text2tokens", "ids2tokens", "tokens2ids and JSON requires double quotes"]


def translate(model, action, inputs):
    """Run one tokenizer conversion and return a plain-text report.

    Args:
        model: Name or path passed to ``ByT5Tokenizer.from_pretrained``.
        action: One of the labels in ``ACTIONS``. Any other value performs no
            conversion and the report shows an empty result.
        inputs: Raw text for the two ``text2*`` actions, or a JSON document
            (typically a list of ids or of tokens) for the other two.

    Returns:
        A string containing the conversion result, its JSON encoding, the
        tokenizer's ``vocab_size`` and ``len(tokenizer)``. When a JSON-based
        action receives text that is not valid JSON, a short explanatory
        message is returned instead of raising.
    """
    # Slice rather than unpack the whole list so that appending new actions
    # to ACTIONS later does not break this function.
    text2ids, text2tokens, ids2tokens, tokens2ids = ACTIONS[:4]

    # Validate JSON input first: a typo in the textbox is reported right away,
    # before any tokenizer is loaded.
    payload = None
    if action in (ids2tokens, tokens2ids):
        try:
            payload = json.loads(inputs)
        except (TypeError, json.JSONDecodeError) as err:
            return (
                f"Could not parse INPUTS as JSON: {err}\n"
                'Example of valid input: [1, 2, 3] or ["a", "b"] '
                "(JSON strings need double quotes)."
            )

    tokenizer = ByT5Tokenizer.from_pretrained(model)
    vocab_size = tokenizer.vocab_size
    len_tokenizer = len(tokenizer)

    # Stays an empty string when the action is not recognised.
    output = ''
    if action == text2ids:
        output = tokenizer(inputs)['input_ids']
    elif action == text2tokens:
        input_ids = tokenizer(inputs)['input_ids']
        output = tokenizer.convert_ids_to_tokens(input_ids)
    elif action == ids2tokens:
        output = tokenizer.convert_ids_to_tokens(payload)
    elif action == tokens2ids:
        output = tokenizer.convert_tokens_to_ids(payload)

    return f'{output}\n\n\n\nother infos:\njson:{json.dumps(output)} \nvocab_size: {vocab_size}\nlen(tokenizer): {len_tokenizer}'
# Input widgets, created in the same order as translate()'s parameters
# (model, action, inputs).
_model_name_box = gr.components.Textbox(
    label="MODEL NAME, eg: google/byt5-small",
    value="google/byt5-small",
)
_action_dropdown = gr.components.Dropdown(label="ACTIONS", choices=ACTIONS)
_inputs_box = gr.components.Textbox(label="INPUTS")

# NOTE(review): the title and description below mention T5Tokenizer and show
# word-piece style tokens, while translate() loads ByT5Tokenizer — confirm
# whether the wording is intentional.
demo = gr.Interface(
    fn=translate,
    inputs=[_model_name_box, _action_dropdown, _inputs_box],
    outputs=["text"],
    cache_examples=False,
    title="Test T5Tokenizer",
    description="▁Test, ▁To, ken, izer, s, ▁happily, !, </s>",
)

# Start the web UI with Gradio's debug flag enabled.
demo.launch(debug=True)