d1ef
Update app.py
6f7a741
raw
history blame
1.49 kB
import gradio as gr
from transformers import ByT5Tokenizer
import json
# Labels offered in the "ACTIONS" dropdown. translate() compares the selected
# value against these entries by position, so the order must stay in sync
# with its branches.
ACTIONS = [
    "text2ids",
    "text2tokens",
    "ids2tokens",
    "tokens2ids and JSON requires double quotes",
]
def translate(model, action, inputs):
    """Run one tokenizer conversion and return a plain-text report.

    Args:
        model: Model name or path handed to ``ByT5Tokenizer.from_pretrained``.
        action: The selected entry of ``ACTIONS``. Any other value (for
            example ``None`` when nothing is selected) performs no
            conversion and the report shows an empty result.
        inputs: Raw text for the two ``text2...`` actions; a JSON document
            (e.g. ``[104, 105]`` or ``["h", "i"]``) for the other two.

    Returns:
        A string with the conversion result, followed by its JSON encoding,
        the tokenizer's ``vocab_size`` and ``len(tokenizer)``.

    Raises:
        json.JSONDecodeError: If a JSON-based action receives text that is
            not valid JSON.
    """
    tokenizer = ByT5Tokenizer.from_pretrained(model)
    vocab_size = tokenizer.vocab_size
    len_tokenizer = len(tokenizer)

    def parse_json_input():
        # Re-raise with the same exception type but a message that tells the
        # user which field is wrong and what format is expected.
        try:
            return json.loads(inputs)
        except json.JSONDecodeError as err:
            raise json.JSONDecodeError(
                f'INPUTS must be valid JSON for action {action!r} '
                f'(strings need double quotes): {err.msg}',
                err.doc,
                err.pos,
            ) from err

    # The actions are mutually exclusive, so a single if/elif chain is enough.
    output = ''
    if action == ACTIONS[0]:
        output = tokenizer(inputs)['input_ids']
    elif action == ACTIONS[1]:
        input_ids = tokenizer(inputs)['input_ids']
        output = tokenizer.convert_ids_to_tokens(input_ids)
    elif action == ACTIONS[2]:
        output = tokenizer.convert_ids_to_tokens(parse_json_input())
    elif action == ACTIONS[3]:
        output = tokenizer.convert_tokens_to_ids(parse_json_input())

    return f'{output}\n\n\n\nother infos:\njson:{json.dumps(output)} \nvocab_size: {vocab_size}\nlen(tokenizer): {len_tokenizer}'
# Input widgets, listed in the same order as translate()'s parameters
# (model, action, inputs).
_model_name_box = gr.components.Textbox(
    label="MODEL NAME, eg: google/byt5-small",
    value="google/byt5-small",
)
_action_dropdown = gr.components.Dropdown(label="ACTIONS", choices=ACTIONS)
_inputs_box = gr.components.Textbox(label="INPUTS")

# Wire the widgets to translate() and show its string result as text.
demo = gr.Interface(
    fn=translate,
    inputs=[_model_name_box, _action_dropdown, _inputs_box],
    outputs=["text"],
    cache_examples=False,
    title="Test T5Tokenizer",
    description="▁Test, ▁To, ken, izer, s, ▁happily, !, </s>",
)

# Start the web UI as soon as the script runs.
demo.launch(debug=True)