Spaces:
Sleeping
Sleeping
File size: 1,626 Bytes
a1ad15c d459757 a1ad15c a0f9f54 a1ad15c d459757 a1ad15c a0f9f54 a1ad15c cd3f827 a1ad15c 6f7a741 a1ad15c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import gradio as gr
from transformers import ByT5Tokenizer
import json
# Labels offered in the "ACTIONS" dropdown. translate() selects its branch by
# comparing against these entries by position, so the order must not change.
ACTIONS = [
    "text2ids",
    "text2tokens",
    "ids2tokens",
    "tokens2ids and JSON requires double quotes",
    "ids2text",
]
def translate(model, action, inputs):
    """Run one tokenizer conversion and return a plain-text report.

    Args:
        model: Model name or path handed to ``ByT5Tokenizer.from_pretrained``.
        action: One of the labels in ``ACTIONS``; selects the conversion.
        inputs: Raw text for the two ``text2*`` actions. For the remaining
            actions, a JSON document such as ``[100, 101]`` or
            ``["a", "b"]`` (JSON strings need double quotes).

    Returns:
        A string made of the conversion result, followed by its JSON
        encoding, ``tokenizer.vocab_size`` and ``len(tokenizer)``. When
        ``action`` matches none of the ``ACTIONS`` entries the result part
        is the empty string.

    Raises:
        json.JSONDecodeError: If a JSON-based action is given input that is
            not valid JSON. The message names the expected format.
    """
    tokenizer = ByT5Tokenizer.from_pretrained(model)
    vocab_size = tokenizer.vocab_size
    len_tokenizer = len(tokenizer)

    output = ''
    if action == ACTIONS[0]:
        # text -> ids
        output = tokenizer(inputs)['input_ids']
    elif action == ACTIONS[1]:
        # text -> ids -> tokens
        input_ids = tokenizer(inputs)['input_ids']
        output = tokenizer.convert_ids_to_tokens(input_ids)
    elif action in (ACTIONS[2], ACTIONS[3], ACTIONS[4]):
        # These three actions all take a JSON document as input.
        try:
            payload = json.loads(inputs)
        except json.JSONDecodeError as err:
            # Re-raise the same exception type with an actionable message so
            # the user learns what format is expected.
            raise json.JSONDecodeError(
                f'INPUTS must be valid JSON for action "{action}" '
                f'(e.g. [100, 101] or ["a", "b"]): {err.msg}',
                err.doc,
                err.pos,
            ) from err

        if action == ACTIONS[2]:
            # ids -> tokens
            output = tokenizer.convert_ids_to_tokens(payload)
        elif action == ACTIONS[3]:
            # tokens -> ids
            output = tokenizer.convert_tokens_to_ids(payload)
        else:
            # ids -> text
            output = tokenizer.decode(payload)

    return f'{output}\n\n\n\nother infos:\njson:{json.dumps(output)} \nvocab_size: {vocab_size}\nlen(tokenizer): {len_tokenizer}'
# Gradio front end: the three input widgets map, in order, onto the
# (model, action, inputs) parameters of translate().
demo = gr.Interface(
    title="Test T5Tokenizer",
    description="▁Test, ▁To, ken, izer, s, ▁happily, !, </s>",
    fn=translate,
    inputs=[
        gr.components.Textbox(
            value="google/byt5-small",
            label="MODEL NAME, eg: google/byt5-small",
        ),
        gr.components.Dropdown(choices=ACTIONS, label="ACTIONS"),
        gr.components.Textbox(label="INPUTS"),
    ],
    outputs=["text"],
    cache_examples=False,
)

# Start serving the interface with Gradio's debug flag enabled.
demo.launch(debug=True)