# Web file-viewer residue (not part of the program), kept here as comments:
# File size: 1,236 Bytes
# 9715926 e5aaf6d 9715926 b6678bf 9715926
# (a 1-40 line-number gutter followed)
from functools import lru_cache

import gradio as gr
from transformers import AutoTokenizer
@lru_cache(maxsize=16)
def _load_tokenizer(tokenizer_name):
    """Return the tokenizer for ``tokenizer_name``, loading it at most once.

    ``AutoTokenizer.from_pretrained`` is called only on the first request for
    a given name; later requests reuse the cached object instead of loading
    it again. The cache is bounded so arbitrary names cannot grow it forever.
    """
    return AutoTokenizer.from_pretrained(tokenizer_name)


# Define a function to tokenize text with a selected tokenizer
def tokenize_text(text: str, tokenizer_name: str) -> str:
    """Tokenize ``text`` with the chosen tokenizer and describe the result.

    Args:
        text: The text to tokenize. ``None`` is treated as an empty string.
        tokenizer_name: Identifier passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        A three-line report with the token list, the matching input IDs and
        the text obtained by decoding those IDs. If no tokenizer has been
        selected, a short prompt asking the user to pick one.
    """
    # The dropdown may have no selection yet; loading a tokenizer from an
    # empty name would only fail, so answer with a readable hint instead.
    if not tokenizer_name:
        return "Please select a tokenizer."

    tokenizer = _load_tokenizer(tokenizer_name)
    tokenized_text = tokenizer.tokenize(text or "")
    input_ids = tokenizer.convert_tokens_to_ids(tokenized_text)
    decoded_text = tokenizer.decode(input_ids)  # Decode the input IDs
    return (
        f"Tokenized Text: {tokenized_text}\n"
        f"Input IDs: {input_ids}\n"
        f"Decoded Text: {decoded_text}"
    )
# Tokenizer identifiers offered in the dropdown, grouped by the publisher
# prefix (the part before the slash). Order is the order shown to the user.
tokenizer_names = [
    # riotu-lab
    "riotu-lab/ArabianGPT-01B",
    "riotu-lab/ArabianGPT-03B",
    "riotu-lab/ArabianGPT-08B",
    # FreedomIntelligence
    "FreedomIntelligence/AceGPT-13B",
    "FreedomIntelligence/AceGPT-7B",
    # inception-mbzuai
    "inception-mbzuai/jais-13b",
    # aubmindlab
    "aubmindlab/aragpt2-base",
    "aubmindlab/aragpt2-medium",
    "aubmindlab/aragpt2-large",
    "aubmindlab/aragpt2-mega",
]
# Create the Gradio interface: a free-text box plus a tokenizer dropdown as
# inputs, and the report string returned by tokenize_text as plain-text output.
iface = gr.Interface(
    fn=tokenize_text,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Dropdown(choices=tokenizer_names, label="Select Tokenizer"),
    ],
    outputs="text",
    title="Hugging Face Tokenizer Demo",
    description="Try different tokenizers and see the tokenized form with input IDs.",
)

# Launch the app
iface.launch()