# Kalemat / app.py
# Hugging Face Space by Omartificial-Intelligence-Space.
# Last commit: "update app" (e5aaf6d, verified); file size 1.24 kB.
import gradio as gr
from transformers import AutoTokenizer
# Define a function to tokenize text with a selected tokenizer
# Tokenizers already loaded in this process, keyed by model id, so repeated
# requests for the same tokenizer do not reload it on every call.
_TOKENIZER_CACHE = {}


def _get_tokenizer(tokenizer_name):
    """Return the tokenizer for *tokenizer_name*, loading it on first use."""
    tokenizer = _TOKENIZER_CACHE.get(tokenizer_name)
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        _TOKENIZER_CACHE[tokenizer_name] = tokenizer
    return tokenizer


def tokenize_text(text, tokenizer_name):
    """Tokenize *text* with the selected tokenizer and describe the result.

    Args:
        text: The text to tokenize. ``None`` is treated as an empty string.
        tokenizer_name: Model id passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        A three-line string listing the tokens, their input IDs, and the
        text decoded back from those IDs. If no tokenizer name is given
        (``None`` or empty), a short prompt asking the user to pick one.
    """
    # The UI may call this before anything is chosen in the dropdown; answer
    # with a readable message instead of failing inside from_pretrained.
    if not tokenizer_name:
        return "Please select a tokenizer."

    if text is None:
        text = ""

    tokenizer = _get_tokenizer(tokenizer_name)
    tokenized_text = tokenizer.tokenize(text)
    input_ids = tokenizer.convert_tokens_to_ids(tokenized_text)
    # Decode the IDs back to text so the round trip can be compared with the input.
    decoded_text = tokenizer.decode(input_ids)
    return f"Tokenized Text: {tokenized_text}\nInput IDs: {input_ids}\nDecoded Text: {decoded_text}"
# Model ids offered in the tokenizer dropdown, grouped by publishing organisation.
tokenizer_names = [
    # riotu-lab: ArabianGPT family
    "riotu-lab/ArabianGPT-01B",
    "riotu-lab/ArabianGPT-03B",
    "riotu-lab/ArabianGPT-08B",
    # FreedomIntelligence: AceGPT family
    "FreedomIntelligence/AceGPT-13B",
    "FreedomIntelligence/AceGPT-7B",
    # inception-mbzuai: Jais
    "inception-mbzuai/jais-13b",
    # aubmindlab: AraGPT2 family
    "aubmindlab/aragpt2-base",
    "aubmindlab/aragpt2-medium",
    "aubmindlab/aragpt2-large",
    "aubmindlab/aragpt2-mega",
]
# Input widgets: free text to tokenize, and which tokenizer to apply to it.
text_input = gr.Textbox(label="Enter Text")
tokenizer_choice = gr.Dropdown(choices=tokenizer_names, label="Select Tokenizer")

# Wire the widgets to tokenize_text; its result is shown as plain text.
iface = gr.Interface(
    fn=tokenize_text,
    inputs=[text_input, tokenizer_choice],
    outputs="text",
    title="Hugging Face Tokenizer Demo",
    description="Try different tokenizers and see the tokenized form with input IDs.",
)

# Start serving the app.
iface.launch()