Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import gradio as gr
|
3 |
+
from transformers import AutoTokenizer
|
4 |
+
|
5 |
+
# Tokenizer catalogue: maps the label shown in the dropdown to the Hugging Face
# Hub checkpoint id handed to AutoTokenizer.from_pretrained.
tokenizers = {
    "English - BERT (bert-base-uncased)": "bert-base-uncased",
    "Arabic - CAMeL BERT (bert-base-arabic-camelbert-ca)": (
        "CAMeL-Lab/bert-base-arabic-camelbert-ca"
    ),
    "Arabic - AraBERT (asafaya/bert-base-arabic)": "asafaya/bert-base-arabic",
}
|
11 |
+
|
12 |
+
# Tokenizers already loaded in this process, keyed by Hub checkpoint id, so a
# repeated request does not call AutoTokenizer.from_pretrained again.
_loaded_tokenizers = {}


def tokenize_text(text, model_name):
    """Tokenize *text* with the tokenizer selected by *model_name*.

    Args:
        text: Sentence to tokenize. ``None`` is treated as an empty string.
        model_name: A key of the module-level ``tokenizers`` mapping.

    Returns:
        A pair of display strings: the token list and the token count.

    Raises:
        KeyError: If ``model_name`` is not a key of ``tokenizers``.
    """
    checkpoint = tokenizers[model_name]

    # Load each tokenizer at most once; later calls reuse the cached instance
    # instead of re-loading it on every request.
    tokenizer = _loaded_tokenizers.get(checkpoint)
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        _loaded_tokenizers[checkpoint] = tokenizer

    tokens = tokenizer.tokenize(text or "")

    return f"Tokens: {tokens}", f"Number of tokens: {len(tokens)}"
|
22 |
+
|
23 |
+
# Input widgets: a dropdown listing every tokenizer label (defaulting to the
# English BERT entry) and a free-text box for the sentence.
model_choice = gr.Dropdown(
    label="Select Tokenizer",
    choices=list(tokenizers),
    value="English - BERT (bert-base-uncased)",
)
text_input = gr.Textbox(label="Enter a sentence to tokenize")
|
26 |
+
|
27 |
+
|
28 |
+
# Predefined example rows. Each row supplies one value per input component, in
# the order the interface declares them (sentence, tokenizer label), so a row
# can be passed to tokenize_text as-is when examples are run or cached.
examples = [
    # English sentence
    [
        "The quick brown fox jumps over the lazy dog.",
        "English - BERT (bert-base-uncased)",
    ],
    # Arabic sentence
    [
        "القمر جميل في السماء.",
        "Arabic - CAMeL BERT (bert-base-arabic-camelbert-ca)",
    ],
]
|
33 |
+
|
34 |
+
# Set up the Gradio interface: the sentence box and tokenizer dropdown feed
# tokenize_text, whose two returned strings fill the two output textboxes.
demo = gr.Interface(
    fn=tokenize_text,
    inputs=[text_input, model_choice],
    outputs=[
        gr.Textbox(label="Tokens"),
        gr.Textbox(label="Number of Tokens"),
    ],
    title="Hugging Face Tokenizer Explorer",
    description="Enter a sentence or use one of the example sentences below to see how different tokenizers work.",
    examples=examples,
    # Flagging is disabled. Gradio expects one of the string modes
    # ("never", "auto", "manual") here rather than a boolean.
    allow_flagging="never",
)
|