Update app.py
app.py CHANGED
@@ -40,8 +40,8 @@ def compare_tokenizers(tokenizer_name, text):
         tokenizer = tokenizers[tokenizer_name]()
         tokens = tokenizer.tokenize(text)
         tokens_arabic = [token.encode('utf-8').decode('utf-8') for token in tokens]
-        encoded_output = tokenizer.encode(text, add_special_tokens=True
-        decoded_text = tokenizer.decode(encoded_output
+        encoded_output = tokenizer.encode(text, add_special_tokens=True)
+        decoded_text = tokenizer.decode(encoded_output, skip_special_tokens=True)
     else:
         # AraNizer tokenizers
         tokenizer = tokenizers[tokenizer_name]()
@@ -51,7 +51,7 @@ def compare_tokenizers(tokenizer_name, text):
         tokens_arabic = [token.encode('utf-8').decode('utf-8') for token in tokens]
 
     # Prepare the results to be displayed
-    results = [(tokenizer_name, tokens_arabic, encoded_output
+    results = [(tokenizer_name, tokens_arabic, encoded_output, decoded_text)]
     return results
 
 # Define the Gradio interface components with a dropdown for model selection
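For context, a minimal sketch of what the corrected branch might look like once this diff is applied. It assumes a standard Hugging Face transformers tokenizer loaded by name in place of the Space's tokenizers registry (which also holds AraNizer tokenizers not shown in these hunks); the model name and the __main__ demo below are illustrative assumptions, not part of the Space.

# Sketch only: AutoTokenizer stands in for the Space's `tokenizers` registry,
# and the model name in the demo call is an assumption for illustration.
from transformers import AutoTokenizer

def compare_tokenizers(tokenizer_name, text):
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    tokens = tokenizer.tokenize(text)
    tokens_arabic = [token.encode('utf-8').decode('utf-8') for token in tokens]
    # The fix: both calls now close their parentheses, and decode() strips
    # special tokens such as [CLS]/[SEP] from the round-tripped text.
    encoded_output = tokenizer.encode(text, add_special_tokens=True)
    decoded_text = tokenizer.decode(encoded_output, skip_special_tokens=True)
    # The result tuple now also carries decoded_text, matching the new line 54.
    results = [(tokenizer_name, tokens_arabic, encoded_output, decoded_text)]
    return results

if __name__ == "__main__":
    # Hypothetical usage with an Arabic BERT tokenizer and a short Arabic string.
    print(compare_tokenizers("asafaya/bert-base-arabic", "مرحبا بالعالم"))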