Spaces:
Sleeping
Sleeping
File size: 1,454 Bytes
ce54c6a 6db2364 ce54c6a 0555443 ce54c6a 2bced45 6db2364 ce54c6a 0555443 cc05908 0555443 ce54c6a 6db2364 0555443 cc05908 0555443 6db2364 0555443 ce54c6a 0555443 cc05908 0555443 ce54c6a cc05908 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import html
import json

import gradio as gr
import requests
from transformers import pipeline
# Model used by predict(); its result is read from the 'translation_text'
# field of the first pipeline output.
pipe = pipeline("translation", "guymorlan/TokenizerLabeller")

# Download the Playaling word lexicon once at startup.
# A timeout keeps a stalled connection from hanging startup forever, and
# raise_for_status() reports an HTTP failure directly instead of letting an
# error page reach the JSON parser.
r = requests.get(
    "https://huggingface.co/guymorlan/TokenizerLabeller/raw/main/playaling_words.json",
    timeout=30,
)
r.raise_for_status()
# Parse from the raw bytes so decoding does not depend on the charset the
# server advertises (json.loads detects UTF-8/16/32 for bytes input).
data = json.loads(r.content)
# build gradio interface
def predict(input):
    """Tokenize *input* with the model and render the result as HTML.

    The model output is shown verbatim first. It is then split into groups
    on " + " and each group into tokens on "+". Tokens present in the
    module-level ``data`` lexicon are rendered as a green ``<span>`` showing
    the lexicon entry's ``word``, with its ``translation`` and ``features``
    in the ``title`` tooltip; other tokens are shown as-is. Each group is
    followed by " | ".

    All dynamic text is HTML-escaped: the model output derives from user
    input, and lexicon values may contain quotes that would otherwise end
    the single-quoted ``title`` attribute early.

    Args:
        input: Text to tokenize. The name shadows the builtin but is kept so
            existing callers are unaffected.

    Returns:
        An HTML fragment as a string.
    """
    raw = pipe(input)[0]['translation_text']
    groups = [x.strip() for x in raw.split(" + ")]

    parts = [f"""
<div style='direction: rtl; text-align: right; font-size: 20px; font-family: sans-serif; line-height: 1.5'>{html.escape(raw)}<br><br>"""]

    for group in groups:
        rendered = []
        for token in (x.strip() for x in group.split("+")):
            if token in data:
                entry = data[token]
                # html.escape defaults to quote=True, which also escapes the
                # single quote that delimits the title attribute below.
                tooltip = html.escape(f"{entry['translation']}\n{entry['features']}")
                word = html.escape(str(entry['word']))
                rendered.append(f"""
<span style='color: green; font-family: "Courier New", Courier, monospace;'
data-toggle='tooltip' data-placement='top' title='{tooltip}'>{word}</span>
""")
            else:
                rendered.append(html.escape(token))
        parts.append("+".join(rendered) + " | ")

    parts.append("</div>")
    # Static snippet that asks a jQuery tooltip plugin to activate the
    # data-toggle='tooltip' spans emitted above.
    parts.append("""
<script>
$(document).ready(function(){
$('[data-toggle="tooltip"]').tooltip();
});
</script>
""")
    return "".join(parts)
# Wire predict() to a Gradio UI (text box in, HTML out) and start it.
gr.Interface(
    predict,
    "textbox",
    "html",
    title="Ammiya Tokenizer",
    description="Tokenize Ammiya text and show Playaling words",
).launch()