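# NOTE(review): the six lines below look like residue from the hosting site's
# file-viewer page (not program text); kept as comments so the file parses.
# guymorlan's picture
# Update app.py
# c6866fc
# raw
# history blame
# 2.97 kB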
from transformers import pipeline
import requests
import json
import gradio as gr
# JavaScript snippet handed to `demo.load(_js=js)` further down the file.
# It defines two helpers and publishes them on `globalThis` so that the inline
# `onmouseover` / `onmouseout` / `onclick` attributes emitted by `predict()`
# can call them by name:
#   * showCard(event, title, content): makes the element with id `hovercard`
#     visible and writes `title` / `content` into `card_title` / `card_content`.
#   * hideCard(event): hides the `hovercard` element again.
# The `event` argument is accepted by both helpers but not used by either.
js = """
async () => {
function showCard(event, title, content) {
document.getElementById('hovercard').style.visibility = 'visible';
document.getElementById('card_title').innerText = title;
document.getElementById('card_content').innerText = content;
}
function hideCard(event) {
document.getElementById('hovercard').style.visibility = 'hidden';
}
globalThis.showCard = showCard;
globalThis.hideCard = hideCard;
}
"""
# Hugging Face pipeline used by predict(); its 'translation_text' output is the
# string that predict() splits on "+".
pipe = pipeline("translation", "guymorlan/TokenizerLabeller")

# Lexicon consulted by predict(): entries are looked up by token and expose
# 'word', 'translation' and 'features'.
# A timeout keeps startup from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error into a clear exception instead of a
# confusing JSON decode failure on an error page.
r = requests.get(
    "https://huggingface.co/guymorlan/TokenizerLabeller/raw/main/playaling_words.json",
    timeout=30,
)
r.raise_for_status()
data = json.loads(r.text)
def predict(input):
    """Run *input* through the model and render the result as annotated HTML.

    The text is passed to the module-level ``pipe``. Its ``translation_text``
    is shown verbatim on the first line, then split on ``" + "`` into groups
    and each group on ``"+"`` into pieces. A piece found in the module-level
    ``data`` mapping is rendered as a green chip showing the entry's ``word``;
    hovering or clicking the chip calls the page-level ``showCard`` JavaScript
    function with the entry's ``translation`` and ``features``. Any other
    piece is shown as plain text inside its group's grey box.

    All dynamic text is escaped: displayed text is HTML-escaped, and the
    arguments of the inline JavaScript handlers are encoded as JSON string
    literals and then attribute-escaped, so quotes, apostrophes, ``<`` or
    ``&`` in the model output or lexicon cannot break the markup.

    Args:
        input: Text to analyse. (The name shadows the builtin but is kept so
            the function's interface does not change.)

    Returns:
        str: An HTML fragment containing the annotated tokens followed by the
        hidden ``hovercard`` element, or an empty string for blank input.
    """
    # Imported locally on purpose: the module-level name ``html`` is bound to
    # a Gradio component later in this file, so a top-level ``import html``
    # would be clobbered.
    from html import escape

    def js_string(value):
        # A JSON string literal is also a valid JavaScript string literal;
        # escaping it afterwards makes it safe inside a quoted HTML attribute
        # (the browser decodes the entities before running the handler).
        return escape(json.dumps(str(value)), quote=True)

    # Nothing to analyse: skip the model call entirely.
    if not input or not input.strip():
        return ""

    raw = pipe(input)[0]['translation_text']
    groups = [group.strip() for group in raw.split(" + ")]

    group_style = (
        "background-color: #E0E0E0; border-radius: 5px; padding: 5px; "
        "margin-right: 5px; display: inline-block;"
    )
    chip_style = (
        "background-color: #4CAF50; color: #FFFFFF; border: 1px solid #4CAF50; "
        "border-radius: 5px; padding: 2px; margin-right: 2px; "
        'font-family: "Courier New", Courier, monospace;'
    )

    parts = [
        "\n<div style='direction: rtl; text-align: right; font-size: 18px; "
        "font-family: Arial, sans-serif; line-height: 1.5'>"
        f"{escape(raw)}<br><br>"
    ]
    for group in groups:
        parts.append(f"<span style='{group_style}'>")
        for piece in (p.strip() for p in group.split("+")):
            if piece in data:
                entry = data[piece]
                title = js_string(entry['translation'])
                content = js_string(entry['features'])
                parts.append(
                    f"\n<span style='{chip_style}'\n"
                    f"onmouseover='showCard(event, {title}, {content})'\n"
                    f"onmouseout='hideCard(event)' "
                    f"onclick='showCard(event, {title}, {content})'>"
                    f"{escape(str(entry['word']))}</span>\n"
                )
            else:
                parts.append(escape(piece))
        parts.append("</span> ")
    parts.append("</div>")

    # Hidden card that showCard()/hideCard() toggle and fill in.
    parts.append(
        "\n<div id='hovercard' style='position: absolute; visibility: hidden; "
        "background: #FFFFFF; padding: 10px;\n"
        "border: 1px solid #9E9E9E; border-radius: 5px; "
        "font-family: Arial, sans-serif;'>\n"
        "<h3 id='card_title' style='color: #000000;'></h3>\n"
        "<p id='card_content' style='color: #000000;'></p>\n"
        "</div>\n"
    )
    return "".join(parts)
# Gradio UI: a text box with examples and an "Analyze" button on the left,
# and an HTML panel on the right that shows the output of predict().
with gr.Blocks(title="Ammiya Tokenizer and Annotator") as demo:
    gr.HTML("<h2><span style='color: #2563eb'>Colloquial Arabic</span></h2> Tokenizer and Annotator")
    with gr.Row():
        with gr.Column():
            # The examples below are Arabic, so the placeholder asks for
            # Arabic text (it previously said "English").
            input = gr.Textbox(label="Input", placeholder="Enter Arabic Text", lines=1)
            gr.Examples(["بديش اروح معك", "مكنتش هون قبل ما جيت"], input)
            # The visible caption of a Button is its value (first positional
            # argument); passing it as `label=` left the default caption.
            btn = gr.Button("Analyze")
        with gr.Column():
            with gr.Box():
                html = gr.HTML()
    # Both clicking the button and pressing Enter in the text box run predict().
    btn.click(predict, inputs=[input], outputs=[html])
    input.submit(predict, inputs=[input], outputs=[html])
    # Install the showCard/hideCard helpers in the page when it loads.
    demo.load(_js=js)
demo.launch()