File size: 2,992 Bytes
ce54c6a
 
 
6db2364
ce54c6a
0a66247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce54c6a
 
 
 
 
 
 
2bced45
6db2364
ce54c6a
0555443
0a66247
0555443
ce54c6a
6db2364
0a66247
6db2364
 
0a66247
 
 
 
 
6db2364
0a66247
 
0555443
 
 
0a66247
 
 
 
 
0555443
 
ce54c6a
0a66247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from transformers import pipeline
import requests
import json
import gradio as gr

# JavaScript snippet handed to Gradio through ``demo.load(_js=js)``.
# It defines two helpers and publishes them on ``globalThis`` so the inline
# ``onmouseover`` / ``onmouseout`` / ``onclick`` attributes emitted by
# ``predict`` can call them by name:
#   * showCard(event, title, content) -- makes the element with id
#     'hovercard' visible and writes ``title`` / ``content`` into the
#     'card_title' / 'card_content' elements via ``innerText``.
#   * hideCard(event) -- hides the 'hovercard' element again.
# The ``event`` argument is accepted by both helpers but not used.
js = """
async () => {
    function showCard(event, title, content) {
        document.getElementById('hovercard').style.visibility = 'visible';
        document.getElementById('card_title').innerText = title;
        document.getElementById('card_content').innerText = content;
    }

    function hideCard(event) {
        document.getElementById('hovercard').style.visibility = 'hidden';
    }

    globalThis.showCard = showCard;
    globalThis.hideCard = hideCard;
}
"""

# Model pipeline used by ``predict``; created once when the module is loaded.
pipe = pipeline("translation", "guymorlan/TokenizerLabeller")

# Lexicon consulted by ``predict``: it looks tokens up as keys and reads the
# 'translation', 'features' and 'word' fields of each entry.
# A timeout keeps a stalled connection from hanging start-up forever, and
# raise_for_status() turns an HTTP error into a clear exception instead of a
# JSON decode failure on an error page.
r = requests.get(
    "https://huggingface.co/guymorlan/TokenizerLabeller/raw/main/playaling_words.json",
    timeout=30,
)
r.raise_for_status()
# Decode from the raw bytes: json.loads detects the UTF encoding itself, so the
# result does not depend on the charset guessed from the response headers.
data = json.loads(r.content)

def predict(input):
    """Run the tokenizer/labeller on ``input`` and render the result as HTML.

    The pipeline's ``translation_text`` is split into groups on ``" + "`` and
    each group into tokens on ``"+"``.  Tokens present in the module-level
    ``data`` lexicon are rendered as highlighted chips whose mouse handlers
    call the page's ``showCard`` / ``hideCard`` helpers with the entry's
    ``translation`` and ``features``; all other tokens are shown as plain text.

    Every piece of model- or lexicon-derived text is HTML-escaped, and the
    handler arguments are emitted as JSON string literals, so quotes,
    apostrophes or markup characters in those values can neither break the
    attributes nor inject markup or script.

    Args:
        input: Text to analyse.  The name shadows the builtin but is kept so
            the function's interface stays unchanged.

    Returns:
        str: An HTML fragment with the raw model output, the token chips and
        the initially hidden ``hovercard`` element.
    """
    # Function-scope import: keeps ``escape`` independent of whatever the
    # module-level name ``html`` happens to be bound to.
    from html import escape

    def js_arg(value):
        # str() matches how the value used to be interpolated; json.dumps
        # produces a valid JavaScript string literal; escape() then makes that
        # literal safe inside a single-quoted HTML attribute.
        return escape(json.dumps(str(value)), quote=True)

    raw = pipe(input)[0]['translation_text']
    groups = [group.strip() for group in raw.split(" + ")]

    parts = [f"""
        <div style='direction: rtl; text-align: right; font-size: 18px; font-family: Arial, sans-serif; line-height: 1.5'>{escape(raw)}<br><br>"""]

    for group in groups:
        parts.append("<span style='background-color: #E0E0E0; border-radius: 5px; padding: 5px; margin-right: 5px; display: inline-block;'>")
        for token in (piece.strip() for piece in group.split("+")):
            if token in data:
                entry = data[token]
                # Same call is wired to both hover and click.
                show_call = f"showCard(event, {js_arg(entry['translation'])}, {js_arg(entry['features'])})"
                parts.append(f"""
                    <span style='background-color: #4CAF50; color: #FFFFFF; border: 1px solid #4CAF50; border-radius: 5px; padding: 2px; margin-right: 2px; font-family: "Courier New", Courier, monospace;'
                    onmouseover='{show_call}'
                    onmouseout='hideCard(event)' onclick='{show_call}'>{escape(str(entry['word']))}</span>
                """)
            else:
                # Unknown token: show it verbatim, but never as markup.
                parts.append(escape(token))
        parts.append("</span> ")
    parts.append("</div>")

    # Hidden card that showCard()/hideCard() toggle and fill in.
    parts.append("""
    <div id='hovercard' style='position: absolute; visibility: hidden; background: #FFFFFF; padding: 10px;
    border: 1px solid #9E9E9E; border-radius: 5px; font-family: Arial, sans-serif;'>
        <h3 id='card_title' style='color: #000000;'></h3>
        <p id='card_content' style='color: #000000;'></p>
    </div>
    """)
    return "".join(parts)

# Gradio UI: a text box with examples and a button on the left, the rendered
# HTML analysis on the right.  Both the button click and pressing Enter in the
# text box run ``predict``.
with gr.Blocks(theme=gr.themes.Soft(), title="Ammiya Tokenizer and Labeler") as demo:
    gr.HTML("<h2><span style='color: #2563eb'>Colloquial Arabic</span></h2> Tokenizer and Annotator")
    with gr.Row():
        with gr.Column():
            # Named text_input / output_html so the builtin ``input`` and the
            # stdlib module name ``html`` are not shadowed at module level.
            # The tool analyses Arabic text (see the examples), so the
            # placeholder says so.
            text_input = gr.Textbox(label="Input", placeholder="Enter Arabic Text", lines=1)
            gr.Examples(["بديش اروح معك", "مكنتش هون قبل ما جيت"], text_input)
            # The button caption is its ``value``; ``label`` does not set it.
            btn = gr.Button(value="Analyze")
        with gr.Column():
            with gr.Box():
                output_html = gr.HTML()
    btn.click(predict, inputs=[text_input], outputs=[output_html])
    text_input.submit(predict, inputs=[text_input], outputs=[output_html])

    # Install the showCard/hideCard helpers in the page when it loads.
    demo.load(_js=js)

# Launch once the Blocks context has been closed and the layout is complete.
demo.launch()