File size: 1,454 Bytes
ce54c6a
 
 
6db2364
ce54c6a
 
 
 
 
 
 
0555443
ce54c6a
 
 
2bced45
6db2364
ce54c6a
0555443
cc05908
0555443
ce54c6a
6db2364
 
 
 
0555443
cc05908
 
0555443
6db2364
 
 
0555443
ce54c6a
0555443
 
 
 
cc05908
 
 
0555443
 
 
 
ce54c6a
cc05908
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from transformers import pipeline
import requests
import json
import gradio as gr

# Load the model pipeline once at import time; `predict` reuses it on every call.
pipe = pipeline("translation", "guymorlan/TokenizerLabeller")

# Download the Playaling word table (JSON) that `predict` looks tokens up in.
# The timeout keeps startup from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP failure as such instead of letting an
# error page reach json.loads and fail with a misleading decode error.
r = requests.get(
    "https://huggingface.co/guymorlan/TokenizerLabeller/raw/main/playaling_words.json",
    timeout=30,
)
r.raise_for_status()
data = json.loads(r.text)

# callback for the gradio interface: tokenizes the text and renders it as HTML
def predict(input):
    """Run the model on *input* and render its output as an HTML fragment.

    The model's ``translation_text`` is shown verbatim first. It is then split
    on " + " into segments and each segment on "+" into tokens. A token found
    in the module-level ``data`` table is rendered as a green ``<span>``
    showing the entry's ``word``, with its ``translation`` and ``features`` in
    the ``title`` attribute (tooltip); any other token is shown as plain text.

    All interpolated values are HTML-escaped: the markup uses single-quoted
    attributes, so an unescaped apostrophe in a translation/features value
    would terminate ``title='...'`` early, and unescaped model or user text
    could inject markup into the page.

    Args:
        input: Text passed to the module-level ``pipe`` pipeline.

    Returns:
        A string containing the HTML fragment.

    Raises:
        KeyError: If a matched ``data`` entry lacks ``translation``,
            ``features`` or ``word``.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    raw = pipe(input)[0]['translation_text']
    segments = [segment.strip() for segment in raw.split(" + ")]

    output = f"""
    <div style='direction: rtl; text-align: right; font-size: 20px; font-family: sans-serif; line-height: 1.5'>{escape(raw)}<br><br>"""

    for segment in segments:
        rendered = []
        for token in (part.strip() for part in segment.split("+")):
            entry = data.get(token)
            if entry is None:
                # Unknown token: show it as text, never as markup.
                rendered.append(escape(token))
                continue

            # str() mirrors the implicit formatting of the f-string for
            # non-string values; escape() also encodes both quote characters.
            translation = escape(str(entry['translation']))
            features = escape(str(entry['features']))
            word = escape(str(entry['word']))
            rendered.append(f"""
                <span style='color: green; font-family: "Courier New", Courier, monospace;'
                data-toggle='tooltip' data-placement='top' title='{translation}\n{features}'>{word}</span>
                """)

        # Each segment is followed by " | ", including the last one.
        output += "+".join(rendered) + " | "

    output += "</div>"

    # NOTE(review): this snippet needs jQuery and a `.tooltip()` plugin on the
    # host page -- confirm they are loaded; the plain `title` attribute does
    # not depend on it.
    output += """
    <script>
    $(document).ready(function(){
        $('[data-toggle="tooltip"]').tooltip();   
    });
    </script>
    """

    return output

# Wire `predict` into a textbox-in / HTML-out Gradio interface and start it.
gr.Interface(
    fn=predict,
    inputs="textbox",
    outputs="html",
    title="Ammiya Tokenizer",
    description="Tokenize Ammiya text and show Playaling words",
).launch()