Spaces:

guymorlan
/

TokenizerLabeller

Sleeping

File size: 1,454 Bytes

ce54c6a
 
 
6db2364
ce54c6a
 
 
 
 
 
 
0555443
ce54c6a
 
 
2bced45
6db2364
ce54c6a
0555443
cc05908
0555443
ce54c6a
6db2364
 
 
 
0555443
cc05908
 
0555443
6db2364
 
 
0555443
ce54c6a
0555443
 
 
 
cc05908
 
 
0555443
 
 
 
ce54c6a
cc05908

import html
import json

import gradio as gr
import requests
from transformers import pipeline

# Pipeline built from the guymorlan/TokenizerLabeller checkpoint; predict()
# reads the 'translation_text' field of its first result.
pipe = pipeline("translation", "guymorlan/TokenizerLabeller")

# Download the Playaling word table once at start-up and parse it as JSON.
# The timeout keeps a stalled connection from blocking start-up forever, and
# raise_for_status() reports an HTTP error directly instead of letting an
# error page fail later with a confusing JSON decode error.
r = requests.get(
    "https://huggingface.co/guymorlan/TokenizerLabeller/raw/main/playaling_words.json",
    timeout=30,
)
r.raise_for_status()
data = json.loads(r.text)

# build gradio interface
def predict(input):
    """Tokenize the given text and render the result as an HTML fragment.

    The text is passed to the module-level ``pipe``; the ``translation_text``
    of its first result is shown on the first line, then split into groups on
    " + " and each group into tokens on "+". A token that is a key of the
    module-level ``data`` mapping is rendered as a green span showing the
    entry's ``word``, with its ``translation`` and ``features`` in the
    ``title`` attribute; any other token is shown as-is. Groups are
    terminated by " | ".

    Everything coming from the model or from ``data`` is HTML-escaped before
    being interpolated, so quotes, ampersands or angle brackets in that text
    cannot terminate the single-quoted ``title`` attribute or inject markup.

    Args:
        input: Text to tokenize. (The name shadows the builtin but is kept so
            the function's interface is unchanged.)

    Returns:
        A string containing the HTML fragment.
    """
    raw = pipe(input)[0]['translation_text']
    groups = [part.strip() for part in raw.split(" + ")]

    pieces = [f"""
    <div style='direction: rtl; text-align: right; font-size: 20px; font-family: sans-serif; line-height: 1.5'>{html.escape(raw)}<br><br>"""]

    for group in groups:
        rendered = []
        for token in (piece.strip() for piece in group.split("+")):
            if token in data:
                entry = data[token]
                # Formatting through an f-string first stringifies non-str
                # values (as the original interpolation did) before escaping.
                tooltip = html.escape(f"{entry['translation']}\n{entry['features']}")
                word = html.escape(f"{entry['word']}")
                rendered.append(f"""
                <span style='color: green; font-family: "Courier New", Courier, monospace;'
                data-toggle='tooltip' data-placement='top' title='{tooltip}'>{word}</span>
                """)
            else:
                rendered.append(html.escape(token))

        pieces.append("+".join(rendered) + " | ")

    pieces.append("</div>")

    # NOTE(review): this snippet relies on jQuery and a tooltip plugin being
    # present on the page and on the front end executing injected scripts;
    # confirm that it actually runs. The plain `title` attribute works
    # without it.
    pieces.append("""
    <script>
    $(document).ready(function(){
        $('[data-toggle="tooltip"]').tooltip();   
    });
    </script>
    """)

    return "".join(pieces)

# Wire predict() to a text box input and an HTML output, then start the app.
gr.Interface(
    fn=predict,
    inputs="textbox",
    outputs="html",
    title="Ammiya Tokenizer",
    description="Tokenize Ammiya text and show Playaling words",
).launch()