# File size: 920 Bytes
# ce54c6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import html
import json

import requests
from transformers import pipeline

# Load the guymorlan/TokenizerLabeller model through the "translation"
# pipeline task.
pipe = pipeline("translation", "guymorlan/TokenizerLabeller")

# Download the Playaling word lexicon (JSON) from the model repository:
# https://huggingface.co/guymorlan/TokenizerLabeller/raw/main/playaling_words.json
r = requests.get(
    "https://huggingface.co/guymorlan/TokenizerLabeller/raw/main/playaling_words.json",
    timeout=30,  # requests has no default timeout; don't hang startup forever
)
# Fail with a clear HTTP error instead of a JSON decode error on an error page.
r.raise_for_status()
# Parse the raw bytes so the result does not depend on the charset that
# requests infers from the response headers; json detects UTF-8/16/32 itself.
data = json.loads(r.content)


# Build the Gradio interface
import gradio as gr

def predict(input):
    """Tokenize *input* with the model and render the tokens as HTML.

    The pipeline's ``translation_text`` output is split on ``+`` and each
    piece is stripped of surrounding whitespace.  A token that is a key of
    the module-level ``data`` lexicon is rendered as a green ``<span>``
    showing the entry's ``word``, with its ``translation`` and ``features``
    (separated by a newline) as the hover tooltip.  Any other token is
    emitted as plain text.

    All interpolated text is HTML-escaped, so quotes, ``<``, ``>`` and ``&``
    in the lexicon or in the model output cannot break the ``title``
    attribute or inject markup.

    Args:
        input: Text to tokenize.  The name shadows the builtin but is kept
            so existing callers are unaffected.

    Returns:
        An HTML fragment in which every token is followed by one space.
    """
    raw = pipe(input)[0]['translation_text']
    tokens = [piece.strip() for piece in raw.split("+")]

    rendered = []
    for token in tokens:
        if token in data:
            entry = data[token]
            # quote=True also escapes ' and ", which matters because the
            # tooltip lives inside a single-quoted attribute.
            tooltip = html.escape(
                f"{entry['translation']}\n{entry['features']}", quote=True
            )
            word = html.escape(str(entry['word']))
            rendered.append(
                f"<span style='color: green' title='{tooltip}'>{word}</span>"
            )
        else:
            rendered.append(html.escape(token))

    # Each token, known or not, is followed by a single space.
    return "".join(f"{part} " for part in rendered)


# Wire predict() to a text box input and an HTML output, then start the app.
demo = gr.Interface(
    fn=predict,
    inputs="textbox",
    outputs="html",
    title="Ammiya Tokenizer",
    description="Tokenize Ammiya text and show Playaling words",
)
demo.launch()