"""Gradio demo that tokenizes and annotates colloquial Arabic text.

At import time the script loads a ``transformers`` translation pipeline,
downloads a JSON lexicon, builds the Gradio UI and launches it.
"""
import json
from html import escape

import gradio as gr
import requests
from transformers import pipeline

# JavaScript run once on page load (passed to ``demo.load(_js=...)``). It
# installs two global helpers, ``showCard`` and ``hideCard``, that toggle the
# visibility of an element with id ``hovercard`` and fill ``card_title`` /
# ``card_content``.
# NOTE(review): nothing in this file emits elements with those ids or calls
# these helpers -- confirm whether hover-card markup was lost from the HTML
# built in ``predict``.
js = (
    " async () => { "
    "function showCard(event, title, content) { "
    "document.getElementById('hovercard').style.visibility = 'visible'; "
    "document.getElementById('card_title').innerText = title; "
    "document.getElementById('card_content').innerText = content; "
    "} "
    "function hideCard(event) { "
    "document.getElementById('hovercard').style.visibility = 'hidden'; "
    "} "
    "globalThis.showCard = showCard; "
    "globalThis.hideCard = hideCard; "
    "} "
)

LEXICON_URL = (
    "https://huggingface.co/guymorlan/TokenizerLabeller/raw/main/playaling_words.json"
)

pipe = pipeline("translation", "guymorlan/TokenizerLabeller")

# Fail fast on a hung or failed download instead of blocking start-up forever
# or trying to parse an HTTP error page as JSON.
response = requests.get(LEXICON_URL, timeout=30)
response.raise_for_status()
# Decode from the raw bytes so the result does not depend on the charset that
# ``requests`` guesses from the response headers.
data = json.loads(response.content)


def _render_annotations(raw, lexicon):
    """Return the HTML fragment for one model output string.

    Args:
        raw: Model output. It is split on ``" + "`` into groups, and each
            group is split on ``"+"`` into pieces (presumably words and
            their morphemes -- verify against the model card).
        lexicon: Mapping from a piece to an entry that has a ``"word"`` key.

    Returns:
        The escaped raw output followed by one line per group, where every
        piece found in ``lexicon`` is replaced by its ``"word"`` value.
    """
    # Newlines collapse to plain whitespace when rendered by ``gr.HTML``, so
    # explicit <br> tags are used to keep the lines apart.
    lines = [f"{escape(raw)}<br><br>"]
    for group in raw.split(" + "):
        rendered = []
        for piece in group.strip().split("+"):
            token = piece.strip()
            text = lexicon[token]["word"] if token in lexicon else token
            # Escape everything interpolated into markup: the text derives
            # from user input and from a remotely fetched file.
            rendered.append(escape(str(text)))
        lines.append(" ".join(rendered) + "<br>")
    return "".join(lines)


def predict(input):
    """Run the tokenizer model on ``input`` and return annotated HTML.

    Args:
        input: Text entered by the user. The parameter name is kept for
            backward compatibility even though it shadows the builtin.

    Returns:
        An HTML string for the output component; empty for blank input.
    """
    if not input or not input.strip():
        return ""
    raw = pipe(input)[0]["translation_text"]
    return _render_annotations(raw, data)


with gr.Blocks(title="Ammiya Tokenizer and Annotator") as demo:
    gr.HTML("\n\nColloquial Arabic\n\nTokenizer and Annotator")
    with gr.Row():
        with gr.Column():
            # The examples below are Arabic, so the placeholder asks for Arabic.
            text_input = gr.Textbox(
                label="Input", placeholder="Enter Arabic Text", lines=1
            )
            gr.Examples(["بديش اروح معك", "مكنتش هون قبل ما جيت"], text_input)
            # ``value`` (not ``label``) sets the text shown on the button.
            btn = gr.Button(value="Analyze")
        with gr.Column():
            with gr.Box():
                # Not named ``html`` so it cannot be confused with the stdlib
                # module whose ``escape`` is used above.
                output_html = gr.HTML()
    btn.click(predict, inputs=[text_input], outputs=[output_html])
    text_input.submit(predict, inputs=[text_input], outputs=[output_html])
    demo.load(_js=js)

demo.launch()