# File size: 2,506 Bytes
# 428bc7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import gradio as gr
from huggingface_hub import hf_hub_download
import joblib
from transformers import AutoTokenizer
import numpy as np
from dotenv import load_dotenv
import os
import onnxruntime as ort


class EmojiPrediction:
    """Predict emojis for a piece of text with a quantized ONNX classifier.

    Constructing an instance fetches the tokenizer, the ONNX model, the label
    encoder and the decision thresholds from the Hugging Face Hub repository
    named by ``repo_id``, so instantiation performs network/disk I/O.
    """

    def __init__(self):
        self.model = None        # onnxruntime session, set by load_model()
        self.tokenizer = None    # Hugging Face tokenizer, set by load_model()
        self.label_map = None    # label encoder, set by load_required_data()
        self.thresholds = None   # numpy array, set by load_required_data()
        self.repo_id = "ashish-001/tweet-emoji-predictor"
        self.load_model()
        self.load_required_data()

    def load_model(self):
        """Load the tokenizer and the quantized ONNX model from ``repo_id``."""
        self.tokenizer = AutoTokenizer.from_pretrained(self.repo_id)
        onnx_file = hf_hub_download(
            repo_id=self.repo_id,
            filename='model_quantized.onnx',
        )
        self.model = ort.InferenceSession(onnx_file)

    def load_required_data(self):
        """Load the label encoder and the decision thresholds from ``repo_id``."""
        filepath = hf_hub_download(
            repo_id=self.repo_id,
            filename='mlb_emoji_encoder.pkl',
        )
        # SECURITY NOTE: joblib.load unpickles the file, which can execute
        # arbitrary code. This is only acceptable while ``repo_id`` points at
        # a repository whose contents are trusted.
        # Presumably a scikit-learn MultiLabelBinarizer (going by the file
        # name) -- only ``inverse_transform`` is used here; confirm.
        with open(filepath, 'rb') as f:
            self.label_map = joblib.load(f)

        threshold_filepath = hf_hub_download(
            repo_id=self.repo_id,
            filename='thresholds.npy',
        )
        # Compared element-wise against the class probabilities in
        # predict_emoji(); presumably one threshold per class -- confirm shape.
        self.thresholds = np.load(threshold_filepath)

    @staticmethod
    def _softmax(logits):
        """Return a numerically stable softmax of *logits* over the last axis."""
        logits = np.asarray(logits)
        # Subtracting the row maximum leaves the result mathematically
        # unchanged but keeps np.exp from overflowing to inf (which would turn
        # every probability into NaN and silently drop all predictions).
        shifted = logits - np.max(logits, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=-1, keepdims=True)

    def predict_emoji(self, text):
        """Predict the emojis that match *text*.

        Args:
            text: The user-supplied string. ``None``, empty and
                whitespace-only input yield empty results.

        Returns:
            A 2-tuple ``(emojis, text_with_emojis)``: the predicted emojis
            joined into one string, and the original text followed by a space
            and those emojis. Both are ``""`` when there is nothing to
            predict on.
        """
        if text is None or not text.strip():
            return "", ""

        # truncation=True asks the tokenizer to cap the sequence at its
        # configured maximum length instead of passing over-long text on to
        # the ONNX session.
        inputs = self.tokenizer(
            text,
            truncation=True,
            return_tensors="np",
        )

        # The ONNX session is fed int64 tensors under these two input names.
        onnx_inputs = {
            "input_ids": inputs["input_ids"].astype(np.int64),
            "attention_mask": inputs["attention_mask"].astype(np.int64),
        }
        logits = self.model.run(None, onnx_inputs)[0]
        probs = self._softmax(logits)

        # A class is selected when its probability reaches its threshold; the
        # result is flattened to a single row for inverse_transform.
        predicted_labels = (probs >= self.thresholds).astype(
            int).reshape(1, -1)

        emojis = "".join(self.label_map.inverse_transform(predicted_labels)[0])
        return emojis, f"{text} {emojis}"


# Build the predictor once at import time so that every UI callback reuses
# the same loaded tokenizer/model instead of reloading them per request.
emojiprediction = EmojiPrediction()

# Gradio UI: a single input box on the left and two output boxes beside it.
with gr.Blocks() as app:
    gr.Markdown("# Tweet/Text Emoji Predictor")

    with gr.Row(equal_height=True):
        textbox = gr.Textbox(
            lines=1,
            label="User Input",
            placeholder="Start entering the text",
        )
        textbox1 = gr.Textbox(label="Raw Emoji Output")
        textbox2 = gr.Textbox(label="Text with Emojis")

    # Re-run the prediction whenever the content of the input box changes.
    textbox.change(
        fn=emojiprediction.predict_emoji,
        inputs=textbox,
        outputs=[textbox1, textbox2],
    )


# Only start the web server when executed as a script, not when imported.
if __name__ == "__main__":
    app.launch()