File size: 2,071 Bytes
df3b007
 
ebb2014
 
df3b007
 
11c4f8e
df3b007
 
0fd5020
df3b007
 
1824d10
84c02ea
ed24bc1
9859f70
404d045
320787e
df3b007
 
 
 
 
 
 
1824d10
20106f5
df3b007
 
 
0a214bf
404d045
df3b007
 
 
 
 
 
 
404d045
df3b007
 
 
 
 
 
 
 
 
 
 
ebb2014
8e4d744
 
 
0a214bf
8e4d744
df3b007
 
 
 
 
 
 
8e4d744
 
ebb2014
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from __future__ import print_function, division, unicode_literals

import json
import sys
from os.path import abspath, dirname, expanduser

import gradio as gr
import numpy as np
from transformers import AutoModel, AutoTokenizer

from torchmoji.model_def import torchmoji_emojis
from torchmoji.sentence_tokenizer import SentenceTokenizer
# Hugging Face repo that hosts the TorchMoji weights and vocabulary.
model_name = "Pendrokar/TorchMoji"

# NOTE(review): this AutoModel instance is discarded below (the `model`
# name is re-bound by torchmoji_emojis); the call presumably only exists
# to populate the local HF cache with the repo files — confirm.
model = AutoModel.from_pretrained(model_name, cache_dir=expanduser("~/.cache/huggingface/hub/"))
tokenizer = AutoTokenizer.from_pretrained(model_name)

# BUG FIX: "~" is not expanded by open()/torch.load(), so the literal
# path "~/.cache/..." would raise FileNotFoundError; expand it explicitly.
model_path = expanduser("~/.cache/huggingface/hub/pytorch_model.bin")
vocab_path = './' + model_name + "/vocabulary.json"

def top_elements(array, k):
    """Return the indices of the `k` largest entries of `array`, largest first."""
    # argpartition puts the k largest (unordered) at the tail in O(n).
    top_k = np.argpartition(array, -k)[-k:]
    # Sort those k by value, then reverse for descending order.
    order = np.argsort(array[top_k])
    return top_k[order][::-1]

# Maximum token length each sentence is padded/truncated to.
maxlen = 30

# NOTE(review): SentenceTokenizer is given get_added_vocab(), which returns
# only tokens added on top of the base tokenizer (possibly an empty dict) —
# the unused `vocab_path` above suggests the full vocabulary.json may have
# been intended instead; confirm against the torchMoji examples.
st = SentenceTokenizer(tokenizer.get_added_vocab(), maxlen)

# Re-binds `model` to the torchMoji emoji classifier built from the raw
# state-dict file, replacing the AutoModel instance created above.
model = torchmoji_emojis(model_path)

def predict(deepmoji_analysis):
    """Run torchMoji on one input string and return a plain-text report.

    The report is the tokenized id matrix followed by one score row per
    sentence: [text, sum of top-5 probabilities, the 5 emoji ids, the 5
    probabilities]. Emoji ids (0-63) correspond to the mapping in
    emoji_overview.png at the root of the torchMoji repo.

    Args:
        deepmoji_analysis: the raw input text to classify.

    Returns:
        str: tokenized ids plus the formatted score rows.
    """
    output_text = "\n"
    tokenized, _, _ = st.tokenize_sentences([deepmoji_analysis])
    prob = model(tokenized)

    # Only one sentence is ever submitted, but keep the per-sentence loop
    # shape so batching would be a small change.
    for i, text in enumerate([deepmoji_analysis]):
        sentence_prob = prob[i]
        ind_top = top_elements(sentence_prob, 5)
        row = [text, sum(sentence_prob[ind_top])]
        row.extend(ind_top)
        row.extend(sentence_prob[ind] for ind in ind_top)
        # BUG FIX: the original did `output_text += t_score`, which raises
        # TypeError (cannot concatenate str and list). Stringify the row.
        output_text += str(row) + "\n"

    return str(tokenized) + output_text

# Example prompts shown beneath the input box in the UI.
_examples = [
    "You love hurting me, huh?",
    "I know good movies, this ain't one",
    "It was fun, but I'm not going to miss you",
    "My flight is delayed.. amazing.",
    "What is happening to me??",
    "This is the shit!",
    "This is shit!",
]

# Single text-in / text-out Gradio interface around the torchMoji predictor.
gradio_app = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    examples=_examples,
)

# Start the Gradio web server only when run as a script (not on import).
if __name__ == "__main__":
    gradio_app.launch()