ashish-001 committed on
Commit
fe5ff82
·
verified ·
1 Parent(s): c8f16db

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +84 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import hf_hub_download
3
+ import joblib
4
+ from transformers import AutoTokenizer
5
+ import numpy as np
6
+ from dotenv import load_dotenv
7
+ import os
8
+ import onnxruntime as ort
9
+
10
+
11
class EmojiPrediction:
    """Predict emojis for a piece of text with an ONNX model from the Hub.

    Constructing an instance downloads (or reads from the local cache) the
    tokenizer, the quantized ONNX model, the pickled label encoder and the
    threshold array stored in ``repo_id``, so it needs network access the
    first time it runs.
    """

    def __init__(self):
        self.model = None        # onnxruntime.InferenceSession, set by load_model()
        self.tokenizer = None    # Hugging Face tokenizer, set by load_model()
        self.label_map = None    # label encoder, set by load_required_data()
        self.thresholds = None   # numpy array of cut-offs, set by load_required_data()
        self.repo_id = "ashish-001/tweet-emoji-predictor"
        self.load_model()
        self.load_required_data()

    def load_model(self):
        """Load the tokenizer and create the ONNX inference session."""
        self.tokenizer = AutoTokenizer.from_pretrained(self.repo_id)
        onnx_file = hf_hub_download(
            repo_id=self.repo_id,
            filename='model_quantized.onnx',
        )
        self.model = ort.InferenceSession(onnx_file)

    def load_required_data(self):
        """Download and load the label encoder and the decision thresholds."""
        filepath = hf_hub_download(
            repo_id=self.repo_id,
            filename='mlb_emoji_encoder.pkl',
        )
        # SECURITY: joblib.load unpickles the file and can execute arbitrary
        # code. Only point repo_id at a repository you trust.
        with open(filepath, 'rb') as f:
            self.label_map = joblib.load(f)

        threshold_filepath = hf_hub_download(
            repo_id=self.repo_id,
            filename='thresholds.npy',
        )
        self.thresholds = np.load(threshold_filepath)

    @staticmethod
    def _softmax(logits):
        """Return the softmax of *logits* along the last axis.

        The per-row maximum is subtracted before exponentiating. This does
        not change the result mathematically but keeps ``np.exp`` from
        overflowing to ``inf`` (and the ratio from becoming ``nan``) when
        the logits are large.
        """
        shifted = logits - np.max(logits, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=-1, keepdims=True)

    def predict_emoji(self, text):
        """Return the predicted emojis for *text*.

        Args:
            text: The user-supplied string. ``None``, empty and
                whitespace-only input yield two empty strings without
                running the model.

        Returns:
            A 2-tuple ``(emojis, text_with_emojis)``: the predicted emojis
            joined into one string, and the original text followed by a
            space and those emojis.
        """
        if not text or not text.strip():
            return "", ""

        # truncation=True caps the sequence at the tokenizer's configured
        # maximum length so very long input is not fed to the model as-is.
        inputs = self.tokenizer(
            text,
            return_tensors="np",
            truncation=True,
        )

        # Cast explicitly to int64 before feeding the session; presumably
        # that is the dtype the exported graph declares - confirm.
        onnx_inputs = {
            "input_ids": inputs["input_ids"].astype(np.int64),
            "attention_mask": inputs["attention_mask"].astype(np.int64),
        }
        logits = self.model.run(None, onnx_inputs)[0]

        # NOTE(review): the encoder file name suggests a multi-label setup,
        # where a per-class sigmoid is the usual activation. Softmax is kept
        # because the thresholds were presumably tuned against it - confirm.
        probs = self._softmax(logits)
        predicted_labels = (probs >= self.thresholds).astype(int).reshape(1, -1)

        emojis = "".join(self.label_map.inverse_transform(predicted_labels)[0])
        return emojis, f"{text} {emojis}"
65
+
66
+
67
# Build the predictor once at import time; the constructor fetches the model
# and its auxiliary files, so every UI event reuses the same instance.
emojiprediction = EmojiPrediction()

# NOTE(review): indentation was not recoverable from the scraped diff; the
# three textboxes are assumed to share one row - confirm against the repo.
with gr.Blocks() as app:
    gr.Markdown("# Tweet/Text Emoji Predictor")
    with gr.Row(equal_height=True):
        textbox = gr.Textbox(
            label="User Input",
            placeholder="Start entering the text",
            lines=1,
        )
        textbox1 = gr.Textbox(label="Raw Emoji Output")
        textbox2 = gr.Textbox(label="Text with Emojis")

    # Re-run the prediction on every edit of the input box and fan the two
    # returned strings out to the two output boxes.
    textbox.input(
        fn=emojiprediction.predict_emoji,
        inputs=[textbox],
        outputs=[textbox1, textbox2],
    )


if __name__ == "__main__":
    app.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ scikit-learn==1.7.0
2
+ emoji==2.14.1
3
+ transformers==4.52.4
4
+ onnxruntime==1.17.1
5
+ numpy==1.26.4
6
+ huggingface-hub==0.33.0
7
+ gradio==5.34.0
8
+ joblib==1.5.1