Archisman Karmakar committed
Commit b4e0bee · Parent: a8efbdc

2025.03.18.post1

Fixes, memory-handling updates, and storage fixes: free CPU/GPU memory and clear the Hugging Face cache before a new model is loaded, bump the project version, register a second stage-1 model (a custom DeBERTa v3 sentiment head), and rework the sentiment page around session-state change detection.

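The memory-handling change that recurs across these files frees the previous model before a new one is loaded. Distilled into a minimal standalone sketch (the current_model/current_tokenizer globals mirror the diffs below; this is an illustration, not the committed code):

    import gc
    import torch

    current_model, current_tokenizer = None, None

    def free_memory_sketch():
        # Drop the only references so the old model becomes collectable
        global current_model, current_tokenizer
        current_model, current_tokenizer = None, None
        gc.collect()                  # reclaim CPU-side memory
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # return cached GPU blocks to the driver
            torch.cuda.ipc_collect()  # release unused CUDA IPC handles

Note that the committed version also deletes TRANSFORMERS_CACHE with shutil.rmtree, which frees disk space at the cost of re-downloading every model on its next load.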
dashboard.py CHANGED
@@ -1,10 +1,55 @@
 import streamlit as st
+from transformers.utils.hub import TRANSFORMERS_CACHE
+import shutil
+import torch
+import psutil
+import gc
+import os
+
+current_model = None
+current_tokenizer = None
+
+
+def free_memory():
+    # """Free up CPU & GPU memory before loading a new model."""
+    global current_model, current_tokenizer
+
+    if current_model is not None:
+        del current_model        # Delete the existing model
+        current_model = None     # Reset reference
+
+    if current_tokenizer is not None:
+        del current_tokenizer    # Delete the tokenizer
+        current_tokenizer = None
+
+    gc.collect()  # Force garbage collection for CPU memory
+
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()   # Free GPU memory
+        torch.cuda.ipc_collect()   # Clean up PyTorch GPU cache
+
+    # If running on CPU, refresh OS-level memory stats
+    try:
+        if not torch.cuda.is_available():
+            psutil.virtual_memory()  # Refresh memory stats
+    except Exception as e:
+        print(f"Memory cleanup error: {e}")
+
+    # Delete cached Hugging Face models
+    try:
+        cache_dir = TRANSFORMERS_CACHE
+        if os.path.exists(cache_dir):
+            shutil.rmtree(cache_dir)
+            print("Cache cleared!")
+    except Exception as e:
+        print(f"❌ Cache cleanup error: {e}")
+
 
 def show_dashboard():
+    # free_memory()
     st.title("Tachygraphy Micro-text Analysis & Normalization")
     st.write("""
     Welcome to the Tachygraphy Micro-text Analysis & Normalization Project. This application is designed to analyze text data through three stages:
     1. Sentiment Polarity Analysis
     2. Emotion Mood-tag Analysis
     3. Text Transformation & Normalization
     """)
+
+
+if __name__ == "__main__":
+    show_dashboard()
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
 [project]
 name = "tachygraphy-microtext-analysis-and-normalization"
-version = "2025.03.17.post1"
+version = "2025.03.18.post1"
 description = ""
 authors = [
     { name = "Archisman Karmakar", email = "[email protected]" },
pyprojectOLD.toml CHANGED
@@ -1,6 +1,9 @@
 [project]
 name = "tachygraphy-microtext-analysis-and-normalization"
-version = "2025.03.16.post3"
+version = "2025.03.17.post1"
+# version = "2025.03.16.post3"
+# version = "2025.03.16.post2"
+# version = "2025.03.16.post1"
 description = ""
 authors = [
     { name = "Archisman Karmakar", email = "[email protected]" },
sentiment_analysis/config/stage1_models.json CHANGED
@@ -12,5 +12,19 @@
         "device": "cpu",
         "load_function": "load_model",
         "predict_function": "predict"
+    },
+    "2": {
+        "name": "DeBERTa v3 Base Custom Model with minimal Regularized Loss",
+        "type": "db3_base_custom",
+        "module_path": "hmv_cfg_base_stage1.model2",
+        "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-Base-Cust-LV1-SentimentPolarities-minRegLoss",
+        "tokenizer_class": "DebertaV2Tokenizer",
+        "model_class": "SentimentModel",
+        "problem_type": "multi_label_classification",
+        "base_model": "microsoft/deberta-v3-base",
+        "num_labels": 3,
+        "device": "cpu",
+        "load_function": "load_model",
+        "predict_function": "predict"
     }
 }
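Each entry in stage1_models.json carries the module path and function names that the app resolves at runtime (see the "✅ Dynamically Import Model Functions" section of sentiment_analysis_main.py below). A minimal sketch of how such an entry can be resolved with importlib; the helper name is hypothetical and error handling is omitted:

    import importlib

    def resolve_entry(entry: dict):
        # entry is one value from stage1_models.json, e.g. the new "2" block above
        module = importlib.import_module(entry["module_path"])   # "hmv_cfg_base_stage1.model2"
        load_fn = getattr(module, entry["load_function"])        # "load_model"
        predict_fn = getattr(module, entry["predict_function"])  # "predict"
        return load_fn, predict_fn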
sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-312.pyc CHANGED
Binary files a/sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-312.pyc and b/sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-312.pyc differ
 
sentiment_analysis/hmv_cfg_base_stage1/imports.py CHANGED
@@ -1,5 +1,5 @@
 import streamlit as st
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel, DebertaV2Tokenizer, DebertaV2ForSequenceClassification
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel, DebertaV2Tokenizer, DebertaV2ForSequenceClassification, DebertaV2Model
 import torch
 import numpy as np
 import matplotlib.pyplot as plt
@@ -13,4 +13,10 @@ import importlib
 import importlib.util
 import asyncio
 import sys
 import pytorch_lightning as pl
+
+import safetensors
+from safetensors.torch import load_file, save_file
+import json
+import huggingface_hub
+from huggingface_hub import hf_hub_download
sentiment_analysis/hmv_cfg_base_stage1/model1.py CHANGED
@@ -34,10 +34,12 @@ def load_model():
     tokenizer_class = globals()[model_info["tokenizer_class"]]
     model_class = globals()[model_info["model_class"]]
     tokenizer = tokenizer_class.from_pretrained(hf_location)
+    print("Loading model 1")
     model = model_class.from_pretrained(hf_location,
                                         problem_type=model_info["problem_type"],
                                         num_labels=model_info["num_labels"]
                                         )
+    print("Model 1 loaded")
 
     return model, tokenizer
 
@@ -58,12 +60,14 @@ def predict(text, model, tokenizer, device, max_len=128):
 
     # probabilities = outputs.logits.cpu().numpy()
 
-    probabilities = torch.relu(outputs.logits)
-    probabilities = torch.clamp(torch.tensor(probabilities), min=0.00000, max=1.00000).cpu().numpy()
+    # probabilities = torch.relu(outputs.logits)
+    # probabilities = torch.clamp(torch.tensor(probabilities), min=0.00000, max=1.00000).cpu().numpy()
     # probabilities /= probabilities.sum()
     # probabilities = probabilities.cpu().numpy()
 
-    return probabilities
+    predictions = torch.sigmoid(outputs.logits).cpu().numpy()
+
+    return predictions
 
 
 if __name__ == "__main__":
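The predict() change above replaces the earlier ReLU-plus-clamp with a sigmoid, the standard read-out for a multi_label_classification head trained with BCEWithLogitsLoss: each logit is squashed into (0, 1) independently. A small illustration with made-up logits:

    import torch

    logits = torch.tensor([[-1.2, 0.3, 2.5]])  # hypothetical negative/neutral/positive logits
    probs = torch.sigmoid(logits)              # tensor([[0.2315, 0.5744, 0.9241]])
    # Unlike softmax, the scores need not sum to 1: each label is judged on its own.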
sentiment_analysis/hmv_cfg_base_stage1/model2.py ADDED
@@ -0,0 +1,250 @@
from imports import *

import importlib.util
import os
import sys
import joblib

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import DebertaV2Model, DebertaV2Tokenizer
import safetensors
# from safetensors import load_file, save_file
import json
from huggingface_hub import hf_hub_download
from safetensors.torch import save_file, safe_open

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
CONFIG_STAGE1 = os.path.join(BASE_DIR, "..", "config", "stage1_models.json")

MODEL_OPTIONS = {
    "2": {
        "name": "DeBERTa v3 Base Custom Model with minimal Regularized Loss",
        "type": "db3_base_custom",
        "module_path": "hmv_cfg_base_stage1.model2",
        "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-Base-Cust-LV1-SentimentPolarities-minRegLoss",
        "tokenizer_class": "DebertaV2Tokenizer",
        "model_class": "SentimentModel",
        "problem_type": "multi_label_classification",
        "base_model": "microsoft/deberta-v3-base",
        "num_labels": 3,
        "device": "cpu",
        "load_function": "load_model",
        "predict_function": "predict"
    }
}


# class SentimentModel(nn.Module):
#     def __init__(self, roberta_model=DebertaV2Model.from_pretrained(
#             'microsoft/deberta-v3-base',
#             device_map=torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     ), n_classes=3, dropout_rate=0.2):
#         super(SentimentModel, self).__init__()
#
#         self.roberta = roberta_model
#         self.drop = nn.Dropout(p=dropout_rate)
#         self.fc1 = nn.Linear(self.roberta.config.hidden_size, 256)  # Reduced neurons
#         self.relu = nn.ReLU()
#         self.out = nn.Linear(256, n_classes)
#
#     def forward(self, input_ids, attention_mask):
#         output = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
#         cls_token_state = output.last_hidden_state[:, 0, :]
#         output = self.drop(cls_token_state)
#         output = self.relu(self.fc1(output))
#         return self.out(output)
#
#     def save_pretrained(self, save_directory):
#         os.makedirs(save_directory, exist_ok=True)
#
#         # Save model weights using safetensors
#         model_weights = self.state_dict()
#         save_file(model_weights, os.path.join(save_directory, "model.safetensors"))
#
#         # Save model config
#         config = {
#             "hidden_size": self.roberta.config.hidden_size,
#             "num_labels": self.out.out_features,
#             "dropout_rate": self.drop.p,
#             "roberta_model": self.roberta.name_or_path
#         }
#         with open(os.path.join(save_directory, "config.json"), "w") as f:
#             json.dump(config, f)
#
#         print(f"Model saved in {save_directory}")
#
#     @classmethod
#     def load_pretrained(cls, model_path_or_repo, roberta_model):
#         # if model_path_or_repo.startswith("http") or "/" not in model_path_or_repo:
#         #     # Load from Hugging Face Hub
#         #     model_config_path = hf_hub_download(model_path_or_repo, "config.json")
#         #     model_weights_path = hf_hub_download(model_path_or_repo, "model.safetensors")
#         # else:
#         #     # Load from local directory
#         #     model_config_path = os.path.join(model_path_or_repo, "config.json")
#         #     model_weights_path = os.path.join(model_path_or_repo, "model.safetensors")
#
#         model_config_path = hf_hub_download(model_path_or_repo, "config.json")
#         model_weights_path = hf_hub_download(model_path_or_repo, "model.safetensors")
#
#         # Load model config
#         with open(model_config_path, "r") as f:
#             config = json.load(f)
#
#         # Load RoBERTa model
#         roberta_model = DebertaV2Model.from_pretrained(config["roberta_model"])
#
#         # Initialize SentimentModel
#         model = cls(
#             roberta_model,
#             n_classes=config["num_labels"],
#             dropout_rate=config["dropout_rate"]
#         )
#
#         # Load safetensors weights
#         with safe_open(model_weights_path, framework="pt", device="cpu") as f:
#             model_weights = {key: f.get_tensor(key) for key in f.keys()}
#             model.load_state_dict(model_weights)
#
#         print(f"Model loaded from {model_path_or_repo}")
#         return model


class SentimentModel(nn.Module):
    def __init__(self, roberta_model, n_classes=3, dropout_rate=0.2):
        super(SentimentModel, self).__init__()

        self.roberta = roberta_model
        self.drop = nn.Dropout(p=dropout_rate)
        self.fc1 = nn.Linear(self.roberta.config.hidden_size, 256)
        self.relu = nn.ReLU()
        self.out = nn.Linear(256, n_classes)

    def forward(self, input_ids, attention_mask):
        output = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        cls_token_state = output.last_hidden_state[:, 0, :]
        output = self.drop(cls_token_state)
        output = self.relu(self.fc1(output))
        return self.out(output)

    def save_pretrained(self, save_directory):
        os.makedirs(save_directory, exist_ok=True)

        model_weights = self.state_dict()
        save_file(model_weights, os.path.join(save_directory, "model.safetensors"))

        config = {
            "hidden_size": self.roberta.config.hidden_size,
            "num_labels": self.out.out_features,
            "dropout_rate": self.drop.p,
            "roberta_model": self.roberta.name_or_path,  # ✅ Save model name
        }
        with open(os.path.join(save_directory, "config.json"), "w") as f:
            json.dump(config, f)

        print(f"Model saved in {save_directory}")

    @classmethod
    @st.cache_resource
    def load_pretrained(cls, model_path_or_repo):
        """Loads and caches the model (RoBERTa + SentimentModel) only when called."""
        print(f"Loading model from {model_path_or_repo}...")

        model_config_path = hf_hub_download(model_path_or_repo, "config.json")
        model_weights_path = hf_hub_download(model_path_or_repo, "model.safetensors")

        with open(model_config_path, "r") as f:
            config = json.load(f)

        print(f"Loading RoBERTa model: {config['roberta_model']}...")
        roberta_model = DebertaV2Model.from_pretrained(
            config["roberta_model"],
        )

        model = cls(
            roberta_model, n_classes=config["num_labels"], dropout_rate=config["dropout_rate"]
        )

        with safe_open(model_weights_path, framework="pt", device="cpu") as f:
            model_weights = {key: f.get_tensor(key) for key in f.keys()}
            model.load_state_dict(model_weights)

        print(f"Model loaded from {model_path_or_repo}")
        return model


# def load_pretrained(model_path_or_repo):
#
#     model_config_path = hf_hub_download(model_path_or_repo, "config.json")
#     model_weights_path = hf_hub_download(model_path_or_repo, "model.safetensors")
#
#     with open(model_config_path, "r") as f:
#         config = json.load(f)
#
#     roberta_model = DebertaV2Model.from_pretrained(
#         config["roberta_model"],
#     )
#
#     model = SentimentModel(
#         roberta_model, n_classes=config["num_labels"], dropout_rate=config["dropout_rate"]
#     )
#
#     with safe_open(model_weights_path, framework="pt", device="cpu") as f:
#         model_weights = {key: f.get_tensor(key) for key in f.keys()}
#         model.load_state_dict(model_weights)
#
#     print(f"Model loaded from {model_path_or_repo}")
#     return model


@st.cache_resource
def load_model():
    model_key = "2"
    model_info = MODEL_OPTIONS[model_key]
    hf_location = model_info["hf_location"]

    tokenizer_class = globals()[model_info["tokenizer_class"]]
    model_class = globals()[model_info["model_class"]]
    tokenizer = tokenizer_class.from_pretrained(hf_location)
    print("Loading model 2")
    model = SentimentModel.load_pretrained(hf_location)
    print("Model 2 loaded")
    # model.eval()

    return model, tokenizer


def predict(text, model, tokenizer, device, max_len=128):
    # model.eval()  # Set model to evaluation mode

    # Tokenize and pad the input text
    inputs = tokenizer(
        text,
        None,
        add_special_tokens=True,
        padding=True,
        truncation=False,
        return_tensors="pt",
        return_token_type_ids=False,
    ).to(device)  # Move input tensors to the correct device

    with torch.no_grad():
        outputs = model(**inputs)

    # Apply sigmoid activation (for BCEWithLogitsLoss)
    probabilities = torch.sigmoid(outputs).cpu().numpy()
    # probabilities = outputs.cpu().numpy()

    return probabilities


if __name__ == "__main__":
    model, tokenizer = load_model()
    print("Model and tokenizer loaded successfully.")
sentiment_analysis/hmv_cfg_base_stage1/{stage1_bert_architecture.py → model3.py} RENAMED
@@ -1,26 +1,66 @@
-import torch.nn as nn
-
-class BERT_architecture(nn.Module):
-
-    def __init__(self, bert):
-        super(BERT_architecture, self).__init__()
-        self.bert = bert
-
-        self.dropout = nn.Dropout(0.3)       # Increased dropout for regularization
-        self.layer_norm = nn.LayerNorm(768)  # Layer normalization
-
-        self.fc1 = nn.Linear(768, 256)  # Dense layer
-        self.fc2 = nn.Linear(256, 3)    # Output layer with 3 classes
-
-        self.relu = nn.ReLU()
-        self.softmax = nn.LogSoftmax(dim=1)
-
-    def forward(self, sent_id, mask, token_type_ids):
-        _, cls_hs = self.bert(sent_id, attention_mask=mask, token_type_ids=token_type_ids, return_dict=False)
-        x = self.layer_norm(cls_hs)
-        x = self.fc1(x)
-        x = self.relu(x)
-        x = self.dropout(x)
-        x = self.fc2(x)
-        x = self.softmax(x)
-        return x
+from imports import *
+
+import importlib.util
+import os
+import sys
+import joblib
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import DebertaV2Model, DebertaV2Tokenizer
+import safetensors
+# from safetensors import load_file, save_file
+import json
+from huggingface_hub import hf_hub_download
+from safetensors.torch import save_file, safe_open
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+CONFIG_STAGE1 = os.path.join(BASE_DIR, "..", "config", "stage1_models.json")
+
+MODEL_OPTIONS = {
+    "3": {
+        "name": "BERT Base Uncased Custom Model",
+        "type": "db3_base_custom",
+        "module_path": "hmv_cfg_base_stage1.model2",
+        "hf_location": "tachygraphy-microtrext-norm-org/DeBERTa-v3-Base-Cust-LV1-SentimentPolarities-minRegLoss",
+        "tokenizer_class": "DebertaV2Tokenizer",
+        "model_class": "BERT_architecture",
+        "problem_type": "multi_label_classification",
+        "base_model": "google/bert-base-uncased",
+        "num_labels": 3,
+        "device": "cpu",
+        "load_function": "load_model",
+        "predict_function": "predict"
+    }
+}
+
+
+class BERT_architecture(nn.Module):
+
+    def __init__(self, bert=AutoModel.from_pretrained("bert-base-uncased",
+                 device_map=torch.device("cuda" if torch.cuda.is_available() else "cpu"))):
+        super(BERT_architecture, self).__init__()
+        self.bert = bert
+
+        self.dropout = nn.Dropout(0.3)       # Increased dropout for regularization
+        self.layer_norm = nn.LayerNorm(768)  # Layer normalization
+
+        self.fc1 = nn.Linear(768, 256)  # Dense layer
+        self.fc2 = nn.Linear(256, 3)    # Output layer with 3 classes
+
+        self.relu = nn.ReLU()
+        self.softmax = nn.LogSoftmax(dim=1)
+
+    def forward(self, sent_id, mask, token_type_ids):
+        _, cls_hs = self.bert(sent_id, attention_mask=mask,
+                              token_type_ids=token_type_ids, return_dict=False)
+        x = self.layer_norm(cls_hs)
+        x = self.fc1(x)
+        x = self.relu(x)
+        x = self.dropout(x)
+        x = self.fc2(x)
+        x = self.softmax(x)
+        return x
sentiment_analysis/sentiment_analysis_main.py CHANGED
@@ -3,6 +3,11 @@ import importlib.util
 import os
 import sys
 import joblib
+import time
+# from transformers.utils import move_cache_to_trash
+# from huggingface_hub import delete_cache
+from transformers.utils.hub import TRANSFORMERS_CACHE
+import shutil
 
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
 
@@ -13,10 +18,6 @@ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 CONFIG_STAGE1 = os.path.join(BASE_DIR, "config", "stage1_models.json")
 LOADERS_STAGE1 = os.path.join(BASE_DIR, "hmv-cfg-base-stage1")
 
-# Load the model and tokenizer
-# model_name = "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8"
-# tokenizer = AutoTokenizer.from_pretrained(model_name)
-# model = AutoModel.from_pretrained(model_name)
 
 SENTIMENT_POLARITY_LABELS = [
     "negative", "neutral", "positive"
@@ -26,23 +27,19 @@ current_model = None
 current_tokenizer = None
 
 # Enabling Resource caching
-@st.cache_resource
+
+
+@st.cache_resource
 def load_model_config():
     with open(CONFIG_STAGE1, "r") as f:
         model_data = json.load(f)
 
-    model_options = {v["name"]: v for v in model_data.values()}  # Extract names for dropdown
+    # Extract names for dropdown
+    model_options = {v["name"]: v for v in model_data.values()}
     return model_data, model_options
 
-MODEL_DATA, MODEL_OPTIONS = load_model_config()
-
 
-
-# def load_model():
-#     model = DebertaV2ForSequenceClassification.from_pretrained(model_name)
-#     tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
-#     return model, tokenizer
+MODEL_DATA, MODEL_OPTIONS = load_model_config()
 
 
 # ✅ Dynamically Import Model Functions
@@ -69,7 +66,7 @@ def free_memory():
 
     gc.collect()  # Force garbage collection for CPU memory
 
-    if torch.cuda.is_available():
+    if torch.cuda.is_available():
         torch.cuda.empty_cache()   # Free GPU memory
         torch.cuda.ipc_collect()   # Clean up PyTorch GPU cache
 
@@ -80,10 +77,22 @@ def free_memory():
     except Exception as e:
         print(f"Memory cleanup error: {e}")
 
+    # Delete cached Hugging Face models
+    try:
+        cache_dir = TRANSFORMERS_CACHE
+        if os.path.exists(cache_dir):
+            shutil.rmtree(cache_dir)
+            print("Cache cleared!")
+    except Exception as e:
+        print(f"❌ Cache cleanup error: {e}")
+
+
 
 def load_selected_model(model_name):
     global current_model, current_tokenizer
 
+    st.cache_resource.clear()
+
     free_memory()
 
     # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys())  # ✅ See available models
@@ -109,10 +118,163 @@ def load_selected_model(model_name):
         return None, None, None
 
     model, tokenizer = load_model_func()
-
+
     current_model, current_tokenizer = model, tokenizer
     return model, tokenizer, predict_func
 
+
+# Function to increment progress dynamically
+def update_progress(progress_bar, start, end, delay=0.1):
+    for i in range(start, end + 1, 5):  # Increment in steps of 5%
+        progress_bar.progress(i)
+        time.sleep(delay)  # Simulate processing time
+        # st.experimental_rerun()  # Refresh the page
+
+
+# Function to update session state when model changes
+def on_model_change():
+    st.session_state.model_changed = True  # Mark model as changed
+
+
+# Function to update session state when text changes
+def on_text_change():
+    st.session_state.text_changed = True  # Mark text as changed
+
+
+# Initialize session state variables
+if "selected_model" not in st.session_state:
+    st.session_state.selected_model = list(MODEL_OPTIONS.keys())[0]  # Default model
+if "user_input" not in st.session_state:
+    st.session_state.user_input = ""
+if "last_processed_input" not in st.session_state:
+    st.session_state.last_processed_input = ""
+if "model_changed" not in st.session_state:
+    st.session_state.model_changed = False
+if "text_changed" not in st.session_state:
+    st.session_state.text_changed = False
+if "processing" not in st.session_state:
+    st.session_state.processing = False
+
+
+def show_sentiment_analysis():
+    st.cache_resource.clear()
+    free_memory()
+
+    st.title("Stage 1: Sentiment Polarity Analysis")
+    st.write("This section handles sentiment analysis.")
+
+    # Model selection with change detection
+    selected_model = st.selectbox(
+        "Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model", on_change=on_model_change
+    )
+
+    # Text input with change detection
+    user_input = st.text_input(
+        "Enter text for sentiment analysis:", key="user_input", on_change=on_text_change
+    )
+    user_input_copy = user_input
+
+    # Only run inference if:
+    # 1. The text is NOT empty
+    # 2. The text has changed OR the model has changed
+    if user_input.strip() and (st.session_state.text_changed or st.session_state.model_changed):
+
+        # Reset session state flags
+        st.session_state.last_processed_input = user_input
+        st.session_state.model_changed = False
+        st.session_state.text_changed = False  # Store selected model
+
+        # ADD A DYNAMIC PROGRESS BAR
+        progress_bar = st.progress(0)
+        update_progress(progress_bar, 0, 10)
+        # status_text = st.empty()
+
+        # update_progress(0, 10)
+        # status_text.text("Loading model...")
+
+        # Make prediction
+
+        # model, tokenizer = load_model()
+        # model, tokenizer = load_selected_model(selected_model)
+        with st.spinner("Please wait..."):
+            model, tokenizer, predict_func = load_selected_model(selected_model)
+            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+            if model is None:
+                st.error(
+                    "⚠️ Error: Model failed to load! Check model selection or configuration.")
+                st.stop()
+
+            model.to(device)
+
+            # predictions = predict(user_input, model, tokenizer, device)
+
+            predictions = predict_func(user_input, model, tokenizer, device)
+
+            # Squeeze predictions to remove extra dimensions
+            predictions_array = predictions.squeeze()
+
+            # Convert to binary predictions (argmax)
+            binary_predictions = np.zeros_like(predictions_array)
+            max_indices = np.argmax(predictions_array)
+            binary_predictions[max_indices] = 1
+
+        # Update progress bar for prediction and model loading
+        update_progress(progress_bar, 10, 100)
+
+        # Display raw predictions
+        st.write(f"**Predicted Sentiment Scores:** {predictions_array}")
+
+        # Display binary classification result
+        st.write(f"**Predicted Sentiment:**")
+        st.write(
+            f"**NEGATIVE:** {binary_predictions[0]}, **NEUTRAL:** {binary_predictions[1]}, **POSITIVE:** {binary_predictions[2]}")
+        # st.write(f"**NEUTRAL:** {binary_predictions[1]}")
+        # st.write(f"**POSITIVE:** {binary_predictions[2]}")
+
+        # 1️⃣ **Polar Plot (Plotly)**
+        sentiment_polarities = predictions_array.tolist()
+        fig_polar = px.line_polar(
+            pd.DataFrame(dict(r=sentiment_polarities,
+                              theta=SENTIMENT_POLARITY_LABELS)),
+            r='r', theta='theta', line_close=True
+        )
+        st.plotly_chart(fig_polar)
+
+        # 2️⃣ **Normalized Horizontal Bar Chart (Matplotlib)**
+        normalized_predictions = predictions_array / predictions_array.sum()
+
+        fig, ax = plt.subplots(figsize=(8, 2))
+        left = 0
+        for i in range(len(normalized_predictions)):
+            ax.barh(0, normalized_predictions[i], color=plt.cm.tab10(i),
+                    left=left, label=SENTIMENT_POLARITY_LABELS[i])
+            left += normalized_predictions[i]
+
+        # Configure the chart
+        ax.set_xlim(0, 1)
+        ax.set_yticks([])
+        ax.set_xticks(np.arange(0, 1.1, 0.1))
+        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15),
+                  ncol=len(SENTIMENT_POLARITY_LABELS))
+        plt.title("Sentiment Polarity Prediction Distribution")
+
+        # Display in Streamlit
+        st.pyplot(fig)
+
+        progress_bar.empty()
+
+
+if __name__ == "__main__":
+    show_sentiment_analysis()
+
+
+### COMMENTED OUT CODE ###
+
+
 # def load_selected_model(model_name):
 #     # """Load model and tokenizer based on user selection."""
 #     global current_model, current_tokenizer
@@ -157,7 +319,7 @@ def load_selected_model(model_name):
 #     # else:
 #     #     st.error("Invalid model selection")
 #     #     return None, None
-
+
 #     if load_model_func is None or predict_func is None:
 #         st.error("❌ Model functions could not be loaded!")
@@ -167,30 +329,29 @@ def load_selected_model(model_name):
 #     # return model, tokenizer
 
 #     model, tokenizer = load_model_func(hf_location)
-
+
 #     current_model, current_tokenizer = model, tokenizer
 #     return model, tokenizer, predict_func
 
 
-def predict(text, model, tokenizer, device, max_len=128):
-    # Tokenize and pad the input text
-    inputs = tokenizer(
-        text,
-        add_special_tokens=True,
-        padding=True,
-        truncation=False,
-        return_tensors="pt",
-        return_token_type_ids=False,
-    ).to(device)  # Move input tensors to the correct device
-
-    with torch.no_grad():
-        outputs = model(**inputs)
-
-    # Apply sigmoid activation (for BCEWithLogitsLoss)
-    probabilities = outputs.logits.cpu().numpy()
-
-    return probabilities
+# def predict(text, model, tokenizer, device, max_len=128):
+#     # Tokenize and pad the input text
+#     inputs = tokenizer(
+#         text,
+#         add_special_tokens=True,
+#         padding=True,
+#         truncation=False,
+#         return_tensors="pt",
+#         return_token_type_ids=False,
+#     ).to(device)  # Move input tensors to the correct device
+
+#     with torch.no_grad():
+#         outputs = model(**inputs)
+
+#     # Apply sigmoid activation (for BCEWithLogitsLoss)
+#     probabilities = outputs.logits.cpu().numpy()
+
+#     return probabilities
 
 # def show_sentiment_analysis():
@@ -200,97 +361,119 @@ def predict(text, model, tokenizer, device, max_len=128):
 #     user_input = st.text_area("Enter text for sentiment analysis:", height=200)
 #     user_input = st.text_area("Enter text for sentiment analysis:", max_chars=500)
 
-def show_sentiment_analysis():
-    st.title("Stage 1: Sentiment Polarity Analysis")
-    st.write("This section will handle sentiment analysis.")
-
-    if "selected_model" not in st.session_state:
-        st.session_state.selected_model = list(MODEL_OPTIONS.keys())[0]  # Default selection
-
-    if "clear_output" not in st.session_state:
-        st.session_state.clear_output = False
-
-    st.selectbox("Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model")
-
-    selected_model = st.session_state.selected_model
-
-    if selected_model not in MODEL_OPTIONS:
-        st.error(f"❌ Selected model '{selected_model}' not found!")
-        st.stop()
-
-    st.session_state.clear_output = True  # Reset output when model changes
-
-    # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys())  # ✅ See available models
-    # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[selected_model])  # ✅ Check selected model
-
-    user_input = st.text_input("Enter text for sentiment analysis:")
-
-    if user_input:
-        # Make prediction
-
-        # model, tokenizer = load_model()
-        # model, tokenizer = load_selected_model(selected_model)
-
-        model, tokenizer, predict_func = load_selected_model(selected_model)
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-        if model is None:
-            st.error("⚠️ Error: Model failed to load! Check model selection or configuration.")
-            st.stop()
-
-        model.to(device)
-
-        # predictions = predict(user_input, model, tokenizer, device)
-
-        predictions = predict_func(user_input, model, tokenizer, device)
-
-        # Squeeze predictions to remove extra dimensions
-        predictions_array = predictions.squeeze()
-
-        # Convert to binary predictions (argmax)
-        binary_predictions = np.zeros_like(predictions_array)
-        max_indices = np.argmax(predictions_array)
-        binary_predictions[max_indices] = 1
-
-        # Display raw predictions
-        st.write(f"**Predicted Sentiment Scores:** {predictions_array}")
-
-        # Display binary classification result
-        st.write(f"**Predicted Sentiment:**")
-        st.write(f"**NEGATIVE:** {binary_predictions[0]}, **NEUTRAL:** {binary_predictions[1]}, **POSITIVE:** {binary_predictions[2]}")
-        # st.write(f"**NEUTRAL:** {binary_predictions[1]}")
-        # st.write(f"**POSITIVE:** {binary_predictions[2]}")
-
-        # 1️⃣ **Polar Plot (Plotly)**
-        sentiment_polarities = predictions_array.tolist()
-        fig_polar = px.line_polar(
-            pd.DataFrame(dict(r=sentiment_polarities, theta=SENTIMENT_POLARITY_LABELS)),
-            r='r', theta='theta', line_close=True
-        )
-        st.plotly_chart(fig_polar)
-
-        # 2️⃣ **Normalized Horizontal Bar Chart (Matplotlib)**
-        normalized_predictions = predictions_array / predictions_array.sum()
-
-        fig, ax = plt.subplots(figsize=(8, 2))
-        left = 0
-        for i in range(len(normalized_predictions)):
-            ax.barh(0, normalized_predictions[i], color=plt.cm.tab10(i), left=left, label=SENTIMENT_POLARITY_LABELS[i])
-            left += normalized_predictions[i]
-
-        # Configure the chart
-        ax.set_xlim(0, 1)
-        ax.set_yticks([])
-        ax.set_xticks(np.arange(0, 1.1, 0.1))
-        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=len(SENTIMENT_POLARITY_LABELS))
-        plt.title("Sentiment Polarity Prediction Distribution")
-
-        # Display in Streamlit
-        st.pyplot(fig)
-
-
-if __name__ == "__main__":
-    show_sentiment_analysis()
+# def show_sentiment_analysis():
+#     st.title("Stage 1: Sentiment Polarity Analysis")
+#     st.write("This section will handle sentiment analysis.")
+
+#     if "selected_model" not in st.session_state:
+#         st.session_state.selected_model = list(MODEL_OPTIONS.keys())[0]  # Default selection
+
+#     if "clear_output" not in st.session_state:
+#         st.session_state.clear_output = False
+
+#     st.selectbox("Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model")
+
+#     selected_model = st.session_state.selected_model
+
+#     if selected_model not in MODEL_OPTIONS:
+#         st.error(f"❌ Selected model '{selected_model}' not found!")
+#         st.stop()
+
+#     st.session_state.clear_output = True  # Reset output when model changes
+
+#     # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys())  # ✅ See available models
+#     # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[selected_model])  # ✅ Check selected model
+
+#     user_input = st.text_input("Enter text for sentiment analysis:")
+#     user_input_copy = user_input
+
+#     # if st.button("Run Analysis"):
+#     #     if not user_input.strip():
+#     #         st.warning("⚠️ Please enter some text before running analysis.")
+#     #         return
+
+#     # with st.form(key="sentiment_form"):
+#     #     user_input = st.text_input("Enter text for sentiment analysis:")
+#     #     submit_button = st.form_submit_button("Run Analysis")
+
+#     #     user_input_copy = user_input
+
+#     if user_input.strip():
+
+#         # ADD A DYNAMIC PROGRESS BAR
+#         progress_bar = st.progress(0)
+#         update_progress(progress_bar, 0, 10)
+#         # status_text = st.empty()
+
+#         # update_progress(0, 10)
+#         # status_text.text("Loading model...")
+
+#         # Make prediction
+
+#         # model, tokenizer = load_model()
+#         # model, tokenizer = load_selected_model(selected_model)
+
+#         model, tokenizer, predict_func = load_selected_model(selected_model)
+#         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+#         if model is None:
+#             st.error("⚠️ Error: Model failed to load! Check model selection or configuration.")
+#             st.stop()
+
+#         model.to(device)
+
+#         # predictions = predict(user_input, model, tokenizer, device)
+
+#         predictions = predict_func(user_input, model, tokenizer, device)
+
+#         # Squeeze predictions to remove extra dimensions
+#         predictions_array = predictions.squeeze()
+
+#         # Convert to binary predictions (argmax)
+#         binary_predictions = np.zeros_like(predictions_array)
+#         max_indices = np.argmax(predictions_array)
+#         binary_predictions[max_indices] = 1
+
+#         # Update progress bar for prediction and model loading
+#         update_progress(progress_bar, 10, 100)
+
+#         # Display raw predictions
+#         st.write(f"**Predicted Sentiment Scores:** {predictions_array}")
+
+#         # Display binary classification result
+#         st.write(f"**Predicted Sentiment:**")
+#         st.write(f"**NEGATIVE:** {binary_predictions[0]}, **NEUTRAL:** {binary_predictions[1]}, **POSITIVE:** {binary_predictions[2]}")
+#         # st.write(f"**NEUTRAL:** {binary_predictions[1]}")
+#         # st.write(f"**POSITIVE:** {binary_predictions[2]}")
+
+#         # 1️⃣ **Polar Plot (Plotly)**
+#         sentiment_polarities = predictions_array.tolist()
+#         fig_polar = px.line_polar(
+#             pd.DataFrame(dict(r=sentiment_polarities, theta=SENTIMENT_POLARITY_LABELS)),
+#             r='r', theta='theta', line_close=True
+#         )
+#         st.plotly_chart(fig_polar)
+
+#         # 2️⃣ **Normalized Horizontal Bar Chart (Matplotlib)**
+#         normalized_predictions = predictions_array / predictions_array.sum()
+
+#         fig, ax = plt.subplots(figsize=(8, 2))
+#         left = 0
+#         for i in range(len(normalized_predictions)):
+#             ax.barh(0, normalized_predictions[i], color=plt.cm.tab10(i), left=left, label=SENTIMENT_POLARITY_LABELS[i])
+#             left += normalized_predictions[i]
+
+#         # Configure the chart
+#         ax.set_xlim(0, 1)
+#         ax.set_yticks([])
+#         ax.set_xticks(np.arange(0, 1.1, 0.1))
+#         ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=len(SENTIMENT_POLARITY_LABELS))
+#         plt.title("Sentiment Polarity Prediction Distribution")
+
+#         # Display in Streamlit
+#         st.pyplot(fig)
+
+#         progress_bar.empty()
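The rewritten show_sentiment_analysis() gates inference on flags set by widget on_change callbacks, so Streamlit reruns triggered by unrelated interactions do not reload the model or re-run prediction. The pattern in isolation, as a minimal sketch (widget label and key are placeholders):

    import streamlit as st

    if "text_changed" not in st.session_state:
        st.session_state.text_changed = False

    def on_text_change():
        # Runs before the rerun that the edit triggers
        st.session_state.text_changed = True

    text = st.text_input("Enter text:", key="demo_input", on_change=on_text_change)

    if text.strip() and st.session_state.text_changed:
        st.session_state.text_changed = False  # consume the flag
        st.write(f"Running analysis on: {text}")  # expensive inference goes here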