Archisman Karmakar committed
Commit f31213f · 1 Parent(s): 986513a

2025.03.17.post1
.github/workflows/ossar.yml CHANGED
@@ -31,6 +31,10 @@ jobs:
     runs-on: windows-latest
 
     steps:
+
+      - name: Enable long paths in Git
+        run: git config --system core.longpaths true
+
       - name: Checkout repository
         uses: actions/checkout@v4
 
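For context, core.longpaths lifts the 260-character path limit that Git for Windows enforces during checkout, which matters for deeply nested model and cache directories. A quick way to confirm the setting took effect on a runner (a minimal sketch, not part of this commit):

    import subprocess

    # Query the effective Git configuration; prints "true" once the
    # workflow step above has run.
    result = subprocess.run(
        ["git", "config", "--get", "core.longpaths"],
        capture_output=True,
        text=True,
    )
    print(result.stdout.strip() or "core.longpaths is not set")
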
__pycache__/dashboard.cpython-312.pyc ADDED
Binary file (844 Bytes).

__pycache__/emotion_analysis.cpython-312.pyc ADDED
Binary file (668 Bytes).

__pycache__/imports.cpython-312.pyc ADDED
Binary file (765 Bytes).

app_main_hf.py CHANGED
@@ -1,13 +1,30 @@
 import streamlit as st
 import os
+import asyncio
 import sys
+
+if sys.platform == "win32":
+    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+else:
+    try:
+        asyncio.get_running_loop()
+    except RuntimeError:
+        asyncio.set_event_loop(asyncio.new_event_loop())
+
+
 import joblib
+import importlib
 import importlib.util
 
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
 
+from imports import *
+
+
+
+
 from dashboard import show_dashboard
-from sentiment_analysis.sentiment_analysis import show_sentiment_analysis
+from sentiment_analysis.sentiment_analysis_main import show_sentiment_analysis
 from emotion_analysis import show_emotion_analysis
 # from text_transformation import show_text_transformation
 
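The new asyncio block does two things: on Windows it switches to the selector event-loop policy (the Proactor loop that Windows defaults to is incompatible with some networking libraries), and elsewhere it guarantees the current thread has an event loop, since Streamlit executes scripts in a worker thread that starts without one. In isolation the pattern looks like this (a standalone sketch mirroring the committed code):

    import asyncio
    import sys

    if sys.platform == "win32":
        # Prefer the selector loop; some libraries cannot use the
        # Proactor loop that is the Windows default since Python 3.8.
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    else:
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop in this thread yet; install one so later calls to
            # asyncio.get_event_loop() do not fail.
            asyncio.set_event_loop(asyncio.new_event_loop())

    async def probe() -> str:
        return "event loop usable"

    print(asyncio.run(probe()))
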
imports.py CHANGED
@@ -9,5 +9,11 @@ import json
 import gc
 import psutil
 import os
+import importlib
 import importlib.util
+import asyncio
 import sys
+import pytorch_lightning as pl
+
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
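One detail worth noting: os.path.join(x, ) with a single argument returns x unchanged, so this recurring line simply appends the file's own directory to sys.path, making sibling modules importable by bare name. An equivalent, slightly more explicit sketch (illustrative, not part of the commit):

    import os
    import sys

    # Append this file's directory so sibling modules resolve by name.
    here = os.path.abspath(os.path.dirname(__file__))
    if here not in sys.path:
        sys.path.append(here)
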
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
 [project]
 name = "tachygraphy-microtext-analysis-and-normalization"
-version = "2025.03.16.post3"
+version = "2025.03.17.post1"
 description = ""
 authors = [
     { name = "Archisman Karmakar", email = "[email protected]" },
@@ -192,6 +192,8 @@ dependencies = [
     "zict (>=3.0.0,<4.0.0)",
     "zipp (>=3.21.0,<4.0.0)",
     "zstandard (>=0.23.0,<0.24.0)",
+    "asyncio (>=3.4.3,<4.0.0)",
+    "pytorch-lightning (>=2.5.0.post0,<3.0.0)",
 ]
 
 
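A note on the new asyncio pin: the asyncio distribution on PyPI is the historical 3.4 backport, and on Python 3.12 the standard-library module is the one actually imported, because stdlib directories precede site-packages on sys.path. A quick check (an illustrative sketch, not part of the commit):

    import asyncio

    # Should print a path inside the interpreter's stdlib
    # (e.g. .../lib/python3.12/asyncio/__init__.py), not site-packages.
    print(asyncio.__file__)
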
pyprojectOLD.toml ADDED
@@ -0,0 +1,202 @@
+[project]
+name = "tachygraphy-microtext-analysis-and-normalization"
+version = "2025.03.16.post3"
+description = ""
+authors = [
+    { name = "Archisman Karmakar", email = "[email protected]" },
+]
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "numpy (==2.1.3)",
+    "torch (>=2.6.0,<3.0.0)",
+    "torchvision (>=0.21.0,<0.22.0)",
+    "tensorflow (>=2.19.0,<3.0.0)",
+    "pandas (>=2.2.3,<3.0.0)",
+    "openpyxl (>=3.1.5,<4.0.0)",
+    "streamlit (>=1.43.2,<2.0.0)",
+    "transformers (>=4.49.0,<5.0.0)",
+    "datasets (>=3.4.0,<4.0.0)",
+    "autograd (>=1.7.0,<2.0.0)",
+    "ipykernel (>=6.29.5,<7.0.0)",
+    "matplotlib (>=3.10.1,<4.0.0)",
+    "plotly (>=6.0.0,<7.0.0)",
+    "importlib (>=1.0.4,<2.0.0)",
+    "joblib (>=1.4.2,<2.0.0)",
+    "accelerate (>=1.5.2,<2.0.0)",
+    "peft (>=0.14.0,<0.15.0)",
+    "mpi4py (>=4.0.3,<5.0.0)",
+    "tiktoken (>=0.9.0,<0.10.0)",
+    "sentencepiece (>=0.2.0,<0.3.0)",
+    "bs4 (>=0.0.2,<0.0.3)",
+    "emoji (>=2.14.1,<3.0.0)",
+    "safetensors (>=0.5.3,<0.6.0)",
+    "lxml (>=5.3.1,<6.0.0)",
+    "evaluate (>=0.4.3,<0.5.0)",
+    "scipy (>=1.15.2,<2.0.0)",
+    "sacrebleu (>=2.5.1,<3.0.0)",
+    "rouge-score (>=0.1.2,<0.2.0)",
+    "scikit-learn (>=1.6.1,<2.0.0)",
+    "regex (>=2024.11.6,<2025.0.0)",
+    "pywin32 (>=309,<310)",
+    "pycurl (>=7.45.6,<8.0.0)",
+    "pillow (>=11.1.0,<12.0.0)",
+    "diffusers (>=0.32.2,<0.33.0)",
+    "onnx (>=1.17.0,<2.0.0)",
+    "keras (>=3.9.0,<4.0.0)",
+    "h2o (>=3.46.0.6,<4.0.0.0)",
+    "distributed (>=2025.2.0,<2026.0.0)",
+    "absl-py (>=2.1.0,<3.0.0)",
+    "aiohappyeyeballs (>=2.6.1,<3.0.0)",
+    "aiohttp (>=3.11.13,<4.0.0)",
+    "aiosignal (>=1.3.2,<2.0.0)",
+    "altair (>=5.5.0,<6.0.0)",
+    "anyio (>=4.8.0,<5.0.0)",
+    "asttokens (>=3.0.0,<4.0.0)",
+    "astunparse (>=1.6.3,<2.0.0)",
+    "attrs (>=25.3.0,<26.0.0)",
+    "beautifulsoup4 (>=4.13.3,<5.0.0)",
+    "blinker (>=1.9.0,<2.0.0)",
+    "build (>=1.2.2.post1,<2.0.0)",
+    "cachecontrol (>=0.14.2,<0.15.0)",
+    "cachetools (>=5.5.2,<6.0.0)",
+    "certifi (>=2025.1.31,<2026.0.0)",
+    "charset-normalizer (>=3.4.1,<4.0.0)",
+    "click (>=8.1.8,<9.0.0)",
+    "cloudpickle (>=3.1.1,<4.0.0)",
+    "colorama (>=0.4.6,<0.5.0)",
+    "comm (>=0.2.2,<0.3.0)",
+    "contourpy (>=1.3.1,<2.0.0)",
+    "cycler (>=0.12.1,<0.13.0)",
+    "dask (>=2025.2.0,<2026.0.0)",
+    "debugpy (>=1.8.13,<2.0.0)",
+    "decorator (>=5.2.1,<6.0.0)",
+    "distlib (>=0.3.9,<0.4.0)",
+    "dulwich (>=0.22.8,<0.23.0)",
+    "et-xmlfile (>=2.0.0,<3.0.0)",
+    "executing (>=2.2.0,<3.0.0)",
+    "fastjsonschema (>=2.21.1,<3.0.0)",
+    "filelock (>=3.18.0,<4.0.0)",
+    "findpython (>=0.6.3,<0.7.0)",
+    "flatbuffers (>=25.2.10,<26.0.0)",
+    "fonttools (>=4.56.0,<5.0.0)",
+    "frozenlist (>=1.5.0,<2.0.0)",
+    "gast (>=0.6.0,<0.7.0)",
+    "gitdb (>=4.0.12,<5.0.0)",
+    "gitpython (>=3.1.44,<4.0.0)",
+    "google-pasta (>=0.2.0,<0.3.0)",
+    "grpcio (>=1.71.0,<2.0.0)",
+    "h11 (>=0.14.0,<0.15.0)",
+    "h5py (>=3.13.0,<4.0.0)",
+    "httpcore (>=1.0.7,<2.0.0)",
+    "httpx (>=0.28.1,<0.29.0)",
+    "huggingface-hub (>=0.29.3,<0.30.0)",
+    "idna (>=3.10,<4.0)",
+    "importlib-metadata (>=8.6.1,<9.0.0)",
+    "installer (>=0.7.0,<0.8.0)",
+    "ipython (>=9.0.2,<10.0.0)",
+    "ipython-pygments-lexers (>=1.1.1,<2.0.0)",
+    "jaraco-classes (>=3.4.0,<4.0.0)",
+    "jaraco-context (>=6.0.1,<7.0.0)",
+    "jaraco-functools (>=4.1.0,<5.0.0)",
+    "jedi (>=0.19.2,<0.20.0)",
+    "jinja2 (>=3.1.6,<4.0.0)",
+    "jsonschema (>=4.23.0,<5.0.0)",
+    "jsonschema-specifications (>=2024.10.1,<2025.0.0)",
+    "jupyter-client (>=8.6.3,<9.0.0)",
+    "jupyter-core (>=5.7.2,<6.0.0)",
+    "kagglehub (>=0.3.10,<0.4.0)",
+    "keyring (>=25.6.0,<26.0.0)",
+    "kiwisolver (>=1.4.8,<2.0.0)",
+    "libclang (>=18.1.1,<19.0.0)",
+    "locket (>=1.0.0,<2.0.0)",
+    "markdown (>=3.7,<4.0)",
+    "markdown-it-py (>=3.0.0,<4.0.0)",
+    "markupsafe (>=3.0.2,<4.0.0)",
+    "matplotlib-inline (>=0.1.7,<0.2.0)",
+    "mdurl (>=0.1.2,<0.2.0)",
+    "ml-dtypes (>=0.5.1,<0.6.0)",
+    "more-itertools (>=10.6.0,<11.0.0)",
+    "mpmath (>=1.3.0,<2.0.0)",
+    "msgpack (>=1.1.0,<2.0.0)",
+    "multidict (>=6.1.0,<7.0.0)",
+    "namex (>=0.0.8,<0.0.9)",
+    "narwhals (>=1.30.0,<2.0.0)",
+    "nest-asyncio (>=1.6.0,<2.0.0)",
+    "networkx (>=3.4.2,<4.0.0)",
+    "nltk (>=3.9.1,<4.0.0)",
+    "opt-einsum (>=3.4.0,<4.0.0)",
+    "optree (>=0.14.1,<0.15.0)",
+    "packaging (>=24.2,<25.0)",
+    "parso (>=0.8.4,<0.9.0)",
+    "partd (>=1.4.2,<2.0.0)",
+    "pbs-installer (>=2025.3.11,<2026.0.0)",
+    "pkginfo (>=1.12.1.2,<2.0.0.0)",
+    "platformdirs (>=4.3.6,<5.0.0)",
+    "portalocker (>=3.1.1,<4.0.0)",
+    "prompt-toolkit (>=3.0.50,<4.0.0)",
+    "propcache (>=0.3.0,<0.4.0)",
+    "psutil (>=7.0.0,<8.0.0)",
+    "pure-eval (>=0.2.3,<0.3.0)",
+    "pyarrow (>=19.0.1,<20.0.0)",
+    "pydeck (>=0.9.1,<0.10.0)",
+    "pygments (>=2.19.1,<3.0.0)",
+    "pyparsing (>=3.2.1,<4.0.0)",
+    "pyproject-hooks (>=1.2.0,<2.0.0)",
+    "python-dateutil (>=2.9.0.post0,<3.0.0)",
+    "pytz (>=2025.1,<2026.0)",
+    "pywin32-ctypes (>=0.2.3,<0.3.0)",
+    "pyyaml (>=6.0.2,<7.0.0)",
+    "pyzmq (>=26.3.0,<27.0.0)",
+    "rapidfuzz (>=3.12.2,<4.0.0)",
+    "referencing (>=0.36.2,<0.37.0)",
+    "requests (>=2.32.3,<3.0.0)",
+    "requests-toolbelt (>=1.0.0,<2.0.0)",
+    "rich (>=13.9.4,<14.0.0)",
+    "rpds-py (>=0.23.1,<0.24.0)",
+    "sentence-transformers (>=3.4.1,<4.0.0)",
+    "setuptools (>=76.0.0,<77.0.0)",
+    "shellingham (>=1.5.4,<2.0.0)",
+    "six (>=1.17.0,<2.0.0)",
+    "smmap (>=5.0.2,<6.0.0)",
+    "sniffio (>=1.3.1,<2.0.0)",
+    "sortedcontainers (>=2.4.0,<3.0.0)",
+    "soupsieve (>=2.6,<3.0)",
+    "stack-data (>=0.6.3,<0.7.0)",
+    "tabulate (>=0.9.0,<0.10.0)",
+    "tblib (>=3.0.0,<4.0.0)",
+    "tenacity (>=9.0.0,<10.0.0)",
+    "tensorboard (>=2.19.0,<3.0.0)",
+    "tensorboard-data-server (>=0.7.2,<0.8.0)",
+    "termcolor (>=2.5.0,<3.0.0)",
+    "threadpoolctl (>=3.6.0,<4.0.0)",
+    "tokenizers (>=0.21.1,<0.22.0)",
+    "toml (>=0.10.2,<0.11.0)",
+    "tomlkit (>=0.13.2,<0.14.0)",
+    "toolz (>=1.0.0,<2.0.0)",
+    "tornado (>=6.4.2,<7.0.0)",
+    "tqdm (>=4.67.1,<5.0.0)",
+    "traitlets (>=5.14.3,<6.0.0)",
+    "trove-classifiers (>=2025.3.13.13,<2026.0.0.0)",
+    "typing-extensions (>=4.12.2,<5.0.0)",
+    "tzdata (>=2025.1,<2026.0)",
+    "urllib3 (>=2.3.0,<3.0.0)",
+    "virtualenv (>=20.29.3,<21.0.0)",
+    "watchdog (>=6.0.0,<7.0.0)",
+    "wcwidth (>=0.2.13,<0.3.0)",
+    "werkzeug (>=3.1.3,<4.0.0)",
+    "wheel (>=0.45.1,<0.46.0)",
+    "wrapt (>=1.17.2,<2.0.0)",
+    "xxhash (>=3.5.0,<4.0.0)",
+    "yarl (>=1.18.3,<2.0.0)",
+    "zict (>=3.0.0,<4.0.0)",
+    "zipp (>=3.21.0,<4.0.0)",
+    "zstandard (>=0.23.0,<0.24.0)",
+    "asyncio (>=3.4.3,<4.0.0)",
+    "pytorch-lightning (>=2.5.0.post0,<3.0.0)",
+]
+
+
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"
requirements.txt CHANGED
@@ -1,13 +1,14 @@
 absl-py==2.1.0 ; python_version >= "3.12"
 accelerate==1.5.2 ; python_version >= "3.12"
 aiohappyeyeballs==2.6.1 ; python_version >= "3.12"
-aiohttp==3.11.13 ; python_version >= "3.12"
+aiohttp==3.11.14 ; python_version >= "3.12"
 aiosignal==1.3.2 ; python_version >= "3.12"
 altair==5.5.0 ; python_version >= "3.12"
-anyio==4.8.0 ; python_version >= "3.12"
+anyio==4.9.0 ; python_version >= "3.12"
 appnope==0.1.4 ; python_version >= "3.12" and platform_system == "Darwin"
 asttokens==3.0.0 ; python_version >= "3.12"
 astunparse==1.6.3 ; python_version >= "3.12"
+asyncio==3.4.3 ; python_version >= "3.12"
 attrs==25.3.0 ; python_version >= "3.12"
 autograd==1.7.0 ; python_version >= "3.12"
 beautifulsoup4==4.13.3 ; python_version >= "3.12"
@@ -80,6 +81,7 @@ keras==3.9.0 ; python_version >= "3.12"
 keyring==25.6.0 ; python_version >= "3.12"
 kiwisolver==1.4.8 ; python_version >= "3.12"
 libclang==18.1.1 ; python_version >= "3.12"
+lightning-utilities==0.14.1 ; python_version >= "3.12"
 locket==1.0.0 ; python_version >= "3.12"
 lxml==5.3.1 ; python_version >= "3.12"
 markdown-it-py==3.0.0 ; python_version >= "3.12"
@@ -144,9 +146,10 @@ pygments==2.19.1 ; python_version >= "3.12"
 pyparsing==3.2.1 ; python_version >= "3.12"
 pyproject-hooks==1.2.0 ; python_version >= "3.12"
 python-dateutil==2.9.0.post0 ; python_version >= "3.12"
+pytorch-lightning==2.5.0.post0 ; python_version >= "3.12"
 pytz==2025.1 ; python_version >= "3.12"
-# pywin32-ctypes==0.2.3 ; python_version >= "3.12" ## ONLY FOR WINDOWS
-# pywin32==309 ; python_version >= "3.12" ## ONLY FOR WINDOWS
+pywin32-ctypes==0.2.3 ; python_version >= "3.12"
+pywin32==309 ; python_version >= "3.12"
 pyyaml==6.0.2 ; python_version >= "3.12"
 pyzmq==26.3.0 ; python_version >= "3.12"
 rapidfuzz==3.12.2 ; python_version >= "3.12"
@@ -188,6 +191,7 @@ toml==0.10.2 ; python_version >= "3.12"
 tomlkit==0.13.2 ; python_version >= "3.12"
 toolz==1.0.0 ; python_version >= "3.12"
 torch==2.6.0 ; python_version >= "3.12"
+torchmetrics==1.6.3 ; python_version >= "3.12"
 torchvision==0.21.0 ; python_version >= "3.12"
 tornado==6.4.2 ; python_version >= "3.12"
 tqdm==4.67.1 ; python_version >= "3.12"
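Note that the pywin32 pins, previously commented "ONLY FOR WINDOWS", are now installed unconditionally; a PEP 508 environment marker such as `pywin32==309 ; python_version >= "3.12" and sys_platform == "win32"` (illustrative) would keep them Windows-only.
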
sentiment_analysis/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (264 Bytes).

sentiment_analysis/__pycache__/sentiment_analysis.cpython-312.pyc ADDED
Binary file (8.94 kB).

sentiment_analysis/hmv_cfg_base_stage1/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (284 Bytes).

sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-312.pyc ADDED
Binary file (2.97 kB).

sentiment_analysis/hmv_cfg_base_stage1/imports.py CHANGED
@@ -9,5 +9,8 @@ import json
 import gc
 import psutil
 import os
+import importlib
 import importlib.util
+import asyncio
 import sys
+import pytorch_lightning as pl
sentiment_analysis/{sentiment_analysis.py → sentiment_analysis_main.py} RENAMED
File renamed; its 296 lines of content are unchanged:

from imports import *
import importlib.util
import os
import sys
import joblib

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))

# from hmv_cfg_base_stage1.model1 import load_model as load_model1
# from hmv_cfg_base_stage1.model1 import predict as predict1

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
CONFIG_STAGE1 = os.path.join(BASE_DIR, "config", "stage1_models.json")
LOADERS_STAGE1 = os.path.join(BASE_DIR, "hmv-cfg-base-stage1")

# Load the model and tokenizer
# model_name = "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModel.from_pretrained(model_name)

SENTIMENT_POLARITY_LABELS = [
    "negative", "neutral", "positive"
]

current_model = None
current_tokenizer = None

# Enabling Resource caching
@st.cache_resource
def load_model_config():
    with open(CONFIG_STAGE1, "r") as f:
        model_data = json.load(f)

    model_options = {v["name"]: v for v in model_data.values()}  # Extract names for dropdown
    return model_data, model_options

MODEL_DATA, MODEL_OPTIONS = load_model_config()


# def load_model():
#     model = DebertaV2ForSequenceClassification.from_pretrained(model_name)
#     tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
#     return model, tokenizer


# ✅ Dynamically import model functions
def import_from_module(module_name, function_name):
    try:
        module = importlib.import_module(module_name)
        return getattr(module, function_name)
    except (ModuleNotFoundError, AttributeError) as e:
        st.error(f"❌ Import Error: {e}")
        return None


def free_memory():
    """Free up CPU & GPU memory before loading a new model."""
    global current_model, current_tokenizer

    if current_model is not None:
        del current_model  # Delete the existing model
        current_model = None  # Reset reference

    if current_tokenizer is not None:
        del current_tokenizer  # Delete the tokenizer
        current_tokenizer = None

    gc.collect()  # Force garbage collection for CPU memory

    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # Free GPU memory
        torch.cuda.ipc_collect()  # Clean up PyTorch GPU cache

    # If running on CPU, refresh OS-level memory stats
    try:
        if torch.cuda.is_available() is False:
            psutil.virtual_memory()  # Refresh memory stats
    except Exception as e:
        print(f"Memory cleanup error: {e}")


def load_selected_model(model_name):
    global current_model, current_tokenizer

    free_memory()

    # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys())
    # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name])
    # st.write("DEBUG: Model Name:", model_name)

    if model_name not in MODEL_OPTIONS:
        st.error(f"⚠️ Model '{model_name}' not found in config!")
        return None, None, None

    model_info = MODEL_OPTIONS[model_name]
    hf_location = model_info["hf_location"]

    model_module = model_info["module_path"]
    load_function = model_info["load_function"]
    predict_function = model_info["predict_function"]

    load_model_func = import_from_module(model_module, load_function)
    predict_func = import_from_module(model_module, predict_function)

    if load_model_func is None or predict_func is None:
        st.error("❌ Model functions could not be loaded!")
        return None, None, None

    model, tokenizer = load_model_func()

    current_model, current_tokenizer = model, tokenizer
    return model, tokenizer, predict_func

# An earlier load_selected_model variant, kept commented out:
# def load_selected_model(model_name):
#     """Load model and tokenizer based on user selection."""
#     global current_model, current_tokenizer
#
#     # Free memory before loading a new model
#     free_memory()
#
#     if model_name not in MODEL_OPTIONS:
#         st.error(f"⚠️ Model '{model_name}' not found in config!")
#         return None, None
#
#     model_info = MODEL_OPTIONS[model_name]
#     hf_location = model_info["hf_location"]
#
#     model_module = model_info["module_path"]
#     # load_function = "load_model"
#     # predict_function = "predict"
#
#     load_function = model_info["load_function"]
#     predict_function = model_info["predict_function"]
#
#     # tokenizer_class = globals()[model_info["tokenizer_class"]]
#     # model_class = globals()[model_info["model_class"]]
#     # tokenizer = tokenizer_class.from_pretrained(hf_location)
#
#     load_model_func = import_from_module(model_module, load_function)
#     predict_func = import_from_module(model_module, predict_function)
#
#     # # Load model
#     # if model_info["type"] == "custom_checkpoint" or model_info["type"] == "custom_model":
#     #     model = torch.load(hf_location, map_location="cpu")  # Load PyTorch model
#     # elif model_info["type"] == "hf_automodel_finetuned_dbt3":
#     #     tokenizer_class = globals()[model_info["tokenizer_class"]]
#     #     model_class = globals()[model_info["model_class"]]
#     #     tokenizer = tokenizer_class.from_pretrained(hf_location)
#     #     model = model_class.from_pretrained(hf_location,
#     #         problem_type=model_info["problem_type"],
#     #         num_labels=model_info["num_labels"]
#     #     )
#     # else:
#     #     st.error("Invalid model selection")
#     #     return None, None
#
#     if load_model_func is None or predict_func is None:
#         st.error("❌ Model functions could not be loaded!")
#         return None, None
#
#     # current_model, current_tokenizer = model, tokenizer  # Store references
#     # return model, tokenizer
#
#     model, tokenizer = load_model_func(hf_location)
#
#     current_model, current_tokenizer = model, tokenizer
#     return model, tokenizer, predict_func


def predict(text, model, tokenizer, device, max_len=128):
    # Tokenize and pad the input text
    inputs = tokenizer(
        text,
        add_special_tokens=True,
        padding=True,
        truncation=False,
        return_tensors="pt",
        return_token_type_ids=False,
    ).to(device)  # Move input tensors to the correct device

    with torch.no_grad():
        outputs = model(**inputs)

    # Apply sigmoid activation (for BCEWithLogitsLoss)
    probabilities = outputs.logits.cpu().numpy()

    return probabilities

# Earlier input-widget experiments, kept commented out:
# user_input = st.text_input("Enter text for sentiment analysis:")
# user_input = st.text_area("Enter text for sentiment analysis:", height=200)
# user_input = st.text_area("Enter text for sentiment analysis:", max_chars=500)

def show_sentiment_analysis():
    st.title("Stage 1: Sentiment Polarity Analysis")
    st.write("This section will handle sentiment analysis.")

    if "selected_model" not in st.session_state:
        st.session_state.selected_model = list(MODEL_OPTIONS.keys())[0]  # Default selection

    if "clear_output" not in st.session_state:
        st.session_state.clear_output = False

    st.selectbox("Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model")

    selected_model = st.session_state.selected_model

    if selected_model not in MODEL_OPTIONS:
        st.error(f"❌ Selected model '{selected_model}' not found!")
        st.stop()

    st.session_state.clear_output = True  # Reset output when model changes

    user_input = st.text_input("Enter text for sentiment analysis:")

    if user_input:
        # Make prediction
        # model, tokenizer = load_model()
        # model, tokenizer = load_selected_model(selected_model)
        model, tokenizer, predict_func = load_selected_model(selected_model)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        if model is None:
            st.error("⚠️ Error: Model failed to load! Check model selection or configuration.")
            st.stop()

        model.to(device)

        # predictions = predict(user_input, model, tokenizer, device)
        predictions = predict_func(user_input, model, tokenizer, device)

        # Squeeze predictions to remove extra dimensions
        predictions_array = predictions.squeeze()

        # Convert to binary predictions (argmax)
        binary_predictions = np.zeros_like(predictions_array)
        max_indices = np.argmax(predictions_array)
        binary_predictions[max_indices] = 1

        # Display raw predictions
        st.write(f"**Predicted Sentiment Scores:** {predictions_array}")

        # Display binary classification result
        st.write("**Predicted Sentiment:**")
        st.write(f"**NEGATIVE:** {binary_predictions[0]}, **NEUTRAL:** {binary_predictions[1]}, **POSITIVE:** {binary_predictions[2]}")
        # st.write(f"**NEUTRAL:** {binary_predictions[1]}")
        # st.write(f"**POSITIVE:** {binary_predictions[2]}")

        # 1️⃣ Polar plot (Plotly)
        sentiment_polarities = predictions_array.tolist()
        fig_polar = px.line_polar(
            pd.DataFrame(dict(r=sentiment_polarities, theta=SENTIMENT_POLARITY_LABELS)),
            r='r', theta='theta', line_close=True
        )
        st.plotly_chart(fig_polar)

        # 2️⃣ Normalized horizontal bar chart (Matplotlib)
        normalized_predictions = predictions_array / predictions_array.sum()

        fig, ax = plt.subplots(figsize=(8, 2))
        left = 0
        for i in range(len(normalized_predictions)):
            ax.barh(0, normalized_predictions[i], color=plt.cm.tab10(i), left=left, label=SENTIMENT_POLARITY_LABELS[i])
            left += normalized_predictions[i]

        # Configure the chart
        ax.set_xlim(0, 1)
        ax.set_yticks([])
        ax.set_xticks(np.arange(0, 1.1, 0.1))
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=len(SENTIMENT_POLARITY_LABELS))
        plt.title("Sentiment Polarity Prediction Distribution")

        # Display in Streamlit
        st.pyplot(fig)


if __name__ == "__main__":
    show_sentiment_analysis()
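
The import_from_module helper above is the core of the config-driven dispatch: stage1_models.json names a module path plus a loader and predictor per model, and the app resolves both at runtime with importlib. A self-contained sketch of the same pattern (the math/sqrt example is illustrative, not from the repository):

    import importlib

    def import_from_module(module_name, function_name):
        # Resolve a callable by dotted module path and attribute name.
        try:
            module = importlib.import_module(module_name)
            return getattr(module, function_name)
        except (ModuleNotFoundError, AttributeError) as e:
            print(f"Import error: {e}")
            return None

    # Resolve a stdlib function purely from strings, as the model registry does.
    sqrt = import_from_module("math", "sqrt")
    if sqrt is not None:
        print(sqrt(16.0))  # 4.0

Because the lookup fails soft (returning None rather than raising), callers such as load_selected_model can surface a UI error and keep the app running when a config entry points at a missing module or function.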