Archisman Karmakar committed
Commit: f31213f · Parent(s): 986513a
2025.03.17.post1
Files changed:
- .github/workflows/ossar.yml +4 -0
- __pycache__/dashboard.cpython-312.pyc +0 -0
- __pycache__/emotion_analysis.cpython-312.pyc +0 -0
- __pycache__/imports.cpython-312.pyc +0 -0
- app_main_hf.py +18 -1
- imports.py +7 -1
- poetry.lock +0 -0
- pyproject.toml +3 -1
- pyprojectOLD.toml +202 -0
- requirements.txt +8 -4
- sentiment_analysis/__pycache__/__init__.cpython-312.pyc +0 -0
- sentiment_analysis/__pycache__/sentiment_analysis.cpython-312.pyc +0 -0
- sentiment_analysis/hmv_cfg_base_stage1/__pycache__/__init__.cpython-312.pyc +0 -0
- sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-312.pyc +0 -0
- sentiment_analysis/hmv_cfg_base_stage1/imports.py +4 -1
- sentiment_analysis/{sentiment_analysis.py → sentiment_analysis_main.py} +295 -295
.github/workflows/ossar.yml
CHANGED
@@ -31,6 +31,10 @@ jobs:
     runs-on: windows-latest
 
     steps:
+
+      - name: Enable long paths in Git
+        run: git config --system core.longpaths true
+
       - name: Checkout repository
         uses: actions/checkout@v4
 
__pycache__/dashboard.cpython-312.pyc
ADDED
Binary file (844 Bytes).

__pycache__/emotion_analysis.cpython-312.pyc
ADDED
Binary file (668 Bytes).

__pycache__/imports.cpython-312.pyc
ADDED
Binary file (765 Bytes).
app_main_hf.py
CHANGED
@@ -1,13 +1,30 @@
 import streamlit as st
 import os
+import asyncio
 import sys
+
+if sys.platform == "win32":
+    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+else:
+    try:
+        asyncio.get_running_loop()
+    except RuntimeError:
+        asyncio.set_event_loop(asyncio.new_event_loop())
+
+
 import joblib
+import importlib
 import importlib.util
 
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
 
+from imports import *
+
+
+
+
 from dashboard import show_dashboard
-from sentiment_analysis.sentiment_analysis import show_sentiment_analysis
+from sentiment_analysis.sentiment_analysis_main import show_sentiment_analysis
 from emotion_analysis import show_emotion_analysis
 # from text_transformation import show_text_transformation
 
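The event-loop guard added to app_main_hf.py is self-contained enough to factor into a helper. A minimal sketch of the same pattern (the ensure_event_loop name is an assumption, not part of this commit): on Windows it switches to the selector-based loop policy, elsewhere it only installs a fresh loop when none is already running.

import asyncio
import sys

def ensure_event_loop() -> None:
    if sys.platform == "win32":
        # The selector loop sidesteps incompatibilities some libraries
        # have with the default Windows proactor event loop.
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    else:
        try:
            asyncio.get_running_loop()  # a loop is already running: nothing to do
        except RuntimeError:
            asyncio.set_event_loop(asyncio.new_event_loop())

ensure_event_loop()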
imports.py
CHANGED
@@ -9,5 +9,11 @@ import json
 import gc
 import psutil
 import os
+import importlib
 import importlib.util
-import
+import asyncio
+import sys
+import pytorch_lightning as pl
+
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))
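Note that os.path.join(os.path.dirname(__file__), ) passes a single component, so the appended sys.path entry is simply the directory containing imports.py. An equivalent, slightly more explicit sketch (the duplicate guard is an addition for illustration, not in the commit):

import os
import sys

here = os.path.dirname(os.path.abspath(__file__))  # imports.py's own directory
if here not in sys.path:
    sys.path.append(here)  # lets sibling modules resolve `from imports import *`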
poetry.lock
ADDED
The diff for this file is too large to render.
pyproject.toml
CHANGED
@@ -1,6 +1,6 @@
 [project]
 name = "tachygraphy-microtext-analysis-and-normalization"
-version = "2025.03.16.post3"
+version = "2025.03.17.post1"
 description = ""
 authors = [
     { name = "Archisman Karmakar", email = "[email protected]" },
@@ -192,6 +192,8 @@ dependencies = [
     "zict (>=3.0.0,<4.0.0)",
     "zipp (>=3.21.0,<4.0.0)",
     "zstandard (>=0.23.0,<0.24.0)",
+    "asyncio (>=3.4.3,<4.0.0)",
+    "pytorch-lightning (>=2.5.0.post0,<3.0.0)",
 ]
 
 
pyprojectOLD.toml
ADDED
@@ -0,0 +1,202 @@
+[project]
+name = "tachygraphy-microtext-analysis-and-normalization"
+version = "2025.03.16.post3"
+description = ""
+authors = [
+    { name = "Archisman Karmakar", email = "[email protected]" },
+]
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "numpy (==2.1.3)",
+    "torch (>=2.6.0,<3.0.0)",
+    "torchvision (>=0.21.0,<0.22.0)",
+    "tensorflow (>=2.19.0,<3.0.0)",
+    "pandas (>=2.2.3,<3.0.0)",
+    "openpyxl (>=3.1.5,<4.0.0)",
+    "streamlit (>=1.43.2,<2.0.0)",
+    "transformers (>=4.49.0,<5.0.0)",
+    "datasets (>=3.4.0,<4.0.0)",
+    "autograd (>=1.7.0,<2.0.0)",
+    "ipykernel (>=6.29.5,<7.0.0)",
+    "matplotlib (>=3.10.1,<4.0.0)",
+    "plotly (>=6.0.0,<7.0.0)",
+    "importlib (>=1.0.4,<2.0.0)",
+    "joblib (>=1.4.2,<2.0.0)",
+    "accelerate (>=1.5.2,<2.0.0)",
+    "peft (>=0.14.0,<0.15.0)",
+    "mpi4py (>=4.0.3,<5.0.0)",
+    "tiktoken (>=0.9.0,<0.10.0)",
+    "sentencepiece (>=0.2.0,<0.3.0)",
+    "bs4 (>=0.0.2,<0.0.3)",
+    "emoji (>=2.14.1,<3.0.0)",
+    "safetensors (>=0.5.3,<0.6.0)",
+    "lxml (>=5.3.1,<6.0.0)",
+    "evaluate (>=0.4.3,<0.5.0)",
+    "scipy (>=1.15.2,<2.0.0)",
+    "sacrebleu (>=2.5.1,<3.0.0)",
+    "rouge-score (>=0.1.2,<0.2.0)",
+    "scikit-learn (>=1.6.1,<2.0.0)",
+    "regex (>=2024.11.6,<2025.0.0)",
+    "pywin32 (>=309,<310)",
+    "pycurl (>=7.45.6,<8.0.0)",
+    "pillow (>=11.1.0,<12.0.0)",
+    "diffusers (>=0.32.2,<0.33.0)",
+    "onnx (>=1.17.0,<2.0.0)",
+    "keras (>=3.9.0,<4.0.0)",
+    "h2o (>=3.46.0.6,<4.0.0.0)",
+    "distributed (>=2025.2.0,<2026.0.0)",
+    "absl-py (>=2.1.0,<3.0.0)",
+    "aiohappyeyeballs (>=2.6.1,<3.0.0)",
+    "aiohttp (>=3.11.13,<4.0.0)",
+    "aiosignal (>=1.3.2,<2.0.0)",
+    "altair (>=5.5.0,<6.0.0)",
+    "anyio (>=4.8.0,<5.0.0)",
+    "asttokens (>=3.0.0,<4.0.0)",
+    "astunparse (>=1.6.3,<2.0.0)",
+    "attrs (>=25.3.0,<26.0.0)",
+    "beautifulsoup4 (>=4.13.3,<5.0.0)",
+    "blinker (>=1.9.0,<2.0.0)",
+    "build (>=1.2.2.post1,<2.0.0)",
+    "cachecontrol (>=0.14.2,<0.15.0)",
+    "cachetools (>=5.5.2,<6.0.0)",
+    "certifi (>=2025.1.31,<2026.0.0)",
+    "charset-normalizer (>=3.4.1,<4.0.0)",
+    "click (>=8.1.8,<9.0.0)",
+    "cloudpickle (>=3.1.1,<4.0.0)",
+    "colorama (>=0.4.6,<0.5.0)",
+    "comm (>=0.2.2,<0.3.0)",
+    "contourpy (>=1.3.1,<2.0.0)",
+    "cycler (>=0.12.1,<0.13.0)",
+    "dask (>=2025.2.0,<2026.0.0)",
+    "debugpy (>=1.8.13,<2.0.0)",
+    "decorator (>=5.2.1,<6.0.0)",
+    "distlib (>=0.3.9,<0.4.0)",
+    "dulwich (>=0.22.8,<0.23.0)",
+    "et-xmlfile (>=2.0.0,<3.0.0)",
+    "executing (>=2.2.0,<3.0.0)",
+    "fastjsonschema (>=2.21.1,<3.0.0)",
+    "filelock (>=3.18.0,<4.0.0)",
+    "findpython (>=0.6.3,<0.7.0)",
+    "flatbuffers (>=25.2.10,<26.0.0)",
+    "fonttools (>=4.56.0,<5.0.0)",
+    "frozenlist (>=1.5.0,<2.0.0)",
+    "gast (>=0.6.0,<0.7.0)",
+    "gitdb (>=4.0.12,<5.0.0)",
+    "gitpython (>=3.1.44,<4.0.0)",
+    "google-pasta (>=0.2.0,<0.3.0)",
+    "grpcio (>=1.71.0,<2.0.0)",
+    "h11 (>=0.14.0,<0.15.0)",
+    "h5py (>=3.13.0,<4.0.0)",
+    "httpcore (>=1.0.7,<2.0.0)",
+    "httpx (>=0.28.1,<0.29.0)",
+    "huggingface-hub (>=0.29.3,<0.30.0)",
+    "idna (>=3.10,<4.0)",
+    "importlib-metadata (>=8.6.1,<9.0.0)",
+    "installer (>=0.7.0,<0.8.0)",
+    "ipython (>=9.0.2,<10.0.0)",
+    "ipython-pygments-lexers (>=1.1.1,<2.0.0)",
+    "jaraco-classes (>=3.4.0,<4.0.0)",
+    "jaraco-context (>=6.0.1,<7.0.0)",
+    "jaraco-functools (>=4.1.0,<5.0.0)",
+    "jedi (>=0.19.2,<0.20.0)",
+    "jinja2 (>=3.1.6,<4.0.0)",
+    "jsonschema (>=4.23.0,<5.0.0)",
+    "jsonschema-specifications (>=2024.10.1,<2025.0.0)",
+    "jupyter-client (>=8.6.3,<9.0.0)",
+    "jupyter-core (>=5.7.2,<6.0.0)",
+    "kagglehub (>=0.3.10,<0.4.0)",
+    "keyring (>=25.6.0,<26.0.0)",
+    "kiwisolver (>=1.4.8,<2.0.0)",
+    "libclang (>=18.1.1,<19.0.0)",
+    "locket (>=1.0.0,<2.0.0)",
+    "markdown (>=3.7,<4.0)",
+    "markdown-it-py (>=3.0.0,<4.0.0)",
+    "markupsafe (>=3.0.2,<4.0.0)",
+    "matplotlib-inline (>=0.1.7,<0.2.0)",
+    "mdurl (>=0.1.2,<0.2.0)",
+    "ml-dtypes (>=0.5.1,<0.6.0)",
+    "more-itertools (>=10.6.0,<11.0.0)",
+    "mpmath (>=1.3.0,<2.0.0)",
+    "msgpack (>=1.1.0,<2.0.0)",
+    "multidict (>=6.1.0,<7.0.0)",
+    "namex (>=0.0.8,<0.0.9)",
+    "narwhals (>=1.30.0,<2.0.0)",
+    "nest-asyncio (>=1.6.0,<2.0.0)",
+    "networkx (>=3.4.2,<4.0.0)",
+    "nltk (>=3.9.1,<4.0.0)",
+    "opt-einsum (>=3.4.0,<4.0.0)",
+    "optree (>=0.14.1,<0.15.0)",
+    "packaging (>=24.2,<25.0)",
+    "parso (>=0.8.4,<0.9.0)",
+    "partd (>=1.4.2,<2.0.0)",
+    "pbs-installer (>=2025.3.11,<2026.0.0)",
+    "pkginfo (>=1.12.1.2,<2.0.0.0)",
+    "platformdirs (>=4.3.6,<5.0.0)",
+    "portalocker (>=3.1.1,<4.0.0)",
+    "prompt-toolkit (>=3.0.50,<4.0.0)",
+    "propcache (>=0.3.0,<0.4.0)",
+    "psutil (>=7.0.0,<8.0.0)",
+    "pure-eval (>=0.2.3,<0.3.0)",
+    "pyarrow (>=19.0.1,<20.0.0)",
+    "pydeck (>=0.9.1,<0.10.0)",
+    "pygments (>=2.19.1,<3.0.0)",
+    "pyparsing (>=3.2.1,<4.0.0)",
+    "pyproject-hooks (>=1.2.0,<2.0.0)",
+    "python-dateutil (>=2.9.0.post0,<3.0.0)",
+    "pytz (>=2025.1,<2026.0)",
+    "pywin32-ctypes (>=0.2.3,<0.3.0)",
+    "pyyaml (>=6.0.2,<7.0.0)",
+    "pyzmq (>=26.3.0,<27.0.0)",
+    "rapidfuzz (>=3.12.2,<4.0.0)",
+    "referencing (>=0.36.2,<0.37.0)",
+    "requests (>=2.32.3,<3.0.0)",
+    "requests-toolbelt (>=1.0.0,<2.0.0)",
+    "rich (>=13.9.4,<14.0.0)",
+    "rpds-py (>=0.23.1,<0.24.0)",
+    "sentence-transformers (>=3.4.1,<4.0.0)",
+    "setuptools (>=76.0.0,<77.0.0)",
+    "shellingham (>=1.5.4,<2.0.0)",
+    "six (>=1.17.0,<2.0.0)",
+    "smmap (>=5.0.2,<6.0.0)",
+    "sniffio (>=1.3.1,<2.0.0)",
+    "sortedcontainers (>=2.4.0,<3.0.0)",
+    "soupsieve (>=2.6,<3.0)",
+    "stack-data (>=0.6.3,<0.7.0)",
+    "tabulate (>=0.9.0,<0.10.0)",
+    "tblib (>=3.0.0,<4.0.0)",
+    "tenacity (>=9.0.0,<10.0.0)",
+    "tensorboard (>=2.19.0,<3.0.0)",
+    "tensorboard-data-server (>=0.7.2,<0.8.0)",
+    "termcolor (>=2.5.0,<3.0.0)",
+    "threadpoolctl (>=3.6.0,<4.0.0)",
+    "tokenizers (>=0.21.1,<0.22.0)",
+    "toml (>=0.10.2,<0.11.0)",
+    "tomlkit (>=0.13.2,<0.14.0)",
+    "toolz (>=1.0.0,<2.0.0)",
+    "tornado (>=6.4.2,<7.0.0)",
+    "tqdm (>=4.67.1,<5.0.0)",
+    "traitlets (>=5.14.3,<6.0.0)",
+    "trove-classifiers (>=2025.3.13.13,<2026.0.0.0)",
+    "typing-extensions (>=4.12.2,<5.0.0)",
+    "tzdata (>=2025.1,<2026.0)",
+    "urllib3 (>=2.3.0,<3.0.0)",
+    "virtualenv (>=20.29.3,<21.0.0)",
+    "watchdog (>=6.0.0,<7.0.0)",
+    "wcwidth (>=0.2.13,<0.3.0)",
+    "werkzeug (>=3.1.3,<4.0.0)",
+    "wheel (>=0.45.1,<0.46.0)",
+    "wrapt (>=1.17.2,<2.0.0)",
+    "xxhash (>=3.5.0,<4.0.0)",
+    "yarl (>=1.18.3,<2.0.0)",
+    "zict (>=3.0.0,<4.0.0)",
+    "zipp (>=3.21.0,<4.0.0)",
+    "zstandard (>=0.23.0,<0.24.0)",
+    "asyncio (>=3.4.3,<4.0.0)",
+    "pytorch-lightning (>=2.5.0.post0,<3.0.0)",
+]
+
+
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"
requirements.txt
CHANGED
@@ -1,13 +1,14 @@
 absl-py==2.1.0 ; python_version >= "3.12"
 accelerate==1.5.2 ; python_version >= "3.12"
 aiohappyeyeballs==2.6.1 ; python_version >= "3.12"
-aiohttp==3.11.13 ; python_version >= "3.12"
+aiohttp==3.11.14 ; python_version >= "3.12"
 aiosignal==1.3.2 ; python_version >= "3.12"
 altair==5.5.0 ; python_version >= "3.12"
-anyio==4.8.0 ; python_version >= "3.12"
+anyio==4.9.0 ; python_version >= "3.12"
 appnope==0.1.4 ; python_version >= "3.12" and platform_system == "Darwin"
 asttokens==3.0.0 ; python_version >= "3.12"
 astunparse==1.6.3 ; python_version >= "3.12"
+asyncio==3.4.3 ; python_version >= "3.12"
 attrs==25.3.0 ; python_version >= "3.12"
 autograd==1.7.0 ; python_version >= "3.12"
 beautifulsoup4==4.13.3 ; python_version >= "3.12"
@@ -80,6 +81,7 @@ keras==3.9.0 ; python_version >= "3.12"
 keyring==25.6.0 ; python_version >= "3.12"
 kiwisolver==1.4.8 ; python_version >= "3.12"
 libclang==18.1.1 ; python_version >= "3.12"
+lightning-utilities==0.14.1 ; python_version >= "3.12"
 locket==1.0.0 ; python_version >= "3.12"
 lxml==5.3.1 ; python_version >= "3.12"
 markdown-it-py==3.0.0 ; python_version >= "3.12"
@@ -144,9 +146,10 @@ pygments==2.19.1 ; python_version >= "3.12"
 pyparsing==3.2.1 ; python_version >= "3.12"
 pyproject-hooks==1.2.0 ; python_version >= "3.12"
 python-dateutil==2.9.0.post0 ; python_version >= "3.12"
+pytorch-lightning==2.5.0.post0 ; python_version >= "3.12"
 pytz==2025.1 ; python_version >= "3.12"
-
-
+pywin32-ctypes==0.2.3 ; python_version >= "3.12"
+pywin32==309 ; python_version >= "3.12"
 pyyaml==6.0.2 ; python_version >= "3.12"
 pyzmq==26.3.0 ; python_version >= "3.12"
 rapidfuzz==3.12.2 ; python_version >= "3.12"
@@ -188,6 +191,7 @@ toml==0.10.2 ; python_version >= "3.12"
 tomlkit==0.13.2 ; python_version >= "3.12"
 toolz==1.0.0 ; python_version >= "3.12"
 torch==2.6.0 ; python_version >= "3.12"
+torchmetrics==1.6.3 ; python_version >= "3.12"
 torchvision==0.21.0 ; python_version >= "3.12"
 tornado==6.4.2 ; python_version >= "3.12"
 tqdm==4.67.1 ; python_version >= "3.12"
sentiment_analysis/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (264 Bytes).

sentiment_analysis/__pycache__/sentiment_analysis.cpython-312.pyc
ADDED
Binary file (8.94 kB).

sentiment_analysis/hmv_cfg_base_stage1/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (284 Bytes).

sentiment_analysis/hmv_cfg_base_stage1/__pycache__/model1.cpython-312.pyc
ADDED
Binary file (2.97 kB).
sentiment_analysis/hmv_cfg_base_stage1/imports.py
CHANGED
@@ -9,5 +9,8 @@ import json
 import gc
 import psutil
 import os
+import importlib
 import importlib.util
-import
+import asyncio
+import sys
+import pytorch_lightning as pl
sentiment_analysis/{sentiment_analysis.py → sentiment_analysis_main.py}
RENAMED
@@ -1,296 +1,296 @@
(Pure rename: all 296 lines are identical before and after, so the contents are shown once.)

from imports import *
import importlib.util
import os
import sys
import joblib

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))

# from hmv_cfg_base_stage1.model1 import load_model as load_model1
# from hmv_cfg_base_stage1.model1 import predict as predict1

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
CONFIG_STAGE1 = os.path.join(BASE_DIR, "config", "stage1_models.json")
LOADERS_STAGE1 = os.path.join(BASE_DIR, "hmv-cfg-base-stage1")

# Load the model and tokenizer
# model_name = "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModel.from_pretrained(model_name)

SENTIMENT_POLARITY_LABELS = [
    "negative", "neutral", "positive"
]

current_model = None
current_tokenizer = None

# Enabling Resource caching
@st.cache_resource
def load_model_config():
    with open(CONFIG_STAGE1, "r") as f:
        model_data = json.load(f)

    model_options = {v["name"]: v for v in model_data.values()}  # Extract names for dropdown
    return model_data, model_options

MODEL_DATA, MODEL_OPTIONS = load_model_config()


# def load_model():
#     model = DebertaV2ForSequenceClassification.from_pretrained(model_name)
#     tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
#     return model, tokenizer


# ✅ Dynamically Import Model Functions
def import_from_module(module_name, function_name):
    try:
        module = importlib.import_module(module_name)
        return getattr(module, function_name)
    except (ModuleNotFoundError, AttributeError) as e:
        st.error(f"❌ Import Error: {e}")
        return None


def free_memory():
    # """Free up CPU & GPU memory before loading a new model."""
    global current_model, current_tokenizer

    if current_model is not None:
        del current_model  # Delete the existing model
        current_model = None  # Reset reference

    if current_tokenizer is not None:
        del current_tokenizer  # Delete the tokenizer
        current_tokenizer = None

    gc.collect()  # Force garbage collection for CPU memory

    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # Free GPU memory
        torch.cuda.ipc_collect()  # Clean up PyTorch GPU cache

    # If running on CPU, reclaim memory using OS-level commands
    try:
        if torch.cuda.is_available() is False:
            psutil.virtual_memory()  # Refresh memory stats
    except Exception as e:
        print(f"Memory cleanup error: {e}")


def load_selected_model(model_name):
    global current_model, current_tokenizer

    free_memory()

    # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys())  # ✅ See available models
    # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name])  # ✅ Check selected model
    # st.write("DEBUG: Model Name:", model_name)  # ✅ Check selected model

    if model_name not in MODEL_OPTIONS:
        st.error(f"⚠️ Model '{model_name}' not found in config!")
        return None, None, None

    model_info = MODEL_OPTIONS[model_name]
    hf_location = model_info["hf_location"]

    model_module = model_info["module_path"]
    load_function = model_info["load_function"]
    predict_function = model_info["predict_function"]

    load_model_func = import_from_module(model_module, load_function)
    predict_func = import_from_module(model_module, predict_function)

    if load_model_func is None or predict_func is None:
        st.error("❌ Model functions could not be loaded!")
        return None, None, None

    model, tokenizer = load_model_func()

    current_model, current_tokenizer = model, tokenizer
    return model, tokenizer, predict_func

# def load_selected_model(model_name):
#     # """Load model and tokenizer based on user selection."""
#     global current_model, current_tokenizer

#     # Free memory before loading a new model
#     free_memory()

#     if model_name not in MODEL_OPTIONS:
#         st.error(f"⚠️ Model '{model_name}' not found in config!")
#         return None, None

#     model_info = MODEL_OPTIONS[model_name]
#     hf_location = model_info["hf_location"]

#     model_module = model_info["module_path"]
#     # load_function = "load_model"
#     # predict_function = "predict"

#     load_function = model_info["load_function"]
#     predict_function = model_info["predict_function"]

#     # tokenizer_class = globals()[model_info["tokenizer_class"]]
#     # model_class = globals()[model_info["model_class"]]

#     # tokenizer = tokenizer_class.from_pretrained(hf_location)

#     load_model_func = import_from_module(model_module, load_function)
#     predict_func = import_from_module(model_module, predict_function)

#     # # Load model
#     # if model_info["type"] == "custom_checkpoint" or model_info["type"] == "custom_model":
#     #     model = torch.load(hf_location, map_location="cpu")  # Load PyTorch model
#     # elif model_info["type"] == "hf_automodel_finetuned_dbt3":
#     #     tokenizer_class = globals()[model_info["tokenizer_class"]]
#     #     model_class = globals()[model_info["model_class"]]
#     #     tokenizer = tokenizer_class.from_pretrained(hf_location)
#     #     model = model_class.from_pretrained(hf_location,
#     #         problem_type=model_info["problem_type"],
#     #         num_labels=model_info["num_labels"]
#     #     )
#     # else:
#     #     st.error("Invalid model selection")
#     #     return None, None

#     if load_model_func is None or predict_func is None:
#         st.error("❌ Model functions could not be loaded!")
#         return None, None

#     # current_model, current_tokenizer = model, tokenizer  # Store references
#     # return model, tokenizer

#     model, tokenizer = load_model_func(hf_location)

#     current_model, current_tokenizer = model, tokenizer
#     return model, tokenizer, predict_func


def predict(text, model, tokenizer, device, max_len=128):
    # Tokenize and pad the input text
    inputs = tokenizer(
        text,
        add_special_tokens=True,
        padding=True,
        truncation=False,
        return_tensors="pt",
        return_token_type_ids=False,
    ).to(device)  # Move input tensors to the correct device

    with torch.no_grad():
        outputs = model(**inputs)

    # Apply sigmoid activation (for BCEWithLogitsLoss)
    probabilities = outputs.logits.cpu().numpy()

    return probabilities

# def show_sentiment_analysis():

# Add your sentiment analysis code here

# user_input = st.text_input("Enter text for sentiment analysis:")
# user_input = st.text_area("Enter text for sentiment analysis:", height=200)
# user_input = st.text_area("Enter text for sentiment analysis:", max_chars=500)

def show_sentiment_analysis():
    st.title("Stage 1: Sentiment Polarity Analysis")
    st.write("This section will handle sentiment analysis.")

    if "selected_model" not in st.session_state:
        st.session_state.selected_model = list(MODEL_OPTIONS.keys())[0]  # Default selection

    if "clear_output" not in st.session_state:
        st.session_state.clear_output = False

    st.selectbox("Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model")

    selected_model = st.session_state.selected_model

    if selected_model not in MODEL_OPTIONS:
        st.error(f"❌ Selected model '{selected_model}' not found!")
        st.stop()

    st.session_state.clear_output = True  # Reset output when model changes

    # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys())  # ✅ See available models
    # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[selected_model])  # ✅ Check selected model

    user_input = st.text_input("Enter text for sentiment analysis:")

    if user_input:
        # Make prediction

        # model, tokenizer = load_model()
        # model, tokenizer = load_selected_model(selected_model)

        model, tokenizer, predict_func = load_selected_model(selected_model)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        if model is None:
            st.error("⚠️ Error: Model failed to load! Check model selection or configuration.")
            st.stop()

        model.to(device)

        # predictions = predict(user_input, model, tokenizer, device)

        predictions = predict_func(user_input, model, tokenizer, device)

        # Squeeze predictions to remove extra dimensions
        predictions_array = predictions.squeeze()

        # Convert to binary predictions (argmax)
        binary_predictions = np.zeros_like(predictions_array)
        max_indices = np.argmax(predictions_array)
        binary_predictions[max_indices] = 1

        # Display raw predictions
        st.write(f"**Predicted Sentiment Scores:** {predictions_array}")

        # Display binary classification result
        st.write(f"**Predicted Sentiment:**")
        st.write(f"**NEGATIVE:** {binary_predictions[0]}, **NEUTRAL:** {binary_predictions[1]}, **POSITIVE:** {binary_predictions[2]}")
        # st.write(f"**NEUTRAL:** {binary_predictions[1]}")
        # st.write(f"**POSITIVE:** {binary_predictions[2]}")

        # 1️⃣ **Polar Plot (Plotly)**
        sentiment_polarities = predictions_array.tolist()
        fig_polar = px.line_polar(
            pd.DataFrame(dict(r=sentiment_polarities, theta=SENTIMENT_POLARITY_LABELS)),
            r='r', theta='theta', line_close=True
        )
        st.plotly_chart(fig_polar)

        # 2️⃣ **Normalized Horizontal Bar Chart (Matplotlib)**
        normalized_predictions = predictions_array / predictions_array.sum()

        fig, ax = plt.subplots(figsize=(8, 2))
        left = 0
        for i in range(len(normalized_predictions)):
            ax.barh(0, normalized_predictions[i], color=plt.cm.tab10(i), left=left, label=SENTIMENT_POLARITY_LABELS[i])
            left += normalized_predictions[i]

        # Configure the chart
        ax.set_xlim(0, 1)
        ax.set_yticks([])
        ax.set_xticks(np.arange(0, 1.1, 0.1))
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=len(SENTIMENT_POLARITY_LABELS))
        plt.title("Sentiment Polarity Prediction Distribution")

        # Display in Streamlit
        st.pyplot(fig)


if __name__ == "__main__":
    show_sentiment_analysis()
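load_selected_model() above resolves loader and predictor functions from strings stored in config/stage1_models.json. A minimal self-contained sketch of that dispatch pattern; the config entry below is a hypothetical illustration modeled on the keys the file actually reads (module_path, load_function, predict_function), not the repository's real JSON:

import importlib

# Hypothetical entry, mirroring the keys read by load_selected_model().
ENTRY = {
    "name": "Model 1",
    "module_path": "hmv_cfg_base_stage1.model1",  # illustrative value
    "load_function": "load_model",
    "predict_function": "predict",
}

def resolve(entry):
    # Import the module by dotted path, then look the functions up by name,
    # the same mechanism import_from_module() uses above.
    module = importlib.import_module(entry["module_path"])
    return (getattr(module, entry["load_function"]),
            getattr(module, entry["predict_function"]))

# load_fn, predict_fn = resolve(ENTRY)  # requires the package on sys.path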