Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- Hinglish_Profanity_List.csv +210 -0
- ocr.py +55 -0
- requirements.txt +98 -0
Hinglish_Profanity_List.csv
ADDED
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
profanity_hn,profanity_en,ind
|
2 |
+
badir,idiot,1
|
3 |
+
badirchand,idiot,1
|
4 |
+
bakland,idiot,1
|
5 |
+
bhadva,pimp,2
|
6 |
+
bhootnika,son of a witch,3
|
7 |
+
chinaal,whore,3
|
8 |
+
chup,shut up,1
|
9 |
+
chutia,fucker ,5
|
10 |
+
ghasti,hooker,4
|
11 |
+
chutiya,fucker,5
|
12 |
+
haraami,bastard,5
|
13 |
+
haraam,bastard,5
|
14 |
+
hijra,transsexual ,3
|
15 |
+
hinjda,transsexual ,3
|
16 |
+
jaanvar,animal,1
|
17 |
+
kutta,dog,2
|
18 |
+
kutiya,bitch,3
|
19 |
+
khota,donkey,1
|
20 |
+
auladheen,sonless,2
|
21 |
+
jaat,breed,1
|
22 |
+
najayaz,illegitimate,3
|
23 |
+
gandpaidaish,badborn,2
|
24 |
+
saala,sister's husband,2
|
25 |
+
kutti,bitch,2
|
26 |
+
soover,swine,3
|
27 |
+
tatti,shit,3
|
28 |
+
potty,shit,3
|
29 |
+
bahenchod,sister fucker,9
|
30 |
+
bahanchod,sister fucker,9
|
31 |
+
bahencho,sister fucker,9
|
32 |
+
bancho,sister fucker,7
|
33 |
+
bahenke,sister's,6
|
34 |
+
laude,dick,7
|
35 |
+
takke,balls,7
|
36 |
+
betichod,daughter fucker,9
|
37 |
+
bhaichod,brother fucker,8
|
38 |
+
bhains,buffalo,1
|
39 |
+
jhalla,faggot,6
|
40 |
+
jhant,pubic,7
|
41 |
+
nabaal,hairless,4
|
42 |
+
pissu,bug,3
|
43 |
+
kutte,dog,2
|
44 |
+
maadherchod,mother fucker,10
|
45 |
+
madarchod,motherfucker,10
|
46 |
+
padma,fat bitch,6
|
47 |
+
raand,whore,9
|
48 |
+
jamai,son-in-law,5
|
49 |
+
randwa,male prostitute,8
|
50 |
+
randi,hooker,8
|
51 |
+
bachachod,son fucker,8
|
52 |
+
bachichod,daughter fucker,8
|
53 |
+
soower,swine,3
|
54 |
+
bachchechod,children fucker,8
|
55 |
+
ullu,idiot,1
|
56 |
+
pathe,idiot,1
|
57 |
+
banda,semi-dick,7
|
58 |
+
booblay,boobs,7
|
59 |
+
booby,boobs,7
|
60 |
+
buble,boobs,7
|
61 |
+
babla,boobs,7
|
62 |
+
bhonsriwala,fucker,8
|
63 |
+
bhonsdiwala,fucker,8
|
64 |
+
ched,pussy,8
|
65 |
+
chut,pussy,8
|
66 |
+
chod,fuck,7
|
67 |
+
chodu,fucker,7
|
68 |
+
chodra,fucker,7
|
69 |
+
choochi,boobs,8
|
70 |
+
chuchi,boobs,8
|
71 |
+
gaandu,asshole,9
|
72 |
+
gandu,asshole,9
|
73 |
+
gaand,ass,9
|
74 |
+
lavda, dick ,8
|
75 |
+
lawda,dick,8
|
76 |
+
lauda,dick,8
|
77 |
+
lund,dick,8
|
78 |
+
balchod,hair fucker,7
|
79 |
+
lavander,dick head,8
|
80 |
+
muth,masturbate ,7
|
81 |
+
maacho,mother fucker,10
|
82 |
+
mammey,boobs,8
|
83 |
+
tatte,boobs,8
|
84 |
+
toto,penis,8
|
85 |
+
toota,broken,7
|
86 |
+
backar,gossip,2
|
87 |
+
bhandwe,pimp,7
|
88 |
+
bhosadchod,ass fucker,9
|
89 |
+
bhosad,pussy,8
|
90 |
+
bumchod,ass fucker,9
|
91 |
+
bum,ass,8
|
92 |
+
bur,pussy,9
|
93 |
+
chatani,ketchup,1
|
94 |
+
cunt,pussy,8
|
95 |
+
cuntmama,pussy,8
|
96 |
+
chipkali,lizard,1
|
97 |
+
pasine,sweat,1
|
98 |
+
jhaat,cunt,8
|
99 |
+
chodela,fucked up,6
|
100 |
+
bhagatchod,saint fucker,7
|
101 |
+
chhola,clit,7
|
102 |
+
chudai,fucking,7
|
103 |
+
chudaikhana,whore house,9
|
104 |
+
chunni,clit,8
|
105 |
+
choot,pussy,8
|
106 |
+
bhoot,ghost,1
|
107 |
+
dhakkan,idiot,1
|
108 |
+
bhajiye,snack,1
|
109 |
+
fateychu,torn pussy,9
|
110 |
+
gandnatije,Bad result,2
|
111 |
+
lundtopi,condom,4
|
112 |
+
gaandu,ass,8
|
113 |
+
gaandfat,ass,8
|
114 |
+
gaandmasti,ass,8
|
115 |
+
makhanchudai,fucking,8
|
116 |
+
gaandmarau,ass fuck,9
|
117 |
+
gandu,faggot,8
|
118 |
+
chaatu,licker,6
|
119 |
+
beej,semen,6
|
120 |
+
choosu,sucker,7
|
121 |
+
fakeerchod,saint fucker,8
|
122 |
+
lundoos,dick,8
|
123 |
+
shorba,semen,7
|
124 |
+
binbheja,brainless,3
|
125 |
+
bhadwe,pimp,6
|
126 |
+
parichod,angel fucker,9
|
127 |
+
nirodh,condom.,5
|
128 |
+
pucchi,pussy,8
|
129 |
+
baajer,fucker,8
|
130 |
+
choud,fuck,8
|
131 |
+
bhosda,pussy,9
|
132 |
+
sadi,stinking,5
|
133 |
+
choos,suck,5
|
134 |
+
maka,mother's,7
|
135 |
+
chinaal,prostitute,7
|
136 |
+
gadde,boobs,7
|
137 |
+
joon,bug,3
|
138 |
+
chullugand,handful dirt,4
|
139 |
+
doob,drown,1
|
140 |
+
khatmal,bug,1
|
141 |
+
gandkate,ass,6
|
142 |
+
bambu,bamboo,4
|
143 |
+
lassan,garlic,1
|
144 |
+
danda,stick,2
|
145 |
+
keera,bug,2
|
146 |
+
keeda,bug,2
|
147 |
+
hazaarchu,thousand pussy,7
|
148 |
+
paidaishikeeda,born bug,5
|
149 |
+
kali,nigger,5
|
150 |
+
safaid,american,2
|
151 |
+
poot,son,2
|
152 |
+
behendi,sister,5
|
153 |
+
chus,sucker,6
|
154 |
+
machudi,mother fucker,10
|
155 |
+
chodoonga,fuck,8
|
156 |
+
baapchu,father pussy,9
|
157 |
+
laltern,lantern,5
|
158 |
+
suhaagchudai,wedding fuck,8
|
159 |
+
raatchuda,night fuck,7
|
160 |
+
kaalu,migga,6
|
161 |
+
neech,low caste,7
|
162 |
+
chikna,gay,6
|
163 |
+
meetha,gay,6
|
164 |
+
beechka,gay,6
|
165 |
+
chooche,boobs,8
|
166 |
+
patichod,husband,8
|
167 |
+
rundi,prostitute,7
|
168 |
+
makkhi,fly,1
|
169 |
+
biwichod,wife fucker,9
|
170 |
+
chodhunga,fuck,8
|
171 |
+
haathi,elephant,1
|
172 |
+
kute,dog,2
|
173 |
+
jhanten,pubic hair,8
|
174 |
+
kaat,cut,3
|
175 |
+
gandi,filthy,3
|
176 |
+
gadha,donkey,1
|
177 |
+
bimaar,ill,2
|
178 |
+
badboodar,smelly,2
|
179 |
+
dum,tail,2
|
180 |
+
raandsaala,sister's brother pimp,7
|
181 |
+
phudi,pussy,7
|
182 |
+
chute,pussy,7
|
183 |
+
kussi,ass,7
|
184 |
+
khandanchod,family fucker,9
|
185 |
+
ghussa,fuck,6
|
186 |
+
maarey,dead,4
|
187 |
+
chipkili,lizard,1
|
188 |
+
unday,eggs,1
|
189 |
+
budh,cunt,7
|
190 |
+
chaarpai,cot,1
|
191 |
+
chodun,fuck,5
|
192 |
+
chatri,condom,3
|
193 |
+
chode,fuck,6
|
194 |
+
chodho,fuck,6
|
195 |
+
mullekatue,Derogatory abuse to muslims,6
|
196 |
+
mullikatui,Derogatory Abuse to female muslim,6
|
197 |
+
mullekebaal,Derogatory Abuse to muslim,6
|
198 |
+
momedankatue,Derogatory Abuse to muslim,6
|
199 |
+
katua,dick cut,8
|
200 |
+
chutiyapa,fuck all,8
|
201 |
+
bc,sister fucker,10
|
202 |
+
mc,mother fucker,10
|
203 |
+
chudwaya,fuck,7
|
204 |
+
kutton,dog,2
|
205 |
+
jungli,wild,2
|
206 |
+
vahiyaat,disgusting,4
|
207 |
+
jihadi,terrorist,4
|
208 |
+
atankvadi,terrorist,4
|
209 |
+
atankwadi,terrorist,4
|
210 |
+
aatanki,terrorist,4
|
ocr.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import easyocr
|
2 |
+
from gradio_client import Client, handle_file
|
3 |
+
import pandas as pd
|
4 |
+
import gradio as gr
|
5 |
+
|
6 |
+
# Clients for the remote Gradio Spaces that do the actual classification.
# They are constructed at import time, so importing this module needs network
# access to those Spaces.
clientImg = Client("dj-dawgs-ipd/IPD-Image-ViT-Finetune")
clientEngText = Client("dj-dawgs-ipd/IPD-Text-English-Finetune")
clientHingText = Client("dj-dawgs-ipd/IPD-Text-Hinglish")

# Hinglish profanity lexicon (header row: profanity_hn, profanity_en, ind).
# encoding_errors='replace' keeps a stray non-UTF-8 byte in the CSV from
# raising UnicodeDecodeError while the module is being imported; valid UTF-8
# input is read exactly as before.
profanity_df = pd.read_csv(
    'Hinglish_Profanity_List.csv',
    encoding='utf-8',
    encoding_errors='replace',
)
profanity_hn = profanity_df['profanity_hn']
|
12 |
+
|
13 |
+
# Lazily created EasyOCR reader shared by every call to extract_text().
_OCR_READER = None


def _get_reader():
    """Return the shared English EasyOCR reader, creating it on first use."""
    global _OCR_READER
    if _OCR_READER is None:
        # Building a Reader loads the detection/recognition models, so do it
        # once instead of once per request.
        _OCR_READER = easyocr.Reader(['en'])
    return _OCR_READER


def extract_text(image):
    """Run English OCR on ``image`` and return the recognised text.

    Args:
        image: Passed straight to ``easyocr.Reader.readtext``.

    Returns:
        The text of every detection (element 1 of each ``readtext`` result),
        joined with single spaces in the order ``readtext`` yields them.
        An empty string when nothing is detected.
    """
    fragments = (detection[1] for detection in _get_reader().readtext(image))
    return ' '.join(fragments)
|
17 |
+
|
18 |
+
def predict(image):
    """Classify an image as hateful or not, using symbols first and text second.

    The image is first sent to the remote image classifier. If that is
    confident enough, the result is returned immediately. Otherwise the text
    in the image is extracted with OCR and sent to the English and Hinglish
    text classifiers.

    Args:
        image: Forwarded to ``handle_file`` and to ``extract_text``.

    Returns:
        A two-element list ``[class_label, symbol]`` where ``class_label`` is
        ``"hate"`` or ``"not_hate"`` and ``symbol`` is the first element of
        the image classifier's response when it triggered, otherwise ``None``.
    """
    imgResult = clientImg.predict(
        image=handle_file(image),
        api_name="/predict"
    )

    # NOTE(review): the second output's 'label' is parsed as a number and
    # compared to 0.95 -- presumably a confidence score; confirm against the
    # image Space's API.
    if float(imgResult[1]['label']) > 0.95:
        return ["hate", imgResult[0]]

    ocr_text = extract_text(image)

    # No confident symbol and no readable text: there is nothing for the text
    # classifiers to judge, so skip the two remote calls.
    if not ocr_text.strip():
        return ["not_hate", None]

    engResult = clientEngText.predict(
        text=ocr_text,
        api_name="/classify_text"
    )

    hingResult = clientHingText.predict(
        text=ocr_text,
        api_name="/predict"
    )

    # The text counts as hateful only when BOTH classifiers flag it; either
    # one answering with its non-hate label ("NEITHER" / "NAG") clears it.
    if engResult[0] == "NEITHER" or hingResult[0] == "NAG":
        return ["not_hate", None]
    return ["hate", None]
|
45 |
+
|
46 |
+
# predict() passes its input to gradio_client.handle_file(), which expects a
# file path or URL, so ask Gradio for the uploaded image's temp-file path
# rather than an in-memory PIL object.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type='filepath'),
    outputs=[gr.Label(label="Class"), gr.Label(label="Hate Symbol(if any)")],
    title="Hate Speech Detection in Image",
    description="Detect hateful symbols or text in Image",
)

if __name__ == "__main__":
    iface.launch()
|
55 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles==23.2.1
|
2 |
+
annotated-types==0.7.0
|
3 |
+
anyio==4.6.0
|
4 |
+
asttokens==2.4.1
|
5 |
+
certifi==2024.8.30
|
6 |
+
charset-normalizer==3.3.2
|
7 |
+
click==8.1.7
|
8 |
+
colorama==0.4.6
|
9 |
+
comm==0.2.2
|
10 |
+
contourpy==1.3.0
|
11 |
+
cycler==0.12.1
|
12 |
+
debugpy==1.8.6
|
13 |
+
decorator==5.1.1
|
14 |
+
easyocr==1.7.2
|
15 |
+
executing==2.1.0
|
16 |
+
fastapi==0.115.0
|
17 |
+
ffmpy==0.4.0
|
18 |
+
filelock==3.16.1
|
19 |
+
fonttools==4.54.1
|
20 |
+
fsspec==2024.9.0
|
21 |
+
gradio==4.44.0
|
22 |
+
gradio_client==1.3.0
|
23 |
+
h11==0.14.0
|
24 |
+
httpcore==1.0.5
|
25 |
+
httpx==0.27.2
|
26 |
+
huggingface-hub==0.25.1
|
27 |
+
idna==3.10
|
28 |
+
imageio==2.35.1
|
29 |
+
importlib_resources==6.4.5
|
30 |
+
ipykernel==6.29.5
|
31 |
+
ipython==8.27.0
|
32 |
+
jedi==0.19.1
|
33 |
+
Jinja2==3.1.4
|
34 |
+
jupyter_client==8.6.3
|
35 |
+
jupyter_core==5.7.2
|
36 |
+
kiwisolver==1.4.7
|
37 |
+
lazy_loader==0.4
|
38 |
+
markdown-it-py==3.0.0
|
39 |
+
MarkupSafe==2.1.5
|
40 |
+
matplotlib==3.9.2
|
41 |
+
matplotlib-inline==0.1.7
|
42 |
+
mdurl==0.1.2
|
43 |
+
mpmath==1.3.0
|
44 |
+
nest-asyncio==1.6.0
|
45 |
+
networkx==3.3
|
46 |
+
ninja==1.11.1.1
|
47 |
+
numpy==2.1.1
|
48 |
+
opencv-python-headless==4.10.0.84
|
49 |
+
orjson==3.10.7
|
50 |
+
packaging==24.1
|
51 |
+
pandas==2.2.3
|
52 |
+
parso==0.8.4
|
53 |
+
pillow==10.4.0
|
54 |
+
platformdirs==4.3.6
|
55 |
+
prompt_toolkit==3.0.48
|
56 |
+
psutil==6.0.0
|
57 |
+
pure_eval==0.2.3
|
58 |
+
pyclipper==1.3.0.post5
|
59 |
+
pydantic==2.9.2
|
60 |
+
pydantic_core==2.23.4
|
61 |
+
pydub==0.25.1
|
62 |
+
Pygments==2.18.0
|
63 |
+
pyparsing==3.1.4
|
64 |
+
pytesseract==0.3.13
|
65 |
+
python-bidi==0.6.0
|
66 |
+
python-dateutil==2.9.0.post0
|
67 |
+
python-multipart==0.0.12
|
68 |
+
pytz==2024.2
|
69 |
+
pywin32==306; sys_platform == "win32"
|
70 |
+
PyYAML==6.0.2
|
71 |
+
pyzmq==26.2.0
|
72 |
+
requests==2.32.3
|
73 |
+
rich==13.8.1
|
74 |
+
ruff==0.6.8
|
75 |
+
scikit-image==0.24.0
|
76 |
+
scipy==1.14.1
|
77 |
+
semantic-version==2.10.0
|
78 |
+
shapely==2.0.6
|
79 |
+
shellingham==1.5.4
|
80 |
+
six==1.16.0
|
81 |
+
sniffio==1.3.1
|
82 |
+
stack-data==0.6.3
|
83 |
+
starlette==0.38.6
|
84 |
+
sympy==1.13.3
|
85 |
+
tifffile==2024.9.20
|
86 |
+
tomlkit==0.12.0
|
87 |
+
torch==2.4.1
|
88 |
+
torchvision==0.19.1
|
89 |
+
tornado==6.4.1
|
90 |
+
tqdm==4.66.5
|
91 |
+
traitlets==5.14.3
|
92 |
+
typer==0.12.5
|
93 |
+
typing_extensions==4.12.2
|
94 |
+
tzdata==2024.2
|
95 |
+
urllib3==2.2.3
|
96 |
+
uvicorn==0.31.0
|
97 |
+
wcwidth==0.2.13
|
98 |
+
websockets==12.0
|