Amiruzzaman committed on
Commit
76b86e7
·
verified ·
1 Parent(s): 3fa0fb5

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +2 -0
  2. app.py +117 -0
  3. requirements.txt +1 -0
  4. tfidf_vectorizer.sav +3 -0
  5. trained_model.sav +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tfidf_vectorizer.sav filter=lfs diff=lfs merge=lfs -text
37
+ trained_model.sav filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+
4
# Paths of the serialized artifacts shipped next to this script: the trained
# model and the TF-IDF vectorizer used to turn input text into features.
model_filename = 'trained_model.sav'
vectorizer_filename = 'tfidf_vectorizer.sav'

# Deserialize both artifacts. NOTE: unpickling can execute arbitrary code, so
# these files must only ever come from a trusted source.
with open(model_filename, 'rb') as model_file, \
        open(vectorizer_filename, 'rb') as vectorizer_file:
    model = pickle.load(model_file)
    vectorizer = pickle.load(vectorizer_file)
13
+
14
+ # Define a function to predict cyberbullying and filter bad words
15
# Words and phrases that get masked in the displayed text. The entries are
# kept exactly as originally listed (including duplicates and stray trailing
# spaces); they are normalised once in _build_bad_word_pattern().
_CYBERBULLYING_WORDS = (
    "bitch", "fuck", "asshole", "shitty", "ass", "slut", "cunt", "motherfucker", "wanker", "dick", "shit", "bastard",
    "pissed off", "arse", "bugger", "bloody", "whore", "anal", "anus", "arse", "arrse", "assbag", "assbandit",
    "assbanger", "assbite", "assclown", "asscock", "asscracker", "asses", "assface", "assfuck", "assfucker",
    "assfukka", "assgoblin", "asshat", "asshead", "asshole", "assholes", "asshopper", "assjacker", "asslick",
    "asslicker", "assmonkey", "assmunch", "assmuncher", "assnigger", "asspirate", "assshit", "assshole", "asssucker",
    "asswad", "asswhole", "asswipe", "auto erotic", "autoerotic", "asswound", "boobs", "bitch", "b1tch", "ballbag",
    "balls", "ballsack", "bampot", "bangbros", "bareback", "barely legal", "barenaked", "bastard", "bastardo", "bastinado",
    "bbw", "bdsm", "bitches", "bitchin", "bitching", "bitchtits", "bitchy", "blowjob", "boob", "boobs", "booobs", "boooobs",
    "booooobs", "booooooobs", "brotherfucker", "bumblefuck", "bung hole", "buttcheeks", "buttfucka",
    "buttfucker", "butthole", "buttmuch", "buttplug", "cock", "cocksucker", "camgirl", "camslut",
    "camwhore", "circlejerk", "clit", "cleveland steamer", "clit", "clitface", "clitfuck", "clitoris",
    "clits", "clover clamps", "clusterfuck", "cockass", "cockbite", "cockburger", "cockeye", "cockface",
    "cockfucker", "cockhead", "cockjockey", "cockknoker", "cocklump", "cockmaster", "cockmongler",
    "cockmongruel", "cockmonkey", "cockmunch", "cockmuncher", "cocknose", "cocknugget", "cocks",
    "cockshit", "cocksmith", "cocksmoke", "cocksmoker", "cocksniffer", "cocksuck", "cocksucked",
    "cocksucker", "cocksucking", "cocksucks", "cocksuka", "cocksukka", "cockwaffle", "cok", "cokmuncher",
    "coons", "cooter", "coprolagnia", "coprophilia", "cornhole", "creampie", "crotte", "cum", "cumbubble",
    "cumdumpster", "cumguzzler", "cumjockey", "cummer", "cumming", "cums", "cumshot", "cumslut", "cumtart",
    "cunilingus", "cunillingus", "cunnie", "cunnilingus", "cunt", "cuntass", "cuntface", "cunthole",
    "cuntlick", "cuntlicker", "cuntlicking", "cuntrag", "cunts", "cuntslut", "cyalis", "cyberfuc",
    "cyberfuck", "cyberfucked", "cyberfucker", "cyberfuckers", "cyberfucking", "d1ck", "darkie",
    "date rape", "daterape", "deep throat", "deepthroat", "deggo", "dendrophilia", "dick", "dickbag",
    "dickbeaters", "dickface", "dickfuck", "dickfucker", "dickhead", "dickhole", "dickjuice", "dickmilk ",
    "dickmonger", "dicks", "dickslap", "dicksucker", "dicksucking", "dicktickler", "dickwad",
    "dickweasel", "dickweed", "dickwod", "dike", "dildo", "dildos", "doggystyle ", "donkeyribber",
    "doochbag", "double dong", "penetration", "doublelift", "douche", "douchebag", "dumbass",
    "dumbcunt", "dumbfuck", "ejaculate", "ejaculated", "ejaculates", "ejaculating", "ejaculatings",
    "ejaculation", "ejakulate", "erotism", "eunuch", "f u c k", "fagfucker", "fagging", "faggit",
    "faggitt", "faggot", "faggotcock", "fanny", "fannyflaps", "fannyfucker", "fanyy", "fatass", "fuck",
    "fucker", "fucking", "fecal", "feck", "fecker", "felch", "felching", "fellate", "fellatio", "feltch",
    "female squirting", "femdom", "figging", "fingerbang", "fingerfuck", "fingerfucked", "fingerfucker",
    "fingerfuckers", "fingerfucking", "fingerfucks", "fingering", "fistfuck", "fistfucked", "fistfucker",
    "fistfuckers", "fistfucking", "fistfuckings", "fistfucks", "fisting",
    "footjob", "frotting", "fuckass", "fuckbag", "fuckboy", "fuckbrain", "fuckbutt",
    "fuckersucker", "fuckface", "fuckhead", "fuckheads", "fuckhole", "fuckin", "fucking",
    "fuckings", "fucking shit motherfucker", "fuckme", "fucknut", "fucknutt", "fuckoff", "fucks",
    "fuckstick", "fucktard", "fucktards", "fucktart", "fucktwat", "fuckup", "fuckwad", "fuckwhit",
    "fuckwit", "fuckwitt", "fudge packer", "fudgepacker", "fuk", "fuker", "fukker", "fukkin", "fuks",
    "fukwhit", "fukwit", "futanari", "fux", "fuxor", "g-spot", "gangbang", "gangbanged", "gangbangs",
    "gayass", "gaybob", "gaydo", "gayfuck", "gayfuckist", "goregasm", "handjob", "hard core", "hardcore",
    "hardcoresex", "hooker", "arse", "ass fuck", "ass hole", "assfucker", "asshole", "assshole", "bastard",
    "fucking bitch", "cock", "bloody hell", "boong", "cockfucker", "cocksuck", "coon", "cyberfuck",
    "erection", "erotic", "faggot fuck", "fuck off", "fuck you", "fuckass", "fuckhole", "hardcore",
    "lesbian", "lesbians", "motherfuck", "negro", "nigger", "orgasim", "orgasm", "penis", "penisfucker",
    "piss", "piss off", "pussy", "sexy shit", "sexy slut", "son of a bitch", "suck tits", "xxx",
    "kill yourself", "fuck yourself", "beheading", "terrorist",
)


def _build_bad_word_pattern(words):
    """Compile one case-insensitive, whole-word regex matching any entry of *words*."""
    import re  # local import: `re` is not among the file's top-level imports

    # Strip stray whitespace, lowercase, and drop duplicates while keeping order.
    unique = dict.fromkeys(w.strip().lower() for w in words if w.strip())
    # Longest entries first, so a phrase such as "fuck you" is matched as a
    # whole before its shorter prefix "fuck" gets a chance.
    ordered = sorted(unique, key=len, reverse=True)
    alternation = "|".join(re.escape(w) for w in ordered)
    # The lookarounds demand a non-word character (or string edge) on both
    # sides, so "ass" no longer matches inside "class" or "assume".
    return re.compile(rf"(?<!\w)(?:{alternation})(?!\w)", re.IGNORECASE)


# Built once instead of re-creating the word list on every call.
_BAD_WORD_PATTERN = _build_bad_word_pattern(_CYBERBULLYING_WORDS)


def predict_cyberbullying_and_filter_bad_words(text):
    """Classify *text* and mask any listed bad words it contains.

    The lowercased text is vectorized with the module-level ``vectorizer``
    and classified with the module-level ``model``. Independently of the
    model, every whole-word, case-insensitive occurrence of an entry in
    ``_CYBERBULLYING_WORDS`` is replaced by asterisks of the same length.

    Args:
        text: The raw user-supplied string.

    Returns:
        A tuple ``(label, filtered_text, bad_words)``:
        ``label`` is the first element of ``model.predict(...)``;
        ``filtered_text`` is *text* with matches masked, original casing of
        everything else preserved;
        ``bad_words`` lists each matched entry once, lowercased, in order of
        first appearance in *text*.
    """
    # The model is fed lowercased text, exactly as before.
    text_tfidf = vectorizer.transform([text.lower()])
    prediction = model.predict(text_tfidf)

    bad_words = []

    def _mask(match):
        # Record each distinct hit once, then hide it behind asterisks.
        found = match.group(0).lower()
        if found not in bad_words:
            bad_words.append(found)
        return '*' * len(match.group(0))

    filtered_text = _BAD_WORD_PATTERN.sub(_mask, text)

    return prediction[0], filtered_text, bad_words
84
+
85
# Streamlit front end: take free text, run the classifier / word filter on
# demand, and show the outcome.
import html  # local to this section: escapes user text before it is embedded in HTML

st.title("Cyberbullying Detection App (English)")

# Free-text input for the message to analyse.
user_input = st.text_area("Enter text:", "")

# Run the prediction only when the button is pressed.
if st.button("Predict"):
    if user_input.strip():
        prediction, filtered_text, bad_words = predict_cyberbullying_and_filter_bad_words(user_input)

        # Any label other than "not_cyberbullying" is reported as a bullying type.
        if prediction != "not_cyberbullying":
            st.write("Prediction: Cyberbullying")
            st.write(f"Cyberbullying Type: {prediction}")
        else:
            st.write("Prediction: Not Cyberbullying")

        if bad_words:
            st.write(f"Bad Words: {', '.join(bad_words)}")
            st.write("Filtered Text:")
            # The text originates from the user, so it must be escaped before
            # being placed inside markup rendered with unsafe_allow_html=True.
            st.write(
                f"<span style='color:red; font-weight:bold'>{html.escape(filtered_text)}</span>",
                unsafe_allow_html=True,
            )
        else:
            st.write("<span style='color:cyan;'>No bad words found.</span>", unsafe_allow_html=True)
            st.write("Original Text:")
            # Show the input exactly as typed; raw HTML stays disabled so
            # Streamlit escapes any tags the user entered.
            st.write(user_input)
    else:
        st.warning("Please enter some text to analyze.")

# NOTE(review): indentation was not recoverable from the source view; the
# samples are assumed to be shown unconditionally, outside the button branch.
st.header("Sample Texts")
st.write(
    "It's always the filthy <span style='color:red; font-weight:bold'>bitch</span> that creates problem between us",
    unsafe_allow_html=True,
)
st.write(
    "Do you believe it is appropriate to refer to a Muslim as a <span style='color:red; font-weight:bold'>terrorist</span>?",
    unsafe_allow_html=True,
)
st.write("I hope you're doing well and having a great day. Let's catch up soon! 😊")
st.write("The team's score is disgraceful.")
117
+
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ scikit-learn
tfidf_vectorizer.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4348f9a8bf9f6296ccebee2a79efca27ff06d2ff76f48e8c587000c17143a71e
3
+ size 1047315
trained_model.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ebdf45a74171d8a3dc272d2d21b3022979576f601f20ad5ec05445887935999
3
+ size 1440050