jbilcke-hf HF staff committed on
Commit
b2a37ba
·
1 Parent(s): 4e03ac8

making the censorship filter more restrictive

Browse files
Files changed (1) hide show
  1. src/app/engine/censorship.ts +18 -4
src/app/engine/censorship.ts CHANGED
@@ -114,7 +114,9 @@ const banned = [
114
  ]
115
 
116
  const getFingerprint = (word: string) => {
117
- return computeSecretFingerprint(word.toLocaleLowerCase())
 
 
118
  }
119
 
120
  const encode = (list: string[]) => {
@@ -128,10 +130,13 @@ const encode = (list: string[]) => {
128
  export const filterOutBadWords = (sentence: string) => {
129
  if (process.env.ENABLE_CENSORSHIP !== "true") { return sentence }
130
 
131
- const words = sentence.split(" ")
132
- return words.map(word => {
133
- const fingerprint = getFingerprint(word)
134
 
 
 
 
 
 
135
  let result: string = word
136
  // some users want to play it smart and bypass our system so let's play too
137
  if (chickens.includes(fingerprint)) {
@@ -145,6 +150,15 @@ export const filterOutBadWords = (sentence: string) => {
145
  } else if (banned.includes(fingerprint)) {
146
  result = "_BANNED_"
147
  }
 
 
 
 
148
  return result
149
  }).filter(item => item !== "_BANNED_").join(" ")
 
 
 
 
 
150
  }
 
114
  ]
115
 
116
  const getFingerprint = (word: string) => {
117
+ return computeSecretFingerprint(
118
+ word.toLocaleLowerCase().replaceAll(/[^a-zA-Z0-9]/gi, "")
119
+ )
120
  }
121
 
122
  const encode = (list: string[]) => {
 
130
  export const filterOutBadWords = (sentence: string) => {
131
  if (process.env.ENABLE_CENSORSHIP !== "true") { return sentence }
132
 
133
+ let requireCensorship = false
 
 
134
 
135
+ const words = sentence.replaceAll(/[^a-zA-Z0-9]/gi, " ").replaceAll(/\s+/gi, " ").trim().split(" ")
136
+
137
+ const sanitized = words.map(word => {
138
+ const fingerprint = getFingerprint(word)
139
+
140
  let result: string = word
141
  // some users want to play it smart and bypass our system so let's play too
142
  if (chickens.includes(fingerprint)) {
 
150
  } else if (banned.includes(fingerprint)) {
151
  result = "_BANNED_"
152
  }
153
+
154
+ if (result !== word) {
155
+ requireCensorship = true
156
+ }
157
  return result
158
  }).filter(item => item !== "_BANNED_").join(" ")
159
+
160
+ // if the user didn't try to use a bad word, we leave it untouched
161
+ // the words array has been degraded by the replace operation (it removes commas etc.), which isn't great
162
+ // so if the request was genuine and SFW, it's best to return the original prompt
163
+ return requireCensorship ? sanitized : sentence
164
  }