Skip to content

Commit

Permalink
Update moderation
Browse files Browse the repository at this point in the history
  • Loading branch information
robalb committed Jan 11, 2025
1 parent 672bcd8 commit a020d02
Show file tree
Hide file tree
Showing 5 changed files with 315 additions and 85 deletions.
10 changes: 9 additions & 1 deletion backend/internal/morse/moderation.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
)

// https://github.com/zacanger/profane-words/blob/master/words.json
//go:embed moderation_wordlist.txt
//go:embed moderation_badwords.txt
var badwordsListEncoded string

func badwordsListDecode(input string) []string {
Expand Down Expand Up @@ -56,6 +56,12 @@ func ContainsBadLanguage(input string) bool {
input = replacementPattern.ReplaceAllString(input, "")
input = normalize(input)

//remove false positives
for _, goodWord := range falsePositiveWordsList {
normalized := normalize(goodWord)
input = strings.ReplaceAll(input, normalized, " ")
}

badList := badwordsListDecode(badwordsListEncoded)
for _, badWord := range badList {
normalized := normalize(badWord)
Expand All @@ -65,3 +71,5 @@ func ContainsBadLanguage(input string) bool {
}
return false
}


229 changes: 229 additions & 0 deletions backend/internal/morse/moderation_badwords.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
nohfr
nubyr
nany
nagvfrzvgvp
nahf
neefr
nefpuybpu
nefr
nff
nffubyr
oqfz
ornfgvny
orngpu
orngbss
orfgvnyvgl
ovtoynpx
ovgpu
oynpxzna
oynpxf
oybjwbo
oybjlbhe
obyyvpx
obyybpx
obyybx
obarq
obare
obbo
obbbof
obbmre
oernfg
ohgg
ohggubyr
pnzrygbr
pyvg
pbpnvar
pbpx
pbaqbz
phz
phag
qnexvrf
qvncre
qvpx
qvyqb
qvatyr
qbttlfglyr
qbzvange
qbcuvyr
qeluhzc
rngzr
rngfuvg
rarzn
rerpgvba
rkperzrag
sntt
sntvg
sntf
sntg
sntgneq
snaphyb
srpny
sryngvb
srypure
srypuvat
srzqbz
srgvfu
svfgrq
svfgvat
sevtt
shppx
shpx
shuere
shxre
shxvat
tnatonat
tncr
tnfgurwrjf
tnl
travgnyf
tveyba
tveyfba
uragnv
ureohg
urecrf
uvfohg
uvgyre
ubbgref
ubeal
vaprfg
vagrepbefr
vagrepbhefr
vfenry
wrexbss
wrj
wvm
whaxvr
xrgnzvar
xvyy
xvyyzlfrys
xvyylbhefrys
yrfovna
yrfob
yvpxre
yvpxzl
yvxrntvey
yvxrtvey
znfgheon
zbyrfg
zlohg
zlubyr
zlzrng
anxrq
anmv
arterf
arteb
arbanmv
avtn
avtt
avtyrg
avtabt
avtbre
avtbe
avte
avtf
avthe
avvtn
avvtre
avvtt
avccyr
ahqr
ahqvr
ahgfnpx
ahggrq
betnfz
betvrf
betl
bevsvpr
bfnzn
cnagvr
cnagvrf
crqb
crr
cravyr
cravf
creirefvba
cvrubyr
cbbc
cbea
cebfgvghg
chffl
dhrre
djrre
encr
encvat
encvfg
egneq
fnqvfz
fnqvfg
fpebghz
frzra
frk
funt
furznyr
fuvonev
fuvg
fuhghc
fvffl
fxnapx
fxnax
fxhz
fybcrl
fybccl
fyhg
fzrtzn
fbqbz
fcrez
fdhveg
fgreonvg
fgreong
fgbarq
fgbare
fgencba
fgeonvg
fgeongr
fgheongr
fhpxrq
fhpxre
fhpxvat
fhpxzl
fhvpvqr
finfgvpn
fjnfgvpn
fjnfgvxn
gnxrbssure
gnxrbsslbhe
gurveohg
guerrfbzr
gvgf
gbvyrg
gbyznpb
gbathrvan
gbeghe
gbhpuzl
genaavr
genaal
genafirfgvgr
gheq
hcfxveg
intvan
ivoengbe
ihyyin
ihyin
jnaxvat
jnaxl
jrravr
jrvzne
jrvare
jrgqernz
juber
jvttn
jvttre
jvguntvey
jvgunjbzna
jvgutvey
jvgujbzra
lnbv
lbhqvr
lbheohg
lbheubyr
mbbcuvy
69 changes: 69 additions & 0 deletions backend/internal/morse/moderation_goodwords.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package morse


// falsePositiveWordsList holds harmless English words and word stems that the
// moderation filter treats as false positives.
//
// ContainsBadLanguage normalizes each entry and replaces every occurrence of
// it in the (already normalized) input with a single space before scanning for
// bad words. Entries therefore match as substrings, not whole words: a stem
// such as "assum" also covers "assuming" and "assumption".
//
// The replacements are applied in slice order and each one rewrites the input,
// so reordering entries can change which later entries still match.
//
// English false positives adapted from
// https://github.com/TwiN/go-away/ (MIT license).
var falsePositiveWordsList = []string{
"analy", // analysis, analytics
"arsenal",
"assassin",
"assaying", // was saying
"assert",
"assign",
"assimil", // assimilate, assimilation
"assist",
"associat", // associate, association
"assum", // assuming, assumption, assumed
"assur", // assurance
"banal",
"basement",
"bass",
"cass", // cassie, cassandra, carcass
"butter", // butter, butterfly
"butthe",
"button",
"canvass",
"circum", // circumstance, circumference
"clitheroe",
"cockburn",
"cocktail",
"cumber", // cumbersome, cucumber
"cumbing",
"cumulat", // cumulative, accumulate
"dickvandyke",
"document",
"evaluate",
"exclusive",
"expensive",
"explain",
"expression",
"grass",
"harass",
"hotwater",
"identit", // identity, identities
"kassa", // kassandra
"kassi", // kassie, kassidy
"lass", // class
"leafage",
"libshitz",
"magnacumlaude",
"mass",
"mocha",
"pass", // compass, passion
"penistone",
"phoebe",
"phoenix",
"pushit",
"sassy",
"saturday",
"scrap", // scrap, scrape, scraping
"serfage",
"sexist", // systems exist, sexist
"shoe",
"scunthorpe",
"stitch",
"sussex",
"therapist",
"therapeutic",
"wharfage",
}
8 changes: 8 additions & 0 deletions backend/internal/morse/moderation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ func TestContainsHateSpeech(t *testing.T) {
{"e", false},
{"t", false},
{"hi hello h i goodbye goodbie", false},
{"got to go, see you later see ya later", false},
{"the quick brown fox jumps over the lazy dog", false},
{"where are u from? i'm from italy", false},
{"shllo w rld ", false},
{"the quick brown fox j mh w rld ", false},
Expand All @@ -27,8 +29,14 @@ func TestContainsHateSpeech(t *testing.T) {
{"text containing tolmaco", true},
{"text containing t0!m4c0", true},
{"to lma co spacing ", true},
{"t-o-l-m-a-c----o", true},
{"t 0 l m 4 c o spacing ", true},
{"[email protected] spacing ", true},

{"This test will not --pass-- if there is a false positive", false},
{"I'm a business analyst living in sussex.", false},
{"1 in 10 therapists recommend playing the bass", false},
{"It's banal to assume that assassins and cocktails wil not make these tests pass", false},
}

for _, test := range tests {
Expand Down
Loading

0 comments on commit a020d02

Please sign in to comment.