Skip to content

Commit 22333a9

Browse files
committed
Add word-list based spam filtering
1 parent 9905742 commit 22333a9

File tree

1 file changed

+60
-15
lines changed

1 file changed

+60
-15
lines changed

src/forum.nim

Lines changed: 60 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,11 @@ var
5858
mailer: Mailer
5959
karaxHtml: string
6060

61+
when defined(wordlistSpamCutoff):
  # The word-list spam filter is opt-in at compile time: it only exists when
  # the `wordlistSpamCutoff` symbol is defined. The integer supplied through
  # the define replaces the default of 0 below.
  const wordlistSpamCutoff {.intdefine.} = 0
  echo("Wordlist spam cutoff enabled! ", wordlistSpamCutoff)
  # Word -> spam score lookup table; it starts out empty.
  var wordList = initTable[string, float]()
65+
6166
proc init(c: TForumData) =
6267
c.userPass = ""
6368
c.userName = ""
@@ -242,20 +247,21 @@ proc rateLimitCheck(c: TForumData): bool =
242247
if last300s > 6: return true
243248
return false
244249

245-
proc stopForumSpamCheck(c: TForumData): bool =
246-
if c.rank == Moderated:
247-
let
248-
client = newHttpClient()
249-
resp = client.get("https://api.stopforumspam.org/api?emailhash=" & c.email.getMd5 & "&json")
250-
if resp.code == Http200:
251-
let jresp = resp.body.parseJson
252-
if jresp["success"].num == 1 and jresp["emailhash"].hasKey("confidence") and jresp["emailhash"]["confidence"].str.parseFloat > 0.0:
253-
exec(
254-
db,
255-
sql"update person set status = ? where name = ?;",
256-
AutoSpammer, c.userName
257-
)
258-
return true
250+
when not defined(skipStopForumSpamCheck):
  proc stopForumSpamCheck(c: TForumData): bool =
    ## Queries StopForumSpam with the MD5 hash of the user's e-mail address
    ## and marks the account as `AutoSpammer` when the service reports a
    ## confidence greater than zero.
    ##
    ## Only users whose rank is `Moderated` are checked; for any other rank
    ## no request is made and `false` is returned. Returns `true` when the
    ## account was flagged, `false` otherwise.
    ##
    ## Network errors and JSON parsing errors are not handled here and
    ## propagate to the caller.
    if c.rank != Moderated:
      return false

    let client = newHttpClient()
    try:
      let resp = client.get(
        "https://api.stopforumspam.org/api?emailhash=" & c.email.getMd5 &
        "&json")
      if resp.code != Http200:
        return false

      let jresp = resp.body.parseJson
      # Same short-circuit order as before: "emailhash" is only inspected
      # once the service has reported success.
      if jresp["success"].num != 1 or
          not jresp["emailhash"].hasKey("confidence"):
        return false

      # The confidence was previously read via `.str` only, which raises if
      # the node is a JSON number. Accept both encodings.
      let confidenceNode = jresp["emailhash"]["confidence"]
      let confidence =
        if confidenceNode.kind == JString: confidenceNode.str.parseFloat
        else: confidenceNode.getFloat

      if confidence > 0.0:
        exec(
          db,
          sql"update person set status = ? where name = ?;",
          AutoSpammer, c.userName
        )
        return true
      return false
    finally:
      # Always release the connection; the client used to be leaked on
      # every call.
      client.close()
259265

260266
proc spamHeuristicsCheck(c: TForumData, content: string, topic = ""): bool =
261267
if c.rank == Moderated:
@@ -279,15 +285,39 @@ proc spamHeuristicsCheck(c: TForumData, content: string, topic = ""): bool =
279285
wordlistHitCount = wordlistHits.count(marker)
280286
if wordlistHitCount >= 2:
281287
spamScore += wordlistHitCount.float * 0.3
288+
echo "Post by user ", c.userName, " has spam heuristics score ", spamScore
282289
if spamScore > 1:
283-
echo "Post by user ", c.userid, " trigger AutoSpam with score ", spamScore
284290
exec(
285291
db,
286292
sql"update person set status = ? where name = ?;",
287293
AutoSpammer, c.userName
288294
)
289295
return true
290296

297+
when defined(wordlistSpamCutoff):
  proc wordlistSpamCheck(c: TForumData, content: string): bool =
    ## Scores `content` against the global `wordList` and marks the author
    ## as `AutoSpammer` when the average score of the recognised words is
    ## above `wordlistSpamCutoff / 1000`.
    ##
    ## Only users whose rank is `Moderated` are checked. The content is
    ## lower-cased and split on whitespace; words missing from `wordList`
    ## are ignored. Returns `true` when the account was flagged and `false`
    ## otherwise, including when no word of the post is in the list.
    if c.rank != Moderated:
      return false

    var
      total = 0.0
      hits = 0
    # `wordList` is a module-level global, hence the explicit gcsafe section
    # around every access to it.
    {.gcsafe.}:
      for token in splitWhitespace(toLowerAscii(content)):
        if token in wordList:
          inc hits
          total += wordList[token]

    if hits == 0:
      return false # No known words in list, extremely unlikely

    let average = total / float(hits)
    echo "Post by user ", c.userName, " has a wordlist spam score ", average
    let cutoff = wordlistSpamCutoff / 1000
    if average > cutoff:
      exec(
        db,
        sql"update person set status = ? where name = ?;",
        AutoSpammer, c.userName
      )
      return true
    return false
320+
291321
proc verifyIdentHash(
292322
c: TForumData, name: string, epoch: int64, ident: string
293323
) =
@@ -531,6 +561,10 @@ proc executeReply(c: TForumData, threadId: int, content: string,
531561
if spamHeuristicsCheck(c, content):
532562
raise newForumError("Your account has been automatically marked as spam. If you believe this is a mistake please contact a moderator. " & supportUrl)
533563

564+
when defined(wordlistSpamCutoff):
565+
if wordlistSpamCheck(c, content):
566+
raise newForumError("Your account has been automatically marked as spam. If you believe this is a mistake please contact a moderator. " & supportUrl)
567+
534568
if content.strip().len == 0:
535569
raise newForumError("Message cannot be empty")
536570

@@ -673,6 +707,10 @@ proc executeNewThread(c: TForumData, subject, msg, categoryID: string): (int64,
673707
if spamHeuristicsCheck(c, msg, subject):
674708
raise newForumError("Your account has been automatically marked as spam. If you believe this is a mistake please contact a moderator. " & supportUrl)
675709

710+
when defined(wordlistSpamCutoff):
711+
if wordlistSpamCheck(c, msg):
712+
raise newForumError("Your account has been automatically marked as spam. If you believe this is a mistake please contact a moderator. " & supportUrl)
713+
676714
result[0] = tryInsertID(db, query, subject, categoryID).int
677715
if result[0] < 0:
678716
raise newForumError("Subject already exists", @["subject"])
@@ -897,6 +935,13 @@ proc updateProfile(
897935

898936
include "main.tmpl"
899937

938+
proc parseWordlistLine(line: string, word: var string,
                       score: var float): bool {.used.} =
  ## Parses one `score,word` row of the spam word list.
  ##
  ## The line is split at the first comma only, so the word itself may
  ## contain commas. A pair of matching surrounding quotes (`"` or `'`) is
  ## removed from the word. On success `word` and `score` are set and `true`
  ## is returned. Rows without a comma, with a score that is not a number,
  ## or with an empty word return `false` and leave both outputs untouched.
  let fields = line.split(',', 1)
  if fields.len != 2:
    return false

  var parsedScore: float
  try:
    parsedScore = fields[0].strip.parseFloat
  except ValueError:
    return false

  var parsedWord = fields[1]
  if parsedWord.len >= 2 and parsedWord[0] in {'"', '\''} and
      parsedWord[^1] == parsedWord[0]:
    parsedWord = parsedWord[1..^2]
  if parsedWord.len == 0:
    return false

  word = parsedWord
  score = parsedScore
  return true

when defined(wordlistSpamCutoff):
  # Fill `wordList` from "wordlist.csv" at startup. Rows that cannot be
  # parsed (blank lines, a header row, missing comma) are skipped; they used
  # to abort startup with an exception.
  if fileExists("wordlist.csv"):
    echo "Found spam score wordlist"
    var
      word: string
      score: float
    for line in "wordlist.csv".lines:
      if parseWordlistLine(line, word, score):
        wordList[word] = score
944+
900945
initialise()
901946

902947
settings:

0 commit comments

Comments
 (0)