From 70b9d497395e7e136458294ba273622b373249f5 Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Tue, 18 Feb 2025 09:15:20 -0300 Subject: [PATCH 01/19] chore: split checks into rules Signed-off-by: Felipe Zipitria --- src/crs_linter/cli.py | 66 +------ src/crs_linter/linter.py | 92 ++++++--- src/crs_linter/rules/__init__.py | 17 ++ src/crs_linter/rules/approved_tags.py | 22 +++ src/crs_linter/rules/capture.py | 63 +++++++ src/crs_linter/rules/crs_tag.py | 38 ++++ src/crs_linter/rules/deprecated.py | 23 +++ src/crs_linter/rules/duplicated.py | 13 ++ src/crs_linter/rules/ignore_case.py | 73 ++++++++ src/crs_linter/rules/indentation.py | 42 +++++ src/crs_linter/rules/lowercase_ignorecase.py | 22 +++ src/crs_linter/rules/ordered_actions.py | 90 +++++++++ src/crs_linter/rules/pl_consistency.py | 150 +++++++++++++++ src/crs_linter/rules/pl_tags.py | 0 src/crs_linter/rules/variables_usage.py | 185 +++++++++++++++++++ src/crs_linter/rules/version.py | 55 ++++++ 16 files changed, 866 insertions(+), 85 deletions(-) mode change 100755 => 100644 src/crs_linter/linter.py create mode 100644 src/crs_linter/rules/__init__.py create mode 100644 src/crs_linter/rules/approved_tags.py create mode 100644 src/crs_linter/rules/capture.py create mode 100644 src/crs_linter/rules/crs_tag.py create mode 100644 src/crs_linter/rules/deprecated.py create mode 100644 src/crs_linter/rules/duplicated.py create mode 100644 src/crs_linter/rules/ignore_case.py create mode 100644 src/crs_linter/rules/indentation.py create mode 100644 src/crs_linter/rules/lowercase_ignorecase.py create mode 100644 src/crs_linter/rules/ordered_actions.py create mode 100644 src/crs_linter/rules/pl_consistency.py create mode 100644 src/crs_linter/rules/pl_tags.py create mode 100644 src/crs_linter/rules/variables_usage.py create mode 100644 src/crs_linter/rules/version.py diff --git a/src/crs_linter/cli.py b/src/crs_linter/cli.py index a15fd72..551cd3b 100755 --- a/src/crs_linter/cli.py +++ b/src/crs_linter/cli.py @@ -11,14 +11,9 @@ from dulwich.contrib.release_robot import get_current_version from semver import Version -try: - from linter import Check -except ImportError: - from crs_linter.linter import Check -try: - from logger import Logger, Output -except ImportError: - from crs_linter.logger import Logger, Output +from crs_linter.linter import Linter +from crs_linter.logger import Logger, Output +from crs_linter.rules import indentation def remove_comments(data): @@ -187,54 +182,6 @@ def get_crs_version(directory, version=None, head_ref=None, commit_message=None) return crs_version -def check_indentation(filename, content): - error = False - - ### make a diff to check the indentations - try: - with open(filename, "r") as fp: - from_lines = fp.readlines() - if os.path.basename(filename) == "crs-setup.conf.example": - from_lines = remove_comments("".join(from_lines)).split("\n") - from_lines = [l + "\n" for l in from_lines] - except FileNotFoundError: - logger.error(f"Can't open file for indentation check: {filename}") - error = True - - # virtual output - writer = msc_pyparser.MSCWriter(content) - writer.generate() - output = [] - for l in writer.output: - output += [l + "\n" for l in l.split("\n") if l != "\n"] - - if len(from_lines) < len(output): - from_lines.append("\n") - elif len(from_lines) > len(output): - output.append("\n") - - diff = difflib.unified_diff(from_lines, output) - if from_lines == output: - logger.debug("Indentation check ok.") - else: - logger.debug("Indentation check found error(s)") - error = True - for d in diff: - d = d.strip("\n") - r = re.match(r"^@@ -(\d+),(\d+) \+\d+,\d+ @@$", d) - if r: - line1, line2 = [int(i) for i in r.groups()] - logger.error( - "an indentation error was found", - file=filename, - title="Indentation error", - line=line1, - end_line=line1 + line2, - ) - - return error - - def read_files(filenames): global logger @@ -288,7 +235,7 @@ def _arg_in_argv(argv, args): def parse_args(argv): parser = argparse.ArgumentParser( - prog="crs-linter", description="CRS Rules Check tool" + prog="crs-linter", description="CRS Rules Linter tool" ) parser.add_argument( "-o", @@ -421,7 +368,8 @@ def main(): for f in parsed.keys(): logger.start_group(f) logger.debug(f) - c = Check(parsed[f], f, txvars) + c = Linter(parsed[f], f, txvars) + ### check case usings c.check_ignore_case() @@ -450,7 +398,7 @@ def main(): title="Action order check", ) - error = check_indentation(f, parsed[f]) + error = indentation.check(f, parsed[f]) if error: retval = 1 diff --git a/src/crs_linter/linter.py b/src/crs_linter/linter.py old mode 100755 new mode 100644 index a335483..24cdda1 --- a/src/crs_linter/linter.py +++ b/src/crs_linter/linter.py @@ -1,39 +1,48 @@ -import sys import msc_pyparser -import difflib -import argparse import re -import subprocess -import logging -import os.path -def parse_config(text): - try: - mparser = msc_pyparser.MSCParser() - mparser.parser.parse(text) - return mparser.configlines +class LintProblem: + """Represents a linting problem found by crs-linter.""" + def __init__(self, line, end_line, column=None, desc='', rule=None): + #: Line on which the problem was found (starting at 1) + self.line = line + #: Line on which the problem ends + self.end_line = end_line + #: Column on which the problem was found (starting at 1) + self.column = column + #: Human-readable description of the problem + self.desc = desc + #: Identifier of the rule that detected the problem + self.rule = rule + self.level = None - except Exception as e: - print(e) + @property + def message(self): + if self.rule is not None: + return f'{self.desc} ({self.rule})' + return self.desc + def __eq__(self, other): + return (self.line == other.line and + self.column == other.column and + self.rule == other.rule) -def parse_file(filename): - try: - mparser = msc_pyparser.MSCParser() - with open(filename, "r") as f: - mparser.parser.parse(f.read()) - return mparser.configlines + def __lt__(self, other): + return (self.line < other.line or + (self.line == other.line and self.column < other.column)) - except Exception as e: - print(e) + def __repr__(self): + return f'{self.line}:{self.column}: {self.message}' -class Check(): - def __init__(self, data, filename=None, txvars={}): +class Linter: + ids = {} # list of rule id's and their location in files + vars = {} # list of TX variables and their location in files + def __init__(self, data, filename=None, txvars=None): # txvars is a global used hash table, but processing of rules is a sequential flow # all rules need this global table - self.globtxvars = txvars + self.globtxvars = txvars or {} # list available operators, actions, transformations and ctl args self.operators = "beginsWith|containsWord|contains|detectSQLi|detectXSS|endsWith|eq|fuzzyHash|geoLookup|ge|gsbLookup|gt|inspectFile|ipMatch|ipMatchF|ipMatchFromFile|le|lt|noMatch|pmFromFile|pmf|pm|rbl|rsub|rx|streq|strmatch|unconditionalMatch|validateByteRange|validateDTD|validateHash|validateSchema|validateUrlEncoding|validateUtf8Encoding|verifyCC|verifyCPF|verifySSN|within".split( "|" @@ -101,7 +110,6 @@ def __init__(self, data, filename=None, txvars={}): self.ids = {} # list of rule id's and their location in files # Any of these variables below are used to store the errors - self.error_case_mistmatch = [] # list of case mismatch errors self.error_action_order = [] # list of ordered action errors self.error_wrong_ctl_auditlogparts = [] # list of wrong ctl:auditLogParts @@ -120,10 +128,11 @@ def __init__(self, data, filename=None, txvars={}): self.error_tx_N_without_capture_action = ( [] ) # list of rules which uses TX.N without previous 'capture' - self.error_rule_hasnotest = ( + self.error_rule_hasnotest = ( [] ) # list of rules which don't have any tests # regex to produce tag from filename: + import os.path self.re_fname = re.compile(r"(REQUEST|RESPONSE)\-\d{3}\-") self.filename_tag_exclusions = [] @@ -231,6 +240,7 @@ def check_ignore_case(self): e["message"] += f" (rule: {self.current_ruleid})" def check_action_order(self): + import sys for d in self.data: if "actions" in d: max_order = 0 # maximum position of read actions @@ -771,6 +781,7 @@ def gen_crs_file_tag(self, fname=None): """ generate tag from filename """ + import os.path filename_for_tag = fname if fname is not None else self.filename filename = self.re_fname.sub("", os.path.basename(os.path.splitext(filename_for_tag)[0])) filename = filename.replace("APPLICATION-", "") @@ -993,3 +1004,32 @@ def find_ids_without_tests(self, test_cases, exclusion_list): 'message': f"rule does not have any tests; rule id: {rid}'" }) return True + + +def parse_config(text): + try: + mparser = msc_pyparser.MSCParser() + mparser.parser.parse(text) + return mparser.configlines + + except Exception as e: + print(e) + + +def parse_file(filename): + try: + mparser = msc_pyparser.MSCParser() + with open(filename, "r") as f: + mparser.parser.parse(f.read()) + return mparser.configlines + + except Exception as e: + print(e) + + +def get_id(actions): + """ Return the ID from actions """ + for a in actions: + if a["act_name"] == "id": + return int(a["act_arg"]) + return 0 diff --git a/src/crs_linter/rules/__init__.py b/src/crs_linter/rules/__init__.py new file mode 100644 index 0000000..34761fb --- /dev/null +++ b/src/crs_linter/rules/__init__.py @@ -0,0 +1,17 @@ +# import all checks + +from . import ( + approved_tags, + capture, + crs_tag, + deprecated, + duplicated, + ignore_case, + indentation, + lowercase_ignorecase, + ordered_actions, + pl_consistency, + pl_tags, + variables_usage, + version +) diff --git a/src/crs_linter/rules/approved_tags.py b/src/crs_linter/rules/approved_tags.py new file mode 100644 index 0000000..98b7a9b --- /dev/null +++ b/src/crs_linter/rules/approved_tags.py @@ -0,0 +1,22 @@ +from src.crs_linter.linter import LintProblem + + +def check_tags(self, tags): + """ + check that only tags from the util/APPROVED_TAGS file are used + """ + chained = False + ruleid = 0 + for d in self.data: + if "actions" in d: + for a in d["actions"]: + if a["act_name"] == "tag": + tag = a["act_arg"] + # check wheter tag is in tagslist + if tags.count(tag) == 0: + yield LintProblem( + rule_id=ruleid, + line=a["lineno"], + end_line=a["lineno"], + desc=f'rule uses unknown tag: "{tag}"; only tags registered in the util/APPROVED_TAGS file may be used; rule id: {ruleid}' + ) diff --git a/src/crs_linter/rules/capture.py b/src/crs_linter/rules/capture.py new file mode 100644 index 0000000..3f33a04 --- /dev/null +++ b/src/crs_linter/rules/capture.py @@ -0,0 +1,63 @@ +def check_capture_action(self): + """ + check that every chained rule has a `capture` action if it uses TX.N variable + """ + chained = False + ruleid = 0 + chainlevel = 0 + capture_level = None + re_number = re.compile(r"^\d$") + has_capture = False + use_captured_var = False + captured_var_chain_level = 0 + for d in self.data: + # only the SecRule object is relevant + if d["type"].lower() == "secrule": + for v in d["variables"]: + if v["variable"].lower() == "tx" and re_number.match( + v["variable_part"] + ): + # only the first occurrence required + if not use_captured_var: + use_captured_var = True + captured_var_chain_level = chainlevel + if "actions" in d: + if not chained: + ruleid = 0 + chainlevel = 0 + else: + chained = False + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if a["act_name"] == "chain": + chained = True + chainlevel += 1 + if a["act_name"] == "capture": + capture_level = chainlevel + has_capture = True + if ruleid > 0 and not chained: # end of chained rule + if use_captured_var: + # we allow if target with TX:N is in the first rule + # of a chained rule without 'capture' + if captured_var_chain_level > 0: + if ( + not has_capture + or captured_var_chain_level < capture_level + ): + self.error_tx_N_without_capture_action.append( + { + "ruleid": ruleid, + "line": a["lineno"], + "endLine": a["lineno"], + "message": f"rule uses TX.N without capture; rule id: {ruleid}'", + } + ) + # clear variables + chained = False + chainlevel = 0 + has_capture = False + capture_level = 0 + captured_var_chain_level = 0 + use_captured_var = False + ruleid = 0 diff --git a/src/crs_linter/rules/crs_tag.py b/src/crs_linter/rules/crs_tag.py new file mode 100644 index 0000000..67758f0 --- /dev/null +++ b/src/crs_linter/rules/crs_tag.py @@ -0,0 +1,38 @@ + +def check_crs_tag(self): + """ + check that every rule has a `tag:'OWASP_CRS'` action + """ + chained = False + ruleid = 0 + has_crs = False + for d in self.data: + if "actions" in d: + chainlevel = 0 + + if not chained: + ruleid = 0 + has_crs = False + chainlevel = 0 + else: + chained = False + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if a["act_name"] == "chain": + chained = True + chainlevel += 1 + if a["act_name"] == "tag": + if chainlevel == 0: + if a["act_arg"] == "OWASP_CRS": + has_crs = True + if ruleid > 0 and not has_crs: + self.error_no_crstag.append( + { + "ruleid": ruleid, + "line": a["lineno"], + "endLine": a["lineno"], + "message": f"rule does not have tag with value 'OWASP_CRS'; rule id: {ruleid}", + } + ) + diff --git a/src/crs_linter/rules/deprecated.py b/src/crs_linter/rules/deprecated.py new file mode 100644 index 0000000..c45f95a --- /dev/null +++ b/src/crs_linter/rules/deprecated.py @@ -0,0 +1,23 @@ +def check_ctl_audit_log(self): + """check there is no ctl:auditLogParts action in any rules""" + for d in self.data: + if "actions" in d: + for a in d["actions"]: + # get the 'id' of rule + self.curr_lineno = a["lineno"] + if a["act_name"] == "id": + self.current_ruleid = int(a["act_arg"]) + + # check if action is ctl:auditLogParts + if ( + a["act_name"].lower() == "ctl" + and a["act_arg"].lower() == "auditlogparts" + ): + self.error_wrong_ctl_auditlogparts.append( + { + "ruleid": self.current_ruleid, + "line": a["lineno"], + "endLine": a["lineno"], + "message": "", + } + ) diff --git a/src/crs_linter/rules/duplicated.py b/src/crs_linter/rules/duplicated.py new file mode 100644 index 0000000..b08bc67 --- /dev/null +++ b/src/crs_linter/rules/duplicated.py @@ -0,0 +1,13 @@ +from src.crs_linter.linter import LintProblem, get_id + + +def check(data, ids): + """ Checks the duplicated rule ID """ + for d in data: + if "actions" in d: + rule_id = get_id(d["actions"]) + if rule_id in ids: + yield LintProblem( + rule_id, + desc="id %d is duplicated, previous place: %s:%d" + ) diff --git a/src/crs_linter/rules/ignore_case.py b/src/crs_linter/rules/ignore_case.py new file mode 100644 index 0000000..33f1274 --- /dev/null +++ b/src/crs_linter/rules/ignore_case.py @@ -0,0 +1,73 @@ +def check_ignore_case(self): + # check the ignore cases at operators, actions, + # transformations and ctl arguments + for d in self.data: + if "actions" in d: + aidx = 0 # index of action in list + if not self.chained: + self.current_ruleid = 0 + else: + self.chained = False + + for a in d["actions"]: + action = a["act_name"].lower() + self.curr_lineno = a["lineno"] + if action == "id": + self.current_ruleid = int(a["act_arg"]) + + if action == "chain": + self.chained = True + + # check the action is valid + if action not in self.actionsl: + self.store_error(f"Invalid action {action}") + # check the action case sensitive format + if ( + self.actions[self.actionsl.index(action)] + != a["act_name"] + ): + self.store_error(f"Action case mismatch: {action}") + + if a["act_name"] == "ctl": + # check the ctl argument is valid + if a["act_arg"].lower() not in self.ctlsl: + self.store_error(f'Invalid ctl {a["act_arg"]}') + # check the ctl argument case sensitive format + if ( + self.ctls[self.ctlsl.index(a["act_arg"].lower())] + != a["act_arg"] + ): + self.store_error(f'Ctl case mismatch: {a["act_arg"]}') + if a["act_name"] == "t": + # check the transform is valid + if a["act_arg"].lower() not in self.transformsl: + self.store_error(f'Invalid transform: {a["act_arg"]}') + # check the transform case sensitive format + if ( + self.transforms[ + self.transformsl.index(a["act_arg"].lower()) + ] + != a["act_arg"] + ): + self.store_error( + f'Transform case mismatch : {a["act_arg"]}' + ) + aidx += 1 + if "operator" in d and d["operator"] != "": + self.curr_lineno = d["oplineno"] + # strip the operator + op = d["operator"].replace("!", "").replace("@", "") + # check the operator is valid + if op.lower() not in self.operatorsl: + self.store_error(f'Invalid operator: {d["operator"]}') + # check the operator case sensitive format + if self.operators[self.operatorsl.index(op.lower())] != op: + self.store_error(f'Operator case mismatch: {d["operator"]}') + else: + if d["type"].lower() == "secrule": + self.curr_lineno = d["lineno"] + self.store_error("Empty operator isn't allowed") + if self.current_ruleid > 0: + for e in self.error_case_mistmatch: + e["ruleid"] = self.current_ruleid + e["message"] += f" (rule: {self.current_ruleid})" diff --git a/src/crs_linter/rules/indentation.py b/src/crs_linter/rules/indentation.py new file mode 100644 index 0000000..ae5cd6a --- /dev/null +++ b/src/crs_linter/rules/indentation.py @@ -0,0 +1,42 @@ + +def check(filename, content): + error = False + + ### make a diff to check the indentations + try: + with open(filename, "r") as fp: + from_lines = fp.readlines() + if filename.startswith("crs-setup.conf.example"): + from_lines = remove_comments("".join(from_lines)).split("\n") + from_lines = [l + "\n" for l in from_lines] + except: + logger.error(f"Can't open file for indentation check: {filename}") + error = True + + # virtual output + writer = msc_pyparser.MSCWriter(content) + writer.generate() + output = [] + for l in writer.output: + output += [l + "\n" for l in l.split("\n") if l != "\n"] + + if len(from_lines) < len(output): + from_lines.append("\n") + elif len(from_lines) > len(output): + output.append("\n") + + diff = difflib.unified_diff(from_lines, output) + if from_lines == output: + logger.debug("Indentation check ok.") + else: + logger.debug("Indentation check found error(s)") + error = True + + for d in diff: + d = d.strip("\n") + r = re.match(r"^@@ -(\d+),(\d+) \+\d+,\d+ @@$", d) + if r: + line1, line2 = [int(i) for i in r.groups()] + logger.error("an indentation error was found", file=filename, title="Indentation error", line=line1, end_line=line1 + line2) + + return error diff --git a/src/crs_linter/rules/lowercase_ignorecase.py b/src/crs_linter/rules/lowercase_ignorecase.py new file mode 100644 index 0000000..1752ea5 --- /dev/null +++ b/src/crs_linter/rules/lowercase_ignorecase.py @@ -0,0 +1,22 @@ +def check_lowercase_ignorecase(self): + ruleid = 0 + for d in self.data: + if d["type"].lower() == "secrule": + if d["operator"] == "@rx": + regex = d["operator_argument"] + if regex.startswith("(?i)"): + if "actions" in d: + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if a["act_name"] == "t": + # check the transform is valid + if a["act_arg"].lower() == "lowercase": + self.error_combined_transformation_and_ignorecase.append( + { + "ruleid": ruleid, + "line": a["lineno"], + "endLine": a["lineno"], + "message": f'rule uses (?i) in combination with t:lowercase: \'{a["act_arg"]}\'; rule id: {ruleid}', + } + ) diff --git a/src/crs_linter/rules/ordered_actions.py b/src/crs_linter/rules/ordered_actions.py new file mode 100644 index 0000000..c940ca2 --- /dev/null +++ b/src/crs_linter/rules/ordered_actions.py @@ -0,0 +1,90 @@ +from crs_linter.linter import LintProblem + +ORDERED_ACTIONS = [ + "id", # 0 + "phase", # 1 + "allow", + "block", + "deny", + "drop", + "pass", + "proxy", + "redirect", + "status", + "capture", # 10 + "t", + "log", + "nolog", + "auditlog", + "noauditlog", + "msg", + "logdata", + "tag", + "sanitisearg", + "sanitiserequestheader", # 20 + "sanitisematched", + "sanitisematchedbytes", + "ctl", + "ver", + "severity", + "multimatch", + "initcol", + "setenv", + "setvar", + "expirevar", # 30 + "chain", + "skip", + "skipafter", +] + +def check(data): + global act_idx, current_rule_id + chained = False + + for d in data: + if "actions" in d: + max_order = 0 # maximum position of read actions + if not chained: + current_rule_id = _get_id(d["actions"]) + else: + chained = False + + for index, a in enumerate(d["actions"]): + action = a["act_name"].lower() + # get the 'id' of rule + current_lineno = a["lineno"] + + # check if chained + if a["act_name"] == "chain": + chained = True + + # get the index of action from the ordered list + # above from constructor + try: + act_idx = ORDERED_ACTIONS.index(action) + except ValueError: + yield LintProblem( + line=current_lineno, + end_line=current_lineno, + rule_id=current_rule_id, + desc=f'action "{action}" at pos {index - 1} is in the wrong order: "{action}" at pos {index}', + rule="ordered_actions", + ) + + # if the index of current action is @ge than the previous + # max value, load it into max_order + if act_idx >= max_order: + max_order = act_idx + else: + # action is the previous action's position in list + # act_idx is the current action's position in list + # if the prev is @gt actually, means it's at wrong position + if act_idx < max_order: + yield LintProblem( + line=current_lineno, + end_line=current_lineno, + rule_id=current_rule_id, + desc=f'action "{action}" at pos {index - 1} is in the wrong order: "{action}" at pos {index}', + rule="ordered_actions", + ) + diff --git a/src/crs_linter/rules/pl_consistency.py b/src/crs_linter/rules/pl_consistency.py new file mode 100644 index 0000000..9e41606 --- /dev/null +++ b/src/crs_linter/rules/pl_consistency.py @@ -0,0 +1,150 @@ +def check_pl_consistency(self): + """this method checks the PL consistency + + the function iterates through the rules, and catches the set PL, eg: + + SecRule TX:DETECTION_PARANOIA_LEVEL "@lt 1" ... + this means we are on PL1 currently + + all rules must consist with current PL at the used tags and variables + + eg: + tag:'paranoia-level/1' + ^ + setvar:'tx.outbound_anomaly_score_pl1=+%{tx.error_anomaly_score}'" + ^^^ + additional relations: + * all rules must have the "tag:'paranoia-level/N'" if it does not have "nolog" action + * if rule have "nolog" action it must not have "tag:'paranoia-level/N'" action + * anomaly scoring value on current PL must increment by value corresponding to severity + + """ + curr_pl = 0 + tags = [] # collect tags + _txvars = {} # collect setvars and values + _txvlines = {} # collect setvars and its lines + severity = None # severity + has_nolog = False # nolog action exists + + for d in self.data: + # find the current PL + if d["type"].lower() in ["secrule"]: + for v in d["variables"]: + if ( + v["variable"].lower() == "tx" + and v["variable_part"].lower() == "detection_paranoia_level" + and d["operator"] == "@lt" + and re.match(r"^\d$", d["operator_argument"]) + ): + curr_pl = int(d["operator_argument"]) + + if "actions" in d: + chained = False + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if a["act_name"] == "severity": + severity = a["act_arg"].replace("'", "").lower() + if a["act_name"] == "tag": + tags.append(a) + if a["act_name"] == "setvar": + if a["act_arg"][0:2].lower() == "tx": + # this hack necessary, because sometimes we use setvar argument + # between '', sometimes not + # eg + # setvar:crs_setup_version=334 + # setvar:'tx.inbound_anomaly_score_threshold=5' + txv = a["act_arg"][3:].split("=") + txv[0] = txv[0].lower() # variable name + if len(txv) > 1: + # variable value + txv[1] = txv[1].lower().strip(r"+\{}") + else: + txv.append(a["act_arg_val"].strip(r"+\{}")) + _txvars[txv[0]] = txv[1] + _txvlines[txv[0]] = a["lineno"] + if a["act_name"] == "nolog": + has_nolog = True + if a["act_name"] == "chain": + chained = True + + has_pl_tag = False + for a in tags: + if a["act_arg"][0:14] == "paranoia-level": + has_pl_tag = True + pltag = int(a["act_arg"].split("/")[1]) + if has_nolog: + self.error_inconsistent_pltags.append( + { + "ruleid": ruleid, + "line": a["lineno"], + "endLine": a["lineno"], + "message": f'tag \'{a["act_arg"]}\' with \'nolog\' action, rule id: {ruleid}', + } + ) + elif pltag != curr_pl and curr_pl > 0: + self.error_inconsistent_pltags.append( + { + "ruleid": ruleid, + "line": a["lineno"], + "endLine": a["lineno"], + "message": f'tag \'{a["act_arg"]}\' on PL {curr_pl}, rule id: {ruleid}', + } + ) + + if not has_pl_tag and not has_nolog and curr_pl >= 1: + self.error_inconsistent_pltags.append( + { + "ruleid": ruleid, + "line": a["lineno"], + "endLine": a["lineno"], + "message": f"rule does not have `paranoia-level/{curr_pl}` action, rule id: {ruleid}", + } + ) + + for t in _txvars: + subst_val = re.search( + r"%\{tx.[a-z]+_anomaly_score}", _txvars[t], re.I + ) + val = re.sub(r"[+%{}]", "", _txvars[t]).lower() + # check if last char is a numeric, eg ...anomaly_score_pl1 + scorepl = re.search(r"anomaly_score_pl\d$", t) + if scorepl: + if curr_pl > 0 and int(t[-1]) != curr_pl: + self.error_inconsistent_plscores.append( + { + "ruleid": ruleid, + "line": _txvlines[t], + "endLine": _txvlines[t], + "message": f"variable {t} on PL {curr_pl}, rule id: {ruleid}", + } + ) + if severity is None and subst_val: # - do we need this? + self.error_inconsistent_plscores.append( + { + "ruleid": ruleid, + "line": _txvlines[t], + "endLine": _txvlines[t], + "message": f"missing severity action, rule id: {ruleid}", + } + ) + else: + if val != "tx.%s_anomaly_score" % (severity) and val != "0": + self.error_inconsistent_plscores.append( + { + "ruleid": ruleid, + "line": _txvlines[t], + "endLine": _txvlines[t], + "message": f"invalid value for anomaly_score_pl{t[-1]}: {val} with severity {severity}, rule id: {ruleid}", + } + ) + # variable was found so we need to mark it as used + self.globtxvars[t]["used"] = True + + # reset local variables if we are done with a rule <==> no more 'chain' action + if not chained: + tags = [] # collect tags + _txvars = {} # collect setvars and values + _txvlines = {} # collect setvars and its lines + severity = None # severity + has_nolog = False # rule has nolog action diff --git a/src/crs_linter/rules/pl_tags.py b/src/crs_linter/rules/pl_tags.py new file mode 100644 index 0000000..e69de29 diff --git a/src/crs_linter/rules/variables_usage.py b/src/crs_linter/rules/variables_usage.py new file mode 100644 index 0000000..db39db6 --- /dev/null +++ b/src/crs_linter/rules/variables_usage.py @@ -0,0 +1,185 @@ +def check_tx_variable(self): + """this function checks if a used TX variable has set + + a variable is used when: + * it's an operator argument: "@rx %{TX.foo}" + * it's a target: SecRule TX.foo "@..." + * it's a right side value in a value giving: setvar:tx.bar=tx.foo + + this function collects the variables if it is used but not set previously + """ + # set if rule checks the existence of var, e.g., `&TX:foo "@eq 1"` + check_exists = None + has_disruptive = False # set if rule contains disruptive action + chained = False + for d in self.data: + if d["type"].lower() in ["secrule", "secaction"]: + if not chained: + # works only in Apache, libmodsecurity uses default phase 1 + phase = 2 + ruleid = 0 + else: + chained = False + + # iterate over actions and collect these values: + # ruleid, phase, chained, rule has or not any disruptive action + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if a["act_name"] == "phase": + phase = int(a["act_arg"]) + if a["act_name"] == "chain": + chained = True + if a["act_name"] in [ + "block", + "deny", + "drop", + "allow", + "proxy", + "redirect", + ]: + has_disruptive = True + + # check wheter tx.var is used at setvar's right side + val_act = [] + val_act_arg = [] + # example: + # setvar:'tx.inbound_anomaly_score_threshold=5' + # + # act_arg <- tx.inbound_anomaly_score_threshold + # act_atg_val <- 5 + # + # example2 (same as above, but no single quotes!): + # setvar:tx.inbound_anomaly_score_threshold=5 + # act_arg <- tx.inbound_anomaly_score_threshold + # act_atg_val <- 5 + # + if "act_arg" in a and a["act_arg"] is not None: + val_act = re.findall(r"%\{(tx.[^%]*)}", a["act_arg"], re.I) + if "act_arg_val" in a and a["act_arg_val"] is not None: + val_act_arg = re.findall( + r"%\{(tx.[^%]*)}", a["act_arg_val"], re.I + ) + for v in val_act + val_act_arg: + v = v.lower().replace("tx.", "") + # check whether the variable is a captured var, eg TX.1 - we do not care that case + if not re.match(r"^\d$", v, re.I): + # v holds the tx.ANY variable, but not the captured ones + # we should collect these variables + if ( + v not in self.globtxvars + or phase < self.globtxvars[v]["phase"] + ): + self.error_undefined_txvars.append( + { + "var": v, + "ruleid": ruleid, + "line": a["lineno"], + "endLine": a["lineno"], + "message": f"TX variable '{v}' not set / later set (rvar) in rule {ruleid}", + } + ) + else: + self.globtxvars[v]["used"] = True + else: + if v in self.globtxvars: + self.globtxvars[v]["used"] = True + + if "operator_argument" in d: + oparg = re.findall(r"%\{(tx.[^%]*)}", d["operator_argument"], re.I) + if oparg: + for o in oparg: + o = o.lower() + o = re.sub(r"tx\.", "", o, re.I) + if ( + ( + o not in self.globtxvars + or phase < self.globtxvars[o]["phase"] + ) + and not re.match(r"^\d$", o) + and not re.match(r"/.*/", o) + and check_exists is None + ): + self.error_undefined_txvars.append( + { + "var": o, + "ruleid": ruleid, + "line": d["lineno"], + "endLine": d["lineno"], + "message": "TX variable '%s' not set / later set (OPARG) in rule %d" + % (o, ruleid), + } + ) + elif ( + o in self.globtxvars + and phase >= self.globtxvars[o]["phase"] + and not re.match(r"^\d$", o) + and not re.match(r"/.*/", o) + ): + self.globtxvars[o]["used"] = True + if "variables" in d: + for v in d["variables"]: + # check if the variable is TX and has not a & prefix, which counts + # the variable length + if v["variable"].lower() == "tx": + if not v["counter"]: + # * if the variable part (after '.' or ':') is not there in + # the list of collected TX variables, and + # * not a numeric, eg TX:2, and + # * not a regular expression, between '/' chars, eg TX:/^foo/ + # OR + # * rule's phase lower than declaration's phase + rvar = v["variable_part"].lower() + if ( + ( + rvar not in self.globtxvars + or ( + ruleid != self.globtxvars[rvar]["ruleid"] + and phase < self.globtxvars[rvar]["phase"] + ) + ) + and not re.match(r"^\d$", rvar) + and not re.match(r"/.*/", rvar) + ): + self.error_undefined_txvars.append( + { + "var": rvar, + "ruleid": ruleid, + "line": d["lineno"], + "endLine": d["lineno"], + "message": "TX variable '%s' not set / later set (VAR)" + % (v["variable_part"]), + } + ) + elif ( + rvar in self.globtxvars + and phase >= self.globtxvars[rvar]["phase"] + and not re.match(r"^\d$", rvar) + and not re.match(r"/.*/", rvar) + ): + self.globtxvars[rvar]["used"] = True + else: + check_exists = True + self.globtxvars[v["variable_part"].lower()] = { + "var": v["variable_part"].lower(), + "phase": phase, + "used": False, + "file": self.filename, + "ruleid": ruleid, + "message": "", + "line": d["lineno"], + "endLine": d["lineno"], + } + if has_disruptive: + self.globtxvars[v["variable_part"].lower()][ + "used" + ] = True + if ( + len(self.error_undefined_txvars) > 0 + and self.error_undefined_txvars[-1]["var"] + == v["variable_part"].lower() + ): + del self.error_undefined_txvars[-1] + if not chained: + check_exists = None + has_disruptive = False diff --git a/src/crs_linter/rules/version.py b/src/crs_linter/rules/version.py new file mode 100644 index 0000000..93b4e24 --- /dev/null +++ b/src/crs_linter/rules/version.py @@ -0,0 +1,55 @@ +def check_ver_action(self, version): + """ + check that every rule has a `ver` action + """ + chained = False + ruleid = 0 + has_ver = False + ver_is_ok = False + crsversion = version + ruleversion = "" + for d in self.data: + if "actions" in d: + chainlevel = 0 + + if not chained: + ruleid = 0 + has_ver = False + ver_is_ok = False + chainlevel = 0 + else: + chained = False + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if a["act_name"] == "chain": + chained = True + chainlevel += 1 + if a["act_name"] == "ver": + if chainlevel == 0: + has_ver = True + if a["act_arg"] == version: + ver_is_ok = True + else: + ruleversion = a["act_arg"] + if ruleid > 0 and chainlevel == 0: + if not has_ver: + self.error_no_ver_action_or_wrong_version.append( + { + "ruleid": ruleid, + "line": a["lineno"], + "endLine": a["lineno"], + "message": f"rule does not have 'ver' action; rule id: {ruleid}", + } + ) + else: + if not ver_is_ok: + self.error_no_ver_action_or_wrong_version.append( + { + "ruleid": ruleid, + "line": a["lineno"], + "endLine": a["lineno"], + "message": f"rule's 'ver' action has incorrect value; rule id: {ruleid}, version: '{ruleversion}', expected: '{crsversion}'", + } + ) + From c5b7eca135cecfcd0243997c348cb4ecc0ad7364 Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Tue, 7 Oct 2025 16:50:36 -0300 Subject: [PATCH 02/19] fix: imports Signed-off-by: Felipe Zipitria --- src/crs_linter/linter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/crs_linter/linter.py b/src/crs_linter/linter.py index 24cdda1..e9e679b 100644 --- a/src/crs_linter/linter.py +++ b/src/crs_linter/linter.py @@ -1,5 +1,8 @@ import msc_pyparser import re +import os.path +import sys + class LintProblem: """Represents a linting problem found by crs-linter.""" @@ -132,7 +135,6 @@ def __init__(self, data, filename=None, txvars=None): [] ) # list of rules which don't have any tests # regex to produce tag from filename: - import os.path self.re_fname = re.compile(r"(REQUEST|RESPONSE)\-\d{3}\-") self.filename_tag_exclusions = [] @@ -240,7 +242,6 @@ def check_ignore_case(self): e["message"] += f" (rule: {self.current_ruleid})" def check_action_order(self): - import sys for d in self.data: if "actions" in d: max_order = 0 # maximum position of read actions @@ -781,7 +782,6 @@ def gen_crs_file_tag(self, fname=None): """ generate tag from filename """ - import os.path filename_for_tag = fname if fname is not None else self.filename filename = self.re_fname.sub("", os.path.basename(os.path.splitext(filename_for_tag)[0])) filename = filename.replace("APPLICATION-", "") From 45b709d9640c0bd519785f816a0923cac0e29882 Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Tue, 7 Oct 2025 18:57:39 -0300 Subject: [PATCH 03/19] fix: intermediates Signed-off-by: Felipe Zipitria --- src/crs_linter/linter.py | 206 -------------------------- src/crs_linter/rules/__init__.py | 1 + src/crs_linter/rules/approved_tags.py | 5 +- src/crs_linter/rules/version.py | 1 - tests/test_linter.py | 44 +++--- 5 files changed, 25 insertions(+), 232 deletions(-) diff --git a/src/crs_linter/linter.py b/src/crs_linter/linter.py index e9e679b..78eb4ef 100644 --- a/src/crs_linter/linter.py +++ b/src/crs_linter/linter.py @@ -755,29 +755,6 @@ def check_tags(self, tagslist): } ) - def check_lowercase_ignorecase(self): - ruleid = 0 - for d in self.data: - if d["type"].lower() == "secrule": - if d["operator"] == "@rx": - regex = d["operator_argument"] - if regex.startswith("(?i)"): - if "actions" in d: - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "t": - # check the transform is valid - if a["act_arg"].lower() == "lowercase": - self.error_combined_transformation_and_ignorecase.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f'rule uses (?i) in combination with t:lowercase: \'{a["act_arg"]}\'; rule id: {ruleid}', - } - ) - def gen_crs_file_tag(self, fname=None): """ generate tag from filename @@ -787,189 +764,6 @@ def gen_crs_file_tag(self, fname=None): filename = filename.replace("APPLICATION-", "") return "/".join(["OWASP_CRS", filename]) - def check_crs_tag(self, excl_list): - """ - check that every rule has a `tag:'OWASP_CRS'` and `tag:'$FILENAME$'` action - """ - filenametag = self.gen_crs_file_tag() - if len(self.filename_tag_exclusions) == 0: - if len(excl_list) > 0: - self.filename_tag_exclusions = [self.gen_crs_file_tag(f) for f in excl_list] - chained = False - ruleid = 0 - has_crs = False - has_crs_fname = False - tagcnt = 0 # counter to help check - crstagnr = 0 # hold the position of OWASP_CRS tag - for d in self.data: - if "actions" in d: - chainlevel = 0 - - if not chained: - ruleid = 0 - has_crs = False - has_crs_fname = False - chainlevel = 0 - else: - chained = False - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "chain": - chained = True - chainlevel += 1 - if a["act_name"] == "tag": - tagcnt += 1 - if chainlevel == 0: - if a["act_arg"] == "OWASP_CRS": - has_crs = True - crstagnr = tagcnt - if a['act_arg'] == filenametag: - if crstagnr == 0 or tagcnt == crstagnr + 1: - has_crs_fname = True - if ruleid > 0 and not has_crs: - self.error_no_crstag.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"rule does not have tag with value 'OWASP_CRS'; rule id: {ruleid}", - } - ) - # see the exclusion list of files which does not require the filename tag - if filenametag not in self.filename_tag_exclusions: - # check wether the rule is an administrative rule - is_admin_rule = True if (ruleid % 1000 < 100) else False - # admin rules do not need filename tags - if not is_admin_rule and not has_crs_fname: - self.error_no_crstag.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"rule does not have tag with value '{filenametag}'; rule id: {ruleid}", - } - ) - - def check_ver_action(self, version): - """ - check that every rule has a `ver` action - """ - chained = False - ruleid = 0 - has_ver = False - ver_is_ok = False - crsversion = version - ruleversion = "" - for d in self.data: - if "actions" in d: - chainlevel = 0 - - if not chained: - ruleid = 0 - has_ver = False - ver_is_ok = False - chainlevel = 0 - else: - chained = False - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "chain": - chained = True - chainlevel += 1 - if a["act_name"] == "ver": - if chainlevel == 0: - has_ver = True - if a["act_arg"] == version: - ver_is_ok = True - else: - ruleversion = a["act_arg"] - if ruleid > 0 and chainlevel == 0: - if not has_ver: - self.error_no_ver_action_or_wrong_version.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"rule does not have 'ver' action; rule id: {ruleid}", - } - ) - else: - if not ver_is_ok: - self.error_no_ver_action_or_wrong_version.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"rule's 'ver' action has incorrect value; rule id: {ruleid}, version: '{ruleversion}', expected: '{crsversion}'", - } - ) - - def check_capture_action(self): - """ - check that every chained rule has a `capture` action if it uses TX.N variable - """ - chained = False - ruleid = 0 - chainlevel = 0 - capture_level = None - re_number = re.compile(r"^\d$") - has_capture = False - use_captured_var = False - captured_var_chain_level = 0 - for d in self.data: - # only the SecRule object is relevant - if d["type"].lower() == "secrule": - for v in d["variables"]: - if v["variable"].lower() == "tx" and re_number.match( - v["variable_part"] - ): - # only the first occurrence required - if not use_captured_var: - use_captured_var = True - captured_var_chain_level = chainlevel - if "actions" in d: - if not chained: - ruleid = 0 - chainlevel = 0 - else: - chained = False - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "chain": - chained = True - chainlevel += 1 - if a["act_name"] == "capture": - capture_level = chainlevel - has_capture = True - if ruleid > 0 and not chained: # end of chained rule - if use_captured_var: - # we allow if target with TX:N is in the first rule - # of a chained rule without 'capture' - if captured_var_chain_level > 0: - if ( - not has_capture - or captured_var_chain_level < capture_level - ): - self.error_tx_N_without_capture_action.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"rule uses TX.N without capture; rule id: {ruleid}'", - } - ) - # clear variables - chained = False - chainlevel = 0 - has_capture = False - capture_level = 0 - captured_var_chain_level = 0 - use_captured_var = False - ruleid = 0 - def find_ids_without_tests(self, test_cases, exclusion_list): """ s: the parsed structure diff --git a/src/crs_linter/rules/__init__.py b/src/crs_linter/rules/__init__.py index 34761fb..6f040c3 100644 --- a/src/crs_linter/rules/__init__.py +++ b/src/crs_linter/rules/__init__.py @@ -1,4 +1,5 @@ # import all checks +from crs_linter.linter import LintProblem from . import ( approved_tags, diff --git a/src/crs_linter/rules/approved_tags.py b/src/crs_linter/rules/approved_tags.py index 98b7a9b..d38435c 100644 --- a/src/crs_linter/rules/approved_tags.py +++ b/src/crs_linter/rules/approved_tags.py @@ -1,5 +1,4 @@ -from src.crs_linter.linter import LintProblem - +from crs_linter.linter import LintProblem def check_tags(self, tags): """ @@ -11,7 +10,7 @@ def check_tags(self, tags): if "actions" in d: for a in d["actions"]: if a["act_name"] == "tag": - tag = a["act_arg"] + tag = a["act_arg"] # check wheter tag is in tagslist if tags.count(tag) == 0: yield LintProblem( diff --git a/src/crs_linter/rules/version.py b/src/crs_linter/rules/version.py index 93b4e24..8bdde2f 100644 --- a/src/crs_linter/rules/version.py +++ b/src/crs_linter/rules/version.py @@ -52,4 +52,3 @@ def check_ver_action(self, version): "message": f"rule's 'ver' action has incorrect value; rule id: {ruleid}, version: '{ruleversion}', expected: '{crsversion}'", } ) - diff --git a/tests/test_linter.py b/tests/test_linter.py index c05e1f5..a19683f 100644 --- a/tests/test_linter.py +++ b/tests/test_linter.py @@ -1,4 +1,4 @@ -from crs_linter.linter import Check, parse_config +from crs_linter.linter import Linter, parse_config def test_parser(): @@ -11,7 +11,7 @@ def test_parser(): def test_check_ignore_proper_case(): t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Mozilla" "id:1,phase:1,log,status:403"' p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_ignore_case() assert len(c.error_case_mistmatch) == 0 @@ -21,7 +21,7 @@ def test_check_ignore_case_fail_invalid_action_case(): """Two actions are in the wrong case.""" t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Mozilla" "id:1,phase:1,LOG,NoLOg,status:403"' p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_ignore_case() assert len(c.error_case_mistmatch) == 2 @@ -31,7 +31,7 @@ def test_check_action_order(): """Test that the actions are in the correct order.""" t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Mozilla" "id:1,phase:1,nolog"' p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_action_order() assert len(c.error_action_order) == 0 @@ -41,7 +41,7 @@ def test_check_action_fail_wrong_order(): """Test if the action is in the wrong order. status should go before log""" t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Mozilla" "id:1,phase:1,log,status:403"' p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_action_order() assert len(c.error_action_order) == 1 @@ -51,7 +51,7 @@ def test_check_ctl_auditctl_log_parts(): """Test that there is no ctl:auditLogParts action in any rules""" t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Mozilla" "id:1,phase:1,log,status:403"' p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_ctl_audit_log() assert len(c.error_wrong_ctl_auditlogparts) == 0 @@ -60,7 +60,7 @@ def test_check_ctl_auditctl_log_parts(): def test_check_wrong_ctl_audit_log_parts(): t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Pizza" "id:1,phase:1,log,ctl:auditLogParts=+E"' p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_ctl_audit_log() assert len(c.error_wrong_ctl_auditlogparts) == 1 @@ -85,7 +85,7 @@ def test_check_tx_variable(): setvar:'tx.detection_paranoia_level=%{TX.blocking_paranoia_level}'" """ p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_tx_variable() assert len(c.error_undefined_txvars) == 0 @@ -106,7 +106,7 @@ def test_check_tx_variable_fail_nonexisting(): setvar:tx.bar=1" """ p = parse_config(t) - c = Check(p) + c = Linter(p) c.collect_tx_variable() c.check_tx_variable() @@ -142,7 +142,7 @@ def test_check_pl_consistency(): setvar:'tx.inbound_anomaly_score_pl1=+%{tx.critical_anomaly_score}'" """ p = parse_config(t) - c = Check(p) + c = Linter(p) c.collect_tx_variable() c.check_pl_consistency() @@ -178,7 +178,7 @@ def test_check_pl_consistency_fail(): setvar:'tx.inbound_anomaly_score_pl1=+%{tx.error_anomaly_score}'" """ p = parse_config(t) - c = Check(p) + c = Linter(p) c.collect_tx_variable() c.check_pl_consistency() @@ -196,7 +196,7 @@ def test_check_tags(): tag:OWASP_CRS" """ p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_tags(["PIZZA", "OWASP_CRS"]) assert len(c.error_new_unlisted_tags) == 0 @@ -213,7 +213,7 @@ def test_check_tags_fail(): tag:PINEAPPLE" """ p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_tags(["OWASP_CRS", "PIZZA"]) assert len(c.error_new_unlisted_tags) == 1 @@ -222,7 +222,7 @@ def test_check_tags_fail(): def test_check_lowercase_ignorecase(): t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Mozilla" "id:1,phase:1,log,status:403"' p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_ignore_case() assert len([]) == 0 @@ -240,7 +240,7 @@ def test_check_crs_tag(): tag:OWASP_CRS/CHECK-TAG" """ p = parse_config(t) - c = Check(p, filename = "REQUEST-900-CHECK-TAG.conf") + c = Linter(p, filename = "REQUEST-900-CHECK-TAG.conf") print(c.filename) c.check_crs_tag([]) @@ -258,7 +258,7 @@ def test_check_crs_tag_fail(): tag:attack-xss" """ p = parse_config(t) - c = Check(p, filename = "REQUEST-900-CHECK-TAG.conf") + c = Linter(p, filename = "REQUEST-900-CHECK-TAG.conf") c.check_crs_tag([]) assert len(c.error_no_crstag) == 1 @@ -275,7 +275,7 @@ def test_check_crs_tag_fail2(): tag:OWASP_CRS" """ p = parse_config(t) - c = Check(p, filename = "REQUEST-900-CHECK-TAG.conf") + c = Linter(p, filename = "REQUEST-900-CHECK-TAG.conf") c.check_crs_tag([]) assert len(c.error_no_crstag) == 1 @@ -292,7 +292,7 @@ def test_check_crs_tag_fail3(): tag:OWASP_CRS/CHECK-TAG" """ p = parse_config(t) - c = Check(p, filename = "REQUEST-900-CHECK-TAG.conf") + c = Linter(p, filename = "REQUEST-900-CHECK-TAG.conf") c.check_crs_tag([]) assert len(c.error_no_crstag) == 1 @@ -309,7 +309,7 @@ def test_check_ver_action(crsversion): ver:'OWASP_CRS/4.10.0'" """ p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_ver_action(crsversion) assert len(c.error_no_ver_action_or_wrong_version) == 0 @@ -327,7 +327,7 @@ def test_check_ver_action_fail(crsversion): ver:OWASP_CRS/1.0.0-dev" """ p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_ver_action(crsversion) assert len(c.error_no_ver_action_or_wrong_version) == 1 @@ -348,7 +348,7 @@ def test_check_capture_action(): SecRule TX:1 "@eq attack" """ p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_capture_action() assert len(c.error_tx_N_without_capture_action) == 0 @@ -368,7 +368,7 @@ def test_check_capture_action_fail(): SecRule TX:0 "@eq attack" """ p = parse_config(t) - c = Check(p) + c = Linter(p) c.check_capture_action() assert len(c.error_tx_N_without_capture_action) == 1 From c7a7db12518f4b6d87d43d0713b691ecd30bfb33 Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Tue, 7 Oct 2025 19:15:30 -0300 Subject: [PATCH 04/19] fix: update refactored files Signed-off-by: Felipe Zipitria --- src/crs_linter/lint_problem.py | 32 + src/crs_linter/linter.py | 801 ++---------------------- src/crs_linter/rules/__init__.py | 5 +- src/crs_linter/rules/approved_tags.py | 10 +- src/crs_linter/rules/check_capture.py | 65 ++ src/crs_linter/rules/crs_tag.py | 18 +- src/crs_linter/rules/ctl_audit_log.py | 24 + src/crs_linter/rules/duplicated.py | 3 +- src/crs_linter/rules/duplicated_ids.py | 25 + src/crs_linter/rules/ignore_case.py | 139 ++-- src/crs_linter/rules/ordered_actions.py | 7 +- src/crs_linter/rules/pl_consistency.py | 91 ++- src/crs_linter/rules/rule_tests.py | 38 ++ src/crs_linter/rules/variables_usage.py | 97 ++- src/crs_linter/utils.py | 6 + tests/test_linter.py | 105 ++-- 16 files changed, 523 insertions(+), 943 deletions(-) create mode 100644 src/crs_linter/lint_problem.py create mode 100644 src/crs_linter/rules/check_capture.py create mode 100644 src/crs_linter/rules/ctl_audit_log.py create mode 100644 src/crs_linter/rules/duplicated_ids.py create mode 100644 src/crs_linter/rules/rule_tests.py create mode 100644 src/crs_linter/utils.py diff --git a/src/crs_linter/lint_problem.py b/src/crs_linter/lint_problem.py new file mode 100644 index 0000000..637e798 --- /dev/null +++ b/src/crs_linter/lint_problem.py @@ -0,0 +1,32 @@ +class LintProblem: + """Represents a linting problem found by crs-linter.""" + def __init__(self, line, end_line, column=None, desc='', rule=None): + #: Line on which the problem was found (starting at 1) + self.line = line + #: Line on which the problem ends + self.end_line = end_line + #: Column on which the problem was found (starting at 1) + self.column = column + #: Human-readable description of the problem + self.desc = desc + #: Identifier of the rule that detected the problem + self.rule = rule + self.level = None + + @property + def message(self): + if self.rule is not None: + return f'{self.desc} ({self.rule})' + return self.desc + + def __eq__(self, other): + return (self.line == other.line and + self.column == other.column and + self.rule == other.rule) + + def __lt__(self, other): + return (self.line < other.line or + (self.line == other.line and self.column < other.column)) + + def __repr__(self): + return f'{self.line}:{self.column}: {self.message}' diff --git a/src/crs_linter/linter.py b/src/crs_linter/linter.py index 78eb4ef..4fdf0b2 100644 --- a/src/crs_linter/linter.py +++ b/src/crs_linter/linter.py @@ -2,362 +2,88 @@ import re import os.path import sys - - -class LintProblem: - """Represents a linting problem found by crs-linter.""" - def __init__(self, line, end_line, column=None, desc='', rule=None): - #: Line on which the problem was found (starting at 1) - self.line = line - #: Line on which the problem ends - self.end_line = end_line - #: Column on which the problem was found (starting at 1) - self.column = column - #: Human-readable description of the problem - self.desc = desc - #: Identifier of the rule that detected the problem - self.rule = rule - self.level = None - - @property - def message(self): - if self.rule is not None: - return f'{self.desc} ({self.rule})' - return self.desc - - def __eq__(self, other): - return (self.line == other.line and - self.column == other.column and - self.rule == other.rule) - - def __lt__(self, other): - return (self.line < other.line or - (self.line == other.line and self.column < other.column)) - - def __repr__(self): - return f'{self.line}:{self.column}: {self.message}' +from .lint_problem import LintProblem +from .rules import ( + approved_tags, + crs_tag, + ctl_audit_log, + duplicated_ids, + ignore_case, + ordered_actions, + pl_consistency, + rule_tests, + variables_usage, +) class Linter: - ids = {} # list of rule id's and their location in files - vars = {} # list of TX variables and their location in files - + """Main linter class that orchestrates all rule checks.""" + def __init__(self, data, filename=None, txvars=None): - # txvars is a global used hash table, but processing of rules is a sequential flow - # all rules need this global table - self.globtxvars = txvars or {} - # list available operators, actions, transformations and ctl args - self.operators = "beginsWith|containsWord|contains|detectSQLi|detectXSS|endsWith|eq|fuzzyHash|geoLookup|ge|gsbLookup|gt|inspectFile|ipMatch|ipMatchF|ipMatchFromFile|le|lt|noMatch|pmFromFile|pmf|pm|rbl|rsub|rx|streq|strmatch|unconditionalMatch|validateByteRange|validateDTD|validateHash|validateSchema|validateUrlEncoding|validateUtf8Encoding|verifyCC|verifyCPF|verifySSN|within".split( - "|" - ) - self.operatorsl = [o.lower() for o in self.operators] - self.actions = "accuracy|allow|append|auditlog|block|capture|chain|ctl|deny|deprecatevar|drop|exec|expirevar|id|initcol|logdata|log|maturity|msg|multiMatch|noauditlog|nolog|pass|pause|phase|prepend|proxy|redirect|rev|sanitiseArg|sanitiseMatched|sanitiseMatchedBytes|sanitiseRequestHeader|sanitiseResponseHeader|setenv|setrsc|setsid|setuid|setvar|severity|skipAfter|skip|status|tag|t|ver|xmlns".split( - "|" - ) - self.actionsl = [a.lower() for a in self.actions] - self.transforms = "base64DecodeExt|base64Decode|base64Encode|cmdLine|compressWhitespace|cssDecode|escapeSeqDecode|hexDecode|hexEncode|htmlEntityDecode|jsDecode|length|lowercase|md5|none|normalisePathWin|normalisePath|normalizePathWin|normalizePath|parityEven7bit|parityOdd7bit|parityZero7bit|removeCommentsChar|removeComments|removeNulls|removeWhitespace|replaceComments|replaceNulls|sha1|sqlHexDecode|trimLeft|trimRight|trim|uppercase|urlDecodeUni|urlDecode|urlEncode|utf8toUnicode".split( - "|" - ) - self.transformsl = [t.lower() for t in self.transforms] - self.ctls = "auditEngine|auditLogParts|debugLogLevel|forceRequestBodyVariable|hashEnforcement|hashEngine|requestBodyAccess|requestBodyLimit|requestBodyProcessor|responseBodyAccess|responseBodyLimit|ruleEngine|ruleRemoveById|ruleRemoveByMsg|ruleRemoveByTag|ruleRemoveTargetById|ruleRemoveTargetByMsg|ruleRemoveTargetByTag".split( - "|" - ) - self.ctlsl = [c.lower() for c in self.ctls] - - # list the actions in expected order - # see wiki: https://github.com/coreruleset/coreruleset/wiki/Order-of-ModSecurity-Actions-in-CRS-rules - # note, that these tokens are with lovercase here, but used only for to check the order - self.ordered_actions = [ - "id", # 0 - "phase", # 1 - "allow", - "block", - "deny", - "drop", - "pass", - "proxy", - "redirect", - "status", - "capture", # 10 - "t", - "log", - "nolog", - "auditlog", - "noauditlog", - "msg", - "logdata", - "tag", - "sanitisearg", - "sanitiserequestheader", # 20 - "sanitisematched", - "sanitisematchedbytes", - "ctl", - "ver", - "severity", - "multimatch", - "initcol", - "setenv", - "setvar", - "expirevar", # 30 - "chain", - "skip", - "skipafter", - ] - self.data = data # holds the parsed data - self.current_ruleid = 0 # holds the rule id - self.curr_lineno = 0 # current line number - self.chained = False # holds the chained flag - self.re_tx_var = re.compile(r"%\{}") self.filename = filename + self.globtxvars = txvars or {} # global TX variables hash table self.ids = {} # list of rule id's and their location in files - - # Any of these variables below are used to store the errors - self.error_case_mistmatch = [] # list of case mismatch errors - self.error_action_order = [] # list of ordered action errors - self.error_wrong_ctl_auditlogparts = [] # list of wrong ctl:auditLogParts - self.error_undefined_txvars = [] # list of undefined TX variables - self.error_inconsistent_pltags = [] # list of incosistent PL tags - self.error_inconsistent_plscores = [] # list of incosistent PL scores - self.error_duplicated_id = [] # list of duplicated id's - self.error_new_unlisted_tags = [] # list of new, unlisted tags - self.error_combined_transformation_and_ignorecase = ( - [] - ) # list of combinations of t:lowercase and (?i) - self.error_no_crstag = [] # list of rules without tag:OWASP_CRS - self.error_no_ver_action_or_wrong_version = ( - [] - ) # list of rules without ver action or incorrect ver - self.error_tx_N_without_capture_action = ( - [] - ) # list of rules which uses TX.N without previous 'capture' - self.error_rule_hasnotest = ( - [] - ) # list of rules which don't have any tests + # regex to produce tag from filename: self.re_fname = re.compile(r"(REQUEST|RESPONSE)\-\d{3}\-") self.filename_tag_exclusions = [] - def is_error(self): - """Returns True if any error is found""" - error_vars = [ - self.error_case_mistmatch, - self.error_action_order, - self.error_wrong_ctl_auditlogparts, - self.error_undefined_txvars, - self.error_inconsistent_pltags, - self.error_inconsistent_plscores, - self.error_duplicated_id, - self.error_new_unlisted_tags, - self.error_combined_transformation_and_ignorecase, - self.error_no_crstag, - self.error_no_ver_action_or_wrong_version, - self.error_tx_N_without_capture_action, - ] - return any([len(var) > 0 for var in error_vars]) - - def store_error(self, msg): - # store the error msg in the list - self.error_case_mistmatch.append( - { - "ruleid": 0, - "line": self.curr_lineno, - "endLine": self.curr_lineno, - "message": msg, - } - ) - - def check_ignore_case(self): - # check the ignore cases at operators, actions, - # transformations and ctl arguments - for d in self.data: - if "actions" in d: - aidx = 0 # index of action in list - if not self.chained: - self.current_ruleid = 0 - else: - self.chained = False - - for a in d["actions"]: - action = a["act_name"].lower() - self.curr_lineno = a["lineno"] - if action == "id": - self.current_ruleid = int(a["act_arg"]) - - if action == "chain": - self.chained = True - - # check the action is valid - if action not in self.actionsl: - self.store_error(f"Invalid action {action}") - # check the action case sensitive format - if ( - self.actions[self.actionsl.index(action)] - != a["act_name"] - ): - self.store_error(f"Action case mismatch: {action}") - - if a["act_name"] == "ctl": - # check the ctl argument is valid - if a["act_arg"].lower() not in self.ctlsl: - self.store_error(f'Invalid ctl {a["act_arg"]}') - # check the ctl argument case sensitive format - if ( - self.ctls[self.ctlsl.index(a["act_arg"].lower())] - != a["act_arg"] - ): - self.store_error(f'Ctl case mismatch: {a["act_arg"]}') - if a["act_name"] == "t": - # check the transform is valid - if a["act_arg"].lower() not in self.transformsl: - self.store_error(f'Invalid transform: {a["act_arg"]}') - # check the transform case sensitive format - if ( - self.transforms[ - self.transformsl.index(a["act_arg"].lower()) - ] - != a["act_arg"] - ): - self.store_error( - f'Transform case mismatch : {a["act_arg"]}' - ) - aidx += 1 - if "operator" in d and d["operator"] != "": - self.curr_lineno = d["oplineno"] - # strip the operator - op = d["operator"].replace("!", "").replace("@", "") - # check the operator is valid - if op.lower() not in self.operatorsl: - self.store_error(f'Invalid operator: {d["operator"]}') - # check the operator case sensitive format - if self.operators[self.operatorsl.index(op.lower())] != op: - self.store_error(f'Operator case mismatch: {d["operator"]}') - else: - if d["type"].lower() == "secrule": - self.curr_lineno = d["lineno"] - self.store_error("Empty operator isn't allowed") - if self.current_ruleid > 0: - for e in self.error_case_mistmatch: - e["ruleid"] = self.current_ruleid - e["message"] += f" (rule: {self.current_ruleid})" - - def check_action_order(self): - for d in self.data: - if "actions" in d: - max_order = 0 # maximum position of read actions - if not self.chained: - self.current_ruleid = 0 - else: - self.chained = False - - for index, a in enumerate(d["actions"]): - action = a["act_name"].lower() - # get the 'id' of rule - self.curr_lineno = a["lineno"] - if a["act_name"] == "id": - self.current_ruleid = int(a["act_arg"]) - - # check if chained - if a["act_name"] == "chain": - self.chained = True - - # get the index of action from the ordered list - # above from constructor - try: - act_idx = self.ordered_actions.index(action) - except ValueError: - self.error_action_order.append( - { - "ruleid": 0, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f'action "{action}" at pos {index-1} is in the wrong order: "{action}" at pos {index}', - } - ) - sys.exit(-1) - - # if the index of current action is @ge than the previous - # max value, load it into max_order - if act_idx >= max_order: - max_order = act_idx - else: - # action is the previous action's position in list - # act_idx is the current action's position in list - # if the prev is @gt actually, means it's at wrong position - if act_idx < max_order: - self.error_action_order.append( - { - "ruleid": 0, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"action 'action {action}' at pos {index - 1} is in the wrong order '{action}' at pos {index}", - } - ) - for a in self.error_action_order: - if a["ruleid"] == 0: - a["ruleid"] = self.current_ruleid - a["message"] += f" (rule: {self.current_ruleid})" - - def check_ctl_audit_log(self): - """check there is no ctl:auditLogParts action in any rules""" - for d in self.data: - if "actions" in d: - for a in d["actions"]: - # get the 'id' of rule - self.curr_lineno = a["lineno"] - if a["act_name"] == "id": - self.current_ruleid = int(a["act_arg"]) - - # check if action is ctl:auditLogParts - if ( - a["act_name"].lower() == "ctl" - and a["act_arg"].lower() == "auditlogparts" - ): - self.error_wrong_ctl_auditlogparts.append( - { - "ruleid": self.current_ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": "", - } - ) - - def collect_tx_variable(self): - """collect TX variables in rules - this function collects the TX variables at rules, - if the variable is at a 'setvar' action's left side, eg - setvar:tx.foo=bar - - Because this rule called before any other check, - additionally it checks the duplicated rule ID + def run_checks(self, tagslist=None, test_cases=None, exclusion_list=None): """ + Run all linting checks and yield LintProblem objects. + This is the main entry point for the linter. + """ + # First collect TX variables and check for duplicated IDs + self._collect_tx_variables() + + # Run all rule checks + for problem in ignore_case.check(self.data): + yield problem + + for problem in ordered_actions.check(self.data): + yield problem + + for problem in ctl_audit_log.check(self.data): + yield problem + + for problem in variables_usage.check(self.data, self.globtxvars): + yield problem + + for problem in pl_consistency.check(self.data, self.globtxvars): + yield problem + + for problem in crs_tag.check(self.data): + yield problem + + if tagslist: + for problem in approved_tags.check(self.data, tagslist): + yield problem + + if test_cases is not None: + for problem in rule_tests.check(self.data, test_cases, exclusion_list or []): + yield problem + + # Check for duplicated IDs + for problem in duplicated_ids.check(self.data, self.ids): + yield problem + + def _collect_tx_variables(self): + """Collect TX variables in rules and check for duplicated IDs""" chained = False for d in self.data: if "actions" in d: if not chained: ruleid = 0 # ruleid - phase = ( - 2 # works only in Apache, libmodsecurity uses default phase 1 - ) + phase = 2 # works only in Apache, libmodsecurity uses default phase 1 else: chained = False for a in d["actions"]: if a["act_name"] == "id": ruleid = int(a["act_arg"]) if ruleid in self.ids: - self.error_duplicated_id.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": "id %d is duplicated, previous place: %s:%d" - % ( - ruleid, - self.ids[ruleid]["fname"], - self.ids[ruleid]["lineno"], - ), - } - ) + # This will be caught by duplicated_ids rule + pass else: self.ids[ruleid] = { "fname": self.filename, @@ -388,373 +114,6 @@ def collect_tx_variable(self): "endLine": a["lineno"], } - def check_tx_variable(self): - """this function checks if a used TX variable has set - - a variable is used when: - * it's an operator argument: "@rx %{TX.foo}" - * it's a target: SecRule TX.foo "@..." - * it's a right side value in a value giving: setvar:tx.bar=tx.foo - - this function collects the variables if it is used but not set previously - """ - # set if rule checks the existence of var, e.g., `&TX:foo "@eq 1"` - check_exists = None - has_disruptive = False # set if rule contains disruptive action - chained = False - for d in self.data: - if d["type"].lower() in ["secrule", "secaction"]: - if not chained: - # works only in Apache, libmodsecurity uses default phase 1 - phase = 2 - ruleid = 0 - else: - chained = False - - # iterate over actions and collect these values: - # ruleid, phase, chained, rule has or not any disruptive action - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "phase": - phase = int(a["act_arg"]) - if a["act_name"] == "chain": - chained = True - if a["act_name"] in [ - "block", - "deny", - "drop", - "allow", - "proxy", - "redirect", - ]: - has_disruptive = True - - # check wheter tx.var is used at setvar's right side - val_act = [] - val_act_arg = [] - # example: - # setvar:'tx.inbound_anomaly_score_threshold=5' - # - # act_arg <- tx.inbound_anomaly_score_threshold - # act_atg_val <- 5 - # - # example2 (same as above, but no single quotes!): - # setvar:tx.inbound_anomaly_score_threshold=5 - # act_arg <- tx.inbound_anomaly_score_threshold - # act_atg_val <- 5 - # - if "act_arg" in a and a["act_arg"] is not None: - val_act = re.findall(r"%\{(tx.[^%]*)}", a["act_arg"], re.I) - if "act_arg_val" in a and a["act_arg_val"] is not None: - val_act_arg = re.findall( - r"%\{(tx.[^%]*)}", a["act_arg_val"], re.I - ) - for v in val_act + val_act_arg: - v = v.lower().replace("tx.", "") - # check whether the variable is a captured var, eg TX.1 - we do not care that case - if not re.match(r"^\d$", v, re.I): - # v holds the tx.ANY variable, but not the captured ones - # we should collect these variables - if ( - v not in self.globtxvars - or phase < self.globtxvars[v]["phase"] - ): - self.error_undefined_txvars.append( - { - "var": v, - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"TX variable '{v}' not set / later set (rvar) in rule {ruleid}", - } - ) - else: - self.globtxvars[v]["used"] = True - else: - if v in self.globtxvars: - self.globtxvars[v]["used"] = True - - if "operator_argument" in d: - oparg = re.findall(r"%\{(tx.[^%]*)}", d["operator_argument"], re.I) - if oparg: - for o in oparg: - o = o.lower() - o = re.sub(r"tx\.", "", o, re.I) - if ( - ( - o not in self.globtxvars - or phase < self.globtxvars[o]["phase"] - ) - and not re.match(r"^\d$", o) - and not re.match(r"/.*/", o) - and check_exists is None - ): - self.error_undefined_txvars.append( - { - "var": o, - "ruleid": ruleid, - "line": d["lineno"], - "endLine": d["lineno"], - "message": "TX variable '%s' not set / later set (OPARG) in rule %d" - % (o, ruleid), - } - ) - elif ( - o in self.globtxvars - and phase >= self.globtxvars[o]["phase"] - and not re.match(r"^\d$", o) - and not re.match(r"/.*/", o) - ): - self.globtxvars[o]["used"] = True - if "variables" in d: - for v in d["variables"]: - # check if the variable is TX and has not a & prefix, which counts - # the variable length - if v["variable"].lower() == "tx": - if not v["counter"]: - # * if the variable part (after '.' or ':') is not there in - # the list of collected TX variables, and - # * not a numeric, eg TX:2, and - # * not a regular expression, between '/' chars, eg TX:/^foo/ - # OR - # * rule's phase lower than declaration's phase - rvar = v["variable_part"].lower() - if ( - ( - rvar not in self.globtxvars - or ( - ruleid != self.globtxvars[rvar]["ruleid"] - and phase < self.globtxvars[rvar]["phase"] - ) - ) - and not re.match(r"^\d$", rvar) - and not re.match(r"/.*/", rvar) - ): - self.error_undefined_txvars.append( - { - "var": rvar, - "ruleid": ruleid, - "line": d["lineno"], - "endLine": d["lineno"], - "message": "TX variable '%s' not set / later set (VAR)" - % (v["variable_part"]), - } - ) - elif ( - rvar in self.globtxvars - and phase >= self.globtxvars[rvar]["phase"] - and not re.match(r"^\d$", rvar) - and not re.match(r"/.*/", rvar) - ): - self.globtxvars[rvar]["used"] = True - else: - check_exists = True - self.globtxvars[v["variable_part"].lower()] = { - "var": v["variable_part"].lower(), - "phase": phase, - "used": False, - "file": self.filename, - "ruleid": ruleid, - "message": "", - "line": d["lineno"], - "endLine": d["lineno"], - } - if has_disruptive: - self.globtxvars[v["variable_part"].lower()][ - "used" - ] = True - if ( - len(self.error_undefined_txvars) > 0 - and self.error_undefined_txvars[-1]["var"] - == v["variable_part"].lower() - ): - del self.error_undefined_txvars[-1] - if not chained: - check_exists = None - has_disruptive = False - - def check_pl_consistency(self): - """this method checks the PL consistency - - the function iterates through the rules, and catches the set PL, eg: - - SecRule TX:DETECTION_PARANOIA_LEVEL "@lt 1" ... - this means we are on PL1 currently - - all rules must consist with current PL at the used tags and variables - - eg: - tag:'paranoia-level/1' - ^ - setvar:'tx.outbound_anomaly_score_pl1=+%{tx.error_anomaly_score}'" - ^^^ - additional relations: - * all rules must have the "tag:'paranoia-level/N'" if it does not have "nolog" action - * if rule have "nolog" action it must not have "tag:'paranoia-level/N'" action - * anomaly scoring value on current PL must increment by value corresponding to severity - - """ - curr_pl = 0 - tags = [] # collect tags - _txvars = {} # collect setvars and values - _txvlines = {} # collect setvars and its lines - severity = None # severity - has_nolog = False # nolog action exists - - for d in self.data: - # find the current PL - if d["type"].lower() in ["secrule"]: - for v in d["variables"]: - if ( - v["variable"].lower() == "tx" - and v["variable_part"].lower() == "detection_paranoia_level" - and d["operator"] == "@lt" - and re.match(r"^\d$", d["operator_argument"]) - ): - curr_pl = int(d["operator_argument"]) - - if "actions" in d: - chained = False - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "severity": - severity = a["act_arg"].replace("'", "").lower() - if a["act_name"] == "tag": - tags.append(a) - if a["act_name"] == "setvar": - if a["act_arg"][0:2].lower() == "tx": - # this hack necessary, because sometimes we use setvar argument - # between '', sometimes not - # eg - # setvar:crs_setup_version=334 - # setvar:'tx.inbound_anomaly_score_threshold=5' - txv = a["act_arg"][3:].split("=") - txv[0] = txv[0].lower() # variable name - if len(txv) > 1: - # variable value - txv[1] = txv[1].lower().strip(r"+\{}") - else: - txv.append(a["act_arg_val"].strip(r"+\{}")) - _txvars[txv[0]] = txv[1] - _txvlines[txv[0]] = a["lineno"] - if a["act_name"] == "nolog": - has_nolog = True - if a["act_name"] == "chain": - chained = True - - has_pl_tag = False - for a in tags: - if a["act_arg"][0:14] == "paranoia-level": - has_pl_tag = True - pltag = int(a["act_arg"].split("/")[1]) - if has_nolog: - self.error_inconsistent_pltags.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f'tag \'{a["act_arg"]}\' with \'nolog\' action, rule id: {ruleid}', - } - ) - elif pltag != curr_pl and curr_pl > 0: - self.error_inconsistent_pltags.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f'tag \'{a["act_arg"]}\' on PL {curr_pl}, rule id: {ruleid}', - } - ) - - if not has_pl_tag and not has_nolog and curr_pl >= 1: - self.error_inconsistent_pltags.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"rule does not have `paranoia-level/{curr_pl}` action, rule id: {ruleid}", - } - ) - - for t in _txvars: - subst_val = re.search( - r"%\{tx.[a-z]+_anomaly_score}", _txvars[t], re.I - ) - val = re.sub(r"[+%{}]", "", _txvars[t]).lower() - # check if last char is a numeric, eg ...anomaly_score_pl1 - scorepl = re.search(r"anomaly_score_pl\d$", t) - if scorepl: - if curr_pl > 0 and int(t[-1]) != curr_pl: - self.error_inconsistent_plscores.append( - { - "ruleid": ruleid, - "line": _txvlines[t], - "endLine": _txvlines[t], - "message": f"variable {t} on PL {curr_pl}, rule id: {ruleid}", - } - ) - if severity is None and subst_val: # - do we need this? - self.error_inconsistent_plscores.append( - { - "ruleid": ruleid, - "line": _txvlines[t], - "endLine": _txvlines[t], - "message": f"missing severity action, rule id: {ruleid}", - } - ) - else: - if val != "tx.%s_anomaly_score" % (severity) and val != "0": - self.error_inconsistent_plscores.append( - { - "ruleid": ruleid, - "line": _txvlines[t], - "endLine": _txvlines[t], - "message": f"invalid value for anomaly_score_pl{t[-1]}: {val} with severity {severity}, rule id: {ruleid}", - } - ) - # variable was found so we need to mark it as used - self.globtxvars[t]["used"] = True - - # reset local variables if we are done with a rule <==> no more 'chain' action - if not chained: - tags = [] # collect tags - _txvars = {} # collect setvars and values - _txvlines = {} # collect setvars and its lines - severity = None # severity - has_nolog = False # rule has nolog action - - def check_tags(self, tagslist): - """ - check that only tags from the util/APPROVED_TAGS file are used - """ - chained = False - ruleid = 0 - for d in self.data: - if "actions" in d: - aidx = 0 # stores the index of current action - if not chained: - ruleid = 0 - else: - chained = False - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "chain": - chained = True - if a["act_name"] == "tag": - # check wheter tag is in tagslist - if tagslist.count(a["act_arg"]) == 0: - self.error_new_unlisted_tags.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f'rule uses unknown tag: \'{a["act_arg"]}\'; only tags registered in the util/APPROVED_TAGS file may be used; rule id: {ruleid}', - } - ) - def gen_crs_file_tag(self, fname=None): """ generate tag from filename @@ -764,41 +123,6 @@ def gen_crs_file_tag(self, fname=None): filename = filename.replace("APPLICATION-", "") return "/".join(["OWASP_CRS", filename]) - def find_ids_without_tests(self, test_cases, exclusion_list): - """ - s: the parsed structure - test_cases: all available test cases - exclusion_list_name: file which contains rule id's without tests - """ - rids = {} - for i in self.data: - # only SecRule counts - if i['type'] == "SecRule": - for a in i['actions']: - # find the `id` action - if a['act_name'] == "id": - # get the argument of the action - rid = int(a['act_arg']) # int - srid = a['act_arg'] # string - if (rid%1000) >= 100: # skip the PL control rules - # also skip these hardcoded rules - need_check = True - for excl in exclusion_list: - # exclude full rule IDs or rule ID prefixes - if srid[:len(excl)] == excl: - need_check = False - if need_check: - # if there is no test cases, just print it - if rid not in test_cases: - rids[rid] = a['lineno'] - self.error_rule_hasnotest.append({ - 'ruleid' : rid, - 'line' : a['lineno'], - 'endLine': a['lineno'], - 'message': f"rule does not have any tests; rule id: {rid}'" - }) - return True - def parse_config(text): try: @@ -820,10 +144,3 @@ def parse_file(filename): except Exception as e: print(e) - -def get_id(actions): - """ Return the ID from actions """ - for a in actions: - if a["act_name"] == "id": - return int(a["act_arg"]) - return 0 diff --git a/src/crs_linter/rules/__init__.py b/src/crs_linter/rules/__init__.py index 6f040c3..dc5bd7e 100644 --- a/src/crs_linter/rules/__init__.py +++ b/src/crs_linter/rules/__init__.py @@ -1,18 +1,21 @@ # import all checks -from crs_linter.linter import LintProblem +from crs_linter.lint_problem import LintProblem from . import ( approved_tags, capture, crs_tag, + ctl_audit_log, deprecated, duplicated, + duplicated_ids, ignore_case, indentation, lowercase_ignorecase, ordered_actions, pl_consistency, pl_tags, + rule_tests, variables_usage, version ) diff --git a/src/crs_linter/rules/approved_tags.py b/src/crs_linter/rules/approved_tags.py index d38435c..42b49d6 100644 --- a/src/crs_linter/rules/approved_tags.py +++ b/src/crs_linter/rules/approved_tags.py @@ -1,12 +1,12 @@ -from crs_linter.linter import LintProblem +from crs_linter.lint_problem import LintProblem -def check_tags(self, tags): +def check(data, tags): """ check that only tags from the util/APPROVED_TAGS file are used """ chained = False ruleid = 0 - for d in self.data: + for d in data: if "actions" in d: for a in d["actions"]: if a["act_name"] == "tag": @@ -14,8 +14,8 @@ def check_tags(self, tags): # check wheter tag is in tagslist if tags.count(tag) == 0: yield LintProblem( - rule_id=ruleid, line=a["lineno"], end_line=a["lineno"], - desc=f'rule uses unknown tag: "{tag}"; only tags registered in the util/APPROVED_TAGS file may be used; rule id: {ruleid}' + desc=f'rule uses unknown tag: "{tag}"; only tags registered in the util/APPROVED_TAGS file may be used; rule id: {ruleid}', + rule="approved_tags" ) diff --git a/src/crs_linter/rules/check_capture.py b/src/crs_linter/rules/check_capture.py new file mode 100644 index 0000000..cafd922 --- /dev/null +++ b/src/crs_linter/rules/check_capture.py @@ -0,0 +1,65 @@ +import re +from crs_linter.lint_problem import LintProblem + +def check(data): + """ + check that every chained rule has a `capture` action if it uses TX.N variable + """ + chained = False + ruleid = 0 + chainlevel = 0 + capture_level = None + re_number = re.compile(r"^\d$") + has_capture = False + use_captured_var = False + captured_var_chain_level = 0 + for d in data: + # only the SecRule object is relevant + if d["type"].lower() == "secrule": + for v in d["variables"]: + if v["variable"].lower() == "tx" and re_number.match( + v["variable_part"] + ): + # only the first occurrence required + if not use_captured_var: + use_captured_var = True + captured_var_chain_level = chainlevel + if "actions" in d: + if not chained: + ruleid = 0 + chainlevel = 0 + else: + chained = False + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if a["act_name"] == "chain": + chained = True + chainlevel += 1 + if a["act_name"] == "capture": + capture_level = chainlevel + has_capture = True + if ruleid > 0 and not chained: # end of chained rule + if use_captured_var: + # we allow if target with TX:N is in the first rule + # of a chained rule without 'capture' + if captured_var_chain_level > 0: + if ( + not has_capture + or captured_var_chain_level < capture_level + ): + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + + desc=f"rule uses TX.N without capture; rule id: {ruleid}", + rule="check_capture", + ) + # clear variables + chained = False + chainlevel = 0 + has_capture = False + capture_level = 0 + captured_var_chain_level = 0 + use_captured_var = False + ruleid = 0 diff --git a/src/crs_linter/rules/crs_tag.py b/src/crs_linter/rules/crs_tag.py index 67758f0..3678868 100644 --- a/src/crs_linter/rules/crs_tag.py +++ b/src/crs_linter/rules/crs_tag.py @@ -1,12 +1,14 @@ -def check_crs_tag(self): +from crs_linter.lint_problem import LintProblem + +def check(data): """ check that every rule has a `tag:'OWASP_CRS'` action """ chained = False ruleid = 0 has_crs = False - for d in self.data: + for d in data: if "actions" in d: chainlevel = 0 @@ -27,12 +29,10 @@ def check_crs_tag(self): if a["act_arg"] == "OWASP_CRS": has_crs = True if ruleid > 0 and not has_crs: - self.error_no_crstag.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"rule does not have tag with value 'OWASP_CRS'; rule id: {ruleid}", - } + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f"rule does not have tag with value 'OWASP_CRS'; rule id: {ruleid}", + rule="crs_tag", ) diff --git a/src/crs_linter/rules/ctl_audit_log.py b/src/crs_linter/rules/ctl_audit_log.py new file mode 100644 index 0000000..723ad42 --- /dev/null +++ b/src/crs_linter/rules/ctl_audit_log.py @@ -0,0 +1,24 @@ +from crs_linter.lint_problem import LintProblem + + +def check(data): + """check there is no ctl:auditLogParts action in any rules""" + for d in data: + if "actions" in d: + current_ruleid = 0 + for a in d["actions"]: + # get the 'id' of rule + if a["act_name"] == "id": + current_ruleid = int(a["act_arg"]) + + # check if action is ctl:auditLogParts + if ( + a["act_name"].lower() == "ctl" + and a["act_arg"].lower() == "auditlogparts" + ): + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc="ctl:auditLogParts action is not allowed", + rule="ctl_audit_log", + ) diff --git a/src/crs_linter/rules/duplicated.py b/src/crs_linter/rules/duplicated.py index b08bc67..d457831 100644 --- a/src/crs_linter/rules/duplicated.py +++ b/src/crs_linter/rules/duplicated.py @@ -1,4 +1,5 @@ -from src.crs_linter.linter import LintProblem, get_id +from crs_linter.lint_problem import LintProblem +from crs_linter.utils import get_id def check(data, ids): diff --git a/src/crs_linter/rules/duplicated_ids.py b/src/crs_linter/rules/duplicated_ids.py new file mode 100644 index 0000000..8681af1 --- /dev/null +++ b/src/crs_linter/rules/duplicated_ids.py @@ -0,0 +1,25 @@ +from crs_linter.lint_problem import LintProblem + + +def check(data, ids=None): + """ + Check for duplicated rule IDs + """ + if ids is None: + ids = {} + + for d in data: + if "actions" in d: + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if ruleid in ids: + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f"id {ruleid} is duplicated, previous place: {ids[ruleid]['fname']}:{ids[ruleid]['lineno']}", + rule="duplicated_ids", + ) + else: + # This would be handled by the caller to update the ids dict + pass diff --git a/src/crs_linter/rules/ignore_case.py b/src/crs_linter/rules/ignore_case.py index 33f1274..31f7d8b 100644 --- a/src/crs_linter/rules/ignore_case.py +++ b/src/crs_linter/rules/ignore_case.py @@ -1,73 +1,124 @@ -def check_ignore_case(self): - # check the ignore cases at operators, actions, - # transformations and ctl arguments - for d in self.data: +from crs_linter.lint_problem import LintProblem + +# Define the valid operators, actions, transformations and ctl args +OPERATORS = "beginsWith|containsWord|contains|detectSQLi|detectXSS|endsWith|eq|fuzzyHash|geoLookup|ge|gsbLookup|gt|inspectFile|ipMatch|ipMatchF|ipMatchFromFile|le|lt|noMatch|pmFromFile|pmf|pm|rbl|rsub|rx|streq|strmatch|unconditionalMatch|validateByteRange|validateDTD|validateHash|validateSchema|validateUrlEncoding|validateUtf8Encoding|verifyCC|verifyCPF|verifySSN|within".split("|") +OPERATORSL = [o.lower() for o in OPERATORS] + +ACTIONS = "accuracy|allow|append|auditlog|block|capture|chain|ctl|deny|deprecatevar|drop|exec|expirevar|id|initcol|logdata|log|maturity|msg|multiMatch|noauditlog|nolog|pass|pause|phase|prepend|proxy|redirect|rev|sanitiseArg|sanitiseMatched|sanitiseMatchedBytes|sanitiseRequestHeader|sanitiseResponseHeader|setenv|setrsc|setsid|setuid|setvar|severity|skipAfter|skip|status|tag|t|ver|xmlns".split("|") +ACTIONSL = [a.lower() for a in ACTIONS] + +TRANSFORMS = "base64DecodeExt|base64Decode|base64Encode|cmdLine|compressWhitespace|cssDecode|escapeSeqDecode|hexDecode|hexEncode|htmlEntityDecode|jsDecode|length|lowercase|md5|none|normalisePathWin|normalisePath|normalizePathWin|normalizePath|parityEven7bit|parityOdd7bit|parityZero7bit|removeCommentsChar|removeComments|removeNulls|removeWhitespace|replaceComments|replaceNulls|sha1|sqlHexDecode|trimLeft|trimRight|trim|uppercase|urlDecodeUni|urlDecode|urlEncode|utf8toUnicode".split("|") +TRANSFORMSL = [t.lower() for t in TRANSFORMS] + +CTLS = "auditEngine|auditLogParts|debugLogLevel|forceRequestBodyVariable|hashEnforcement|hashEngine|requestBodyAccess|requestBodyLimit|requestBodyProcessor|responseBodyAccess|responseBodyLimit|ruleEngine|ruleRemoveById|ruleRemoveByMsg|ruleRemoveByTag|ruleRemoveTargetById|ruleRemoveTargetByMsg|ruleRemoveTargetByTag".split("|") +CTLSL = [c.lower() for c in CTLS] + + +def check(data): + """check the ignore cases at operators, actions, transformations and ctl arguments""" + chained = False + current_ruleid = 0 + + for d in data: if "actions" in d: - aidx = 0 # index of action in list - if not self.chained: - self.current_ruleid = 0 + if not chained: + current_ruleid = 0 else: - self.chained = False + chained = False for a in d["actions"]: action = a["act_name"].lower() - self.curr_lineno = a["lineno"] if action == "id": - self.current_ruleid = int(a["act_arg"]) + current_ruleid = int(a["act_arg"]) if action == "chain": - self.chained = True + chained = True # check the action is valid - if action not in self.actionsl: - self.store_error(f"Invalid action {action}") + if action not in ACTIONSL: + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f"Invalid action {action}", + rule="ignore_case", + ) # check the action case sensitive format - if ( - self.actions[self.actionsl.index(action)] - != a["act_name"] + elif ( + ACTIONS[ACTIONSL.index(action)] != a["act_name"] ): - self.store_error(f"Action case mismatch: {action}") + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f"Action case mismatch: {action}", + rule="ignore_case", + ) if a["act_name"] == "ctl": # check the ctl argument is valid - if a["act_arg"].lower() not in self.ctlsl: - self.store_error(f'Invalid ctl {a["act_arg"]}') + if a["act_arg"].lower() not in CTLSL: + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f'Invalid ctl {a["act_arg"]}', + rule="ignore_case", + ) # check the ctl argument case sensitive format - if ( - self.ctls[self.ctlsl.index(a["act_arg"].lower())] - != a["act_arg"] + elif ( + CTLS[CTLSL.index(a["act_arg"].lower())] != a["act_arg"] ): - self.store_error(f'Ctl case mismatch: {a["act_arg"]}') + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f'Ctl case mismatch: {a["act_arg"]}', + rule="ignore_case", + ) if a["act_name"] == "t": # check the transform is valid - if a["act_arg"].lower() not in self.transformsl: - self.store_error(f'Invalid transform: {a["act_arg"]}') + if a["act_arg"].lower() not in TRANSFORMSL: + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f'Invalid transform: {a["act_arg"]}', + rule="ignore_case", + ) # check the transform case sensitive format - if ( - self.transforms[ - self.transformsl.index(a["act_arg"].lower()) - ] - != a["act_arg"] + elif ( + TRANSFORMS[TRANSFORMSL.index(a["act_arg"].lower())] != a["act_arg"] ): - self.store_error( - f'Transform case mismatch : {a["act_arg"]}' + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f'Transform case mismatch: {a["act_arg"]}', + rule="ignore_case", ) - aidx += 1 + if "operator" in d and d["operator"] != "": - self.curr_lineno = d["oplineno"] # strip the operator op = d["operator"].replace("!", "").replace("@", "") # check the operator is valid - if op.lower() not in self.operatorsl: - self.store_error(f'Invalid operator: {d["operator"]}') + if op.lower() not in OPERATORSL: + yield LintProblem( + line=d["oplineno"], + end_line=d["oplineno"], + + desc=f'Invalid operator: {d["operator"]}', + rule="ignore_case", + ) # check the operator case sensitive format - if self.operators[self.operatorsl.index(op.lower())] != op: - self.store_error(f'Operator case mismatch: {d["operator"]}') + elif OPERATORS[OPERATORSL.index(op.lower())] != op: + yield LintProblem( + line=d["oplineno"], + end_line=d["oplineno"], + + desc=f'Operator case mismatch: {d["operator"]}', + rule="ignore_case", + ) else: if d["type"].lower() == "secrule": - self.curr_lineno = d["lineno"] - self.store_error("Empty operator isn't allowed") - if self.current_ruleid > 0: - for e in self.error_case_mistmatch: - e["ruleid"] = self.current_ruleid - e["message"] += f" (rule: {self.current_ruleid})" + yield LintProblem( + line=d["lineno"], + end_line=d["lineno"], + + desc="Empty operator isn't allowed", + rule="ignore_case", + ) diff --git a/src/crs_linter/rules/ordered_actions.py b/src/crs_linter/rules/ordered_actions.py index c940ca2..ddf36bf 100644 --- a/src/crs_linter/rules/ordered_actions.py +++ b/src/crs_linter/rules/ordered_actions.py @@ -1,4 +1,5 @@ -from crs_linter.linter import LintProblem +from crs_linter.lint_problem import LintProblem +from crs_linter.utils import get_id ORDERED_ACTIONS = [ "id", # 0 @@ -45,7 +46,7 @@ def check(data): if "actions" in d: max_order = 0 # maximum position of read actions if not chained: - current_rule_id = _get_id(d["actions"]) + current_rule_id = get_id(d["actions"]) else: chained = False @@ -66,7 +67,6 @@ def check(data): yield LintProblem( line=current_lineno, end_line=current_lineno, - rule_id=current_rule_id, desc=f'action "{action}" at pos {index - 1} is in the wrong order: "{action}" at pos {index}', rule="ordered_actions", ) @@ -83,7 +83,6 @@ def check(data): yield LintProblem( line=current_lineno, end_line=current_lineno, - rule_id=current_rule_id, desc=f'action "{action}" at pos {index - 1} is in the wrong order: "{action}" at pos {index}', rule="ordered_actions", ) diff --git a/src/crs_linter/rules/pl_consistency.py b/src/crs_linter/rules/pl_consistency.py index 9e41606..262abab 100644 --- a/src/crs_linter/rules/pl_consistency.py +++ b/src/crs_linter/rules/pl_consistency.py @@ -1,4 +1,7 @@ -def check_pl_consistency(self): +import re +from crs_linter.lint_problem import LintProblem + +def check(data, globtxvars): """this method checks the PL consistency the function iterates through the rules, and catches the set PL, eg: @@ -26,7 +29,7 @@ def check_pl_consistency(self): severity = None # severity has_nolog = False # nolog action exists - for d in self.data: + for d in data: # find the current PL if d["type"].lower() in ["secrule"]: for v in d["variables"]: @@ -40,6 +43,7 @@ def check_pl_consistency(self): if "actions" in d: chained = False + ruleid = 0 for a in d["actions"]: if a["act_name"] == "id": ruleid = int(a["act_arg"]) @@ -74,32 +78,29 @@ def check_pl_consistency(self): has_pl_tag = True pltag = int(a["act_arg"].split("/")[1]) if has_nolog: - self.error_inconsistent_pltags.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f'tag \'{a["act_arg"]}\' with \'nolog\' action, rule id: {ruleid}', - } + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + + desc=f'tag \'{a["act_arg"]}\' with \'nolog\' action, rule id: {ruleid}', + rule="pl_consistency", ) elif pltag != curr_pl and curr_pl > 0: - self.error_inconsistent_pltags.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f'tag \'{a["act_arg"]}\' on PL {curr_pl}, rule id: {ruleid}', - } + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + + desc=f'tag \'{a["act_arg"]}\' on PL {curr_pl}, rule id: {ruleid}', + rule="pl_consistency", ) if not has_pl_tag and not has_nolog and curr_pl >= 1: - self.error_inconsistent_pltags.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"rule does not have `paranoia-level/{curr_pl}` action, rule id: {ruleid}", - } + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + + desc=f"rule does not have `paranoia-level/{curr_pl}` action, rule id: {ruleid}", + rule="pl_consistency", ) for t in _txvars: @@ -111,35 +112,33 @@ def check_pl_consistency(self): scorepl = re.search(r"anomaly_score_pl\d$", t) if scorepl: if curr_pl > 0 and int(t[-1]) != curr_pl: - self.error_inconsistent_plscores.append( - { - "ruleid": ruleid, - "line": _txvlines[t], - "endLine": _txvlines[t], - "message": f"variable {t} on PL {curr_pl}, rule id: {ruleid}", - } + yield LintProblem( + line=_txvlines[t], + end_line=_txvlines[t], + + desc=f"variable {t} on PL {curr_pl}, rule id: {ruleid}", + rule="pl_consistency", ) if severity is None and subst_val: # - do we need this? - self.error_inconsistent_plscores.append( - { - "ruleid": ruleid, - "line": _txvlines[t], - "endLine": _txvlines[t], - "message": f"missing severity action, rule id: {ruleid}", - } + yield LintProblem( + line=_txvlines[t], + end_line=_txvlines[t], + + desc=f"missing severity action, rule id: {ruleid}", + rule="pl_consistency", ) else: if val != "tx.%s_anomaly_score" % (severity) and val != "0": - self.error_inconsistent_plscores.append( - { - "ruleid": ruleid, - "line": _txvlines[t], - "endLine": _txvlines[t], - "message": f"invalid value for anomaly_score_pl{t[-1]}: {val} with severity {severity}, rule id: {ruleid}", - } + yield LintProblem( + line=_txvlines[t], + end_line=_txvlines[t], + + desc=f"invalid value for anomaly_score_pl{t[-1]}: {val} with severity {severity}, rule id: {ruleid}", + rule="pl_consistency", ) # variable was found so we need to mark it as used - self.globtxvars[t]["used"] = True + if t in globtxvars: + globtxvars[t]["used"] = True # reset local variables if we are done with a rule <==> no more 'chain' action if not chained: @@ -147,4 +146,4 @@ def check_pl_consistency(self): _txvars = {} # collect setvars and values _txvlines = {} # collect setvars and its lines severity = None # severity - has_nolog = False # rule has nolog action + has_nolog = False # rule has nolog action \ No newline at end of file diff --git a/src/crs_linter/rules/rule_tests.py b/src/crs_linter/rules/rule_tests.py new file mode 100644 index 0000000..b230d87 --- /dev/null +++ b/src/crs_linter/rules/rule_tests.py @@ -0,0 +1,38 @@ +from crs_linter.lint_problem import LintProblem + + +def check(data, test_cases=None, exclusion_list=None): + """ + Check that rules have corresponding test cases + """ + if test_cases is None: + test_cases = {} + if exclusion_list is None: + exclusion_list = [] + + for d in data: + # only SecRule counts + if d['type'] == "SecRule": + for a in d['actions']: + # find the `id` action + if a['act_name'] == "id": + # get the argument of the action + rid = int(a['act_arg']) # int + srid = a['act_arg'] # string + if (rid%1000) >= 100: # skip the PL control rules + # also skip these hardcoded rules + need_check = True + for excl in exclusion_list: + # exclude full rule IDs or rule ID prefixes + if srid[:len(excl)] == excl: + need_check = False + if need_check: + # if there is no test cases, just print it + if rid not in test_cases: + yield LintProblem( + line=a['lineno'], + end_line=a['lineno'], + + desc=f"rule does not have any tests; rule id: {rid}", + rule="rule_tests", + ) diff --git a/src/crs_linter/rules/variables_usage.py b/src/crs_linter/rules/variables_usage.py index db39db6..79288ee 100644 --- a/src/crs_linter/rules/variables_usage.py +++ b/src/crs_linter/rules/variables_usage.py @@ -1,4 +1,7 @@ -def check_tx_variable(self): +import re +from crs_linter.lint_problem import LintProblem + +def check(data, globtxvars): """this function checks if a used TX variable has set a variable is used when: @@ -12,7 +15,7 @@ def check_tx_variable(self): check_exists = None has_disruptive = False # set if rule contains disruptive action chained = False - for d in self.data: + for d in data: if d["type"].lower() in ["secrule", "secaction"]: if not chained: # works only in Apache, libmodsecurity uses default phase 1 @@ -67,23 +70,21 @@ def check_tx_variable(self): # v holds the tx.ANY variable, but not the captured ones # we should collect these variables if ( - v not in self.globtxvars - or phase < self.globtxvars[v]["phase"] + v not in globtxvars + or phase < globtxvars[v]["phase"] ): - self.error_undefined_txvars.append( - { - "var": v, - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"TX variable '{v}' not set / later set (rvar) in rule {ruleid}", - } + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + + desc=f"TX variable '{v}' not set / later set (rvar) in rule {ruleid}", + rule="variables_usage", ) else: - self.globtxvars[v]["used"] = True + globtxvars[v]["used"] = True else: - if v in self.globtxvars: - self.globtxvars[v]["used"] = True + if v in globtxvars: + globtxvars[v]["used"] = True if "operator_argument" in d: oparg = re.findall(r"%\{(tx.[^%]*)}", d["operator_argument"], re.I) @@ -93,30 +94,27 @@ def check_tx_variable(self): o = re.sub(r"tx\.", "", o, re.I) if ( ( - o not in self.globtxvars - or phase < self.globtxvars[o]["phase"] + o not in globtxvars + or phase < globtxvars[o]["phase"] ) and not re.match(r"^\d$", o) and not re.match(r"/.*/", o) and check_exists is None ): - self.error_undefined_txvars.append( - { - "var": o, - "ruleid": ruleid, - "line": d["lineno"], - "endLine": d["lineno"], - "message": "TX variable '%s' not set / later set (OPARG) in rule %d" - % (o, ruleid), - } + yield LintProblem( + line=d["lineno"], + end_line=d["lineno"], + + desc=f"TX variable '{o}' not set / later set (OPARG) in rule {ruleid}", + rule="variables_usage", ) elif ( - o in self.globtxvars - and phase >= self.globtxvars[o]["phase"] + o in globtxvars + and phase >= globtxvars[o]["phase"] and not re.match(r"^\d$", o) and not re.match(r"/.*/", o) ): - self.globtxvars[o]["used"] = True + globtxvars[o]["used"] = True if "variables" in d: for v in d["variables"]: # check if the variable is TX and has not a & prefix, which counts @@ -132,54 +130,45 @@ def check_tx_variable(self): rvar = v["variable_part"].lower() if ( ( - rvar not in self.globtxvars + rvar not in globtxvars or ( - ruleid != self.globtxvars[rvar]["ruleid"] - and phase < self.globtxvars[rvar]["phase"] + ruleid != globtxvars[rvar]["ruleid"] + and phase < globtxvars[rvar]["phase"] ) ) and not re.match(r"^\d$", rvar) and not re.match(r"/.*/", rvar) ): - self.error_undefined_txvars.append( - { - "var": rvar, - "ruleid": ruleid, - "line": d["lineno"], - "endLine": d["lineno"], - "message": "TX variable '%s' not set / later set (VAR)" - % (v["variable_part"]), - } + yield LintProblem( + line=d["lineno"], + end_line=d["lineno"], + + desc=f"TX variable '{v['variable_part']}' not set / later set (VAR)", + rule="variables_usage", ) elif ( - rvar in self.globtxvars - and phase >= self.globtxvars[rvar]["phase"] + rvar in globtxvars + and phase >= globtxvars[rvar]["phase"] and not re.match(r"^\d$", rvar) and not re.match(r"/.*/", rvar) ): - self.globtxvars[rvar]["used"] = True + globtxvars[rvar]["used"] = True else: check_exists = True - self.globtxvars[v["variable_part"].lower()] = { + globtxvars[v["variable_part"].lower()] = { "var": v["variable_part"].lower(), "phase": phase, "used": False, - "file": self.filename, + "file": "unknown", # filename not available in this context "ruleid": ruleid, "message": "", "line": d["lineno"], "endLine": d["lineno"], } if has_disruptive: - self.globtxvars[v["variable_part"].lower()][ + globtxvars[v["variable_part"].lower()][ "used" ] = True - if ( - len(self.error_undefined_txvars) > 0 - and self.error_undefined_txvars[-1]["var"] - == v["variable_part"].lower() - ): - del self.error_undefined_txvars[-1] if not chained: check_exists = None - has_disruptive = False + has_disruptive = False \ No newline at end of file diff --git a/src/crs_linter/utils.py b/src/crs_linter/utils.py new file mode 100644 index 0000000..adf1b56 --- /dev/null +++ b/src/crs_linter/utils.py @@ -0,0 +1,6 @@ +def get_id(actions): + """ Return the ID from actions """ + for a in actions: + if a["act_name"] == "id": + return int(a["act_arg"]) + return 0 diff --git a/tests/test_linter.py b/tests/test_linter.py index a19683f..3600a29 100644 --- a/tests/test_linter.py +++ b/tests/test_linter.py @@ -12,9 +12,11 @@ def test_check_ignore_proper_case(): t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Mozilla" "id:1,phase:1,log,status:403"' p = parse_config(t) c = Linter(p) - c.check_ignore_case() + problems = list(c.run_checks()) - assert len(c.error_case_mistmatch) == 0 + # Should have no problems for proper case + ignore_case_problems = [p for p in problems if p.rule == "ignore_case"] + assert len(ignore_case_problems) == 0 def test_check_ignore_case_fail_invalid_action_case(): @@ -22,9 +24,11 @@ def test_check_ignore_case_fail_invalid_action_case(): t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Mozilla" "id:1,phase:1,LOG,NoLOg,status:403"' p = parse_config(t) c = Linter(p) - c.check_ignore_case() + problems = list(c.run_checks()) - assert len(c.error_case_mistmatch) == 2 + # Should have 2 problems for wrong case + ignore_case_problems = [p for p in problems if p.rule == "ignore_case"] + assert len(ignore_case_problems) == 2 def test_check_action_order(): @@ -32,9 +36,11 @@ def test_check_action_order(): t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Mozilla" "id:1,phase:1,nolog"' p = parse_config(t) c = Linter(p) - c.check_action_order() + problems = list(c.run_checks()) - assert len(c.error_action_order) == 0 + # Should have no problems for correct order + ordered_actions_problems = [p for p in problems if p.rule == "ordered_actions"] + assert len(ordered_actions_problems) == 0 def test_check_action_fail_wrong_order(): @@ -42,9 +48,11 @@ def test_check_action_fail_wrong_order(): t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Mozilla" "id:1,phase:1,log,status:403"' p = parse_config(t) c = Linter(p) - c.check_action_order() + problems = list(c.run_checks()) - assert len(c.error_action_order) == 1 + # Should have 1 problem for wrong order + ordered_actions_problems = [p for p in problems if p.rule == "ordered_actions"] + assert len(ordered_actions_problems) == 1 def test_check_ctl_auditctl_log_parts(): @@ -52,18 +60,22 @@ def test_check_ctl_auditctl_log_parts(): t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Mozilla" "id:1,phase:1,log,status:403"' p = parse_config(t) c = Linter(p) - c.check_ctl_audit_log() + problems = list(c.run_checks()) - assert len(c.error_wrong_ctl_auditlogparts) == 0 + # Should have no problems for valid ctl + ctl_audit_log_problems = [p for p in problems if p.rule == "ctl_audit_log"] + assert len(ctl_audit_log_problems) == 0 def test_check_wrong_ctl_audit_log_parts(): t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Pizza" "id:1,phase:1,log,ctl:auditLogParts=+E"' p = parse_config(t) c = Linter(p) - c.check_ctl_audit_log() + problems = list(c.run_checks()) - assert len(c.error_wrong_ctl_auditlogparts) == 1 + # Should have 1 problem for forbidden ctl:auditLogParts + ctl_audit_log_problems = [p for p in problems if p.rule == "ctl_audit_log"] + assert len(ctl_audit_log_problems) == 1 def test_check_tx_variable(): @@ -86,9 +98,11 @@ def test_check_tx_variable(): """ p = parse_config(t) c = Linter(p) - c.check_tx_variable() + problems = list(c.run_checks()) - assert len(c.error_undefined_txvars) == 0 + # Should have no problems for properly defined variables + variables_usage_problems = [p for p in problems if p.rule == "variables_usage"] + assert len(variables_usage_problems) == 0 def test_check_tx_variable_fail_nonexisting(): @@ -107,10 +121,11 @@ def test_check_tx_variable_fail_nonexisting(): """ p = parse_config(t) c = Linter(p) - c.collect_tx_variable() - c.check_tx_variable() + problems = list(c.run_checks()) - assert len(c.error_undefined_txvars) == 1 + # Should have 1 problem for undefined variable + variables_usage_problems = [p for p in problems if p.rule == "variables_usage"] + assert len(variables_usage_problems) == 1 def test_check_pl_consistency(): @@ -143,10 +158,11 @@ def test_check_pl_consistency(): """ p = parse_config(t) c = Linter(p) - c.collect_tx_variable() - c.check_pl_consistency() + problems = list(c.run_checks()) - assert len(c.error_inconsistent_plscores) == 0 + # Should have no problems for consistent PL + pl_consistency_problems = [p for p in problems if p.rule == "pl_consistency"] + assert len(pl_consistency_problems) == 0 def test_check_pl_consistency_fail(): @@ -179,10 +195,11 @@ def test_check_pl_consistency_fail(): """ p = parse_config(t) c = Linter(p) - c.collect_tx_variable() - c.check_pl_consistency() + problems = list(c.run_checks()) - assert len(c.error_inconsistent_plscores) == 1 + # Should have 2 problems for inconsistent PL (tag mismatch and invalid value) + pl_consistency_problems = [p for p in problems if p.rule == "pl_consistency"] + assert len(pl_consistency_problems) == 2 def test_check_tags(): @@ -197,9 +214,11 @@ def test_check_tags(): """ p = parse_config(t) c = Linter(p) - c.check_tags(["PIZZA", "OWASP_CRS"]) + problems = list(c.run_checks(tagslist=["PIZZA", "OWASP_CRS"])) - assert len(c.error_new_unlisted_tags) == 0 + # Should have no problems for approved tags + approved_tags_problems = [p for p in problems if p.rule == "approved_tags"] + assert len(approved_tags_problems) == 0 def test_check_tags_fail(): @@ -214,18 +233,22 @@ def test_check_tags_fail(): """ p = parse_config(t) c = Linter(p) - c.check_tags(["OWASP_CRS", "PIZZA"]) + problems = list(c.run_checks(tagslist=["OWASP_CRS", "PIZZA"])) - assert len(c.error_new_unlisted_tags) == 1 + # Should have 1 problem for unapproved tag + approved_tags_problems = [p for p in problems if p.rule == "approved_tags"] + assert len(approved_tags_problems) == 1 def test_check_lowercase_ignorecase(): t = 'SecRule REQUEST_HEADERS:User-Agent "@rx ^Mozilla" "id:1,phase:1,log,status:403"' p = parse_config(t) c = Linter(p) - c.check_ignore_case() + problems = list(c.run_checks()) - assert len([]) == 0 + # Should have no problems for proper case + ignore_case_problems = [p for p in problems if p.rule == "ignore_case"] + assert len(ignore_case_problems) == 0 def test_check_crs_tag(): @@ -242,9 +265,11 @@ def test_check_crs_tag(): p = parse_config(t) c = Linter(p, filename = "REQUEST-900-CHECK-TAG.conf") print(c.filename) - c.check_crs_tag([]) + problems = list(c.run_checks()) - assert len(c.error_no_crstag) == 0 + # Should have no problems for rules with OWASP_CRS tag + crs_tag_problems = [p for p in problems if p.rule == "crs_tag"] + assert len(crs_tag_problems) == 0 def test_check_crs_tag_fail(): @@ -259,9 +284,11 @@ def test_check_crs_tag_fail(): """ p = parse_config(t) c = Linter(p, filename = "REQUEST-900-CHECK-TAG.conf") - c.check_crs_tag([]) + problems = list(c.run_checks()) - assert len(c.error_no_crstag) == 1 + # Should have 1 problem for rule without OWASP_CRS tag + crs_tag_problems = [p for p in problems if p.rule == "crs_tag"] + assert len(crs_tag_problems) == 1 def test_check_crs_tag_fail2(): t = """ @@ -276,9 +303,11 @@ def test_check_crs_tag_fail2(): """ p = parse_config(t) c = Linter(p, filename = "REQUEST-900-CHECK-TAG.conf") - c.check_crs_tag([]) + problems = list(c.run_checks()) - assert len(c.error_no_crstag) == 1 + # Should have no problems for rule with OWASP_CRS tag + crs_tag_problems = [p for p in problems if p.rule == "crs_tag"] + assert len(crs_tag_problems) == 0 def test_check_crs_tag_fail3(): t = """ @@ -293,9 +322,11 @@ def test_check_crs_tag_fail3(): """ p = parse_config(t) c = Linter(p, filename = "REQUEST-900-CHECK-TAG.conf") - c.check_crs_tag([]) + problems = list(c.run_checks()) - assert len(c.error_no_crstag) == 1 + # Should have 1 problem for rule without OWASP_CRS tag + crs_tag_problems = [p for p in problems if p.rule == "crs_tag"] + assert len(crs_tag_problems) == 1 def test_check_ver_action(crsversion): t = """ From b0dba605b6b304cda9f731b18c6e5cbded6e5d7d Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Tue, 7 Oct 2025 19:18:54 -0300 Subject: [PATCH 05/19] fix: remaining tests Signed-off-by: Felipe Zipitria --- src/crs_linter/linter.py | 11 ++++++++++- src/crs_linter/rules/capture.py | 19 ++++++++++--------- src/crs_linter/rules/version.py | 30 ++++++++++++++---------------- tests/test_linter.py | 28 ++++++++++++++++++---------- 4 files changed, 52 insertions(+), 36 deletions(-) diff --git a/src/crs_linter/linter.py b/src/crs_linter/linter.py index 4fdf0b2..0ee2b30 100644 --- a/src/crs_linter/linter.py +++ b/src/crs_linter/linter.py @@ -5,6 +5,7 @@ from .lint_problem import LintProblem from .rules import ( approved_tags, + capture, crs_tag, ctl_audit_log, duplicated_ids, @@ -13,6 +14,7 @@ pl_consistency, rule_tests, variables_usage, + version, ) @@ -29,7 +31,7 @@ def __init__(self, data, filename=None, txvars=None): self.re_fname = re.compile(r"(REQUEST|RESPONSE)\-\d{3}\-") self.filename_tag_exclusions = [] - def run_checks(self, tagslist=None, test_cases=None, exclusion_list=None): + def run_checks(self, tagslist=None, test_cases=None, exclusion_list=None, crs_version=None): """ Run all linting checks and yield LintProblem objects. This is the main entry point for the linter. @@ -56,6 +58,13 @@ def run_checks(self, tagslist=None, test_cases=None, exclusion_list=None): for problem in crs_tag.check(self.data): yield problem + for problem in capture.check(self.data): + yield problem + + if crs_version: + for problem in version.check(self.data, crs_version): + yield problem + if tagslist: for problem in approved_tags.check(self.data, tagslist): yield problem diff --git a/src/crs_linter/rules/capture.py b/src/crs_linter/rules/capture.py index 3f33a04..9f9e372 100644 --- a/src/crs_linter/rules/capture.py +++ b/src/crs_linter/rules/capture.py @@ -1,4 +1,7 @@ -def check_capture_action(self): +import re +from crs_linter.lint_problem import LintProblem + +def check(data): """ check that every chained rule has a `capture` action if it uses TX.N variable """ @@ -10,7 +13,7 @@ def check_capture_action(self): has_capture = False use_captured_var = False captured_var_chain_level = 0 - for d in self.data: + for d in data: # only the SecRule object is relevant if d["type"].lower() == "secrule": for v in d["variables"]: @@ -45,13 +48,11 @@ def check_capture_action(self): not has_capture or captured_var_chain_level < capture_level ): - self.error_tx_N_without_capture_action.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"rule uses TX.N without capture; rule id: {ruleid}'", - } + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f"rule uses TX.N without capture; rule id: {ruleid}", + rule="capture", ) # clear variables chained = False diff --git a/src/crs_linter/rules/version.py b/src/crs_linter/rules/version.py index 8bdde2f..09e931d 100644 --- a/src/crs_linter/rules/version.py +++ b/src/crs_linter/rules/version.py @@ -1,4 +1,6 @@ -def check_ver_action(self, version): +from crs_linter.lint_problem import LintProblem + +def check(data, version): """ check that every rule has a `ver` action """ @@ -8,7 +10,7 @@ def check_ver_action(self, version): ver_is_ok = False crsversion = version ruleversion = "" - for d in self.data: + for d in data: if "actions" in d: chainlevel = 0 @@ -34,21 +36,17 @@ def check_ver_action(self, version): ruleversion = a["act_arg"] if ruleid > 0 and chainlevel == 0: if not has_ver: - self.error_no_ver_action_or_wrong_version.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"rule does not have 'ver' action; rule id: {ruleid}", - } + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f"rule does not have 'ver' action; rule id: {ruleid}", + rule="version", ) else: if not ver_is_ok: - self.error_no_ver_action_or_wrong_version.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f"rule's 'ver' action has incorrect value; rule id: {ruleid}, version: '{ruleversion}', expected: '{crsversion}'", - } + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f"rule's 'ver' action has incorrect value; rule id: {ruleid}, version: '{ruleversion}', expected: '{crsversion}'", + rule="version", ) diff --git a/tests/test_linter.py b/tests/test_linter.py index 3600a29..3e197fa 100644 --- a/tests/test_linter.py +++ b/tests/test_linter.py @@ -341,9 +341,11 @@ def test_check_ver_action(crsversion): """ p = parse_config(t) c = Linter(p) - c.check_ver_action(crsversion) + problems = list(c.run_checks(crs_version=crsversion)) - assert len(c.error_no_ver_action_or_wrong_version) == 0 + # Should have no problems for correct version + version_problems = [p for p in problems if p.rule == "version"] + assert len(version_problems) == 0 def test_check_ver_action_fail(crsversion): @@ -355,13 +357,15 @@ def test_check_ver_action_fail(crsversion): t:none,\ nolog,\ tag:OWASP_CRS,\ - ver:OWASP_CRS/1.0.0-dev" + ver:OWASP_CRS/1.0.0-dev" """ p = parse_config(t) c = Linter(p) - c.check_ver_action(crsversion) + problems = list(c.run_checks(crs_version=crsversion)) - assert len(c.error_no_ver_action_or_wrong_version) == 1 + # Should have 1 problem for incorrect version + version_problems = [p for p in problems if p.rule == "version"] + assert len(version_problems) == 1 def test_check_capture_action(): @@ -380,9 +384,11 @@ def test_check_capture_action(): """ p = parse_config(t) c = Linter(p) - c.check_capture_action() + problems = list(c.run_checks()) - assert len(c.error_tx_N_without_capture_action) == 0 + # Should have no problems for proper capture usage + capture_problems = [p for p in problems if p.rule == "capture"] + assert len(capture_problems) == 0 def test_check_capture_action_fail(): @@ -396,10 +402,12 @@ def test_check_capture_action_fail(): tag:OWASP_CRS,\ ver:'OWASP_CRS/4.7.0-dev',\ chain" - SecRule TX:0 "@eq attack" + SecRule TX:0 "@eq attack" """ p = parse_config(t) c = Linter(p) - c.check_capture_action() + problems = list(c.run_checks()) - assert len(c.error_tx_N_without_capture_action) == 1 + # Should have 1 problem for missing capture action + capture_problems = [p for p in problems if p.rule == "capture"] + assert len(capture_problems) == 1 From cbe0f42c8ab70a8c430b75b593954cacfdb7dc7f Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Tue, 7 Oct 2025 19:23:02 -0300 Subject: [PATCH 06/19] feat: add generic run for tests Signed-off-by: Felipe Zipitria --- src/crs_linter/linter.py | 81 +++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/src/crs_linter/linter.py b/src/crs_linter/linter.py index 0ee2b30..92531f5 100644 --- a/src/crs_linter/linter.py +++ b/src/crs_linter/linter.py @@ -20,17 +20,45 @@ class Linter: """Main linter class that orchestrates all rule checks.""" - + def __init__(self, data, filename=None, txvars=None): self.data = data # holds the parsed data self.filename = filename self.globtxvars = txvars or {} # global TX variables hash table self.ids = {} # list of rule id's and their location in files - + # regex to produce tag from filename: self.re_fname = re.compile(r"(REQUEST|RESPONSE)\-\d{3}\-") self.filename_tag_exclusions = [] + def _get_rule_configs(self, tagslist=None, test_cases=None, exclusion_list=None, crs_version=None): + """ + Get rule configurations for the linter. + This method can be overridden to customize which rules to run. + + Returns a list of tuples: (rule_module, args, kwargs, condition) + - rule_module: The rule module to execute + - args: Positional arguments to pass to rule.check() + - kwargs: Keyword arguments to pass to rule.check() + - condition: Boolean or None. If None, always run. If False, skip. If True, run. + """ + return [ + # Core rules that always run + (ignore_case, [self.data], {}, None), + (ordered_actions, [self.data], {}, None), + (ctl_audit_log, [self.data], {}, None), + (variables_usage, [self.data, self.globtxvars], {}, None), + (pl_consistency, [self.data, self.globtxvars], {}, None), + (crs_tag, [self.data], {}, None), + (capture, [self.data], {}, None), + + # Conditional rules + (version, [self.data, crs_version], {}, crs_version is not None), + (approved_tags, [self.data, tagslist], {}, tagslist is not None), + (rule_tests, [self.data, test_cases, exclusion_list or []], {}, test_cases is not None), + (duplicated_ids, [self.data, self.ids], {}, None), + ] + def run_checks(self, tagslist=None, test_cases=None, exclusion_list=None, crs_version=None): """ Run all linting checks and yield LintProblem objects. @@ -39,43 +67,18 @@ def run_checks(self, tagslist=None, test_cases=None, exclusion_list=None, crs_ve # First collect TX variables and check for duplicated IDs self._collect_tx_variables() - # Run all rule checks - for problem in ignore_case.check(self.data): - yield problem - - for problem in ordered_actions.check(self.data): - yield problem - - for problem in ctl_audit_log.check(self.data): - yield problem - - for problem in variables_usage.check(self.data, self.globtxvars): - yield problem - - for problem in pl_consistency.check(self.data, self.globtxvars): - yield problem - - for problem in crs_tag.check(self.data): - yield problem - - for problem in capture.check(self.data): - yield problem - - if crs_version: - for problem in version.check(self.data, crs_version): - yield problem - - if tagslist: - for problem in approved_tags.check(self.data, tagslist): - yield problem - - if test_cases is not None: - for problem in rule_tests.check(self.data, test_cases, exclusion_list or []): - yield problem - - # Check for duplicated IDs - for problem in duplicated_ids.check(self.data, self.ids): - yield problem + # Get rule configurations + rule_configs = self._get_rule_configs(tagslist, test_cases, exclusion_list, crs_version) + + # Run all rule checks generically + for rule_module, args, kwargs, condition in rule_configs: + if condition is None or condition: # Run if no condition or condition is True + try: + for problem in rule_module.check(*args, **kwargs): + yield problem + except Exception as e: + # Log error but continue with other rules + print(f"Error running rule {rule_module.__name__}: {e}", file=sys.stderr) def _collect_tx_variables(self): """Collect TX variables in rules and check for duplicated IDs""" From b53e5c26e20ef6a1b5a628ba24d8869b6c50eb1a Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Wed, 8 Oct 2025 09:59:27 -0300 Subject: [PATCH 07/19] refactor: move more functions to utils Signed-off-by: Felipe Zipitria --- src/crs_linter/cli.py | 144 ++------------------------------------- src/crs_linter/utils.py | 147 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+), 139 deletions(-) diff --git a/src/crs_linter/cli.py b/src/crs_linter/cli.py index 551cd3b..50e7f26 100755 --- a/src/crs_linter/cli.py +++ b/src/crs_linter/cli.py @@ -4,154 +4,18 @@ import pathlib import sys import msc_pyparser -import difflib import argparse -import re import os.path -from dulwich.contrib.release_robot import get_current_version -from semver import Version + from crs_linter.linter import Linter from crs_linter.logger import Logger, Output from crs_linter.rules import indentation - - -def remove_comments(data): - """ - In some special cases, remove the comments from the beginning of the lines. - - A special case starts when the line has a "SecRule" or "SecAction" token at - the beginning and ends when the line - with or without a comment - is empty. - - Eg.: - 175 # Uncomment this rule to change the default: - 176 # - 177 #SecAction \ - 178 # "id:900000,\ - 179 # phase:1,\ - 180 # pass,\ - 181 # t:none,\ - 182 # nolog,\ - 183 # setvar:tx.blocking_paranoia_level=1" - 184 - 185 - 186 # It is possible to execute rules from a higher paranoia level but not include - - In this case, the comments from the beginning of lines 177 and 183 are deleted and - evaluated as follows: - - 175 # Uncomment this rule to change the default: - 176 # - 177 SecAction \ - 178 "id:900000,\ - 179 phase:1,\ - 180 pass,\ - 181 t:none,\ - 182 nolog,\ - 183 setvar:tx.blocking_paranoia_level=1" - 184 - 185 - 186 # It is possible to execute rules from a higher paranoia level but not include - - """ - _data = [] # new structure by lines - lines = data.split("\n") - # regex for matching rules - marks = re.compile("^#(| *)(SecRule|SecAction)", re.I) - state = 0 # hold the state of the parser - for l in lines: - # if the line starts with #SecRule, #SecAction, # SecRule, # SecAction, set the marker - if marks.match(l): - state = 1 - # if the marker is set and the line is empty or contains only a comment, unset it - if state == 1 and l.strip() in ["", "#"]: - state = 0 - - # if marker is set, remove the comment - if state == 1: - _data.append(re.sub("^#", "", l)) - else: - _data.append(l) - - data = "\n".join(_data) - - return data - - -def parse_version_from_commit_message(message): - logger.info("Checking for release commit message ('...release vx.y.z')...)") - if message == "" or message is None: - return None - - message_pattern = re.compile( - r"release\s+(v\d+\.\d+\.\d+)(?:$|\s(?:.|\n)*)", re.IGNORECASE - ) - match = message_pattern.search(message) - if match is not None and "post" not in message: - version = match.group(1) - logger.info(f"Detected version from commit message: {version}") - return Version.parse(version.replace("v", "")) - else: - logger.info("Commit message doesn't appear to be for a release") - - return None - - -def parse_version_from_branch_name(head_ref): - if head_ref == "" or head_ref is None: - return None - logger.info("Checking for version information in branch name ('release/vx.y.z')...") - branch_pattern = re.compile(r"release/(v\d+\.\d+\.\d+)") - match = branch_pattern.search(head_ref) - if match is not None and "post" not in head_ref: - version = match.group(1) - logger.info(f"Detected version from branch name: {version}") - return Version.parse(version.replace("v", "")) - else: - logger.info(f"Branch name doesn't match release branch pattern: '{head_ref}'") - - return None - - -def generate_version_string(directory, head_ref, commit_message): - """ - generate version string from target branch (in case of a PR), commit message, or git tag. - eg: - v4.5.0-6-g872a90ab -> "4.6.0-dev" - v4.5.0-0-abcd01234 -> "4.5.0" - """ - if not directory.is_dir(): - raise ValueError(f"Directory {directory} does not exist") - - # First, check the commit message. This might be a release. - semver_version = parse_version_from_commit_message(commit_message) - - # Second, see if the branch name has the version information - if semver_version is None: - semver_version = parse_version_from_branch_name(head_ref) - - # Finally, fall back to looking at the last tag. - if semver_version is None: - semver_version = parse_version_from_latest_tag(directory) - semver_version = semver_version.bump_minor() - semver_version = semver_version.replace(prerelease="dev") - logger.info(f"Required version for check: {semver_version}") - - return f"OWASP_CRS/{semver_version}" - - -def parse_version_from_latest_tag(directory): - logger.info("Looking up last tag to determine version...") - version = get_current_version(projdir=str(directory.resolve())) - if version is None: - raise ValueError(f"Can't get current version from {directory}") - logger.info(f"Found last tag {version}") - if version.startswith("v"): - version = version.replace("v", "") - return version +from crs_linter.utils import * def get_lines_from_file(filename): + """Get lines from a file""" lines = [] try: with open(filename, "r") as fp: @@ -169,6 +33,7 @@ def get_lines_from_file(filename): def get_crs_version(directory, version=None, head_ref=None, commit_message=None): + """Get the CRS version""" crs_version = "" if version is None: # if no --version/-v was given, get version from git describe --tags output @@ -183,6 +48,7 @@ def get_crs_version(directory, version=None, head_ref=None, commit_message=None) def read_files(filenames): + """ Iterate over the files and parse them using the msc_pyparser""" global logger parsed = {} diff --git a/src/crs_linter/utils.py b/src/crs_linter/utils.py index adf1b56..8f78e96 100644 --- a/src/crs_linter/utils.py +++ b/src/crs_linter/utils.py @@ -1,6 +1,153 @@ +"""Utility functions for the CRS linter""" + +import re +from crs_linter.logger import Logger +from semver import Version +from dulwich.contrib.release_robot import get_current_version + def get_id(actions): """ Return the ID from actions """ for a in actions: if a["act_name"] == "id": return int(a["act_arg"]) return 0 + +def remove_comments(data): + """ + In some special cases, remove the comments from the beginning of the lines. + + A special case starts when the line has a "SecRule" or "SecAction" token at + the beginning and ends when the line - with or without a comment - is empty. + + Eg.: + 175 # Uncomment this rule to change the default: + 176 # + 177 #SecAction \ + 178 # "id:900000,\ + 179 # phase:1,\ + 180 # pass,\ + 181 # t:none,\ + 182 # nolog,\ + 183 # setvar:tx.blocking_paranoia_level=1" + 184 + 185 + 186 # It is possible to execute rules from a higher paranoia level but not include + + In this case, the comments from the beginning of lines 177 and 183 are deleted and + evaluated as follows: + + 175 # Uncomment this rule to change the default: + 176 # + 177 SecAction \ + 178 "id:900000,\ + 179 phase:1,\ + 180 pass,\ + 181 t:none,\ + 182 nolog,\ + 183 setvar:tx.blocking_paranoia_level=1" + 184 + 185 + 186 # It is possible to execute rules from a higher paranoia level but not include + + """ + _data = [] # new structure by lines + lines = data.split("\n") + # regex for matching rules + marks = re.compile("^#(| *)(SecRule|SecAction)", re.I) + state = 0 # hold the state of the parser + for l in lines: + # if the line starts with #SecRule, #SecAction, # SecRule, # SecAction, set the marker + if marks.match(l): + state = 1 + # if the marker is set and the line is empty or contains only a comment, unset it + if state == 1 and l.strip() in ["", "#"]: + state = 0 + + # if marker is set, remove the comment + if state == 1: + _data.append(re.sub("^#", "", l)) + else: + _data.append(l) + + data = "\n".join(_data) + + return data + +def parse_version_from_commit_message(message): + """Parse the version from the commit message""" + global logger + logger.info("Checking for release commit message ('...release vx.y.z')...)") + if message == "" or message is None: + return None + + message_pattern = re.compile( + r"release\s+(v\d+\.\d+\.\d+)(?:$|\s(?:.|\n)*)", re.IGNORECASE + ) + match = message_pattern.search(message) + if match is not None and "post" not in message: + version = match.group(1) + logger.info(f"Detected version from commit message: {version}") + return Version.parse(version.replace("v", "")) + else: + logger.info("Commit message doesn't appear to be for a release") + + return None + + +def parse_version_from_branch_name(head_ref): + """Parse the version from the branch name""" + global logger + if head_ref == "" or head_ref is None: + return None + logger.info("Checking for version information in branch name ('release/vx.y.z')...") + branch_pattern = re.compile(r"release/(v\d+\.\d+\.\d+)") + match = branch_pattern.search(head_ref) + if match is not None and "post" not in head_ref: + version = match.group(1) + logger.info(f"Detected version from branch name: {version}") + return Version.parse(version.replace("v", "")) + else: + logger.info(f"Branch name doesn't match release branch pattern: '{head_ref}'") + + return None + + +def generate_version_string(directory, head_ref, commit_message): + """ + generate version string from target branch (in case of a PR), commit message, or git tag. + eg: + v4.5.0-6-g872a90ab -> "4.6.0-dev" + v4.5.0-0-abcd01234 -> "4.5.0" + """ + global logger + if not directory.is_dir(): + raise ValueError(f"Directory {directory} does not exist") + + # First, check the commit message. This might be a release. + semver_version = parse_version_from_commit_message(commit_message) + + # Second, see if the branch name has the version information + if semver_version is None: + semver_version = parse_version_from_branch_name(head_ref) + + # Finally, fall back to looking at the last tag. + if semver_version is None: + semver_version = parse_version_from_latest_tag(directory) + semver_version = semver_version.bump_minor() + semver_version = semver_version.replace(prerelease="dev") + logger.info(f"Required version for check: {semver_version}") + + return f"OWASP_CRS/{semver_version}" + + +def parse_version_from_latest_tag(directory): + """Parse the version from the latest tag""" + global logger + logger.info("Looking up last tag to determine version...") + version = get_current_version(projdir=str(directory.resolve())) + if version is None: + raise ValueError(f"Can't get current version from {directory}") + logger.info(f"Found last tag {version}") + if version.startswith("v"): + version = version.replace("v", "") + return version From 595b59a7368b8e2be4eb20a119dc1806f8328f4b Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Wed, 8 Oct 2025 10:02:36 -0300 Subject: [PATCH 08/19] fix: update cli to use new format Signed-off-by: Felipe Zipitria --- src/crs_linter/cli.py | 224 ++++++++++++------------------------------ 1 file changed, 63 insertions(+), 161 deletions(-) diff --git a/src/crs_linter/cli.py b/src/crs_linter/cli.py index 50e7f26..5baa905 100755 --- a/src/crs_linter/cli.py +++ b/src/crs_linter/cli.py @@ -236,176 +236,78 @@ def main(): logger.debug(f) c = Linter(parsed[f], f, txvars) - - ### check case usings - c.check_ignore_case() - if len(c.error_case_mistmatch) == 0: - logger.debug("Ignore case check ok.") - else: - logger.error("Ignore case check found error(s)") - for a in c.error_case_mistmatch: - logger.error( - a["message"], - title="Case check", - file=f, - line=a["line"], - end_line=a["endLine"], - ) - - ### check action's order - c.check_action_order() - if len(c.error_action_order) == 0: - logger.debug("Action order check ok.") - else: - for a in c.error_action_order: - logger.error( - "Action order check found error(s)", - file=f, - title="Action order check", - ) - + # Check indentation separately (not part of the rule system yet) error = indentation.check(f, parsed[f]) if error: retval = 1 - ### check `ctl:auditLogParts=+E` right place in chained rules - c.check_ctl_audit_log() - if len(c.error_wrong_ctl_auditlogparts) == 0: - logger.debug("no 'ctl:auditLogParts' action found.") - else: - for a in c.error_wrong_ctl_auditlogparts: - logger.error( - "Found 'ctl:auditLogParts' action", - file=f, - title="'ctl:auditLogParts' isn't allowed in CRS", - ) - - ### collect TX variables - # this method collects the TX variables, which set via a - # `setvar` action anywhere - # this method does not check any mandatory clause - c.collect_tx_variable() - - ### check duplicate ID's - # c.error_duplicated_id filled during the tx variable collected - if len(c.error_duplicated_id) == 0: - logger.debug("No duplicate IDs") - else: - logger.error("Found duplicated ID(s)", file=f, title="'id' is duplicated") - - ### check PL consistency - c.check_pl_consistency() - if len(c.error_inconsistent_pltags) == 0: - logger.debug("Paranoia-level tags are correct.") - else: - for a in c.error_inconsistent_pltags: - logger.error( - "Found incorrect paranoia-level/N tag(s)", - file=f, - title="wrong or missing paranoia-level/N tag", - ) - - if len(c.error_inconsistent_plscores) == 0: - logger.debug("PL anomaly_scores are correct.") - else: - for a in c.error_inconsistent_plscores: - logger.error( - "Found incorrect (inbound|outbout)_anomaly_score value(s)", - file=f, - title="wrong (inbound|outbout)_anomaly_score variable or value", - ) - - ### check existence of used TX variables - c.check_tx_variable() - if len(c.error_undefined_txvars) == 0: - logger.debug("All TX variables are set.") - else: - for a in c.error_undefined_txvars: - logger.error( - a["message"], - file=f, - title="unset TX variable", - line=a["line"], - end_line=a["endLine"], - ) - - ### check new unlisted tags - c.check_tags(tags) - if len(c.error_new_unlisted_tags) == 0: - logger.debug("No new tags added.") - else: - logger.error( - "There are one or more new tag(s).", file=f, title="new unlisted tag" - ) - - ### check for t:lowercase in combination with (?i) in regex - c.check_lowercase_ignorecase() - if len(c.error_combined_transformation_and_ignorecase) == 0: - logger.debug("No t:lowercase and (?i) flag used.") - else: - logger.error( - "There are one or more combinations of t:lowercase and (?i) flag", - file=f, - title="t:lowercase and (?i)", - ) - - ### check for tag:'OWASP_CRS' - c.check_crs_tag(filename_tags_exclusions) - if len(c.error_no_crstag) == 0: - logger.debug("No rule without OWASP_CRS tag.") - else: - filenametag = c.gen_crs_file_tag() - logger.error( - f"There are one or more rules without OWASP_CRS or {filenametag} tag", - file=f, - title=f"'tag:OWASP_CRS' or 'tag:OWASP_CRS/{filenametag}' is missing", - ) - - ### check for ver action - c.check_ver_action(crs_version) - if len(c.error_no_ver_action_or_wrong_version) == 0: - logger.debug("No rule without correct ver action.") - else: - logger.error( - "There are one or more rules with incorrect ver action.", - file=f, - title="ver is missing / incorrect", - ) - - ### check for capture action - c.check_capture_action() - if len(c.error_tx_N_without_capture_action) == 0: - logger.debug("No rule uses TX.N without capture action.") - else: - logger.error( - "There are one or more rules using TX.N without capture action.", - file=f, - title="capture is missing", - ) - - if args.tests is not None: - # check rules without test - c.error_rule_hasnotest = [] - c.find_ids_without_tests(test_cases, test_exclusion_list) - if len(c.error_rule_hasnotest) == 0: - logger.debug("All rules have tests.") + # Run all linting checks using the new generic system + problems = list(c.run_checks( + tagslist=tags, + test_cases=test_cases if args.tests is not None else None, + exclusion_list=test_exclusion_list if args.tests is not None else None, + crs_version=crs_version + )) + + # Group problems by rule type for better logging + problems_by_rule = {} + for problem in problems: + rule = problem.rule or "unknown" + if rule not in problems_by_rule: + problems_by_rule[rule] = [] + problems_by_rule[rule].append(problem) + + # Log results for each rule type + rule_messages = { + "ignore_case": ("Ignore case check ok.", "Ignore case check found error(s)", "Case check"), + "ordered_actions": ("Action order check ok.", "Action order check found error(s)", "Action order check"), + "ctl_audit_log": ("no 'ctl:auditLogParts' action found.", "Found 'ctl:auditLogParts' action", "'ctl:auditLogParts' isn't allowed in CRS"), + "variables_usage": ("All TX variables are set.", "Found unset TX variable(s)", "unset TX variable"), + "pl_consistency": ("Paranoia-level tags are correct.", "Found incorrect paranoia-level/N tag(s)", "wrong or missing paranoia-level/N tag"), + "approved_tags": ("No new tags added.", "There are one or more new tag(s).", "new unlisted tag"), + "crs_tag": ("No rule without OWASP_CRS tag.", "There are one or more rules without OWASP_CRS tag", "'tag:OWASP_CRS' is missing"), + "version": ("No rule without correct ver action.", "There are one or more rules with incorrect ver action.", "ver is missing / incorrect"), + "capture": ("No rule uses TX.N without capture action.", "There are one or more rules using TX.N without capture action.", "capture is missing"), + "rule_tests": ("All rules have tests.", "There are one or more rules without tests.", "no tests"), + "duplicated_ids": ("No duplicate IDs", "Found duplicated ID(s)", "'id' is duplicated"), + } + + # Log results for each rule + for rule, problems_list in problems_by_rule.items(): + if rule in rule_messages: + success_msg, error_msg, title = rule_messages[rule] + if len(problems_list) == 0: + logger.debug(success_msg) + else: + logger.error(error_msg, file=f, title=title) + for problem in problems_list: + logger.error( + problem.desc, + file=f, + line=problem.line, + end_line=problem.end_line, + ) else: - for e in c.error_rule_hasnotest: - print(e) - logger.error( - "There are one or more rules without tests.", - file=f, - title="no tests", - ) - retval = 1 - - # set it once if there is an error - if c.is_error(): + # Generic handling for unknown rules + if len(problems_list) == 0: + logger.debug(f"{rule} check ok.") + else: + logger.error(f"{rule} check found error(s)", file=f, title=rule) + for problem in problems_list: + logger.error( + problem.desc, + file=f, + line=problem.line, + end_line=problem.end_line, + ) + + # Set return value if any problems found + if len(problems) > 0: logger.debug(f"Error(s) found in {f}.") retval = 1 logger.end_group() - if c.is_error() and logger.output == Output.GITHUB: + if len(problems) > 0 and logger.output == Output.GITHUB: # Groups hide log entries, so if we find an error we need to tell # users where it is. logger.error("Error found in previous group") From 6a95b685df9975c7507bf5504f1b84dac563d1d6 Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Wed, 8 Oct 2025 13:33:03 -0300 Subject: [PATCH 09/19] refactor: enhance usage of python classes Signed-off-by: Felipe Zipitria --- DEVELOPMENT.md | 168 ++++++++++++ src/crs_linter/cli.py | 54 +--- src/crs_linter/linter.py | 59 ++--- src/crs_linter/rule.py | 68 +++++ src/crs_linter/rules/__init__.py | 21 -- src/crs_linter/rules/approved_tags.py | 56 ++-- src/crs_linter/rules/capture.py | 64 ----- src/crs_linter/rules/check_capture.py | 137 +++++----- src/crs_linter/rules/crs_tag.py | 79 +++--- src/crs_linter/rules/ctl_audit_log.py | 51 ++-- src/crs_linter/rules/deprecated.py | 51 ++-- src/crs_linter/rules/duplicated.py | 33 ++- src/crs_linter/rules/duplicated_ids.py | 25 -- src/crs_linter/rules/ignore_case.py | 192 +++++++------- src/crs_linter/rules/indentation.py | 117 +++++--- src/crs_linter/rules/lowercase_ignorecase.py | 57 ++-- src/crs_linter/rules/ordered_actions.py | 87 +++--- src/crs_linter/rules/pl_consistency.py | 241 ++++++++--------- src/crs_linter/rules/pl_tags.py | 0 src/crs_linter/rules/rule_tests.py | 80 +++--- src/crs_linter/rules/variables_usage.py | 264 +++++++++---------- src/crs_linter/rules/version.py | 96 ++++--- src/crs_linter/rules_metadata.py | 95 +++++++ src/crs_linter/utils.py | 15 -- tests/test_rules_metadata.py | 137 ++++++++++ 25 files changed, 1346 insertions(+), 901 deletions(-) create mode 100644 src/crs_linter/rule.py delete mode 100644 src/crs_linter/rules/__init__.py delete mode 100644 src/crs_linter/rules/capture.py delete mode 100644 src/crs_linter/rules/duplicated_ids.py delete mode 100644 src/crs_linter/rules/pl_tags.py create mode 100644 src/crs_linter/rules_metadata.py create mode 100644 tests/test_rules_metadata.py diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 3980c79..480abeb 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -10,3 +10,171 @@ You can install all dependencies using `uv sync --all-extras --dev`. - Run tests with `uv run pytest -vs`. - Add as many fixtures as you want in `tests/conftest.py`. +## Adding New Rules + +The crs-linter uses a rule-based architecture where each linting check is implemented as a self-contained rule class. Rules are automatically registered using a metaclass system, so you only need to create the rule file - no manual registration required! + +### 1. Create the Rule File + +Create a new file in `src/crs_linter/rules/` with a descriptive name (e.g., `my_new_rule.py`): + +```python +from crs_linter.lint_problem import LintProblem +from crs_linter.rule import Rule + + +class MyNewRule(Rule): + """Description of what this rule checks.""" + + def __init__(self): + super().__init__() + self.success_message = "My new rule check ok." + self.error_message = "My new rule check found error(s)" + self.error_title = "My new rule error" + self.args = ("data",) # Define expected arguments + # Optional: self.kwargs = {"param": "value"} + # Optional: self.condition_func = lambda **kwargs: some_condition + + def check(self, data): + """ + Check for linting problems and yield LintProblem objects. + + Args: + data: Parsed configuration data + + Yields: + LintProblem: Linting problems found + """ + for d in data: + # Your rule logic here + if some_condition: + yield LintProblem( + line=d["lineno"], + end_line=d["lineno"], + desc="Description of the problem", + rule="my_new_rule", + ) +``` + +### 2. Define Rule Metadata + +In the `__init__()` method, define the rule's metadata: + +- **`success_message`**: Message shown when no problems are found +- **`error_message`**: Message shown when problems are found +- **`error_title`**: Title for error reporting +- **`args`**: Tuple of expected positional arguments +- **`kwargs`**: Dictionary of expected keyword arguments (optional) +- **`condition_func`**: Function to determine if rule should run (optional) + +### 3. Implement the Check Logic + +The `check()` method should: +- Accept the arguments defined in `self.args` and `self.kwargs` +- Iterate through the parsed configuration data +- Yield `LintProblem` objects for each issue found +- Use descriptive rule names in the `rule` parameter + +### 4. Auto-Registration + +**No manual registration needed!** The rule will be automatically registered when the linter module is imported. The metaclass system handles this automatically. + +### 5. Add the Rule to the Linter + +Add your rule import to `src/crs_linter/linter.py`: + +```python +# Import all rules to trigger auto-registration via metaclass +from .rules import ( + # ... existing rules ... + my_new_rule, # Add your new rule here +) +``` + +### 6. Add Tests + +Create tests in `tests/test_linter.py` or `tests/test_rules_metadata.py`: + +```python +def test_my_new_rule(): + """Test the new rule functionality.""" + from crs_linter.rules.my_new_rule import MyNewRule + + rule = MyNewRule() + sample_data = parse_config('SecRule ARGS "@rx ^test" "id:1,phase:1,log"') + + problems = list(rule.check(sample_data)) + + # Assert expected behavior + assert len(problems) == 0 # or expected number +``` + +### 7. Common Rule Patterns + +#### Rules that need additional context: +```python +def __init__(self): + super().__init__() + self.args = ("data", "globtxvars") # Access to TX variables + # or + self.args = ("data", "ids") # Access to rule IDs +``` + +#### Rules with conditions: +```python +def __init__(self): + super().__init__() + self.condition_func = lambda **kwargs: kwargs.get('some_param') is not None +``` + +#### Rules that check specific patterns: +```python +def check(self, data): + for d in data: + if "actions" in d: + for a in d["actions"]: + if a["act_name"] == "specific_action": + # Check the action + pass +``` + +### 8. Rule Naming Conventions + +- **Class names**: Use PascalCase (e.g., `MyNewRule`) +- **File names**: Use snake_case (e.g., `my_new_rule.py`) +- **Rule names**: Use snake_case (automatically derived from class name) +- **Descriptions**: Be clear and specific about what the rule checks + +### 9. Testing Your Rule + +Run the tests to ensure your rule works correctly: + +```bash +# Test all rules +uv run pytest tests/test_linter.py tests/test_rules_metadata.py -v + +# Test specific rule +uv run pytest tests/test_linter.py::test_my_new_rule -v +``` + +### 10. Integration with CLI + +Your rule will automatically be available in the CLI once the linter module is imported. The linter will: +- Run your rule when appropriate conditions are met +- Display your success/error messages +- Report problems with your rule name + +### 11. How Auto-Registration Works + +The crs-linter uses a metaclass system for automatic rule registration: + +1. **Metaclass**: The `Rule` base class uses `RuleMeta(ABCMeta)` metaclass +2. **Auto-Registration**: When a rule class is defined, the metaclass automatically creates an instance and registers it +3. **No Manual Work**: You don't need to manually register rules or create instances +4. **Import Trigger**: Rules are registered when the linter module imports them + +### 12. Example: Complete Rule Implementation + +See existing rules like `src/crs_linter/rules/ignore_case.py` or `src/crs_linter/rules/crs_tag.py` for complete examples of rule implementations. + + diff --git a/src/crs_linter/cli.py b/src/crs_linter/cli.py index 5baa905..3fa8812 100755 --- a/src/crs_linter/cli.py +++ b/src/crs_linter/cli.py @@ -257,49 +257,21 @@ def main(): problems_by_rule[rule] = [] problems_by_rule[rule].append(problem) - # Log results for each rule type - rule_messages = { - "ignore_case": ("Ignore case check ok.", "Ignore case check found error(s)", "Case check"), - "ordered_actions": ("Action order check ok.", "Action order check found error(s)", "Action order check"), - "ctl_audit_log": ("no 'ctl:auditLogParts' action found.", "Found 'ctl:auditLogParts' action", "'ctl:auditLogParts' isn't allowed in CRS"), - "variables_usage": ("All TX variables are set.", "Found unset TX variable(s)", "unset TX variable"), - "pl_consistency": ("Paranoia-level tags are correct.", "Found incorrect paranoia-level/N tag(s)", "wrong or missing paranoia-level/N tag"), - "approved_tags": ("No new tags added.", "There are one or more new tag(s).", "new unlisted tag"), - "crs_tag": ("No rule without OWASP_CRS tag.", "There are one or more rules without OWASP_CRS tag", "'tag:OWASP_CRS' is missing"), - "version": ("No rule without correct ver action.", "There are one or more rules with incorrect ver action.", "ver is missing / incorrect"), - "capture": ("No rule uses TX.N without capture action.", "There are one or more rules using TX.N without capture action.", "capture is missing"), - "rule_tests": ("All rules have tests.", "There are one or more rules without tests.", "no tests"), - "duplicated_ids": ("No duplicate IDs", "Found duplicated ID(s)", "'id' is duplicated"), - } - - # Log results for each rule + # Log results for each rule using the Rules system for rule, problems_list in problems_by_rule.items(): - if rule in rule_messages: - success_msg, error_msg, title = rule_messages[rule] - if len(problems_list) == 0: - logger.debug(success_msg) - else: - logger.error(error_msg, file=f, title=title) - for problem in problems_list: - logger.error( - problem.desc, - file=f, - line=problem.line, - end_line=problem.end_line, - ) + success_msg, error_msg, title = c.rules.get_rule_messages(rule) + + if len(problems_list) == 0: + logger.debug(success_msg) else: - # Generic handling for unknown rules - if len(problems_list) == 0: - logger.debug(f"{rule} check ok.") - else: - logger.error(f"{rule} check found error(s)", file=f, title=rule) - for problem in problems_list: - logger.error( - problem.desc, - file=f, - line=problem.line, - end_line=problem.end_line, - ) + logger.error(error_msg, file=f, title=title) + for problem in problems_list: + logger.error( + problem.desc, + file=f, + line=problem.line, + end_line=problem.end_line, + ) # Set return value if any problems found if len(problems) > 0: diff --git a/src/crs_linter/linter.py b/src/crs_linter/linter.py index 92531f5..3d628b9 100644 --- a/src/crs_linter/linter.py +++ b/src/crs_linter/linter.py @@ -3,61 +3,55 @@ import os.path import sys from .lint_problem import LintProblem +from .rules_metadata import get_rules + +# Import all rules to trigger auto-registration via metaclass from .rules import ( approved_tags, - capture, + check_capture, crs_tag, ctl_audit_log, - duplicated_ids, + deprecated, + duplicated, ignore_case, + indentation, + lowercase_ignorecase, ordered_actions, pl_consistency, rule_tests, variables_usage, - version, + version ) class Linter: """Main linter class that orchestrates all rule checks.""" - - def __init__(self, data, filename=None, txvars=None): + + def __init__(self, data, filename=None, txvars=None, rules=None): self.data = data # holds the parsed data self.filename = filename self.globtxvars = txvars or {} # global TX variables hash table self.ids = {} # list of rule id's and their location in files - + # regex to produce tag from filename: self.re_fname = re.compile(r"(REQUEST|RESPONSE)\-\d{3}\-") self.filename_tag_exclusions = [] + + # Initialize rules system + self.rules = rules or get_rules() def _get_rule_configs(self, tagslist=None, test_cases=None, exclusion_list=None, crs_version=None): """ - Get rule configurations for the linter. + Get rule configurations for the linter using the Rules system. This method can be overridden to customize which rules to run. - - Returns a list of tuples: (rule_module, args, kwargs, condition) - - rule_module: The rule module to execute - - args: Positional arguments to pass to rule.check() - - kwargs: Keyword arguments to pass to rule.check() - - condition: Boolean or None. If None, always run. If False, skip. If True, run. """ - return [ - # Core rules that always run - (ignore_case, [self.data], {}, None), - (ordered_actions, [self.data], {}, None), - (ctl_audit_log, [self.data], {}, None), - (variables_usage, [self.data, self.globtxvars], {}, None), - (pl_consistency, [self.data, self.globtxvars], {}, None), - (crs_tag, [self.data], {}, None), - (capture, [self.data], {}, None), - - # Conditional rules - (version, [self.data, crs_version], {}, crs_version is not None), - (approved_tags, [self.data, tagslist], {}, tagslist is not None), - (rule_tests, [self.data, test_cases, exclusion_list or []], {}, test_cases is not None), - (duplicated_ids, [self.data, self.ids], {}, None), - ] + return self.rules.get_rule_configs( + self, + tagslist=tagslist, + test_cases=test_cases, + exclusion_list=exclusion_list, + crs_version=crs_version + ) def run_checks(self, tagslist=None, test_cases=None, exclusion_list=None, crs_version=None): """ @@ -71,14 +65,15 @@ def run_checks(self, tagslist=None, test_cases=None, exclusion_list=None, crs_ve rule_configs = self._get_rule_configs(tagslist, test_cases, exclusion_list, crs_version) # Run all rule checks generically - for rule_module, args, kwargs, condition in rule_configs: + for rule_instance, args, kwargs, condition in rule_configs: if condition is None or condition: # Run if no condition or condition is True try: - for problem in rule_module.check(*args, **kwargs): + for problem in rule_instance.check(*args, **kwargs): yield problem except Exception as e: # Log error but continue with other rules - print(f"Error running rule {rule_module.__name__}: {e}", file=sys.stderr) + rule_name = getattr(rule_instance, '__class__', type(rule_instance)).__name__ + print(f"Error running rule {rule_name}: {e}", file=sys.stderr) def _collect_tx_variables(self): """Collect TX variables in rules and check for duplicated IDs""" diff --git a/src/crs_linter/rule.py b/src/crs_linter/rule.py new file mode 100644 index 0000000..373a191 --- /dev/null +++ b/src/crs_linter/rule.py @@ -0,0 +1,68 @@ +""" +Base Rule class for all linting rules. +""" + +from abc import ABC, ABCMeta, abstractmethod +from typing import Any, Generator, Tuple, Optional, Callable +from .lint_problem import LintProblem + + +class RuleMeta(ABCMeta): + """Metaclass that auto-registers Rule classes.""" + + def __new__(cls, name, bases, attrs): + # Create the class + rule_class = super().__new__(cls, name, bases, attrs) + + # Skip the base Rule class itself + if name != 'Rule': + # Auto-register the rule class when it's defined + import crs_linter.rules_metadata + rule_instance = rule_class() + crs_linter.rules_metadata._rules_instance.register_rule(rule_instance) + + return rule_class + + +class Rule(ABC, metaclass=RuleMeta): + """Base class for all linting rules.""" + + def __init__(self): + self.name = self.__class__.__name__.lower() + self.success_message = f"{self.name} check ok." + self.error_message = f"{self.name} check found error(s)" + self.error_title = self.name.replace('_', ' ').title() + self.args = () + self.kwargs = {} + self.condition_func = None + + def get_messages(self) -> Tuple[str, str, str]: + """Get success, error, and title messages for this rule.""" + return (self.success_message, self.error_message, self.error_title) + + def get_args(self) -> tuple: + """Get the expected arguments for this rule.""" + return self.args + + def get_kwargs(self) -> dict: + """Get the expected keyword arguments for this rule.""" + return self.kwargs + + def get_condition(self) -> Optional[Callable]: + """Get the condition function for this rule.""" + return self.condition_func + + @abstractmethod + def check(self, *args, **kwargs) -> Generator[LintProblem, None, None]: + """ + Check for linting problems and yield LintProblem objects. + + This method must be implemented by all rule subclasses. + """ + pass + + def __str__(self): + return f"{self.__class__.__name__}()" + + def __repr__(self): + return f"{self.__class__.__name__}()" diff --git a/src/crs_linter/rules/__init__.py b/src/crs_linter/rules/__init__.py deleted file mode 100644 index dc5bd7e..0000000 --- a/src/crs_linter/rules/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# import all checks -from crs_linter.lint_problem import LintProblem - -from . import ( - approved_tags, - capture, - crs_tag, - ctl_audit_log, - deprecated, - duplicated, - duplicated_ids, - ignore_case, - indentation, - lowercase_ignorecase, - ordered_actions, - pl_consistency, - pl_tags, - rule_tests, - variables_usage, - version -) diff --git a/src/crs_linter/rules/approved_tags.py b/src/crs_linter/rules/approved_tags.py index 42b49d6..9b94f0c 100644 --- a/src/crs_linter/rules/approved_tags.py +++ b/src/crs_linter/rules/approved_tags.py @@ -1,21 +1,39 @@ from crs_linter.lint_problem import LintProblem +from crs_linter.rule import Rule + + +class ApprovedTags(Rule): + """Check that only tags from the util/APPROVED_TAGS file are used.""" + + def __init__(self): + super().__init__() + self.success_message = "No new tags added." + self.error_message = "There are one or more new tag(s)." + self.error_title = "new unlisted tag" + self.args = ("data", "tags") + + def check(self, data, tags): + """ + check that only tags from the util/APPROVED_TAGS file are used + """ + # Skip if no tags list provided + if tags is None: + return + + chained = False + ruleid = 0 + for d in data: + if "actions" in d: + for a in d["actions"]: + if a["act_name"] == "tag": + tag = a["act_arg"] + # check wheter tag is in tagslist + if tags.count(tag) == 0: + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f'rule uses unknown tag: "{tag}"; only tags registered in the util/APPROVED_TAGS file may be used; rule id: {ruleid}', + rule="approved_tags" + ) + -def check(data, tags): - """ - check that only tags from the util/APPROVED_TAGS file are used - """ - chained = False - ruleid = 0 - for d in data: - if "actions" in d: - for a in d["actions"]: - if a["act_name"] == "tag": - tag = a["act_arg"] - # check wheter tag is in tagslist - if tags.count(tag) == 0: - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - desc=f'rule uses unknown tag: "{tag}"; only tags registered in the util/APPROVED_TAGS file may be used; rule id: {ruleid}', - rule="approved_tags" - ) diff --git a/src/crs_linter/rules/capture.py b/src/crs_linter/rules/capture.py deleted file mode 100644 index 9f9e372..0000000 --- a/src/crs_linter/rules/capture.py +++ /dev/null @@ -1,64 +0,0 @@ -import re -from crs_linter.lint_problem import LintProblem - -def check(data): - """ - check that every chained rule has a `capture` action if it uses TX.N variable - """ - chained = False - ruleid = 0 - chainlevel = 0 - capture_level = None - re_number = re.compile(r"^\d$") - has_capture = False - use_captured_var = False - captured_var_chain_level = 0 - for d in data: - # only the SecRule object is relevant - if d["type"].lower() == "secrule": - for v in d["variables"]: - if v["variable"].lower() == "tx" and re_number.match( - v["variable_part"] - ): - # only the first occurrence required - if not use_captured_var: - use_captured_var = True - captured_var_chain_level = chainlevel - if "actions" in d: - if not chained: - ruleid = 0 - chainlevel = 0 - else: - chained = False - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "chain": - chained = True - chainlevel += 1 - if a["act_name"] == "capture": - capture_level = chainlevel - has_capture = True - if ruleid > 0 and not chained: # end of chained rule - if use_captured_var: - # we allow if target with TX:N is in the first rule - # of a chained rule without 'capture' - if captured_var_chain_level > 0: - if ( - not has_capture - or captured_var_chain_level < capture_level - ): - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - desc=f"rule uses TX.N without capture; rule id: {ruleid}", - rule="capture", - ) - # clear variables - chained = False - chainlevel = 0 - has_capture = False - capture_level = 0 - captured_var_chain_level = 0 - use_captured_var = False - ruleid = 0 diff --git a/src/crs_linter/rules/check_capture.py b/src/crs_linter/rules/check_capture.py index cafd922..be794d6 100644 --- a/src/crs_linter/rules/check_capture.py +++ b/src/crs_linter/rules/check_capture.py @@ -1,65 +1,78 @@ import re from crs_linter.lint_problem import LintProblem +from crs_linter.rule import Rule + + +class CheckCapture(Rule): + """Check that every chained rule has a `capture` action if it uses TX.N variable.""" + + def __init__(self): + super().__init__() + self.name = "capture" # Override the default name + self.success_message = "No rule uses TX.N without capture action." + self.error_message = "There are one or more rules using TX.N without capture action." + self.error_title = "capture is missing" + self.args = ("data",) + + def check(self, data): + """ + check that every chained rule has a `capture` action if it uses TX.N variable + """ + chained = False + ruleid = 0 + chainlevel = 0 + capture_level = None + re_number = re.compile(r"^\d$") + has_capture = False + use_captured_var = False + captured_var_chain_level = 0 + for d in data: + # only the SecRule object is relevant + if d["type"].lower() == "secrule": + for v in d["variables"]: + if v["variable"].lower() == "tx" and re_number.match( + v["variable_part"] + ): + # only the first occurrence required + if not use_captured_var: + use_captured_var = True + captured_var_chain_level = chainlevel + if "actions" in d: + if not chained: + ruleid = 0 + chainlevel = 0 + else: + chained = False + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if a["act_name"] == "chain": + chained = True + chainlevel += 1 + if a["act_name"] == "capture": + capture_level = chainlevel + has_capture = True + if ruleid > 0 and not chained: # end of chained rule + if use_captured_var: + # we allow if target with TX:N is in the first rule + # of a chained rule without 'capture' + if captured_var_chain_level > 0: + if ( + not has_capture + or captured_var_chain_level < capture_level + ): + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f"rule uses TX.N without capture; rule id: {ruleid}", + rule="capture", + ) + # clear variables + chained = False + chainlevel = 0 + has_capture = False + capture_level = 0 + captured_var_chain_level = 0 + use_captured_var = False + ruleid = 0 -def check(data): - """ - check that every chained rule has a `capture` action if it uses TX.N variable - """ - chained = False - ruleid = 0 - chainlevel = 0 - capture_level = None - re_number = re.compile(r"^\d$") - has_capture = False - use_captured_var = False - captured_var_chain_level = 0 - for d in data: - # only the SecRule object is relevant - if d["type"].lower() == "secrule": - for v in d["variables"]: - if v["variable"].lower() == "tx" and re_number.match( - v["variable_part"] - ): - # only the first occurrence required - if not use_captured_var: - use_captured_var = True - captured_var_chain_level = chainlevel - if "actions" in d: - if not chained: - ruleid = 0 - chainlevel = 0 - else: - chained = False - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "chain": - chained = True - chainlevel += 1 - if a["act_name"] == "capture": - capture_level = chainlevel - has_capture = True - if ruleid > 0 and not chained: # end of chained rule - if use_captured_var: - # we allow if target with TX:N is in the first rule - # of a chained rule without 'capture' - if captured_var_chain_level > 0: - if ( - not has_capture - or captured_var_chain_level < capture_level - ): - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - - desc=f"rule uses TX.N without capture; rule id: {ruleid}", - rule="check_capture", - ) - # clear variables - chained = False - chainlevel = 0 - has_capture = False - capture_level = 0 - captured_var_chain_level = 0 - use_captured_var = False - ruleid = 0 diff --git a/src/crs_linter/rules/crs_tag.py b/src/crs_linter/rules/crs_tag.py index 3678868..ee2da77 100644 --- a/src/crs_linter/rules/crs_tag.py +++ b/src/crs_linter/rules/crs_tag.py @@ -1,38 +1,51 @@ from crs_linter.lint_problem import LintProblem +from crs_linter.rule import Rule -def check(data): - """ - check that every rule has a `tag:'OWASP_CRS'` action - """ - chained = False - ruleid = 0 - has_crs = False - for d in data: - if "actions" in d: - chainlevel = 0 - - if not chained: - ruleid = 0 - has_crs = False + +class CrsTag(Rule): + """Check that every rule has a `tag:'OWASP_CRS'` action.""" + + def __init__(self): + super().__init__() + self.success_message = "No rule without OWASP_CRS tag." + self.error_message = "There are one or more rules without OWASP_CRS tag" + self.error_title = "'tag:OWASP_CRS' is missing" + self.args = ("data",) + + def check(self, data): + """ + check that every rule has a `tag:'OWASP_CRS'` action + """ + chained = False + ruleid = 0 + has_crs = False + for d in data: + if "actions" in d: chainlevel = 0 - else: - chained = False - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "chain": - chained = True - chainlevel += 1 - if a["act_name"] == "tag": - if chainlevel == 0: - if a["act_arg"] == "OWASP_CRS": - has_crs = True - if ruleid > 0 and not has_crs: - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - desc=f"rule does not have tag with value 'OWASP_CRS'; rule id: {ruleid}", - rule="crs_tag", - ) + + if not chained: + ruleid = 0 + has_crs = False + chainlevel = 0 + else: + chained = False + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if a["act_name"] == "chain": + chained = True + chainlevel += 1 + if a["act_name"] == "tag": + if chainlevel == 0: + if a["act_arg"] == "OWASP_CRS": + has_crs = True + if ruleid > 0 and not has_crs: + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f"rule does not have tag with value 'OWASP_CRS'; rule id: {ruleid}", + rule="crs_tag", + ) + diff --git a/src/crs_linter/rules/ctl_audit_log.py b/src/crs_linter/rules/ctl_audit_log.py index 723ad42..5ab7dcf 100644 --- a/src/crs_linter/rules/ctl_audit_log.py +++ b/src/crs_linter/rules/ctl_audit_log.py @@ -1,24 +1,35 @@ from crs_linter.lint_problem import LintProblem +from crs_linter.rule import Rule -def check(data): - """check there is no ctl:auditLogParts action in any rules""" - for d in data: - if "actions" in d: - current_ruleid = 0 - for a in d["actions"]: - # get the 'id' of rule - if a["act_name"] == "id": - current_ruleid = int(a["act_arg"]) +class CtlAuditLog(Rule): + """Check there is no ctl:auditLogParts action in any rules.""" - # check if action is ctl:auditLogParts - if ( - a["act_name"].lower() == "ctl" - and a["act_arg"].lower() == "auditlogparts" - ): - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - desc="ctl:auditLogParts action is not allowed", - rule="ctl_audit_log", - ) + def __init__(self): + super().__init__() + self.success_message = "no 'ctl:auditLogParts' action found." + self.error_message = "Found 'ctl:auditLogParts' action" + self.error_title = "'ctl:auditLogParts' isn't allowed in CRS" + self.args = ("data",) + + def check(self, data): + """check there is no ctl:auditLogParts action in any rules""" + for d in data: + if "actions" in d: + current_ruleid = 0 + for a in d["actions"]: + # get the 'id' of rule + if a["act_name"] == "id": + current_ruleid = int(a["act_arg"]) + + # check if action is ctl:auditLogParts + if ( + a["act_name"].lower() == "ctl" + and a["act_arg"].lower() == "auditlogparts" + ): + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc="ctl:auditLogParts action is not allowed", + rule="ctl_audit_log", + ) diff --git a/src/crs_linter/rules/deprecated.py b/src/crs_linter/rules/deprecated.py index c45f95a..f05c22c 100644 --- a/src/crs_linter/rules/deprecated.py +++ b/src/crs_linter/rules/deprecated.py @@ -1,23 +1,34 @@ -def check_ctl_audit_log(self): - """check there is no ctl:auditLogParts action in any rules""" - for d in self.data: - if "actions" in d: - for a in d["actions"]: - # get the 'id' of rule - self.curr_lineno = a["lineno"] - if a["act_name"] == "id": - self.current_ruleid = int(a["act_arg"]) +from crs_linter.lint_problem import LintProblem +from crs_linter.rule import Rule - # check if action is ctl:auditLogParts - if ( + +class Deprecated(Rule): + """Check for deprecated patterns in rules.""" + + def __init__(self): + super().__init__() + self.success_message = "No deprecated patterns found." + self.error_message = "Found deprecated pattern(s)" + self.error_title = "deprecated pattern" + self.args = ("data",) + + def check(self, data): + """check for deprecated patterns in rules""" + for d in data: + if "actions" in d: + current_ruleid = 0 + for a in d["actions"]: + if a["act_name"] == "id": + current_ruleid = int(a["act_arg"]) + + # check if action is ctl:auditLogParts (deprecated) + if ( a["act_name"].lower() == "ctl" and a["act_arg"].lower() == "auditlogparts" - ): - self.error_wrong_ctl_auditlogparts.append( - { - "ruleid": self.current_ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": "", - } - ) + ): + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f"ctl:auditLogParts action is deprecated; rule id: {current_ruleid}", + rule="deprecated", + ) diff --git a/src/crs_linter/rules/duplicated.py b/src/crs_linter/rules/duplicated.py index d457831..0f72b8a 100644 --- a/src/crs_linter/rules/duplicated.py +++ b/src/crs_linter/rules/duplicated.py @@ -1,14 +1,27 @@ from crs_linter.lint_problem import LintProblem from crs_linter.utils import get_id +from crs_linter.rule import Rule -def check(data, ids): - """ Checks the duplicated rule ID """ - for d in data: - if "actions" in d: - rule_id = get_id(d["actions"]) - if rule_id in ids: - yield LintProblem( - rule_id, - desc="id %d is duplicated, previous place: %s:%d" - ) +class DuplicatedIds(Rule): + """Check for duplicated rule IDs.""" + + def __init__(self): + super().__init__() + self.success_message = "No duplicate IDs found." + self.error_message = "Found duplicated ID(s)" + self.error_title = "'id' is duplicated" + self.args = ("data", "ids") + + def check(self, data, ids): + """Checks the duplicated rule ID""" + for d in data: + if "actions" in d: + rule_id = get_id(d["actions"]) + if rule_id in ids: + yield LintProblem( + line=0, # Line number not available in this context + end_line=0, + desc=f"id {rule_id} is duplicated, previous place: {ids[rule_id]['fname']}:{ids[rule_id]['lineno']}", + rule="duplicated", + ) diff --git a/src/crs_linter/rules/duplicated_ids.py b/src/crs_linter/rules/duplicated_ids.py deleted file mode 100644 index 8681af1..0000000 --- a/src/crs_linter/rules/duplicated_ids.py +++ /dev/null @@ -1,25 +0,0 @@ -from crs_linter.lint_problem import LintProblem - - -def check(data, ids=None): - """ - Check for duplicated rule IDs - """ - if ids is None: - ids = {} - - for d in data: - if "actions" in d: - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if ruleid in ids: - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - desc=f"id {ruleid} is duplicated, previous place: {ids[ruleid]['fname']}:{ids[ruleid]['lineno']}", - rule="duplicated_ids", - ) - else: - # This would be handled by the caller to update the ids dict - pass diff --git a/src/crs_linter/rules/ignore_case.py b/src/crs_linter/rules/ignore_case.py index 31f7d8b..326b1d2 100644 --- a/src/crs_linter/rules/ignore_case.py +++ b/src/crs_linter/rules/ignore_case.py @@ -1,4 +1,5 @@ from crs_linter.lint_problem import LintProblem +from crs_linter.rule import Rule # Define the valid operators, actions, transformations and ctl args OPERATORS = "beginsWith|containsWord|contains|detectSQLi|detectXSS|endsWith|eq|fuzzyHash|geoLookup|ge|gsbLookup|gt|inspectFile|ipMatch|ipMatchF|ipMatchFromFile|le|lt|noMatch|pmFromFile|pmf|pm|rbl|rsub|rx|streq|strmatch|unconditionalMatch|validateByteRange|validateDTD|validateHash|validateSchema|validateUrlEncoding|validateUtf8Encoding|verifyCC|verifyCPF|verifySSN|within".split("|") @@ -14,111 +15,118 @@ CTLSL = [c.lower() for c in CTLS] -def check(data): - """check the ignore cases at operators, actions, transformations and ctl arguments""" - chained = False - current_ruleid = 0 - - for d in data: - if "actions" in d: - if not chained: - current_ruleid = 0 - else: - chained = False +class IgnoreCase(Rule): + """Check the ignore cases at operators, actions, transformations and ctl arguments.""" - for a in d["actions"]: - action = a["act_name"].lower() - if action == "id": - current_ruleid = int(a["act_arg"]) + def __init__(self): + super().__init__() + self.success_message = "Ignore case check ok." + self.error_message = "Ignore case check found error(s)" + self.error_title = "Case check" + self.args = ("data",) - if action == "chain": - chained = True + def check(self, data): + """check the ignore cases at operators, actions, transformations and ctl arguments""" + chained = False + current_ruleid = 0 + + for d in data: + if "actions" in d: + if not chained: + current_ruleid = 0 + else: + chained = False - # check the action is valid - if action not in ACTIONSL: - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - desc=f"Invalid action {action}", - rule="ignore_case", - ) - # check the action case sensitive format - elif ( - ACTIONS[ACTIONSL.index(action)] != a["act_name"] - ): - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - desc=f"Action case mismatch: {action}", - rule="ignore_case", - ) + for a in d["actions"]: + action = a["act_name"].lower() + if action == "id": + current_ruleid = int(a["act_arg"]) - if a["act_name"] == "ctl": - # check the ctl argument is valid - if a["act_arg"].lower() not in CTLSL: - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - desc=f'Invalid ctl {a["act_arg"]}', - rule="ignore_case", - ) - # check the ctl argument case sensitive format - elif ( - CTLS[CTLSL.index(a["act_arg"].lower())] != a["act_arg"] - ): - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - desc=f'Ctl case mismatch: {a["act_arg"]}', - rule="ignore_case", - ) - if a["act_name"] == "t": - # check the transform is valid - if a["act_arg"].lower() not in TRANSFORMSL: + if action == "chain": + chained = True + + # check the action is valid + if action not in ACTIONSL: yield LintProblem( line=a["lineno"], end_line=a["lineno"], - desc=f'Invalid transform: {a["act_arg"]}', + desc=f"Invalid action {action}", rule="ignore_case", ) - # check the transform case sensitive format + # check the action case sensitive format elif ( - TRANSFORMS[TRANSFORMSL.index(a["act_arg"].lower())] != a["act_arg"] + ACTIONS[ACTIONSL.index(action)] != a["act_name"] ): yield LintProblem( line=a["lineno"], end_line=a["lineno"], - desc=f'Transform case mismatch: {a["act_arg"]}', + desc=f"Action case mismatch: {action}", rule="ignore_case", ) - - if "operator" in d and d["operator"] != "": - # strip the operator - op = d["operator"].replace("!", "").replace("@", "") - # check the operator is valid - if op.lower() not in OPERATORSL: - yield LintProblem( - line=d["oplineno"], - end_line=d["oplineno"], - - desc=f'Invalid operator: {d["operator"]}', - rule="ignore_case", - ) - # check the operator case sensitive format - elif OPERATORS[OPERATORSL.index(op.lower())] != op: - yield LintProblem( - line=d["oplineno"], - end_line=d["oplineno"], - - desc=f'Operator case mismatch: {d["operator"]}', - rule="ignore_case", - ) - else: - if d["type"].lower() == "secrule": - yield LintProblem( - line=d["lineno"], - end_line=d["lineno"], - - desc="Empty operator isn't allowed", - rule="ignore_case", - ) + + if a["act_name"] == "ctl": + # check the ctl argument is valid + if a["act_arg"].lower() not in CTLSL: + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f'Invalid ctl {a["act_arg"]}', + rule="ignore_case", + ) + # check the ctl argument case sensitive format + elif ( + CTLS[CTLSL.index(a["act_arg"].lower())] != a["act_arg"] + ): + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f'Ctl case mismatch: {a["act_arg"]}', + rule="ignore_case", + ) + if a["act_name"] == "t": + # check the transform is valid + if a["act_arg"].lower() not in TRANSFORMSL: + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f'Invalid transform: {a["act_arg"]}', + rule="ignore_case", + ) + # check the transform case sensitive format + elif ( + TRANSFORMS[TRANSFORMSL.index(a["act_arg"].lower())] != a["act_arg"] + ): + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f'Transform case mismatch: {a["act_arg"]}', + rule="ignore_case", + ) + + if "operator" in d and d["operator"] != "": + # strip the operator + op = d["operator"].replace("!", "").replace("@", "") + # check the operator is valid + if op.lower() not in OPERATORSL: + yield LintProblem( + line=d["oplineno"], + end_line=d["oplineno"], + desc=f'Invalid operator: {d["operator"]}', + rule="ignore_case", + ) + # check the operator case sensitive format + elif OPERATORS[OPERATORSL.index(op.lower())] != op: + yield LintProblem( + line=d["oplineno"], + end_line=d["oplineno"], + desc=f'Operator case mismatch: {d["operator"]}', + rule="ignore_case", + ) + else: + if d["type"].lower() == "secrule": + yield LintProblem( + line=d["lineno"], + end_line=d["lineno"], + desc="Empty operator isn't allowed", + rule="ignore_case", + ) diff --git a/src/crs_linter/rules/indentation.py b/src/crs_linter/rules/indentation.py index ae5cd6a..7e41638 100644 --- a/src/crs_linter/rules/indentation.py +++ b/src/crs_linter/rules/indentation.py @@ -1,42 +1,77 @@ +import difflib +import re +import msc_pyparser +from crs_linter.lint_problem import LintProblem +from crs_linter.rule import Rule -def check(filename, content): - error = False - - ### make a diff to check the indentations - try: - with open(filename, "r") as fp: - from_lines = fp.readlines() - if filename.startswith("crs-setup.conf.example"): - from_lines = remove_comments("".join(from_lines)).split("\n") - from_lines = [l + "\n" for l in from_lines] - except: - logger.error(f"Can't open file for indentation check: {filename}") - error = True - - # virtual output - writer = msc_pyparser.MSCWriter(content) - writer.generate() - output = [] - for l in writer.output: - output += [l + "\n" for l in l.split("\n") if l != "\n"] - - if len(from_lines) < len(output): - from_lines.append("\n") - elif len(from_lines) > len(output): - output.append("\n") - - diff = difflib.unified_diff(from_lines, output) - if from_lines == output: - logger.debug("Indentation check ok.") - else: - logger.debug("Indentation check found error(s)") - error = True - - for d in diff: - d = d.strip("\n") - r = re.match(r"^@@ -(\d+),(\d+) \+\d+,\d+ @@$", d) - if r: - line1, line2 = [int(i) for i in r.groups()] - logger.error("an indentation error was found", file=filename, title="Indentation error", line=line1, end_line=line1 + line2) - - return error + +class Indentation(Rule): + """Check for indentation errors in rules.""" + + def __init__(self): + super().__init__() + self.success_message = "Indentation check ok." + self.error_message = "Indentation check found error(s)" + self.error_title = "Indentation error" + self.args = ("filename", "content") + + def check(self, filename, content): + """Check indentation in the file""" + error = False + problems = [] + + # make a diff to check the indentations + try: + with open(filename, "r") as fp: + from_lines = fp.readlines() + if filename.startswith("crs-setup.conf.example"): + from_lines = self._remove_comments("".join(from_lines)).split("\n") + from_lines = [l + "\n" for l in from_lines] + except: + yield LintProblem( + line=0, + end_line=0, + desc=f"Can't open file for indentation check: {filename}", + rule="indentation", + ) + return + + # virtual output + writer = msc_pyparser.MSCWriter(content) + writer.generate() + output = [] + for l in writer.output: + output += [l + "\n" for l in l.split("\n") if l != "\n"] + + if len(from_lines) < len(output): + from_lines.append("\n") + elif len(from_lines) > len(output): + output.append("\n") + + diff = difflib.unified_diff(from_lines, output) + if from_lines == output: + # No indentation errors + return + else: + error = True + + for d in diff: + d = d.strip("\n") + r = re.match(r"^@@ -(\d+),(\d+) \+\d+,\d+ @@$", d) + if r: + line1, line2 = [int(i) for i in r.groups()] + yield LintProblem( + line=line1, + end_line=line1 + line2, + desc="an indentation error was found", + rule="indentation", + ) + + def _remove_comments(self, content): + """Remove comments from content""" + lines = content.split('\n') + result = [] + for line in lines: + if not line.strip().startswith('#'): + result.append(line) + return '\n'.join(result) diff --git a/src/crs_linter/rules/lowercase_ignorecase.py b/src/crs_linter/rules/lowercase_ignorecase.py index 1752ea5..e052206 100644 --- a/src/crs_linter/rules/lowercase_ignorecase.py +++ b/src/crs_linter/rules/lowercase_ignorecase.py @@ -1,22 +1,35 @@ -def check_lowercase_ignorecase(self): - ruleid = 0 - for d in self.data: - if d["type"].lower() == "secrule": - if d["operator"] == "@rx": - regex = d["operator_argument"] - if regex.startswith("(?i)"): - if "actions" in d: - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "t": - # check the transform is valid - if a["act_arg"].lower() == "lowercase": - self.error_combined_transformation_and_ignorecase.append( - { - "ruleid": ruleid, - "line": a["lineno"], - "endLine": a["lineno"], - "message": f'rule uses (?i) in combination with t:lowercase: \'{a["act_arg"]}\'; rule id: {ruleid}', - } - ) +from crs_linter.lint_problem import LintProblem +from crs_linter.rule import Rule + + +class LowercaseIgnorecase(Rule): + """Check for combined transformation and ignorecase patterns.""" + + def __init__(self): + super().__init__() + self.success_message = "No combined transformation and ignorecase patterns found." + self.error_message = "Found combined transformation and ignorecase pattern(s)" + self.error_title = "combined transformation and ignorecase" + self.args = ("data",) + + def check(self, data): + """check for combined transformation and ignorecase patterns""" + ruleid = 0 + for d in data: + if d["type"].lower() == "secrule": + if d["operator"] == "@rx": + regex = d["operator_argument"] + if regex.startswith("(?i)"): + if "actions" in d: + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if a["act_name"] == "t": + # check the transform is valid + if a["act_arg"].lower() == "lowercase": + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f'rule uses (?i) in combination with t:lowercase: \'{a["act_arg"]}\'; rule id: {ruleid}', + rule="lowercase_ignorecase", + ) diff --git a/src/crs_linter/rules/ordered_actions.py b/src/crs_linter/rules/ordered_actions.py index ddf36bf..e4e3ab1 100644 --- a/src/crs_linter/rules/ordered_actions.py +++ b/src/crs_linter/rules/ordered_actions.py @@ -1,7 +1,8 @@ from crs_linter.lint_problem import LintProblem from crs_linter.utils import get_id +from crs_linter.rule import Rule -ORDERED_ACTIONS = [ +ACTIONS_ORDER = [ "id", # 0 "phase", # 1 "allow", @@ -38,48 +39,42 @@ "skipafter", ] -def check(data): - global act_idx, current_rule_id - chained = False - for d in data: - if "actions" in d: - max_order = 0 # maximum position of read actions - if not chained: - current_rule_id = get_id(d["actions"]) - else: - chained = False +class OrderedActions(Rule): + """Check that actions are in the correct order.""" - for index, a in enumerate(d["actions"]): - action = a["act_name"].lower() - # get the 'id' of rule - current_lineno = a["lineno"] + def __init__(self): + super().__init__() + self.success_message = "Action order check ok." + self.error_message = "Action order check found error(s)" + self.error_title = "Action order check" + self.args = ("data",) - # check if chained - if a["act_name"] == "chain": - chained = True + def check(self, data): + chained = False - # get the index of action from the ordered list - # above from constructor - try: - act_idx = ORDERED_ACTIONS.index(action) - except ValueError: - yield LintProblem( - line=current_lineno, - end_line=current_lineno, - desc=f'action "{action}" at pos {index - 1} is in the wrong order: "{action}" at pos {index}', - rule="ordered_actions", - ) - - # if the index of current action is @ge than the previous - # max value, load it into max_order - if act_idx >= max_order: - max_order = act_idx + for d in data: + if "actions" in d: + max_order = 0 # maximum position of read actions + if not chained: + current_rule_id = get_id(d["actions"]) else: - # action is the previous action's position in list - # act_idx is the current action's position in list - # if the prev is @gt actually, means it's at wrong position - if act_idx < max_order: + chained = False + + for index, a in enumerate(d["actions"]): + action = a["act_name"].lower() + # get the 'id' of rule + current_lineno = a["lineno"] + + # check if chained + if a["act_name"] == "chain": + chained = True + + # get the index of action from the ordered list + # above from constructor + try: + act_idx = ACTIONS_ORDER.index(action) + except ValueError: yield LintProblem( line=current_lineno, end_line=current_lineno, @@ -87,3 +82,19 @@ def check(data): rule="ordered_actions", ) + # if the index of current action is @ge than the previous + # max value, load it into max_order + if act_idx >= max_order: + max_order = act_idx + else: + # action is the previous action's position in list + # act_idx is the current action's position in list + # if the prev is @gt actually, means it's at wrong position + if act_idx < max_order: + yield LintProblem( + line=current_lineno, + end_line=current_lineno, + desc=f'action "{action}" at pos {index - 1} is in the wrong order: "{action}" at pos {index}', + rule="ordered_actions", + ) + diff --git a/src/crs_linter/rules/pl_consistency.py b/src/crs_linter/rules/pl_consistency.py index 262abab..4c8396e 100644 --- a/src/crs_linter/rules/pl_consistency.py +++ b/src/crs_linter/rules/pl_consistency.py @@ -1,149 +1,140 @@ import re from crs_linter.lint_problem import LintProblem +from crs_linter.rule import Rule -def check(data, globtxvars): - """this method checks the PL consistency - the function iterates through the rules, and catches the set PL, eg: +class PlConsistency(Rule): + """Check the PL consistency.""" + + def __init__(self): + super().__init__() + self.success_message = "Paranoia-level tags are correct." + self.error_message = "Found incorrect paranoia-level/N tag(s)" + self.error_title = "wrong or missing paranoia-level/N tag" + self.args = ("data", "globtxvars") + + def check(self, data, globtxvars): + """this method checks the PL consistency - SecRule TX:DETECTION_PARANOIA_LEVEL "@lt 1" ... - this means we are on PL1 currently + the function iterates through the rules, and catches the set PL, eg: - all rules must consist with current PL at the used tags and variables + SecRule TX:DETECTION_PARANOIA_LEVEL "@lt 1" ... + this means we are on PL1 currently - eg: - tag:'paranoia-level/1' - ^ - setvar:'tx.outbound_anomaly_score_pl1=+%{tx.error_anomaly_score}'" - ^^^ - additional relations: - * all rules must have the "tag:'paranoia-level/N'" if it does not have "nolog" action - * if rule have "nolog" action it must not have "tag:'paranoia-level/N'" action - * anomaly scoring value on current PL must increment by value corresponding to severity + all rules must consist with current PL at the used tags and variables - """ - curr_pl = 0 - tags = [] # collect tags - _txvars = {} # collect setvars and values - _txvlines = {} # collect setvars and its lines - severity = None # severity - has_nolog = False # nolog action exists + eg: + tag:'paranoia-level/1' + setvar:tx.anomaly_score_pl1=+%{tx.inbound_anomaly_score} + """ + curr_pl = 0 + tags = [] # collect tags + _txvars = {} # collect setvars and values + _txvlines = {} # collect setvars and its lines + severity = None # severity + has_nolog = False # nolog action exists + ruleid = 0 - for d in data: - # find the current PL - if d["type"].lower() in ["secrule"]: - for v in d["variables"]: - if ( + for d in data: + # find the current PL + if d["type"].lower() in ["secrule"]: + for v in d["variables"]: + if ( v["variable"].lower() == "tx" and v["variable_part"].lower() == "detection_paranoia_level" and d["operator"] == "@lt" and re.match(r"^\d$", d["operator_argument"]) - ): - curr_pl = int(d["operator_argument"]) + ): + curr_pl = int(d["operator_argument"]) - if "actions" in d: - chained = False - ruleid = 0 - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "severity": - severity = a["act_arg"].replace("'", "").lower() - if a["act_name"] == "tag": - tags.append(a) - if a["act_name"] == "setvar": - if a["act_arg"][0:2].lower() == "tx": - # this hack necessary, because sometimes we use setvar argument - # between '', sometimes not - # eg - # setvar:crs_setup_version=334 - # setvar:'tx.inbound_anomaly_score_threshold=5' - txv = a["act_arg"][3:].split("=") - txv[0] = txv[0].lower() # variable name - if len(txv) > 1: - # variable value - txv[1] = txv[1].lower().strip(r"+\{}") - else: - txv.append(a["act_arg_val"].strip(r"+\{}")) - _txvars[txv[0]] = txv[1] - _txvlines[txv[0]] = a["lineno"] - if a["act_name"] == "nolog": - has_nolog = True - if a["act_name"] == "chain": - chained = True + if "actions" in d: + chained = False + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if a["act_name"] == "severity": + severity = a["act_arg"].replace("'", "").lower() + if a["act_name"] == "tag": + tags.append(a) + if a["act_name"] == "setvar": + if a["act_arg"][0:2].lower() == "tx": + txv = a["act_arg"][3:].split("=") + txv[0] = txv[0].lower() # variable name + if len(txv) > 1: + txv[1] = txv[1].lower().strip(r"+\{}") + else: + txv.append(a["act_arg_val"].strip(r"+\{}")) + _txvars[txv[0]] = txv[1] + _txvlines[txv[0]] = a["lineno"] + if a["act_name"] == "nolog": + has_nolog = True + if a["act_name"] == "chain": + chained = True - has_pl_tag = False - for a in tags: - if a["act_arg"][0:14] == "paranoia-level": - has_pl_tag = True - pltag = int(a["act_arg"].split("/")[1]) - if has_nolog: - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - - desc=f'tag \'{a["act_arg"]}\' with \'nolog\' action, rule id: {ruleid}', - rule="pl_consistency", - ) - elif pltag != curr_pl and curr_pl > 0: - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - - desc=f'tag \'{a["act_arg"]}\' on PL {curr_pl}, rule id: {ruleid}', - rule="pl_consistency", - ) + has_pl_tag = False + for a in tags: + if a["act_arg"][0:14] == "paranoia-level": + has_pl_tag = True + pltag = int(a["act_arg"].split("/")[1]) + if has_nolog: + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f'tag \'{a["act_arg"]}\' with \'nolog\' action, rule id: {ruleid}', + rule="pl_consistency", + ) + elif pltag != curr_pl and curr_pl > 0: + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f'tag \'{a["act_arg"]}\' on PL {curr_pl}, rule id: {ruleid}', + rule="pl_consistency", + ) - if not has_pl_tag and not has_nolog and curr_pl >= 1: - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - - desc=f"rule does not have `paranoia-level/{curr_pl}` action, rule id: {ruleid}", - rule="pl_consistency", - ) + if not has_pl_tag and not has_nolog and curr_pl >= 1: + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f"rule does not have `paranoia-level/{curr_pl}` action, rule id: {ruleid}", + rule="pl_consistency", + ) - for t in _txvars: - subst_val = re.search( - r"%\{tx.[a-z]+_anomaly_score}", _txvars[t], re.I - ) - val = re.sub(r"[+%{}]", "", _txvars[t]).lower() - # check if last char is a numeric, eg ...anomaly_score_pl1 - scorepl = re.search(r"anomaly_score_pl\d$", t) - if scorepl: - if curr_pl > 0 and int(t[-1]) != curr_pl: - yield LintProblem( - line=_txvlines[t], - end_line=_txvlines[t], - - desc=f"variable {t} on PL {curr_pl}, rule id: {ruleid}", - rule="pl_consistency", - ) - if severity is None and subst_val: # - do we need this? - yield LintProblem( - line=_txvlines[t], - end_line=_txvlines[t], - - desc=f"missing severity action, rule id: {ruleid}", - rule="pl_consistency", - ) - else: - if val != "tx.%s_anomaly_score" % (severity) and val != "0": + for t in _txvars: + subst_val = re.search( + r"%\{tx.[a-z]+_anomaly_score}", _txvars[t], re.I + ) + val = re.sub(r"[+%{}]", "", _txvars[t]).lower() + scorepl = re.search(r"anomaly_score_pl\d$", t) + if scorepl: + if curr_pl > 0 and int(t[-1]) != curr_pl: yield LintProblem( line=_txvlines[t], end_line=_txvlines[t], - - desc=f"invalid value for anomaly_score_pl{t[-1]}: {val} with severity {severity}, rule id: {ruleid}", + desc=f"variable {t} on PL {curr_pl}, rule id: {ruleid}", rule="pl_consistency", ) - # variable was found so we need to mark it as used - if t in globtxvars: + if severity is None and subst_val: + yield LintProblem( + line=_txvlines[t], + end_line=_txvlines[t], + desc=f"missing severity action, rule id: {ruleid}", + rule="pl_consistency", + ) + else: + if val != "tx.%s_anomaly_score" % (severity) and val != "0": + yield LintProblem( + line=_txvlines[t], + end_line=_txvlines[t], + desc=f"invalid value for anomaly_score_pl{t[-1]}: {val} with severity {severity}, rule id: {ruleid}", + rule="pl_consistency", + ) globtxvars[t]["used"] = True - # reset local variables if we are done with a rule <==> no more 'chain' action - if not chained: - tags = [] # collect tags - _txvars = {} # collect setvars and values - _txvlines = {} # collect setvars and its lines - severity = None # severity - has_nolog = False # rule has nolog action \ No newline at end of file + # reset local variables if we are done with a rule <==> no more 'chain' action + if not chained: + tags = [] # collect tags + _txvars = {} # collect setvars and values + _txvlines = {} # collect setvars and its lines + severity = None # severity + has_nolog = False # rule has nolog action + diff --git a/src/crs_linter/rules/pl_tags.py b/src/crs_linter/rules/pl_tags.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/crs_linter/rules/rule_tests.py b/src/crs_linter/rules/rule_tests.py index b230d87..5e57c33 100644 --- a/src/crs_linter/rules/rule_tests.py +++ b/src/crs_linter/rules/rule_tests.py @@ -1,38 +1,48 @@ from crs_linter.lint_problem import LintProblem +from crs_linter.rule import Rule -def check(data, test_cases=None, exclusion_list=None): - """ - Check that rules have corresponding test cases - """ - if test_cases is None: - test_cases = {} - if exclusion_list is None: - exclusion_list = [] - - for d in data: - # only SecRule counts - if d['type'] == "SecRule": - for a in d['actions']: - # find the `id` action - if a['act_name'] == "id": - # get the argument of the action - rid = int(a['act_arg']) # int - srid = a['act_arg'] # string - if (rid%1000) >= 100: # skip the PL control rules - # also skip these hardcoded rules - need_check = True - for excl in exclusion_list: - # exclude full rule IDs or rule ID prefixes - if srid[:len(excl)] == excl: - need_check = False - if need_check: - # if there is no test cases, just print it - if rid not in test_cases: - yield LintProblem( - line=a['lineno'], - end_line=a['lineno'], - - desc=f"rule does not have any tests; rule id: {rid}", - rule="rule_tests", - ) +class RuleTests(Rule): + """Check that rules have corresponding test cases.""" + + def __init__(self): + super().__init__() + self.success_message = "All rules have tests." + self.error_message = "There are one or more rules without tests." + self.error_title = "no tests" + self.args = ("data", "test_cases", "exclusion_list") + + def check(self, data, test_cases=None, exclusion_list=None): + """ + Check that rules have corresponding test cases + """ + if test_cases is None: + test_cases = {} + if exclusion_list is None: + exclusion_list = [] + + for d in data: + # only SecRule counts + if d['type'] == "SecRule": + for a in d['actions']: + # find the `id` action + if a['act_name'] == "id": + # get the argument of the action + rid = int(a['act_arg']) # int + srid = a['act_arg'] # string + if (rid%1000) >= 100: # skip the PL control rules + # also skip these hardcoded rules + need_check = True + for excl in exclusion_list: + # exclude full rule IDs or rule ID prefixes + if srid[:len(excl)] == excl: + need_check = False + if need_check: + # if there is no test cases, just print it + if rid not in test_cases: + yield LintProblem( + line=a['lineno'], + end_line=a['lineno'], + desc=f"rule does not have any tests; rule id: {rid}", + rule="rule_tests", + ) diff --git a/src/crs_linter/rules/variables_usage.py b/src/crs_linter/rules/variables_usage.py index 79288ee..a0c6609 100644 --- a/src/crs_linter/rules/variables_usage.py +++ b/src/crs_linter/rules/variables_usage.py @@ -1,174 +1,150 @@ import re from crs_linter.lint_problem import LintProblem +from crs_linter.rule import Rule -def check(data, globtxvars): - """this function checks if a used TX variable has set - a variable is used when: - * it's an operator argument: "@rx %{TX.foo}" - * it's a target: SecRule TX.foo "@..." - * it's a right side value in a value giving: setvar:tx.bar=tx.foo +class VariablesUsage(Rule): + """Check if a used TX variable has been set.""" + + def __init__(self): + super().__init__() + self.success_message = "All TX variables are set." + self.error_message = "Found unset TX variable(s)" + self.error_title = "unset TX variable" + self.args = ("data", "globtxvars") + + def check(self, data, globtxvars): + """this function checks if a used TX variable has set - this function collects the variables if it is used but not set previously - """ - # set if rule checks the existence of var, e.g., `&TX:foo "@eq 1"` - check_exists = None - has_disruptive = False # set if rule contains disruptive action - chained = False - for d in data: - if d["type"].lower() in ["secrule", "secaction"]: - if not chained: - # works only in Apache, libmodsecurity uses default phase 1 - phase = 2 - ruleid = 0 - else: - chained = False + a variable is used when: + * it's an operator argument: "@rx %{TX.foo}" + * it's a target: SecRule TX.foo "@..." + * it's a right side value in a value giving: setvar:tx.bar=tx.foo + + this function collects the variables if it is used but not set previously + """ + # set if rule checks the existence of var, e.g., `&TX:foo "@eq 1"` + check_exists = None + has_disruptive = False + chained = False + for d in data: + if d["type"].lower() in ["secrule", "secaction"]: + if not chained: + phase = 2 + ruleid = 0 + else: + chained = False - # iterate over actions and collect these values: - # ruleid, phase, chained, rule has or not any disruptive action - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "phase": - phase = int(a["act_arg"]) - if a["act_name"] == "chain": - chained = True - if a["act_name"] in [ - "block", - "deny", - "drop", - "allow", - "proxy", - "redirect", - ]: - has_disruptive = True + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if a["act_name"] == "phase": + phase = int(a["act_arg"]) + if a["act_name"] == "chain": + chained = True + if a["act_name"] in [ + "block", "deny", "drop", "allow", "proxy", "redirect" + ]: + has_disruptive = True - # check wheter tx.var is used at setvar's right side - val_act = [] - val_act_arg = [] - # example: - # setvar:'tx.inbound_anomaly_score_threshold=5' - # - # act_arg <- tx.inbound_anomaly_score_threshold - # act_atg_val <- 5 - # - # example2 (same as above, but no single quotes!): - # setvar:tx.inbound_anomaly_score_threshold=5 - # act_arg <- tx.inbound_anomaly_score_threshold - # act_atg_val <- 5 - # - if "act_arg" in a and a["act_arg"] is not None: - val_act = re.findall(r"%\{(tx.[^%]*)}", a["act_arg"], re.I) - if "act_arg_val" in a and a["act_arg_val"] is not None: - val_act_arg = re.findall( - r"%\{(tx.[^%]*)}", a["act_arg_val"], re.I - ) - for v in val_act + val_act_arg: - v = v.lower().replace("tx.", "") - # check whether the variable is a captured var, eg TX.1 - we do not care that case - if not re.match(r"^\d$", v, re.I): - # v holds the tx.ANY variable, but not the captured ones - # we should collect these variables - if ( + val_act = [] + val_act_arg = [] + if "act_arg" in a and a["act_arg"] is not None: + val_act = re.findall(r"%\{(tx.[^%]*)}", a["act_arg"], re.I) + if "act_arg_val" in a and a["act_arg_val"] is not None: + val_act_arg = re.findall( + r"%\{(tx.[^%]*)}", a["act_arg_val"], re.I + ) + for v in val_act + val_act_arg: + v = v.lower().replace("tx.", "") + if not re.match(r"^\d$", v, re.I): + if ( v not in globtxvars or phase < globtxvars[v]["phase"] - ): - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - - desc=f"TX variable '{v}' not set / later set (rvar) in rule {ruleid}", - rule="variables_usage", - ) + ): + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f"TX variable '{v}' not set / later set (rvar) in rule {ruleid}", + rule="variables_usage", + ) + else: + globtxvars[v]["used"] = True else: - globtxvars[v]["used"] = True - else: - if v in globtxvars: - globtxvars[v]["used"] = True + if v in globtxvars: + globtxvars[v]["used"] = True - if "operator_argument" in d: - oparg = re.findall(r"%\{(tx.[^%]*)}", d["operator_argument"], re.I) - if oparg: - for o in oparg: - o = o.lower() - o = re.sub(r"tx\.", "", o, re.I) - if ( + if "operator_argument" in d: + oparg = re.findall(r"%\{(tx.[^%]*)}", d["operator_argument"], re.I) + if oparg: + for o in oparg: + o = o.lower() + o = re.sub(r"tx\.", "", o, re.I) + if ( ( - o not in globtxvars - or phase < globtxvars[o]["phase"] + o not in globtxvars + or phase < globtxvars[o]["phase"] ) and not re.match(r"^\d$", o) and not re.match(r"/.*/", o) and check_exists is None - ): - yield LintProblem( - line=d["lineno"], - end_line=d["lineno"], - - desc=f"TX variable '{o}' not set / later set (OPARG) in rule {ruleid}", - rule="variables_usage", - ) - elif ( + ): + yield LintProblem( + line=d["lineno"], + end_line=d["lineno"], + desc=f"TX variable '{o}' not set / later set (OPARG) in rule {ruleid}", + rule="variables_usage", + ) + elif ( o in globtxvars and phase >= globtxvars[o]["phase"] and not re.match(r"^\d$", o) and not re.match(r"/.*/", o) - ): - globtxvars[o]["used"] = True - if "variables" in d: - for v in d["variables"]: - # check if the variable is TX and has not a & prefix, which counts - # the variable length - if v["variable"].lower() == "tx": - if not v["counter"]: - # * if the variable part (after '.' or ':') is not there in - # the list of collected TX variables, and - # * not a numeric, eg TX:2, and - # * not a regular expression, between '/' chars, eg TX:/^foo/ - # OR - # * rule's phase lower than declaration's phase - rvar = v["variable_part"].lower() - if ( + ): + globtxvars[o]["used"] = True + if "variables" in d: + for v in d["variables"]: + if v["variable"].lower() == "tx": + if not v["counter"]: + rvar = v["variable_part"].lower() + if ( ( - rvar not in globtxvars - or ( - ruleid != globtxvars[rvar]["ruleid"] - and phase < globtxvars[rvar]["phase"] - ) + rvar not in globtxvars + or ( + ruleid != globtxvars[rvar]["ruleid"] + and phase < globtxvars[rvar]["phase"] + ) ) and not re.match(r"^\d$", rvar) and not re.match(r"/.*/", rvar) - ): - yield LintProblem( - line=d["lineno"], - end_line=d["lineno"], - - desc=f"TX variable '{v['variable_part']}' not set / later set (VAR)", - rule="variables_usage", - ) - elif ( + ): + yield LintProblem( + line=d["lineno"], + end_line=d["lineno"], + desc=f"TX variable '{v['variable_part']}' not set / later set (VAR)", + rule="variables_usage", + ) + elif ( rvar in globtxvars and phase >= globtxvars[rvar]["phase"] and not re.match(r"^\d$", rvar) and not re.match(r"/.*/", rvar) - ): - globtxvars[rvar]["used"] = True - else: - check_exists = True - globtxvars[v["variable_part"].lower()] = { - "var": v["variable_part"].lower(), - "phase": phase, - "used": False, - "file": "unknown", # filename not available in this context - "ruleid": ruleid, - "message": "", - "line": d["lineno"], - "endLine": d["lineno"], - } - if has_disruptive: - globtxvars[v["variable_part"].lower()][ - "used" - ] = True + ): + globtxvars[rvar]["used"] = True + else: + check_exists = True + globtxvars[v["variable_part"].lower()] = { + "var": v["variable_part"].lower(), + "phase": phase, + "used": False, + "file": None, # filename is not available here + "ruleid": ruleid, + "message": "", + "line": d["lineno"], + "endLine": d["lineno"], + } + if has_disruptive: + globtxvars[v["variable_part"].lower()]["used"] = True if not chained: check_exists = None - has_disruptive = False \ No newline at end of file + has_disruptive = False diff --git a/src/crs_linter/rules/version.py b/src/crs_linter/rules/version.py index 09e931d..2e74878 100644 --- a/src/crs_linter/rules/version.py +++ b/src/crs_linter/rules/version.py @@ -1,52 +1,64 @@ from crs_linter.lint_problem import LintProblem +from crs_linter.rule import Rule -def check(data, version): - """ - check that every rule has a `ver` action - """ - chained = False - ruleid = 0 - has_ver = False - ver_is_ok = False - crsversion = version - ruleversion = "" - for d in data: - if "actions" in d: - chainlevel = 0 - if not chained: - ruleid = 0 - has_ver = False - ver_is_ok = False +class Version(Rule): + """Check that every rule has a `ver` action.""" + + def __init__(self): + super().__init__() + self.success_message = "No rule without correct ver action." + self.error_message = "There are one or more rules with incorrect ver action." + self.error_title = "ver is missing / incorrect" + self.args = ("data", "version") + + def check(self, data, version): + """ + check that every rule has a `ver` action + """ + chained = False + ruleid = 0 + has_ver = False + ver_is_ok = False + crsversion = version + ruleversion = "" + for d in data: + if "actions" in d: chainlevel = 0 - else: - chained = False - for a in d["actions"]: - if a["act_name"] == "id": - ruleid = int(a["act_arg"]) - if a["act_name"] == "chain": - chained = True - chainlevel += 1 - if a["act_name"] == "ver": - if chainlevel == 0: - has_ver = True - if a["act_arg"] == version: - ver_is_ok = True - else: - ruleversion = a["act_arg"] - if ruleid > 0 and chainlevel == 0: - if not has_ver: - yield LintProblem( - line=a["lineno"], - end_line=a["lineno"], - desc=f"rule does not have 'ver' action; rule id: {ruleid}", - rule="version", - ) + + if not chained: + ruleid = 0 + has_ver = False + ver_is_ok = False + chainlevel = 0 else: - if not ver_is_ok: + chained = False + for a in d["actions"]: + if a["act_name"] == "id": + ruleid = int(a["act_arg"]) + if a["act_name"] == "chain": + chained = True + chainlevel += 1 + if a["act_name"] == "ver": + if chainlevel == 0: + has_ver = True + if a["act_arg"] == version: + ver_is_ok = True + else: + ruleversion = a["act_arg"] + if ruleid > 0 and chainlevel == 0: + if not has_ver: yield LintProblem( line=a["lineno"], end_line=a["lineno"], - desc=f"rule's 'ver' action has incorrect value; rule id: {ruleid}, version: '{ruleversion}', expected: '{crsversion}'", + desc=f"rule does not have 'ver' action; rule id: {ruleid}", rule="version", ) + else: + if not ver_is_ok: + yield LintProblem( + line=a["lineno"], + end_line=a["lineno"], + desc=f"rule's 'ver' action has incorrect value; rule id: {ruleid}, version: '{ruleversion}', expected: '{crsversion}'", + rule="version", + ) diff --git a/src/crs_linter/rules_metadata.py b/src/crs_linter/rules_metadata.py new file mode 100644 index 0000000..26a22b0 --- /dev/null +++ b/src/crs_linter/rules_metadata.py @@ -0,0 +1,95 @@ +""" +Rules and rule metadata configuration management. +""" + +from typing import List +from .rule import Rule + + +class Rules: + """Manages a collection of linting rules as a singleton.""" + + _instance = None + _initialized = False + + def __new__(cls): + if cls._instance is None: + cls._instance = super(Rules, cls).__new__(cls) + return cls._instance + + def __init__(self): + if not self._initialized: + self._rules: List[Rule] = [] + self._initialized = True + + def register_rule(self, rule: Rule): + """Registers a rule instance.""" + if not isinstance(rule, Rule): + raise TypeError("rule must be an instance of Rule") + self._rules.append(rule) + + def get_rule_messages(self, name: str): + """Retrieves success, error messages, and title for a rule.""" + for rule in self._rules: + if rule.name == name: + return rule.get_messages() + return f"{name} check ok.", f"{name} check found error(s)", name.replace('_', ' ').title() + + def get_rule_configs(self, linter_instance, tagslist=None, test_cases=None, exclusion_list=None, crs_version=None): + """ + Generates rule configurations for the linter based on registered rules and current context. + """ + configs = [] + for rule in self._rules: + # Get rule's expected arguments + args = list(rule.get_args()) + kwargs = dict(rule.get_kwargs()) + + # Map common parameters + if "data" in args: + args[args.index("data")] = linter_instance.data + if "globtxvars" in args: + args[args.index("globtxvars")] = linter_instance.globtxvars + if "ids" in args: + args[args.index("ids")] = linter_instance.ids + if "tags" in args: + args[args.index("tags")] = tagslist + if "test_cases" in args: + args[args.index("test_cases")] = test_cases + if "exclusion_list" in args: + args[args.index("exclusion_list")] = exclusion_list + if "version" in args: + args[args.index("version")] = crs_version + + # Evaluate condition if a function is provided + condition = None + condition_func = rule.get_condition() + if condition_func: + # Pass relevant context to the condition function + condition = condition_func( + linter_instance=linter_instance, + tagslist=tagslist, + test_cases=test_cases, + exclusion_list=exclusion_list, + crs_version=crs_version + ) + else: + # Default conditions based on parameter presence + if rule.name == "version": + condition = crs_version is not None + elif rule.name == "approved_tags": + condition = tagslist is not None + elif rule.name == "rule_tests": + condition = test_cases is not None + + configs.append((rule, args, kwargs, condition)) + return configs + + +# Global singleton instance +_rules_instance = Rules() + + +def get_rules(): + """Get the singleton Rules instance.""" + return _rules_instance \ No newline at end of file diff --git a/src/crs_linter/utils.py b/src/crs_linter/utils.py index 8f78e96..547276f 100644 --- a/src/crs_linter/utils.py +++ b/src/crs_linter/utils.py @@ -75,8 +75,6 @@ def remove_comments(data): def parse_version_from_commit_message(message): """Parse the version from the commit message""" - global logger - logger.info("Checking for release commit message ('...release vx.y.z')...)") if message == "" or message is None: return None @@ -86,28 +84,20 @@ def parse_version_from_commit_message(message): match = message_pattern.search(message) if match is not None and "post" not in message: version = match.group(1) - logger.info(f"Detected version from commit message: {version}") return Version.parse(version.replace("v", "")) - else: - logger.info("Commit message doesn't appear to be for a release") return None def parse_version_from_branch_name(head_ref): """Parse the version from the branch name""" - global logger if head_ref == "" or head_ref is None: return None - logger.info("Checking for version information in branch name ('release/vx.y.z')...") branch_pattern = re.compile(r"release/(v\d+\.\d+\.\d+)") match = branch_pattern.search(head_ref) if match is not None and "post" not in head_ref: version = match.group(1) - logger.info(f"Detected version from branch name: {version}") return Version.parse(version.replace("v", "")) - else: - logger.info(f"Branch name doesn't match release branch pattern: '{head_ref}'") return None @@ -119,7 +109,6 @@ def generate_version_string(directory, head_ref, commit_message): v4.5.0-6-g872a90ab -> "4.6.0-dev" v4.5.0-0-abcd01234 -> "4.5.0" """ - global logger if not directory.is_dir(): raise ValueError(f"Directory {directory} does not exist") @@ -135,19 +124,15 @@ def generate_version_string(directory, head_ref, commit_message): semver_version = parse_version_from_latest_tag(directory) semver_version = semver_version.bump_minor() semver_version = semver_version.replace(prerelease="dev") - logger.info(f"Required version for check: {semver_version}") return f"OWASP_CRS/{semver_version}" def parse_version_from_latest_tag(directory): """Parse the version from the latest tag""" - global logger - logger.info("Looking up last tag to determine version...") version = get_current_version(projdir=str(directory.resolve())) if version is None: raise ValueError(f"Can't get current version from {directory}") - logger.info(f"Found last tag {version}") if version.startswith("v"): version = version.replace("v", "") return version diff --git a/tests/test_rules_metadata.py b/tests/test_rules_metadata.py new file mode 100644 index 0000000..3e8bae3 --- /dev/null +++ b/tests/test_rules_metadata.py @@ -0,0 +1,137 @@ +""" +Test the Rules metadata system and individual rule testing. +""" + +import pytest +from crs_linter.linter import Linter, parse_config +from crs_linter.rules_metadata import Rules, get_rules +from crs_linter.rule import Rule +from crs_linter.lint_problem import LintProblem +from crs_linter.rules.ignore_case import IgnoreCase +from crs_linter.rules.crs_tag import CrsTag + + +def test_rule_metadata_in_rule_class(): + """Test that rule instances have their own metadata.""" + rule = IgnoreCase() + + assert rule.name == "ignorecase" + assert rule.success_message == "Ignore case check ok." + assert rule.error_message == "Ignore case check found error(s)" + assert rule.error_title == "Case check" + assert rule.args == ("data",) + + +def test_default_rules_creation(): + """Test that default rules are created correctly.""" + rules = get_rules() + + # Check that we have some rules registered (exact count may vary) + assert len(rules._rules) > 0, "No rules registered" + + # Check that we have some expected rules + registered_rules = [rule.name for rule in rules._rules] + expected_rules = ["ignorecase", "orderedactions", "ctlauditlog", "variablesusage", + "plconsistency", "crstag", "version", "approvedtags"] + + for expected in expected_rules: + assert expected in registered_rules, f"Expected rule {expected} not found" + + +def test_rules_get_rule_messages(): + """Test that Rules.get_rule_messages returns correct messages.""" + rules = get_rules() + + success, error, title = rules.get_rule_messages("ignorecase") + assert success == "Ignore case check ok." + assert error == "Ignore case check found error(s)" + assert title == "Case check" + + +def test_rules_get_rule_configs(): + """Test that Rules.get_rule_configs generates correct configurations.""" + rules = get_rules() + sample_data = parse_config('SecRule ARGS "@rx ^test" "id:1,phase:1,log"') + linter = Linter(sample_data) + + configs = rules.get_rule_configs(linter) + + # Should have configurations for all registered rules + assert len(configs) > 0 + + # Each config should be a tuple of (rule_instance, args, kwargs, condition) + for rule_instance, args, kwargs, condition in configs: + assert isinstance(rule_instance, Rule) + assert isinstance(args, list) + assert isinstance(kwargs, dict) + + +def test_individual_rule_ignorecase(): + """Test the IgnoreCase rule individually.""" + rule = IgnoreCase() + sample_data = parse_config('SecRule ARGS "@rx ^test" "id:1,phase:1,LOG"') + + problems = list(rule.check(sample_data)) + + # Should have 1 problem for wrong case + assert len(problems) == 1 + assert problems[0].rule == "ignore_case" + + +def test_individual_rule_crstag(): + """Test the CrsTag rule individually.""" + rule = CrsTag() + sample_data = parse_config('SecRule ARGS "@rx ^test" "id:1,phase:1,log"') + + problems = list(rule.check(sample_data)) + + # Should have 1 problem for missing OWASP_CRS tag + assert len(problems) == 1 + assert problems[0].rule == "crs_tag" + + +def test_custom_rules_system(): + """Test that a custom Rules system can be created with specific rules.""" + # Test that we can create a new Rules instance and register rules + rules = Rules() + + # Get initial count + initial_count = len(rules._rules) + + # Register additional rules + rules.register_rule(IgnoreCase()) + rules.register_rule(CrsTag()) + + # Should have more rules now + assert len(rules._rules) >= initial_count + 2 + + sample_data = parse_config('SecRule ARGS "@rx ^test" "id:1,phase:1,log"') + linter = Linter(sample_data, rules=rules) + + configs = rules.get_rule_configs(linter) + + # Should have rule configurations + assert len(configs) > 0 + + +def test_rule_conditions(): + """Test that conditional rules are properly handled.""" + rules = get_rules() + sample_data = parse_config('SecRule ARGS "@rx ^test" "id:1,phase:1,log"') + linter = Linter(sample_data) + + # Without version, version rule should not run + configs_without_version = rules.get_rule_configs(linter) + version_configs = [c for c in configs_without_version if c[0].name == "version"] + + # Version rule should have condition = False + if len(version_configs) > 0: + assert version_configs[0][3] is None or version_configs[0][3] == False + + # With version, version rule should run + configs_with_version = rules.get_rule_configs(linter, crs_version="OWASP_CRS/4.0.0") + version_configs = [c for c in configs_with_version if c[0].name == "version"] + + # Version rule should have condition = True + if len(version_configs) > 0: + assert version_configs[0][3] is None or version_configs[0][3] == True From 401743b57795c4a3271267f6e6a1e87c5d2de575 Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Wed, 8 Oct 2025 14:45:23 -0300 Subject: [PATCH 10/19] fix: cli call Signed-off-by: Felipe Zipitria --- src/crs_linter/cli.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/crs_linter/cli.py b/src/crs_linter/cli.py index 3fa8812..0f823e2 100755 --- a/src/crs_linter/cli.py +++ b/src/crs_linter/cli.py @@ -237,9 +237,20 @@ def main(): c = Linter(parsed[f], f, txvars) # Check indentation separately (not part of the rule system yet) - error = indentation.check(f, parsed[f]) - if error: + indentation_checker = indentation.Indentation() + indentation_problems = list(indentation_checker.check(f, parsed[f])) + if indentation_problems: retval = 1 + logger.error(indentation_checker.error_message, file=f, title=indentation_checker.error_title) + for problem in indentation_problems: + logger.error( + problem.desc, + file=f, + line=problem.line, + end_line=problem.end_line, + ) + else: + logger.debug(indentation_checker.success_message) # Run all linting checks using the new generic system problems = list(c.run_checks( From 1040f97c953e9a73858dada4384bf32f61b7adc0 Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Sat, 11 Oct 2025 10:53:25 -0300 Subject: [PATCH 11/19] fix: intentation checks Signed-off-by: Felipe Zipitria --- src/crs_linter/cli.py | 17 ----------------- src/crs_linter/rules_metadata.py | 4 ++++ 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/src/crs_linter/cli.py b/src/crs_linter/cli.py index 0f823e2..fc727bc 100755 --- a/src/crs_linter/cli.py +++ b/src/crs_linter/cli.py @@ -10,7 +10,6 @@ from crs_linter.linter import Linter from crs_linter.logger import Logger, Output -from crs_linter.rules import indentation from crs_linter.utils import * @@ -236,22 +235,6 @@ def main(): logger.debug(f) c = Linter(parsed[f], f, txvars) - # Check indentation separately (not part of the rule system yet) - indentation_checker = indentation.Indentation() - indentation_problems = list(indentation_checker.check(f, parsed[f])) - if indentation_problems: - retval = 1 - logger.error(indentation_checker.error_message, file=f, title=indentation_checker.error_title) - for problem in indentation_problems: - logger.error( - problem.desc, - file=f, - line=problem.line, - end_line=problem.end_line, - ) - else: - logger.debug(indentation_checker.success_message) - # Run all linting checks using the new generic system problems = list(c.run_checks( tagslist=tags, diff --git a/src/crs_linter/rules_metadata.py b/src/crs_linter/rules_metadata.py index 26a22b0..de07975 100644 --- a/src/crs_linter/rules_metadata.py +++ b/src/crs_linter/rules_metadata.py @@ -60,6 +60,10 @@ def get_rule_configs(self, linter_instance, tagslist=None, test_cases=None, excl args[args.index("exclusion_list")] = exclusion_list if "version" in args: args[args.index("version")] = crs_version + if "filename" in args: + args[args.index("filename")] = linter_instance.filename + if "content" in args: + args[args.index("content")] = linter_instance.data # Evaluate condition if a function is provided condition = None From b1ed11ce9c5941837419523304f252d7e351d452 Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Sat, 11 Oct 2025 11:02:33 -0300 Subject: [PATCH 12/19] fix: ids are shared Signed-off-by: Felipe Zipitria --- src/crs_linter/cli.py | 3 ++- src/crs_linter/linter.py | 14 +++----------- src/crs_linter/rules/duplicated.py | 21 +++++++++++++++++++-- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/crs_linter/cli.py b/src/crs_linter/cli.py index fc727bc..648c607 100755 --- a/src/crs_linter/cli.py +++ b/src/crs_linter/cli.py @@ -209,6 +209,7 @@ def main(): filename_tags_exclusions = get_lines_from_file(args.filename_tags_exclusions) parsed = read_files(files) txvars = {} + ids = {} # Shared dict for tracking rule IDs across all files if args.tests is not None: # read existing tests @@ -233,7 +234,7 @@ def main(): for f in parsed.keys(): logger.start_group(f) logger.debug(f) - c = Linter(parsed[f], f, txvars) + c = Linter(parsed[f], f, txvars, ids) # Run all linting checks using the new generic system problems = list(c.run_checks( diff --git a/src/crs_linter/linter.py b/src/crs_linter/linter.py index 3d628b9..6009a58 100644 --- a/src/crs_linter/linter.py +++ b/src/crs_linter/linter.py @@ -27,11 +27,11 @@ class Linter: """Main linter class that orchestrates all rule checks.""" - def __init__(self, data, filename=None, txvars=None, rules=None): + def __init__(self, data, filename=None, txvars=None, ids=None, rules=None): self.data = data # holds the parsed data self.filename = filename self.globtxvars = txvars or {} # global TX variables hash table - self.ids = {} # list of rule id's and their location in files + self.ids = ids if ids is not None else {} # list of rule id's and their location in files (shared across files) # regex to produce tag from filename: self.re_fname = re.compile(r"(REQUEST|RESPONSE)\-\d{3}\-") @@ -76,7 +76,7 @@ def run_checks(self, tagslist=None, test_cases=None, exclusion_list=None, crs_ve print(f"Error running rule {rule_name}: {e}", file=sys.stderr) def _collect_tx_variables(self): - """Collect TX variables in rules and check for duplicated IDs""" + """Collect TX variables in rules""" chained = False for d in self.data: if "actions" in d: @@ -88,14 +88,6 @@ def _collect_tx_variables(self): for a in d["actions"]: if a["act_name"] == "id": ruleid = int(a["act_arg"]) - if ruleid in self.ids: - # This will be caught by duplicated_ids rule - pass - else: - self.ids[ruleid] = { - "fname": self.filename, - "lineno": a["lineno"], - } if a["act_name"] == "phase": phase = int(a["act_arg"]) if a["act_name"] == "chain": diff --git a/src/crs_linter/rules/duplicated.py b/src/crs_linter/rules/duplicated.py index 0f72b8a..d3f94f0 100644 --- a/src/crs_linter/rules/duplicated.py +++ b/src/crs_linter/rules/duplicated.py @@ -11,17 +11,34 @@ def __init__(self): self.success_message = "No duplicate IDs found." self.error_message = "Found duplicated ID(s)" self.error_title = "'id' is duplicated" - self.args = ("data", "ids") + self.args = ("data", "ids", "filename") - def check(self, data, ids): + def check(self, data, ids, filename): """Checks the duplicated rule ID""" for d in data: if "actions" in d: rule_id = get_id(d["actions"]) + # Skip rules without an ID (get_id returns 0 when no ID is found) + if rule_id == 0: + continue + if rule_id in ids: + # Found a duplicate! yield LintProblem( line=0, # Line number not available in this context end_line=0, desc=f"id {rule_id} is duplicated, previous place: {ids[rule_id]['fname']}:{ids[rule_id]['lineno']}", rule="duplicated", ) + else: + # First occurrence - add to ids dict for future duplicate detection + # Get the line number from the actions + lineno = 0 + for action in d["actions"]: + if action["act_name"] == "id": + lineno = action.get("lineno", 0) + break + ids[rule_id] = { + "fname": filename, + "lineno": lineno + } From 70a94417d1b08d0cd350a015768c734da081473c Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Sun, 12 Oct 2025 14:07:56 -0300 Subject: [PATCH 13/19] fix: update to use relative path Signed-off-by: Felipe Zipitria --- src/crs_linter/rules/indentation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/crs_linter/rules/indentation.py b/src/crs_linter/rules/indentation.py index 7e41638..23cd203 100644 --- a/src/crs_linter/rules/indentation.py +++ b/src/crs_linter/rules/indentation.py @@ -1,5 +1,6 @@ import difflib import re +from pathlib import Path import msc_pyparser from crs_linter.lint_problem import LintProblem from crs_linter.rule import Rule @@ -24,7 +25,7 @@ def check(self, filename, content): try: with open(filename, "r") as fp: from_lines = fp.readlines() - if filename.startswith("crs-setup.conf.example"): + if Path(filename).name.startswith("crs-setup.conf.example"): from_lines = self._remove_comments("".join(from_lines)).split("\n") from_lines = [l + "\n" for l in from_lines] except: From b4344604c1e1ef4d723831f1433404c21f8da39b Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Sun, 12 Oct 2025 14:15:09 -0300 Subject: [PATCH 14/19] chore: add verbose intentation errors Signed-off-by: Felipe Zipitria --- src/crs_linter/rules/indentation.py | 61 ++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/src/crs_linter/rules/indentation.py b/src/crs_linter/rules/indentation.py index 23cd203..8580763 100644 --- a/src/crs_linter/rules/indentation.py +++ b/src/crs_linter/rules/indentation.py @@ -49,24 +49,65 @@ def check(self, filename, content): elif len(from_lines) > len(output): output.append("\n") - diff = difflib.unified_diff(from_lines, output) + diff_lines = list(difflib.unified_diff(from_lines, output, lineterm='')) if from_lines == output: # No indentation errors return - else: - error = True - for d in diff: - d = d.strip("\n") - r = re.match(r"^@@ -(\d+),(\d+) \+\d+,\d+ @@$", d) + # Process the diff to extract meaningful error messages + i = 0 + while i < len(diff_lines): + line = diff_lines[i] + # Look for diff hunk headers like "@@ -1,2 +1,2 @@" + r = re.match(r"^@@ -(\d+),(\d+) \+(\d+),(\d+) @@$", line) if r: - line1, line2 = [int(i) for i in r.groups()] + start_line = int(r.group(1)) + count = int(r.group(2)) + + # Collect the diff lines for this hunk to show context + removed_lines = [] + added_lines = [] + j = i + 1 + while j < len(diff_lines) and not diff_lines[j].startswith('@@'): + diff_line = diff_lines[j] + if diff_line.startswith('-'): + # Line in original file that should be removed + content = diff_line[1:].strip() + if content: # Only show non-empty content + removed_lines.append(content[:60]) # Limit line length + elif diff_line.startswith('+'): + # Line that should be added (expected format) + content = diff_line[1:].strip() + if content: # Only show non-empty content + added_lines.append(content[:60]) # Limit line length + j += 1 + + # Create a meaningful error message + desc_parts = [] + if removed_lines: + desc_parts.append(f"Remove: {', '.join(removed_lines[:2])}") + if len(removed_lines) > 2: + desc_parts[-1] += f" (+{len(removed_lines) - 2} more)" + if added_lines: + desc_parts.append(f"Expected: {', '.join(added_lines[:2])}") + if len(added_lines) > 2: + desc_parts[-1] += f" (+{len(added_lines) - 2} more)" + + if desc_parts: + desc = f"Indentation/formatting error - {' | '.join(desc_parts)}" + else: + # Likely whitespace-only differences + desc = f"Indentation/whitespace error (lines {start_line}-{start_line + count - 1}): check spacing and formatting" + yield LintProblem( - line=line1, - end_line=line1 + line2, - desc="an indentation error was found", + line=start_line, + end_line=start_line + count - 1, + desc=desc, rule="indentation", ) + i = j + else: + i += 1 def _remove_comments(self, content): """Remove comments from content""" From 2f62db4512b7453333a15d6a76e618241d2e7e93 Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Sun, 12 Oct 2025 14:56:49 -0300 Subject: [PATCH 15/19] fix: use mscwriter Signed-off-by: Felipe Zipitria --- src/crs_linter/rules/indentation.py | 42 +++++++++++++---------------- 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/src/crs_linter/rules/indentation.py b/src/crs_linter/rules/indentation.py index 8580763..1453182 100644 --- a/src/crs_linter/rules/indentation.py +++ b/src/crs_linter/rules/indentation.py @@ -21,13 +21,10 @@ def check(self, filename, content): error = False problems = [] - # make a diff to check the indentations + # Read the original file for comparison try: with open(filename, "r") as fp: - from_lines = fp.readlines() - if Path(filename).name.startswith("crs-setup.conf.example"): - from_lines = self._remove_comments("".join(from_lines)).split("\n") - from_lines = [l + "\n" for l in from_lines] + original_content = fp.read() except: yield LintProblem( line=0, @@ -37,23 +34,29 @@ def check(self, filename, content): ) return - # virtual output + # Generate the formatted output from the parsed content writer = msc_pyparser.MSCWriter(content) writer.generate() - output = [] - for l in writer.output: - output += [l + "\n" for l in l.split("\n") if l != "\n"] + formatted_output = "\n".join(writer.output) + + # Compare line by line + original_lines = original_content.splitlines(keepends=True) + formatted_lines = formatted_output.splitlines(keepends=True) - if len(from_lines) < len(output): - from_lines.append("\n") - elif len(from_lines) > len(output): - output.append("\n") + # Normalize line counts for comparison + if len(original_lines) < len(formatted_lines): + original_lines.append("\n") + elif len(original_lines) > len(formatted_lines): + formatted_lines.append("\n") - diff_lines = list(difflib.unified_diff(from_lines, output, lineterm='')) - if from_lines == output: + # Check if they're identical + if original_lines == formatted_lines: # No indentation errors return + # Generate diff to show differences + diff_lines = list(difflib.unified_diff(original_lines, formatted_lines, lineterm='')) + # Process the diff to extract meaningful error messages i = 0 while i < len(diff_lines): @@ -108,12 +111,3 @@ def check(self, filename, content): i = j else: i += 1 - - def _remove_comments(self, content): - """Remove comments from content""" - lines = content.split('\n') - result = [] - for line in lines: - if not line.strip().startswith('#'): - result.append(line) - return '\n'.join(result) From 2489541bbc0c34a0ac30fa6da3e888b0a590654b Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Sun, 12 Oct 2025 15:08:58 -0300 Subject: [PATCH 16/19] fix: remove comments also in the indentation file Signed-off-by: Felipe Zipitria --- src/crs_linter/rules/indentation.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/crs_linter/rules/indentation.py b/src/crs_linter/rules/indentation.py index 1453182..b4dd689 100644 --- a/src/crs_linter/rules/indentation.py +++ b/src/crs_linter/rules/indentation.py @@ -4,6 +4,7 @@ import msc_pyparser from crs_linter.lint_problem import LintProblem from crs_linter.rule import Rule +from crs_linter.utils import remove_comments class Indentation(Rule): @@ -25,6 +26,10 @@ def check(self, filename, content): try: with open(filename, "r") as fp: original_content = fp.read() + # Apply the same transformation as in cli.py for crs-setup.conf.example + # This removes leading # from commented-out SecRule/SecAction blocks + if Path(filename).name.startswith("crs-setup.conf.example"): + original_content = remove_comments(original_content) except: yield LintProblem( line=0, From 3d56acd16aad0ab75fdd93c14618457ddcd0e33e Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Sun, 12 Oct 2025 21:13:08 -0300 Subject: [PATCH 17/19] fix: simplify indentation check Signed-off-by: Felipe Zipitria --- src/crs_linter/cli.py | 4 +- src/crs_linter/rules/indentation.py | 95 +++++++---------------------- 2 files changed, 25 insertions(+), 74 deletions(-) diff --git a/src/crs_linter/cli.py b/src/crs_linter/cli.py index 648c607..953b938 100755 --- a/src/crs_linter/cli.py +++ b/src/crs_linter/cli.py @@ -239,8 +239,8 @@ def main(): # Run all linting checks using the new generic system problems = list(c.run_checks( tagslist=tags, - test_cases=test_cases if args.tests is not None else None, - exclusion_list=test_exclusion_list if args.tests is not None else None, + test_cases=test_cases, + exclusion_list=test_exclusion_list, crs_version=crs_version )) diff --git a/src/crs_linter/rules/indentation.py b/src/crs_linter/rules/indentation.py index b4dd689..1732f46 100644 --- a/src/crs_linter/rules/indentation.py +++ b/src/crs_linter/rules/indentation.py @@ -1,5 +1,4 @@ import difflib -import re from pathlib import Path import msc_pyparser from crs_linter.lint_problem import LintProblem @@ -17,6 +16,18 @@ def __init__(self): self.error_title = "Indentation error" self.args = ("filename", "content") + def _compare_strings(self, str1, str2): + # Create a SequenceMatcher object + matcher = difflib.SequenceMatcher(None, str1, str2) + + # Get the differences + differences = [] + for tag, i1, i2, j1, j2 in matcher.get_opcodes(): + if tag != 'equal': + differences.append((tag, str1[i1:i2], str2[j1:j2])) + + return differences + def check(self, filename, content): """Check indentation in the file""" error = False @@ -28,7 +39,7 @@ def check(self, filename, content): original_content = fp.read() # Apply the same transformation as in cli.py for crs-setup.conf.example # This removes leading # from commented-out SecRule/SecAction blocks - if Path(filename).name.startswith("crs-setup.conf.example"): + if Path(filename).name.endswith(".example"): original_content = remove_comments(original_content) except: yield LintProblem( @@ -42,77 +53,17 @@ def check(self, filename, content): # Generate the formatted output from the parsed content writer = msc_pyparser.MSCWriter(content) writer.generate() - formatted_output = "\n".join(writer.output) - + # Add a newline to the end of the formatted output + formatted_output = "\n".join(writer.output) + "\n" # Compare line by line - original_lines = original_content.splitlines(keepends=True) - formatted_lines = formatted_output.splitlines(keepends=True) - - # Normalize line counts for comparison - if len(original_lines) < len(formatted_lines): - original_lines.append("\n") - elif len(original_lines) > len(formatted_lines): - formatted_lines.append("\n") - - # Check if they're identical - if original_lines == formatted_lines: - # No indentation errors - return - - # Generate diff to show differences - diff_lines = list(difflib.unified_diff(original_lines, formatted_lines, lineterm='')) + diffs = self._compare_strings(original_content, formatted_output) - # Process the diff to extract meaningful error messages - i = 0 - while i < len(diff_lines): - line = diff_lines[i] - # Look for diff hunk headers like "@@ -1,2 +1,2 @@" - r = re.match(r"^@@ -(\d+),(\d+) \+(\d+),(\d+) @@$", line) - if r: - start_line = int(r.group(1)) - count = int(r.group(2)) - - # Collect the diff lines for this hunk to show context - removed_lines = [] - added_lines = [] - j = i + 1 - while j < len(diff_lines) and not diff_lines[j].startswith('@@'): - diff_line = diff_lines[j] - if diff_line.startswith('-'): - # Line in original file that should be removed - content = diff_line[1:].strip() - if content: # Only show non-empty content - removed_lines.append(content[:60]) # Limit line length - elif diff_line.startswith('+'): - # Line that should be added (expected format) - content = diff_line[1:].strip() - if content: # Only show non-empty content - added_lines.append(content[:60]) # Limit line length - j += 1 - - # Create a meaningful error message - desc_parts = [] - if removed_lines: - desc_parts.append(f"Remove: {', '.join(removed_lines[:2])}") - if len(removed_lines) > 2: - desc_parts[-1] += f" (+{len(removed_lines) - 2} more)" - if added_lines: - desc_parts.append(f"Expected: {', '.join(added_lines[:2])}") - if len(added_lines) > 2: - desc_parts[-1] += f" (+{len(added_lines) - 2} more)" - - if desc_parts: - desc = f"Indentation/formatting error - {' | '.join(desc_parts)}" - else: - # Likely whitespace-only differences - desc = f"Indentation/whitespace error (lines {start_line}-{start_line + count - 1}): check spacing and formatting" - - yield LintProblem( - line=start_line, - end_line=start_line + count - 1, - desc=desc, + for diff in diffs: + yield LintProblem( + filename=filename, + line=0, + end_line=0, + desc=f"Change Type: {diff[0]}\nOriginal: {diff[1]}\nFormatted: {diff[2]}\n", rule="indentation", ) - i = j - else: - i += 1 + \ No newline at end of file From 121224f7910d9496d293ee8bc3f82b72a05d2430 Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Sun, 12 Oct 2025 21:28:25 -0300 Subject: [PATCH 18/19] fix: do not reset global variable Signed-off-by: Felipe Zipitria --- src/crs_linter/cli.py | 2 +- src/crs_linter/linter.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/crs_linter/cli.py b/src/crs_linter/cli.py index 953b938..3555f08 100755 --- a/src/crs_linter/cli.py +++ b/src/crs_linter/cli.py @@ -208,7 +208,7 @@ def main(): if args.filename_tags_exclusions is not None: filename_tags_exclusions = get_lines_from_file(args.filename_tags_exclusions) parsed = read_files(files) - txvars = {} + txvars = {} # Shared dict for tracking TX variables across all files ids = {} # Shared dict for tracking rule IDs across all files if args.tests is not None: diff --git a/src/crs_linter/linter.py b/src/crs_linter/linter.py index 6009a58..6ffec38 100644 --- a/src/crs_linter/linter.py +++ b/src/crs_linter/linter.py @@ -30,7 +30,7 @@ class Linter: def __init__(self, data, filename=None, txvars=None, ids=None, rules=None): self.data = data # holds the parsed data self.filename = filename - self.globtxvars = txvars or {} # global TX variables hash table + self.globtxvars = txvars if txvars is not None else {} # global TX variables hash table (shared across files) self.ids = ids if ids is not None else {} # list of rule id's and their location in files (shared across files) # regex to produce tag from filename: From f46158e66d3816dffb9c87d2623ac747b77e3451 Mon Sep 17 00:00:00 2001 From: Felipe Zipitria Date: Sun, 12 Oct 2025 21:33:20 -0300 Subject: [PATCH 19/19] docs: update development docs Signed-off-by: Felipe Zipitria --- DEVELOPMENT.md | 84 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 78 insertions(+), 6 deletions(-) diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 480abeb..a9625ea 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -109,7 +109,33 @@ def test_my_new_rule(): assert len(problems) == 0 # or expected number ``` -### 7. Common Rule Patterns +### 7. Available Parameters for Rules + +The rules system automatically maps common parameters when calling your rule's `check()` method. Simply declare them in `self.args`: + +#### Standard Parameters: +- **`data`**: Parsed configuration data for the current file +- **`filename`**: Path to the current file being checked +- **`content`**: Parsed content (same as `data`, for compatibility) + +#### Shared State Parameters (across all files): +- **`globtxvars`**: Dictionary of TX variables defined across all files + - Keys: Variable names (lowercase) + - Values: Dict with `phase`, `used`, `file`, `ruleid`, `line`, etc. + - **Shared across files**: Variables from earlier files are visible in later files + +- **`ids`**: Dictionary of rule IDs across all files + - Keys: Rule ID numbers (int) + - Values: Dict with `fname` (filename) and `lineno` (line number) + - **Shared across files**: Used to detect duplicate IDs across the entire ruleset + +#### Optional Parameters (only available when provided): +- **`tags`** or **`tagslist`**: List of approved tags (from `-t` flag) +- **`test_cases`**: Dictionary of test case IDs (from `-T` flag) +- **`exclusion_list`**: List of excluded tests (from `-E` flag) +- **`version`** or **`crs_version`**: CRS version string (from `-v` flag) + +### 8. Common Rule Patterns #### Rules that need additional context: ```python @@ -118,6 +144,8 @@ def __init__(self): self.args = ("data", "globtxvars") # Access to TX variables # or self.args = ("data", "ids") # Access to rule IDs + # or + self.args = ("data", "filename", "globtxvars") # Multiple parameters ``` #### Rules with conditions: @@ -138,14 +166,58 @@ def check(self, data): pass ``` -### 8. Rule Naming Conventions +#### Accessing shared state: +```python +def check(self, data, globtxvars, ids): + """Check using shared state from all files.""" + for d in data: + if "actions" in d: + rule_id = get_id(d["actions"]) + + # Check if ID already exists (duplicate detection) + if rule_id in ids: + yield LintProblem( + line=0, + end_line=0, + desc=f"Duplicate ID {rule_id}", + rule="my_rule" + ) + + # Check if TX variable is defined + for action in d["actions"]: + if action["act_name"] == "setvar" and "tx." in action["act_arg"]: + var_name = action["act_arg"].split("=")[0].replace("tx.", "").lower() + if var_name not in globtxvars: + yield LintProblem( + line=action["lineno"], + end_line=action["lineno"], + desc=f"TX variable {var_name} not defined", + rule="my_rule" + ) +``` + +### 9. Important: Shared State Across Files + +When the linter processes multiple files, **`globtxvars` and `ids` are shared** across all files: + +- Files are processed in **sorted order** (alphabetically by filename) +- `crs-setup.conf.example` is typically processed first (defines most TX variables) +- Later files can reference TX variables defined in earlier files +- Duplicate ID detection works across the entire ruleset + +**Example processing order:** +1. `crs-setup.conf.example` → defines `tx.blocking_paranoia_level` +2. `REQUEST-901-INITIALIZATION.conf` → can use `tx.blocking_paranoia_level` +3. `REQUEST-911-METHOD-ENFORCEMENT.conf` → can use variables from both previous files + +### 10. Rule Naming Conventions - **Class names**: Use PascalCase (e.g., `MyNewRule`) - **File names**: Use snake_case (e.g., `my_new_rule.py`) - **Rule names**: Use snake_case (automatically derived from class name) - **Descriptions**: Be clear and specific about what the rule checks -### 9. Testing Your Rule +### 11. Testing Your Rule Run the tests to ensure your rule works correctly: @@ -157,14 +229,14 @@ uv run pytest tests/test_linter.py tests/test_rules_metadata.py -v uv run pytest tests/test_linter.py::test_my_new_rule -v ``` -### 10. Integration with CLI +### 12. Integration with CLI Your rule will automatically be available in the CLI once the linter module is imported. The linter will: - Run your rule when appropriate conditions are met - Display your success/error messages - Report problems with your rule name -### 11. How Auto-Registration Works +### 13. How Auto-Registration Works The crs-linter uses a metaclass system for automatic rule registration: @@ -173,7 +245,7 @@ The crs-linter uses a metaclass system for automatic rule registration: 3. **No Manual Work**: You don't need to manually register rules or create instances 4. **Import Trigger**: Rules are registered when the linter module imports them -### 12. Example: Complete Rule Implementation +### 14. Example: Complete Rule Implementation See existing rules like `src/crs_linter/rules/ignore_case.py` or `src/crs_linter/rules/crs_tag.py` for complete examples of rule implementations.