coreruleset
diff --git a/‎src/crs_linter/cli.py‎
Lines changed: 7 additions & 59 deletions b/‎src/crs_linter/cli.py‎
Lines changed: 7 additions & 59 deletions
diff --git a/‎src/crs_linter/linter.py‎
100755100644
Lines changed: 66 additions & 26 deletions b/‎src/crs_linter/linter.py‎
100755100644
Lines changed: 66 additions & 26 deletions
diff --git a/‎src/crs_linter/rules/__init__.py‎
Lines changed: 17 additions & 0 deletions b/‎src/crs_linter/rules/__init__.py‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎src/crs_linter/rules/approved_tags.py‎
Lines changed: 22 additions & 0 deletions b/‎src/crs_linter/rules/approved_tags.py‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎src/crs_linter/rules/capture.py‎
Lines changed: 63 additions & 0 deletions b/‎src/crs_linter/rules/capture.py‎
Lines changed: 63 additions & 0 deletions
diff --git a/‎src/crs_linter/rules/crs_tag.py‎
Lines changed: 38 additions & 0 deletions b/‎src/crs_linter/rules/crs_tag.py‎
Lines changed: 38 additions & 0 deletions
@@ -11,14 +11,9 @@
 from dulwich.contrib.release_robot import get_current_version
 from semver import Version
 
-try:
-    from linter import Check
-except ImportError:
-    from crs_linter.linter import Check
-try:
-    from logger import Logger, Output
-except ImportError:
-    from crs_linter.logger import Logger, Output
+from crs_linter.linter import Linter
+from crs_linter.logger import Logger, Output
+from crs_linter.rules import indentation
 
 
 def remove_comments(data):
@@ -187,54 +182,6 @@ def get_crs_version(directory, version=None, head_ref=None, commit_message=None)
     return crs_version
 
 
-def check_indentation(filename, content):
-    error = False
-
-    ### make a diff to check the indentations
-    try:
-        with open(filename, "r") as fp:
-            from_lines = fp.readlines()
-            if os.path.basename(filename) == "crs-setup.conf.example":
-                from_lines = remove_comments("".join(from_lines)).split("\n")
-                from_lines = [l + "\n" for l in from_lines]
-    except FileNotFoundError:
-        logger.error(f"Can't open file for indentation check: {filename}")
-        error = True
-
-    # virtual output
-    writer = msc_pyparser.MSCWriter(content)
-    writer.generate()
-    output = []
-    for l in writer.output:
-        output += [l + "\n" for l in l.split("\n") if l != "\n"]
-
-    if len(from_lines) < len(output):
-        from_lines.append("\n")
-    elif len(from_lines) > len(output):
-        output.append("\n")
-
-    diff = difflib.unified_diff(from_lines, output)
-    if from_lines == output:
-        logger.debug("Indentation check ok.")
-    else:
-        logger.debug("Indentation check found error(s)")
-        error = True
-    for d in diff:
-        d = d.strip("\n")
-        r = re.match(r"^@@ -(\d+),(\d+) \+\d+,\d+ @@$", d)
-        if r:
-            line1, line2 = [int(i) for i in r.groups()]
-            logger.error(
-                "an indentation error was found",
-                file=filename,
-                title="Indentation error",
-                line=line1,
-                end_line=line1 + line2,
-            )
-
-    return error
-
-
 def read_files(filenames):
     global logger
 
@@ -288,7 +235,7 @@ def _arg_in_argv(argv, args):
 
 def parse_args(argv):
     parser = argparse.ArgumentParser(
-        prog="crs-linter", description="CRS Rules Check tool"
+        prog="crs-linter", description="CRS Rules Linter tool"
     )
     parser.add_argument(
         "-o",
@@ -421,7 +368,8 @@ def main():
     for f in parsed.keys():
         logger.start_group(f)
         logger.debug(f)
-        c = Check(parsed[f], f, txvars)
+        c = Linter(parsed[f], f, txvars)
+
 
         ### check case usings
         c.check_ignore_case()
@@ -450,7 +398,7 @@ def main():
                     title="Action order check",
                 )
 
-        error = check_indentation(f, parsed[f])
+        error = indentation.check(f, parsed[f])
         if error:
             retval = 1
 
 
@@ -1,39 +1,48 @@
-import sys
 import msc_pyparser
-import difflib
-import argparse
 import re
-import subprocess
-import logging
-import os.path
 
-def parse_config(text):
-    try:
-        mparser = msc_pyparser.MSCParser()
-        mparser.parser.parse(text)
-        return mparser.configlines
+class LintProblem:
+    """A single problem reported by a crs-linter check.
+
+    Instances compare equal when ``line``, ``column`` and ``rule`` match, and
+    order by ``(line, column)`` so a list of problems can be sorted by position.
+    """
+
+    def __init__(self, line, end_line, column=None, desc='<no description>', rule=None):
+        #: Line on which the problem was found (starting at 1)
+        self.line = line
+        #: Line on which the problem ends
+        self.end_line = end_line
+        #: Column on which the problem was found (starting at 1), or None
+        self.column = column
+        #: Human-readable description of the problem
+        self.desc = desc
+        #: Identifier of the rule that detected the problem
+        self.rule = rule
+        #: Not set by the constructor; stays None until assigned by other code
+        self.level = None
 
-    except Exception as e:
-        print(e)
+    @property
+    def message(self):
+        """Return the description, suffixed with ``(rule)`` when a rule is set."""
+        if self.rule is not None:
+            return f'{self.desc} ({self.rule})'
+        return self.desc
 
+    def __eq__(self, other):
+        # Defer to Python's default handling for foreign types instead of
+        # failing with AttributeError on a missing attribute.
+        if not isinstance(other, LintProblem):
+            return NotImplemented
+        return (self.line == other.line and
+                self.column == other.column and
+                self.rule == other.rule)
 
-def parse_file(filename):
-    try:
-        mparser = msc_pyparser.MSCParser()
-        with open(filename, "r") as f:
-            mparser.parser.parse(f.read())
-        return mparser.configlines
+    def __lt__(self, other):
+        if not isinstance(other, LintProblem):
+            return NotImplemented
+        # ``column`` defaults to None, which cannot be ordered against an int;
+        # treat a missing column as 0 so it sorts before any real column.
+        return ((self.line, self.column or 0) <
+                (other.line, other.column or 0))
 
-    except Exception as e:
-        print(e)
+    def __repr__(self):
+        return f'{self.line}:{self.column}: {self.message}'
 
 
-class Check():
-    def __init__(self, data, filename=None, txvars={}):
+class Linter:
+    ids = {}  # list of rule id's and their location in files
+    vars = {}  # list of TX variables and their location in files
 
+    def __init__(self, data, filename=None, txvars=None):
         # txvars is a global used hash table, but processing of rules is a sequential flow
         # all rules need this global table
-        self.globtxvars = txvars
+        self.globtxvars = txvars or {}
         # list available operators, actions, transformations and ctl args
         self.operators = "beginsWith|containsWord|contains|detectSQLi|detectXSS|endsWith|eq|fuzzyHash|geoLookup|ge|gsbLookup|gt|inspectFile|ipMatch|ipMatchF|ipMatchFromFile|le|lt|noMatch|pmFromFile|pmf|pm|rbl|rsub|rx|streq|strmatch|unconditionalMatch|validateByteRange|validateDTD|validateHash|validateSchema|validateUrlEncoding|validateUtf8Encoding|verifyCC|verifyCPF|verifySSN|within".split(
             "|"
@@ -101,7 +110,6 @@ def __init__(self, data, filename=None, txvars={}):
         self.ids = {}  # list of rule id's and their location in files
 
         # Any of these variables below are used to store the errors
-
         self.error_case_mistmatch = []  # list of case mismatch errors
         self.error_action_order = []  # list of ordered action errors
         self.error_wrong_ctl_auditlogparts = []  # list of wrong ctl:auditLogParts
@@ -120,10 +128,11 @@ def __init__(self, data, filename=None, txvars={}):
         self.error_tx_N_without_capture_action = (
             []
         )  # list of rules which uses TX.N without previous 'capture'
-        self.error_rule_hasnotest  = (
+        self.error_rule_hasnotest = (
             []
         )  # list of rules which don't have any tests
         # regex to produce tag from filename:
+        import os.path
         self.re_fname = re.compile(r"(REQUEST|RESPONSE)\-\d{3}\-")
         self.filename_tag_exclusions = []
 
@@ -231,6 +240,7 @@ def check_ignore_case(self):
                     e["message"] += f" (rule: {self.current_ruleid})"
 
     def check_action_order(self):
+        import sys
         for d in self.data:
             if "actions" in d:
                 max_order = 0  # maximum position of read actions
@@ -771,6 +781,7 @@ def gen_crs_file_tag(self, fname=None):
         """
         generate tag from filename
         """
+        import os.path
         filename_for_tag = fname if fname is not None else self.filename
         filename = self.re_fname.sub("", os.path.basename(os.path.splitext(filename_for_tag)[0]))
         filename = filename.replace("APPLICATION-", "")
@@ -993,3 +1004,32 @@ def find_ids_without_tests(self, test_cases, exclusion_list):
                                         'message': f"rule does not have any tests; rule id: {rid}'"
                                     })
         return True
+
+
+def parse_config(text):
+    """Parse *text* with ``msc_pyparser.MSCParser`` and return its ``configlines``.
+
+    Any exception raised while parsing is printed to stdout and ``None`` is
+    returned instead of the parsed lines.
+    """
+    try:
+        msc_parser = msc_pyparser.MSCParser()
+        msc_parser.parser.parse(text)
+        parsed = msc_parser.configlines
+    except Exception as exc:
+        print(exc)
+        return None
+    return parsed
+
+
+def parse_file(filename):
+    """Read *filename* and parse its contents with ``msc_pyparser.MSCParser``.
+
+    Returns the parser's ``configlines``. Any exception (opening, reading or
+    parsing) is printed to stdout and ``None`` is returned instead.
+    """
+    try:
+        msc_parser = msc_pyparser.MSCParser()
+        with open(filename, "r") as handle:
+            contents = handle.read()
+            msc_parser.parser.parse(contents)
+        parsed = msc_parser.configlines
+    except Exception as exc:
+        print(exc)
+        return None
+    return parsed
+
+
+def get_id(actions):
+    """Return the numeric rule id found in *actions*, or 0 when there is none.
+
+    The first action whose ``act_name`` is ``"id"`` wins; its ``act_arg`` is
+    converted with ``int()``.
+    """
+    for action in actions:
+        if action["act_name"] != "id":
+            continue
+        return int(action["act_arg"])
+    return 0
@@ -0,0 +1,17 @@
+# import all checks
+
+from . import (
+    approved_tags,
+    capture,
+    crs_tag,
+    deprecated,
+    duplicated,
+    ignore_case,
+    indentation,
+    lowercase_ignorecase,
+    ordered_actions,
+    pl_consistency,
+    pl_tags,
+    variables_usage,
+    version
+)
@@ -0,0 +1,37 @@
+from crs_linter.linter import LintProblem
+
+
+def check_tags(self, tags):
+    """
+    check that only tags from the util/APPROVED_TAGS file are used
+
+    Walks ``self.data`` and yields a LintProblem for each ``tag`` action whose
+    argument is not found in ``tags``. The id of the rule that owns the tag is
+    tracked across chained rules so it can be included in the description.
+    """
+    chained = False
+    ruleid = 0
+    for d in self.data:
+        if "actions" not in d:
+            continue
+        # a rule that does not continue a chain starts with no known id
+        if not chained:
+            ruleid = 0
+        chained = False
+        for a in d["actions"]:
+            if a["act_name"] == "id":
+                ruleid = int(a["act_arg"])
+            elif a["act_name"] == "chain":
+                chained = True
+            elif a["act_name"] == "tag":
+                tag = a["act_arg"]
+                # check whether tag is in tagslist
+                if tag not in tags:
+                    yield LintProblem(
+                        line=a["lineno"],
+                        end_line=a["lineno"],
+                        desc=f'rule uses unknown tag: "{tag}"; only tags registered in the util/APPROVED_TAGS file may be used; rule id: {ruleid}',
+                        # LintProblem names the detecting check via `rule`;
+                        # its constructor has no `rule_id` parameter
+                        rule="approved_tags",
+                    )
@@ -0,0 +1,71 @@
+import re
+
+
+def check_capture_action(self):
+    """
+    check that every chained rule has a `capture` action if it uses TX.N variable
+
+    Reads ``self.data`` and appends one dict per offending rule to
+    ``self.error_tx_N_without_capture_action``.
+    """
+    chained = False
+    ruleid = 0
+    chainlevel = 0
+    capture_level = None
+    # a TX variable part that is exactly one digit
+    re_number = re.compile(r"^\d$")
+    has_capture = False
+    use_captured_var = False
+    captured_var_chain_level = 0
+    for d in self.data:
+        # only the SecRule object is relevant
+        if d["type"].lower() == "secrule":
+            for v in d["variables"]:
+                if v["variable"].lower() == "tx" and re_number.match(
+                        v["variable_part"]
+                ):
+                    # only the first occurrence required
+                    if not use_captured_var:
+                        use_captured_var = True
+                        captured_var_chain_level = chainlevel
+            if "actions" in d:
+                if not chained:
+                    ruleid = 0
+                    chainlevel = 0
+                else:
+                    chained = False
+                for a in d["actions"]:
+                    if a["act_name"] == "id":
+                        ruleid = int(a["act_arg"])
+                    if a["act_name"] == "chain":
+                        chained = True
+                        chainlevel += 1
+                    if a["act_name"] == "capture":
+                        capture_level = chainlevel
+                        has_capture = True
+                if ruleid > 0 and not chained:  # end of chained rule
+                    if use_captured_var:
+                        # we allow if target with TX:N is in the first rule
+                        # of a chained rule without 'capture'
+                        if captured_var_chain_level > 0:
+                            if (
+                                    not has_capture
+                                    or captured_var_chain_level < capture_level
+                            ):
+                                # `a` is the last action read by the loop above
+                                self.error_tx_N_without_capture_action.append(
+                                    {
+                                        "ruleid": ruleid,
+                                        "line": a["lineno"],
+                                        "endLine": a["lineno"],
+                                        "message": f"rule uses TX.N without capture; rule id: {ruleid}'",
+                                    }
+                                )
+                    # clear variables
+                    chained = False
+                    chainlevel = 0
+                    has_capture = False
+                    capture_level = 0
+                    captured_var_chain_level = 0
+                    use_captured_var = False
+                    ruleid = 0
@@ -0,0 +1,46 @@
+
+def check_crs_tag(self):
+    """
+    Verify that every rule carries a `tag:'OWASP_CRS'` action.
+
+    Walks ``self.data``; for each entry with actions whose rule id is known and
+    for which no such tag has been seen, a dict describing the problem is
+    appended to ``self.error_no_crstag``.
+    """
+    in_chain = False
+    rule_id = 0
+    crs_tag_seen = False
+    for entry in self.data:
+        if "actions" not in entry:
+            continue
+
+        # the chain depth counter restarts for every entry
+        depth = 0
+        if in_chain:
+            in_chain = False
+        else:
+            rule_id = 0
+            crs_tag_seen = False
+
+        for action in entry["actions"]:
+            name = action["act_name"]
+            if name == "id":
+                rule_id = int(action["act_arg"])
+            elif name == "chain":
+                in_chain = True
+                depth += 1
+            elif name == "tag" and depth == 0 and action["act_arg"] == "OWASP_CRS":
+                crs_tag_seen = True
+
+        if rule_id <= 0 or crs_tag_seen:
+            continue
+        # reported against the line of the last action read
+        self.error_no_crstag.append(
+            {
+                "ruleid": rule_id,
+                "line": action["lineno"],
+                "endLine": action["lineno"],
+                "message": f"rule does not have tag with value 'OWASP_CRS'; rule id: {rule_id}",
+            }
+        )
+