From dd7a4913e79ad011d2291dac4cc918963277ebf0 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Mon, 3 Mar 2025 15:15:14 +0100 Subject: [PATCH 1/9] Severity field in IDF Severity has been expected in IntelMQ for a long time, and it's already partially used by e.g. ShadowServer reports. This implementation is based on their understanding of the field, but with an explicit mention that operators could adjust it based on their knowledge. This is not intended to be an ultimate severity classification, but a help for first triage of received events. Close #2365 --- CHANGELOG.md | 3 +++ intelmq/etc/harmonization.conf | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fefa2f65fa..1cad628bdb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,9 @@ Please refer to the [NEWS](NEWS.md) for a list of changes which have an affect o ### Data Format +- added `severity` field to help with triaging received events (PR# by Kamil Mańkowski). + To allow saving the field in PostgreSQL database in existing installations, the following schema update is necessary: `ALTER TABLE events ADD severity varchar(10);`. + ### Bots #### Collectors diff --git a/intelmq/etc/harmonization.conf b/intelmq/etc/harmonization.conf index 027643ac9c..c223085d12 100644 --- a/intelmq/etc/harmonization.conf +++ b/intelmq/etc/harmonization.conf @@ -362,6 +362,12 @@ "tlp": { "description": "Traffic Light Protocol level of the event.", "type": "TLP" + }, + "severity": { + "description": "Severity of the event, based on the information from the source, and eventually modified by IntelMQ during processing. 
Meaning of the levels may differ based on the event source.", + "type": "LowercaseString", + "regex": "^(critical|high|medium|low|info|undefined)$", + "length": 10 } }, "report": { From e13f1ce1352340979021b732d4cf5f12c2d05dda Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Mon, 3 Mar 2025 17:26:39 +0100 Subject: [PATCH 2/9] Fix order --- intelmq/etc/harmonization.conf | 12 ++++++------ intelmq/tests/bin/initdb.sql | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/intelmq/etc/harmonization.conf b/intelmq/etc/harmonization.conf index c223085d12..8718941613 100644 --- a/intelmq/etc/harmonization.conf +++ b/intelmq/etc/harmonization.conf @@ -233,6 +233,12 @@ "description": "Some source may report URLs related to a an image generated of a resource without any metadata. Or an URL pointing to resource, which has been rendered into a webshot, e.g. a PNG image and the relevant metadata related to its retrieval/generation.", "type": "URL" }, + "severity": { + "description": "Severity of the event, based on the information from the source, and eventually modified by IntelMQ during processing. Meaning of the levels may differ based on the event source.", + "length": 10, + "regex": "^(critical|high|medium|low|info|undefined)$", + "type": "LowercaseString" + }, "source.abuse_contact": { "description": "Abuse contact for source address. A comma separated list.", "type": "LowercaseString" @@ -362,12 +368,6 @@ "tlp": { "description": "Traffic Light Protocol level of the event.", "type": "TLP" - }, - "severity": { - "description": "Severity of the event, based on the information from the source, and eventually modified by IntelMQ during processing. 
Meaning of the levels may differ based on the event source.", - "type": "LowercaseString", - "regex": "^(critical|high|medium|low|info|undefined)$", - "length": 10 } }, "report": { diff --git a/intelmq/tests/bin/initdb.sql b/intelmq/tests/bin/initdb.sql index 5a5f839f58..150b5d4519 100644 --- a/intelmq/tests/bin/initdb.sql +++ b/intelmq/tests/bin/initdb.sql @@ -52,6 +52,7 @@ CREATE TABLE events ( "raw" text, "rtir_id" integer, "screenshot_url" text, + "severity" varchar(10), "source.abuse_contact" text, "source.account" text, "source.allocated" timestamp with time zone, @@ -93,4 +94,4 @@ CREATE INDEX "idx_events_source.asn" ON events USING btree ("source.asn"); CREATE INDEX "idx_events_source.ip" ON events USING btree ("source.ip"); CREATE INDEX "idx_events_source.fqdn" ON events USING btree ("source.fqdn"); CREATE INDEX "idx_events_time.observation" ON events USING btree ("time.observation"); -CREATE INDEX "idx_events_time.source" ON events USING btree ("time.source"); +CREATE INDEX "idx_events_time.source" ON events USING btree ("time.source"); \ No newline at end of file From 62f944e22745bb912dae51e11309ac2778fafafe Mon Sep 17 00:00:00 2001 From: kamil-certat <117654481+kamil-certat@users.noreply.github.com> Date: Thu, 3 Apr 2025 16:07:14 +0200 Subject: [PATCH 3/9] Update CHANGELOG.md Co-authored-by: Sebastian --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cad628bdb..95ddc970d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,7 @@ Please refer to the [NEWS](NEWS.md) for a list of changes which have an affect o ### Data Format -- added `severity` field to help with triaging received events (PR# by Kamil Mańkowski). +- added `severity` field to help with triaging received events (PR#2575 by Kamil Mańkowski). To allow saving the field in PostgreSQL database in existing installations, the following schema update is necessary: `ALTER TABLE events ADD severity varchar(10);`. 
### Bots From dea15b152e96030e995074a174e078bb250ba477 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Thu, 3 Apr 2025 17:18:02 +0200 Subject: [PATCH 4/9] Added news entry and upgrade function --- NEWS.md | 5 +++++ intelmq/lib/upgrades.py | 14 +++++++++++++- intelmq/tests/bin/initdb.sql | 2 +- intelmq/tests/lib/test_upgrades.py | 7 +++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 12c225784f..9b23a05332 100644 --- a/NEWS.md +++ b/NEWS.md @@ -18,6 +18,11 @@ Please refer to the change log for a full list of changes. ### Tools ### Data Format +To save new fields from IntelMQ Data Format in existing PostgreSQL instances, the following schema +update is necessary: +```sql +ALTER TABLE events ADD severity varchar(10); +``` ### Configuration diff --git a/intelmq/lib/upgrades.py b/intelmq/lib/upgrades.py index ee22b60a69..472caf05d4 100644 --- a/intelmq/lib/upgrades.py +++ b/intelmq/lib/upgrades.py @@ -973,6 +973,18 @@ def v340_deprecations(configuration, harmonization, dry_run, **kwargs): message = f"Found discontinued Twitter collector bot: {', '.join(found_twitter_collector)}" return message or changed, configuration, harmonization +def v341_new_fields(configuration, harmonization, dry_run, **kwargs): + changed = False + if "severity" not in harmonization["event"]: + harmonization["event"]["severity"] = { + "description": "Severity of the event, based on the information from the source, and eventually modified by IntelMQ during processing. 
Meaning of the levels may differ based on the event source.", + "length": 10, + "regex": "^(critical|high|medium|low|info|undefined)$", + "type": "LowercaseString", + } + changed = True + return changed, configuration, harmonization + UPGRADES = OrderedDict([ ((1, 0, 0, 'dev7'), (v100_dev7_modify_syntax,)), @@ -1004,7 +1016,7 @@ def v340_deprecations(configuration, harmonization, dry_run, **kwargs): ((3, 3, 0), ()), ((3, 3, 1), ()), ((3, 4, 0), (v340_deprecations, )), - ((3, 4, 1), ()), + ((3, 4, 1), (v341_new_fields, )), ]) ALWAYS = (harmonization,) diff --git a/intelmq/tests/bin/initdb.sql b/intelmq/tests/bin/initdb.sql index 150b5d4519..020af93d21 100644 --- a/intelmq/tests/bin/initdb.sql +++ b/intelmq/tests/bin/initdb.sql @@ -94,4 +94,4 @@ CREATE INDEX "idx_events_source.asn" ON events USING btree ("source.asn"); CREATE INDEX "idx_events_source.ip" ON events USING btree ("source.ip"); CREATE INDEX "idx_events_source.fqdn" ON events USING btree ("source.fqdn"); CREATE INDEX "idx_events_time.observation" ON events USING btree ("time.observation"); -CREATE INDEX "idx_events_time.source" ON events USING btree ("time.source"); \ No newline at end of file +CREATE INDEX "idx_events_time.source" ON events USING btree ("time.source"); diff --git a/intelmq/tests/lib/test_upgrades.py b/intelmq/tests/lib/test_upgrades.py index a30800b9cb..e09f01efd4 100644 --- a/intelmq/tests/lib/test_upgrades.py +++ b/intelmq/tests/lib/test_upgrades.py @@ -856,6 +856,13 @@ def test_v340_twitter_collector(self): self.assertIn('twitter-collector', result[0]) self.assertEqual(V340_TWITTER_COLLECTOR_IN, result[1]) + def test_v341_new_fields(self): + """ Test adding new harmonisation fields """ + result = upgrades.v341_new_fields({}, {"event": {"old-field": "must stay"}}, False) + self.assertTrue(result[0]) + self.assertIn("old-field", result[1]["event"]) + self.assertIn("severity", result[1]["event"]) + for name in upgrades.__all__: setattr(TestUpgradeLib, 'test_function_%s' % name, From 
aa4f03d6bca782f19b719587bd9cb0ddb9b82163 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Thu, 3 Apr 2025 17:20:04 +0200 Subject: [PATCH 5/9] Fix style --- intelmq/lib/upgrades.py | 1 + 1 file changed, 1 insertion(+) diff --git a/intelmq/lib/upgrades.py b/intelmq/lib/upgrades.py index 472caf05d4..d1fe7bdee9 100644 --- a/intelmq/lib/upgrades.py +++ b/intelmq/lib/upgrades.py @@ -973,6 +973,7 @@ def v340_deprecations(configuration, harmonization, dry_run, **kwargs): message = f"Found discontinued Twitter collector bot: {', '.join(found_twitter_collector)}" return message or changed, configuration, harmonization + def v341_new_fields(configuration, harmonization, dry_run, **kwargs): changed = False if "severity" not in harmonization["event"]: From aae17a75510410c9167360b7f756f1985f2a07c0 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Thu, 3 Apr 2025 17:30:57 +0200 Subject: [PATCH 6/9] Fix tests --- intelmq/lib/upgrades.py | 3 ++- intelmq/tests/lib/test_upgrades.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/intelmq/lib/upgrades.py b/intelmq/lib/upgrades.py index d1fe7bdee9..d7a57f6616 100644 --- a/intelmq/lib/upgrades.py +++ b/intelmq/lib/upgrades.py @@ -41,7 +41,8 @@ 'v320_update_turris_greylist_url', 'v322_url_replacement', 'v322_removed_feeds_and_bots', - 'v340_deprecations' + 'v340_deprecations', + 'v341_new_fields' ] diff --git a/intelmq/tests/lib/test_upgrades.py b/intelmq/tests/lib/test_upgrades.py index e09f01efd4..d5f386fde4 100644 --- a/intelmq/tests/lib/test_upgrades.py +++ b/intelmq/tests/lib/test_upgrades.py @@ -860,8 +860,8 @@ def test_v341_new_fields(self): """ Test adding new harmonisation fields """ result = upgrades.v341_new_fields({}, {"event": {"old-field": "must stay"}}, False) self.assertTrue(result[0]) - self.assertIn("old-field", result[1]["event"]) - self.assertIn("severity", result[1]["event"]) + self.assertIn("old-field", result[2]["event"]) + self.assertIn("severity", result[2]["event"]) for name in 
upgrades.__all__: From 27fa9ac128b48b112c8743f252ef6d8de7a09040 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Thu, 3 Apr 2025 17:33:23 +0200 Subject: [PATCH 7/9] Add missing docstring --- intelmq/lib/upgrades.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/intelmq/lib/upgrades.py b/intelmq/lib/upgrades.py index d7a57f6616..64510a4af2 100644 --- a/intelmq/lib/upgrades.py +++ b/intelmq/lib/upgrades.py @@ -976,6 +976,9 @@ def v340_deprecations(configuration, harmonization, dry_run, **kwargs): def v341_new_fields(configuration, harmonization, dry_run, **kwargs): + """ + Add new fields to IntelMQ Data Format + """ changed = False if "severity" not in harmonization["event"]: harmonization["event"]["severity"] = { From 33383ef6a41709b47a97326f498f1d1a1c1f491c Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Wed, 9 Apr 2025 16:15:34 +0200 Subject: [PATCH 8/9] Fix handling no event in harmonisation --- intelmq/lib/upgrades.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intelmq/lib/upgrades.py b/intelmq/lib/upgrades.py index 64510a4af2..0fda6daaea 100644 --- a/intelmq/lib/upgrades.py +++ b/intelmq/lib/upgrades.py @@ -980,7 +980,7 @@ def v341_new_fields(configuration, harmonization, dry_run, **kwargs): Add new fields to IntelMQ Data Format """ changed = False - if "severity" not in harmonization["event"]: + if "event" in harmonization and "severity" not in harmonization["event"]: harmonization["event"]["severity"] = { "description": "Severity of the event, based on the information from the source, and eventually modified by IntelMQ during processing. 
Meaning of the levels may differ based on the event source.", "length": 10, From 8b27c33f89ea678043a2b086915440174d491e68 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Wed, 9 Apr 2025 16:29:02 +0200 Subject: [PATCH 9/9] Simplify and fix upgrade function --- intelmq/lib/upgrades.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/intelmq/lib/upgrades.py b/intelmq/lib/upgrades.py index 0fda6daaea..9ab53a1daa 100644 --- a/intelmq/lib/upgrades.py +++ b/intelmq/lib/upgrades.py @@ -979,15 +979,22 @@ def v341_new_fields(configuration, harmonization, dry_run, **kwargs): """ Add new fields to IntelMQ Data Format """ - changed = False - if "event" in harmonization and "severity" not in harmonization["event"]: - harmonization["event"]["severity"] = { - "description": "Severity of the event, based on the information from the source, and eventually modified by IntelMQ during processing. Meaning of the levels may differ based on the event source.", - "length": 10, - "regex": "^(critical|high|medium|low|info|undefined)$", - "type": "LowercaseString", - } - changed = True + changed = None + if "event" not in harmonization: + return changed, configuration, harmonization + + builtin_harmonisation = load_configuration( + resource_filename("intelmq", "etc/harmonization.conf") + ) + for field in [ + "severity", + ]: + if field not in harmonization["event"]: + if field not in builtin_harmonisation["event"]: + # ensure forward-compatibility if we ever remove something from harmonisation + continue + harmonization["event"][field] = builtin_harmonisation["event"][field] + changed = True return changed, configuration, harmonization