From e36bdf078b0308faa10649eaeb1c14dfa49cace8 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Mon, 3 Mar 2025 15:36:09 +0100 Subject: [PATCH 1/4] Implement IEP009 - product & vuln. identification The related IEP has already been discussed and open for more than a year. --- CHANGELOG.md | 12 ++++++++++++ intelmq/etc/harmonization.conf | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fefa2f65fa..44fa7ac644 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,18 @@ Please refer to the [NEWS](NEWS.md) for a list of changes which have an affect o ### Data Format +- Implementing [IEP009](https://github.com/certtools/ieps/tree/main/009) introducing fields to + identify products and vulnerabilities: `product.full_name`, `product.name`, `product.vendor`, + `product.version`, `product.vulnerabilities`. To store them in existing PostgreSQL instances, the following + schema update will be necessary: + ```sql + ALTER TABLE events ADD "product.full_name" text; + ALTER TABLE events ADD "product.name" text; + ALTER TABLE events ADD "product.vendor" text; + ALTER TABLE events ADD "product.version" text; + ALTER TABLE events ADD "product.vulnerabilities" text; + ``` + ### Bots #### Collectors diff --git a/intelmq/etc/harmonization.conf b/intelmq/etc/harmonization.conf index 027643ac9c..994b9f439c 100644 --- a/intelmq/etc/harmonization.conf +++ b/intelmq/etc/harmonization.conf @@ -221,6 +221,26 @@ "length": 11, "type": "LowercaseString" }, + "product.full_name": { + "description": "A human readable product name. If a machine-readable format isn't available, this field should be used. It can directly use the version identification strings presented by the product. If not given, a good enough value can usually be constructed by concatenating product.name and product.version, or by consulting external sources such as the CPE Product Dictionary. 
Example: openssh_/8.9", + "type": "String" + }, + "product.vendor": { + "description": "Vendor name, recommended being as vendor in the CPE format. Example: openbsd", + "type": "LowercaseString" + }, + "product.name": { + "description": "Product name, recommended being as the product in the CPE format. Example: openssh", + "type": "LowercaseString" + }, + "product.version": { + "description": "Product version, recommended being as version in the CPE format. Example: 8.9", + "type": "LowercaseString" + }, + "product.vulnerabilities": { + "description": "List of vulnerability IDs, separated by semicolons. It's recommended to use a CVE ID where available, and other easily retrievable IDs in other cases, e.g. Github Advisory Database ID. Each vulnerability should only be listed once, and multiple values should be used if there are several different vulnerabilities. However, it's not necessary for a source to list all possible vulnerabilities for a given piece of software. Example: cve-2023-38408;cve-2023-28531;cve-2008-3844;cve-2007-2768", + "type": "LowercaseString" + }, "raw": { "description": "The original line of the event from encoded in base64.", "type": "Base64" From 81ffe543f2a221c6badb21c455d8fb6d6c7ea35e Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Mon, 3 Mar 2025 17:23:43 +0100 Subject: [PATCH 2/4] Fix order --- intelmq/etc/harmonization.conf | 32 ++++++++++++++++---------------- intelmq/tests/bin/initdb.sql | 7 ++++++- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/intelmq/etc/harmonization.conf b/intelmq/etc/harmonization.conf index 994b9f439c..b9e57d6e22 100644 --- a/intelmq/etc/harmonization.conf +++ b/intelmq/etc/harmonization.conf @@ -209,30 +209,18 @@ "description": "Event data converted into foreign format, intended to be exported by output plugin.", "type": "JSON" }, - "protocol.application": { - "description": "e.g. 
vnc, ssh, sip, irc, http or smtp.", - "length": 100, - "regex": "^[ -~]+$", - "type": "LowercaseString" - }, - "protocol.transport": { - "description": "e.g. tcp, udp, icmp.", - "iregex": "^(ip|icmp|igmp|ggp|ipencap|st2|tcp|cbt|egp|igp|bbn-rcc|nvp(-ii)?|pup|argus|emcon|xnet|chaos|udp|mux|dcn|hmp|prm|xns-idp|trunk-1|trunk-2|leaf-1|leaf-2|rdp|irtp|iso-tp4|netblt|mfe-nsp|merit-inp|sep|3pc|idpr|xtp|ddp|idpr-cmtp|tp\\+\\+|il|ipv6|sdrp|ipv6-route|ipv6-frag|idrp|rsvp|gre|mhrp|bna|esp|ah|i-nlsp|swipe|narp|mobile|tlsp|skip|ipv6-icmp|ipv6-nonxt|ipv6-opts|cftp|sat-expak|kryptolan|rvd|ippc|sat-mon|visa|ipcv|cpnx|cphb|wsn|pvp|br-sat-mon|sun-nd|wb-mon|wb-expak|iso-ip|vmtp|secure-vmtp|vines|ttp|nsfnet-igp|dgp|tcf|eigrp|ospf|sprite-rpc|larp|mtp|ax.25|ipip|micp|scc-sp|etherip|encap|gmtp|ifmp|pnni|pim|aris|scps|qnx|a/n|ipcomp|snp|compaq-peer|ipx-in-ip|vrrp|pgm|l2tp|ddx|iatp|st|srp|uti|smp|sm|ptp|isis|fire|crtp|crdup|sscopmce|iplt|sps|pipe|sctp|fc|divert)$", - "length": 11, - "type": "LowercaseString" - }, "product.full_name": { "description": "A human readable product name. If a machine-readable format isn't available, this field should be used. It can directly use the version identification strings presented by the product. If not given, a good enough value can usually be constructed by concatenating product.name and product.version, or by consulting external sources such as the CPE Product Dictionary. Example: openssh_/8.9", "type": "String" }, - "product.vendor": { - "description": "Vendor name, recommended being as vendor in the CPE format. Example: openbsd", - "type": "LowercaseString" - }, "product.name": { "description": "Product name, recommended being as the product in the CPE format. Example: openssh", "type": "LowercaseString" }, + "product.vendor": { + "description": "Vendor name, recommended being as vendor in the CPE format. 
Example: openbsd", + "type": "LowercaseString" + }, "product.version": { "description": "Product version, recommended being as version in the CPE format. Example: 8.9", "type": "LowercaseString" @@ -241,6 +229,18 @@ "description": "List of vulnerability IDs, separated by semicolons. It's recommended to use a CVE ID where available, and other easily retrievable IDs in other cases, e.g. Github Advisory Database ID. Each vulnerability should only be listed once, and multiple values should be used if there are several different vulnerabilities. However, it's not necessary for a source to list all possible vulnerabilities for a given piece of software. Example: cve-2023-38408;cve-2023-28531;cve-2008-3844;cve-2007-2768", "type": "LowercaseString" }, + "protocol.application": { + "description": "e.g. vnc, ssh, sip, irc, http or smtp.", + "length": 100, + "regex": "^[ -~]+$", + "type": "LowercaseString" + }, + "protocol.transport": { + "description": "e.g. tcp, udp, icmp.", + "iregex": "^(ip|icmp|igmp|ggp|ipencap|st2|tcp|cbt|egp|igp|bbn-rcc|nvp(-ii)?|pup|argus|emcon|xnet|chaos|udp|mux|dcn|hmp|prm|xns-idp|trunk-1|trunk-2|leaf-1|leaf-2|rdp|irtp|iso-tp4|netblt|mfe-nsp|merit-inp|sep|3pc|idpr|xtp|ddp|idpr-cmtp|tp\\+\\+|il|ipv6|sdrp|ipv6-route|ipv6-frag|idrp|rsvp|gre|mhrp|bna|esp|ah|i-nlsp|swipe|narp|mobile|tlsp|skip|ipv6-icmp|ipv6-nonxt|ipv6-opts|cftp|sat-expak|kryptolan|rvd|ippc|sat-mon|visa|ipcv|cpnx|cphb|wsn|pvp|br-sat-mon|sun-nd|wb-mon|wb-expak|iso-ip|vmtp|secure-vmtp|vines|ttp|nsfnet-igp|dgp|tcf|eigrp|ospf|sprite-rpc|larp|mtp|ax.25|ipip|micp|scc-sp|etherip|encap|gmtp|ifmp|pnni|pim|aris|scps|qnx|a/n|ipcomp|snp|compaq-peer|ipx-in-ip|vrrp|pgm|l2tp|ddx|iatp|st|srp|uti|smp|sm|ptp|isis|fire|crtp|crdup|sscopmce|iplt|sps|pipe|sctp|fc|divert)$", + "length": 11, + "type": "LowercaseString" + }, "raw": { "description": "The original line of the event from encoded in base64.", "type": "Base64" diff --git a/intelmq/tests/bin/initdb.sql b/intelmq/tests/bin/initdb.sql index 
5a5f839f58..0cb1b39ba7 100644 --- a/intelmq/tests/bin/initdb.sql +++ b/intelmq/tests/bin/initdb.sql @@ -47,6 +47,11 @@ CREATE TABLE events ( "misp.attribute_uuid" varchar(36), "misp.event_uuid" varchar(36), "output" json, + "product.full_name" text, + "product.name" text, + "product.vendor" text, + "product.version" text, + "product.vulnerabilities" text, "protocol.application" varchar(100), "protocol.transport" varchar(11), "raw" text, @@ -93,4 +98,4 @@ CREATE INDEX "idx_events_source.asn" ON events USING btree ("source.asn"); CREATE INDEX "idx_events_source.ip" ON events USING btree ("source.ip"); CREATE INDEX "idx_events_source.fqdn" ON events USING btree ("source.fqdn"); CREATE INDEX "idx_events_time.observation" ON events USING btree ("time.observation"); -CREATE INDEX "idx_events_time.source" ON events USING btree ("time.source"); +CREATE INDEX "idx_events_time.source" ON events USING btree ("time.source"); \ No newline at end of file From 3dbeebf51060b7bee9b44c863037e93873b5b849 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Wed, 9 Apr 2025 16:28:14 +0200 Subject: [PATCH 3/4] Upgrade harmonisation --- NEWS.md | 9 +++++++++ intelmq/lib/upgrades.py | 30 ++++++++++++++++++++++++++++-- intelmq/tests/bin/initdb.sql | 2 +- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 12c225784f..a6ccf4bd11 100644 --- a/NEWS.md +++ b/NEWS.md @@ -18,6 +18,15 @@ Please refer to the change log for a full list of changes. 
### Tools ### Data Format +To save new fields from IntelMQ Data Format in existing PostgreSQL instances, the following schema +update is necessary: +```sql +ALTER TABLE events ADD "product.full_name" text; +ALTER TABLE events ADD "product.name" text; +ALTER TABLE events ADD "product.vendor" text; +ALTER TABLE events ADD "product.version" text; +ALTER TABLE events ADD "product.vulnerabilities" text; +``` ### Configuration diff --git a/intelmq/lib/upgrades.py b/intelmq/lib/upgrades.py index ee22b60a69..349b052505 100644 --- a/intelmq/lib/upgrades.py +++ b/intelmq/lib/upgrades.py @@ -41,7 +41,8 @@ 'v320_update_turris_greylist_url', 'v322_url_replacement', 'v322_removed_feeds_and_bots', - 'v340_deprecations' + 'v340_deprecations', + 'v341_new_fields' ] @@ -974,6 +975,30 @@ def v340_deprecations(configuration, harmonization, dry_run, **kwargs): return message or changed, configuration, harmonization +def v341_new_fields(configuration, harmonization, dry_run, **kwargs): + """ + Add new fields to IntelMQ Data Format + """ + changed = None + if "event" not in harmonization: + return changed, configuration, harmonization + + builtin_harmonisation = load_configuration( + resource_filename("intelmq", "etc/harmonization.conf") + ) + for field in [ + "product.full_name", + "product.name", + "product.vendor", + "product.version", + "product.vulnerabilities", + ]: + if field not in harmonization["event"]: + harmonization["event"][field] = builtin_harmonisation["event"][field] + changed = True + return changed, configuration, harmonization + + UPGRADES = OrderedDict([ ((1, 0, 0, 'dev7'), (v100_dev7_modify_syntax,)), ((1, 1, 0), (v110_shadowserver_feednames, v110_deprecations)), @@ -1004,7 +1029,8 @@ def v340_deprecations(configuration, harmonization, dry_run, **kwargs): ((3, 3, 0), ()), ((3, 3, 1), ()), ((3, 4, 0), (v340_deprecations, )), - ((3, 4, 1), ()), + ((3, 4, 1), (v341_new_fields, )), + ]) ALWAYS = (harmonization,) diff --git a/intelmq/tests/bin/initdb.sql 
b/intelmq/tests/bin/initdb.sql index 0cb1b39ba7..3a6cd03b49 100644 --- a/intelmq/tests/bin/initdb.sql +++ b/intelmq/tests/bin/initdb.sql @@ -98,4 +98,4 @@ CREATE INDEX "idx_events_source.asn" ON events USING btree ("source.asn"); CREATE INDEX "idx_events_source.ip" ON events USING btree ("source.ip"); CREATE INDEX "idx_events_source.fqdn" ON events USING btree ("source.fqdn"); CREATE INDEX "idx_events_time.observation" ON events USING btree ("time.observation"); -CREATE INDEX "idx_events_time.source" ON events USING btree ("time.source"); \ No newline at end of file +CREATE INDEX "idx_events_time.source" ON events USING btree ("time.source"); From c1c6c353d325472148b4cb734b304b1d1d338387 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Wed, 9 Apr 2025 16:37:05 +0200 Subject: [PATCH 4/4] Add tests --- intelmq/lib/upgrades.py | 3 +++ intelmq/tests/lib/test_upgrades.py | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/intelmq/lib/upgrades.py b/intelmq/lib/upgrades.py index 349b052505..6da2f1a7bb 100644 --- a/intelmq/lib/upgrades.py +++ b/intelmq/lib/upgrades.py @@ -994,6 +994,9 @@ def v341_new_fields(configuration, harmonization, dry_run, **kwargs): "product.vulnerabilities", ]: if field not in harmonization["event"]: + if field not in builtin_harmonisation["event"]: + # ensure forward-compatibility if we ever remove something from harmonisation + continue harmonization["event"][field] = builtin_harmonisation["event"][field] changed = True return changed, configuration, harmonization diff --git a/intelmq/tests/lib/test_upgrades.py b/intelmq/tests/lib/test_upgrades.py index a30800b9cb..5dc6503663 100644 --- a/intelmq/tests/lib/test_upgrades.py +++ b/intelmq/tests/lib/test_upgrades.py @@ -856,6 +856,17 @@ def test_v340_twitter_collector(self): self.assertIn('twitter-collector', result[0]) self.assertEqual(V340_TWITTER_COLLECTOR_IN, result[1]) + def test_v341_new_fields(self): + """ Test adding new harmonisation fields """ + result = 
upgrades.v341_new_fields({}, {"event": {"old-field": "must stay"}}, False) + self.assertTrue(result[0]) + self.assertIn("old-field", result[2]["event"]) + self.assertIn("product.full_name", result[2]["event"]) + self.assertIn("product.name", result[2]["event"]) + self.assertIn("product.vendor", result[2]["event"]) + self.assertIn("product.version", result[2]["event"]) + self.assertIn("product.vulnerabilities", result[2]["event"]) + for name in upgrades.__all__: setattr(TestUpgradeLib, 'test_function_%s' % name,