Skip to content

Commit f2b8b53

Browse files
committed
ENH: Add attribute mapping
The bot can now construct an event much more closely aligned with custom needs, allowing comments to be set and just a subset of fields to be selected for export
1 parent 6461efa commit f2b8b53

File tree

4 files changed

+138
-13
lines changed

4 files changed

+138
-13
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
- `intelmq.bots.outputs.misp.output_feed`:
3838
- Handle failures if saved current event wasn't saved or is incorrect (PR by Kamil Mankowski).
3939
- Allow saving messages in bulks instead of refreshing the feed immediately (PR#2505 by Kamil Mankowski).
40+
- Add `attribute_mapping` parameter to allow selecting a subset of event attributes as well as additional attribute parameters (PR by Kamil Mankowski).
4041
- `intelmq.bots.outputs.smtp_batch.output`: Documentation on multiple recipients added (PR#2501 by Edvard Rejthar).
4142

4243
### Documentation

docs/user/bots.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4602,6 +4602,31 @@ incoming messages until the given number of them. Use it if your bot proceeds a
46024602
and constant saving to the disk is a problem. Reloading or restarting bot as well as generating
46034603
a new MISP event based on `interval_event` triggers saving regardless of the cache size.
46044604

4605+
**`attribute_mapping`**
4606+
4607+
(optional, dict) If set, allows selecting which IntelMQ event fields are mapped to MISP attributes
4608+
as well as attribute parameters (e.g. a comment). The expected format is a *dictionary of dictionaries*:
4609+
first-level key represents an IntelMQ field that will be directly translated to a MISP attribute; nested
4610+
dictionary represents additional parameters PyMISP can take when creating an attribute. They can use
4611+
names of other IntelMQ fields (then the value of such field will be used), or static values. If not needed,
4612+
leave an empty dict.
4613+
4614+
For example:
4615+
4616+
```yaml
4617+
attribute_mapping:
4618+
source.ip:
4619+
feed.name:
4620+
comment: event_description.text
4621+
destination.ip:
4622+
to_ids: False
4623+
```
4624+
4625+
would create a MISP object with three attributes `source.ip`, `feed.name` and `destination.ip`
4626+
and set their values as in the IntelMQ event. In addition, the `feed.name` would have a comment
4627+
as given in the `event_description.text` from IntelMQ event, and `destination.ip` would be set
4628+
as not usable for IDS.
4629+
46054630
**Usage in MISP**
46064631

46074632
Configure the destination directory of this feed as feed in MISP, either as local location, or served via a web server.

intelmq/bots/outputs/misp/output_feed.py

Lines changed: 66 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,11 @@
99
from pathlib import Path
1010
from uuid import uuid4
1111

12+
import pymisp
13+
1214
from intelmq.lib.bot import OutputBot
1315
from intelmq.lib.exceptions import MissingDependencyError
16+
from ....lib.message import Message, MessageFactory
1417
from intelmq.lib.mixins import CacheMixin
1518
from intelmq.lib.utils import parse_relative
1619

@@ -30,8 +33,11 @@ class MISPFeedOutputBot(OutputBot, CacheMixin):
3033
bulk_save_count: int = None
3134
misp_org_name = None
3235
misp_org_uuid = None
33-
output_dir: str = "/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path
36+
output_dir: str = (
37+
"/opt/intelmq/var/lib/bots/mispfeed-output" # TODO: should be path
38+
)
3439
_is_multithreadable: bool = False
40+
attribute_mapping: dict = None
3541

3642
@staticmethod
3743
def check_output_dir(dirname):
@@ -56,11 +62,13 @@ def init(self):
5662
if self.interval_event is None:
5763
self.timedelta = datetime.timedelta(hours=1)
5864
else:
59-
self.timedelta = datetime.timedelta(minutes=parse_relative(self.interval_event))
65+
self.timedelta = datetime.timedelta(
66+
minutes=parse_relative(self.interval_event)
67+
)
6068

61-
if (self.output_dir / '.current').exists():
69+
if (self.output_dir / ".current").exists():
6270
try:
63-
with (self.output_dir / '.current').open() as f:
71+
with (self.output_dir / ".current").open() as f:
6472
self.current_file = Path(f.read())
6573

6674
if self.current_file.exists():
@@ -127,12 +135,49 @@ def process(self):
127135

128136
def _add_message_to_feed(self, message: dict):
129137
obj = self.current_event.add_object(name="intelmq_event")
138+
if not self.attribute_mapping:
139+
self._default_mapping(obj, message)
140+
else:
141+
self._custom_mapping(obj, message)
142+
143+
def _default_mapping(self, obj: pymisp.MISPObject, message: dict):
130144
for object_relation, value in message.items():
131145
try:
132146
obj.add_attribute(object_relation, value=value)
133147
except NewAttributeError:
134148
# This entry isn't listed in the harmonization file, ignoring.
135-
pass
149+
self.logger.warning(
150+
"Object relation %s not exists in MISP definition, ignoring",
151+
object_relation,
152+
)
153+
154+
def _extract_misp_attribute_kwargs(self, message: dict, definition: dict) -> dict:
155+
# For caching and default mapping, the serialized version is the right format to work on.
156+
# However, for any custom mapping the Message object is more sufficient as it handles
157+
# subfields.
158+
message = MessageFactory.from_dict(
159+
message, harmonization=self.harmonization, default_type="Event"
160+
)
161+
result = {}
162+
for parameter, value in definition.items():
163+
# Check if the value is a harmonization key or a static value
164+
if isinstance(value, str) and (
165+
value in self.harmonization["event"]
166+
or value.split(".", 1)[0] in self.harmonization["event"]
167+
):
168+
result[parameter] = message.get(value)
169+
else:
170+
result[parameter] = value
171+
return result
172+
173+
def _custom_mapping(self, obj: pymisp.MISPObject, message: dict):
174+
for object_relation, definition in self.attribute_mapping.items():
175+
obj.add_attribute(
176+
object_relation,
177+
value=message[object_relation],
178+
**self._extract_misp_attribute_kwargs(message, definition),
179+
)
180+
# In case of manual mapping, we want to fail if it produces incorrect values
136181

137182
def _generate_feed(self, message: dict = None):
138183
if message:
@@ -151,18 +196,27 @@ def _generate_feed(self, message: dict = None):
151196

152197
@staticmethod
153198
def check(parameters):
154-
if 'output_dir' not in parameters:
199+
if "output_dir" not in parameters:
155200
return [["error", "Parameter 'output_dir' not given."]]
156201
try:
157-
created = MISPFeedOutputBot.check_output_dir(parameters['output_dir'])
202+
created = MISPFeedOutputBot.check_output_dir(parameters["output_dir"])
158203
except OSError:
159-
return [["error",
160-
"Directory %r of parameter 'output_dir' does not exist and could not be created." % parameters['output_dir']]]
204+
return [
205+
[
206+
"error",
207+
"Directory %r of parameter 'output_dir' does not exist and could not be created."
208+
% parameters["output_dir"],
209+
]
210+
]
161211
else:
162212
if created:
163-
return [["info",
164-
"Directory %r of parameter 'output_dir' did not exist, but has now been created."
165-
"" % parameters['output_dir']]]
213+
return [
214+
[
215+
"info",
216+
"Directory %r of parameter 'output_dir' did not exist, but has now been created."
217+
"" % parameters["output_dir"],
218+
]
219+
]
166220

167221

168222
BOT = MISPFeedOutputBot

intelmq/tests/bots/outputs/misp/test_output_feed.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pathlib import Path
99
from tempfile import TemporaryDirectory
1010

11+
from .....lib.message import Message, MessageFactory
1112
import intelmq.lib.test as test
1213
from intelmq.bots.outputs.misp.output_feed import MISPFeedOutputBot
1314

@@ -92,14 +93,58 @@ def test_accumulating_events(self):
9293

9394
# Simulating leftovers in the queue when it's time to generate new event
9495
Path(f"{self.directory.name}/.current").unlink()
95-
self.bot.cache_put(EXAMPLE_EVENT)
96+
self.bot.cache_put(MessageFactory.from_dict(EXAMPLE_EVENT).to_dict(jsondict_as_string=True))
9697
self.run_bot(parameters={"bulk_save_count": 3})
9798

9899
new_event = open(f"{self.directory.name}/.current").read()
99100
with open(new_event) as f:
100101
objects = json.load(f)["Event"]["Object"]
101102
assert len(objects) == 1
102103

104+
def test_attribute_mapping(self):
105+
self.run_bot(
106+
parameters={
107+
"attribute_mapping": {
108+
"source.ip": {},
109+
"feed.name": {"comment": "event_description.text"},
110+
"destination.ip": {"to_ids": False},
111+
"malware.name": {"comment": "extra.non_ascii"}
112+
}
113+
}
114+
)
115+
116+
current_event = open(f"{self.directory.name}/.current").read()
117+
with open(current_event) as f:
118+
objects = json.load(f).get("Event", {}).get("Object", [])
119+
120+
assert len(objects) == 1
121+
attributes = objects[0].get("Attribute")
122+
assert len(attributes) == 4
123+
source_ip = next(
124+
attr for attr in attributes if attr.get("object_relation") == "source.ip"
125+
)
126+
assert source_ip["value"] == "152.166.119.2"
127+
assert source_ip["comment"] == ""
128+
129+
feed_name = next(
130+
attr for attr in attributes if attr.get("object_relation") == "feed.name"
131+
)
132+
assert feed_name["value"] == EXAMPLE_EVENT["feed.name"]
133+
assert feed_name["comment"] == EXAMPLE_EVENT["event_description.text"]
134+
135+
destination_ip = next(
136+
attr for attr in attributes if attr.get("object_relation") == "destination.ip"
137+
)
138+
assert destination_ip["value"] == EXAMPLE_EVENT["destination.ip"]
139+
assert destination_ip["to_ids"] is False
140+
141+
malware_name = next(
142+
attr for attr in attributes if attr.get("object_relation") == "malware.name"
143+
)
144+
assert malware_name["value"] == EXAMPLE_EVENT["malware.name"]
145+
assert malware_name["comment"] == EXAMPLE_EVENT["extra.non_ascii"]
146+
147+
103148
def tearDown(self):
104149
self.cache.delete(self.bot_id)
105150
self.directory.cleanup()

0 commit comments

Comments
 (0)