diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab8400acdd..a9ea860636 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,11 +3,23 @@ CHANGELOG
1.1.0
-----
+### Core
+- Subitems in fields of type `JSONDict` (see below) can be accessed directly. E.g. you can do:
+ event['extra.foo'] = 'bar'
+ event['extra.foo'] # gives 'bar'
+ It is still possible to set and get the field as a whole; however, this may be removed or changed in the future:
+ event['extra'] = '{"foo": "bar"}'
+ event['extra'] # gives '{"foo": "bar"}'
+ "Old" bots and configurations compatible with 1.0.x still work.
+ Also, the extra field is now properly exploded when exporting events, analogous to all other fields.
### Bots
#### Collectors
- Mail: New parameters; `sent_from`: filter messages by sender, `sent_to`: filter messages by recipient
+### Harmonization
+- Renamed `JSON` to `JSONDict` and added a new type `JSON`. `JSONDict` saves data internally as JSON, but acts like a dictionary. `JSON` accepts any valid JSON.
+
### Requirements
- Requests is no longer a listed as dependency of the core. For depending bots the requirement is noted in their REQUIREMENTS.txt file
diff --git a/NEWS.md b/NEWS.md
index fe741221ae..e80797af15 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,6 +5,8 @@ See the changelog for a full list of changes.
1.1.0
-----
+### Configuration
+A new harmonization type `JSONDict` has been added specifically for the `extra` field. It is highly recommended to change the type of this field to `JSONDict` in your harmonization configuration.
1.0.0 Stable release
--------------------
diff --git a/docs/Data-Harmonization.md b/docs/Data-Harmonization.md
index 6413ac6ddf..4326f52b6f 100644
--- a/docs/Data-Harmonization.md
+++ b/docs/Data-Harmonization.md
@@ -66,6 +66,9 @@ We recognize that ip geolocation is not an exact science and analysis of the abu
Some sources report an internal (NATed) IP address.
+### Extra values
+Data which does not fit in the harmonization can be saved in the 'extra' namespace. All keys must begin with `extra.`; there are no other rules on key names and values. The values can be read and set like all other fields.
+
## Fields List and data types
diff --git a/docs/Harmonization-fields.md b/docs/Harmonization-fields.md
index d65fe7137b..8a69bd7894 100644
--- a/docs/Harmonization-fields.md
+++ b/docs/Harmonization-fields.md
@@ -34,7 +34,7 @@ Harmonization field names
|Event_Description|event_description.text|[String](#string)|A free-form textual description of an abuse event.|
|Event_Description|event_description.url|[URL](#url)|A description URL is a link to a further description of the the abuse event in question.|
| |event_hash|[UppercaseString](#uppercasestring)|Computed event hash with specific keys and values that identify a unique event. At present, the hash should default to using the SHA1 function. Please note that for an event hash to be able to match more than one event (deduplication) the receiver of an event should calculate it based on a minimal set of keys and values present in the event. Using for example the observation time in the calculation will most likely render the checksum useless for deduplication purposes.|
-| |extra|[JSON](#json)|All anecdotal information, which cannot be parsed into the data harmonization elements. E.g. os.name, os.version, etc. **Note**: this is only intended for mapping any fields which can not map naturally into the data harmonization. It is not intended for extending the data harmonization with your own fields.|
+| |extra|[JSONDict](#jsondict)|All anecdotal information, which cannot be parsed into the data harmonization elements. E.g. os.name, os.version, etc. **Note**: this is only intended for mapping any fields which can not map naturally into the data harmonization. It is not intended for extending the data harmonization with your own fields.|
|Feed|feed.accuracy|[Accuracy](#accuracy)|A float between 0 and 100 that represents how accurate the data in the feed is|
|Feed|feed.code|[String](#string)|Code name for the feed, e.g. DFGS, HSDAG etc.|
|Feed|feed.documentation|[String](#string)|A URL or hint where to find the documentation of this feed.|
@@ -204,6 +204,15 @@ Sanitation accepts strings and everything int() accepts.
JSON type.
+Sanitation accepts any valid JSON objects.
+
+Valid values are only unicode strings with JSON objects.
+
+
+### JSONDict
+
+JSONDict type.
+
Sanitation accepts pythons dictionaries and JSON strings.
Valid values are only unicode strings with JSON dictionaries.
diff --git a/intelmq/bin/intelmq_psql_initdb.py b/intelmq/bin/intelmq_psql_initdb.py
index 4126c5b086..0ac2d8330d 100755
--- a/intelmq/bin/intelmq_psql_initdb.py
+++ b/intelmq/bin/intelmq_psql_initdb.py
@@ -55,7 +55,7 @@ def generate(harmonization_file=HARMONIZATION_CONF_FILE):
dbtype = 'real'
elif value['type'] == 'UUID':
dbtype = 'UUID'
- elif value['type'] == 'JSON':
+ elif value['type'] in ('JSON', 'JSONDict'):
dbtype = 'json'
else:
raise ValueError('Unknown type %r.' % value['type'])
diff --git a/intelmq/bin/intelmqctl.py b/intelmq/bin/intelmqctl.py
index 52ab4fbada..5d7c701279 100644
--- a/intelmq/bin/intelmqctl.py
+++ b/intelmq/bin/intelmqctl.py
@@ -883,6 +883,10 @@ def check(self):
self.logger.error('Invalid regex for type %r: %r.', harm_type_name, str(e))
retval = 1
continue
+ extra_type = files[HARMONIZATION_CONF_FILE].get('event', {}).get('extra', {}).get('type')
+ if extra_type != 'JSONDict':
+ self.logger.warning("'extra' field needs to be of type 'JSONDict'.")
+ retval = 1
self.logger.info('Checking for bots.')
for bot_id, bot_config in files[RUNTIME_CONF_FILE].items():
diff --git a/intelmq/bots/parsers/generic/parser_csv.py b/intelmq/bots/parsers/generic/parser_csv.py
index c450c41db3..97e84e41e7 100644
--- a/intelmq/bots/parsers/generic/parser_csv.py
+++ b/intelmq/bots/parsers/generic/parser_csv.py
@@ -64,7 +64,6 @@ def parse(self, report):
def parse_line(self, row, report):
event = self.new_event(report)
- extra = {}
for key, value in zip(self.columns, row):
regex = self.column_regex_search.get(key, None)
if regex:
@@ -85,18 +84,12 @@ def parse_line(self, row, report):
value = self.type_translation[value]
elif not hasattr(self.parameters, 'type'):
continue
- if key.startswith('extra.'):
- if value:
- extra[key[6:]] = value
- else:
- event.add(key, value)
+ event.add(key, value)
if hasattr(self.parameters, 'type')\
and "classification.type" not in event:
event.add('classification.type', self.parameters.type)
event.add("raw", self.recover_line(row))
- if extra:
- event.add('extra', extra)
yield event
recover_line = ParserBot.recover_line_csv
diff --git a/intelmq/bots/parsers/shadowserver/config.py b/intelmq/bots/parsers/shadowserver/config.py
index 4544d94875..fc506d4318 100644
--- a/intelmq/bots/parsers/shadowserver/config.py
+++ b/intelmq/bots/parsers/shadowserver/config.py
@@ -919,6 +919,8 @@ def validate_fqdn(value):
('extra.', 'system', validate_to_none),
('extra.', 'detected_since', validate_to_none),
('extra.', 'server', validate_to_none),
+ ('extra.', 'naics', invalidate_zero),
+ ('extra.', 'sic', invalidate_zero),
],
'constant_fields': {
'classification.type': 'compromised',
diff --git a/intelmq/etc/harmonization.conf b/intelmq/etc/harmonization.conf
index ef142a2a81..72ec371c96 100644
--- a/intelmq/etc/harmonization.conf
+++ b/intelmq/etc/harmonization.conf
@@ -130,7 +130,7 @@
},
"extra": {
"description": "All anecdotal information, which cannot be parsed into the data harmonization elements. E.g. os.name, os.version, etc. **Note**: this is only intended for mapping any fields which can not map naturally into the data harmonization. It is not intended for extending the data harmonization with your own fields.",
- "type": "JSON"
+ "type": "JSONDict"
},
"feed.accuracy": {
"description": "A float between 0 and 100 that represents how accurate the data in the feed is",
diff --git a/intelmq/lib/harmonization.py b/intelmq/lib/harmonization.py
index 25d8de1b95..41bb623abe 100644
--- a/intelmq/lib/harmonization.py
+++ b/intelmq/lib/harmonization.py
@@ -31,8 +31,9 @@
__all__ = ['Base64', 'Boolean', 'ClassificationType', 'DateTime', 'FQDN',
'Float', 'Accuracy', 'GenericType', 'IPAddress', 'IPNetwork',
- 'Integer', 'JSON', 'LowercaseString', 'Registry', 'String', 'URL',
- 'ASN']
+ 'Integer', 'JSON', 'JSONDict', 'LowercaseString', 'Registry',
+ 'String', 'URL', 'ASN',
+ ]
class GenericType(object):
@@ -594,6 +595,44 @@ class JSON(GenericType):
"""
JSON type.
+ Sanitation accepts any valid JSON objects.
+
+ Valid values are only unicode strings with JSON objects.
+ """
+
+ @staticmethod
+ def is_valid(value, sanitize=False):
+ if sanitize:
+ value = JSON().sanitize(value)
+
+ if not isinstance(value, str):
+ return False
+
+ try:
+ json.loads(value)
+ except ValueError:
+ return False
+
+ return True
+
+ @staticmethod
+ def sanitize(value):
+ if value is None:
+ return None
+ if isinstance(value, (str, bytes)):
+ sanitized = GenericType.sanitize(value)
+ if JSON.is_valid(sanitized):
+ return sanitized
+ try:
+ return GenericType().sanitize(json.dumps(value, sort_keys=True))
+ except TypeError:
+ return None
+
+
+class JSONDict(JSON):
+ """
+ JSONDict type.
+
Sanitation accepts pythons dictionaries and JSON strings.
Valid values are only unicode strings with JSON dictionaries.
@@ -602,7 +641,7 @@ class JSON(GenericType):
@staticmethod
def is_valid(value, sanitize=False):
if sanitize:
- value = JSON().sanitize(value)
+ value = JSONDict().sanitize(value)
if not isinstance(value, str):
return False
@@ -617,19 +656,27 @@ def is_valid(value, sanitize=False):
return False
+ @staticmethod
+ def is_valid_subitem(value):
+ return True
+
@staticmethod
def sanitize(value):
if not value:
return None
if isinstance(value, (str, bytes)):
sanitized = GenericType.sanitize(value)
- if JSON.is_valid(sanitized):
+ if JSONDict.is_valid(sanitized):
return sanitized
try:
return GenericType().sanitize(json.dumps(value, sort_keys=True))
except TypeError:
return None
+ @staticmethod
+ def sanitize_subitem(value):
+ return value
+
class LowercaseString(GenericType):
"""
diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py
index 332c2b2a0c..769d0accfb 100644
--- a/intelmq/lib/message.py
+++ b/intelmq/lib/message.py
@@ -4,6 +4,7 @@
Use MessageFactory to get a Message object (types Report and Event).
"""
+import functools
import hashlib
import json
import re
@@ -85,6 +86,8 @@ def serialize(message):
class Message(dict):
+ _IGNORED_VALUES = ["", "-", "N/A"]
+
def __init__(self, message=(), auto=False, harmonization=None):
try:
classname = message['__type'].lower()
@@ -102,6 +105,11 @@ def __init__(self, message=(), auto=False, harmonization=None):
expected=VALID_MESSSAGE_TYPES,
docs=HARMONIZATION_CONF_FILE)
+ if classname == 'event' and self.harmonization_config['extra']['type'] == 'JSON':
+ warnings.warn("Assuming harmonization type 'JSONDict' for harmonization field 'extra'. "
+ "This assumption will be removed in version 2.0.", DeprecationWarning)
+ self.harmonization_config['extra']['type'] = 'JSONDict'
+
super(Message, self).__init__()
if isinstance(message, dict):
iterable = message.items()
@@ -114,6 +122,14 @@ def __init__(self, message=(), auto=False, harmonization=None):
def __setitem__(self, key, value):
self.add(key, value)
+ def __getitem__(self, key):
+ class_name, subitem = self.__get_type_config(key)
+ if class_name['type'] == 'JSONDict' and not subitem:
+ # return extra as string for backwards compatibility
+ return json.dumps(self.to_dict(hierarchical=True)[key.split('.')[0]])
+ else:
+ return super(Message, self).__getitem__(key)
+
def is_valid(self, key: str, value: str, sanitize: bool=True) -> bool:
"""
Checks if a value is valid for the key (after sanitation).
@@ -174,7 +190,7 @@ def add(self, key: str, value: str, sanitize: bool=True, force: bool=False,
if not overwrite and key in self:
raise exceptions.KeyExists(key)
- if value is None or value in ["", "-", "N/A"]:
+ if value is None or value in self._IGNORED_VALUES:
if overwrite and key in self:
del self[key]
return
@@ -206,7 +222,19 @@ def add(self, key: str, value: str, sanitize: bool=True, force: bool=False,
else:
return False
- super(Message, self).__setitem__(key, value)
+ class_name, subitem = self.__get_type_config(key)
+ if class_name and class_name['type'] == 'JSONDict' and not subitem:
+ # for backwards compatibility allow setting the extra field as string
+ for extrakey, extravalue in json.loads(value).items():
+ if hasattr(extravalue, '__len__'):
+ if not len(extravalue): # ignore empty values
+ continue
+ if extravalue in self._IGNORED_VALUES:
+ continue
+ super(Message, self).__setitem__('%s.%s' % (key, extrakey),
+ extravalue)
+ else:
+ super(Message, self).__setitem__(key, value)
return True
def update(self, other: dict):
@@ -251,17 +279,26 @@ def unserialize(message_string: str):
message = json.loads(message_string)
return message
+ @functools.lru_cache(maxsize=None)
def __is_valid_key(self, key: str):
- if key in self.harmonization_config or key == '__type':
+ try:
+ class_name, subitem = self.__get_type_config(key)
+ except KeyError:
+ return False
+ if key in self.harmonization_config or key == '__type' or subitem:
return True
return False
def __is_valid_value(self, key: str, value: str):
if key == '__type':
return (True, )
- config = self.__get_type_config(key)
+ config, subitem = self.__get_type_config(key)
class_reference = getattr(intelmq.lib.harmonization, config['type'])
- if not class_reference().is_valid(value):
+ if not subitem:
+ validation = class_reference().is_valid(value)
+ else:
+ validation = class_reference().is_valid_subitem(value)
+ if not validation:
return (False, 'is_valid returned False.')
if 'length' in config:
length = len(str(value))
@@ -277,13 +314,26 @@ def __is_valid_value(self, key: str, value: str):
return (True, )
def __sanitize_value(self, key: str, value: str):
- class_name = self.__get_type_config(key)['type']
- class_reference = getattr(intelmq.lib.harmonization, class_name)
- return class_reference().sanitize(value)
+ class_name, subitem = self.__get_type_config(key)
+ class_reference = getattr(intelmq.lib.harmonization, class_name['type'])
+ if not subitem:
+ return class_reference().sanitize(value)
+ else:
+ return class_reference().sanitize_subitem(value)
+ @functools.lru_cache(maxsize=None)
def __get_type_config(self, key: str):
- class_name = self.harmonization_config[key]
- return class_name
+ if key == '__type':
+ return None, None
+ try:
+ class_name = self.harmonization_config[key]
+ except KeyError:
+ # Could be done recursively in the future if needed
+ class_name = self.harmonization_config[key.split('.')[0]]
+ subitem = True
+ else:
+ subitem = False
+ return class_name, subitem
def __hash__(self):
return int(self.hash(), 16)
diff --git a/intelmq/tests/bots/experts/filter/test_extra.py b/intelmq/tests/bots/experts/filter/test_extra.py
new file mode 100644
index 0000000000..f3a7fd739a
--- /dev/null
+++ b/intelmq/tests/bots/experts/filter/test_extra.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+
+import unittest
+
+import intelmq.lib.test as test
+from intelmq.bots.experts.filter.expert import FilterExpertBot
+
+EXAMPLE_INPUT = {"__type": "Event",
+ "classification.type": "defacement",
+ "time.source": "2005-01-01T00:00:00+00:00",
+ "source.asn": 123,
+ "extra.test1": True,
+ "extra.test2": "bla",
+ }
+
+
+class TestFilterExpertBot(test.BotTestCase, unittest.TestCase):
+ """
+ A TestCase for FilterExpertBot.
+ """
+
+ @classmethod
+ def set_bot(cls):
+ cls.bot_reference = FilterExpertBot
+ cls.input_message = EXAMPLE_INPUT
+ cls.sysconfig = {'filter_key': 'extra.test1',
+ 'filter_value': True,
+ 'filter_action': 'drop'}
+
+ def test_extra_filter_drop(self):
+ self.run_bot()
+
+ def test_extra_filter_keep(self):
+ self.sysconfig = {'filter_key': 'extra.test2',
+ 'filter_value': 'bla',
+ 'filter_action': 'keep'}
+ self.run_bot()
+ self.assertMessageEqual(0, EXAMPLE_INPUT)
+
+if __name__ == '__main__': # pragma: no cover
+ unittest.main()
diff --git a/intelmq/tests/bots/outputs/mongodb/test_output.py b/intelmq/tests/bots/outputs/mongodb/test_output.py
index 372074b536..1fcd20c5cb 100644
--- a/intelmq/tests/bots/outputs/mongodb/test_output.py
+++ b/intelmq/tests/bots/outputs/mongodb/test_output.py
@@ -16,7 +16,7 @@
"extra": '{"foo.bar": "test"}'
}
OUTPUT1 = {'classification': {'type': 'botnet drone'},
- 'extra': '{"foo.bar": "test"}',
+ 'extra': {"foo": {"bar": "test"}},
'feed': {'name': 'Example Feed'},
'source': {'asn': 64496, 'ip': '192.0.2.1'},
}
diff --git a/intelmq/tests/bots/outputs/redis/test_output.py b/intelmq/tests/bots/outputs/redis/test_output.py
index 56b4270629..46fb3e92ff 100644
--- a/intelmq/tests/bots/outputs/redis/test_output.py
+++ b/intelmq/tests/bots/outputs/redis/test_output.py
@@ -23,7 +23,7 @@
"source.port": 65118,
"__type": "Event",
"feed.name": "BitSight",
- "extra": '{"non_ascii": "ççãããã\x80\ua000 \164 \x80\x80 abcd \165\166"}',
+ "extra.non_ascii": "ççãããã\x80\ua000 \164 \x80\x80 abcd \165\166",
"raw": "eyJ0cm9qYW5mYW1pbHkiOiJTYWxpdHlwMnAiLCJlbnYiOnsic"
"mVtb3RlX2FkZHIiOiIxNTIuMTY2LjExOS4yIiwicmVtb3RlX3"
"BvcnQiOiI2NTExOCIsInNlcnZlcl9hZGRyIjoiNTIuMTguMTk"
diff --git a/intelmq/tests/bots/parsers/alienvault/test_parser_otx.py b/intelmq/tests/bots/parsers/alienvault/test_parser_otx.py
index b9a77d0fcc..97621c87cb 100644
--- a/intelmq/tests/bots/parsers/alienvault/test_parser_otx.py
+++ b/intelmq/tests/bots/parsers/alienvault/test_parser_otx.py
@@ -18,11 +18,11 @@
}
EXAMPLE_EVENT = {
"__type": "Event",
- "extra": '{"author": "AlienVault", "pulse": "The Spy Kittens Are Back: '
- 'Rocket Kitten 2", "pulse_key": "55e6bfb14637f22cb605746e", '
- '"tags": ["spy kittens", "rocket kitten", "ghole", '
- '"spearphishing", "Social engineering", "TSPY_WOOLERG", "apt", '
- '"trendmicro"], "time_updated": "2015-09-02T09:22:22.97+00:00"}',
+ "extra.author": "AlienVault",
+ "extra.time_updated": "2015-09-02T09:22:22.97+00:00",
+ "extra.pulse_key": "55e6bfb14637f22cb605746e",
+ "extra.pulse": "The Spy Kittens Are Back: Rocket Kitten 2",
+ "extra.tags": ['spy kittens', 'rocket kitten', 'ghole', 'spearphishing', 'Social engineering', 'TSPY_WOOLERG', 'apt', 'trendmicro'],
"comment": """Our findings show that Rocket Kitten is still active, retains
a growing level of persistence, and acts ever more aggressively in terms of
attack method. We also found that recent publications on the group’s activity
@@ -57,13 +57,13 @@
'able to bypass SMS-based two-factor authentication. '
'Additionally, it also contains modules to target some popular '
'social media apps.',
- 'extra': '{"adversary": "", "author": "AlienVault", "industries": '
- '["banking"], "pulse": "Android banking malware masquerades as '
- 'Flash Player", "pulse_key": "581b9aef324bc542d6b1fd84", "tags": '
- '["skype", "flash player", "android", "banker"], '
- '"targeted_countries": ["United States", "Germany", "France", '
- '"Australia", "Turkey", "Poland", "Austria"], "time_updated": '
- '"2016-11-03T20:15:43.26+00:00"}',
+ 'extra.tags': ['skype', 'flash player', 'android', 'banker'],
+ 'extra.pulse_key': '581b9aef324bc542d6b1fd84',
+ 'extra.targeted_countries': ['United States', 'Germany', 'France', 'Australia', 'Turkey', 'Poland', 'Austria'],
+ 'extra.industries': ['banking'],
+ 'extra.time_updated': '2016-11-03T20:15:43.26+00:00',
+ 'extra.author': 'AlienVault',
+ 'extra.pulse': 'Android banking malware masquerades as Flash Player',
'feed.name': 'AlienVault OTX',
'malware.hash.sha256': 'e5df30b41b0c50594c2b77c1d5d6916a9ce925f792c563f692426c2d50aa2524',
'raw': 'eyJhY2Nlc3NfZ3JvdXBzIjogW10sICJhY2Nlc3NfcmVhc29uIjogIiIsICJhY2Nlc3NfdHlwZSI6ICJwdWJsaWMiLCAiY29udGVudCI6ICIiLCAiY3JlYXRlZCI6ICIyMDE2LTExLTAzVDIwOjE1OjQ0IiwgImRlc2NyaXB0aW9uIjogIiIsICJleHBpcmF0aW9uIjogbnVsbCwgImlkIjogMTI2NTM1NCwgImluZGljYXRvciI6ICJlNWRmMzBiNDFiMGM1MDU5NGMyYjc3YzFkNWQ2OTE2YTljZTkyNWY3OTJjNTYzZjY5MjQyNmMyZDUwYWEyNTI0IiwgImlzX2FjdGl2ZSI6IDEsICJvYnNlcnZhdGlvbnMiOiAzLCAicm9sZSI6IG51bGwsICJ0aXRsZSI6ICIiLCAidHlwZSI6ICJGaWxlSGFzaC1TSEEyNTYifQ==',
@@ -91,10 +91,11 @@
'countries - including China - have been known to target '
'organizations of strategic interest with aggressive '
'malware-based espionage campaigns.',
- 'extra': '{"author": "AlienVault", "pulse": "PlugX Threat\\tActivity in '
- 'Myanmar", "pulse_key": "55e557fa4637f21c54c1bb0d", "tags": '
- '["plugx", "Myanmar", "rat", "Strategic\\tWeb\\tCompromise"], '
- '"time_updated": "2015-09-01T07:47:06.00+00:00"}',
+ 'extra.pulse_key': '55e557fa4637f21c54c1bb0d',
+ 'extra.tags': ['plugx', 'Myanmar', 'rat', 'Strategic\tWeb\tCompromise'],
+ 'extra.author': 'AlienVault',
+ 'extra.time_updated': '2015-09-01T07:47:06.00+00:00',
+ 'extra.pulse': 'PlugX Threat\tActivity in Myanmar',
'feed.name': 'AlienVault OTX',
'raw': 'eyJfaWQiOiAiNTVlNTU3ZmE0NjM3ZjIxYzU0YzFiYWY4IiwgImNyZWF0ZWQiOiAiMjAxNS0wOS0wMVQwNzo0NzowNi4wNzMiLCAiZGVzY3JpcHRpb24iOiAiIiwgImluZGljYXRvciI6ICJodHRwOi8vd3d3LnVlY215YW5tYXIub3JnL2RtZG9jdW1lbnRzL2ludml0YXRpb25zLnJhciIsICJ0eXBlIjogIlVSTCJ9',
'source.url': 'http://www.uecmyanmar.org/dmdocuments/invitations.rar',
@@ -105,10 +106,10 @@
'__type': 'Event',
'classification.type': 'blacklist',
'comment': 'HIDDEN COBRA – North Korean Malicious Cyber Activity',
- 'extra': '{"adversary": "", "author": "bschlaps", "industries": [], '
- '"pulse": "Alert (TA17-164A)", "pulse_key": '
- '"5942175dd78f563d01abc79c", "tags": [], "time_updated": '
- '"2017-06-15T05:17:12.18+00:00"}',
+ 'extra.pulse_key': '5942175dd78f563d01abc79c',
+ 'extra.pulse': 'Alert (TA17-164A)',
+ 'extra.author': 'bschlaps',
+ 'extra.time_updated': '2017-06-15T05:17:12.18+00:00',
'feed.name': 'AlienVault OTX',
'raw': 'eyJhY2Nlc3NfZ3JvdXBzIjogW10sICJhY2Nlc3NfcmVhc29uIjogIiIsICJhY2Nlc3NfdHlwZSI6ICJwdWJsaWMiLCAiY29udGVudCI6ICIiLCAiY3JlYXRlZCI6ICIyMDE3LTA2LTE1VDA1OjEzOjAyIiwgImRlc2NyaXB0aW9uIjogIiIsICJleHBpcmF0aW9uIjogbnVsbCwgImlkIjogMTM1Nzk4OSwgImluZGljYXRvciI6ICIxMzQuMTE5LjM2LjEzNSIsICJpc19hY3RpdmUiOiAxLCAib2JzZXJ2YXRpb25zIjogMTEsICJwdWxzZV9rZXkiOiAiNTk0MjE3NWRkNzhmNTYzZDAxYWJjNzljIiwgInJvbGUiOiBudWxsLCAidGl0bGUiOiAiIiwgInR5cGUiOiAiSVB2NCJ9',
'source.ip': '134.119.36.135',
diff --git a/intelmq/tests/bots/parsers/anubisnetworks/test_parser.py b/intelmq/tests/bots/parsers/anubisnetworks/test_parser.py
index c958ad6cd3..71a2953618 100644
--- a/intelmq/tests/bots/parsers/anubisnetworks/test_parser.py
+++ b/intelmq/tests/bots/parsers/anubisnetworks/test_parser.py
@@ -35,7 +35,9 @@
"__type": "Event",
"feed.name": "AnubisNetworks",
"raw": EXAMPLE_REPORT['raw'],
- 'extra': '{"_origin": "dnsmalware", "_provider": "spikens", "request_method": "POST"}',
+ 'extra._provider': 'spikens',
+ 'extra.request_method': 'POST',
+ 'extra._origin': 'dnsmalware',
}
EXAMPLE_REPORT2 = {"feed.name": "AnubisNetworks",
@@ -64,7 +66,7 @@
"time.source": "2016-04-19T23:15:54+00:00",
"source.port": 52888,
"time.observation": "2016-04-19T23:16:10+00:00",
- "extra": "{\"request_method\": \"POST\"}",
+ "extra.request_method": "POST",
"feed.url": "https://prod.cyberfeed.net/stream",
"destination.port": 80,
"feed.accuracy": 100.0,
diff --git a/intelmq/tests/bots/parsers/blueliv/test_parser_crimeserver.py b/intelmq/tests/bots/parsers/blueliv/test_parser_crimeserver.py
index f12d3856d2..8a4c26e5f6 100644
--- a/intelmq/tests/bots/parsers/blueliv/test_parser_crimeserver.py
+++ b/intelmq/tests/bots/parsers/blueliv/test_parser_crimeserver.py
@@ -23,9 +23,10 @@
"feed.name": "Blueliv Crimeserver",
"classification.type": "malware",
"raw": utils.base64_encode(EXAMPLE_LINES[0][:-1]),
- 'extra': '{"confidence": 4, "status": "ONLINE", "time_first_seen": '
- '"2015-10-20T06:14:00+0000", "time_updated": '
- '"2015-12-09T04:45:53+0000"}',
+ "extra.confidence": 4,
+ "extra.time_updated": "2015-12-09T04:45:53+0000",
+ "extra.time_first_seen": "2015-10-20T06:14:00+0000",
+ "extra.status": "ONLINE",
"source.url": "http://amirosterweil.weebly.com/uploads/2/3/2/5/23258576/corporate_elearning.pdf",
"time.source": "2015-12-09T04:43:29+00:00",
"time.observation": "2015-09-02T14:17:58+00:00",
@@ -36,9 +37,9 @@
"feed.name": "Blueliv Crimeserver",
"classification.type": "phishing",
"raw": utils.base64_encode(EXAMPLE_LINES[1][:-1]),
- "extra": '{"status": "ONLINE", "time_first_seen": '
- '"2015-07-04T17:08:23+0000", "time_updated": '
- '"2015-12-13T13:55:54+0000"}',
+ "extra.status": "ONLINE",
+ "extra.time_updated": "2015-12-13T13:55:54+0000",
+ "extra.time_first_seen": "2015-07-04T17:08:23+0000",
"source.url": "http://mondeos-italo.com/store/apple/",
"time.source": "2015-12-13T13:53:55+00:00",
"time.observation": "2015-09-02T14:17:58+00:00",
@@ -46,7 +47,7 @@
}, {
"__type": "Event",
"feed.name": "Blueliv Crimeserver",
- "extra": '{"status": "ONLINE"}',
+ "extra.status": "ONLINE",
"classification.type": "proxy",
"raw": utils.base64_encode(EXAMPLE_LINES[2]),
"source.tor_node": True,
diff --git a/intelmq/tests/bots/parsers/cleanmx/test_parser.py b/intelmq/tests/bots/parsers/cleanmx/test_parser.py
index fcdc4c2953..f9ad837ec7 100644
--- a/intelmq/tests/bots/parsers/cleanmx/test_parser.py
+++ b/intelmq/tests/bots/parsers/cleanmx/test_parser.py
@@ -22,10 +22,15 @@
PHISHING_EVENTS = [{'__type': 'Event',
'classification.type': 'phishing',
'event_description.target': 'DHL',
- 'extra': '{"ddescr": "Example Layer", "id": "9377142", "inetnum": '
- '"198.18.0.0 - 198.19.255.255", "netname": "EXAMPLE-NETWORK-15", '
- '"ns1": "ns2.example.com", "ns2": "ns1.example.com", "phishtank": '
- '"4647345", "response": "alive", "review": "198.18.0.1"}',
+ 'extra.id': '9377142',
+ 'extra.netname': 'EXAMPLE-NETWORK-15',
+ 'extra.phishtank': '4647345',
+ 'extra.inetnum': '198.18.0.0 - 198.19.255.255',
+ 'extra.ns1': 'ns2.example.com',
+ 'extra.response': 'alive',
+ 'extra.ns2': 'ns1.example.com',
+ 'extra.ddescr': 'Example Layer',
+ 'extra.review': '198.18.0.1',
'feed.name': 'CleanMX Phishing',
'feed.url': 'http://support.clean-mx.de/clean-mx/xmlphishing?response=alive&format=csv&domain=',
'raw': 'bGluZSxpZCxmaXJzdHRpbWUsbGFzdHRpbWUscGhpc2h0YW5rLHZpcnVzbmFtZSx1cmwscmVjZW50LHJlc3BvbnNlLGlwLHJldmlldyxkb21haW4sY291bnRyeSxzb3VyY2UsZW1haWwsaW5ldG51bSxuZXRuYW1lLGRkZXNjcixuczEsbnMyLG5zMyxuczQsbnM1DQoxLDkzNzcxNDIsMjAxNi0xMS0yOSAxMDozMTo0NSwxOTcwLTAxLTAxIDAxOjAwOjAwLDQ2NDczNDUsREhMLGh0dHA6Ly9leGFtcGxlLmNvbS9kZWhsJTIwcGFja2FnZS9jb25maXJtLyxkb3duLGFsaXZlLDE5OC4xOC4wLjEsMTk4LjE4LjAuMSwxOTguMTguMC4xLFVTLEFSSU4sYWJ1c2VAZXhhbXBsZS5jb20sMTk4LjE4LjAuMCAtIDE5OC4xOS4yNTUuMjU1LEVYQU1QTEUtTkVUV09SSy0xNSxFeGFtcGxlIExheWVyLG5zMi5leGFtcGxlLmNvbSxuczEuZXhhbXBsZS5jb20sLCwNCg==',
@@ -39,11 +44,16 @@
{'__type': 'Event',
'classification.type': 'phishing',
'event_description.target': 'Free',
- 'extra': '{"id": "9377136", "inetnum": "198.18.0.0 - 198.19.255.255", '
- '"netname": "EXAMPLE", "ns1": "ns-de.example.com", "ns2": '
- '"ns-de.example.net", "ns3": "ns-de.example.com", "ns4": '
- '"ns-de.example.org", "phishtank": "4647412", "response": "alive", '
- '"review": "198.18.0.7"}',
+ 'extra.phishtank': '4647412',
+ 'extra.id': '9377136',
+ 'extra.ns4': 'ns-de.example.org',
+ 'extra.response': 'alive',
+ 'extra.ns3': 'ns-de.example.com',
+ 'extra.inetnum': '198.18.0.0 - 198.19.255.255',
+ 'extra.review': '198.18.0.7',
+ 'extra.netname': 'EXAMPLE',
+ 'extra.ns2': 'ns-de.example.net',
+ 'extra.ns1': 'ns-de.example.com',
'feed.name': 'CleanMX Phishing',
'feed.url': 'http://support.clean-mx.de/clean-mx/xmlphishing?response=alive&format=csv&domain=',
'raw': 'bGluZSxpZCxmaXJzdHRpbWUsbGFzdHRpbWUscGhpc2h0YW5rLHZpcnVzbmFtZSx1cmwscmVjZW50LHJlc3BvbnNlLGlwLHJldmlldyxkb21haW4sY291bnRyeSxzb3VyY2UsZW1haWwsaW5ldG51bSxuZXRuYW1lLGRkZXNjcixuczEsbnMyLG5zMyxuczQsbnM1DQo3LDkzNzcxMzYsMjAxNi0xMS0yOSAxMDoxNzozOCwxOTcwLTAxLTAxIDAxOjAwOjAwLDQ2NDc0MTIsRnJlZSxodHRwOi8vZXhhbXBsZS5uZXQvRnIvNWI4Y2EzY2FmODlmNWNkNjI0YzJiNjkyYjk5NzFjY2MvLHVwLGFsaXZlLDE5OC4xOC4wLjcsMTk4LjE4LjAuNyxleGFtcGxlLm5ldCxQTCxSSVBFLGFidXNlQGV4YW1wbGUubmV0LDE5OC4xOC4wLjAgLSAxOTguMTkuMjU1LjI1NSxFWEFNUExFLCxucy1kZS5leGFtcGxlLmNvbSxucy1kZS5leGFtcGxlLm5ldCxucy1kZS5leGFtcGxlLmNvbSxucy1kZS5leGFtcGxlLm9yZywNCg==',
@@ -64,10 +74,16 @@
}
VIRUSES_EVENTS = [{'__type': 'Event',
'classification.type': 'malware',
- 'extra': '{"ddescr": "", "id": "104542833", "inetnum": '
- '"198.18.0.0 - 198.19.255.255", "netname": "EXAMPLE-COM", "ns1": '
- '"ns10.domaincontrol.com", "ns2": "ns09.domaincontrol.com", '
- '"response": "alive", "review": "198.18.0.8", "source": "ARIN", "sub": "sub16"}',
+ 'extra.response': 'alive',
+ 'extra.netname': 'EXAMPLE-COM',
+ 'extra.ns2': 'ns09.domaincontrol.com',
+ 'extra.inetnum': '198.18.0.0 - 198.19.255.255',
+ 'extra.ns1': 'ns10.domaincontrol.com',
+ 'extra.review': '198.18.0.8',
+ 'extra.sub': 'sub16',
+ 'extra.id': '104542833',
+ 'extra.source': 'ARIN',
+ 'extra.ddescr': '',
'feed.name': 'CleanMX Viruses',
'feed.url': 'http://support.clean-mx.de/clean-mx/xmlviruses?response=alive&format=csv&domain=',
'malware.name': 'solimba.032',
@@ -81,10 +97,16 @@
'time.source': '2016-11-29T11:18:24+00:00'},
{'__type': 'Event',
'classification.type': 'malware',
- 'extra': '{"ddescr": "", "id": "104542831", "inetnum": '
- '"198.18.0.0 - 198.19.255.255", "netname": "EXAMPLENET", "ns1": '
- '"f1g1ns2.example.net", "ns2": "f1g1ns1.example.net", "response": '
- '"alive", "review": "120.26.127.170", "source": "APNIC", "sub": "sub16"}',
+ 'extra.netname': 'EXAMPLENET',
+ 'extra.ns1': 'f1g1ns2.example.net',
+ 'extra.review': '120.26.127.170',
+ 'extra.sub': 'sub16',
+ 'extra.ddescr': '',
+ 'extra.inetnum': '198.18.0.0 - 198.19.255.255',
+ 'extra.ns2': 'f1g1ns1.example.net',
+ 'extra.id': '104542831',
+ 'extra.source': 'APNIC',
+ 'extra.response': 'alive',
'feed.name': 'CleanMX Viruses',
'feed.url': 'http://support.clean-mx.de/clean-mx/xmlviruses?response=alive&format=csv&domain=',
'malware.name': 'trj/ci.a',
diff --git a/intelmq/tests/bots/parsers/dshield/test_parser_asn.py b/intelmq/tests/bots/parsers/dshield/test_parser_asn.py
index 477504b16b..998f3697ef 100644
--- a/intelmq/tests/bots/parsers/dshield/test_parser_asn.py
+++ b/intelmq/tests/bots/parsers/dshield/test_parser_asn.py
@@ -25,7 +25,9 @@
"source.ip": "109.230.148.140",
"classification.type": "brute-force",
"time.observation": "2015-01-01T00:00:00+00:00",
- "extra": '{"last_seen": "2015-12-22", "reports": 85, "targets": 56}',
+ "extra.last_seen": "2015-12-22",
+ "extra.reports": 85,
+ "extra.targets": 56,
"raw": "MTA5LjIzMC4xNDguMTQwCTg1CTU2CQkyMDE1LTEyLTIyCTIwMTUtMTItMjIgMTE6MDk6MDc=",
},
{"feed.name": "DShield AS",
@@ -36,7 +38,9 @@
"source.ip": "109.230.155.61",
"classification.type": "brute-force",
"time.observation": "2015-01-01T00:00:00+00:00",
- "extra": '{"last_seen": "2015-12-10", "reports": 1, "targets": 1}',
+ "extra.targets": 1,
+ "extra.reports": 1,
+ "extra.last_seen": "2015-12-10",
"raw": "MTA5LjIzMC4xNTUuMDYxCTEJMQkJMjAxNS0xMi0xMAkyMDE1LTEyLTE0IDEyOjQwOjU5",
}]
diff --git a/intelmq/tests/bots/parsers/dshield/test_parser_block.py b/intelmq/tests/bots/parsers/dshield/test_parser_block.py
index effa376723..d1706300fb 100644
--- a/intelmq/tests/bots/parsers/dshield/test_parser_block.py
+++ b/intelmq/tests/bots/parsers/dshield/test_parser_block.py
@@ -26,7 +26,8 @@
"time.observation": "2015-01-01T00:00:00+00:00",
"source.geolocation.cc": "JP",
"source.abuse_contact": "admin@v6nic.net",
- "extra": '{"attacks": 788, "network_name": "Japan Inet"}',
+ "extra.network_name": "Japan Inet",
+ "extra.attacks": 788,
"raw": "NDMuMjI5LjUzLjAJNDMuMjI5LjUzLjI1NQkyNAk3ODgJSmFwYW4gSW5ldAlKUAlhZG1pbkB2Nm5pYy5uZXQ=",
},
{"feed.name": "DShield Block",
@@ -36,7 +37,7 @@
"source.network": "194.63.140.0/24",
"classification.type": "blacklist",
"time.observation": "2015-01-01T00:00:00+00:00",
- "extra": '{"attacks": 585}',
+ "extra.attacks": 585,
"raw": "MTk0LjYzLjE0MC4wCTE5NC42My4xNDAuMjU1CTI0CTU4NQ==",
}]
diff --git a/intelmq/tests/bots/parsers/generic/test_parser_csv_extra_regex.py b/intelmq/tests/bots/parsers/generic/test_parser_csv_extra_regex.py
index 1dca3ed96a..4c4909c166 100644
--- a/intelmq/tests/bots/parsers/generic/test_parser_csv_extra_regex.py
+++ b/intelmq/tests/bots/parsers/generic/test_parser_csv_extra_regex.py
@@ -21,8 +21,9 @@
"raw": utils.base64_encode(SAMPLE_SPLIT[1].replace('"', '')+'\r\n'),
"time.observation": "2015-01-01T00:00:00+00:00",
"classification.type": "botnet drone",
- 'extra': '{"http_request": "29|", "source": "Microsoft-DCU", '
- '"source.metro_code": "0"}',
+ "extra.source.metro_code": '0',
+ "extra.source": "Microsoft-DCU",
+ "extra.http_request": "29|",
"destination.ip": "224.1.1.1",
"destination.port": 1604,
"feed.name": "Microsoft DCU Feed",
@@ -41,14 +42,15 @@
"classification.type": "botnet drone",
"destination.ip": "224.1.1.2",
"destination.port": 80,
- 'extra': '{"http_host": "dcu-a-202.microsoftinternetsafety.net", '
- '"http_method": "POST", "http_referer": "null", '
- '"http_request": "/file-34fd81-003.php", '
- '"http_user_agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0;)", '
- '"http_version": "1.1", '
- '"source": "Microsoft-DCU", '
- '"source.metro_code": "0", '
- '"source.postal_code": "1100"}',
+ "extra.http_method": "POST",
+ "extra.source.postal_code": '1100',
+ "extra.http_version": "1.1",
+ "extra.http_host": "dcu-a-202.microsoftinternetsafety.net",
+ "extra.source.metro_code": '0',
+ "extra.http_request": "/file-34fd81-003.php",
+ "extra.source": "Microsoft-DCU",
+ "extra.http_referer": "null",
+ "extra.http_user_agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0;)",
"feed.name": "Microsoft DCU Feed",
"malware.name": "b54-config",
"source.geolocation.cc": "AT",
diff --git a/intelmq/tests/bots/parsers/n6/test_parser.py b/intelmq/tests/bots/parsers/n6/test_parser.py
index cfba1e3752..1f1b9d53c9 100644
--- a/intelmq/tests/bots/parsers/n6/test_parser.py
+++ b/intelmq/tests/bots/parsers/n6/test_parser.py
@@ -15,7 +15,8 @@
"source.ip": "68.68.96.235",
"time.source": "2015-11-17T12:17:27.043452+00:00",
"classification.taxonomy": "malicious code",
- "extra": '{"adip": "x.x.111.99", "feed_id": "d77ae8cf681dcdbb6e20014581401ed5"}',
+ "extra.adip": "x.x.111.99",
+ "extra.feed_id": "d77ae8cf681dcdbb6e20014581401ed5",
"source.port": 22308,
"time.observation": "2015-11-17T12:17:27.043452Z",
"source.geolocation.cc": "US",
diff --git a/intelmq/tests/bots/parsers/shadowserver/test_accessible_smb.py b/intelmq/tests/bots/parsers/shadowserver/test_accessible_smb.py
index 9602952484..0c999b55ab 100644
--- a/intelmq/tests/bots/parsers/shadowserver/test_accessible_smb.py
+++ b/intelmq/tests/bots/parsers/shadowserver/test_accessible_smb.py
@@ -27,7 +27,7 @@
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
'classification.identifier': 'opensmb',
- 'extra': '{"smb_implant": false}',
+ 'extra.smb_implant': False,
'protocol.application': 'smb',
'protocol.transport': 'tcp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -44,7 +44,9 @@
'feed.name': 'ShadowServer Accessible-SMB',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"arch": "x86", "key": "0xcb68e558", "smb_implant": true}',
+ 'extra.smb_implant': True,
+ 'extra.arch': 'x86',
+ 'extra.key': '0xcb68e558',
'classification.identifier': 'opensmb',
'protocol.application': 'smb',
'protocol.transport': 'tcp',
diff --git a/intelmq/tests/bots/parsers/shadowserver/test_accessible_vnc.py b/intelmq/tests/bots/parsers/shadowserver/test_accessible_vnc.py
index 3afcacf6a1..a49cb450b4 100644
--- a/intelmq/tests/bots/parsers/shadowserver/test_accessible_vnc.py
+++ b/intelmq/tests/bots/parsers/shadowserver/test_accessible_vnc.py
@@ -25,7 +25,8 @@
'feed.name': 'ShadowServer Accessible VNC',
'classification.type': 'vulnerable service',
'classification.identifier': 'accessiblevnc',
- 'extra': '{"banner": "RFB 005.000", "product": "RealVNC Enterprise v5.3 or later"}',
+ 'extra.product': 'RealVNC Enterprise v5.3 or later',
+ 'extra.banner': 'RFB 005.000',
'protocol.application': 'vnc',
'protocol.transport': 'tcp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -42,7 +43,10 @@
'feed.name': 'ShadowServer Accessible VNC',
'classification.type': 'vulnerable service',
'classification.identifier': 'accessiblevnc',
- 'extra': '{"banner": "RFB 003.006", "naics": 518210, "product": "VNC protocol 3.6", "sic": 737415}',
+ 'extra.sic': 737415,
+ 'extra.product': 'VNC protocol 3.6',
+ 'extra.banner': 'RFB 003.006',
+ 'extra.naics': 518210,
'protocol.application': 'vnc',
'protocol.transport': 'tcp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
diff --git a/intelmq/tests/bots/parsers/shadowserver/test_chargen.py b/intelmq/tests/bots/parsers/shadowserver/test_chargen.py
index f622591dce..6413e7603b 100644
--- a/intelmq/tests/bots/parsers/shadowserver/test_chargen.py
+++ b/intelmq/tests/bots/parsers/shadowserver/test_chargen.py
@@ -34,8 +34,10 @@
'classification.type': 'vulnerable service',
'classification.identifier': 'openchargen',
'classification.taxonomy': 'vulnerable',
- 'extra': '{"naics": 123456, "response_size": 116, "sic": 654321, '
- '"tag": "chargen"}',
+ 'extra.response_size': 116,
+ 'extra.naics': 123456,
+ 'extra.tag': 'chargen',
+ 'extra.sic': 654321,
'protocol.application': 'chargen',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -53,7 +55,8 @@
'classification.type': 'vulnerable service',
'classification.identifier': 'openchargen',
'classification.taxonomy': 'vulnerable',
- 'extra': '{"response_size": 116, "tag": "chargen"}',
+ 'extra.tag': 'chargen',
+ 'extra.response_size': 116,
'protocol.application': 'chargen',
'protocol.transport': 'udp',
'raw': utils.base64_encode(
@@ -74,7 +77,8 @@
'classification.type': 'vulnerable service',
'classification.identifier': 'openchargen',
'classification.taxonomy': 'vulnerable',
- 'extra': '{"response_size": 116, "tag": "chargen"}',
+ 'extra.response_size': 116,
+ 'extra.tag': 'chargen',
'protocol.application': 'chargen',
'protocol.transport': 'udp',
'raw': utils.base64_encode(
@@ -97,7 +101,8 @@
'classification.type': 'vulnerable service',
'classification.identifier': 'openchargen',
'classification.taxonomy': 'vulnerable',
- 'extra': '{"response_size": 116, "tag": "chargen"}',
+ 'extra.tag': 'chargen',
+ 'extra.response_size': 116,
'protocol.application': 'chargen',
'protocol.transport': 'udp',
'raw': utils.base64_encode(
@@ -118,7 +123,8 @@
'classification.type': 'vulnerable service',
'classification.identifier': 'openchargen',
'classification.taxonomy': 'vulnerable',
- 'extra': '{"response_size": 116, "tag": "chargen"}',
+ 'extra.response_size': 116,
+ 'extra.tag': 'chargen',
'protocol.application': 'chargen',
'protocol.transport': 'udp',
'raw': utils.base64_encode(
@@ -140,7 +146,7 @@
'classification.type': 'vulnerable service',
'classification.identifier': 'openchargen',
'classification.taxonomy': 'vulnerable',
- 'extra': '{"tag": "chargen"}',
+ 'extra.tag': 'chargen',
'protocol.application': 'chargen',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([EXAMPLE_LINE_SHORT[0],
diff --git a/intelmq/tests/bots/parsers/shadowserver/test_compromised_website.py b/intelmq/tests/bots/parsers/shadowserver/test_compromised_website.py
index 4a5070b928..c5af8dccc6 100644
--- a/intelmq/tests/bots/parsers/shadowserver/test_compromised_website.py
+++ b/intelmq/tests/bots/parsers/shadowserver/test_compromised_website.py
@@ -25,8 +25,9 @@
'feed.name': 'ShadowServer Compromised Website',
'classification.type': 'compromised',
'classification.identifier': 'compromised-website',
- 'extra': '{"detected_since": "2015-05-09 05:51:12", "naics": "0", "server": '
- '"Microsoft-IIS/7.5", "sic": "0", "system": "WINNT"}',
+ 'extra.server': 'Microsoft-IIS/7.5',
+ 'extra.system': 'WINNT',
+ 'extra.detected_since': '2015-05-09 05:51:12',
'protocol.application': 'http',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
RECONSTRUCTED_LINES[1], ''])),
diff --git a/intelmq/tests/bots/parsers/shadowserver/test_drone_hadoop.py b/intelmq/tests/bots/parsers/shadowserver/test_drone_hadoop.py
index 2fdf340027..331d7b0aa3 100644
--- a/intelmq/tests/bots/parsers/shadowserver/test_drone_hadoop.py
+++ b/intelmq/tests/bots/parsers/shadowserver/test_drone_hadoop.py
@@ -30,7 +30,9 @@
'destination.geolocation.cc': 'US',
'destination.ip': '74.208.164.166',
'destination.port': 80,
- 'extra': '{"connection_count": 1, "os.name": "Windows", "os.version": "2000 SP4, XP SP1+"}',
+ 'extra.os.name': 'Windows',
+ 'extra.os.version': '2000 SP4, XP SP1+',
+ 'extra.connection_count': 1,
'malware.name': 'sinkhole',
'protocol.transport': 'tcp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -52,7 +54,8 @@
'destination.fqdn': '015.maxided.com',
'destination.geolocation.cc': 'NL',
'destination.ip': '94.75.228.147',
- 'extra': '{"connection_count": 1, "os.name": "WINXP"}',
+ 'extra.os.name': 'WINXP',
+ 'extra.connection_count': 1,
'malware.name': 'spyeye',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
RECONSTRUCTED_LINES[2], ''])),
@@ -73,7 +76,8 @@
'destination.geolocation.cc': 'DE',
'destination.ip': '87.106.24.200',
'destination.port': 80,
- 'extra': '{"os.name": "Windows", "os.version": "XP SP1+, 2000 SP3 (2)"}',
+ 'extra.os.version': 'XP SP1+, 2000 SP3 (2)',
+ 'extra.os.name': 'Windows',
'malware.name': 'sinkhole',
'protocol.transport': 'tcp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -95,7 +99,9 @@
'destination.geolocation.cc': 'DE',
'destination.ip': '87.106.24.200',
'destination.port': 443,
- 'extra': '{"connection_count": 1, "os.name": "Windows", "os.version": "2000 SP4, XP SP1+"}',
+ 'extra.os.version': '2000 SP4, XP SP1+',
+ 'extra.connection_count': 1,
+ 'extra.os.name': 'Windows',
'malware.name': 'sinkhole',
'protocol.transport': 'tcp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -117,7 +123,9 @@
'destination.geolocation.cc': 'US',
'destination.ip': '74.208.164.166',
'destination.port': 443,
- 'extra': '{"connection_count": 1, "os.name": "Windows", "os.version": "2000 SP4, XP SP1+"}',
+ 'extra.os.name': 'Windows',
+ 'extra.os.version': '2000 SP4, XP SP1+',
+ 'extra.connection_count': 1,
'malware.name': 'sinkhole',
'protocol.transport': 'tcp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -139,7 +147,8 @@
'destination.geolocation.cc': 'DE',
'destination.ip': '87.106.24.200',
'destination.port': 443,
- 'extra': '{"os.name": "Windows", "os.version": "2000 SP4, XP SP1+"}',
+ 'extra.os.name': 'Windows',
+ 'extra.os.version': '2000 SP4, XP SP1+',
'malware.name': 'sinkhole',
'protocol.transport': 'tcp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -161,7 +170,9 @@
'destination.geolocation.cc': 'DE',
'destination.ip': '87.106.24.200',
'destination.port': 443,
- 'extra': '{"connection_count": 1, "os.name": "Windows", "os.version": "XP/2000 (RFC1323+, w+, tstamp+)"}',
+ 'extra.connection_count': 1,
+ 'extra.os.version': 'XP/2000 (RFC1323+, w+, tstamp+)',
+ 'extra.os.name': 'Windows',
'malware.name': 'sinkhole',
'protocol.transport': 'tcp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -183,7 +194,9 @@
'destination.geolocation.cc': 'US',
'destination.ip': '74.208.164.166',
'destination.port': 80,
- 'extra': '{"connection_count": 1, "os.name": "Windows", "os.version": "XP SP1+, 2000 SP3 (2)"}',
+ 'extra.connection_count': 1,
+ 'extra.os.name': 'Windows',
+ 'extra.os.version': 'XP SP1+, 2000 SP3 (2)',
'malware.name': 'sinkhole',
'protocol.transport': 'tcp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -205,7 +218,9 @@
'destination.geolocation.cc': 'US',
'destination.ip': '74.208.164.166',
'destination.port': 443,
- 'extra': '{"connection_count": 1, "os.name": "Windows", "os.version": "2000 SP4, XP SP1+"}',
+ 'extra.os.name': 'Windows',
+ 'extra.os.version': '2000 SP4, XP SP1+',
+ 'extra.connection_count': 1,
'malware.name': 'sinkhole',
'protocol.transport': 'tcp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -227,7 +242,9 @@
'destination.geolocation.cc': 'DE',
'destination.ip': '87.106.24.200',
'destination.port': 443,
- 'extra': '{"connection_count": 1, "os.name": "Windows", "os.version": "2000 SP4, XP SP1+"}',
+ 'extra.os.version': '2000 SP4, XP SP1+',
+ 'extra.os.name': 'Windows',
+ 'extra.connection_count': 1,
'malware.name': 'sinkhole',
'protocol.transport': 'tcp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -251,7 +268,9 @@
'destination.port': 443,
'destination.url': 'https://115-166-54-44.ip.adam.com.au/index.php',
'protocol.application': 'https',
- 'extra': '{"connection_count": 1, "os.name": "Windows", "os.version": "2000 SP4, XP SP1+"}',
+ 'extra.os.name': 'Windows',
+ 'extra.connection_count': 1,
+ 'extra.os.version': '2000 SP4, XP SP1+',
'malware.name': 'sinkhole',
'protocol.transport': 'tcp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
diff --git a/intelmq/tests/bots/parsers/shadowserver/test_microsoft_sinkhole.py b/intelmq/tests/bots/parsers/shadowserver/test_microsoft_sinkhole.py
index 4aa6e6ab74..f0225a603f 100644
--- a/intelmq/tests/bots/parsers/shadowserver/test_microsoft_sinkhole.py
+++ b/intelmq/tests/bots/parsers/shadowserver/test_microsoft_sinkhole.py
@@ -152,7 +152,8 @@
'destination.ip': '204.95.99.205',
'destination.port': 443,
'destination.url': 'http://204.95.99.205/index.php',
- 'extra': '{"http_host": "204.95.99.205", "user_agent": "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.0.8077)"}',
+ 'extra.user_agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.0.8077)',
+ 'extra.http_host': '204.95.99.205',
'malware.name': 'caphaw',
'protocol.application': 'http',
'protocol.transport': 'tcp',
@@ -175,7 +176,7 @@
'destination.fqdn': 'xf5wau9lcpf5.oonucoog.cc',
'destination.port': 443,
'destination.url': 'http://xf5wau9lcpf5.oonucoog.cc/ping.html',
- 'extra': '{"user_agent": "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.0.7357)"}',
+ 'extra.user_agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.0.7357)',
'malware.name': 'caphaw',
'protocol.application': 'http',
'protocol.transport': 'tcp',
@@ -198,7 +199,7 @@
'destination.fqdn': '3k3kwrnj.rgk.cc',
'destination.port': 443,
'destination.url': 'http://3k3kwrnj.rgk.cc/index.php',
- 'extra': '{"user_agent": "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.0.9121)"}',
+ 'extra.user_agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.0.9121)',
'malware.name': 'caphaw',
'protocol.application': 'http',
'protocol.transport': 'tcp',
@@ -221,7 +222,7 @@
'destination.fqdn': 'ultimaresource.com',
'destination.port': 80,
'destination.url': 'http://ultimaresource.com/wild/live/file.php',
- 'extra': '{"user_agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; BRI/1)"}',
+ 'extra.user_agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; BRI/1)',
'malware.name': 'citadel-b54',
'protocol.application': 'http',
'protocol.transport': 'tcp',
@@ -243,7 +244,8 @@
'destination.ip': '199.2.137.202',
'destination.port': 80,
'destination.url': 'http://199.2.137.202/file-b29d40.php',
- 'extra': '{"http_host": "199.2.137.202", "user_agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; WOW64; .NET CLR 3.5.21022)"}',
+ 'extra.user_agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; WOW64; .NET CLR 3.5.21022)',
+ 'extra.http_host': '199.2.137.202',
'malware.name': 'citadel-b54',
'protocol.application': 'http',
'protocol.transport': 'tcp',
@@ -266,7 +268,7 @@
'destination.fqdn': 'prohomemain.com',
'destination.port': 80,
'destination.url': 'http://prohomemain.com/367601b6737825deb58a244576e4f098/file.php',
- 'extra': '{"user_agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; AskTB5.6)"}',
+ 'extra.user_agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; AskTB5.6)',
'malware.name': 'citadel-b54',
'protocol.application': 'http',
'protocol.transport': 'tcp',
@@ -289,7 +291,7 @@
'destination.fqdn': 'ronapri.com',
'destination.port': 80,
'destination.url': 'http://ronapri.com/view/file.php',
- 'extra': '{"user_agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; AskTbFWV5/5.11.3.15590)"}',
+ 'extra.user_agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; AskTbFWV5/5.11.3.15590)',
'malware.name': 'citadel-b54',
'protocol.application': 'http',
'protocol.transport': 'tcp',
@@ -312,7 +314,7 @@
'destination.fqdn': '9a5bb34eede4b85b9e81f40d530b68ff.co.cc',
'destination.port': 80,
'destination.url': 'http://9A5BB34EEDE4B85B9E81F40D530B68FF.co.cc/message.php',
- 'extra': '{"user_agent": "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; .NET4.0C)"}',
+ 'extra.user_agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; .NET4.0C)',
'malware.name': 'bamital-b58',
'protocol.application': 'http',
'protocol.transport': 'tcp',
diff --git a/intelmq/tests/bots/parsers/shadowserver/test_qotd.py b/intelmq/tests/bots/parsers/shadowserver/test_qotd.py
index e4562420f1..966a6d5709 100644
--- a/intelmq/tests/bots/parsers/shadowserver/test_qotd.py
+++ b/intelmq/tests/bots/parsers/shadowserver/test_qotd.py
@@ -26,7 +26,10 @@
'classification.identifier': 'openqotd',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"naics": 123456, "quote": "N?s matamos o tempo, mas ele enterra-nos.?? (Machado de Assis)??", "sic": 654321, "tag": "qotd"}',
+ 'extra.quote': 'N?s matamos o tempo, mas ele enterra-nos.?? (Machado de Assis)??',
+ 'extra.tag': 'qotd',
+ 'extra.sic': 654321,
+ 'extra.naics': 123456,
'protocol.application': 'qotd',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -45,7 +48,8 @@
'classification.identifier': 'openqotd',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"quote": "When a stupid man is doing something he is ashamed of, he always declares?? that it is his duty. George Bernard Shaw (1856-1950)??", "tag": "qotd"}',
+ 'extra.tag': 'qotd',
+ 'extra.quote': 'When a stupid man is doing something he is ashamed of, he always declares?? that it is his duty. George Bernard Shaw (1856-1950)??',
'protocol.application': 'qotd',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -63,7 +67,8 @@
'classification.identifier': 'openqotd',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"quote": "_The secret of being miserable is to have leisure to bother about whether?? you are happy or not. The cure for it is occupation._?? George Bernard Shaw (1856-1950)??", "tag": "qotd"}',
+ 'extra.quote': '_The secret of being miserable is to have leisure to bother about whether?? you are happy or not. The cure for it is occupation._?? George Bernard Shaw (1856-1950)??',
+ 'extra.tag': 'qotd',
'protocol.application': 'qotd',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -82,7 +87,8 @@
'classification.identifier': 'openqotd',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"quote": "_We have no more right to consume happiness without producing it than to?? consume wealth without producing it._ George Bernard Shaw (1856-1950)??", "tag": "qotd"}',
+ 'extra.tag': 'qotd',
+ 'extra.quote': '_We have no more right to consume happiness without producing it than to?? consume wealth without producing it._ George Bernard Shaw (1856-1950)??',
'protocol.application': 'qotd',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
diff --git a/intelmq/tests/bots/parsers/shadowserver/test_snmp.py b/intelmq/tests/bots/parsers/shadowserver/test_snmp.py
index b2ea669bac..516839bb74 100644
--- a/intelmq/tests/bots/parsers/shadowserver/test_snmp.py
+++ b/intelmq/tests/bots/parsers/shadowserver/test_snmp.py
@@ -26,7 +26,9 @@
'classification.identifier': 'opensnmp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"sysdesc": "Hardware: x86 Family 6 Model 8 Stepping 6 AT/AT COMPATIBLE - Software: Windows 2000 Version 5.0 (Build 2195 Uniprocessor Free)", "sysname": "ORSONKA", "version": 2}',
+ 'extra.sysname': 'ORSONKA',
+ 'extra.version': 2,
+ 'extra.sysdesc': 'Hardware: x86 Family 6 Model 8 Stepping 6 AT/AT COMPATIBLE - Software: Windows 2000 Version 5.0 (Build 2195 Uniprocessor Free)',
'protocol.application': 'snmp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -45,7 +47,9 @@
'classification.identifier': 'opensnmp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"sysdesc": "ADSL Modem", "sysname": "tc", "version": 2}',
+ 'extra.sysdesc': 'ADSL Modem',
+ 'extra.version': 2,
+ 'extra.sysname': 'tc',
'protocol.application': 'snmp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -64,7 +68,7 @@
'classification.identifier': 'opensnmp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"version": 2}',
+ 'extra.version': 2,
'protocol.application': 'snmp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -83,7 +87,9 @@
'classification.identifier': 'opensnmp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"sysdesc": "Linux ADSL2PlusRouter 2.6.19 #7 Tue Apr 9 17:06:16 CST 2013 mips", "sysname": "TD5130", "version": 2}',
+ 'extra.sysname': 'TD5130',
+ 'extra.version': 2,
+ 'extra.sysdesc': 'Linux ADSL2PlusRouter 2.6.19 #7 Tue Apr 9 17:06:16 CST 2013 mips',
'protocol.application': 'snmp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -102,7 +108,9 @@
'classification.identifier': 'opensnmp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"sysdesc": "Linux R6100 2.6.31 #1 Tue Jun 4 06:50:58 EDT 2013 mips MIB=01a01", "sysname": "Unknow", "version": 2}',
+ 'extra.sysdesc': 'Linux R6100 2.6.31 #1 Tue Jun 4 06:50:58 EDT 2013 mips MIB=01a01',
+ 'extra.sysname': 'Unknow',
+ 'extra.version': 2,
'protocol.application': 'snmp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -121,7 +129,9 @@
'classification.identifier': 'opensnmp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"sysdesc": "110TC1", "sysname": "Beetel", "version": 2}',
+ 'extra.version': 2,
+ 'extra.sysname': 'Beetel',
+ 'extra.sysdesc': '110TC1',
'protocol.application': 'snmp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -140,7 +150,9 @@
'classification.identifier': 'opensnmp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"sysdesc": "BCW710J <>", "sysname": "CableHome", "version": 2}',
+ 'extra.sysname': 'CableHome',
+ 'extra.sysdesc': 'BCW710J <>',
+ 'extra.version': 2,
'protocol.application': 'snmp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -159,7 +171,9 @@
'classification.identifier': 'opensnmp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"sysdesc": "Linux WNR1000v2 2.6.15 #199 Thu Jan 28 09:49:57 CST 2010 mips MIB=01a01", "sysname": "Unknow", "version": 2}',
+ 'extra.sysname': 'Unknow',
+ 'extra.version': 2,
+ 'extra.sysdesc': 'Linux WNR1000v2 2.6.15 #199 Thu Jan 28 09:49:57 CST 2010 mips MIB=01a01',
'protocol.application': 'snmp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -178,7 +192,7 @@
'classification.identifier': 'opensnmp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"version": 2}',
+ 'extra.version': 2,
'protocol.application': 'snmp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -196,7 +210,9 @@
'classification.identifier': 'opensnmp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"sysdesc": "D-Link Wireless Voice Gateway <>", "sysname": "CableHome", "version": 2}',
+ 'extra.version': 2,
+ 'extra.sysdesc': 'D-Link Wireless Voice Gateway <>',
+ 'extra.sysname': 'CableHome',
'protocol.application': 'snmp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
diff --git a/intelmq/tests/bots/parsers/shadowserver/test_xdmcp.py b/intelmq/tests/bots/parsers/shadowserver/test_xdmcp.py
index 504e56347a..b0d8dd7ad5 100644
--- a/intelmq/tests/bots/parsers/shadowserver/test_xdmcp.py
+++ b/intelmq/tests/bots/parsers/shadowserver/test_xdmcp.py
@@ -26,7 +26,11 @@
'classification.identifier': 'openxdmcp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"opcode": "Willing", "reported_hostname": "netmanage", "size": "50", "status": "Linux 2.6.32-573.3.1.el6.i686", "tag": "xdmcp"}',
+ 'extra.tag': 'xdmcp',
+ 'extra.reported_hostname': 'netmanage',
+ 'extra.size': '50',
+ 'extra.opcode': 'Willing',
+ 'extra.status': 'Linux 2.6.32-573.3.1.el6.i686',
'protocol.application': 'xdmcp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -44,7 +48,11 @@
'classification.identifier': 'openxdmcp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"opcode": "Willing", "reported_hostname": "bimsdev1", "size": "48", "status": "0 users load: 0.0, 0.0, 0.0", "tag": "xdmcp"}',
+ 'extra.size': '48',
+ 'extra.tag': 'xdmcp',
+ 'extra.reported_hostname': 'bimsdev1',
+ 'extra.status': '0 users load: 0.0, 0.0, 0.0',
+ 'extra.opcode': 'Willing',
'protocol.application': 'xdmcp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -62,7 +70,11 @@
'classification.identifier': 'openxdmcp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"opcode": "Willing", "reported_hostname": "zyite01", "size": "50", "status": "4 users load: 28.2, 28.6, 28.8", "tag": "xdmcp"}',
+ 'extra.status': '4 users load: 28.2, 28.6, 28.8',
+ 'extra.opcode': 'Willing',
+ 'extra.size': '50',
+ 'extra.reported_hostname': 'zyite01',
+ 'extra.tag': 'xdmcp',
'protocol.application': 'xdmcp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -79,7 +91,11 @@
'classification.identifier': 'openxdmcp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"opcode": "Willing", "reported_hostname": "PAGOS", "size": "44", "status": "Linux 3.12.55-52.42-default", "tag": "xdmcp"}',
+ 'extra.status': 'Linux 3.12.55-52.42-default',
+ 'extra.opcode': 'Willing',
+ 'extra.reported_hostname': 'PAGOS',
+ 'extra.tag': 'xdmcp',
+ 'extra.size': '44',
'protocol.application': 'xdmcp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -97,7 +113,11 @@
'classification.identifier': 'openxdmcp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"opcode": "Willing", "reported_hostname": "linux-ws15", "size": "52", "status": "0 user, load: 0.00, 0.00, 0.00", "tag": "xdmcp"}',
+ 'extra.tag': 'xdmcp',
+ 'extra.size': '52',
+ 'extra.status': '0 user, load: 0.00, 0.00, 0.00',
+ 'extra.opcode': 'Willing',
+ 'extra.reported_hostname': 'linux-ws15',
'protocol.application': 'xdmcp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -114,7 +134,11 @@
'classification.identifier': 'openxdmcp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"opcode": "Unwilling", "reported_hostname": "mvodtown", "size": "51", "status": "!Display not authorized to connect", "tag": "xdmcp"}',
+ 'extra.size': '51',
+ 'extra.tag': 'xdmcp',
+ 'extra.status': '!Display not authorized to connect',
+ 'extra.opcode': 'Unwilling',
+ 'extra.reported_hostname': 'mvodtown',
'protocol.application': 'xdmcp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -133,7 +157,11 @@
'classification.type': 'vulnerable service',
'protocol.application': 'xdmcp',
'protocol.transport': 'udp',
- 'extra': '{"opcode": "Willing", "reported_hostname": "WASWP", "size": "45", "status": "0 users load: 0.1, 0.2, 0.2", "tag": "xdmcp"}',
+ 'extra.tag': 'xdmcp',
+ 'extra.opcode': 'Willing',
+ 'extra.status': '0 users load: 0.1, 0.2, 0.2',
+ 'extra.size': '45',
+ 'extra.reported_hostname': 'WASWP',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
RECONSTRUCTED_LINES[7], ''])),
'source.asn': 38661,
@@ -148,7 +176,11 @@
'classification.identifier': 'openxdmcp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"opcode": "Willing", "reported_hostname": "VENDITTI.localdomain.net", "size": "58", "status": "Linux 2.6.32-64GB-i686", "tag": "xdmcp"}',
+ 'extra.reported_hostname': 'VENDITTI.localdomain.net',
+ 'extra.tag': 'xdmcp',
+ 'extra.opcode': 'Willing',
+ 'extra.status': 'Linux 2.6.32-64GB-i686',
+ 'extra.size': '58',
'protocol.application': 'xdmcp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
@@ -166,7 +198,11 @@
'classification.identifier': 'openxdmcp',
'classification.taxonomy': 'vulnerable',
'classification.type': 'vulnerable service',
- 'extra': '{"opcode": "Willing", "reported_hostname": "kasei", "size": "45", "status": "0 users load: 11., 11., 11.", "tag": "xdmcp"}',
+ 'extra.status': '0 users load: 11., 11., 11.',
+ 'extra.opcode': 'Willing',
+ 'extra.reported_hostname': 'kasei',
+ 'extra.tag': 'xdmcp',
+ 'extra.size': '45',
'protocol.application': 'xdmcp',
'protocol.transport': 'udp',
'raw': utils.base64_encode('\n'.join([RECONSTRUCTED_LINES[0],
diff --git a/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py b/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py
index 05c1435e67..64c6b501c0 100644
--- a/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py
+++ b/intelmq/tests/bots/parsers/spamhaus/test_parser_cert.py
@@ -44,7 +44,7 @@
'destination.port': 80,
'destination.fqdn': 'dxxt.sinkhole.dk',
'destination.ip': '212.227.20.19',
- 'extra': '{"destination.local_port": 1036}',
+ 'extra.destination.local_port': 1036,
'source.geolocation.cc': 'AT',
'protocol.transport': 'tcp',
},
@@ -57,7 +57,7 @@
'malware.name': 'conficker',
'destination.port': 80,
'destination.ip': '216.66.15.109',
- 'extra': '{"destination.local_port": 1430}',
+ 'extra.destination.local_port': 1430,
'source.geolocation.cc': 'AT',
'protocol.transport': 'tcp',
},
diff --git a/intelmq/tests/bots/parsers/spamhaus/test_parser_drop.py b/intelmq/tests/bots/parsers/spamhaus/test_parser_drop.py
index 84417226a1..b6778b5964 100644
--- a/intelmq/tests/bots/parsers/spamhaus/test_parser_drop.py
+++ b/intelmq/tests/bots/parsers/spamhaus/test_parser_drop.py
@@ -28,7 +28,7 @@
'time.source': '2015-09-14T04:39:22+00:00',
'classification.type': 'spam',
'source.network': '1.4.0.0/17',
- 'extra': '{"blocklist": "SBL256893"}',
+ 'extra.blocklist': 'SBL256893',
'raw': 'MS40LjAuMC8xNyA7IFNCTDI1Njg5Mw==',
},
{'feed.name': 'Spamhaus drop feed',
@@ -38,7 +38,7 @@
'time.source': '2015-09-14T04:39:22+00:00',
'classification.type': 'spam',
'source.network': '2a06:e480::/29',
- 'extra': '{"blocklist": "SBL301771"}',
+ 'extra.blocklist': 'SBL301771',
'raw': 'MmEwNjplNDgwOjovMjkgOyBTQkwzMDE3NzE=',
}]
diff --git a/intelmq/tests/lib/test_harmonization.py b/intelmq/tests/lib/test_harmonization.py
index 2440bf7026..f543c408d6 100644
--- a/intelmq/tests/lib/test_harmonization.py
+++ b/intelmq/tests/lib/test_harmonization.py
@@ -266,11 +266,13 @@ def test_json_valid(self):
""" Test JSON.is_valid with valid arguments. """
self.assertTrue(harmonization.JSON.is_valid('{"foo": "bar"}',
sanitize=False))
+ self.assertTrue(harmonization.JSON.is_valid('"foo"',
+ sanitize=False))
def test_json_invalid(self):
""" Test JSON.is_valid with invalid arguments. """
- self.assertFalse(harmonization.JSON.is_valid('{}'))
- self.assertFalse(harmonization.JSON.is_valid('"example"'))
+ self.assertFalse(harmonization.JSON.is_valid('{'))
+ self.assertFalse(harmonization.JSON.is_valid('["foo", ]'))
self.assertFalse(harmonization.JSON.is_valid(b'{"foo": 1}',
sanitize=False))
self.assertFalse(harmonization.JSON.is_valid({"foo": "bar"},
@@ -285,6 +287,29 @@ def test_json_sanitize(self):
self.assertTrue(harmonization.JSON.is_valid(b'{"foo": "bar"}',
sanitize=True))
+ def test_jsondict_valid(self):
+ """ Test JSONDict.is_valid with valid arguments. """
+ self.assertTrue(harmonization.JSONDict.is_valid('{"foo": "bar"}',
+ sanitize=False))
+
+ def test_jsondict_invalid(self):
+ """ Test JSONDict.is_valid with invalid arguments. """
+ self.assertFalse(harmonization.JSONDict.is_valid('{}'))
+ self.assertFalse(harmonization.JSONDict.is_valid('"example"'))
+ self.assertFalse(harmonization.JSONDict.is_valid(b'{"foo": 1}',
+ sanitize=False))
+ self.assertFalse(harmonization.JSONDict.is_valid({"foo": "bar"},
+ sanitize=False))
+
+ def test_jsondict_sanitize(self):
+ """ Test JSONDict.sanitize with valid arguments. """
+ self.assertTrue(harmonization.JSONDict.is_valid({"foo": "bar"},
+ sanitize=True))
+ self.assertTrue(harmonization.JSONDict.is_valid('{"foo": "bar"}',
+ sanitize=True))
+ self.assertTrue(harmonization.JSONDict.is_valid(b'{"foo": "bar"}',
+ sanitize=True))
+
def test_lowercasestring_valid(self):
""" Test LowercaseString.is_valid with valid arguments. """
self.assertTrue(harmonization.LowercaseString.is_valid('foobar'))
diff --git a/intelmq/tests/lib/test_message.py b/intelmq/tests/lib/test_message.py
index f9cd2fb530..c87d05cd3a 100644
--- a/intelmq/tests/lib/test_message.py
+++ b/intelmq/tests/lib/test_message.py
@@ -607,6 +607,56 @@ def test_message_update(self):
with self.assertRaises(exceptions.InvalidValue):
event.update({'source.asn': 'AS1'})
+ def test_message_extra_construction(self):
+ """
+ Test if a field with a name starting with 'extra.' is accepted and saved.
+ """
+ event = self.new_event()
+ event.add('extra.test', 'foobar')
+ event.add('extra.test2', 'foobar2')
+ self.assertEqual(event.to_dict(hierarchical=True),
+ {'extra': {"test": "foobar", "test2": "foobar2"}}
+ )
+ self.assertEqual(event.to_dict(hierarchical=False),
+ {'extra.test': "foobar", "extra.test2": "foobar2"}
+ )
+
+ def test_message_extra_getitem(self):
+ """
+ Test if an extra field is saved and can be retrieved.
+ """
+ event = self.new_event()
+ event.add('extra.test', 'foobar')
+ self.assertEqual(event['extra.test'], 'foobar')
+
+ def test_message_extra_set_oldstyle_string(self):
+ """
+ Test if extra accepts a string (backwards-compat) and the field can be retrieved.
+ """
+ event = self.new_event()
+ event.add('extra', '{"foo": "bar"}')
+ self.assertEqual(event['extra'], '{"foo": "bar"}')
+ self.assertEqual(event['extra.foo'], 'bar')
+
+ def test_message_extra_set_oldstyle_dict(self):
+ """
+ Test if extra accepts a dict and the field can be retrieved.
+ """
+ event = self.new_event()
+ event.add('extra', {"foo": "bar"})
+ self.assertEqual(event['extra'], '{"foo": "bar"}')
+ self.assertEqual(event['extra.foo'], 'bar')
+
+ def test_message_extra_set_dict_ignore_empty(self):
+ """
+ Test if empty values in a dict given to extra are ignored (subfield access raises KeyError).
+ """
+ event = self.new_event()
+ event.add('extra', {"foo": ''})
+ with self.assertRaises(KeyError):
+ event['extra.foo']
+
+
if __name__ == '__main__': # pragma: no cover
unittest.main()