diff --git a/elasticutils/__init__.py b/elasticutils/__init__.py
index 87ea7db..1a9af40 100644
--- a/elasticutils/__init__.py
+++ b/elasticutils/__init__.py
@@ -1,3 +1,4 @@
+import collections
 import copy
 import logging
 from datetime import datetime
@@ -7,6 +8,7 @@
 from elasticsearch.helpers import bulk_index
 
 from elasticutils._version import __version__  # noqa
+from elasticutils.fields import SearchField
 
 
 log = logging.getLogger('elasticutils')
@@ -2194,3 +2196,33 @@ def refresh_index(cls, es=None, index=None):
             index = cls.get_index()
 
         es.indices.refresh(index=index)
+
+
+class DeclarativeMappingMeta(type):
+
+    def __new__(cls, name, bases, attrs):
+        fields = [(name_, attrs.pop(name_)) for name_, column in attrs.items()
+                  if isinstance(column, SearchField)]
+        # Put fields in order defined in the class.
+        fields.sort(key=lambda f: f[1]._creation_order)
+        attrs['fields'] = fields
+        return super(DeclarativeMappingMeta, cls).__new__(cls, name, bases,
+                                                          attrs)
+
+
+class DocumentType(object):
+    __metaclass__ = DeclarativeMappingMeta
+
+    def get_mapping(self):
+        """
+        Returns mapping based on defined fields.
+        """
+        fields = collections.OrderedDict()
+        for name, field in self.fields:
+            name = field.index_fieldname or name
+            defn = field.get_definition()
+            fields[name] = defn
+
+        mapping = {'properties': fields}
+
+        return mapping
diff --git a/elasticutils/exceptions.py b/elasticutils/exceptions.py
new file mode 100644
index 0000000..a2d2aeb
--- /dev/null
+++ b/elasticutils/exceptions.py
@@ -0,0 +1,3 @@
+class SearchFieldError(Exception):
+    """Raised when a field encounters an error."""
+    pass
diff --git a/elasticutils/fields.py b/elasticutils/fields.py
new file mode 100644
index 0000000..30ea205
--- /dev/null
+++ b/elasticutils/fields.py
@@ -0,0 +1,222 @@
+import base64
+import datetime
+import re
+from decimal import Decimal
+
+from .exceptions import SearchFieldError
+
+
+DATE_REGEX = re.compile('^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}).*?$')
+DATETIME_REGEX = re.compile('^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})'
+                            '(T|\s+)(?P<hour>\d{2}):(?P<minute>\d{2}):'
+                            '(?P<second>\d{2}).*?$')
+
+
+class SearchField(object):
+
+    field_type = None
+    attrs = []
+
+    # Used to maintain the order of fields as defined in the class.
+    _creation_order = 0
+
+    def __init__(self, *args, **kwargs):
+        # These are special.
+        for attr in ('index_fieldname', 'is_multivalued'):
+            setattr(self, attr, kwargs.pop(attr, None))
+
+        # Set all kwargs on self for later access.
+        for attr in kwargs.keys():
+            self.attrs.append(attr)
+            setattr(self, attr, kwargs.pop(attr, None))
+
+        # Store this field's order.
+        self._creation_order = SearchField._creation_order
+        # Increment order number for future fields.
+        SearchField._creation_order += 1
+
+    def to_es(self, value):
+        """
+        Converts a Python value to an Elasticsearch value.
+
+        Extending classes should override this method.
+        """
+        return value
+
+    def to_python(self, value):
+        """
+        Converts an Elasticsearch value to a Python value.
+
+        Extending classes should override this method.
+        """
+        return value
+
+    def get_definition(self):
+        """
+        Returns the representation of this field's definition in the mapping.
+        """
+        f = {'type': self.field_type}
+
+        for attr in self.attrs:
+            val = getattr(self, attr, None)
+            if val is not None:
+                f[attr] = val
+
+        return f
+
+
+class StringField(SearchField):
+    field_type = 'string'
+
+    def to_es(self, value):
+        if value is None:
+            return None
+
+        return unicode(value)
+
+    def to_python(self, value):
+        if value is None:
+            return None
+
+        return unicode(value)
+
+
+class IntegerField(SearchField):
+    field_type = 'integer'
+
+    def __init__(self, type='integer', *args, **kwargs):
+        if type in ('byte', 'short', 'integer', 'long'):
+            self.field_type = type
+        super(IntegerField, self).__init__(*args, **kwargs)
+
+    def to_es(self, value):
+        if value is None:
+            return None
+
+        return int(value)
+
+    def to_python(self, value):
+        if value is None:
+            return None
+
+        return int(value)
+
+
+class FloatField(SearchField):
+    field_type = 'float'
+
+    def __init__(self, type='float', *args, **kwargs):
+        if type in ('float', 'double'):
+            self.field_type = type
+        super(FloatField, self).__init__(*args, **kwargs)
+
+    def to_es(self, value):
+        if value is None:
+            return None
+
+        return float(value)
+
+    def to_python(self, value):
+        if value is None:
+            return None
+
+        return float(value)
+
+
+class DecimalField(StringField):
+
+    def to_es(self, value):
+        if value is None:
+            return None
+
+        return str(float(value))
+
+    def to_python(self, value):
+        if value is None:
+            return None
+
+        return Decimal(str(value))
+
+
+class BooleanField(SearchField):
+    field_type = 'boolean'
+
+    def to_es(self, value):
+        if value is None:
+            return None
+
+        return bool(value)
+
+    def to_python(self, value):
+        if value is None:
+            return None
+
+        return bool(value)
+
+
+class DateField(SearchField):
+    field_type = 'date'
+
+    def to_es(self, value):
+        if isinstance(value, (datetime.date, datetime.datetime)):
+            return value.isoformat()
+
+        return value
+
+    def to_python(self, value):
+        if value is None:
+            return None
+
+        if isinstance(value, basestring):
+            match = DATE_REGEX.search(value)
+
+            if match:
+                data = match.groupdict()
+                return datetime.date(
+                    int(data['year']), int(data['month']), int(data['day']))
+            else:
+                raise SearchFieldError(
+                    "Date provided to '%s' field doesn't appear to be a valid "
+                    "date string: '%s'" % (self.instance_name, value))
+
+        return value
+
+
+class DateTimeField(DateField):
+
+    def to_python(self, value):
+        if value is None:
+            return None
+
+        if isinstance(value, basestring):
+            match = DATETIME_REGEX.search(value)
+
+            if match:
+                data = match.groupdict()
+                return datetime.datetime(
+                    int(data['year']), int(data['month']), int(data['day']),
+                    int(data['hour']), int(data['minute']),
+                    int(data['second']))
+            else:
+                raise SearchFieldError(
+                    "Datetime provided to '%s' field doesn't appear to be a "
+                    "valid datetime string: '%s'" % (
+                        self.instance_name, value))
+
+        return value
+
+
+class BinaryField(SearchField):
+    field_type = 'binary'
+
+    def to_es(self, value):
+        if value is None:
+            return None
+
+        return base64.b64encode(value)
+
+    def to_python(self, value):
+        if value is None:
+            return None
+
+        return base64.b64decode(value)
diff --git a/elasticutils/tests/test_document_type.py b/elasticutils/tests/test_document_type.py
new file mode 100644
index 0000000..ff06886
--- /dev/null
+++ b/elasticutils/tests/test_document_type.py
@@ -0,0 +1,42 @@
+from unittest import TestCase
+
+from nose.tools import eq_
+
+from elasticutils import DocumentType, fields
+
+
+class BookDocumentType(DocumentType):
+
+    id = fields.IntegerField(type='long')
+    name = fields.StringField(analyzer='snowball')
+    name_sort = fields.StringField(index='not_analyzed')
+    authors = fields.StringField(is_multivalued=True)
+    published_date = fields.DateField()
+    price = fields.DecimalField()
+    is_autographed = fields.BooleanField()
+    sales = fields.IntegerField()
+
+
+class DocumentTypeTest(TestCase):
+
+    def setUp(self):
+        self._type = BookDocumentType
+
+    def test_mapping(self):
+        mapping = self._type().get_mapping()
+
+        # Check top level element.
+        eq_(mapping.keys(), ['properties'])
+
+        fields = mapping['properties']
+
+        eq_(fields['id']['type'], 'long')
+        eq_(fields['name']['type'], 'string')
+        eq_(fields['name']['analyzer'], 'snowball')
+        eq_(fields['name_sort']['type'], 'string')
+        eq_(fields['name_sort']['index'], 'not_analyzed')
+        eq_(fields['authors']['type'], 'string')
+        eq_(fields['published_date']['type'], 'date')
+        eq_(fields['price']['type'], 'string')
+        eq_(fields['is_autographed']['type'], 'boolean')
+        eq_(fields['sales']['type'], 'integer')
diff --git a/elasticutils/tests/test_fields.py b/elasticutils/tests/test_fields.py
new file mode 100644
index 0000000..5553795
--- /dev/null
+++ b/elasticutils/tests/test_fields.py
@@ -0,0 +1,320 @@
+import base64
+import datetime
+from decimal import Decimal
+from unittest import TestCase
+
+from nose.tools import eq_
+
+from elasticutils import fields
+
+
+class TestStringField(TestCase):
+
+    def test_type(self):
+        eq_(fields.StringField().field_type, 'string')
+
+    def test_to_es(self):
+        eq_(fields.StringField().to_es(None), None)
+        eq_(fields.StringField().to_es('test'), 'test')
+
+    def test_to_python(self):
+        eq_(fields.StringField().to_python(None), None)
+        eq_(fields.StringField().to_python('test'), 'test')
+
+    def test_index(self):
+        field = fields.StringField(index='not_analyzed')
+        eq_(field.get_definition()['index'], 'not_analyzed')
+
+    def test_store(self):
+        field = fields.StringField(store='yes')
+        eq_(field.get_definition()['store'], 'yes')
+
+    def test_term_vector(self):
+        field = fields.StringField(term_vector='with_offsets')
+        eq_(field.get_definition()['term_vector'], 'with_offsets')
+
+    def test_boost(self):
+        field = fields.StringField(boost=2.5)
+        eq_(field.get_definition()['boost'], 2.5)
+
+    def test_null_value(self):
+        field = fields.StringField(null_value='na')
+        eq_(field.get_definition()['null_value'], 'na')
+
+    def test_boolean_attributes(self):
+        for attr in ('omit_norms', 'include_in_all'):
+            field = fields.StringField(**{attr: True})
+            eq_(field.get_definition()[attr], True)
+            field = fields.StringField(**{attr: False})
+            eq_(field.get_definition()[attr], False)
+
+    def test_index_options(self):
+        field = fields.StringField(index_options='positions')
+        eq_(field.get_definition()['index_options'], 'positions')
+
+    def test_analyzer(self):
+        field = fields.StringField(analyzer='snowball')
+        eq_(field.get_definition()['analyzer'], 'snowball')
+
+    def test_index_analyzer(self):
+        field = fields.StringField(index_analyzer='snowball')
+        eq_(field.get_definition()['index_analyzer'], 'snowball')
+
+    def test_search_analyzer(self):
+        field = fields.StringField(search_analyzer='snowball')
+        eq_(field.get_definition()['search_analyzer'], 'snowball')
+
+    def test_ignore_above(self):
+        field = fields.StringField(ignore_above='1024')
+        eq_(field.get_definition()['ignore_above'], '1024')
+
+    def test_position_offset_gap(self):
+        field = fields.StringField(position_offset_gap=2)
+        eq_(field.get_definition()['position_offset_gap'], 2)
+
+
+class TestIntegerField(TestCase):
+
+    def test_type(self):
+        eq_(fields.IntegerField().field_type, 'integer')
+        eq_(fields.IntegerField(type='byte').field_type, 'byte')
+        eq_(fields.IntegerField(type='short').field_type, 'short')
+        eq_(fields.IntegerField(type='long').field_type, 'long')
+        eq_(fields.IntegerField(type='foo').field_type, 'integer')
+
+    def test_to_es(self):
+        eq_(fields.IntegerField().to_es(None), None)
+        eq_(fields.IntegerField().to_es(100), 100)
+        eq_(fields.IntegerField().to_es('100'), 100)
+
+    def test_to_python(self):
+        eq_(fields.IntegerField().to_python(None), None)
+        eq_(fields.IntegerField().to_python(100), 100)
+        eq_(fields.IntegerField().to_python('100'), 100)
+
+    def test_index(self):
+        field = fields.IntegerField(index='no')
+        eq_(field.get_definition()['index'], 'no')
+
+    def test_store(self):
+        field = fields.IntegerField(store='yes')
+        eq_(field.get_definition()['store'], 'yes')
+
+    def test_precision_step(self):
+        field = fields.IntegerField(precision_step=4)
+        eq_(field.get_definition()['precision_step'], 4)
+
+    def test_boost(self):
+        field = fields.IntegerField(boost=2.5)
+        eq_(field.get_definition()['boost'], 2.5)
+
+    def test_null_value(self):
+        field = fields.IntegerField(null_value=1)
+        eq_(field.get_definition()['null_value'], 1)
+
+    def test_boolean_attributes(self):
+        for attr in ('ignore_malformed', 'include_in_all'):
+            field = fields.IntegerField(**{attr: True})
+            eq_(field.get_definition()[attr], True)
+            field = fields.IntegerField(**{attr: False})
+            eq_(field.get_definition()[attr], False)
+
+
+class TestFloatField(TestCase):
+
+    def test_type(self):
+        eq_(fields.FloatField().field_type, 'float')
+        eq_(fields.FloatField(type='double').field_type, 'double')
+        eq_(fields.FloatField(type='foo').field_type, 'float')
+
+    def test_to_es(self):
+        eq_(fields.FloatField().to_es(None), None)
+        eq_(fields.FloatField().to_es(100), 100.0)
+        eq_(fields.FloatField().to_es('100'), 100.0)
+
+    def test_to_python(self):
+        eq_(fields.FloatField().to_python(None), None)
+        eq_(fields.FloatField().to_python(100), 100.0)
+        eq_(fields.FloatField().to_python('100'), 100.0)
+
+    def test_index(self):
+        field = fields.FloatField(index='no')
+        eq_(field.get_definition()['index'], 'no')
+
+    def test_store(self):
+        field = fields.FloatField(store='yes')
+        eq_(field.get_definition()['store'], 'yes')
+
+    def test_precision_step(self):
+        field = fields.FloatField(precision_step=4)
+        eq_(field.get_definition()['precision_step'], 4)
+
+    def test_boost(self):
+        field = fields.FloatField(boost=2.5)
+        eq_(field.get_definition()['boost'], 2.5)
+
+    def test_null_value(self):
+        field = fields.FloatField(null_value=1.0)
+        eq_(field.get_definition()['null_value'], 1.0)
+
+    def test_boolean_attributes(self):
+        for attr in ('ignore_malformed', 'include_in_all'):
+            field = fields.FloatField(**{attr: True})
+            eq_(field.get_definition()[attr], True)
+            field = fields.FloatField(**{attr: False})
+            eq_(field.get_definition()[attr], False)
+
+
+class TestDecimalField(TestCase):
+    """DecimalField subclasses StringField, so we just test a few things."""
+
+    def test_type(self):
+        eq_(fields.DecimalField().field_type, 'string')
+
+    def test_to_es(self):
+        eq_(fields.DecimalField().to_es(None), None)
+        eq_(fields.DecimalField().to_es(Decimal('100.0')), '100.0')
+        eq_(fields.DecimalField().to_es(Decimal('100')), '100.0')
+
+    def test_to_python(self):
+        eq_(fields.DecimalField().to_python(None), None)
+        eq_(fields.DecimalField().to_python('100.0'), Decimal('100.0'))
+
+
+class TestBooleanField(TestCase):
+
+    def test_type(self):
+        eq_(fields.BooleanField().field_type, 'boolean')
+
+    def test_to_es(self):
+        eq_(fields.BooleanField().to_es(None), None)
+        eq_(fields.BooleanField().to_es(True), True)
+        eq_(fields.BooleanField().to_es(False), False)
+
+    def test_to_python(self):
+        eq_(fields.BooleanField().to_python(None), None)
+        eq_(fields.BooleanField().to_python(True), True)
+        eq_(fields.BooleanField().to_python(False), False)
+
+    def test_index(self):
+        field = fields.BooleanField(index='no')
+        eq_(field.get_definition()['index'], 'no')
+
+    def test_store(self):
+        field = fields.BooleanField(store='yes')
+        eq_(field.get_definition()['store'], 'yes')
+
+    def test_boost(self):
+        field = fields.BooleanField(boost=2.5)
+        eq_(field.get_definition()['boost'], 2.5)
+
+    def test_null_value(self):
+        field = fields.BooleanField(null_value=True)
+        eq_(field.get_definition()['null_value'], True)
+
+    def test_boolean_attributes(self):
+        field = fields.BooleanField(include_in_all=True)
+        eq_(field.get_definition()['include_in_all'], True)
+        field = fields.BooleanField(include_in_all=False)
+        eq_(field.get_definition()['include_in_all'], False)
+
+
+class TestDateField(TestCase):
+
+    def test_type(self):
+        eq_(fields.DateField().field_type, 'date')
+
+    def test_to_es(self):
+        eq_(fields.DateField().to_es(datetime.date(2013, 11, 22)),
+            '2013-11-22')
+
+    def test_to_python(self):
+        eq_(fields.DateField().to_python(None), None)
+        eq_(fields.DateField().to_python('2013-11-22'),
+            datetime.date(2013, 11, 22))
+        eq_(fields.DateField().to_python('2013-11-22T12:34:56'),
+            datetime.date(2013, 11, 22))
+
+    def test_index(self):
+        field = fields.DateField(index='no')
+        eq_(field.get_definition()['index'], 'no')
+
+    def test_store(self):
+        field = fields.DateField(store='yes')
+        eq_(field.get_definition()['store'], 'yes')
+
+    def test_boost(self):
+        field = fields.DateField(boost=2.5)
+        eq_(field.get_definition()['boost'], 2.5)
+
+    def test_null_value(self):
+        field = fields.DateField(null_value='2013-11-22')
+        eq_(field.get_definition()['null_value'], '2013-11-22')
+
+    def test_precision_step(self):
+        field = fields.DateField(precision_step=4)
+        eq_(field.get_definition()['precision_step'], 4)
+
+    def test_boolean_attributes(self):
+        for attr in ('ignore_malformed', 'include_in_all'):
+            field = fields.DateField(**{attr: True})
+            eq_(field.get_definition()[attr], True)
+            field = fields.DateField(**{attr: False})
+            eq_(field.get_definition()[attr], False)
+
+
+class TestDateTimeField(TestCase):
+
+    def test_type(self):
+        eq_(fields.DateTimeField().field_type, 'date')
+
+    def test_to_es(self):
+        eq_(fields.DateTimeField().to_es(
+            datetime.datetime(2013, 11, 22, 12, 34, 56)),
+            '2013-11-22T12:34:56')
+
+    def test_to_python(self):
+        eq_(fields.DateTimeField().to_python(None), None)
+        eq_(fields.DateTimeField().to_python('2013-11-22T12:34:56'),
+            datetime.datetime(2013, 11, 22, 12, 34, 56))
+
+    def test_index(self):
+        field = fields.DateTimeField(index='no')
+        eq_(field.get_definition()['index'], 'no')
+
+    def test_store(self):
+        field = fields.DateTimeField(store='yes')
+        eq_(field.get_definition()['store'], 'yes')
+
+    def test_boost(self):
+        field = fields.DateTimeField(boost=2.5)
+        eq_(field.get_definition()['boost'], 2.5)
+
+    def test_null_value(self):
+        field = fields.DateTimeField(null_value='2013-11-22')
+        eq_(field.get_definition()['null_value'], '2013-11-22')
+
+    def test_precision_step(self):
+        field = fields.DateTimeField(precision_step=4)
+        eq_(field.get_definition()['precision_step'], 4)
+
+    def test_boolean_attributes(self):
+        for attr in ('ignore_malformed', 'include_in_all'):
+            field = fields.DateTimeField(**{attr: True})
+            eq_(field.get_definition()[attr], True)
+            field = fields.DateTimeField(**{attr: False})
+            eq_(field.get_definition()[attr], False)
+
+
+class TestBinaryField(TestCase):
+
+    def test_type(self):
+        eq_(fields.BinaryField().field_type, 'binary')
+
+    def test_to_es(self):
+        eq_(fields.BinaryField().to_es(None), None)
+        eq_(fields.BinaryField().to_es('test'), base64.b64encode('test'))
+
+    def test_to_python(self):
+        eq_(fields.BinaryField().to_python(None), None)
+        eq_(fields.BinaryField().to_python(base64.b64encode('test')), 'test')