From c0b04e47891f447555a9dd2d2ceefe94f4da026f Mon Sep 17 00:00:00 2001 From: Cedric G Hurst II Date: Mon, 25 Jul 2011 00:16:48 -0500 Subject: [PATCH 1/6] added preliminary support for grouping (new in Solr 3.3). At the moment, only main (flattened) group values are supported: http://wiki.apache.org/solr/FieldCollapsing --- sunburnt/search.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/sunburnt/search.py b/sunburnt/search.py index 8267ac3..9adb21c 100644 --- a/sunburnt/search.py +++ b/sunburnt/search.py @@ -358,7 +358,7 @@ def add_boost(self, kwargs, boost_score): class SolrSearch(object): - option_modules = ('query_obj', 'filter_obj', 'paginator', 'more_like_this', 'highlighter', 'faceter', 'sorter', 'facet_querier', 'field_limiter',) + option_modules = ('query_obj', 'filter_obj', 'paginator', 'more_like_this', 'highlighter', 'faceter', 'grouper', 'sorter', 'facet_querier', 'field_limiter',) def __init__(self, interface, original=None): self.interface = interface self.schema = interface.schema @@ -369,6 +369,7 @@ def __init__(self, interface, original=None): self.more_like_this = MoreLikeThisOptions(self.schema) self.highlighter = HighlightOptions(self.schema) self.faceter = FacetOptions(self.schema) + self.grouper = GroupOptions(self.schema) self.sorter = SortOptions(self.schema) self.field_limiter = FieldLimitOptions(self.schema) self.facet_querier = FacetQueryOptions(self.schema) @@ -418,6 +419,11 @@ def facet_by(self, field, **kwargs): newself = self.clone() newself.faceter.update(field, **kwargs) return newself + + def group_by(self, field, **kwargs): + newself = self.clone() + newself.grouper.update(field, **kwargs) + return newself def facet_query(self, *args, **kwargs): newself = self.clone() @@ -554,6 +560,23 @@ def __init__(self, schema, original=None): def field_names_in_opts(self, opts, fields): if fields: opts["facet.field"] = sorted(fields) + +class GroupOptions(Options): + option_name = "group" + opts = {"limit":int + } + + def __init__(self, schema, original=None): + self.schema = schema + if original is None: + self.fields = collections.defaultdict(dict) + else: + self.fields = copy.copy(original.fields) + + def field_names_in_opts(self, opts, fields): + if fields: + opts["group.field"] = sorted(fields) + opts["group.main"] = "true" class HighlightOptions(Options): From 14e4e0b44952a90f81bf0f8cf88d4a507dc4ca63 Mon Sep 17 00:00:00 2001 From: Cedric G Hurst II Date: Tue, 26 Jul 2011 17:06:38 -0500 Subject: [PATCH 2/6] adding support to parse the group information --- sunburnt/schema.py | 31 ++++++++++++++++++++++++++----- sunburnt/search.py | 21 +++++++++++++++------ 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/sunburnt/schema.py b/sunburnt/schema.py index 7ba818f..6c07ef6 100644 --- a/sunburnt/schema.py +++ b/sunburnt/schema.py @@ -489,6 +489,11 @@ def parse_result_doc(self, doc, name=None): elif field_class is None: raise SolrError("unexpected field found in result") return name, SolrFieldInstance.from_solr(field_class, doc.text or '').to_user_data() + + def parse_group(self, group, value=None): + if value is None: + value = group.xpath("str[@name='groupValue']")[0].text + return value, [self.parse_result_doc(n) for n in group.xpath("result/doc")] class SolrUpdate(object): @@ -613,8 +618,12 @@ def __init__(self, schema, xmlmsg): setattr(self, attr, details['responseHeader'].get(attr)) if self.status != 0: raise ValueError("Response indicates an error") - result_node = doc.xpath("/response/result")[0] - self.result = SolrResult(schema, result_node) + result_node_list = doc.xpath("/response/result") + group_node_list = doc.xpath("/response/lst[@name='grouped']") + if result_node_list: + self.result = SolrResult(schema, result_node_list[0]) + else: + self.result = SolrResult(schema, group_node_list[0]) self.facet_counts = SolrFacetCounts.from_response(details) self.highlighting = dict((k, dict(v)) for k, v in details.get("highlighting", ())) @@ -641,12 +650,22 @@ def __getitem__(self, key): class SolrResult(object): def __init__(self, schema, node): + self.grouped = True if (node.tag == 'lst' and node.attrib['name'] == 'grouped') else False self.schema = schema self.name = node.attrib['name'] - self.numFound = int(node.attrib['numFound']) - self.start = int(node.attrib['start']) + self.numFound = node.xpath("lst/int[@name='matches']")[0].text if self.grouped else int(node.attrib['numFound']) + if self.grouped: + ngroups = node.xpath("lst/int[@name='ngroups']") + if ngroups: + self.ngroups = int(ngroups[0].text) + if 'start' in node.attrib: + self.start = int(node.attrib['start']) + else: + start_param = node.xpath("../lst[@name='responseHeader']/lst[@name='params']/str[@name='start']") + self.start = start_param[0].text if start_param else 0 self.docs = [schema.parse_result_doc(n) for n in node.xpath("doc")] - + self.groups = [schema.parse_group(n) for n in node.xpath("lst/arr[@name='groups']/lst")] + def __str__(self): return "%(numFound)s results found, starting at #%(start)s\n\n" % self.__dict__ + str(self.docs) @@ -720,6 +739,8 @@ def value_from_node(node): value = float(node.text) elif node.tag == 'date': value = solr_date(node.text) + elif node.tag == 'result': + value = [value_from_node(n) for n in node.getchildren()] if name is not None: return name, value else: diff --git a/sunburnt/search.py b/sunburnt/search.py index 9adb21c..580c08a 100644 --- a/sunburnt/search.py +++ b/sunburnt/search.py @@ -417,12 +417,18 @@ def filter_exclude(self, *args, **kwargs): def facet_by(self, field, **kwargs): newself = self.clone() - newself.faceter.update(field, **kwargs) + args.field = field + newself.faceter.update(field, a) return newself def group_by(self, field, **kwargs): newself = self.clone() - newself.grouper.update(field, **kwargs) + kwargs['field'] = field + + if not kwargs.has_key('ngroups'): + kwargs['ngroups'] = True + + newself.grouper.update(None, **kwargs) return newself def facet_query(self, *args, **kwargs): @@ -528,6 +534,7 @@ def options(self): fields = [field for field in self.fields if field] self.field_names_in_opts(opts, fields) for field_name, field_opts in self.fields.items(): + print("field_name: %s, field_opts: %s" % (field_name, field_opts)) if not field_name: for field_opt, v in field_opts.items(): opts['%s.%s'%(self.option_name, field_opt)] = v @@ -563,7 +570,10 @@ def field_names_in_opts(self, opts, fields): class GroupOptions(Options): option_name = "group" - opts = {"limit":int + opts = {"field":unicode, + "limit":int, + "main":bool, + "ngroups":bool } def __init__(self, schema, original=None): @@ -572,11 +582,10 @@ def __init__(self, schema, original=None): self.fields = collections.defaultdict(dict) else: self.fields = copy.copy(original.fields) - + def field_names_in_opts(self, opts, fields): if fields: - opts["group.field"] = sorted(fields) - opts["group.main"] = "true" + opts["facet.field"] = sorted(fields) class HighlightOptions(Options): From 72d0a70c17a840a07ad22e4e5fa0ae2b61c886f1 Mon Sep 17 00:00:00 2001 From: Cedric G Hurst II Date: Tue, 26 Jul 2011 17:09:29 -0500 Subject: [PATCH 3/6] adding group field to the result object --- sunburnt/schema.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sunburnt/schema.py b/sunburnt/schema.py index 6c07ef6..2008aa6 100644 --- a/sunburnt/schema.py +++ b/sunburnt/schema.py @@ -654,10 +654,13 @@ def __init__(self, schema, node): self.schema = schema self.name = node.attrib['name'] self.numFound = node.xpath("lst/int[@name='matches']")[0].text if self.grouped else int(node.attrib['numFound']) + if self.grouped: ngroups = node.xpath("lst/int[@name='ngroups']") if ngroups: self.ngroups = int(ngroups[0].text) + self.groupField = node.xpath("lst")[0].attrib['name'] + if 'start' in node.attrib: self.start = int(node.attrib['start']) else: From dfec12edf8552e1281806856583cbe1c5a9905fe Mon Sep 17 00:00:00 2001 From: Cedric G Hurst II Date: Thu, 4 Aug 2011 20:31:52 -0700 Subject: [PATCH 4/6] adding a line i accidentally deleted from facet_by --- sunburnt/search.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sunburnt/search.py b/sunburnt/search.py index 580c08a..4ea819d 100644 --- a/sunburnt/search.py +++ b/sunburnt/search.py @@ -417,6 +417,7 @@ def filter_exclude(self, *args, **kwargs): def facet_by(self, field, **kwargs): newself = self.clone() + newself.faceter.update(field, **kwargs) args.field = field newself.faceter.update(field, a) return newself From 0378761a03eab0563282754b78d28b66c110a317 Mon Sep 17 00:00:00 2001 From: Cedric G Hurst II Date: Thu, 4 Aug 2011 20:36:46 -0700 Subject: [PATCH 5/6] fixed an unintentional code change on facet_by --- sunburnt/search.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sunburnt/search.py b/sunburnt/search.py index 4ea819d..738e161 100644 --- a/sunburnt/search.py +++ b/sunburnt/search.py @@ -418,8 +418,6 @@ def filter_exclude(self, *args, **kwargs): def facet_by(self, field, **kwargs): newself = self.clone() newself.faceter.update(field, **kwargs) - args.field = field - newself.faceter.update(field, a) return newself def group_by(self, field, **kwargs): From 57cac939ca0e3606cf8279878e9d70f16f22ba28 Mon Sep 17 00:00:00 2001 From: Cedric G Hurst II Date: Sun, 7 Aug 2011 13:07:02 -0500 Subject: [PATCH 6/6] removing unnecessary println --- sunburnt/search.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sunburnt/search.py b/sunburnt/search.py index 738e161..5ce83d0 100644 --- a/sunburnt/search.py +++ b/sunburnt/search.py @@ -533,7 +533,6 @@ def options(self): fields = [field for field in self.fields if field] self.field_names_in_opts(opts, fields) for field_name, field_opts in self.fields.items(): - print("field_name: %s, field_opts: %s" % (field_name, field_opts)) if not field_name: for field_opt, v in field_opts.items(): opts['%s.%s'%(self.option_name, field_opt)] = v