Skip to content

Commit

Permalink
made min/max aggregate functions support all literals
Browse files Browse the repository at this point in the history
not just numerics as before.

fixes #628
  • Loading branch information
gromgull committed Jan 20, 2017
1 parent b1113d7 commit ab422a5
Show file tree
Hide file tree
Showing 9 changed files with 84 additions and 50 deletions.
17 changes: 8 additions & 9 deletions rdflib/plugins/sparql/aggregates.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from rdflib import Literal, XSD

from rdflib.plugins.sparql.evalutils import _eval, NotBoundError
from rdflib.plugins.sparql.evalutils import _eval, NotBoundError, _val
from rdflib.plugins.sparql.operators import numeric
from rdflib.plugins.sparql.datatypes import type_promotion

from rdflib.plugins.sparql.compat import num_max, num_min
from rdflib.plugins.sparql.sparql import SPARQLTypeError

from decimal import Decimal
Expand Down Expand Up @@ -157,10 +156,10 @@ def set_value(self, bindings):
def update(self, row, aggregator):
try:
if self.value is None:
self.value = numeric(_eval(self.expr, row))
self.value = _eval(self.expr, row)
else:
# self.compare is implemented by Minimum/Maximum
self.value = self.compare(self.value, numeric(_eval(self.expr, row)))
self.value = self.compare(self.value, _eval(self.expr, row))
# skip UNDEF or BNode => SPARQLTypeError
except NotBoundError:
pass
Expand All @@ -171,13 +170,13 @@ def update(self, row, aggregator):
class Minimum(Extremum):

def compare(self, val1, val2):
return num_min(val1, val2)
return min(val1, val2, key=_val)


class Maximum(Extremum):

def compare(self, val1, val2):
return num_max(val1, val2)
return max(val1, val2, key=_val)


class Sample(Accumulator):
Expand All @@ -199,7 +198,7 @@ def update(self, row, aggregator):

def get_value(self):
# set None if no value was set
return None
return None

class GroupConcat(Accumulator):

Expand Down Expand Up @@ -247,8 +246,9 @@ def __init__(self, aggregations):

def update(self, row):
"""update all own accumulators"""
# SAMPLE accumulators may delete themselves
# SAMPLE accumulators may delete themselves
# => iterate over list not generator

for acc in self.accumulators.values():
if acc.use_row(row):
acc.update(row, self)
Expand All @@ -258,4 +258,3 @@ def get_bindings(self):
for acc in self.accumulators.itervalues():
acc.set_value(self.bindings)
return self.bindings

26 changes: 0 additions & 26 deletions rdflib/plugins/sparql/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,3 @@ def keys(self):
from collections import OrderedDict # was added in 2.7
except ImportError:
from ordereddict import OrderedDict # extra module

import sys

if sys.version_info[:2] < (2, 7):

from decimal import Decimal
# Pre-2.7 decimal and float did not compare correctly

def _numericKey(n):
if isinstance(n, Decimal):
return float(n)
else:
return n

def num_max(*args, **kwargs):
kwargs["key"] = _numericKey
return max(*args, **kwargs)

def num_min(*args, **kwargs):
kwargs["key"] = _numericKey
return min(*args, **kwargs)

else:

num_max = max
num_min = min
17 changes: 3 additions & 14 deletions rdflib/plugins/sparql/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from rdflib.plugins.sparql.sparql import (
QueryContext, AlreadyBound, FrozenBindings, SPARQLError)
from rdflib.plugins.sparql.evalutils import (
_filter, _eval, _join, _diff, _minus, _fillTemplate, _ebv)
_filter, _eval, _join, _diff, _minus, _fillTemplate, _ebv, _val)

from rdflib.plugins.sparql.aggregates import Aggregator
from rdflib.plugins.sparql.algebra import Join, ToMultiSet, Values
Expand Down Expand Up @@ -316,19 +316,8 @@ def evalOrderBy(ctx, part):

for e in reversed(part.expr):

def val(x):
v = value(x, e.expr, variables=True)
if isinstance(v, Variable):
return (0, v)
elif isinstance(v, BNode):
return (1, v)
elif isinstance(v, URIRef):
return (2, v)
elif isinstance(v, Literal):
return (3, v)

reverse = bool(e.order and e.order == 'DESC')
res = sorted(res, key=val, reverse=reverse)
res = sorted(res, key=lambda x: _val(value(x, e.expr, variables=True)), reverse=reverse)

return res

Expand Down Expand Up @@ -366,7 +355,7 @@ def evalReduced(ctx, part):
MAX = 1
# TODO: add configuration or determine "best" size for most use cases
# 0: No reduction
# 1: compare only with the last row, almost no reduction with
# 1: compare only with the last row, almost no reduction with
# unordered incoming rows
# N: The greater the buffer size the greater the reduction but more
# memory and time are needed
Expand Down
13 changes: 12 additions & 1 deletion rdflib/plugins/sparql/evalutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from rdflib.term import Variable, Literal, BNode, URIRef

from rdflib.plugins.sparql.operators import EBV
from rdflib.plugins.sparql.parserutils import Expr, CompValue
from rdflib.plugins.sparql.parserutils import Expr, CompValue, value
from rdflib.plugins.sparql.sparql import SPARQLError, NotBoundError


Expand Down Expand Up @@ -112,3 +112,14 @@ def _fillTemplate(template, solution):
_o is not None:

yield (_s, _p, _o)

def _val(v):
    """Return a sort key that ranks a term by its kind.

    The key is a ``(rank, term)`` tuple, where rank is 0 for a Variable,
    1 for a BNode, 2 for a URIRef and 3 for a Literal.  A value that is
    none of these yields ``None``.
    """
    # Order matters: the first matching type decides the rank.
    ranked_kinds = (Variable, BNode, URIRef, Literal)
    for rank, kind in enumerate(ranked_kinds):
        if isinstance(v, kind):
            return (rank, v)
    return None
33 changes: 33 additions & 0 deletions test/DAWG/rdflib/manifest.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
:whitespacedot
:minusfilter
:notexistsfilter

:bindscope
:bindscope2

:filtersubquery1
:filtersubquery2
:filtersubquery3
Expand All @@ -27,6 +29,9 @@
:subquery2
:subquery3

:subqueryagg1
:subqueryagg2

) .


Expand Down Expand Up @@ -213,3 +218,31 @@ From https://github.com/RDFLib/rdflib/issues/615, contributed by https://github.
qt:data <subquery.ttl> ] ;
mf:result <subquery.tsv>
.


:subqueryagg1 rdf:type mf:QueryEvaluationTest ;
mf:name "plain query aggregation function";
rdfs:comment """
From https://github.com/RDFLib/rdflib/issues/628, contributed by https://github.com/wrobell
""";
dawgt:approval dawgt:Approved ;
dawgt:approvedBy <http://gromgull.net/me> ;
mf:action
[ qt:query <subqueryagg1.rq> ;
qt:data <subqueryagg.ttl> ] ;
mf:result <subqueryagg.tsv>
.


:subqueryagg2 rdf:type mf:QueryEvaluationTest ;
mf:name "subquery aggregation function";
rdfs:comment """
From https://github.com/RDFLib/rdflib/issues/628, contributed by https://github.com/wrobell
""";
dawgt:approval dawgt:Approved ;
dawgt:approvedBy <http://gromgull.net/me> ;
mf:action
[ qt:query <subqueryagg.rq> ;
qt:data <subqueryagg.ttl> ] ;
mf:result <subqueryagg.tsv>
.
12 changes: 12 additions & 0 deletions test/DAWG/rdflib/subqueryagg.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
PREFIX : <http://people.example/>

SELECT ?y ?minName
WHERE {
:alice :knows ?y .
{
SELECT ?y (MIN(?name) AS ?minName)
WHERE {
?y :name ?name .
} GROUP BY ?y
}
}
3 changes: 3 additions & 0 deletions test/DAWG/rdflib/subqueryagg.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
?y ?minName
<http://people.example/bob> "B. Bar"
<http://people.example/carol> "C. Baz"
6 changes: 6 additions & 0 deletions test/DAWG/rdflib/subqueryagg.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@prefix : <http://people.example/> .

:alice :name "Alice", "Alice Foo", "A. Foo" .
:alice :knows :bob, :carol .
:bob :name "Bob", "Bob Bar", "B. Bar" .
:carol :name "Carol", "Carol Baz", "C. Baz" .
7 changes: 7 additions & 0 deletions test/DAWG/rdflib/subqueryagg1.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
PREFIX : <http://people.example/>

SELECT ?y (MIN(?name) as ?minName)
WHERE {
:alice :knows ?y .
?y :name ?name .
} GROUP BY ?y

0 comments on commit ab422a5

Please sign in to comment.