From 6d76750b0e602a5ce07a7c74a046b56772a32e72 Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Tue, 25 Oct 2016 13:48:33 +0100 Subject: [PATCH 01/18] fix #pragma coffee linear loop --- tsfc/driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsfc/driver.py b/tsfc/driver.py index e2d8b8c0..2b05d312 100644 --- a/tsfc/driver.py +++ b/tsfc/driver.py @@ -188,7 +188,7 @@ def compile_integral(integral_data, form_data, prefix, parameters, for i, quadrature_index in enumerate(quadrature_indices): index_names.append((quadrature_index, 'ip_%d' % i)) - body = generate_coffee(impero_c, index_names, parameters["precision"], ir, argument_indices) + body = generate_coffee(impero_c, index_names, parameters["precision"], ir, tuple(chain(*argument_indices))) kernel_name = "%s_%s_integral_%s" % (prefix, integral_type, integral_data.subdomain_id) return builder.construct_kernel(kernel_name, body) From d508d89b74fd8a06f1430644509efd8a750fda14 Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Mon, 24 Oct 2016 16:51:19 +0100 Subject: [PATCH 02/18] delta elimination and sum factorisation --- gem/optimise.py | 145 +++++++++++++++++++++++++++++++++++++++++++++++- tsfc/fem.py | 1 + 2 files changed, 144 insertions(+), 2 deletions(-) diff --git a/gem/optimise.py b/gem/optimise.py index 1c5c71da..8cb07e5c 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -2,17 +2,22 @@ expressions.""" from __future__ import absolute_import, print_function, division -from six.moves import map +from six.moves import map, range, zip +from collections import deque from functools import reduce +from itertools import permutations + +import numpy from singledispatch import singledispatch from gem.node import Memoizer, MemoizerArg, reuse_if_untouched, reuse_if_untouched_arg from gem.gem import (Node, Terminal, Failure, Identity, Literal, Zero, - Sum, Comparison, Conditional, Index, + Product, Sum, Comparison, Conditional, Index, VariableIndex, Indexed, FlexiblyIndexed, IndexSum, ComponentTensor, ListTensor, Delta, partial_indexed) +from gem.utils import OrderedSet @singledispatch @@ -190,6 +195,142 @@ def select_expression(expressions, index): return ComponentTensor(selected, alpha) +@singledispatch +def _pull_delta_from_listtensor(node, self): + raise AssertionError("cannot handle type %s" % type(node)) + + +_pull_delta_from_listtensor.register(Node)(reuse_if_untouched) + + +@_pull_delta_from_listtensor.register(ListTensor) +def _pull_delta_from_listtensor_listtensor(node, self): + # Separate Delta nodes from other expressions + deltaz = [] + rests = [] + + for child in node.children: + deltas = OrderedSet() + others = [] + + # Traverse Product tree + queue = deque([child]) + while queue: + expr = queue.popleft() + if isinstance(expr, Product): + queue.extend(expr.children) + elif isinstance(expr, Delta): + assert expr not in deltas + deltas.add(expr) + else: + others.append(self(expr)) # looks for more ListTensors inside + + deltaz.append(deltas) + rests.append(reduce(Product, others)) + + # Factor out common Delta factors + common_deltas = set.intersection(*[set(ds) for ds in deltaz]) + deltaz = [[d for d in ds if d not in common_deltas] for ds in deltaz] + + # Rebuild ListTensor + new_children = [reduce(Product, ds, rest) + for ds, rest in zip(deltaz, rests)] + result = node.reconstruct(*new_children) + + # Apply common Delta factors + if common_deltas: + alpha = tuple(Index(extent=d) for d in result.shape) + expr = reduce(Product, common_deltas, Indexed(result, alpha)) + result = ComponentTensor(expr, alpha) + return result + + +def pull_delta_from_listtensor(expression): + mapper = Memoizer(_pull_delta_from_listtensor) + return mapper(expression) + + +def contraction(expression): + # Pull Delta nodes out of annoying ListTensors, and eliminate + # annoying ComponentTensors + expression, = remove_componenttensors([expression]) + expression = pull_delta_from_listtensor(expression) + expression, = remove_componenttensors([expression]) + + # Flatten a product tree + sum_indices = [] + factors = [] + + queue = deque([expression]) + while queue: + expr = queue.popleft() + if isinstance(expr, IndexSum): + queue.append(expr.children[0]) + sum_indices.append(expr.index) + elif isinstance(expr, Product): + queue.extend(expr.children) + else: + factors.append(expr) + + # Try to eliminate Delta nodes + delta_queue = [(f, index) + for f in factors if isinstance(f, Delta) + for index in (f.i, f.j) if index in sum_indices] + while delta_queue: + delta, from_ = delta_queue[0] + to_, = list({delta.i, delta.j} - {from_}) + + sum_indices.remove(from_) + + mapper = MemoizerArg(filtered_replace_indices) + factors = [mapper(e, ((from_, to_),)) for e in factors] + + delta_queue = [(f, index) + for f in factors if isinstance(f, Delta) + for index in (f.i, f.j) if index in sum_indices] + + # Drop ones + factors = [e for e in factors if e != Literal(1)] + + # Sum factorisation + expression = None + best_flops = numpy.inf + + for ordering in permutations(factors): + deps = [set(sum_indices) & set(factor.free_indices) + for factor in ordering] + + scan_deps = [None] * len(factors) + scan_deps[0] = deps[0] + for i in range(1, len(factors)): + scan_deps[i] = scan_deps[i - 1] | deps[i] + + sum_at = [None] * len(factors) + sum_at[0] = scan_deps[0] + for i in range(1, len(factors)): + sum_at[i] = scan_deps[i] - scan_deps[i - 1] + + expr = None + flops = 0 + for s, f in reversed(list(zip(sum_at, ordering))): + if expr is None: + expr = f + else: + expr = Product(f, expr) + flops += numpy.prod([i.extent for i in expr.free_indices], dtype=int) + if s: + flops += numpy.prod([i.extent for i in s]) + for i in sum_indices: + if i in s: + expr = IndexSum(expr, i) + + if flops < best_flops: + expression = expr + best_flops = flops + + return expression + + @singledispatch def _replace_delta(node, self): raise AssertionError("cannot handle type %s" % type(node)) diff --git a/tsfc/fem.py b/tsfc/fem.py index f4f2be71..b3b70104 100644 --- a/tsfc/fem.py +++ b/tsfc/fem.py @@ -219,6 +219,7 @@ def callback(entity_id): gem.Indexed(vec, alpha)) for i in alpha: result = gem.IndexSum(result, i) + result = gem.optimise.contraction(result) if vi: return gem.ComponentTensor(result, vi) else: From 106fd0284ecc590302871060fb01e95c78558ff4 Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Mon, 24 Oct 2016 17:55:42 +0100 Subject: [PATCH 03/18] do not generate empty for loops COFFEE loop merger does not like them. --- gem/impero.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/gem/impero.py b/gem/impero.py index 9994e5ff..b990aa99 100644 --- a/gem/impero.py +++ b/gem/impero.py @@ -146,6 +146,13 @@ class For(Node): __slots__ = ('index', 'children') __front__ = ('index',) + def __new__(cls, index, statement): + assert isinstance(statement, Block) + if not statement.children: + return Noop(None) + else: + return super(For, cls).__new__(cls) + def __init__(self, index, statement): self.index = index self.children = (statement,) From 3b315929936ebcf6b692ac476a109af7429e8a68 Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Mon, 24 Oct 2016 19:27:51 +0100 Subject: [PATCH 04/18] IndexSum with multiindex --- gem/gem.py | 34 +++++++++++++++++----------------- gem/optimise.py | 21 ++++++++++++--------- tests/test_codegen.py | 2 +- tsfc/driver.py | 8 ++++---- tsfc/fem.py | 7 ++----- tsfc/ufl2gem.py | 4 ++-- 6 files changed, 38 insertions(+), 38 deletions(-) diff --git a/gem/gem.py b/gem/gem.py index 5251e40f..1713fc68 100644 --- a/gem/gem.py +++ b/gem/gem.py @@ -553,26 +553,27 @@ def __new__(cls, expression, multiindex): class IndexSum(Scalar): - __slots__ = ('children', 'index') - __back__ = ('index',) + __slots__ = ('children', 'multiindex') + __back__ = ('multiindex',) - def __new__(cls, summand, index): + def __new__(cls, summand, multiindex): # Sum zeros assert not summand.shape if isinstance(summand, Zero): return summand - # Sum a single expression - if index.extent == 1: - return Indexed(ComponentTensor(summand, (index,)), (0,)) + # No indices case + multiindex = tuple(multiindex) + if not multiindex: + return summand self = super(IndexSum, cls).__new__(cls) self.children = (summand,) - self.index = index + self.multiindex = multiindex # Collect shape and free indices - assert index in summand.free_indices - self.free_indices = unique(set(summand.free_indices) - {index}) + assert set(multiindex) <= set(summand.free_indices) + self.free_indices = unique(set(summand.free_indices) - set(multiindex)) return self @@ -714,14 +715,13 @@ def unique(indices): return tuple(sorted(set(indices), key=id)) -def index_sum(expression, index): - """Eliminates an index from the free indices of an expression by - summing over it. Returns the expression unchanged if the index is - not a free index of the expression.""" - if index in expression.free_indices: - return IndexSum(expression, index) - else: - return expression +def index_sum(expression, indices): + """Eliminates indices from the free indices of an expression by + summing over them. Skips any index that is not a free index of + the expression.""" + multiindex = tuple(index for index in indices + if index in expression.free_indices) + return IndexSum(expression, multiindex) def partial_indexed(tensor, indices): diff --git a/gem/optimise.py b/gem/optimise.py index 8cb07e5c..7784e2ba 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -266,7 +266,7 @@ def contraction(expression): expr = queue.popleft() if isinstance(expr, IndexSum): queue.append(expr.children[0]) - sum_indices.append(expr.index) + sum_indices.extend(expr.multiindex) elif isinstance(expr, Product): queue.extend(expr.children) else: @@ -320,9 +320,7 @@ def contraction(expression): flops += numpy.prod([i.extent for i in expr.free_indices], dtype=int) if s: flops += numpy.prod([i.extent for i in s]) - for i in sum_indices: - if i in s: - expr = IndexSum(expr, i) + expr = IndexSum(expr, tuple(i for i in sum_indices if i in s)) if flops < best_flops: expression = expr @@ -387,13 +385,18 @@ def _unroll_indexsum(node, self): @_unroll_indexsum.register(IndexSum) # noqa def _(node, self): - if node.index.extent <= self.max_extent: + unroll = tuple(index for index in node.multiindex + if index.extent <= self.max_extent) + if unroll: # Unrolling summand = self(node.children[0]) - return reduce(Sum, - (Indexed(ComponentTensor(summand, (node.index,)), (i,)) - for i in range(node.index.extent)), - Zero()) + shape = tuple(index.extent for index in unroll) + unrolled = reduce(Sum, + (Indexed(ComponentTensor(summand, unroll), alpha) + for alpha in numpy.ndindex(shape)), + Zero()) + return IndexSum(unrolled, tuple(index for index in node.multiindex + if index not in unroll)) else: return reuse_if_untouched(node, self) diff --git a/tests/test_codegen.py b/tests/test_codegen.py index af8c2f63..ee9a8d31 100644 --- a/tests/test_codegen.py +++ b/tests/test_codegen.py @@ -13,7 +13,7 @@ def test_loop_fusion(): def make_expression(i, j): A = Variable('A', (6,)) - s = IndexSum(Indexed(A, (j,)), j) + s = IndexSum(Indexed(A, (j,)), (j,)) return Product(Indexed(A, (i,)), s) e1 = make_expression(i, j) diff --git a/tsfc/driver.py b/tsfc/driver.py index 2b05d312..eeaf1c21 100644 --- a/tsfc/driver.py +++ b/tsfc/driver.py @@ -93,6 +93,7 @@ def compile_integral(integral_data, form_data, prefix, parameters, argument_indices = tuple(tuple(gem.Index(extent=e) for e in create_element(arg.ufl_element()).index_shape) for arg in arguments) + flat_argument_indices = tuple(chain(*argument_indices)) quadrature_indices = [] # Dict mapping domains to index in original_form.ufl_domains() @@ -159,8 +160,7 @@ def compile_integral(integral_data, form_data, prefix, parameters, ir = fem.compile_ufl(integrand, interior_facet=interior_facet, **config) if parameters["unroll_indexsum"]: ir = opt.unroll_indexsum(ir, max_extent=parameters["unroll_indexsum"]) - for quadrature_index in quadrature_multiindex: - ir = [gem.index_sum(expr, quadrature_index) for expr in ir] + ir = [gem.index_sum(expr, quadrature_multiindex) for expr in ir] irs.append(ir) # Sum the expressions that are part of the same restriction @@ -175,7 +175,7 @@ def compile_integral(integral_data, form_data, prefix, parameters, builder.require_cell_orientations() impero_c = impero_utils.compile_gem(return_variables, ir, - tuple(quadrature_indices) + tuple(chain(*argument_indices)), + tuple(quadrature_indices) + flat_argument_indices, remove_zeros=True) # Generate COFFEE @@ -188,7 +188,7 @@ def compile_integral(integral_data, form_data, prefix, parameters, for i, quadrature_index in enumerate(quadrature_indices): index_names.append((quadrature_index, 'ip_%d' % i)) - body = generate_coffee(impero_c, index_names, parameters["precision"], ir, tuple(chain(*argument_indices))) + body = generate_coffee(impero_c, index_names, parameters["precision"], ir, flat_argument_indices) kernel_name = "%s_%s_integral_%s" % (prefix, integral_type, integral_data.subdomain_id) return builder.construct_kernel(kernel_name, body) diff --git a/tsfc/fem.py b/tsfc/fem.py index b3b70104..7aff1cb0 100644 --- a/tsfc/fem.py +++ b/tsfc/fem.py @@ -217,9 +217,7 @@ def callback(entity_id): vi = tuple(gem.Index(extent=d) for d in mt.expr.ufl_shape) result = gem.Product(gem.Indexed(M, alpha + vi), gem.Indexed(vec, alpha)) - for i in alpha: - result = gem.IndexSum(result, i) - result = gem.optimise.contraction(result) + result = gem.optimise.contraction(gem.IndexSum(result, alpha)) if vi: return gem.ComponentTensor(result, vi) else: @@ -242,6 +240,5 @@ def compile_ufl(expression, interior_facet=False, point_sum=False, **kwargs): translator = Translator(context) result = map_expr_dags(translator, expressions) if point_sum: - for index in context.point_set.indices: - result = [gem.index_sum(expr, index) for expr in result] + result = [gem.index_sum(expr, context.point_set.indices) for expr in result] return result diff --git a/tsfc/ufl2gem.py b/tsfc/ufl2gem.py index 673d39c6..79ff09df 100644 --- a/tsfc/ufl2gem.py +++ b/tsfc/ufl2gem.py @@ -112,9 +112,9 @@ def index_sum(self, o, summand, indices): if o.ufl_shape: indices = tuple(Index() for i in range(len(o.ufl_shape))) - return ComponentTensor(IndexSum(Indexed(summand, indices), index), indices) + return ComponentTensor(IndexSum(Indexed(summand, indices), (index,)), indices) else: - return IndexSum(summand, index) + return IndexSum(summand, (index,)) def variable(self, o, expression, label): # Only used by UFL AD, at this point, the bare expression is From 7e24f0d949591720d7610822d404e5799eeef7c2 Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Wed, 16 Nov 2016 16:08:44 +0000 Subject: [PATCH 05/18] little clean up --- gem/optimise.py | 2 +- tsfc/fem.py | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/gem/optimise.py b/gem/optimise.py index 7784e2ba..82079999 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -367,7 +367,7 @@ def expression(index): def replace_delta(expressions): """Lowers all Deltas in a multi-root expression DAG.""" mapper = Memoizer(_replace_delta) - return map(mapper, expressions) + return list(map(mapper, expressions)) @singledispatch diff --git a/tsfc/fem.py b/tsfc/fem.py index 7aff1cb0..365f82c3 100644 --- a/tsfc/fem.py +++ b/tsfc/fem.py @@ -189,10 +189,7 @@ def callback(entity_id): vi = tuple(gem.Index(extent=d) for d in mt.expr.ufl_shape) argument_index = ctx.argument_indices[terminal.number()] result = gem.Indexed(M, argument_index + vi) - if vi: - return gem.ComponentTensor(result, vi) - else: - return result + return gem.ComponentTensor(result, vi) @translate.register(Coefficient) @@ -218,10 +215,7 @@ def callback(entity_id): result = gem.Product(gem.Indexed(M, alpha + vi), gem.Indexed(vec, alpha)) result = gem.optimise.contraction(gem.IndexSum(result, alpha)) - if vi: - return gem.ComponentTensor(result, vi) - else: - return result + return gem.ComponentTensor(result, vi) def compile_ufl(expression, interior_facet=False, point_sum=False, **kwargs): From 5c9c16902972c4a6b37a68420bfcac4dfd67a2fe Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Wed, 16 Nov 2016 16:35:10 +0000 Subject: [PATCH 06/18] add docstrings --- gem/optimise.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gem/optimise.py b/gem/optimise.py index 82079999..51be1cea 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -197,6 +197,11 @@ def select_expression(expressions, index): @singledispatch def _pull_delta_from_listtensor(node, self): + """Pull common delta factors out of ListTensor entries. + + :arg node: root of the expression + :arg self: function for recursive calls + """ raise AssertionError("cannot handle type %s" % type(node)) @@ -246,11 +251,21 @@ def _pull_delta_from_listtensor_listtensor(node, self): def pull_delta_from_listtensor(expression): + """Pull common delta factors out of ListTensor entries.""" mapper = Memoizer(_pull_delta_from_listtensor) return mapper(expression) def contraction(expression): + """Optimise the contractions of the tensor product at the root of + the expression, including: + + - IndexSum-Delta cancellation + - Sum factorisation + + This routine was designed with finite element coefficient + evaluation in mind. + """ # Pull Delta nodes out of annoying ListTensors, and eliminate # annoying ComponentTensors expression, = remove_componenttensors([expression]) From 1485550ba337421c66322aca71c0f99e467d5696 Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Mon, 21 Nov 2016 11:57:57 +0000 Subject: [PATCH 07/18] limit O(N!) algorithm to N <= 5 --- gem/optimise.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/gem/optimise.py b/gem/optimise.py index 51be1cea..e4521f30 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -308,21 +308,19 @@ def contraction(expression): factors = [e for e in factors if e != Literal(1)] # Sum factorisation - expression = None - best_flops = numpy.inf - - for ordering in permutations(factors): + def construct(ordering): + """Construct tensor product from a given ordering.""" deps = [set(sum_indices) & set(factor.free_indices) for factor in ordering] - scan_deps = [None] * len(factors) + scan_deps = [None] * len(ordering) scan_deps[0] = deps[0] - for i in range(1, len(factors)): + for i in range(1, len(ordering)): scan_deps[i] = scan_deps[i - 1] | deps[i] - sum_at = [None] * len(factors) + sum_at = [None] * len(ordering) sum_at[0] = scan_deps[0] - for i in range(1, len(factors)): + for i in range(1, len(ordering)): sum_at[i] = scan_deps[i] - scan_deps[i - 1] expr = None @@ -336,10 +334,25 @@ def contraction(expression): if s: flops += numpy.prod([i.extent for i in s]) expr = IndexSum(expr, tuple(i for i in sum_indices if i in s)) + return expr, flops + + if len(factors) <= 5: + expression = None + best_flops = numpy.inf + + for ordering in permutations(factors): + expr, flops = construct(ordering) + if flops < best_flops: + expression = expr + best_flops = flops + else: + # Cheap heuristic + def key(factor): + return len(set(sum_indices) & set(factor.free_indices)) + ordering = sorted(factors, key=key) - if flops < best_flops: - expression = expr - best_flops = flops + # FIXME: Log this unexpected case. + expression, flops = construct(ordering) return expression From 241883447cf18d9798784d286f4487347ffc7e37 Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Mon, 21 Nov 2016 13:00:39 +0000 Subject: [PATCH 08/18] log warning for unexpected case --- gem/optimise.py | 6 ++++-- tsfc/fem.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/gem/optimise.py b/gem/optimise.py index e4521f30..447d8029 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -256,7 +256,7 @@ def pull_delta_from_listtensor(expression): return mapper(expression) -def contraction(expression): +def contraction(expression, logger=None): """Optimise the contractions of the tensor product at the root of the expression, including: @@ -347,11 +347,13 @@ def construct(ordering): best_flops = flops else: # Cheap heuristic + logger.warning("Unexpectedly many terms for sum factorisation: %d" + "; falling back on cheap heuristic.", len(factors)) + def key(factor): return len(set(sum_indices) & set(factor.free_indices)) ordering = sorted(factors, key=key) - # FIXME: Log this unexpected case. expression, flops = construct(ordering) return expression diff --git a/tsfc/fem.py b/tsfc/fem.py index 365f82c3..cc10cc05 100644 --- a/tsfc/fem.py +++ b/tsfc/fem.py @@ -20,6 +20,7 @@ from tsfc import ufl2gem, geometric from tsfc.finatinterface import create_element, as_fiat_cell from tsfc.kernel_interface import ProxyKernelInterface +from tsfc.logging import logger from tsfc.modified_terminals import analyse_modified_terminal from tsfc.parameters import PARAMETERS from tsfc.ufl_utils import ModifiedTerminalMixin, PickRestriction, simplify_abs @@ -214,7 +215,7 @@ def callback(entity_id): vi = tuple(gem.Index(extent=d) for d in mt.expr.ufl_shape) result = gem.Product(gem.Indexed(M, alpha + vi), gem.Indexed(vec, alpha)) - result = gem.optimise.contraction(gem.IndexSum(result, alpha)) + result = gem.optimise.contraction(gem.IndexSum(result, alpha), logger=logger) return gem.ComponentTensor(result, vi) From e38596951845a787ea1afcbeea095d65caf06d11 Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Mon, 21 Nov 2016 13:15:09 +0000 Subject: [PATCH 09/18] add more comments --- gem/impero.py | 4 ++++ gem/optimise.py | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/gem/impero.py b/gem/impero.py index b990aa99..3eee267e 100644 --- a/gem/impero.py +++ b/gem/impero.py @@ -147,8 +147,12 @@ class For(Node): __front__ = ('index',) def __new__(cls, index, statement): + # In case of an empty loop, create a Noop instead. + # Related: https://github.com/coneoproject/COFFEE/issues/98 assert isinstance(statement, Block) if not statement.children: + # This "works" because the loop_shape of this node is not + # asked any more. return Noop(None) else: return super(For, cls).__new__(cls) diff --git a/gem/optimise.py b/gem/optimise.py index 447d8029..15b18172 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -310,19 +310,24 @@ def contraction(expression, logger=None): # Sum factorisation def construct(ordering): """Construct tensor product from a given ordering.""" + # deps: Indices for each term that need to be summed over. deps = [set(sum_indices) & set(factor.free_indices) for factor in ordering] + # scan_deps: Scan deps to the right with union operation. scan_deps = [None] * len(ordering) scan_deps[0] = deps[0] for i in range(1, len(ordering)): scan_deps[i] = scan_deps[i - 1] | deps[i] + # sum_at: What IndexSum nodes should be inserted before each + # term. An IndexSum binds all terms to its right. sum_at = [None] * len(ordering) sum_at[0] = scan_deps[0] for i in range(1, len(ordering)): sum_at[i] = scan_deps[i] - scan_deps[i - 1] + # Construct expression and count floating-point operations expr = None flops = 0 for s, f in reversed(list(zip(sum_at, ordering))): From 48875f0a31c168493caecf8d8998bd0d42624f68 Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Thu, 24 Nov 2016 16:36:33 +0000 Subject: [PATCH 10/18] revise coefficient evaluation Seems to fix sum factorisation for coefficient derivatives. --- gem/gem.py | 4 ++ tsfc/fem.py | 115 ++++++++++++++++++++++++++++++++++------------------ 2 files changed, 79 insertions(+), 40 deletions(-) diff --git a/gem/gem.py b/gem/gem.py index 1713fc68..c298261a 100644 --- a/gem/gem.py +++ b/gem/gem.py @@ -448,6 +448,10 @@ def __new__(cls, aggregate, multiindex): if isinstance(index, Index): index.set_extent(extent) + # Empty multiindex + if not multiindex: + return aggregate + # Zero folding if isinstance(aggregate, Zero): return Zero() diff --git a/tsfc/fem.py b/tsfc/fem.py index cc10cc05..60cf3a9c 100644 --- a/tsfc/fem.py +++ b/tsfc/fem.py @@ -1,9 +1,11 @@ from __future__ import absolute_import, print_function, division +from six import iterkeys, iteritems, itervalues from six.moves import map import collections import itertools +import numpy from singledispatch import singledispatch from ufl.corealg.map_dag import map_expr_dag, map_expr_dags @@ -151,46 +153,51 @@ def translate_facetarea(terminal, mt, ctx): return ctx.facetarea() -def basis_evaluation(element, ps, derivative=0, entity=None, epsilon=0.0): - # TODO: clean up, potentially remove this function. - import numpy +def fiat_to_ufl(fiat_dict, order): + # All derivative multiindices must be of the same dimension. + dimension, = list(set(len(alpha) for alpha in iterkeys(fiat_dict))) - finat_result = element.basis_evaluation(derivative, ps, entity) - i = element.get_indices() - vi = element.get_value_indices() + # All derivative tables must have the same shape. + shape, = list(set(table.shape for table in itervalues(fiat_dict))) + sigma = tuple(gem.Index(extent=extent) for extent in shape) - dimension = element.cell.get_spatial_dimension() + # Convert from FIAT to UFL format eye = numpy.eye(dimension, dtype=int) - tensor = numpy.empty((dimension,) * derivative, dtype=object) + tensor = numpy.empty((dimension,) * order, dtype=object) for multiindex in numpy.ndindex(tensor.shape): alpha = tuple(eye[multiindex, :].sum(axis=0)) - tensor[multiindex] = gem.Indexed(finat_result[alpha], i + vi) - di = tuple(gem.Index(extent=dimension) for _ in range(derivative)) - if derivative: - tensor = gem.Indexed(gem.ListTensor(tensor), di) + tensor[multiindex] = gem.Indexed(fiat_dict[alpha], sigma) + delta = tuple(gem.Index(extent=dimension) for _ in range(order)) + if order > 0: + tensor = gem.Indexed(gem.ListTensor(tensor), delta) else: tensor = tensor[()] - # A numerical hack that FFC used to apply on FIAT tables still - # lives on after ditching FFC and switching to FInAT. - tensor = ffc_rounding(tensor, epsilon) - return gem.ComponentTensor(tensor, i + vi + di) + return gem.ComponentTensor(tensor, sigma + delta) @translate.register(Argument) def translate_argument(terminal, mt, ctx): + argument_index = ctx.argument_indices[terminal.number()] + sigma = tuple(gem.Index(extent=d) for d in mt.expr.ufl_shape) element = create_element(terminal.ufl_element()) def callback(entity_id): - return basis_evaluation(element, - ctx.point_set, - derivative=mt.local_derivatives, - entity=(ctx.integration_dim, entity_id), - epsilon=ctx.epsilon) - M = ctx.entity_selector(callback, mt.restriction) - vi = tuple(gem.Index(extent=d) for d in mt.expr.ufl_shape) - argument_index = ctx.argument_indices[terminal.number()] - result = gem.Indexed(M, argument_index + vi) - return gem.ComponentTensor(result, vi) + finat_dict = element.basis_evaluation(mt.local_derivatives, + ctx.point_set, + (ctx.integration_dim, entity_id)) + # Filter out irrelevant derivatives + filtered_dict = {alpha: table + for alpha, table in iteritems(finat_dict) + if sum(alpha) == mt.local_derivatives} + + # Change from FIAT to UFL arrangement + square = fiat_to_ufl(filtered_dict, mt.local_derivatives) + + # A numerical hack that FFC used to apply on FIAT tables still + # lives on after ditching FFC and switching to FInAT. + return ffc_rounding(square, ctx.epsilon) + table = ctx.entity_selector(callback, mt.restriction) + return gem.ComponentTensor(gem.Indexed(table, argument_index + sigma), sigma) @translate.register(Coefficient) @@ -203,20 +210,48 @@ def translate_coefficient(terminal, mt, ctx): element = create_element(terminal.ufl_element()) - def callback(entity_id): - return basis_evaluation(element, - ctx.point_set, - derivative=mt.local_derivatives, - entity=(ctx.integration_dim, entity_id), - epsilon=ctx.epsilon) - M = ctx.entity_selector(callback, mt.restriction) - - alpha = element.get_indices() - vi = tuple(gem.Index(extent=d) for d in mt.expr.ufl_shape) - result = gem.Product(gem.Indexed(M, alpha + vi), - gem.Indexed(vec, alpha)) - result = gem.optimise.contraction(gem.IndexSum(result, alpha), logger=logger) - return gem.ComponentTensor(result, vi) + # Collect FInAT tabulation for all entities + per_derivative = collections.defaultdict(list) + for entity_id in ctx.entity_ids: + finat_dict = element.basis_evaluation(mt.local_derivatives, + ctx.point_set, + (ctx.integration_dim, entity_id)) + for alpha, table in iteritems(finat_dict): + # Filter out irrelevant derivatives + if sum(alpha) == mt.local_derivatives: + # A numerical hack that FFC used to apply on FIAT + # tables still lives on after ditching FFC and + # switching to FInAT. + table = ffc_rounding(table, ctx.epsilon) + per_derivative[alpha].append(table) + + # Merge entity tabulations for each derivative + if len(ctx.entity_ids) == 1: + def take_singleton(xs): + x, = xs # asserts singleton + return x + per_derivative = {alpha: take_singleton(tables) + for alpha, tables in iteritems(per_derivative)} + else: + f = ctx.facet_number(mt.restriction) + per_derivative = {alpha: gem.select_expression(tables, f) + for alpha, tables in iteritems(per_derivative)} + + # Coefficient evaluation + beta = element.get_indices() + zeta = element.get_value_indices() + value_dict = {} + for alpha, table in iteritems(per_derivative): + value = gem.IndexSum(gem.Product(gem.Indexed(table, beta + zeta), + gem.Indexed(vec, beta)), + beta) + optimised_value = gem.optimise.contraction(value, logger=logger) + value_dict[alpha] = gem.ComponentTensor(optimised_value, zeta) + + # Change from FIAT to UFL arrangement + result = fiat_to_ufl(value_dict, mt.local_derivatives) + assert result.shape == mt.expr.ufl_shape + return result def compile_ufl(expression, interior_facet=False, point_sum=False, **kwargs): From a65bdfb9f145a5cd6793e4b893cef03ee2b53a18 Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Thu, 24 Nov 2016 11:03:28 +0000 Subject: [PATCH 11/18] improve constant folding --- gem/gem.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/gem/gem.py b/gem/gem.py index c298261a..a5c1ec59 100644 --- a/gem/gem.py +++ b/gem/gem.py @@ -200,12 +200,15 @@ def __new__(cls, a, b): assert not a.shape assert not b.shape - # Zero folding + # Constant folding if isinstance(a, Zero): return b elif isinstance(b, Zero): return a + if isinstance(a, Constant) and isinstance(b, Constant): + return Literal(a.value + b.value) + self = super(Sum, cls).__new__(cls) self.children = a, b return self @@ -218,10 +221,18 @@ def __new__(cls, a, b): assert not a.shape assert not b.shape - # Zero folding + # Constant folding if isinstance(a, Zero) or isinstance(b, Zero): return Zero() + if a == one: + return b + if b == one: + return a + + if isinstance(a, Constant) and isinstance(b, Constant): + return Literal(a.value * b.value) + self = super(Product, cls).__new__(cls) self.children = a, b return self @@ -234,12 +245,18 @@ def __new__(cls, a, b): assert not a.shape assert not b.shape - # Zero folding + # Constant folding if isinstance(b, Zero): raise ValueError("division by zero") if isinstance(a, Zero): return Zero() + if b == one: + return a + + if isinstance(a, Constant) and isinstance(b, Constant): + return Literal(a.value / b.value) + self = super(Division, cls).__new__(cls) self.children = a, b return self @@ -258,7 +275,7 @@ def __new__(cls, base, exponent): raise ValueError("cannot solve 0^0") return Zero() elif isinstance(exponent, Zero): - return Literal(1) + return one self = super(Power, cls).__new__(cls) self.children = base, exponent @@ -646,7 +663,7 @@ def __new__(cls, i, j): # \delta_{i,i} = 1 if i == j: - return Literal(1) + return one # Fixed indices if isinstance(i, int) and isinstance(j, int): @@ -768,3 +785,7 @@ def reshape(variable, *shapes): dim2idxs.append((0, tuple(idxs))) expr = FlexiblyIndexed(variable, tuple(dim2idxs)) return ComponentTensor(expr, tuple(indices)) + + +# Static one object for quicker constant folding +one = Literal(1) From b0fe22acf63d18716bb3f0e7613dfe4f7aa59d8f Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Thu, 24 Nov 2016 11:45:53 +0000 Subject: [PATCH 12/18] generate fast Jacobian code snippets --- gem/optimise.py | 16 ++++++++++++++++ tsfc/fem.py | 8 ++++++++ 2 files changed, 24 insertions(+) diff --git a/gem/optimise.py b/gem/optimise.py index 15b18172..b32a2d3a 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -446,3 +446,19 @@ def unroll_indexsum(expressions, max_extent): mapper = Memoizer(_unroll_indexsum) mapper.max_extent = max_extent return list(map(mapper, expressions)) + + +def aggressive_unroll(expression): + """Aggressively unrolls all loop structures.""" + # Unroll expression shape + if expression.shape: + tensor = numpy.empty(expression.shape, dtype=object) + for alpha in numpy.ndindex(expression.shape): + tensor[alpha] = Indexed(expression, alpha) + expression = ListTensor(tensor) + expression, = remove_componenttensors((ListTensor(tensor),)) + + # Unroll summation + expression, = unroll_indexsum((expression,), max_extent=numpy.inf) + expression, = remove_componenttensors((expression,)) + return expression diff --git a/tsfc/fem.py b/tsfc/fem.py index 60cf3a9c..545da67c 100644 --- a/tsfc/fem.py +++ b/tsfc/fem.py @@ -14,6 +14,7 @@ GeometricQuantity, QuadratureWeight) import gem +from gem.node import traversal from gem.optimise import ffc_rounding from gem.utils import cached_property @@ -251,6 +252,13 @@ def take_singleton(xs): # Change from FIAT to UFL arrangement result = fiat_to_ufl(value_dict, mt.local_derivatives) assert result.shape == mt.expr.ufl_shape + assert set(result.free_indices) <= set(ctx.point_set.indices) + + # Detect Jacobian of affine cells + if not result.free_indices and all(numpy.count_nonzero(node.array) <= 2 + for node in traversal((result,)) + if isinstance(node, gem.Literal)): + result = gem.optimise.aggressive_unroll(result) return result From 629b3929903bc748133f6449d4eeed29ce41fc2a Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Thu, 24 Nov 2016 17:13:40 +0000 Subject: [PATCH 13/18] remove Delta * ListTensor factorisation Not need any more. --- gem/optimise.py | 67 +------------------------------------------------ 1 file changed, 1 insertion(+), 66 deletions(-) diff --git a/gem/optimise.py b/gem/optimise.py index b32a2d3a..4cf8e930 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -17,7 +17,6 @@ VariableIndex, Indexed, FlexiblyIndexed, IndexSum, ComponentTensor, ListTensor, Delta, partial_indexed) -from gem.utils import OrderedSet @singledispatch @@ -195,67 +194,6 @@ def select_expression(expressions, index): return ComponentTensor(selected, alpha) -@singledispatch -def _pull_delta_from_listtensor(node, self): - """Pull common delta factors out of ListTensor entries. - - :arg node: root of the expression - :arg self: function for recursive calls - """ - raise AssertionError("cannot handle type %s" % type(node)) - - -_pull_delta_from_listtensor.register(Node)(reuse_if_untouched) - - -@_pull_delta_from_listtensor.register(ListTensor) -def _pull_delta_from_listtensor_listtensor(node, self): - # Separate Delta nodes from other expressions - deltaz = [] - rests = [] - - for child in node.children: - deltas = OrderedSet() - others = [] - - # Traverse Product tree - queue = deque([child]) - while queue: - expr = queue.popleft() - if isinstance(expr, Product): - queue.extend(expr.children) - elif isinstance(expr, Delta): - assert expr not in deltas - deltas.add(expr) - else: - others.append(self(expr)) # looks for more ListTensors inside - - deltaz.append(deltas) - rests.append(reduce(Product, others)) - - # Factor out common Delta factors - common_deltas = set.intersection(*[set(ds) for ds in deltaz]) - deltaz = [[d for d in ds if d not in common_deltas] for ds in deltaz] - - # Rebuild ListTensor - new_children = [reduce(Product, ds, rest) - for ds, rest in zip(deltaz, rests)] - result = node.reconstruct(*new_children) - - # Apply common Delta factors - if common_deltas: - alpha = tuple(Index(extent=d) for d in result.shape) - expr = reduce(Product, common_deltas, Indexed(result, alpha)) - result = ComponentTensor(expr, alpha) - return result - - -def pull_delta_from_listtensor(expression): - """Pull common delta factors out of ListTensor entries.""" - mapper = Memoizer(_pull_delta_from_listtensor) - return mapper(expression) - - def contraction(expression, logger=None): """Optimise the contractions of the tensor product at the root of the expression, including: @@ -266,10 +204,7 @@ def contraction(expression, logger=None): This routine was designed with finite element coefficient evaluation in mind. """ - # Pull Delta nodes out of annoying ListTensors, and eliminate - # annoying ComponentTensors - expression, = remove_componenttensors([expression]) - expression = pull_delta_from_listtensor(expression) + # Eliminate annoying ComponentTensors expression, = remove_componenttensors([expression]) # Flatten a product tree From f78545be32a65563b7fac9b2283c14915a3541ba Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Fri, 25 Nov 2016 11:23:44 +0000 Subject: [PATCH 14/18] use one in gem.optimise --- gem/optimise.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gem/optimise.py b/gem/optimise.py index 4cf8e930..d1b6a13b 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -16,7 +16,7 @@ Product, Sum, Comparison, Conditional, Index, VariableIndex, Indexed, FlexiblyIndexed, IndexSum, ComponentTensor, ListTensor, Delta, - partial_indexed) + partial_indexed, one) @singledispatch @@ -240,7 +240,7 @@ def contraction(expression, logger=None): for index in (f.i, f.j) if index in sum_indices] # Drop ones - factors = [e for e in factors if e != Literal(1)] + factors = [e for e in factors if e != one] # Sum factorisation def construct(ordering): @@ -331,7 +331,7 @@ def expression(index): raise ValueError("Cannot convert running index to expression.") e_i = expression(i) e_j = expression(j) - return Conditional(Comparison("==", e_i, e_j), Literal(1), Zero()) + return Conditional(Comparison("==", e_i, e_j), one, Zero()) def replace_delta(expressions): From e6aec8faa9e3b53765585dafeb5d4f4ef36ac6b7 Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Fri, 25 Nov 2016 12:08:09 +0000 Subject: [PATCH 15/18] replace sum factorisation algorithm --- gem/optimise.py | 83 ++++++++++++++++++------------------------------- 1 file changed, 30 insertions(+), 53 deletions(-) diff --git a/gem/optimise.py b/gem/optimise.py index d1b6a13b..b5d37346 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -2,9 +2,10 @@ expressions.""" from __future__ import absolute_import, print_function, division -from six.moves import map, range, zip +from six import itervalues +from six.moves import map, zip -from collections import deque +from collections import OrderedDict, deque from functools import reduce from itertools import permutations @@ -242,59 +243,35 @@ def contraction(expression, logger=None): # Drop ones factors = [e for e in factors if e != one] - # Sum factorisation - def construct(ordering): - """Construct tensor product from a given ordering.""" - # deps: Indices for each term that need to be summed over. - deps = [set(sum_indices) & set(factor.free_indices) - for factor in ordering] - - # scan_deps: Scan deps to the right with union operation. - scan_deps = [None] * len(ordering) - scan_deps[0] = deps[0] - for i in range(1, len(ordering)): - scan_deps[i] = scan_deps[i - 1] | deps[i] - - # sum_at: What IndexSum nodes should be inserted before each - # term. An IndexSum binds all terms to its right. - sum_at = [None] * len(ordering) - sum_at[0] = scan_deps[0] - for i in range(1, len(ordering)): - sum_at[i] = scan_deps[i] - scan_deps[i - 1] - - # Construct expression and count floating-point operations - expr = None - flops = 0 - for s, f in reversed(list(zip(sum_at, ordering))): - if expr is None: - expr = f - else: - expr = Product(f, expr) - flops += numpy.prod([i.extent for i in expr.free_indices], dtype=int) - if s: - flops += numpy.prod([i.extent for i in s]) - expr = IndexSum(expr, tuple(i for i in sum_indices if i in s)) - return expr, flops - - if len(factors) <= 5: - expression = None - best_flops = numpy.inf - - for ordering in permutations(factors): - expr, flops = construct(ordering) - if flops < best_flops: - expression = expr - best_flops = flops - else: - # Cheap heuristic - logger.warning("Unexpectedly many terms for sum factorisation: %d" - "; falling back on cheap heuristic.", len(factors)) + # Form groups by free indices + groups = OrderedDict() + for factor in factors: + groups[factor.free_indices] = [] + for factor in factors: + groups[factor.free_indices].append(factor) + groups = [reduce(Product, terms) for terms in itervalues(groups)] - def key(factor): - return len(set(sum_indices) & set(factor.free_indices)) - ordering = sorted(factors, key=key) + # Sum factorisation + expression = None + best_flops = numpy.inf - expression, flops = construct(ordering) + for ordering in permutations(sum_indices): + terms = groups[:] + flops = 0 + for sum_index in ordering: + contract = [t for t in terms if sum_index in t.free_indices] + deferred = [t for t in terms if sum_index not in t.free_indices] + + product = reduce(Product, contract) + term = IndexSum(product, (sum_index,)) + flops += len(contract) * numpy.prod([i.extent for i in product.free_indices], dtype=int) + terms = deferred + [term] + expr = reduce(Product, terms) + flops += (len(terms) - 1) * numpy.prod([i.extent for i in expr.free_indices], dtype=int) + + if flops < best_flops: + expression = expr + best_flops = flops return expression From ac430874c5548c8551233fdece79efc642454cd6 Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Fri, 25 Nov 2016 15:34:14 +0000 Subject: [PATCH 16/18] clean up sum factorisation --- gem/optimise.py | 90 ++++++++++++++++++++++++++++++++++--------------- tsfc/fem.py | 3 +- 2 files changed, 64 insertions(+), 29 deletions(-) diff --git a/gem/optimise.py b/gem/optimise.py index b5d37346..05c1baa4 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -195,35 +195,15 @@ def select_expression(expressions, index): return ComponentTensor(selected, alpha) -def contraction(expression, logger=None): - """Optimise the contractions of the tensor product at the root of - the expression, including: - - - IndexSum-Delta cancellation - - Sum factorisation +def delta_elimination(sum_indices, factors): + """IndexSum-Delta cancellation. - This routine was designed with finite element coefficient - evaluation in mind. + :arg sum_indices: free indices for contractions + :arg factors: product factors + :returns: optimised (sum_indices, factors) """ - # Eliminate annoying ComponentTensors - expression, = remove_componenttensors([expression]) - - # Flatten a product tree - sum_indices = [] - factors = [] - - queue = deque([expression]) - while queue: - expr = queue.popleft() - if isinstance(expr, IndexSum): - queue.append(expr.children[0]) - sum_indices.extend(expr.multiindex) - elif isinstance(expr, Product): - queue.extend(expr.children) - else: - factors.append(expr) + sum_indices = list(sum_indices) # copy for modification - # Try to eliminate Delta nodes delta_queue = [(f, index) for f in factors if isinstance(f, Delta) for index in (f.i, f.j) if index in sum_indices] @@ -241,7 +221,18 @@ def contraction(expression, logger=None): for index in (f.i, f.j) if index in sum_indices] # Drop ones - factors = [e for e in factors if e != one] + return sum_indices, [e for e in factors if e != one] + + +def sum_factorise(sum_indices, factors): + """Optimise a tensor product throw sum factorisation. + + :arg sum_indices: free indices for contractions + :arg factors: product factors + :returns: optimised GEM expression + """ + if len(sum_indices) > 5: + raise NotImplementedError("Too many indices for sum factorisation!") # Form groups by free indices groups = OrderedDict() @@ -255,17 +246,31 @@ def contraction(expression, logger=None): expression = None best_flops = numpy.inf + # Consider all orderings of contraction indices for ordering in permutations(sum_indices): terms = groups[:] flops = 0 + # Apply contraction index by index for sum_index in ordering: + # Select terms that need to be part of the contraction contract = [t for t in terms if sum_index in t.free_indices] deferred = [t for t in terms if sum_index not in t.free_indices] + # A further optimisation opportunity is to consider + # various ways of building the product tree. product = reduce(Product, contract) term = IndexSum(product, (sum_index,)) + # For the operation count estimation we assume that no + # operations were saved with the particular product tree + # that we built above. flops += len(contract) * numpy.prod([i.extent for i in product.free_indices], dtype=int) + + # Replace the contracted terms with the result of the + # contraction. terms = deferred + [term] + + # If some contraction indices were independent, then we may + # still have several terms at this point. expr = reduce(Product, terms) flops += (len(terms) - 1) * numpy.prod([i.extent for i in expr.free_indices], dtype=int) @@ -276,6 +281,37 @@ def contraction(expression, logger=None): return expression +def contraction(expression): + """Optimise the contractions of the tensor product at the root of + the expression, including: + + - IndexSum-Delta cancellation + - Sum factorisation + + This routine was designed with finite element coefficient + evaluation in mind. + """ + # Eliminate annoying ComponentTensors + expression, = remove_componenttensors([expression]) + + # Flatten a product tree + sum_indices = [] + factors = [] + + queue = deque([expression]) + while queue: + expr = queue.popleft() + if isinstance(expr, IndexSum): + queue.append(expr.children[0]) + sum_indices.extend(expr.multiindex) + elif isinstance(expr, Product): + queue.extend(expr.children) + else: + factors.append(expr) + + return sum_factorise(*delta_elimination(sum_indices, factors)) + + @singledispatch def _replace_delta(node, self): raise AssertionError("cannot handle type %s" % type(node)) diff --git a/tsfc/fem.py b/tsfc/fem.py index 545da67c..e7ab97f7 100644 --- a/tsfc/fem.py +++ b/tsfc/fem.py @@ -23,7 +23,6 @@ from tsfc import ufl2gem, geometric from tsfc.finatinterface import create_element, as_fiat_cell from tsfc.kernel_interface import ProxyKernelInterface -from tsfc.logging import logger from tsfc.modified_terminals import analyse_modified_terminal from tsfc.parameters import PARAMETERS from tsfc.ufl_utils import ModifiedTerminalMixin, PickRestriction, simplify_abs @@ -246,7 +245,7 @@ def take_singleton(xs): value = gem.IndexSum(gem.Product(gem.Indexed(table, beta + zeta), gem.Indexed(vec, beta)), beta) - optimised_value = gem.optimise.contraction(value, logger=logger) + optimised_value = gem.optimise.contraction(value) value_dict[alpha] = gem.ComponentTensor(optimised_value, zeta) # Change from FIAT to UFL arrangement From 4eea68d05f64a9551962fb89776693cc09e76b95 Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Mon, 28 Nov 2016 12:34:12 +0000 Subject: [PATCH 17/18] use setdefault --- gem/optimise.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gem/optimise.py b/gem/optimise.py index 05c1baa4..c8e9def1 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -237,9 +237,7 @@ def sum_factorise(sum_indices, factors): # Form groups by free indices groups = OrderedDict() for factor in factors: - groups[factor.free_indices] = [] - for factor in factors: - groups[factor.free_indices].append(factor) + groups.setdefault(factor.free_indices, []).append(factor) groups = [reduce(Product, terms) for terms in itervalues(groups)] # Sum factorisation From e73fceb24cffedc8e1faa9c7de7f3095133d39cd Mon Sep 17 00:00:00 2001 From: Miklos Homolya Date: Mon, 28 Nov 2016 13:42:59 +0000 Subject: [PATCH 18/18] remove redundant line --- gem/optimise.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gem/optimise.py b/gem/optimise.py index c8e9def1..58078d2b 100644 --- a/gem/optimise.py +++ b/gem/optimise.py @@ -401,7 +401,6 @@ def aggressive_unroll(expression): tensor = numpy.empty(expression.shape, dtype=object) for alpha in numpy.ndindex(expression.shape): tensor[alpha] = Indexed(expression, alpha) - expression = ListTensor(tensor) expression, = remove_componenttensors((ListTensor(tensor),)) # Unroll summation