Skip to content

Commit

Permalink
Optimization for joins/filters on col1 = col2 where the value of col1…
Browse files Browse the repository at this point in the history
… or col2 is already fixed by a WHERE clause

Improves performance of #234, which became very slow after the paging fix in e8e6580
  • Loading branch information
MarkMpn committed Sep 8, 2022
1 parent e7408b7 commit 9973be9
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 11 deletions.
30 changes: 30 additions & 0 deletions MarkMpn.Sql4Cds.Engine.Tests/ExecutionPlanTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5105,5 +5105,35 @@ public void ExecuteSprocNamedParameters()
var execute = AssertNode<ExecuteMessageNode>(assign.Source);
var select = AssertNode<SelectNode>(plans[2]);
}

[TestMethod]
public void FoldMultipleJoinConditionsWithKnownValue()
{
    // Arrange: the join has two equality conditions (accountid = parentcustomerid and name = fullname)
    // and the WHERE clause pins a.name to a literal value.
    var metadataCache = new AttributeMetadataCache(_service);
    var builder = new ExecutionPlanBuilder(metadataCache, new StubTableSizeCache(), new StubMessageCache(), this);

    const string sql = @"SELECT a.name, c.fullname FROM account a INNER JOIN contact c ON a.accountid = c.parentcustomerid AND a.name = c.fullname WHERE a.name = 'Data8'";

    // Act
    var plans = builder.Build(sql, null, out _);

    // Assert: a single SELECT over one FetchXML scan, linked on accountid/parentcustomerid, with the
    // literal applied as a filter to both account.name and contact.fullname.
    Assert.AreEqual(1, plans.Length);
    var selectNode = AssertNode<SelectNode>(plans[0]);
    var fetchNode = AssertNode<FetchXmlScan>(selectNode.Source);
    AssertFetchXml(fetchNode, @"
<fetch xmlns:generator='MarkMpn.SQL4CDS'>
<entity name='contact'>
<attribute name='fullname' />
<link-entity name='account' alias='a' from='accountid' to='parentcustomerid' link-type='inner'>
<attribute name='name' />
<filter>
<condition attribute='name' operator='eq' value='Data8' />
</filter>
</link-entity>
<filter>
<condition attribute='fullname' operator='eq' value='Data8' />
</filter>
</entity>
</fetch>");
}
}
}
136 changes: 136 additions & 0 deletions MarkMpn.Sql4Cds.Engine/ExecutionPlan/FilterNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ public override IDataExecutionPlanNodeInternal FoldQuery(IDictionary<string, Dat
foldedFilters |= FoldTableSpoolToIndexSpool(dataSources, options, parameterTypes, hints);
foldedFilters |= FoldFiltersToDataSources(dataSources, options, parameterTypes);

if (FoldColumnComparisonsWithKnownValues(dataSources, parameterTypes))
foldedFilters |= FoldFiltersToDataSources(dataSources, options, parameterTypes);

foreach (var addedLink in addedLinks)
{
addedLink.Key.SemiJoin = true;
Expand Down Expand Up @@ -713,6 +716,139 @@ private bool FoldFiltersToDataSources(IDictionary<string, DataSource> dataSource
return foldedFilters;
}

/// <summary>
/// Rewrites <c>col1 = col2</c> style comparisons in this node's filter wherever one of the columns
/// already has a value fixed by a condition in an underlying FetchXML query.
/// </summary>
/// <param name="dataSources">The available data sources, keyed by name</param>
/// <param name="parameterTypes">The types of the parameters available to the query</param>
/// <returns><c>true</c> if <see cref="Filter"/> was replaced with a rewritten expression</returns>
/// <exception cref="NotSupportedQueryFragmentException">A FetchXML scan refers to a data source that is not in <paramref name="dataSources"/></exception>
private bool FoldColumnComparisonsWithKnownValues(IDictionary<string, DataSource> dataSources, IDictionary<string, DataTypeReference> parameterTypes)
{
    var changed = false;

    // Find all the data source nodes we could fold this into. Include direct data sources, those from either side of an inner join, or the main side of an outer join
    foreach (var source in GetFoldableSources(Source))
    {
        // Only non-aggregate FetchXML scans are handled; other source types (e.g. metadata queries) are skipped.
        if (!(source is FetchXmlScan scan) || scan.FetchXml.aggregate)
        {
            continue;
        }

        if (!dataSources.TryGetValue(scan.DataSource, out _))
        {
            throw new NotSupportedQueryFragmentException("Missing datasource " + scan.DataSource);
        }

        var sourceSchema = source.GetSchema(dataSources, parameterTypes);
        var rewritten = FoldColumnComparisonsWithKnownValues(dataSources, parameterTypes, scan, sourceSchema, Filter);

        // The rewrite returns the same instance when nothing was substituted
        if (rewritten == Filter)
        {
            continue;
        }

        Filter = rewritten;
        changed = true;
    }

    return changed;
}

/// <summary>
/// Walks a filter expression and, for each comparison between two columns, substitutes a literal for
/// whichever column is found to have a known value in the FetchXML query.
/// </summary>
/// <param name="dataSources">The available data sources, keyed by name</param>
/// <param name="parameterTypes">The types of the parameters available to the query</param>
/// <param name="fetchXml">The FetchXML scan whose existing filters are searched for known values</param>
/// <param name="schema">The schema used to resolve column references</param>
/// <param name="filter">The expression to rewrite</param>
/// <returns>A new expression if anything was substituted, otherwise the same <paramref name="filter"/> instance</returns>
private BooleanExpression FoldColumnComparisonsWithKnownValues(IDictionary<string, DataSource> dataSources, IDictionary<string, DataTypeReference> parameterTypes, FetchXmlScan fetchXml, INodeSchema schema, BooleanExpression filter)
{
    // AND: rewrite each side independently and rebuild the node only if either side changed.
    // Other binary expression types are returned untouched.
    if (filter is BooleanBinaryExpression conjunction &&
        conjunction.BinaryExpressionType == BooleanBinaryExpressionType.And)
    {
        var left = FoldColumnComparisonsWithKnownValues(dataSources, parameterTypes, fetchXml, schema, conjunction.FirstExpression);
        var right = FoldColumnComparisonsWithKnownValues(dataSources, parameterTypes, fetchXml, schema, conjunction.SecondExpression);

        if (left == conjunction.FirstExpression && right == conjunction.SecondExpression)
        {
            return filter;
        }

        return new BooleanBinaryExpression
        {
            FirstExpression = left,
            BinaryExpressionType = conjunction.BinaryExpressionType,
            SecondExpression = right
        };
    }

    // Only comparisons with a column reference on both sides are candidates
    if (!(filter is BooleanComparisonExpression comparison) ||
        !(comparison.FirstExpression is ColumnReferenceExpression leftColumn) ||
        !(comparison.SecondExpression is ColumnReferenceExpression rightColumn))
    {
        return filter;
    }

    // Try the left-hand column first; the right-hand column is only checked if the left has no known value
    if (HasKnownValue(dataSources, parameterTypes, fetchXml, leftColumn, schema, out var known))
    {
        return new BooleanComparisonExpression
        {
            FirstExpression = known,
            ComparisonType = comparison.ComparisonType,
            SecondExpression = rightColumn
        };
    }

    if (HasKnownValue(dataSources, parameterTypes, fetchXml, rightColumn, schema, out known))
    {
        return new BooleanComparisonExpression
        {
            FirstExpression = leftColumn,
            ComparisonType = comparison.ComparisonType,
            SecondExpression = known
        };
    }

    return filter;
}

/// <summary>
/// Checks whether a column is fixed to a single value by an equality condition in the FetchXML query.
/// </summary>
/// <param name="dataSources">The available data sources, keyed by name (not used by this method)</param>
/// <param name="parameterTypes">The types of the parameters available to the query (not used by this method)</param>
/// <param name="fetchXml">The FetchXML scan whose filters are searched</param>
/// <param name="col">The column reference to look up</param>
/// <param name="schema">The schema used to resolve <paramref name="col"/> to a full column name</param>
/// <param name="value">The literal the column is fixed to, or <c>null</c> if none was found</param>
/// <returns><c>true</c> if a fixing condition was found</returns>
private bool HasKnownValue(IDictionary<string, DataSource> dataSources, IDictionary<string, DataTypeReference> parameterTypes, FetchXmlScan fetchXml, ColumnReferenceExpression col, INodeSchema schema, out Literal value)
{
    value = null;

    if (!schema.ContainsColumn(col.GetColumnName(), out var colName))
        return false;

    // The lookups below need exactly "alias.column". Anything else (no table prefix, or extra dots)
    // can't be matched reliably, so report "no known value" rather than indexing past the array or
    // matching on the wrong name parts.
    var parts = colName.Split('.');

    if (parts.Length != 2)
        return false;

    var tableAlias = parts[0];
    var attributeName = parts[1];
    object[] items;

    if (tableAlias == fetchXml.Alias)
        items = fetchXml.Entity.Items;
    else
        items = fetchXml.Entity.GetLinkEntities().SingleOrDefault(le => le.alias == tableAlias)?.Items;

    if (items != fetchXml.Entity.Items)
    {
        // The column is not on the root entity: root-level filters can still refer to it by
        // naming the table in the condition's entityname.
        foreach (var filter in fetchXml.Entity.Items.OfType<filter>())
        {
            if (HasKnownValue(tableAlias, attributeName, filter, out value))
                return true;
        }
    }

    if (items == null)
        return false;

    // Filters attached directly to the entity/link-entity the column belongs to, where conditions
    // carry no entityname.
    // NOTE(review): a filter inside a link-entity is treated as fixing the value regardless of the
    // link type. For an outer link-entity the column can presumably still be NULL on unmatched rows -
    // confirm whether such link-entities need to be excluded here.
    foreach (var filter in items.OfType<filter>())
    {
        if (HasKnownValue(null, attributeName, filter, out value))
            return true;
    }

    return false;
}

/// <summary>
/// Looks in a FetchXML filter for an equality condition that fixes a column to a single value.
/// </summary>
/// <param name="table">The entityname the condition must carry (<c>null</c> to match conditions without one)</param>
/// <param name="column">The attribute name the condition must apply to</param>
/// <param name="filter">The filter to search</param>
/// <param name="value">A string literal holding the condition's value, or <c>null</c> if none was found</param>
/// <returns><c>true</c> if a fixing condition was found</returns>
private bool HasKnownValue(string table, string column, filter filter, out Literal value)
{
    value = null;

    // Within an OR filter no individual condition is guaranteed to hold
    if (filter.type == filterType.or)
        return false;

    if (filter.Items == null)
        return false;

    foreach (var condition in filter.Items.OfType<condition>())
    {
        // A condition without an inline value (e.g. one comparing against something other than a
        // literal) doesn't give us a literal to substitute, so it can't be used.
        if (condition.entityname == table &&
            condition.attribute == column &&
            condition.@operator == @operator.eq &&
            condition.value != null)
        {
            value = new StringLiteral { Value = condition.value };
            return true;
        }
    }

    // Sub-filters of a non-OR filter must hold as well, so a fixing condition inside one of them is
    // just as binding. The recursive call rejects nested OR filters.
    foreach (var nested in filter.Items.OfType<filter>())
    {
        if (HasKnownValue(table, column, nested, out value))
            return true;
    }

    return false;
}

private BooleanIsNullExpression FindNotNullFilter(BooleanExpression filter, string attribute, out bool and)
{
if (filter is BooleanIsNullExpression isNull &&
Expand Down
50 changes: 48 additions & 2 deletions MarkMpn.Sql4Cds.Engine/ExecutionPlanBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3216,16 +3216,17 @@ private IDataExecutionPlanNodeInternal ConvertTableReference(TableReference refe
var rhs = ConvertTableReference(join.SecondTableReference, hints, query, outerSchema, outerReferences, parameterTypes);
var lhsSchema = lhs.GetSchema(DataSources, parameterTypes);
var rhsSchema = rhs.GetSchema(DataSources, parameterTypes);
var fixedValueColumns = GetFixedValueColumnsFromWhereClause(query, lhsSchema, rhsSchema);

var joinConditionVisitor = new JoinConditionVisitor(lhsSchema, rhsSchema);
var joinConditionVisitor = new JoinConditionVisitor(lhsSchema, rhsSchema, fixedValueColumns);
join.SearchCondition.Accept(joinConditionVisitor);

// If we didn't find any join criteria equating two columns in the table, try again
// but allowing computed columns instead. This lets us use more efficient join types (merge or hash join)
// by pre-computing the values of the expressions to use as the join keys
if (joinConditionVisitor.LhsKey == null || joinConditionVisitor.RhsKey == null)
{
joinConditionVisitor = new JoinConditionVisitor(lhsSchema, rhsSchema);
joinConditionVisitor = new JoinConditionVisitor(lhsSchema, rhsSchema, fixedValueColumns);
joinConditionVisitor.AllowExpressions = true;

join.SearchCondition.Accept(joinConditionVisitor);
Expand Down Expand Up @@ -3447,6 +3448,51 @@ private IDataExecutionPlanNodeInternal ConvertTableReference(TableReference refe
throw new NotSupportedQueryFragmentException("Unhandled table reference", reference);
}

/// <summary>
/// Collects the columns that the WHERE clause of a query compares for equality with a literal.
/// </summary>
/// <param name="query">The query fragment to inspect; only a <see cref="QuerySpecification"/> with a WHERE clause contributes columns</param>
/// <param name="schemas">The schemas used to resolve column references, tried in order</param>
/// <returns>The resolved column names; empty if there is nothing to inspect</returns>
private HashSet<string> GetFixedValueColumnsFromWhereClause(TSqlFragment query, params INodeSchema[] schemas)
{
    var fixedColumns = new HashSet<string>();
    var whereClause = (query as QuerySpecification)?.WhereClause;

    if (whereClause != null)
    {
        GetFixedValueColumnsFromWhereClause(fixedColumns, whereClause.SearchCondition, schemas);
    }

    return fixedColumns;
}

/// <summary>
/// Recursively scans a search condition for <c>column = literal</c> (or <c>literal = column</c>)
/// comparisons and records the resolved column names.
/// </summary>
/// <param name="columns">The set that found column names are added to</param>
/// <param name="searchCondition">The condition to scan; only equality comparisons and AND combinations are examined</param>
/// <param name="schemas">The schemas used to resolve column references; the first one containing the column wins</param>
private void GetFixedValueColumnsFromWhereClause(HashSet<string> columns, BooleanExpression searchCondition, INodeSchema[] schemas)
{
    // AND: both sides apply, so scan each of them. Other combinations are ignored.
    if (searchCondition is BooleanBinaryExpression conjunction &&
        conjunction.BinaryExpressionType == BooleanBinaryExpressionType.And)
    {
        GetFixedValueColumnsFromWhereClause(columns, conjunction.FirstExpression, schemas);
        GetFixedValueColumnsFromWhereClause(columns, conjunction.SecondExpression, schemas);
        return;
    }

    if (!(searchCondition is BooleanComparisonExpression comparison) ||
        comparison.ComparisonType != BooleanComparisonType.Equals)
    {
        return;
    }

    // Accept the column on either side of the comparison, as long as the other side is a literal
    ColumnReferenceExpression fixedColumn = null;

    if (comparison.FirstExpression is ColumnReferenceExpression leftColumn && comparison.SecondExpression is Literal)
    {
        fixedColumn = leftColumn;
    }
    else if (comparison.SecondExpression is ColumnReferenceExpression rightColumn && comparison.FirstExpression is Literal)
    {
        fixedColumn = rightColumn;
    }

    if (fixedColumn == null)
    {
        return;
    }

    // Record the name reported by the first schema that contains the column
    foreach (var schema in schemas)
    {
        if (!schema.ContainsColumn(fixedColumn.GetColumnName(), out var schemaColumnName))
        {
            continue;
        }

        columns.Add(schemaColumnName);
        break;
    }
}

private IDataExecutionPlanNodeInternal ConvertInlineDerivedTable(InlineDerivedTable inlineDerivedTable, IList<OptimizerHint> hints, INodeSchema outerSchema, Dictionary<string, string> outerReferences, IDictionary<string, DataTypeReference> parameterTypes)
{
// Check all the rows have the expected number of values and column names are unique
Expand Down
40 changes: 31 additions & 9 deletions MarkMpn.Sql4Cds.Engine/Visitors/JoinConditionVisitor.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;
using System.Linq;
using MarkMpn.Sql4Cds.Engine.ExecutionPlan;
using Microsoft.SqlServer.TransactSql.ScriptDom;
Expand All @@ -9,11 +10,15 @@ internal class JoinConditionVisitor : TSqlConcreteFragmentVisitor
{
private readonly INodeSchema _lhs;
private readonly INodeSchema _rhs;
private readonly HashSet<string> _fixedValueColumns;
private string _lhsColName;
private string _rhsColName;

public JoinConditionVisitor(INodeSchema lhs, INodeSchema rhs)
public JoinConditionVisitor(INodeSchema lhs, INodeSchema rhs, HashSet<string> fixedValueColumns)
{
_lhs = lhs;
_rhs = rhs;
_fixedValueColumns = fixedValueColumns;
}

public bool AllowExpressions { get; set; }
Expand All @@ -39,18 +44,35 @@ node.SecondExpression is ColumnReferenceExpression rhsCol &&
var lhsName = lhsCol.GetColumnName();
var rhsName = rhsCol.GetColumnName();

if (_lhs.ContainsColumn(lhsName, out _) && _rhs.ContainsColumn(rhsName, out _))
if (_lhs.ContainsColumn(lhsName, out var lhsColName) && _rhs.ContainsColumn(rhsName, out var rhsColName))
{
LhsKey = lhsCol;
RhsKey = rhsCol;
}
else if (_lhs.ContainsColumn(rhsName, out _) && _rhs.ContainsColumn(lhsName, out _))
else if (_lhs.ContainsColumn(rhsName, out rhsColName) && _rhs.ContainsColumn(lhsName, out lhsColName))
{
(lhsCol, rhsCol) = (rhsCol, lhsCol);
(lhsColName, rhsName) = (rhsName, lhsColName);
}
else
{
LhsKey = rhsCol;
RhsKey = lhsCol;
return;
}

// Use this join key if we don't already have one or this is better (prefer joining on primary/foreign key vs. other fields,
// and prefer using columns that aren't being filtered on separately - we can apply them as secondary filters on the joined
// table as well rather than using them as the join key).
if (JoinCondition == null ||
_lhs.PrimaryKey == lhsColName ||
_rhs.PrimaryKey == rhsColName ||
_fixedValueColumns.Contains(_lhsColName) ||
_fixedValueColumns.Contains(_rhsColName))
{
LhsKey = lhsCol;
RhsKey = rhsCol;
_lhsColName = lhsColName;
_rhsColName = rhsColName;
JoinCondition = node;
}

JoinCondition = node;
return;
}

Expand Down Expand Up @@ -91,7 +113,7 @@ node.SecondExpression is ColumnReferenceExpression rhsCol &&

public override void ExplicitVisit(BooleanBinaryExpression node)
{
if (node.BinaryExpressionType == BooleanBinaryExpressionType.And && JoinCondition == null)
if (node.BinaryExpressionType == BooleanBinaryExpressionType.And)
base.ExplicitVisit(node);
}
}
Expand Down

0 comments on commit 9973be9

Please sign in to comment.