Skip to content

Commit

Permalink
sql: optimize NeededColumnFamilyIDs
Browse files Browse the repository at this point in the history
I made the logic for determining which column families need to be
scanned more clever in two ways:

- Previously we were always including column family 0 as a sentinel,
  since other column families have no KV entry if all their columns are
  null. This is not necessary if any of the column families being
  scanned has a NOT NULL column, because that family then always has a
  KV entry for an existing row and can act as the sentinel itself.
- If a needed column is indexed and not composite, it can be decoded
  from the key, so we don't need to take it into account when
  determining the needed column families.

Release note: None
  • Loading branch information
solongordon committed Jan 22, 2020
1 parent 989bc3d commit 40cc474
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 20 deletions.
4 changes: 2 additions & 2 deletions pkg/sql/span/span_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ func MakeBuilder(table *sqlbase.TableDescriptor, index *sqlbase.IndexDescriptor)
// SetNeededColumns sets the needed columns on the Builder. This information
// is used by MaybeSplitSpanIntoSeparateFamilies.
//
// The needed columns are not stored as-is: they are translated into the list
// of column family IDs returned by sqlbase.NeededColumnFamilyIDs, and that
// list is what gets saved in s.neededFamilies.
func (s *Builder) SetNeededColumns(neededCols util.FastIntSet) {
s.neededFamilies = sqlbase.NeededColumnFamilyIDs(s.table.ColumnIdxMap(), s.table.Families, neededCols)
s.neededFamilies = sqlbase.NeededColumnFamilyIDs(neededCols, s.table, s.index)
}

// UnsetNeededColumns resets the needed columns for column family specific optimizations
Expand Down Expand Up @@ -257,7 +257,7 @@ func (s *Builder) appendSpansFromConstraintSpan(
// families, only scan the relevant column families. This is disabled for
// deletions to ensure that the entire row is deleted.
if !forDelete && needed.Len() > 0 && span.Key.Equal(span.EndKey) {
neededFamilyIDs := sqlbase.NeededColumnFamilyIDs(s.table.ColumnIdxMap(), s.table.Families, needed)
neededFamilyIDs := sqlbase.NeededColumnFamilyIDs(needed, s.table, s.index)
if s.CanSplitSpanIntoSeparateFamilies(len(neededFamilyIDs), cs.StartKey().Length(), containsNull) {
return append(spans, sqlbase.SplitSpanIntoSeparateFamilies(span, neededFamilyIDs)...), nil
}
Expand Down
72 changes: 54 additions & 18 deletions pkg/sql/sqlbase/index_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,35 +239,71 @@ func MakeSpanFromEncDatums(
return roachpb.Span{Key: startKey, EndKey: endKey}, containsNull, nil
}

// NeededColumnFamilyIDs returns a slice of FamilyIDs which contain
// the families needed to load a set of neededCols
// NeededColumnFamilyIDs returns the minimal set of column families required to
// retrieve neededCols for the specified table and index.
//
// neededCols is consulted by column ordinal (the value colIdxMap yields for a
// column ID, which is also used to index table.Columns), not by column ID.
// The index's ColumnIDs and CompositeColumnIDs determine which needed columns
// are treated as decodable from the key. Family 0 is placed first in the
// result when no needed family is family 0 or contains a non-nullable column.
//
// The function panics if the table has no column family with ID 0.
func NeededColumnFamilyIDs(
colIdxMap map[ColumnID]int, families []ColumnFamilyDescriptor, neededCols util.FastIntSet,
neededCols util.FastIntSet, table *TableDescriptor, index *IndexDescriptor,
) []FamilyID {
// Column family 0 is always included so we can distinguish null rows from
// absent rows.
needed := []FamilyID{0}
for i := range families {
family := &families[i]
colIdxMap := table.ColumnIdxMap()

// Gather the index's column IDs and its composite column IDs into sets so
// the per-column membership checks in the family loop below are simple
// lookups.
var indexedIDs util.FastIntSet
var compositeIDs util.FastIntSet
for _, columnID := range index.ColumnIDs {
indexedIDs.Add(int(columnID))
}
for _, columnID := range index.CompositeColumnIDs {
compositeIDs.Add(int(columnID))
}

// The column family with ID 0 is special because it always has a KV entry.
// Other column families will omit a value if all their columns are null, so
// we may need to retrieve family 0 to use as a sentinel for distinguishing
// between null values and the absence of a row.
var family0 *ColumnFamilyDescriptor

// Iterate over the column families to find which ones contain needed
// columns. We also keep track of whether all of the needed family columns are
// nullable, since this means we need column family 0 as a sentinel.
var neededFamilyIDs []FamilyID
allFamiliesNullable := true
for i := range table.Families {
family := &table.Families[i]
if family.ID == 0 {
// Already added above.
continue
// Set column family 0 aside in case we need it as a sentinel.
family0 = family
}
needed := false
nullable := true
for _, columnID := range family.ColumnIDs {
columnOrdinal := colIdxMap[columnID]
if neededCols.Contains(columnOrdinal) {
needed = append(needed, family.ID)
break
// We need this column family if it includes a needed column, unless that
// column can be decoded from the key, meaning it is indexed and not
// composite.
if neededCols.Contains(columnOrdinal) &&
(!indexedIDs.Contains(int(columnID)) || compositeIDs.Contains(int(columnID))) {
needed = true
}
// NOTE(review): every non-nullable column marks the family as non-nullable
// here, including columns that are indexed and not composite. If such
// columns are only encoded in the key (as the comment above implies), they
// may not guarantee that this family has a KV entry, in which case family 0
// could be omitted when it is still required as a sentinel — confirm.
if !table.Columns[columnOrdinal].Nullable {
nullable = false
}
}
if needed {
neededFamilyIDs = append(neededFamilyIDs, family.ID)
// A needed family that is family 0, or that has a non-nullable column,
// removes the need to add family 0 separately as a sentinel.
if !nullable || family.ID == 0 {
allFamiliesNullable = false
}
}
}
// family0 is only consulted here, as a check that the table actually has a
// column family with ID 0.
if family0 == nil {
panic("column family 0 not found")
}

// TODO(solon): There is a further optimization possible here: if there is at
// least one non-nullable column in the needed column families, we can
// potentially omit the primary family, since the primary keys are encoded
// in all families. (Note that composite datums are an exception.)
// If all the needed families are nullable, we need family 0 as a sentinel.
// Family 0 cannot already be in neededFamilyIDs at this point: appending it
// in the loop above would have cleared allFamiliesNullable.
if allFamiliesNullable {
return append([]FamilyID{0}, neededFamilyIDs...)
}

return needed
return neededFamilyIDs
}

// SplitSpanIntoSeparateFamilies can only be used to split a span representing
Expand Down

0 comments on commit 40cc474

Please sign in to comment.