Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[performance] cache v2 filter keyword regular expressions #2903

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions internal/cache/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,11 @@ func (c *Caches) initFilterKeyword() {
// See internal/db/bundb/filter.go.
filterKeyword2.Filter = nil

// We specifically DO NOT unset
// the regexp field here, as any
// regexp.Regexp instance is safe
// for concurrent access.

return filterKeyword2
}

Expand Down
67 changes: 51 additions & 16 deletions internal/db/bundb/filterkeyword.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/util"
"github.com/uptrace/bun"
)
Expand All @@ -34,12 +35,22 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod
"ID",
func() (*gtsmodel.FilterKeyword, error) {
var filterKeyword gtsmodel.FilterKeyword
err := f.db.

// Scan from DB.
if err := f.db.
NewSelect().
Model(&filterKeyword).
Where("? = ?", bun.Ident("id"), id).
Scan(ctx)
return &filterKeyword, err
Scan(ctx); err != nil {
return nil, err
}

// Pre-compile filter keyword regular expression.
if err := filterKeyword.Compile(); err != nil {
return nil, gtserror.Newf("error compiling filter keyword regex: %w", err)
}

return &filterKeyword, nil
},
id,
)
Expand All @@ -57,20 +68,20 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod
return filterKeyword, nil
}

func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error {
func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) (err error) {
if filterKeyword.Filter == nil {
// Filter is not set, fetch from the cache or database.
filter, err := f.state.DB.GetFilterByID(
// Don't populate the filter with all of its keywords and statuses or we'll just end up back here.
filterKeyword.Filter, err = f.state.DB.GetFilterByID(

// Don't populate the filter with all of its keywords
// and statuses or we'll just end up back here.
gtscontext.SetBarebones(ctx),
filterKeyword.FilterID,
)
if err != nil {
return err
}
filterKeyword.Filter = filter
}

return nil
}

Expand All @@ -84,6 +95,7 @@ func (f *filterDB) GetFilterKeywordsForAccountID(ctx context.Context, accountID

func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id string) ([]*gtsmodel.FilterKeyword, error) {
var filterKeywordIDs []string

if err := f.db.
NewSelect().
Model((*gtsmodel.FilterKeyword)(nil)).
Expand All @@ -92,6 +104,7 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
Scan(ctx, &filterKeywordIDs); err != nil {
return nil, err
}

if len(filterKeywordIDs) == 0 {
return nil, nil
}
Expand All @@ -101,13 +114,25 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
filterKeywordIDs,
func(uncachedFilterKeywordIDs []string) ([]*gtsmodel.FilterKeyword, error) {
uncachedFilterKeywords := make([]*gtsmodel.FilterKeyword, 0, len(uncachedFilterKeywordIDs))

// Scan from DB.
if err := f.db.
NewSelect().
Model(&uncachedFilterKeywords).
Where("? IN (?)", bun.Ident("id"), bun.In(uncachedFilterKeywordIDs)).
Scan(ctx); err != nil {
return nil, err
}

// Compile all the keyword regular expressions.
uncachedFilterKeywords = slices.DeleteFunc(uncachedFilterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool {
if err := filterKeyword.Compile(); err != nil {
log.Errorf(ctx, "error compiling filter keyword regex: %v", err)
return true
}
return false
})

return uncachedFilterKeywords, nil
},
)
Expand All @@ -125,23 +150,26 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
}

// Populate the filter keywords. Remove any that we can't populate from the return slice.
errs := gtserror.NewMultiError(len(filterKeywords))
filterKeywords = slices.DeleteFunc(filterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool {
if err := f.populateFilterKeyword(ctx, filterKeyword); err != nil {
errs.Appendf(
"error populating filter keyword %s: %w",
filterKeyword.ID,
err,
)
log.Errorf(ctx, "error populating filter keyword: %v", err)
return true
}
return false
})

return filterKeywords, errs.Combine()
return filterKeywords, nil
}

func (f *filterDB) PutFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error {
if filterKeyword.Regexp == nil {
// Ensure regexp is compiled
// before attempted caching.
err := filterKeyword.Compile()
if err != nil {
return gtserror.Newf("error compiling filter keyword regex: %w", err)
}
}
return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error {
_, err := f.db.
NewInsert().
Expand All @@ -156,7 +184,14 @@ func (f *filterDB) UpdateFilterKeyword(ctx context.Context, filterKeyword *gtsmo
if len(columns) > 0 {
columns = append(columns, "updated_at")
}

if filterKeyword.Regexp == nil {
// Ensure regexp is compiled
// before attempted caching.
err := filterKeyword.Compile()
if err != nil {
return gtserror.Newf("error compiling filter keyword regex: %w", err)
}
}
return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error {
_, err := f.db.
NewUpdate().
Expand Down
35 changes: 26 additions & 9 deletions internal/gtsmodel/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@

package gtsmodel

import "time"
import (
"regexp"
"time"
)

// Filter stores a filter created by a local account.
type Filter struct {
Expand All @@ -39,14 +42,28 @@ type Filter struct {

// FilterKeyword stores a single keyword to filter statuses against.
type FilterKeyword struct {
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated
AccountID string `bun:"type:CHAR(26),notnull,nullzero"` // ID of the local account that created the filter keyword.
FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to.
Filter *Filter `bun:"-"` // Filter corresponding to FilterID
Keyword string `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"` // The keyword or phrase to filter against.
WholeWord *bool `bun:",nullzero,notnull,default:false"` // Should the filter consider word boundaries?
ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated
AccountID string `bun:"type:CHAR(26),notnull,nullzero"` // ID of the local account that created the filter keyword.
FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to.
Filter *Filter `bun:"-"` // Filter corresponding to FilterID
Keyword string `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"` // The keyword or phrase to filter against.
WholeWord *bool `bun:",nullzero,notnull,default:false"` // Should the filter consider word boundaries?
Regexp *regexp.Regexp `bun:"-"` // pre-prepared regular expression
}

// Compile builds the case-insensitive regular expression for this
// FilterKeyword and stores it in k.Regexp. The keyword text is escaped
// with regexp.QuoteMeta, so it is always matched literally.
//
// When WholeWord is set, a `\b` anchor is added only on a side of the
// keyword that begins (or ends) with an ASCII word character. Go's `\b`
// is an ASCII word boundary: placed next to a non-word character such as
// '#', '+' or any non-ASCII letter it would demand a word character on
// the OUTSIDE of the keyword, so "#tag" would match inside "a#tag" but
// not inside " #tag".
//
// On failure k.Regexp is set to nil and the error is returned unwrapped;
// the caller is expected to wrap this error.
func (k *FilterKeyword) Compile() (err error) {
	// isWord reports whether c belongs to the ASCII class \w.
	isWord := func(c byte) bool {
		switch {
		case c >= 'a' && c <= 'z',
			c >= 'A' && c <= 'Z',
			c >= '0' && c <= '9',
			c == '_':
			return true
		default:
			return false
		}
	}

	var leading, trailing string
	if k.WholeWord != nil && *k.WholeWord && k.Keyword != "" {
		// Inspecting single bytes is enough: every byte of a
		// multi-byte UTF-8 sequence is >= 0x80, never a \w byte.
		if isWord(k.Keyword[0]) {
			leading = `\b`
		}
		if isWord(k.Keyword[len(k.Keyword)-1]) {
			trailing = `\b`
		}
	}

	// Compile keyword filter regexp.
	quoted := regexp.QuoteMeta(k.Keyword)
	k.Regexp, err = regexp.Compile(`(?i)` + leading + quoted + trailing)
	return err
}

// FilterStatus stores a single status to filter.
Expand Down
12 changes: 1 addition & 11 deletions internal/typeutils/internaltofrontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (
"errors"
"fmt"
"math"
"regexp"
"strconv"
"strings"
"time"
Expand Down Expand Up @@ -746,18 +745,9 @@ func (c *Converter) statusToAPIFilterResults(
keywordMatches := make([]string, 0, len(filter.Keywords))
fields := filterableTextFields(s)
for _, filterKeyword := range filter.Keywords {
wholeWord := util.PtrValueOr(filterKeyword.WholeWord, false)
wordBreak := ``
if wholeWord {
wordBreak = `\b`
}
re, err := regexp.Compile(`(?i)` + wordBreak + regexp.QuoteMeta(filterKeyword.Keyword) + wordBreak)
if err != nil {
return nil, err
}
var isMatch bool
for _, field := range fields {
if re.MatchString(field) {
if filterKeyword.Regexp.MatchString(field) {
isMatch = true
break
}
Expand Down
2 changes: 2 additions & 0 deletions internal/typeutils/internaltofrontend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ func (suite *InternalToFrontendTestSuite) TestWarnFilteredStatusToFrontend() {
requestingAccount := suite.testAccounts["local_account_1"]
expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"]
expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"]
suite.NoError(expectedMatchingFilterKeyword.Compile())
expectedMatchingFilterKeyword.Filter = expectedMatchingFilter
expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword}
requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter}
Expand Down Expand Up @@ -700,6 +701,7 @@ func (suite *InternalToFrontendTestSuite) TestHideFilteredStatusToFrontend() {
expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"]
expectedMatchingFilter.Action = gtsmodel.FilterActionHide
expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"]
suite.NoError(expectedMatchingFilterKeyword.Compile())
expectedMatchingFilterKeyword.Filter = expectedMatchingFilter
expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword}
requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter}
Expand Down