diff --git a/cmd/server/main.go b/cmd/server/main.go index 1f7da28e..90ef6705 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -129,9 +129,9 @@ func main() { var pipeline *pipeliner if debug, err := strconv.ParseBool(os.Getenv("DEBUG_NAME_PIPELINE")); debug && err == nil { - pipeline = newPipeliner(logger) + pipeline = newPipeliner(logger, true) } else { - pipeline = newPipeliner(log.NewNopLogger()) + pipeline = newPipeliner(log.NewNopLogger(), false) } searchWorkers := readInt(os.Getenv("SEARCH_MAX_WORKERS"), *flagWorkers) diff --git a/cmd/server/pipeline.go b/cmd/server/pipeline.go index 8cf80d86..d7dc9cae 100644 --- a/cmd/server/pipeline.go +++ b/cmd/server/pipeline.go @@ -210,15 +210,21 @@ func (ds *debugStep) apply(in *Name) error { return nil } -func newPipeliner(logger log.Logger) *pipeliner { +func newPipeliner(logger log.Logger, debug bool) *pipeliner { + steps := []step{ + &reorderSDNStep{}, + &companyNameCleanupStep{}, + &stopwordsStep{}, + &normalizeStep{}, + } + if debug { + for i := range steps { + steps[i] = &debugStep{logger: logger, step: steps[i]} + } + } return &pipeliner{ logger: logger, - steps: []step{ - &debugStep{logger: logger, step: &reorderSDNStep{}}, - &debugStep{logger: logger, step: &companyNameCleanupStep{}}, - &debugStep{logger: logger, step: &stopwordsStep{}}, - &debugStep{logger: logger, step: &normalizeStep{}}, - }, + steps: steps, } } diff --git a/cmd/server/pipeline_normalize.go b/cmd/server/pipeline_normalize.go index cade7ee6..7495bdc5 100644 --- a/cmd/server/pipeline_normalize.go +++ b/cmd/server/pipeline_normalize.go @@ -6,6 +6,7 @@ package main import ( "strings" + "sync" "unicode" "golang.org/x/text/runes" @@ -34,7 +35,35 @@ func precompute(s string) string { trimmed := strings.TrimSpace(strings.ToLower(punctuationReplacer.Replace(s))) // UTF-8 normalization - t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC) // Mn: nonspacing marks - result, _, _ := transform.String(t, trimmed) + chain := getTransformChain() + defer saveBuffer(chain) + + result, _, _ := transform.String(chain, trimmed) return result } + +var ( + transformChainPool = sync.Pool{ + New: func() any { + return newTransformChain() + }, + } +) + +func newTransformChain() transform.Transformer { + nonspacingMarksRemover := runes.Remove(runes.In(unicode.Mn)) // Mn: nonspacing marks + return transform.Chain(norm.NFD, nonspacingMarksRemover, norm.NFC) +} + +func getTransformChain() transform.Transformer { + t, ok := transformChainPool.Get().(transform.Transformer) + if !ok { + return newTransformChain() + } + return t +} + +func saveBuffer(t transform.Transformer) { + t.Reset() + transformChainPool.Put(t) +} diff --git a/cmd/server/pipeline_test.go b/cmd/server/pipeline_test.go index 880540ad..71657142 100644 --- a/cmd/server/pipeline_test.go +++ b/cmd/server/pipeline_test.go @@ -17,7 +17,7 @@ var ( steps: []step{}, } - noLogPipeliner = newPipeliner(log.NewNopLogger()) + noLogPipeliner = newPipeliner(log.NewNopLogger(), false) ) func TestPipelineNoop(t *testing.T) { @@ -75,6 +75,7 @@ func TestFullPipeline(t *testing.T) { // Normalize ("-" -> " ") {company("ANGLO-CARIBBEAN CO., LTD."), "anglo caribbean"}, } + for i := range cases { if err := noLogPipeliner.Do(cases[i].in); err != nil { t.Error(err) diff --git a/cmd/server/search_benchmark_test.go b/cmd/server/search_benchmark_test.go index 732597af..df59b35c 100644 --- a/cmd/server/search_benchmark_test.go +++ b/cmd/server/search_benchmark_test.go @@ -19,8 +19,14 @@ func BenchmarkSearch__All(b *testing.B) { searcher := createBenchmarkSearcher(b) b.ResetTimer() + var filters filterRequest + for i := 0; i < b.N; i++ { - buildFullSearchResponse(searcher, filterRequest{}, 10, 0.0, fake.Person().Name()) + b.StopTimer() + name := fake.Person().Name() + b.StartTimer() + + buildFullSearchResponse(searcher, filters, 10, 0.0, name) } } diff --git a/go.mod b/go.mod index a8d31884..19dbb662 100644 --- a/go.mod +++ b/go.mod @@ -17,8 +17,8 @@ require ( github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 go4.org v0.0.0-20230225012048-214862532bf5 golang.org/x/oauth2 v0.14.0 - golang.org/x/sync v0.6.0 - golang.org/x/text v0.14.0 + golang.org/x/sync v0.8.0 + golang.org/x/text v0.19.0 ) require ( diff --git a/go.sum b/go.sum index e7d49230..1c0e32c1 100644 --- a/go.sum +++ b/go.sum @@ -192,8 +192,8 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= -golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -224,8 +224,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=