diff --git a/api/client.yaml b/api/client.yaml index 79acef81..9c847669 100644 --- a/api/client.yaml +++ b/api/client.yaml @@ -1141,6 +1141,10 @@ components: type: number description: Match percentage of search query example: 0.91 + matchedName: + type: string + description: The record's name or alternate name that scored highest against the search query + example: Jane Doe MilitaryEndUser: properties: entityID: @@ -1165,6 +1169,10 @@ components: type: number description: Match percentage of search query example: 0.92 + matchedName: + type: string + description: The record's name or alternate name that scored highest against the search query + example: Jane Doe Unverified: properties: entityID: @@ -1189,6 +1197,10 @@ components: type: number description: Match percentage of search query example: 0.92 + matchedName: + type: string + description: The record's name or alternate name that scored highest against the search query + example: Jane Doe NonProliferationSanction: properties: entityID: @@ -1233,6 +1245,10 @@ components: type: number description: Match percentage of search query example: 0.92 + matchedName: + type: string + description: The record's name or alternate name that scored highest against the search query + example: Jane Doe ForeignSanctionsEvader: properties: entityID: @@ -1281,6 +1297,10 @@ components: type: number description: Match percentage of search query example: 0.92 + matchedName: + type: string + description: The record's name or alternate name that scored highest against the search query + example: Jane Doe PalestinianLegislativeCouncil: properties: entityID: @@ -1333,6 +1353,10 @@ components: type: number description: Match percentage of search query example: 0.92 + matchedName: + type: string + description: The record's name or alternate name that scored highest against the search query + example: Jane Doe CAPTAList: properties: entityID: @@ -1391,6 +1415,10 @@ components: type: number description: Match percentage of search query example: 0.92 + matchedName: + type: string + description: The record's name or alternate name that scored highest against the search query + example: Jane Doe ITARDebarred: properties: entityID: @@ -1419,6 +1447,10 @@ components: type: number description: Match percentage of search query 
example: 0.92 + matchedName: + type: string + description: The record's name or alternate name that scored highest against the search query + example: Jane Doe NonSDNChineseMilitaryIndustrialComplex: properties: entityID: @@ -1475,6 +1507,10 @@ components: type: number description: Match percentage of search query example: 0.92 + matchedName: + type: string + description: The record's name or alternate name that scored highest against the search query + example: Jane Doe NonSDNMenuBasedSanctionsList: properties: EntityID: @@ -1531,6 +1567,10 @@ components: type: number description: Match percentage of search query example: 0.92 + matchedName: + type: string + description: The record's name or alternate name that scored highest against the search query + example: Jane Doe EUConsolidatedSanctionsList: properties: fileGenerationDate: @@ -1593,11 +1633,15 @@ components: description: Match percentage of search query example: 0.92 type: number + matchedName: + type: string + description: The record's name or alternate name that scored highest against the search query + example: Jane Doe UKConsolidatedSanctionsList: properties: names: type: array - items: + items: type: string addresses: type: array @@ -1613,11 +1657,15 @@ components: description: Match percentage of search query example: 0.92 type: number + matchedName: + type: string + description: The record's name or alternate name that scored highest against the search query + example: Jane Doe UKSanctionsList: properties: names: type: array - items: + items: type: string nonLatinNames: type: array @@ -1641,6 +1689,10 @@ components: description: Match percentage of search query example: 0.92 type: number + matchedName: + type: string + description: The record's name or alternate name that scored highest against the search query + example: Jane Doe UpdateOfacCompanyStatus: description: Request body to update a company status. 
properties: diff --git a/cmd/server/largest.go b/cmd/server/largest.go index 204f04d4..627a9e97 100644 --- a/cmd/server/largest.go +++ b/cmd/server/largest.go @@ -8,8 +8,9 @@ import "sync" // item represents an arbitrary value with an associated weight type item struct { - value interface{} - weight float64 + matched string + value interface{} + weight float64 } // newLargest returns a `largest` instance which can be used to track items with the highest weights diff --git a/cmd/server/search_eu_csl_test.go b/cmd/server/search_eu_csl_test.go index 714c9d80..22422356 100644 --- a/cmd/server/search_eu_csl_test.go +++ b/cmd/server/search_eu_csl_test.go @@ -29,6 +29,7 @@ func TestSearch__EU_CSL(t *testing.T) { require.Equal(t, http.StatusOK, w.Code) require.Contains(t, w.Body.String(), `"match":0.65555`) + require.Contains(t, w.Body.String(), `"matchedName":"saddam hussein al tikriti"`) var wrapper struct { EUConsolidatedSanctionsList []csl.EUCSLRecord `json:"euConsolidatedSanctionsList"` diff --git a/cmd/server/search_generic.go b/cmd/server/search_generic.go index a0f2af43..8a1d2e5f 100644 --- a/cmd/server/search_generic.go +++ b/cmd/server/search_generic.go @@ -6,7 +6,6 @@ package main import ( "encoding/json" - "math" "reflect" "sync" ) @@ -14,7 +13,9 @@ import ( type Result[T any] struct { Data T - match float64 + match float64 + matchedName string + precomputedName string precomputedAlts []string } @@ -40,6 +41,7 @@ func (e Result[T]) MarshalJSON() ([]byte, error) { } result["match"] = e.match + result["matchedName"] = e.matchedName return json.Marshal(result) } @@ -60,15 +62,21 @@ func topResults[T any](limit int, minMatch float64, name string, data []*Result[ defer wg.Done() it := &item{ - value: data[i], - weight: jaroWinkler(data[i].precomputedName, name), + matched: data[i].precomputedName, + value: data[i], + weight: jaroWinkler(data[i].precomputedName, name), } for _, alt := range data[i].precomputedAlts { if alt == "" { continue } - it.weight = 
math.Max(it.weight, jaroWinkler(alt, name)) + + score := jaroWinkler(alt, name) + if score > it.weight { + it.matched = alt + it.weight = score + } } xs.add(it) @@ -86,6 +94,7 @@ func topResults[T any](limit int, minMatch float64, name string, data []*Result[ res := &Result[T]{ Data: vv.Data, match: v.weight, + matchedName: v.matched, precomputedName: vv.precomputedName, precomputedAlts: vv.precomputedAlts, } diff --git a/cmd/server/search_handlers_test.go b/cmd/server/search_handlers_test.go index aca8e5d0..f96a70ff 100644 --- a/cmd/server/search_handlers_test.go +++ b/cmd/server/search_handlers_test.go @@ -66,13 +66,9 @@ func TestSearch__AddressCountry(t *testing.T) { router.ServeHTTP(w, req) w.Flush() - if w.Code != http.StatusOK { - t.Errorf("bogus status code: %d", w.Code) - } + require.Equal(t, http.StatusOK, w.Code) + require.Contains(t, w.Body.String(), `"match":1`) - if v := w.Body.String(); !strings.Contains(v, `"match":1`) { - t.Errorf("%#v", v) - } } func TestSearch__AddressMulti(t *testing.T) { @@ -84,13 +80,9 @@ func TestSearch__AddressMulti(t *testing.T) { router.ServeHTTP(w, req) w.Flush() - if w.Code != http.StatusOK { - t.Errorf("bogus status code: %d", w.Code) - } + require.Equal(t, http.StatusOK, w.Code) + require.Contains(t, w.Body.String(), `"match":0.8847`) - if v := w.Body.String(); !strings.Contains(v, `"match":0.8847`) { - t.Errorf("%#v", v) - } } func TestSearch__AddressProvidence(t *testing.T) { @@ -102,13 +94,9 @@ func TestSearch__AddressProvidence(t *testing.T) { router.ServeHTTP(w, req) w.Flush() - if w.Code != http.StatusOK { - t.Errorf("bogus status code: %d", w.Code) - } + require.Equal(t, http.StatusOK, w.Code) + require.Contains(t, w.Body.String(), `"match":0.923`) - if v := w.Body.String(); !strings.Contains(v, `"match":0.923`) { - t.Errorf("%#v", v) - } } func TestSearch__AddressCity(t *testing.T) { @@ -120,13 +108,9 @@ func TestSearch__AddressCity(t *testing.T) { router.ServeHTTP(w, req) w.Flush() - if w.Code != http.StatusOK 
{ - t.Errorf("bogus status code: %d", w.Code) - } + require.Equal(t, http.StatusOK, w.Code) + require.Contains(t, w.Body.String(), `"match":0.923`) - if v := w.Body.String(); !strings.Contains(v, `"match":0.923`) { - t.Errorf("%#v", v) - } } func TestSearch__AddressState(t *testing.T) { @@ -138,13 +122,9 @@ func TestSearch__AddressState(t *testing.T) { router.ServeHTTP(w, req) w.Flush() - if w.Code != http.StatusOK { - t.Errorf("bogus status code: %d", w.Code) - } + require.Equal(t, http.StatusOK, w.Code) + require.Contains(t, w.Body.String(), `"match":0.923`) - if v := w.Body.String(); !strings.Contains(v, `"match":0.923`) { - t.Errorf("%#v", v) - } } func TestSearch__NameAndAddress(t *testing.T) { diff --git a/cmd/server/search_uk_csl_test.go b/cmd/server/search_uk_csl_test.go index 1994d4c4..6812e770 100644 --- a/cmd/server/search_uk_csl_test.go +++ b/cmd/server/search_uk_csl_test.go @@ -28,6 +28,7 @@ func TestSearch_UK_CSL(t *testing.T) { require.Equal(t, http.StatusOK, w.Code) require.Contains(t, w.Body.String(), `"match":1`) + require.Contains(t, w.Body.String(), `"matchedName":"'abd al nasir"`) var wrapper struct { UKCSL []csl.UKCSLRecord `json:"ukConsolidatedSanctionsList"` @@ -51,6 +52,7 @@ func TestSearch_UK_SanctionsList(t *testing.T) { require.Equal(t, http.StatusOK, w.Code) require.Contains(t, w.Body.String(), `"match":1`) + require.Contains(t, w.Body.String(), `"matchedName":"haji khairullah haji sattar money exchange"`) var wrapper struct { UKSanctionsList []csl.UKSanctionsListRecord `json:"ukSanctionsList"` diff --git a/cmd/server/search_us_csl_test.go b/cmd/server/search_us_csl_test.go index 4e7218d2..3f04f600 100644 --- a/cmd/server/search_us_csl_test.go +++ b/cmd/server/search_us_csl_test.go @@ -31,6 +31,7 @@ func TestSearch_US_CSL(t *testing.T) { require.Equal(t, http.StatusOK, w.Code) require.Contains(t, w.Body.String(), `"match":0.6333`) + require.Contains(t, w.Body.String(), `"matchedName":"zaman"`) var wrapper struct { 
NonProliferationSanctions []csl.ISN `json:"nonProliferationSanctions"`