Skip to content

Commit

Permalink
adds evaluation counter in Performance; adds copy function for all in…
Browse files Browse the repository at this point in the history
…dexes
  • Loading branch information
sadit committed Jan 19, 2021
1 parent 4cb8ccb commit 60fb7b9
Show file tree
Hide file tree
Showing 10 changed files with 50 additions and 21 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "SimilaritySearch"
uuid = "053f045d-5466-53fd-b400-a066f88fe02a"
authors = ["Eric S. Tellez <[email protected]>"]
version = "0.4.0"
version = "0.4.1"

[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand Down
7 changes: 3 additions & 4 deletions src/graph/beamsearch.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ function BeamSearch(bsize::Integer=16, ssize=bsize; beam=KnnResult(bsize), hints
BeamSearch(bsize, ssize, beam, hints, vstate)
end

# Builds a BeamSearch from an existing one: every keyword (bsize, ssize, hints, vstate,
# beam) defaults to the matching field of `bsearch`, and the positional constructor is
# then called with (bsize, ssize, beam, hints, vstate).
# NOTE(review): two signature lines (`BeamSearch(...)` and `Base.copy(...)`) with the
# same arguments precede a single body; this looks like the previous and the current
# header of one definition shown together -- confirm which one is intended.
BeamSearch(bsearch::BeamSearch; bsize=bsearch.bsize, ssize=bsearch.ssize, hints=bsearch.hints, vstate=bsearch.vstate, beam=bsearch.beam) =
Base.copy(bsearch::BeamSearch; bsize=bsearch.bsize, ssize=bsearch.ssize, hints=bsearch.hints, vstate=bsearch.vstate, beam=bsearch.beam) =
BeamSearch(bsize, ssize, beam, hints, vstate)

function Base.copy!(dst::BeamSearch, src::BeamSearch)
Expand Down Expand Up @@ -99,9 +99,8 @@ end
# Generates perturbed variants of `gsearch` and pipes each one into `fun`.
# `n` sets the perturbation radius, `probes` is the number of variants (0 selects a
# default, see below); `iter` is not referenced in this body.
# NOTE(review): the body contains two near-identical `f(x)` definitions and two
# near-identical probe calls; this looks like the removed and the added lines of a
# change shown together -- confirm which lines are current before relying on it.
function opt_expand_neighborhood(fun, gsearch::BeamSearch, n::Integer, iter::Integer, probes::Integer)
# base-2 logarithm of n+1, rounded up to an Int
logn = ceil(Int, log(2, n+1))
# probes == 0 means "use logn probes"
probes = probes == 0 ? logn : probes
# f shifts x by a random integer taken from -logn:logn and clamps the result from below
f(x) = max(1, x + rand(-logn:logn))
# same perturbation, with a lower bound of 2 instead of 1
f(x) = max(2, x + rand(-logn:logn))
for i in 1:probes
# variant with perturbed bsize and ssize, built through the BeamSearch keyword form
BeamSearch(gsearch, bsize=f(gsearch.bsize), ssize=f(gsearch.ssize)) |> fun
# BeamSearch(gsearch, bsize=f(gsearch.bsize)) |> fun
# variant with perturbed bsize and ssize, built through `copy`
copy(gsearch, bsize=f(gsearch.bsize), ssize=f(gsearch.ssize)) |> fun
end
end
5 changes: 4 additions & 1 deletion src/graph/graph.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,12 @@ struct SearchGraph{DistType<:PreMetric, DataType<:AbstractVector, SearchType<:Lo
opts::SearchGraphOptions
end

"""
    copy(g::SearchGraph; dist, db, links, search_algo, neighborhood_algo, res, opts)

Create a `SearchGraph` from the fields of `g`. Each keyword defaults to the field of the
same name, so passing a keyword replaces only that field; fields that are not replaced
are handed to the constructor unchanged.
"""
function Base.copy(
    g::SearchGraph;
    dist=g.dist,
    db=g.db,
    links=g.links,
    search_algo=g.search_algo,
    neighborhood_algo=g.neighborhood_algo,
    res=g.res,
    opts=g.opts
)
    return SearchGraph(dist, db, links, search_algo, neighborhood_algo, res, opts)
end

function SearchGraph(dist::PreMetric, db::AbstractVector;
search_algo::LocalSearchAlgorithm=BeamSearch(),
neighborhood_algo::NeighborhoodAlgorithm=LogNeighborhood(1.1),
neighborhood_algo::NeighborhoodAlgorithm=LogNeighborhood(),
automatic_optimization=false,
recall=0.9,
ksearch=10,
Expand Down
4 changes: 2 additions & 2 deletions src/graph/ihc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ function IHCSearch(restarts::Integer=20; localimprovements=false)
IHCSearch(restarts, Int32[], VisitedVertices(), localimprovements)
end

# Builds an IHCSearch from an existing one: every keyword (restarts, hints, vstate,
# localimprovements) defaults to the matching field of `ihc`, and the positional
# constructor is then called with (restarts, hints, vstate, localimprovements).
# NOTE(review): two signature lines (`IHCSearch(...)` and `Base.copy(...)`) with the
# same arguments precede a single body; this looks like the previous and the current
# header of one definition shown together -- confirm which one is intended.
IHCSearch(ihc::IHCSearch; restarts=ihc.restarts, hints=ihc.hints, vstate=ihc.vstate, localimprovements=ihc.localimprovements) =
Base.copy(ihc::IHCSearch; restarts=ihc.restarts, hints=ihc.hints, vstate=ihc.vstate, localimprovements=ihc.localimprovements) =
IHCSearch(restarts, hints, vstate, localimprovements)

function Base.copy!(dst::IHCSearch, src::IHCSearch)
Expand Down Expand Up @@ -119,6 +119,6 @@ function opt_expand_neighborhood(fun, ihc::IHCSearch, n::Integer, iter::Integer,
f(x) = max(1, x + rand(-logn:logn))

for i in 1:probes
IHCSearch(ihc, restarts=f(ihc.restarts)) |> fun
copy(ihc, restarts=f(ihc.restarts)) |> fun
end
end
8 changes: 4 additions & 4 deletions src/graph/opt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ function optimize!(
probes::Integer=0
)
n = length(index.db)
score_function(p) = p.macrorecall < recall ? p.macrorecall : 1.0 + 1.0 / p.searchtime
# score_function(p) = p.macrorecall < recall ? p.macrorecall : 1.0 + n / p.distances
#score_function(p) = p.recall < recall ? p.recall : 1.0 + 1.0 / (1.0 + sum(p.distances))
#score_function(p) = 1.0 / (1.0 + sum(p.distances))
score_function(p) = p.macrorecall < recall ? p.macrorecall : 1.0 + n / p.evaluations
# score_function(p) = p.macrorecall < recall ? p.macrorecall : 1.0 + n / p.evaluations
#score_function(p) = p.recall < recall ? p.recall : 1.0 + 1.0 / (1.0 + sum(p.evaluations))
#score_function(p) = 1.0 / (1.0 + sum(p.evaluations))
p = probe(perf, index)
best_list = [(score=score_function(p), state=search_algo, perf=p)]
exploration = Dict(search_algo => 0) ## -1 unexplored; 0 visited; 1 visited & expanded
Expand Down
3 changes: 3 additions & 0 deletions src/indexes/pivottable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ struct PivotedSearch{DataType<:AbstractVector, DistanceType<:PreMetric} <: Abstr
res::KnnResult
end

"""
    copy(index::PivotedSearch; dist, db, pivots, table, dqp, res)

Create a `PivotedSearch` from the fields of `index`. Each keyword defaults to the field
of the same name, so passing a keyword replaces only that field; fields that are not
replaced are handed to the constructor unchanged.
"""
function Base.copy(
    index::PivotedSearch;
    dist=index.dist,
    db=index.db,
    pivots=index.pivots,
    table=index.table,
    dqp=index.dqp,
    res=index.res
)
    return PivotedSearch(dist, db, pivots, table, dqp, res)
end

"""
    PivotedSearch(dist::PreMetric, db, pivots, table, k::Integer=10)

Convenience constructor: allocates a zero-filled `Float32` vector with one entry per
pivot and a `KnnResult(k)`, then forwards everything to the six-argument constructor.
"""
function PivotedSearch(dist::PreMetric, db, pivots, table, k::Integer=10)
    dqp = zeros(Float32, length(pivots))
    res = KnnResult(k)
    return PivotedSearch(dist, db, pivots, table, dqp, res)
end

Expand Down
1 change: 1 addition & 0 deletions src/indexes/seq.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ struct ExhaustiveSearch{DistanceType<:PreMetric, DataType<:AbstractVector} <: Ab
end

"""
    ExhaustiveSearch(dist::PreMetric, db::AbstractVector; ksearch::Integer=10)

Convenience constructor: creates a `KnnResult(ksearch)` and forwards it, together with
`dist` and `db`, to the three-argument constructor.
"""
function ExhaustiveSearch(dist::PreMetric, db::AbstractVector; ksearch::Integer=10)
    res = KnnResult(ksearch)
    return ExhaustiveSearch(dist, db, res)
end
"""
    copy(seq::ExhaustiveSearch; dist, db, res)

Create an `ExhaustiveSearch` from the fields of `seq`. Each keyword defaults to the
field of the same name, so passing a keyword replaces only that field; fields that are
not replaced are handed to the constructor unchanged.
"""
function Base.copy(seq::ExhaustiveSearch; dist=seq.dist, db=seq.db, res=seq.res)
    return ExhaustiveSearch(dist, db, res)
end

"""
search(seq::ExhaustiveSearch, q, res::KnnResult)
Expand Down
3 changes: 3 additions & 0 deletions src/knr/knr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ struct Knr{RefSearchType<:AbstractSearchContext, DataType<:AbstractVector, Dista
opts::KnrOptions
end

"""
    copy(knr::Knr; dist, db, refsearch, kbuild, invindex, res, opts)

Create a `Knr` from the fields of `knr`. Each keyword defaults to the field of the same
name, so passing a keyword replaces only that field; fields that are not replaced are
handed to the constructor unchanged.
"""
function Base.copy(
    knr::Knr;
    dist=knr.dist,
    db=knr.db,
    refsearch=knr.refsearch,
    kbuild=knr.kbuild,
    invindex=knr.invindex,
    res=knr.res,
    opts=knr.opts
)
    return Knr(dist, db, refsearch, kbuild, invindex, res, opts)
end

function Knr(
dist::PreMetric,
db::AbstractVector,
Expand Down
4 changes: 3 additions & 1 deletion src/knr/kvp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
using SimilaritySearch
export Kvp, k_near_and_far, fit, search, push!

mutable struct Kvp{DataType<:AbstractVector, DistanceType<:PreMetric} <: AbstractSearchContext
struct Kvp{DataType<:AbstractVector, DistanceType<:PreMetric} <: AbstractSearchContext
dist::DistanceType
db::DataType
refs::DataType
Expand All @@ -13,6 +13,8 @@ mutable struct Kvp{DataType<:AbstractVector, DistanceType<:PreMetric} <: Abstrac
res::KnnResult
end

"""
    copy(kvp::Kvp; dist, db, refs, sparsetable, ksparse, res)

Create a `Kvp` from the fields of `kvp`. Each keyword defaults to the field of the same
name, so passing a keyword replaces only that field; fields that are not replaced are
handed to the constructor unchanged.
"""
function Base.copy(
    kvp::Kvp;
    dist=kvp.dist,
    db=kvp.db,
    refs=kvp.refs,
    sparsetable=kvp.sparsetable,
    ksparse=kvp.ksparse,
    res=kvp.res
)
    return Kvp(dist, db, refs, sparsetable, ksparse, res)
end

"""
    Kvp(dist::PreMetric, db, refs, sparsetable, ksparse::Integer, ksearch::Integer=10)

Convenience constructor: creates a `KnnResult(ksearch)` and forwards it, together with
the remaining arguments, to the six-argument constructor.
"""
function Kvp(dist::PreMetric, db, refs, sparsetable, ksparse::Integer, ksearch::Integer=10)
    res = KnnResult(ksearch)
    return Kvp(dist, db, refs, sparsetable, ksparse, res)
end
Expand Down
34 changes: 26 additions & 8 deletions src/utils/perf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,16 @@

export Performance, StatsKnn, StatsComparison, scores, probe

# Distance wrapper that stores another distance together with an integer tally.
# It is itself a `PreMetric`, and it is mutable so that `count` can be updated in place;
# the `evaluate(::DistCounter, a, b)` method in this file adds 1 to `count` per call.
mutable struct DistCounter{DistType<:PreMetric} <: PreMetric
# wrapped distance that performs the actual computation
dist::DistType
# number of `evaluate` calls recorded so far (callers in this file start it at 0)
count::Int
end

# Counting version of `evaluate`: records one more evaluation on `D`, then delegates
# the computation to the wrapped distance and returns its result.
function evaluate(D::DistCounter, a, b)
    # the tally is updated before delegating, so the call is counted even if the
    # wrapped distance throws
    D.count = D.count + 1
    wrapped = D.dist
    return evaluate(wrapped, a, b)
end

struct StatsKnn
distancessum::Float64
nearestdist::Float64
Expand All @@ -25,7 +35,6 @@ struct StatsKnn
end

n = length(reslist)

new(distancessum/n, nearestdist/n, farthestdist/n, len/n)
end
end
Expand All @@ -35,6 +44,7 @@ struct StatsComparison
macroprecision::Float64
macrof1::Float64
searchtime::Float64
evaluations::Float64
stats::StatsKnn
goldsearchtime::Float64
goldstats::StatsKnn
Expand All @@ -46,6 +56,7 @@ struct Performance{DataType<:AbstractVector}
popnearest::Bool
goldreslist::Vector{KnnResult}
goldsearchtime::Float64
goldevaluations::Float64
goldstats::StatsKnn
end

Expand All @@ -55,22 +66,29 @@ function perf_search_batch(index::AbstractSearchContext, queries, ksearch::Integ
ksearch += 1
end
reslist = [KnnResult(ksearch) for i in 1:m]
search(index, queries[1]) # warming step
evaluations = index.dist.count
start = time()

for i in 1:m
search(index, queries[i], reslist[i])
popnearest && popfirst!(reslist[i])
end

elapsed = time() - start
reslist, elapsed / m
reslist, elapsed / m, (index.dist.count - evaluations) / m
end

function Performance(goldsearch::AbstractSearchContext, queries::AbstractVector, ksearch::Integer; popnearest=false)
gold, searchtime = perf_search_batch(goldsearch, queries, ksearch, popnearest)
Performance(queries, ksearch, popnearest, gold, searchtime, StatsKnn(gold))
function Performance(_goldsearch::AbstractSearchContext, queries::AbstractVector, ksearch::Integer; popnearest=false)
dist = DistCounter(_goldsearch.dist, 0)
goldsearch = copy(_goldsearch, dist=dist)
gold, searchtime, evaluations = perf_search_batch(goldsearch, queries, ksearch, popnearest)
Performance(queries, ksearch, popnearest, gold, searchtime, evaluations, StatsKnn(gold))
end

function probe(perf::Performance, index::AbstractSearchContext)
reslist, searchtime = perf_search_batch(index, perf.queries, perf.ksearch, perf.popnearest)
function probe(perf::Performance, _index::AbstractSearchContext)
index = copy(_index, dist=DistCounter(_index.dist, 0))
reslist, searchtime, evaluations = perf_search_batch(index, perf.queries, perf.ksearch, perf.popnearest)
n = length(reslist)
recall = 0.0
nearest = 0.0
Expand All @@ -83,7 +101,7 @@ function probe(perf::Performance, index::AbstractSearchContext)
f1 += p.f1
end

StatsComparison(recall/n, precision/n, f1/n, searchtime, StatsKnn(reslist), perf.goldsearchtime, perf.goldstats)
StatsComparison(recall/n, precision/n, f1/n, searchtime, evaluations, StatsKnn(reslist), perf.goldsearchtime, perf.goldstats)
end

function scores(gold::Set, res::Set)
Expand Down

0 comments on commit 60fb7b9

Please sign in to comment.