From 900dc5070d9ac7c2e093cc62b7ff2224bb9679b1 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 27 Jun 2018 14:14:53 +0200 Subject: [PATCH] more 0.7 updates --- .gitignore | 2 ++ .travis.yml | 23 +++++++++++++++-------- Project.toml | 12 ++++++++++++ README.md | 14 +------------- REQUIRE | 6 +++--- appveyor.yml | 12 ++++++------ benchmarks/benchmarkdatafreetree.jl | 1 + src/NearestNeighbors.jl | 2 -- src/ball_tree.jl | 22 ++++++++++------------ src/brute_tree.jl | 2 +- src/datafreetree.jl | 12 +++--------- src/hyperrectangles.jl | 4 ++-- src/hyperspheres.jl | 2 +- src/inrange.jl | 2 +- src/kd_tree.jl | 22 ++++++++++------------ src/knn.jl | 10 +++++----- src/tree_data.jl | 2 +- src/utilities.jl | 7 +------ test/runtests.jl | 3 ++- test/test_datafreetree.jl | 15 +++------------ 20 files changed, 80 insertions(+), 95 deletions(-) create mode 100644 Project.toml diff --git a/.gitignore b/.gitignore index 511e24d..853fc45 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ *.ipynb_checkpoints/ benchmarks/*.jld benchmarks/*.md +Manifest.toml + diff --git a/.travis.yml b/.travis.yml index 80293e3..e3c886f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,19 @@ +# Documentation: http://docs.travis-ci.com/user/languages/julia/ language: julia +os: + - linux + - osx + julia: - - 0.6 - - nightly -notifications: - email: false -#script: # use default script setting which is equivalent to the following -# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi -# - julia -e 'Pkg.clone(pwd()); Pkg.test("NearestNeighbors"; coverage=true)' + - 0.7 + - nightly +notifications: + email: false +script: + - julia -e 'import Pkg; Pkg.build(); Pkg.test(; coverage=true)' after_success: -- julia -e 'cd(Pkg.dir("NearestNeighbors")); Pkg.add("Coverage"); using Coverage; Codecov.submit(process_folder())' + # push coverage results to Coveralls + - julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())' + # push coverage 
results to Codecov + - julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())' diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..32a836a --- /dev/null +++ b/Project.toml @@ -0,0 +1,12 @@ +name = "NearestNeighbors" +uuid = "b8a86587-4115-5ab1-83bc-aa920d37bbce" +version = "0.4.0" + +[deps] +Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[targets.test.deps] +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Mmap = "a63ad114-7e13-5084-954f-fe012c677804" diff --git a/README.md b/README.md index d9d1a67..a36b94d 100644 --- a/README.md +++ b/README.md @@ -123,6 +123,7 @@ By default, the trees store a copy of the `data` provided during construction. T `DataFreeTree` can be used to strip a constructed tree of its data field and re-link it with that data at a later stage. An example of using a large on-disk data set looks like this: ```jl +using Mmap ndim = 2; ndata = 10_000_000_000 data = Mmap.mmap(datafilename, Matrix{Float32}, (ndim, ndata)) data[:] = rand(Float32, ndim, ndata) # create example data @@ -138,19 +139,6 @@ tree = injectdata(dftree, data) # yields a KDTree knn(tree, data[:,1], 3) # perform operations as usual ``` -In case you want to exploit the reordering feature, which can improve access times by placing data items close together in memory / on disk when they are close together according to the metric used, you can pass a custom `reorderbuffer`. This can be either in-memory or mmapped, as in the following example: - -```jl -reorderbuffer = Mmap.mmap(reorderedfilename, Matrix{Float32}, (ndim, ndata)) -dftree = DataFreeTree(KDTree, data, reorderbuffer = reorderbuffer) -# all future operations are indepented of 'data' -tree = injectdata(dftree, reorderbuffer) -``` - -## Debugging - -There are some basic debugging/statistics functionality implemented. 
These are activated by setting the -`DEBUG` variable to `true` in the `NearestNeighbors.jl` file. For the debugging options, please see the `debugging.jl` file. Pull requests to enhance this are welcome. ## Author diff --git a/REQUIRE b/REQUIRE index 6ecc63f..fd0162d 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,3 +1,3 @@ -julia 0.6 -Distances 0.4 -StaticArrays 0.0.4 +julia 0.7- +Distances 0.6 +StaticArrays 0.7 diff --git a/appveyor.yml b/appveyor.yml index b1c60b7..400e311 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,5 +1,7 @@ environment: matrix: + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.7/julia-0.7-latest-win32.exe" + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.7/julia-0.7-latest-win64.exe" - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" branches: @@ -23,11 +25,9 @@ install: - C:\projects\julia-binary.exe /S /D=C:\projects\julia build_script: -# Need to convert from shallow to complete for Pkg.clone to work - - IF EXIST .git\shallow (git fetch --unshallow) - - C:\projects\julia\bin\julia -e "versioninfo(); - Pkg.clone(pwd(), \"NearestNeighbors\");" - + - C:\projects\julia\bin\julia -e " + import InteractiveUtils; versioninfo(); + import Pkg; Pkg.build()" test_script: - - C:\projects\julia\bin\julia --check-bounds=yes -e "Pkg.test(\"NearestNeighbors\")" + - C:\projects\julia\bin\julia -e "Pkg.test()" diff --git a/benchmarks/benchmarkdatafreetree.jl b/benchmarks/benchmarkdatafreetree.jl index 5d29f96..89e70bd 100644 --- a/benchmarks/benchmarkdatafreetree.jl +++ b/benchmarks/benchmarkdatafreetree.jl @@ -1,5 +1,6 @@ using NearestNeighbors using Benchmarks +using Mmap runtimes = [] runtimesreordered = [] diff --git a/src/NearestNeighbors.jl b/src/NearestNeighbors.jl index a23554a..1557f6b 100644 --- a/src/NearestNeighbors.jl +++ b/src/NearestNeighbors.jl @@ -2,8 +2,6 @@ 
__precompile__() module NearestNeighbors -using Compat - using Distances import Distances: Metric, result_type, eval_reduce, eval_end, eval_op, eval_start, evaluate diff --git a/src/ball_tree.jl b/src/ball_tree.jl index 671a4c3..7b4d2df 100644 --- a/src/ball_tree.jl +++ b/src/ball_tree.jl @@ -33,8 +33,7 @@ function BallTree(data::Vector{V}, leafsize::Int = 10, reorder::Bool = true, storedata::Bool = true, - reorderbuffer::Vector{V} = Vector{V}(), - indicesfor::Symbol = :data) where {V <: AbstractArray, M <: Metric} + reorderbuffer::Vector{V} = Vector{V}()) where {V <: AbstractArray, M <: Metric} reorder = !isempty(reorderbuffer) || (storedata ? reorder : false) tree_data = TreeData(data, leafsize) @@ -45,12 +44,12 @@ function BallTree(data::Vector{V}, indices = collect(1:n_p) # Bottom up creation of hyper spheres so need spheres even for leafs) - @compat hyper_spheres = Vector{HyperSphere{length(V),eltype(V)}}(uninitialized, tree_data.n_internal_nodes + tree_data.n_leafs) + hyper_spheres = Vector{HyperSphere{length(V),eltype(V)}}(undef, tree_data.n_internal_nodes + tree_data.n_leafs) if reorder - @compat indices_reordered = Vector{Int}(uninitialized, n_p) + indices_reordered = Vector{Int}(undef, n_p) if isempty(reorderbuffer) - @compat data_reordered = Vector{V}(uninitialized, n_p) + data_reordered = Vector{V}(undef, n_p) else data_reordered = reorderbuffer end @@ -68,29 +67,28 @@ function BallTree(data::Vector{V}, if reorder data = data_reordered - indices = indicesfor == :data ? indices_reordered : collect(1:n_p) + indices = indices_reordered end BallTree(storedata ? 
data : similar(data, 0), hyper_spheres, indices, metric, tree_data, reorder) end -@compat function BallTree(data::Matrix{T}, + function BallTree(data::Matrix{T}, metric::M = Euclidean(); leafsize::Int = 10, storedata::Bool = true, reorder::Bool = true, - reorderbuffer::Matrix{T} = Matrix{T}(uninitialized, 0, 0), - indicesfor::Symbol = :data) where {T <: AbstractFloat, M <: Metric} + reorderbuffer::Matrix{T} = Matrix{T}(undef, 0, 0)) where {T <: AbstractFloat, M <: Metric} dim = size(data, 1) npoints = size(data, 2) - points = reinterpret_or_copy(T, data, Val(dim)) + points = copy_svec(T, data, Val(dim)) if isempty(reorderbuffer) reorderbuffer_points = Vector{SVector{dim,T}}() else - reorderbuffer_points = reinterpret_or_copy(T, reorderbuffer, Val(dim)) + reorderbuffer_points = copy_svec(T, reorderbuffer, Val(dim)) end BallTree(points, metric, leafsize = leafsize, storedata = storedata, reorder = reorder, - reorderbuffer = reorderbuffer_points, indicesfor = indicesfor) + reorderbuffer = reorderbuffer_points) end # Recursive function to build the tree. 
diff --git a/src/brute_tree.jl b/src/brute_tree.jl index 926b78a..4ac6000 100644 --- a/src/brute_tree.jl +++ b/src/brute_tree.jl @@ -18,7 +18,7 @@ function BruteTree(data::Matrix{T}, metric::Metric = Euclidean(); reorder::Bool=false, leafsize::Int=0, storedata::Bool=true) where {T} dim = size(data, 1) npoints = size(data, 2) - BruteTree(reinterpret_or_copy(T, data, Val(dim)), + BruteTree(copy_svec(T, data, Val(dim)), metric, reorder = reorder, leafsize = leafsize, storedata = storedata) end diff --git a/src/datafreetree.jl b/src/datafreetree.jl index 4103c70..e9924b2 100644 --- a/src/datafreetree.jl +++ b/src/datafreetree.jl @@ -20,19 +20,13 @@ function get_points_dim(data) end """ - DataFreeTree(treetype, data[, reorderbufffer = similar(data), indicesfor = :data, kargs...]) -> datafreetree + DataFreeTree(treetype, data[, reorderbufffer = similar(data), kwargs...]) -> datafreetree Creates a `DataFreeTree` which wraps a `KDTree` or `BallTree`. Keywords arguments are passed to their respective constructors. The `KDTree` or `BallTree` will be stored without a reference to the underlaying data. `injectdata` has to be used to re-link them to a data array before use. - -By default the `reorder` feature of `KDTree`/`BallTree` is turned off. In case a `reorderbuffer` -is provided, reordering is performed and the contents of `reorderbuffer` have to be later provided to -`injectdata`. - -`indicesfor` controlls whether the indices returned by the query functions should refer to `data` or the `reorderbuffer`. Valid values are `:data` and `:reordered`. """ function DataFreeTree(::Type{T}, data, args...; reorderbuffer = data[:, 1:0], kargs...) where {T <: NNTree} tree = T(data, args...; storedata = false, reorderbuffer = reorderbuffer, kargs...) 
@@ -49,7 +43,7 @@ function injectdata(datafreetree::DataFreeTree, data::Matrix{T}) where {T} dim = size(data, 1) npoints = size(data, 2) if isbits(T) - new_data = reinterpret_or_copy(T, data, Val(dim)) + new_data = copy_svec(T, data, Val(dim)) else new_data = SVector{dim,T}[SVector{dim,T}(data[:, i]) for i in 1:npoints] end @@ -69,6 +63,6 @@ function injectdata(datafreetree::DataFreeTree, data::Vector{V}, new_hash::UInt6 end typ = typeof(datafreetree.tree) - fields = map(x -> getfield(datafreetree.tree, x), fieldnames(datafreetree.tree))[2:end] + fields = map(x -> getfield(datafreetree.tree, x), fieldnames(typeof(datafreetree.tree)))[2:end] typ(data, fields...) end diff --git a/src/hyperrectangles.jl b/src/hyperrectangles.jl index 17b68a2..4f4f4bb 100644 --- a/src/hyperrectangles.jl +++ b/src/hyperrectangles.jl @@ -9,8 +9,8 @@ end function compute_bbox(data::Vector{V}) where {V <: AbstractVector} T = eltype(V) n_dim = length(V) - @compat maxes = Vector{T}(uninitialized, n_dim) - @compat mins = Vector{T}(uninitialized, n_dim) + maxes = Vector{T}(undef, n_dim) + mins = Vector{T}(undef, n_dim) @inbounds for j in 1:length(V) dim_max = typemin(T) dim_min = typemax(T) diff --git a/src/hyperspheres.jl b/src/hyperspheres.jl index a06f574..9810e38 100644 --- a/src/hyperspheres.jl +++ b/src/hyperspheres.jl @@ -52,7 +52,7 @@ function create_bsphere(data::Vector{V}, metric::Metric, indices::Vector{Int}, l ab.center[j] += data[indices[i]][j] end end - scale!(ab.center, 1 / n_points) + ab.center .*= 1 / n_points # Then find r r = zero(get_T(eltype(V))) diff --git a/src/inrange.jl b/src/inrange.jl index 0ec75a7..48d4f8d 100644 --- a/src/inrange.jl +++ b/src/inrange.jl @@ -44,7 +44,7 @@ function inrange(tree::NNTree{V}, point::Matrix{T}, radius::Number, sortres=fals dim = size(point, 1) npoints = size(point, 2) if isbits(T) - new_data = reinterpret_or_copy(T, point, Val(dim)) + new_data = copy_svec(T, point, Val(dim)) else new_data = SVector{dim,T}[SVector{dim,T}(point[:, i]) for 
i in 1:npoints] end diff --git a/src/kd_tree.jl b/src/kd_tree.jl index f0362ba..edcb75d 100644 --- a/src/kd_tree.jl +++ b/src/kd_tree.jl @@ -29,8 +29,7 @@ function KDTree(data::Vector{V}, leafsize::Int = 10, storedata::Bool = true, reorder::Bool = true, - reorderbuffer::Vector{V} = Vector{V}(), - indicesfor::Symbol = :data) where {V <: AbstractArray, M <: MinkowskiMetric} + reorderbuffer::Vector{V} = Vector{V}()) where {V <: AbstractArray, M <: MinkowskiMetric} reorder = !isempty(reorderbuffer) || (storedata ? reorder : false) tree_data = TreeData(data, leafsize) @@ -38,12 +37,12 @@ function KDTree(data::Vector{V}, n_p = length(data) indices = collect(1:n_p) - @compat nodes = Vector{KDNode{eltype(V)}}(uninitialized, tree_data.n_internal_nodes) + nodes = Vector{KDNode{eltype(V)}}(undef, tree_data.n_internal_nodes) if reorder - @compat indices_reordered = Vector{Int}(uninitialized, n_p) + indices_reordered = Vector{Int}(undef, n_p) if isempty(reorderbuffer) - @compat data_reordered = Vector{V}(uninitialized, n_p) + data_reordered = Vector{V}(undef, n_p) else data_reordered = reorderbuffer end @@ -61,29 +60,28 @@ function KDTree(data::Vector{V}, 1, length(data), tree_data, reorder) if reorder data = data_reordered - indices = indicesfor == :data ? indices_reordered : collect(1:n_p) + indices = indices_reordered end KDTree(storedata ? 
data : similar(data, 0), hyper_rec, indices, metric, nodes, tree_data, reorder) end -@compat function KDTree(data::Matrix{T}, + function KDTree(data::Matrix{T}, metric::M = Euclidean(); leafsize::Int = 10, storedata::Bool = true, reorder::Bool = true, - reorderbuffer::Matrix{T} = Matrix{T}(uninitialized, 0, 0), - indicesfor::Symbol = :data) where {T <: AbstractFloat, M <: MinkowskiMetric} + reorderbuffer::Matrix{T} = Matrix{T}(undef, 0, 0)) where {T <: AbstractFloat, M <: MinkowskiMetric} dim = size(data, 1) npoints = size(data, 2) - points = reinterpret_or_copy(T, data, Val(dim)) + points = copy_svec(T, data, Val(dim)) if isempty(reorderbuffer) reorderbuffer_points = Vector{SVector{dim,T}}() else - reorderbuffer_points = reinterpret_or_copy(T, reorderbuffer, Val(dim)) + reorderbuffer_points = copy_svec(T, reorderbuffer, Val(dim)) end KDTree(points, metric, leafsize = leafsize, storedata = storedata, reorder = reorder, - reorderbuffer = reorderbuffer_points, indicesfor = indicesfor) + reorderbuffer = reorderbuffer_points) end function build_KDTree(index::Int, diff --git a/src/knn.jl b/src/knn.jl index 76ffc56..67c8474 100644 --- a/src/knn.jl +++ b/src/knn.jl @@ -16,8 +16,8 @@ function knn(tree::NNTree{V}, points::Vector{T}, k::Int, sortres=false, skip::Fu check_input(tree, points) check_k(tree, k) n_points = length(points) - @compat dists = [Vector{get_T(eltype(V))}(uninitialized, k) for _ in 1:n_points] - @compat idxs = [Vector{Int}(uninitialized, k) for _ in 1:n_points] + dists = [Vector{get_T(eltype(V))}(undef, k) for _ in 1:n_points] + idxs = [Vector{Int}(undef, k) for _ in 1:n_points] for i in 1:n_points knn_point!(tree, points[i], sortres, dists[i], idxs[i], skip) end @@ -38,8 +38,8 @@ end function knn(tree::NNTree{V}, point::AbstractVector{T}, k::Int, sortres=false, skip::Function=always_false) where {V, T <: Number} check_k(tree, k) - @compat idx = Vector{Int}(uninitialized, k) - @compat dist = Vector{get_T(eltype(V))}(uninitialized, k) + idx = 
Vector{Int}(undef, k) + dist = Vector{get_T(eltype(V))}(undef, k) knn_point!(tree, point, sortres, dist, idx, skip) return idx, dist end @@ -48,7 +48,7 @@ function knn(tree::NNTree{V}, point::Matrix{T}, k::Int, sortres=false, skip::Fun dim = size(point, 1) npoints = size(point, 2) if isbits(T) - new_data = reinterpret_or_copy(T, point, Val(dim)) + new_data = copy_svec(T, point, Val(dim)) else new_data = SVector{dim,T}[SVector{dim,T}(point[:, i]) for i in 1:npoints] end diff --git a/src/tree_data.jl b/src/tree_data.jl index c1b5c75..e8274d5 100644 --- a/src/tree_data.jl +++ b/src/tree_data.jl @@ -10,7 +10,7 @@ struct TreeData end -function TreeData{V}(data::Vector{V}, leafsize) +function TreeData(data::Vector{V}, leafsize) where V n_dim, n_p = length(V), length(data) # If number of points is zero diff --git a/src/utilities.jl b/src/utilities.jl index 5dcb45f..8cea0ab 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -93,10 +93,5 @@ end end # Instead of ReinterpretArray wrapper, copy an array, interpreting it as a vector of SVectors -if VERSION < v"0.7.0-DEV.2008" - @noinline reinterpret_or_copy(::Type{T}, data, ::Val{dim}) where {T, dim} = - reinterpret(SVector{dim,T}, data, (length(data) ÷ dim,)) -else - @noinline reinterpret_or_copy(::Type{T}, data, ::Val{dim}) where {T, dim} = +copy_svec(::Type{T}, data, ::Val{dim}) where {T, dim} = [SVector{dim,T}(ntuple(i -> data[n+i], Val(dim))) for n in 0:dim:(length(data)-1)] -end diff --git a/test/runtests.jl b/test/runtests.jl index 92bc5c0..fc28cdd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,8 @@ using NearestNeighbors using StaticArrays -using Base.Test +using Test +using LinearAlgebra import Distances: Metric, evaluate struct CustomMetric1 <: Metric end diff --git a/test/test_datafreetree.jl b/test/test_datafreetree.jl index bf16c73..f194a8e 100644 --- a/test/test_datafreetree.jl +++ b/test/test_datafreetree.jl @@ -1,3 +1,5 @@ +using Mmap + @testset "datafreetree" begin data = rand(2,100) 
data2 = rand(2,100) @@ -7,26 +9,15 @@ @test_throws DimensionMismatch injectdata(t, data3) for typ in [KDTree, BallTree] dfilename = tempname() - rfilename = tempname() d = 2 n = 100 data = Mmap.mmap(dfilename, Matrix{Float32}, (d, n)) data[:] = rand(Float32, d, n) - reorderbuffer = Mmap.mmap(rfilename, Matrix{Float32}, (d, n)) t = injectdata(DataFreeTree(typ, data), data) - tr = injectdata(DataFreeTree(typ, data, reorderbuffer = reorderbuffer), reorderbuffer) + tr = typ(data) for i = 1:n @test knn(t, data[:,i], 3) == knn(tr, data[:,i], 3) end rm(dfilename) - rm(rfilename) - end - - data = rand(2,1000) - buf = zeros(data) - for typ in [KDTree, BallTree] - t = injectdata(DataFreeTree(typ, data, indicesfor = :data), data) - t2 = injectdata(DataFreeTree(typ, data, reorderbuffer = buf, indicesfor = :reordered), buf) - @test data[:,knn(t, data[:,1], 3)[1]] == buf[:,knn(t2, data[:,1], 3)[1]] end end