Skip to content

Commit

Permalink
more 0.7 updates
Browse files Browse the repository at this point in the history
  • Loading branch information
KristofferC committed Jun 27, 2018
1 parent 46377ad commit 900dc50
Show file tree
Hide file tree
Showing 20 changed files with 80 additions and 95 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
*.ipynb_checkpoints/
benchmarks/*.jld
benchmarks/*.md
Manifest.toml

23 changes: 15 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
# Documentation: http://docs.travis-ci.com/user/languages/julia/
language: julia
os:
- linux
- osx

julia:
- 0.6
- nightly
notifications:
email: false
#script: # use default script setting which is equivalent to the following
# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
# - julia -e 'Pkg.clone(pwd()); Pkg.test("NearestNeighbors"; coverage=true)'
- 0.7
- nightly

notifications:
email: false
script:
- julia -e 'import Pkg; Pkg.build(); Pkg.test(; coverage=true)'
after_success:
- julia -e 'cd(Pkg.dir("NearestNeighbors")); Pkg.add("Coverage"); using Coverage; Codecov.submit(process_folder())'
# push coverage results to Coveralls
- julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'
# push coverage results to Codecov
- julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())'
12 changes: 12 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
name = "NearestNeighbors"
uuid = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
version = "0.4.0"

[deps]
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

[targets.test.deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
14 changes: 1 addition & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ By default, the trees store a copy of the `data` provided during construction. T
`DataFreeTree` can be used to strip a constructed tree of its data field and re-link it with that data at a later stage. An example of using a large on-disk data set looks like this:

```jl
using Mmap
ndim = 2; ndata = 10_000_000_000
data = Mmap.mmap(datafilename, Matrix{Float32}, (ndim, ndata))
data[:] = rand(Float32, ndim, ndata) # create example data
Expand All @@ -138,19 +139,6 @@ tree = injectdata(dftree, data) # yields a KDTree
knn(tree, data[:,1], 3) # perform operations as usual
```

In case you want to exploit the reordering feature, which can improve access times by placing data items close together in memory / on disk when they are close together according to the metric used, you can pass a custom `reorderbuffer`. This can be either in-memory or mmapped, as in the following example:

```jl
reorderbuffer = Mmap.mmap(reorderedfilename, Matrix{Float32}, (ndim, ndata))
dftree = DataFreeTree(KDTree, data, reorderbuffer = reorderbuffer)
# all future operations are independent of 'data'
tree = injectdata(dftree, reorderbuffer)
```

## Debugging

Some basic debugging/statistics functionality is implemented. It is activated by setting the
`DEBUG` variable to `true` in the `NearestNeighbors.jl` file. For the debugging options, please see the `debugging.jl` file. Pull requests to enhance this are welcome.

## Author

Expand Down
6 changes: 3 additions & 3 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
julia 0.6
Distances 0.4
StaticArrays 0.0.4
julia 0.7-
Distances 0.6
StaticArrays 0.7
12 changes: 6 additions & 6 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
environment:
matrix:
- JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.7/julia-0.7-latest-win32.exe"
- JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.7/julia-0.7-latest-win64.exe"
- JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe"
- JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe"
branches:
Expand All @@ -23,11 +25,9 @@ install:
- C:\projects\julia-binary.exe /S /D=C:\projects\julia

build_script:
# Need to convert from shallow to complete for Pkg.clone to work
- IF EXIST .git\shallow (git fetch --unshallow)
- C:\projects\julia\bin\julia -e "versioninfo();
Pkg.clone(pwd(), \"NearestNeighbors\");"

- C:\projects\julia\bin\julia -e "
import InteractiveUtils; versioninfo();
import Pkg; Pkg.build()"

test_script:
- C:\projects\julia\bin\julia --check-bounds=yes -e "Pkg.test(\"NearestNeighbors\")"
- C:\projects\julia\bin\julia -e "Pkg.test()"
1 change: 1 addition & 0 deletions benchmarks/benchmarkdatafreetree.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using NearestNeighbors
using Benchmarks
using Mmap

runtimes = []
runtimesreordered = []
Expand Down
2 changes: 0 additions & 2 deletions src/NearestNeighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ __precompile__()

module NearestNeighbors

using Compat

using Distances
import Distances: Metric, result_type, eval_reduce, eval_end, eval_op, eval_start, evaluate

Expand Down
22 changes: 10 additions & 12 deletions src/ball_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ function BallTree(data::Vector{V},
leafsize::Int = 10,
reorder::Bool = true,
storedata::Bool = true,
reorderbuffer::Vector{V} = Vector{V}(),
indicesfor::Symbol = :data) where {V <: AbstractArray, M <: Metric}
reorderbuffer::Vector{V} = Vector{V}()) where {V <: AbstractArray, M <: Metric}
reorder = !isempty(reorderbuffer) || (storedata ? reorder : false)

tree_data = TreeData(data, leafsize)
Expand All @@ -45,12 +44,12 @@ function BallTree(data::Vector{V},
indices = collect(1:n_p)

# Bottom-up creation of hyper spheres, so spheres are needed even for the leaves
@compat hyper_spheres = Vector{HyperSphere{length(V),eltype(V)}}(uninitialized, tree_data.n_internal_nodes + tree_data.n_leafs)
hyper_spheres = Vector{HyperSphere{length(V),eltype(V)}}(undef, tree_data.n_internal_nodes + tree_data.n_leafs)

if reorder
@compat indices_reordered = Vector{Int}(uninitialized, n_p)
indices_reordered = Vector{Int}(undef, n_p)
if isempty(reorderbuffer)
@compat data_reordered = Vector{V}(uninitialized, n_p)
data_reordered = Vector{V}(undef, n_p)
else
data_reordered = reorderbuffer
end
Expand All @@ -68,29 +67,28 @@ function BallTree(data::Vector{V},

if reorder
data = data_reordered
indices = indicesfor == :data ? indices_reordered : collect(1:n_p)
indices = indices_reordered
end

BallTree(storedata ? data : similar(data, 0), hyper_spheres, indices, metric, tree_data, reorder)
end

@compat function BallTree(data::Matrix{T},
function BallTree(data::Matrix{T},
metric::M = Euclidean();
leafsize::Int = 10,
storedata::Bool = true,
reorder::Bool = true,
reorderbuffer::Matrix{T} = Matrix{T}(uninitialized, 0, 0),
indicesfor::Symbol = :data) where {T <: AbstractFloat, M <: Metric}
reorderbuffer::Matrix{T} = Matrix{T}(undef, 0, 0)) where {T <: AbstractFloat, M <: Metric}
dim = size(data, 1)
npoints = size(data, 2)
points = reinterpret_or_copy(T, data, Val(dim))
points = copy_svec(T, data, Val(dim))
if isempty(reorderbuffer)
reorderbuffer_points = Vector{SVector{dim,T}}()
else
reorderbuffer_points = reinterpret_or_copy(T, reorderbuffer, Val(dim))
reorderbuffer_points = copy_svec(T, reorderbuffer, Val(dim))
end
BallTree(points, metric, leafsize = leafsize, storedata = storedata, reorder = reorder,
reorderbuffer = reorderbuffer_points, indicesfor = indicesfor)
reorderbuffer = reorderbuffer_points)
end

# Recursive function to build the tree.
Expand Down
2 changes: 1 addition & 1 deletion src/brute_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ function BruteTree(data::Matrix{T}, metric::Metric = Euclidean();
reorder::Bool=false, leafsize::Int=0, storedata::Bool=true) where {T}
dim = size(data, 1)
npoints = size(data, 2)
BruteTree(reinterpret_or_copy(T, data, Val(dim)),
BruteTree(copy_svec(T, data, Val(dim)),
metric, reorder = reorder, leafsize = leafsize, storedata = storedata)
end

Expand Down
12 changes: 3 additions & 9 deletions src/datafreetree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,13 @@ function get_points_dim(data)
end

"""
DataFreeTree(treetype, data[, reorderbuffer = similar(data), indicesfor = :data, kargs...]) -> datafreetree
DataFreeTree(treetype, data[, reorderbuffer = similar(data), kwargs...]) -> datafreetree
Creates a `DataFreeTree` which wraps a `KDTree` or `BallTree`. Keywords arguments are passed
to their respective constructors.
The `KDTree` or `BallTree` will be stored without a reference to the underlying data. `injectdata`
has to be used to re-link them to a data array before use.
By default the `reorder` feature of `KDTree`/`BallTree` is turned off. In case a `reorderbuffer`
is provided, reordering is performed and the contents of `reorderbuffer` have to be later provided to
`injectdata`.
`indicesfor` controls whether the indices returned by the query functions should refer to `data` or the `reorderbuffer`. Valid values are `:data` and `:reordered`.
"""
function DataFreeTree(::Type{T}, data, args...; reorderbuffer = data[:, 1:0], kargs...) where {T <: NNTree}
tree = T(data, args...; storedata = false, reorderbuffer = reorderbuffer, kargs...)
Expand All @@ -49,7 +43,7 @@ function injectdata(datafreetree::DataFreeTree, data::Matrix{T}) where {T}
dim = size(data, 1)
npoints = size(data, 2)
if isbits(T)
new_data = reinterpret_or_copy(T, data, Val(dim))
new_data = copy_svec(T, data, Val(dim))
else
new_data = SVector{dim,T}[SVector{dim,T}(data[:, i]) for i in 1:npoints]
end
Expand All @@ -69,6 +63,6 @@ function injectdata(datafreetree::DataFreeTree, data::Vector{V}, new_hash::UInt6
end

typ = typeof(datafreetree.tree)
fields = map(x -> getfield(datafreetree.tree, x), fieldnames(datafreetree.tree))[2:end]
fields = map(x -> getfield(datafreetree.tree, x), fieldnames(typeof(datafreetree.tree)))[2:end]
typ(data, fields...)
end
4 changes: 2 additions & 2 deletions src/hyperrectangles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ end
function compute_bbox(data::Vector{V}) where {V <: AbstractVector}
T = eltype(V)
n_dim = length(V)
@compat maxes = Vector{T}(uninitialized, n_dim)
@compat mins = Vector{T}(uninitialized, n_dim)
maxes = Vector{T}(undef, n_dim)
mins = Vector{T}(undef, n_dim)
@inbounds for j in 1:length(V)
dim_max = typemin(T)
dim_min = typemax(T)
Expand Down
2 changes: 1 addition & 1 deletion src/hyperspheres.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ function create_bsphere(data::Vector{V}, metric::Metric, indices::Vector{Int}, l
ab.center[j] += data[indices[i]][j]
end
end
scale!(ab.center, 1 / n_points)
ab.center .*= 1 / n_points

# Then find r
r = zero(get_T(eltype(V)))
Expand Down
2 changes: 1 addition & 1 deletion src/inrange.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ function inrange(tree::NNTree{V}, point::Matrix{T}, radius::Number, sortres=fals
dim = size(point, 1)
npoints = size(point, 2)
if isbits(T)
new_data = reinterpret_or_copy(T, point, Val(dim))
new_data = copy_svec(T, point, Val(dim))
else
new_data = SVector{dim,T}[SVector{dim,T}(point[:, i]) for i in 1:npoints]
end
Expand Down
22 changes: 10 additions & 12 deletions src/kd_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,20 @@ function KDTree(data::Vector{V},
leafsize::Int = 10,
storedata::Bool = true,
reorder::Bool = true,
reorderbuffer::Vector{V} = Vector{V}(),
indicesfor::Symbol = :data) where {V <: AbstractArray, M <: MinkowskiMetric}
reorderbuffer::Vector{V} = Vector{V}()) where {V <: AbstractArray, M <: MinkowskiMetric}
reorder = !isempty(reorderbuffer) || (storedata ? reorder : false)

tree_data = TreeData(data, leafsize)
n_d = length(V)
n_p = length(data)

indices = collect(1:n_p)
@compat nodes = Vector{KDNode{eltype(V)}}(uninitialized, tree_data.n_internal_nodes)
nodes = Vector{KDNode{eltype(V)}}(undef, tree_data.n_internal_nodes)

if reorder
@compat indices_reordered = Vector{Int}(uninitialized, n_p)
indices_reordered = Vector{Int}(undef, n_p)
if isempty(reorderbuffer)
@compat data_reordered = Vector{V}(uninitialized, n_p)
data_reordered = Vector{V}(undef, n_p)
else
data_reordered = reorderbuffer
end
Expand All @@ -61,29 +60,28 @@ function KDTree(data::Vector{V},
1, length(data), tree_data, reorder)
if reorder
data = data_reordered
indices = indicesfor == :data ? indices_reordered : collect(1:n_p)
indices = indices_reordered
end

KDTree(storedata ? data : similar(data, 0), hyper_rec, indices, metric, nodes, tree_data, reorder)
end

@compat function KDTree(data::Matrix{T},
function KDTree(data::Matrix{T},
metric::M = Euclidean();
leafsize::Int = 10,
storedata::Bool = true,
reorder::Bool = true,
reorderbuffer::Matrix{T} = Matrix{T}(uninitialized, 0, 0),
indicesfor::Symbol = :data) where {T <: AbstractFloat, M <: MinkowskiMetric}
reorderbuffer::Matrix{T} = Matrix{T}(undef, 0, 0)) where {T <: AbstractFloat, M <: MinkowskiMetric}
dim = size(data, 1)
npoints = size(data, 2)
points = reinterpret_or_copy(T, data, Val(dim))
points = copy_svec(T, data, Val(dim))
if isempty(reorderbuffer)
reorderbuffer_points = Vector{SVector{dim,T}}()
else
reorderbuffer_points = reinterpret_or_copy(T, reorderbuffer, Val(dim))
reorderbuffer_points = copy_svec(T, reorderbuffer, Val(dim))
end
KDTree(points, metric, leafsize = leafsize, storedata = storedata, reorder = reorder,
reorderbuffer = reorderbuffer_points, indicesfor = indicesfor)
reorderbuffer = reorderbuffer_points)
end

function build_KDTree(index::Int,
Expand Down
10 changes: 5 additions & 5 deletions src/knn.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ function knn(tree::NNTree{V}, points::Vector{T}, k::Int, sortres=false, skip::Fu
check_input(tree, points)
check_k(tree, k)
n_points = length(points)
@compat dists = [Vector{get_T(eltype(V))}(uninitialized, k) for _ in 1:n_points]
@compat idxs = [Vector{Int}(uninitialized, k) for _ in 1:n_points]
dists = [Vector{get_T(eltype(V))}(undef, k) for _ in 1:n_points]
idxs = [Vector{Int}(undef, k) for _ in 1:n_points]
for i in 1:n_points
knn_point!(tree, points[i], sortres, dists[i], idxs[i], skip)
end
Expand All @@ -38,8 +38,8 @@ end

function knn(tree::NNTree{V}, point::AbstractVector{T}, k::Int, sortres=false, skip::Function=always_false) where {V, T <: Number}
check_k(tree, k)
@compat idx = Vector{Int}(uninitialized, k)
@compat dist = Vector{get_T(eltype(V))}(uninitialized, k)
idx = Vector{Int}(undef, k)
dist = Vector{get_T(eltype(V))}(undef, k)
knn_point!(tree, point, sortres, dist, idx, skip)
return idx, dist
end
Expand All @@ -48,7 +48,7 @@ function knn(tree::NNTree{V}, point::Matrix{T}, k::Int, sortres=false, skip::Fun
dim = size(point, 1)
npoints = size(point, 2)
if isbits(T)
new_data = reinterpret_or_copy(T, point, Val(dim))
new_data = copy_svec(T, point, Val(dim))
else
new_data = SVector{dim,T}[SVector{dim,T}(point[:, i]) for i in 1:npoints]
end
Expand Down
2 changes: 1 addition & 1 deletion src/tree_data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ struct TreeData
end


function TreeData{V}(data::Vector{V}, leafsize)
function TreeData(data::Vector{V}, leafsize) where V
n_dim, n_p = length(V), length(data)

# If number of points is zero
Expand Down
7 changes: 1 addition & 6 deletions src/utilities.jl
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,5 @@ end
end

# Instead of ReinterpretArray wrapper, copy an array, interpreting it as a vector of SVectors
if VERSION < v"0.7.0-DEV.2008"
@noinline reinterpret_or_copy(::Type{T}, data, ::Val{dim}) where {T, dim} =
reinterpret(SVector{dim,T}, data, (length(data) ÷ dim,))
else
@noinline reinterpret_or_copy(::Type{T}, data, ::Val{dim}) where {T, dim} =
# Build a freshly allocated vector of `SVector{dim,T}` from `data`: each run of
# `dim` consecutive elements (in linear-index order) becomes one static vector.
function copy_svec(::Type{T}, data, ::Val{dim}) where {T, dim}
    # Zero-based offset of the first element of every point inside `data`.
    offsets = 0:dim:(length(data) - 1)
    return [
        SVector{dim,T}(ntuple(k -> data[offset + k], Val(dim))) for offset in offsets
    ]
end
end
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
using NearestNeighbors
using StaticArrays

using Base.Test
using Test
using LinearAlgebra

import Distances: Metric, evaluate
struct CustomMetric1 <: Metric end
Expand Down
Loading

0 comments on commit 900dc50

Please sign in to comment.