Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Selection and large data performance improvements #41

Merged
merged 49 commits into from
Sep 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
e680cc8
Binary attributes for data; use updatetriggers on selection update.
lmcinnes Sep 5, 2024
61610f7
Selection manager using set intersections
lmcinnes Sep 5, 2024
1121319
Make histogram more efficient and rendering focus data
lmcinnes Sep 6, 2024
f95689b
Fix brushing
lmcinnes Sep 6, 2024
8d7ea5f
Remove loaders dependency; pure arrow.
lmcinnes Sep 6, 2024
c1d6e6b
Ensure we don't mess up histogram selections
lmcinnes Sep 7, 2024
086bc14
Fix focusData creation
lmcinnes Sep 7, 2024
958736a
Add initial lasso selection capability
lmcinnes Sep 7, 2024
c199127
Refactor lasso selection into a class.
lmcinnes Sep 7, 2024
4157df3
Add a quad-tree to speed up selection somewhat.
lmcinnes Sep 8, 2024
063b002
DataSelectionManager has two tiers
lmcinnes Sep 8, 2024
d91bf34
Lasso select leaves unselected points faded but visible
lmcinnes Sep 8, 2024
7eda869
Fix offline_data for the new loading system with arrow-js.
lmcinnes Sep 9, 2024
b087583
Add drawer animation for selection display
lmcinnes Sep 9, 2024
120de9e
Fix addition of dependencies from selection handler
lmcinnes Sep 10, 2024
491faf7
Have a basic datamap class to handle everything
lmcinnes Sep 14, 2024
e7c0001
Remove unnecessary comment
lmcinnes Sep 14, 2024
84798b9
Put array writing for binary data in datamap class
lmcinnes Sep 14, 2024
406ed1a
Handle not yet loaded metadata in selection handlers
lmcinnes Sep 14, 2024
a83c50c
Webworkers load the data off the main thread
lmcinnes Sep 14, 2024
67b68db
Clean up histogram data structures a little
lmcinnes Sep 15, 2024
0cdfc8e
Switch to JSON for metadata as it is usually strings anyway.
lmcinnes Sep 15, 2024
3109a59
Generate histogram bin data in python
lmcinnes Sep 16, 2024
62dc18f
Alpha shapes were taking too long; make them faster
lmcinnes Sep 17, 2024
b86f9b9
Precompute histogram binning
lmcinnes Sep 17, 2024
517d953
Handle file based histogram data
lmcinnes Sep 17, 2024
fd0db0a
Index data has to be cast to frame
lmcinnes Sep 17, 2024
439fcbf
Clean up CSS; allow log scale in histograms
lmcinnes Sep 17, 2024
0530bc2
Clean up selection API on Datamp objects
lmcinnes Sep 17, 2024
1ef8a4f
Handle origin URLs in iFrames
lmcinnes Sep 18, 2024
db5cb65
Make everything work with jupyter -- this is messy, but I believe it …
lmcinnes Sep 18, 2024
977ab06
Clean up commented out code; restore tooltips
lmcinnes Sep 19, 2024
dac37e8
Fix up custom docs and examples for new selection etc.
lmcinnes Sep 19, 2024
e4ac020
Add data files for customization docs
lmcinnes Sep 19, 2024
3644bdb
Delete old data files for customization docs
lmcinnes Sep 19, 2024
f65e5e8
Add a word cloud selection handler
lmcinnes Sep 21, 2024
12d7e60
Fix corner cases of bin handling; add bin ranges
lmcinnes Sep 21, 2024
41dcec9
Add histogram options; fix histogram bugs
lmcinnes Sep 21, 2024
fb89275
Fix bin axis labelling and ticks
lmcinnes Sep 21, 2024
9cf6153
Get selection directly from the datamap object
lmcinnes Sep 21, 2024
74b07b1
An assortment of bug fixes.
lmcinnes Sep 23, 2024
839c088
Data files for new documentation, plus new notebook
lmcinnes Sep 23, 2024
ba5bb3b
Update docs
lmcinnes Sep 23, 2024
6e986cc
Fix lasso drawing
lmcinnes Sep 24, 2024
eef9144
Update selection docs
lmcinnes Sep 24, 2024
df4ff12
Fix potential issues in alpha shapes
lmcinnes Sep 26, 2024
9f3b2ea
Add progress bars on loading data for inline data.
lmcinnes Sep 26, 2024
2218c80
Fix cursor during selection
lmcinnes Sep 29, 2024
2e884fc
Update docs for new interactive plotting updates
lmcinnes Sep 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 34 additions & 22 deletions datamapplot/alpha_shapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,38 +8,41 @@
def circumradius(points):
    """Return the circumradius of the 2D triangle whose vertices are ``points``.

    Parameters
    ----------
    points : ndarray
        Vertex coordinates, one vertex per row, x in column 0 and y in
        column 1. Only the first three rows are used.

    Returns
    -------
    float
        The distance from ``points[0]`` to the circumcentre, or ``0.0`` when
        the triangle is degenerate (the edge-vector determinant is zero).
    """
    # Edge vectors from the first vertex to the other two.
    bc = points[1:] - points[0]
    # Twice the 2x2 determinant of the edge vectors; zero means the vertices
    # are collinear or repeated.
    d = 2 * (bc[0, 0] * bc[1, 1] - bc[0, 1] * bc[1, 0])
    if d == 0:
        # Avoid dividing by zero below. A float is returned so that both
        # exits of the function yield the same type.
        # NOTE(review): a collinear triple has no finite circumradius; a
        # caller that tests `circumradius(...) < alpha` will accept such
        # triangles -- confirm this is intended.
        return 0.0
    b_norm = bc[0, 0] * bc[0, 0] + bc[0, 1] * bc[0, 1]
    c_norm = bc[1, 0] * bc[1, 0] + bc[1, 1] * bc[1, 1]
    # Circumcentre expressed relative to points[0].
    ux = (bc[1, 1] * b_norm - bc[0, 1] * c_norm) / d
    uy = (bc[0, 0] * c_norm - bc[1, 0] * b_norm) / d
    return np.sqrt(ux * ux + uy * uy)


def create_boundary_polygons(points, simplices, alpha=0.1):
all_edges = set([(np.int32(0), np.int32(0)) for i in range(0)])
boundary = set([(np.int32(0), np.int32(0)) for i in range(0)])
@numba.njit(locals={"candidate_idx": numba.uint64})
def find_boundary_candidates(points, simplices, alpha=0.1):
    """Collect the edges of every simplex whose circumradius is below ``alpha``.

    Parameters
    ----------
    points : ndarray
        Point coordinates, indexed by the vertex ids stored in ``simplices``.
    simplices : ndarray
        One triangle per row, three vertex indices per row.
        NOTE(review): edges are emitted in the vertex order found in each
        row, so two triangles only produce an identical edge pair if the
        rows are sorted beforehand -- confirm the caller does this.
    alpha : float, default=0.1
        Simplices with ``circumradius(...) < alpha`` contribute their edges.

    Returns
    -------
    ndarray of int32, shape (n_edges, 2)
        Three rows per accepted simplex: (v0, v1), (v0, v2), (v1, v2).
        An edge shared by two accepted simplices appears twice.
    """
    # Worst case every simplex is accepted, giving three edges each; the
    # array is filled from the front and trimmed on return.
    candidates = np.full((simplices.shape[0] * 3, 2), -1, dtype=np.int32)
    candidate_idx = 0
    for simplex in simplices:
        if circumradius(points[simplex]) < alpha:
            candidates[candidate_idx] = (simplex[0], simplex[1])
            candidates[candidate_idx + 1] = (simplex[0], simplex[2])
            candidates[candidate_idx + 2] = (simplex[1], simplex[2])
            candidate_idx += 3
    return candidates[:candidate_idx]

if len(boundary) == 0:
raise ValueError("The value of polygon_alpha was too low, and no boundary was formed. Try increasing polygon_alpha.")
@numba.njit()
def boundary_from_candidates(boundary_candidates):
    """Return the set of edges that occur exactly once among the candidates.

    Parameters
    ----------
    boundary_candidates : ndarray
        One edge per row; columns 0 and 1 hold the two vertex indices.

    Returns
    -------
    set of tuple
        Every ``(a, b)`` pair that appears in exactly one row.
    """
    # The comprehension over range(0) yields an empty dict; presumably it is
    # written this way so numba can infer the key and value types.
    edge_counts = {(np.int32(0), np.int32(0)): 0 for i in range(0)}
    for row in boundary_candidates:
        edge = (row[0], row[1])
        if edge not in edge_counts:
            edge_counts[edge] = 1
        else:
            edge_counts[edge] += 1

    return set([edge for edge in edge_counts if edge_counts[edge] == 1])

@numba.njit()
def build_polygons(boundary):
polygons = []
search_set = boundary.copy()
sequence = list(search_set.pop())
Expand All @@ -59,6 +62,15 @@ def create_boundary_polygons(points, simplices, alpha=0.1):
sequence = list(search_set.pop())

polygons.append(sequence)
return polygons

def create_boundary_polygons(points, simplices, alpha=0.1):
simplices.sort(axis=1)
boundary_candidates = find_boundary_candidates(points, simplices, alpha=alpha)
boundary = boundary_from_candidates(boundary_candidates)
if len(boundary) == 0:
raise ValueError("The value of polygon_alpha was too low, and no boundary was formed. Try increasing polygon_alpha.")
polygons = build_polygons(boundary)

result = [
np.empty((len(sequence) + 1, 2), dtype=np.float32) for sequence in polygons
Expand Down
8 changes: 7 additions & 1 deletion datamapplot/create_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ def create_interactive_plot(
color_cluster_boundaries=True,
polygon_alpha=0.1,
cvd_safer=False,
jupyterhub_api_token=None,
**render_html_kwds,
):
"""
Expand Down Expand Up @@ -442,6 +443,11 @@ def create_interactive_plot(
Whether to use a colour palette that is safer for colour vision deficiency (CVD).
This will override any provided cmap and use a CVD safer palette instead.

jupyterhub_api_token: str or None (optional, default=None)
The JupyterHub API token to use when rendering the plot inline in a notebook via jupyterhub.
This should not be necessary for most users, but can be useful in some environments where
the default token is not available.

**render_html_kwds:
All other keyword arguments will be passed through the `render_html` function. Please
see the docstring of that function for further options that can control the
Expand Down Expand Up @@ -607,4 +613,4 @@ def create_interactive_plot(
**render_html_kwds,
)

return InteractiveFigure(html_str, width=width, height=height)
return InteractiveFigure(html_str, width=width, height=height, api_token=jupyterhub_api_token)
Loading