From 683529a8fc2d0a4a6ab9137e84af952fdbf8a64e Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Tue, 1 Aug 2023 15:24:06 +0200 Subject: [PATCH 01/12] copy and paste functions --- structuretoolkit/analyse/spatial.py | 205 ++++++++++------------------ 1 file changed, 72 insertions(+), 133 deletions(-) diff --git a/structuretoolkit/analyse/spatial.py b/structuretoolkit/analyse/spatial.py index 6b97f52a4..762e700c4 100644 --- a/structuretoolkit/analyse/spatial.py +++ b/structuretoolkit/analyse/spatial.py @@ -55,6 +55,49 @@ def get_mean_positions(positions, cell, pbc, labels): return mean_positions +def create_gridpoints(structure, n_gridpoints_per_angstrom=5): + cell = get_vertical_length(structure=structure) + n_points = (n_gridpoints_per_angstrom * cell).astype(int) + positions = np.meshgrid( + *[np.linspace(0, 1, n_points[i], endpoint=False) for i in range(3)] + ) + positions = np.stack(positions, axis=-1).reshape(-1, 3) + return np.einsum("ji,nj->ni", structure.cell, positions) + + +def remove_too_close(positions, structure, min_distance=1): + neigh = get_neighborhood( + structure=structure, positions=positions, num_neighbors=1 + ) + return positions[neigh.distances.flatten() > min_distance] + + +def set_to_high_symmetry_points(positions, structure, neigh, decimals=4): + for _ in range(10): + neigh = neigh.get_neighborhood(positions) + dx = np.mean(neigh.vecs, axis=-2) + if np.allclose(dx, 0): + return positions + positions += dx + positions = get_wrapped_coordinates( + structure=structure, positions=positions + ) + unique_indices = np.unique(np.round(positions, decimals=decimals), axis=0, return_index=True)[1] + positions = positions[unique_indices] + raise ValueError("High symmetry points could not be detected") + + +def cluster_by_steinhardt(positions, neigh, l_values, q_eps, var_ratio): + neigh = neigh.get_neighborhood(positions) + Q_values = np.array([neigh.get_steinhardt_parameter(ll) for ll in l_values]) + db = DBSCAN(q_eps) + var = np.std(neigh.distances, axis=-1) + descriptors = np.concatenate((Q_values, [var * var_ratio]), axis=0) + labels = db.fit_predict(descriptors.T) + var_mean = np.array([np.mean(var[labels==ll]) for ll in np.unique(labels) if ll >= 0]) + return positions[labels==np.argmin(var_mean)] + + class Interstitials: """ Class to search for interstitial sites @@ -92,9 +135,10 @@ def __init__( n_gridpoints_per_angstrom=5, min_distance=1, use_voronoi=False, - variance_buffer=0.01, - n_iterations=2, - eps=0.1, + x_eps=0.15, + l_values=np.arange(2, 13), + q_eps=0.3, + var_ratio=5, ): """ @@ -116,66 +160,35 @@ def __init__( It should be close to 0 for perfect crystals and slightly higher values for structures containing defects. Set `variance_buffer` to `numpy.inf` if no selection by variance value should take place. - n_iterations (int): Number of iterations for the shifting of the candidate positions - to the nearest symmetric positions with respect to `num_neighbors`. In most of the - cases, 1 is enough. In some rare cases (notably tetrahedral sites in bcc), it - should be at least 2. It is unlikely that it has to be larger than 2. Set - `n_iterations` to 0 if no shifting should take place. eps (float): Distance below which two interstitial candidate sites to be considered as one site after the symmetrization of the points. Set `eps` to 0 if clustering should not be done. """ - self._hull = None - self._neigh = None - self._positions = None - self.num_neighbors = num_neighbors - self.structure = structure - self._initialize( - n_gridpoints_per_angstrom=n_gridpoints_per_angstrom, - min_distance=min_distance, - use_voronoi=use_voronoi, - variance_buffer=variance_buffer, - n_iterations=n_iterations, - eps=eps, - ) - - def _initialize( - self, - n_gridpoints_per_angstrom=5, - min_distance=1, - use_voronoi=False, - variance_buffer=0.01, - n_iterations=2, - eps=0.1, - ): if use_voronoi: - self.positions = self.structure.analyse.get_voronoi_vertices() + self.initial_positions = structure.analyse.get_voronoi_vertices() else: - self.positions = self._create_gridpoints( - n_gridpoints_per_angstrom=n_gridpoints_per_angstrom + self.initial_positions = create_gridpoints( + structure=structure, n_gridpoints_per_angstrom=n_gridpoints_per_angstrom ) - self._remove_too_close(min_distance=min_distance) - for _ in range(n_iterations): - self._set_interstitials_to_high_symmetry_points() - self._kick_out_points(variance_buffer=variance_buffer) - self._cluster_points(eps=eps) - - @property - def num_neighbors(self): - """ - Number of atoms (vertices) to consider for each interstitial atom. By definition, - tetrahedral sites should have 4 and octahedral sites 6. - """ - return self._num_neighbors - - @num_neighbors.setter - def num_neighbors(self, n): - self.reset() - self._num_neighbors = n + self._neigh = structure.get_neighbors(num_neighbors=num_neighbors) + self.workflow = [ + {"f": remove_too_close, "args": {"structure": structure, "min_distance": min_distance}}, + {"f": set_to_high_symmetry_points, "args": {"structure": structure, "neigh": self.neigh}}, + {"f": structure.analyse.cluster_positions, "args": {"eps": x_eps}}, + { + "f": cluster_by_steinhardt, + "args": {"neigh": self.neigh, "l_values": l_values, "q_eps": q_eps, "var_ratio": var_ratio} + }, + ] + self._positions = None + self.structure = structure - def reset(self): - self._hull = None - self._neigh = None + def run_workflow(self, positions, steps=-1): + for ii, ww in enumerate(self.workflow): + positions = ww["f"](positions=positions, **ww["args"]) + if ii == steps: + return positions + return positions @property def neigh(self): @@ -185,96 +198,22 @@ def neigh(self): its nearest neighboring atoms. The functionalities of `neigh` follow those of `structuretoolkit.analyse.neighbors`. """ - if self._neigh is None: - self._neigh = get_neighborhood( - structure=self.structure, - positions=self.positions, - num_neighbors=self.num_neighbors, - ) return self._neigh @property def positions(self): - """ - Positions of the interstitial candidates (and not those of the atoms). - - IMPORTANT: Do not set positions via numpy setter, i.e. - - BAD: - ``` - >>> Interstitials.neigh.positions[0][0] = x - ``` - - GOOD: - ``` - >>> positions = Interstitials.neigh.positions - >>> positions[0][0] = x - >>> Interstitialsneigh.positions = positions - ``` - - This is because in the first case related properties (most importantly the neighborhood - information) is not updated, which might lead to inconsistencies. - """ + if self._positions is None: + self._positions = self.run_workflow(self.initial_positions) + self._neigh = self.neigh.get_neighborhood(self._positions) return self._positions - @positions.setter - def positions(self, x): - self.reset() - self._positions = x - @property def hull(self): """ Convex hull of each atom. It is mainly used for the volume and area calculation of each interstitial candidate. For more info, see `get_volumes` and `get_areas`. """ - if self._hull is None: - self._hull = [ConvexHull(v) for v in self.neigh.vecs] - return self._hull - - def _create_gridpoints(self, n_gridpoints_per_angstrom=5): - cell = get_vertical_length(structure=self.structure) - n_points = (n_gridpoints_per_angstrom * cell).astype(int) - positions = np.meshgrid( - *[np.linspace(0, 1, n_points[i], endpoint=False) for i in range(3)] - ) - positions = np.stack(positions, axis=-1).reshape(-1, 3) - return np.einsum("ji,nj->ni", self.structure.cell, positions) - - def _remove_too_close(self, min_distance=1): - neigh = get_neighborhood( - structure=self.structure, positions=self.positions, num_neighbors=1 - ) - self.positions = self.positions[neigh.distances.flatten() > min_distance] - - def _set_interstitials_to_high_symmetry_points(self): - self.positions = self.positions + np.mean(self.neigh.vecs, axis=-2) - self.positions = get_wrapped_coordinates( - structure=self.structure, positions=self.positions - ) - - def _kick_out_points(self, variance_buffer=0.01): - variance = self.get_variances() - min_var = variance.min() - self.positions = self.positions[variance < min_var + variance_buffer] - - def _cluster_points(self, eps=0.1): - from sklearn.cluster import DBSCAN - - if eps == 0: - return - extended_positions, indices = get_extended_positions( - structure=self.structure, - width=eps, - return_indices=True, - positions=self.positions, - ) - labels = DBSCAN(eps=eps, min_samples=1).fit_predict(extended_positions) - coo = coo_matrix((labels, (np.arange(len(labels)), indices))) - labels = coo.max(axis=0).toarray().flatten() - self.positions = get_mean_positions( - self.positions, self.structure.cell, self.structure.pbc, labels - ) + return [ConvexHull(v) for v in self.neigh.vecs] def get_variances(self): """ From 3275c4fcf94fbec7522849ab98571f13150dd817 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Tue, 1 Aug 2023 16:22:49 +0200 Subject: [PATCH 02/12] adapt to environment --- structuretoolkit/analyse/spatial.py | 30 ++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/structuretoolkit/analyse/spatial.py b/structuretoolkit/analyse/spatial.py index 762e700c4..2d209b642 100644 --- a/structuretoolkit/analyse/spatial.py +++ b/structuretoolkit/analyse/spatial.py @@ -6,13 +6,14 @@ from scipy.sparse import coo_matrix from scipy.spatial import ConvexHull, Delaunay, Voronoi -from structuretoolkit.analyse.neighbors import get_neighborhood +from structuretoolkit.analyse.neighbors import get_neighborhood, get_neighbors from structuretoolkit.common.helper import ( get_average_of_unique_labels, get_extended_positions, get_vertical_length, get_wrapped_coordinates, ) +from sklearn.cluster import DBSCAN __author__ = "Joerg Neugebauer, Sam Waseda" __copyright__ = ( @@ -139,6 +140,7 @@ def __init__( l_values=np.arange(2, 13), q_eps=0.3, var_ratio=5, + **args ): """ @@ -165,19 +167,33 @@ def __init__( not be done. """ if use_voronoi: - self.initial_positions = structure.analyse.get_voronoi_vertices() + self.initial_positions = get_voronoi_vertices(structure) else: self.initial_positions = create_gridpoints( structure=structure, n_gridpoints_per_angstrom=n_gridpoints_per_angstrom ) - self._neigh = structure.get_neighbors(num_neighbors=num_neighbors) + self._neigh = get_neighbors(structure=structure, num_neighbors=num_neighbors) self.workflow = [ - {"f": remove_too_close, "args": {"structure": structure, "min_distance": min_distance}}, - {"f": set_to_high_symmetry_points, "args": {"structure": structure, "neigh": self.neigh}}, - {"f": structure.analyse.cluster_positions, "args": {"eps": x_eps}}, + { + "f": remove_too_close, + "args": {"structure": structure, "min_distance": min_distance} + }, + { + "f": set_to_high_symmetry_points, + "args": {"structure": structure, "neigh": self.neigh} + }, + { + "f": lambda **args: get_cluster_positions(structure, **args), + "args": {"eps": x_eps} + }, { "f": cluster_by_steinhardt, - "args": {"neigh": self.neigh, "l_values": l_values, "q_eps": q_eps, "var_ratio": var_ratio} + "args": { + "neigh": self.neigh, + "l_values": l_values, + "q_eps": q_eps, + "var_ratio": var_ratio + } }, ] self._positions = None From b2cd2b3ee103da90f89953e7db9bfcedb0994f5e Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Tue, 1 Aug 2023 17:44:29 +0200 Subject: [PATCH 03/12] add min_samples --- structuretoolkit/analyse/spatial.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/structuretoolkit/analyse/spatial.py b/structuretoolkit/analyse/spatial.py index 2d209b642..6a9ceba66 100644 --- a/structuretoolkit/analyse/spatial.py +++ b/structuretoolkit/analyse/spatial.py @@ -88,10 +88,12 @@ def set_to_high_symmetry_points(positions, structure, neigh, decimals=4): raise ValueError("High symmetry points could not be detected") -def cluster_by_steinhardt(positions, neigh, l_values, q_eps, var_ratio): +def cluster_by_steinhardt(positions, neigh, l_values, q_eps, var_ratio, min_samples): + if min_samples is None: + min_samples = min(len(neigh.distances), 5) neigh = neigh.get_neighborhood(positions) Q_values = np.array([neigh.get_steinhardt_parameter(ll) for ll in l_values]) - db = DBSCAN(q_eps) + db = DBSCAN(q_eps, min_samples=min_samples) var = np.std(neigh.distances, axis=-1) descriptors = np.concatenate((Q_values, [var * var_ratio]), axis=0) labels = db.fit_predict(descriptors.T) @@ -136,10 +138,12 @@ def __init__( n_gridpoints_per_angstrom=5, min_distance=1, use_voronoi=False, - x_eps=0.15, + x_eps=0.1, l_values=np.arange(2, 13), q_eps=0.3, var_ratio=5, + min_samples=None, + neigh_args={}, **args ): """ @@ -172,7 +176,9 @@ def __init__( self.initial_positions = create_gridpoints( structure=structure, n_gridpoints_per_angstrom=n_gridpoints_per_angstrom ) - self._neigh = get_neighbors(structure=structure, num_neighbors=num_neighbors) + self._neigh = get_neighbors( + structure=structure, num_neighbors=num_neighbors, **neigh_args + ) self.workflow = [ { "f": remove_too_close, @@ -192,14 +198,17 @@ def __init__( "neigh": self.neigh, "l_values": l_values, "q_eps": q_eps, - "var_ratio": var_ratio + "var_ratio": var_ratio, + "min_samples": min_samples } }, ] self._positions = None self.structure = structure - def run_workflow(self, positions, steps=-1): + def run_workflow(self, positions=None, steps=-1): + if positions is None: + positions = self.initial_positions.copy() for ii, ww in enumerate(self.workflow): positions = ww["f"](positions=positions, **ww["args"]) if ii == steps: @@ -219,7 +228,7 @@ def neigh(self): @property def positions(self): if self._positions is None: - self._positions = self.run_workflow(self.initial_positions) + self._positions = self.run_workflow() self._neigh = self.neigh.get_neighborhood(self._positions) return self._positions @@ -568,7 +577,6 @@ def get_cluster_positions( positions (numpy.ndarray): Mean positions label (numpy.ndarray): Labels of the positions (returned when `return_labels = True`) """ - from sklearn.cluster import DBSCAN positions = structure.positions if positions is None else np.array(positions) if buffer_width is None: From 618078010a69ee58759e2369ba620e49bb7c3c7e Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Tue, 1 Aug 2023 18:11:07 +0200 Subject: [PATCH 04/12] update doc --- structuretoolkit/analyse/spatial.py | 67 +++++++++++++++++++---------- 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/structuretoolkit/analyse/spatial.py b/structuretoolkit/analyse/spatial.py index 6a9ceba66..8b02fa933 100644 --- a/structuretoolkit/analyse/spatial.py +++ b/structuretoolkit/analyse/spatial.py @@ -107,16 +107,16 @@ class Interstitials: This class internally does the following steps: - 1. Initialize grid points (or Voronoi vertices) which are considered as + 0. Initialize grid points (or Voronoi vertices) which are considered as interstitial site candidates. - 2. Eliminate points within a distance from the nearest neighboring atoms as + 1. Eliminate points within a distance from the nearest neighboring atoms as given by `min_distance` - 3. Initialize neighbor environment using `get_neighbors` - 4. Shift interstitial candidates to the nearest symmetric points with respect to the + 2. Shift interstitial candidates to the nearest symmetric points with respect to the neighboring atom sites/vertices. - 5. Kick out points with large neighbor distance variances; this eliminates "irregular" - shaped interstitials - 6. Cluster interstitial candidates to avoid point overlapping. + 3. Cluster interstitial candidates to avoid point overlapping. + 4. Cluster interstitial candidates by their Steinhardt parameters (cf. `l_values` for + the values of the spherical harmonics) and the variance of the distances and + take the group with the smallest average distance variance The interstitial sites can be obtained via `positions` @@ -129,6 +129,19 @@ class Interstitials: - `get_steinhardt_parameters()` - `get_volumes()` - `get_areas()` + + Troubleshooting: + + Identifying interstitial sites is not a very easy task. The algorithm employed here will + probably do a good job, but if it fails, it might be good to look at the following points + + - The intermediate results can be accessed via `run_workflow` by specifying the step number. + - The most vulnerable point is the last step, clustering by Steinhardt parameters. Take a + look at the step before and after. If the interstitial sites are present in the step + before the clustering by Steinhardt parameters, you might want to change the values of + `q_eps` and `var_ratio`. It might make a difference to use `l_values` as well. + - It might fail to find sites if the box is very small. In that case it might make sense to + set `min_samples` very low (you can take 1) """ def __init__( @@ -161,15 +174,17 @@ def __init__( `min_distance` to 0 if no point should be removed. use_voronoi (bool): Use Voronoi vertices for the initial interstitial candidate positions instead of grid points. - variance_buffer (bool): Maximum permitted variance value (in distance unit) of the - neighbor distance values with respect to the minimum value found for each point. - It should be close to 0 for perfect crystals and slightly higher values for - structures containing defects. Set `variance_buffer` to `numpy.inf` if no selection - by variance value should take place. - eps (float): Distance below which two interstitial candidate sites to be considered as - one site after the symmetrization of the points. Set `eps` to 0 if clustering should - not be done. - """ + x_eps (bool): eps value for the clustering of interstitial candidate positions + l_values (list): list of values for the Steinhardt parameter values for the + classification of the interstitial candidate points + q_eps (float): eps value for the clustering of interstitial candidate points based + on Steinhardt parameters and distance variances. This might play a crucial role + in identifying the correct interstitial sites + var_ratio (float): factor to be multiplied to the variance values in order to give + a larger weight to the variances. + min_samples (int/None): `min_sample` in the point clustering. + neigh_argss (dict): arguments to be added to `get_neighbors` + """ if use_voronoi: self.initial_positions = get_voronoi_vertices(structure) else: @@ -295,9 +310,13 @@ def get_interstitials( n_gridpoints_per_angstrom=5, min_distance=1, use_voronoi=False, - variance_buffer=0.01, - n_iterations=2, - eps=0.1, + x_eps=0.1, + l_values=np.arange(2, 13), + q_eps=0.3, + var_ratio=5, + min_samples=None, + neigh_args={}, + **args ): return Interstitials( structure=structure, @@ -305,9 +324,13 @@ def get_interstitials( n_gridpoints_per_angstrom=n_gridpoints_per_angstrom, min_distance=min_distance, use_voronoi=use_voronoi, - variance_buffer=variance_buffer, - n_iterations=n_iterations, - eps=eps, + x_eps=x_eps, + l_values=l_values, + q_eps=q_eps, + var_ratio=var_ratio, + min_samples=min_samples, + neigh_args=neigh_args, + **args ) From 63e68d88b58376df376ead05ef4e4c05435dc57e Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Tue, 1 Aug 2023 18:18:03 +0200 Subject: [PATCH 05/12] Manual Black --- structuretoolkit/analyse/spatial.py | 30 ++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/structuretoolkit/analyse/spatial.py b/structuretoolkit/analyse/spatial.py index 8b02fa933..7b252ab55 100644 --- a/structuretoolkit/analyse/spatial.py +++ b/structuretoolkit/analyse/spatial.py @@ -67,9 +67,7 @@ def create_gridpoints(structure, n_gridpoints_per_angstrom=5): def remove_too_close(positions, structure, min_distance=1): - neigh = get_neighborhood( - structure=structure, positions=positions, num_neighbors=1 - ) + neigh = get_neighborhood(structure=structure, positions=positions, num_neighbors=1) return positions[neigh.distances.flatten() > min_distance] @@ -80,10 +78,10 @@ def set_to_high_symmetry_points(positions, structure, neigh, decimals=4): if np.allclose(dx, 0): return positions positions += dx - positions = get_wrapped_coordinates( - structure=structure, positions=positions - ) - unique_indices = np.unique(np.round(positions, decimals=decimals), axis=0, return_index=True)[1] + positions = get_wrapped_coordinates(structure=structure, positions=positions) + unique_indices = np.unique( + np.round(positions, decimals=decimals), axis=0, return_index=True + )[1] positions = positions[unique_indices] raise ValueError("High symmetry points could not be detected") @@ -97,8 +95,10 @@ def cluster_by_steinhardt(positions, neigh, l_values, q_eps, var_ratio, min_samp var = np.std(neigh.distances, axis=-1) descriptors = np.concatenate((Q_values, [var * var_ratio]), axis=0) labels = db.fit_predict(descriptors.T) - var_mean = np.array([np.mean(var[labels==ll]) for ll in np.unique(labels) if ll >= 0]) - return positions[labels==np.argmin(var_mean)] + var_mean = np.array( + [np.mean(var[labels==ll]) for ll in np.unique(labels) if ll >= 0] + ) + return positions[labels == np.argmin(var_mean)] class Interstitials: @@ -183,8 +183,8 @@ def __init__( var_ratio (float): factor to be multiplied to the variance values in order to give a larger weight to the variances. min_samples (int/None): `min_sample` in the point clustering. - neigh_argss (dict): arguments to be added to `get_neighbors` - """ + neigh_args (dict): arguments to be added to `get_neighbors` + """ if use_voronoi: self.initial_positions = get_voronoi_vertices(structure) else: @@ -197,11 +197,11 @@ def __init__( self.workflow = [ { "f": remove_too_close, - "args": {"structure": structure, "min_distance": min_distance} + "args": {"structure": structure, "min_distance": min_distance}, }, { "f": set_to_high_symmetry_points, - "args": {"structure": structure, "neigh": self.neigh} + "args": {"structure": structure, "neigh": self.neigh}, }, { "f": lambda **args: get_cluster_positions(structure, **args), @@ -214,8 +214,8 @@ def __init__( "l_values": l_values, "q_eps": q_eps, "var_ratio": var_ratio, - "min_samples": min_samples - } + "min_samples": min_samples, + }, }, ] self._positions = None From 039a57f354aecc8ee73adb6e91f26b85cc2988ad Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 1 Aug 2023 15:00:37 -0600 Subject: [PATCH 06/12] black formatting --- structuretoolkit/analyse/spatial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/structuretoolkit/analyse/spatial.py b/structuretoolkit/analyse/spatial.py index 7b252ab55..d9d0be028 100644 --- a/structuretoolkit/analyse/spatial.py +++ b/structuretoolkit/analyse/spatial.py @@ -96,7 +96,7 @@ def cluster_by_steinhardt(positions, neigh, l_values, q_eps, var_ratio, min_samp descriptors = np.concatenate((Q_values, [var * var_ratio]), axis=0) labels = db.fit_predict(descriptors.T) var_mean = np.array( - [np.mean(var[labels==ll]) for ll in np.unique(labels) if ll >= 0] + [np.mean(var[labels == ll]) for ll in np.unique(labels) if ll >= 0] ) return positions[labels == np.argmin(var_mean)] @@ -205,7 +205,7 @@ def __init__( }, { "f": lambda **args: get_cluster_positions(structure, **args), - "args": {"eps": x_eps} + "args": {"eps": x_eps}, }, { "f": cluster_by_steinhardt, From 9d3aa4e7857a7a30a9fcd51413f2bd1e5ccdac8d Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 1 Aug 2023 15:02:15 -0600 Subject: [PATCH 07/12] move import statements into functions --- structuretoolkit/analyse/spatial.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/structuretoolkit/analyse/spatial.py b/structuretoolkit/analyse/spatial.py index d9d0be028..537739dcc 100644 --- a/structuretoolkit/analyse/spatial.py +++ b/structuretoolkit/analyse/spatial.py @@ -13,7 +13,6 @@ get_vertical_length, get_wrapped_coordinates, ) -from sklearn.cluster import DBSCAN __author__ = "Joerg Neugebauer, Sam Waseda" __copyright__ = ( @@ -87,6 +86,8 @@ def set_to_high_symmetry_points(positions, structure, neigh, decimals=4): def cluster_by_steinhardt(positions, neigh, l_values, q_eps, var_ratio, min_samples): + from sklearn.cluster import DBSCAN + if min_samples is None: min_samples = min(len(neigh.distances), 5) neigh = neigh.get_neighborhood(positions) @@ -600,6 +601,7 @@ def get_cluster_positions( positions (numpy.ndarray): Mean positions label (numpy.ndarray): Labels of the positions (returned when `return_labels = True`) """ + from sklearn.cluster import DBSCAN positions = structure.positions if positions is None else np.array(positions) if buffer_width is None: From 8676b65ac9f483611dd406890d9a1b97253874d3 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Wed, 2 Aug 2023 09:49:28 +0200 Subject: [PATCH 08/12] test individual functionalities --- tests/test_analyse.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_analyse.py b/tests/test_analyse.py index 28ea8feb9..8fb073e7b 100644 --- a/tests/test_analyse.py +++ b/tests/test_analyse.py @@ -10,6 +10,7 @@ from scipy.spatial import Voronoi from ase.lattice.cubic import BodyCenteredCubic import structuretoolkit as stk +from scipy.spatial import cKDTree try: @@ -176,6 +177,20 @@ def test_get_interstitials_fcc(self): msg='Distance variance in FCC must be 0' ) + def test_interstitials_details(self): + fcc = bulk('Al', cubic=True) + inter = stk.analyse.get_interstitials(structure=fcc, num_neighbors=4) + x = inter.run_workflow(steps=0) + tree = cKDTree(fcc.positions) + self.assertGreater( + tree.query(x)[0].min(), 1, msg="remove_too_close did not remove closest points" + ) + self.assertGreater( + len(x), + len(inter.run_workflow(steps=1)), + msg="set_to_high_symmetry_points did not reduce overlapping points" + ) + def test_strain(self): structure_bulk = bulk('Fe', cubic=True) a_0 = structure_bulk.cell[0, 0] From 743323bdbc13503f8a84b2cbb3ef668c1a6cbea7 Mon Sep 17 00:00:00 2001 From: Sam Dareska <37879103+samwaseda@users.noreply.github.com> Date: Tue, 8 Aug 2023 13:36:06 +0200 Subject: [PATCH 09/12] Update structuretoolkit/analyse/spatial.py Co-authored-by: Marvin Poul --- structuretoolkit/analyse/spatial.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/structuretoolkit/analyse/spatial.py b/structuretoolkit/analyse/spatial.py index 537739dcc..8f1c2667c 100644 --- a/structuretoolkit/analyse/spatial.py +++ b/structuretoolkit/analyse/spatial.py @@ -86,6 +86,22 @@ def set_to_high_symmetry_points(positions, structure, neigh, decimals=4): def cluster_by_steinhardt(positions, neigh, l_values, q_eps, var_ratio, min_samples): + """ + Clusters candidate positions via Steinhardt parameters and the variance in distances to host atoms. + + The cluster that has the lowest variance is returned, i.e. those positions that have the most "regular" coordination polyhedra. + + Args: + positions (array): candidate positions + neigh (Neighbor): neighborhood information of the candidate positions + l_values (list of int): which steinhardt parameters to use for clustering + q_eps (float): maximum intercluster distance in steinhardt parameters for DBSCAN clustering + var_ratio (float): multiplier to make steinhardt's and distance variance numerically comparable + min_samples (int): minimum size of clusters + + Returns: + array: Positions of the most likely interstitial sites + """ from sklearn.cluster import DBSCAN if min_samples is None: From 55425c70c953a7735318a77a7f12b7818cb69826 Mon Sep 17 00:00:00 2001 From: Sam Dareska <37879103+samwaseda@users.noreply.github.com> Date: Tue, 8 Aug 2023 13:37:49 +0200 Subject: [PATCH 10/12] Update structuretoolkit/analyse/spatial.py Co-authored-by: Marvin Poul --- structuretoolkit/analyse/spatial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/structuretoolkit/analyse/spatial.py b/structuretoolkit/analyse/spatial.py index 8f1c2667c..2283ef9ed 100644 --- a/structuretoolkit/analyse/spatial.py +++ b/structuretoolkit/analyse/spatial.py @@ -130,7 +130,7 @@ class Interstitials: given by `min_distance` 2. Shift interstitial candidates to the nearest symmetric points with respect to the neighboring atom sites/vertices. - 3. Cluster interstitial candidates to avoid point overlapping. + 3. Cluster interstitial candidate positions to avoid point overlapping. 4. Cluster interstitial candidates by their Steinhardt parameters (cf. `l_values` for the values of the spherical harmonics) and the variance of the distances and take the group with the smallest average distance variance From b6b39cc89c4099f81ab94c02f01fb835463ce9cf Mon Sep 17 00:00:00 2001 From: Sam Dareska <37879103+samwaseda@users.noreply.github.com> Date: Tue, 8 Aug 2023 13:38:11 +0200 Subject: [PATCH 11/12] Update structuretoolkit/analyse/spatial.py Co-authored-by: Marvin Poul --- structuretoolkit/analyse/spatial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/structuretoolkit/analyse/spatial.py b/structuretoolkit/analyse/spatial.py index 2283ef9ed..6c1d64415 100644 --- a/structuretoolkit/analyse/spatial.py +++ b/structuretoolkit/analyse/spatial.py @@ -242,7 +242,7 @@ def run_workflow(self, positions=None, steps=-1): if positions is None: positions = self.initial_positions.copy() for ii, ww in enumerate(self.workflow): - positions = ww["f"](positions=positions, **ww["args"]) + positions = ww["f"](positions=positions, **ww["kwargs"]) if ii == steps: return positions return positions From 4a446bc45244ef9a76988f2135cad000a9c6aed3 Mon Sep 17 00:00:00 2001 From: samwaseda Date: Tue, 8 Aug 2023 12:27:42 +0000 Subject: [PATCH 12/12] replace args by kwargs --- structuretoolkit/analyse/spatial.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/structuretoolkit/analyse/spatial.py b/structuretoolkit/analyse/spatial.py index 6c1d64415..5b4fc988f 100644 --- a/structuretoolkit/analyse/spatial.py +++ b/structuretoolkit/analyse/spatial.py @@ -174,7 +174,7 @@ def __init__( var_ratio=5, min_samples=None, neigh_args={}, - **args + **kwargs ): """ @@ -214,19 +214,19 @@ def __init__( self.workflow = [ { "f": remove_too_close, - "args": {"structure": structure, "min_distance": min_distance}, + "kwargs": {"structure": structure, "min_distance": min_distance}, }, { "f": set_to_high_symmetry_points, - "args": {"structure": structure, "neigh": self.neigh}, + "kwargs": {"structure": structure, "neigh": self.neigh}, }, { - "f": lambda **args: get_cluster_positions(structure, **args), - "args": {"eps": x_eps}, + "f": lambda **kwargs: get_cluster_positions(structure, **kwargs), + "kwargs": {"eps": x_eps}, }, { "f": cluster_by_steinhardt, - "args": { + "kwargs": { "neigh": self.neigh, "l_values": l_values, "q_eps": q_eps, @@ -333,7 +333,7 @@ def get_interstitials( var_ratio=5, min_samples=None, neigh_args={}, - **args + **kwargs ): return Interstitials( structure=structure, @@ -347,7 +347,7 @@ def get_interstitials( var_ratio=var_ratio, min_samples=min_samples, neigh_args=neigh_args, - **args + **kwargs )