From 119d8f480dcd7dcba2e3f14f47dbb53451ecf31b Mon Sep 17 00:00:00 2001 From: NikoOinonen Date: Fri, 2 Feb 2024 19:11:07 +0200 Subject: [PATCH] ED-AFM stats scripts --- papers/ed-afm/figures/stats/stats_distance.py | 130 ++++++++++++++++ .../ed-afm/figures/stats/stats_distance_Xe.py | 141 ++++++++++++++++++ papers/ed-afm/figures/stats/stats_noise.py | 132 ++++++++++++++++ .../stats/stats_spring_constant_lat.py | 135 +++++++++++++++++ .../stats/stats_spring_constant_rad.py | 135 +++++++++++++++++ 5 files changed, 673 insertions(+) create mode 100644 papers/ed-afm/figures/stats/stats_distance.py create mode 100644 papers/ed-afm/figures/stats/stats_distance_Xe.py create mode 100644 papers/ed-afm/figures/stats/stats_noise.py create mode 100644 papers/ed-afm/figures/stats/stats_spring_constant_lat.py create mode 100644 papers/ed-afm/figures/stats/stats_spring_constant_rad.py diff --git a/papers/ed-afm/figures/stats/stats_distance.py b/papers/ed-afm/figures/stats/stats_distance.py new file mode 100644 index 0000000..47f3f2f --- /dev/null +++ b/papers/ed-afm/figures/stats/stats_distance.py @@ -0,0 +1,130 @@ + +import random +import time +from pathlib import Path + +import numpy as np +import ppafm.ml.AuxMap as aux +import ppafm.ocl.field as FFcl +import ppafm.ocl.oclUtils as oclu +import ppafm.ocl.relax as oclr +import torch +from ppafm.ml.Generator import InverseAFMtrainer +from ppafm.ocl.AFMulator import AFMulator + +import mlspm.preprocessing as pp +from mlspm.datasets import download_dataset +from mlspm.models import EDAFMNet + +def apply_preprocessing(batch): + Xs, Ys, _ = batch + Xs = [x[...,2:8] for x in Xs] + pp.add_norm(Xs) + pp.add_noise(Xs, c=0.08, randomize_amplitude=False) + return Xs, Ys + +if __name__ == "__main__": + + # Indepenent tips model + model_type = "base" # Type of pretrained weights to use + save_file = Path("mse_independent.csv") # File to save MSE values into + + # # Matched tips model + # model_type = "matched-tips" # Type of pretrained weights to use + # save_file = Path("mse_matched.csv") # File to save MSE values into + + device = "cuda" # Device to run inference on + molecules_dir = Path("../../molecules") # Path to molecule database + test_heights = np.linspace(4.9, 5.7, 21) # Test heights to run + n_samples = 3000 # Number of samples to run + + if save_file.exists(): + raise RuntimeError("Save file already exists") + + afmulator_args = { + "pixPerAngstrome" : 20, + "scan_dim" : (128, 128, 19), + "scan_window" : ((2.0, 2.0, 6.0), (18.0, 18.0, 7.9)), + "df_steps" : 10, + "tipR0" : [0.0, 0.0, 4.0] + } + + generator_kwargs = { + "batch_size" : 30, + "distAbove" : 5.3, + "iZPPs" : [8, 54], + "Qs" : [[ -10, 20, -10, 0 ], [ 30, -60, 30, 0 ]], + "QZs" : [[ 0.1, 0, -0.1, 0 ], [ 0.1, 0, -0.1, 0 ]] + } + + # Set random seed for reproducibility + random.seed(0) + + # Initialize OpenCL environment on GPU + env = oclu.OCLEnvironment( i_platform = 0 ) + FFcl.init(env) + oclr.init(env) + + # Define AFMulator + afmulator = AFMulator(**afmulator_args) + afmulator.npbc = (0,0,0) + + # Define AuxMaps + aux_maps = [ + aux.ESMapConstant( + scan_dim = afmulator.scan_dim[:2], + scan_window = [afmulator.scan_window[0][:2], afmulator.scan_window[1][:2]], + height = 4.0, + vdW_cutoff = -2.0, + Rpp = 1.0 + ) + ] + + # Download molecules if not already there + download_dataset("ED-AFM-molecules", molecules_dir) + + # Define generator + xyz_paths = (molecules_dir / "test").glob("*.xyz") + trainer = InverseAFMtrainer(afmulator, aux_maps, xyz_paths, **generator_kwargs) + + # Pick samples + random.shuffle(trainer.molecules) + trainer.molecules = trainer.molecules[:n_samples] + + # Make model + model = EDAFMNet(device=device, pretrained_weights=model_type) + + # Initialize save file + with open(save_file, "w") as f: + pass + + # Calculate MSE at every height for every batch + start_time = time.time() + total_len = len(test_heights)*len(trainer) + for ih, height in enumerate(test_heights): + + print(f"Height = {height:.2f}") + trainer.distAboveActive = height + + mses = [] + for ib, batch in enumerate(trainer): + + X, ref = apply_preprocessing(batch) + X = [torch.from_numpy(x).unsqueeze(1).to(device) for x in X] + ref = [torch.from_numpy(r).to(device) for r in ref] + + with torch.no_grad(): + pred, _ = model(X) + pred = pred[0] + + diff = pred - ref[0] + for d in diff: + mses.append((d**2).mean().cpu().numpy()) + + eta = (time.time() - start_time) * (total_len / (ih*len(trainer)+ib+1) - 1) + print(f"Batch {ib+1}/{len(trainer)} - ETA: {eta:.1f}s") + + with open(save_file, "a") as f: + f.write(f"{height:.2f},") + f.write(",".join([str(v) for v in mses])) + f.write("\n") diff --git a/papers/ed-afm/figures/stats/stats_distance_Xe.py b/papers/ed-afm/figures/stats/stats_distance_Xe.py new file mode 100644 index 0000000..cf1381b --- /dev/null +++ b/papers/ed-afm/figures/stats/stats_distance_Xe.py @@ -0,0 +1,141 @@ + +import random +import time +from pathlib import Path + +import numpy as np +import ppafm.ml.AuxMap as aux +import ppafm.ocl.field as FFcl +import ppafm.ocl.oclUtils as oclu +import ppafm.ocl.relax as oclr +import torch +from ppafm.ml.Generator import InverseAFMtrainer +from ppafm.ocl.AFMulator import AFMulator + +import mlspm.preprocessing as pp +from mlspm.datasets import download_dataset +from mlspm.models import EDAFMNet + + +class Trainer(InverseAFMtrainer): + + # Override this method to set the Xe tip at a different height + def handle_distance(self): + if self.afmulator.iZPP == 54: + self.distAboveActive = self.distAboveXe + super().handle_distance() + if self.afmulator.iZPP == 54: + self.distAboveActive = self.distAbove + +def apply_preprocessing(batch): + Xs, Ys, _ = batch + Xs = [x[...,2:8] for x in Xs] + pp.add_norm(Xs) + pp.add_noise(Xs, c=0.08, randomize_amplitude=False) + return Xs, Ys + +if __name__ == "__main__": + + # # Independent tips model + # model_type = "base" # Type of pretrained weights to use + # save_file = Path("mse_independent_Xe.csv") # File to save MSE values into + + # Matched tips model + model_type = "matched-tips" # Type of pretrained weights to use + save_file = Path("./mse_matched_Xe.csv") # File to save MSE values into + + device = "cuda" # Device to run inference on + molecules_dir = Path("../../molecules") # Path to molecule database + test_heights = np.linspace(4.9, 5.7, 21) # Test heights to run + n_samples = 3000 # Number of samples to run + + if save_file.exists(): + raise RuntimeError("Save file already exists") + + afmulator_args = { + "pixPerAngstrome" : 20, + "scan_dim" : (128, 128, 19), + "scan_window" : ((2.0, 2.0, 6.0), (18.0, 18.0, 7.9)), + "df_steps" : 10, + "tipR0" : [0.0, 0.0, 4.0] + } + + generator_kwargs = { + "batch_size" : 30, + "distAbove" : 5.3, + "iZPPs" : [8, 54], + "Qs" : [[ -10, 20, -10, 0 ], [ 30, -60, 30, 0 ]], + "QZs" : [[ 0.1, 0, -0.1, 0 ], [ 0.1, 0, -0.1, 0 ]] + } + + # Set random seed for reproducibility + random.seed(0) + + # Initialize OpenCL environment on GPU + env = oclu.OCLEnvironment( i_platform = 0 ) + FFcl.init(env) + oclr.init(env) + + # Define AFMulator + afmulator = AFMulator(**afmulator_args) + afmulator.npbc = (0,0,0) + + # Define AuxMaps + aux_maps = [ + aux.ESMapConstant( + scan_dim = afmulator.scan_dim[:2], + scan_window = [afmulator.scan_window[0][:2], afmulator.scan_window[1][:2]], + height = 4.0, + vdW_cutoff = -2.0, + Rpp = 1.0 + ) + ] + + # Download molecules if not already there + download_dataset("ED-AFM-molecules", molecules_dir) + + # Define generator + xyz_paths = (molecules_dir / "test").glob("*.xyz") + trainer = Trainer(afmulator, aux_maps, xyz_paths, **generator_kwargs) + + # Pick samples + random.shuffle(trainer.molecules) + trainer.molecules = trainer.molecules[:n_samples] + + # Make model + model = EDAFMNet(device=device, pretrained_weights=model_type) + + # Initialize save file + with open(save_file, "w") as f: + pass + + # Calculate MSE at every height for every batch + start_time = time.time() + total_len = len(test_heights)*len(trainer) + for ih, height in enumerate(test_heights): + + print(f"Height = {height:.2f}") + trainer.distAboveXe = height + + mses = [] + for ib, batch in enumerate(trainer): + + X, ref = apply_preprocessing(batch) + X = [torch.from_numpy(x).unsqueeze(1).to(device) for x in X] + ref = [torch.from_numpy(r).to(device) for r in ref] + + with torch.no_grad(): + pred, _ = model(X) + pred = pred[0] + + diff = pred - ref[0] + for d in diff: + mses.append((d**2).mean().cpu().numpy()) + + eta = (time.time() - start_time) * (total_len / (ih*len(trainer)+ib+1) - 1) + print(f"Batch {ib+1}/{len(trainer)} - ETA: {eta:.1f}s") + + with open(save_file, "a") as f: + f.write(f"{height:.2f},") + f.write(",".join([str(v) for v in mses])) + f.write("\n") diff --git a/papers/ed-afm/figures/stats/stats_noise.py b/papers/ed-afm/figures/stats/stats_noise.py new file mode 100644 index 0000000..fe93122 --- /dev/null +++ b/papers/ed-afm/figures/stats/stats_noise.py @@ -0,0 +1,132 @@ + +from pathlib import Path +import random +import time + +import numpy as np +import ppafm.ml.AuxMap as aux +import ppafm.ocl.field as FFcl +import ppafm.ocl.oclUtils as oclu +import ppafm.ocl.relax as oclr +import torch +from ppafm.ml.Generator import InverseAFMtrainer +from ppafm.ocl.AFMulator import AFMulator + +import mlspm.preprocessing as pp +from mlspm.datasets import download_dataset +from mlspm.models import EDAFMNet + +if __name__ == "__main__": + # # Normal noise model + # model_type = 'base' # Type of pretrained weights to use + # save_file = Path('mse_normal.csv') # File to save MSE values into + + # # Uniform noise model + # model_type = 'uniform-noise' # Type of pretrained weights to use + # save_file = Path('mse_uniform.csv') # File to save MSE values into + + # Constant noise model + model_type = 'constant-noise' # Type of pretrained weights to use + save_file = Path('mse_constant.csv') # File to save MSE values into + + device = 'cuda' # Device to run inference on + molecules_dir = Path('../../molecules') # Path to molecule database + test_amplitudes = np.linspace(0, 0.2, 21) # Test amplitudes to run + n_samples = 3000 # Number of samples to run + + if save_file.exists(): + raise RuntimeError("Save file already exists") + + afmulator_args = { + 'pixPerAngstrome' : 20, + 'lvec' : np.array([ + [ 0.0, 0.0, 0.0], + [20.0, 0.0, 0.0], + [ 0.0, 20.0, 0.0], + [ 0.0, 0.0, 5.0] + ]), + 'scan_dim' : (128, 128, 20), + 'scan_window' : ((2.0, 2.0, 6.0), (18.0, 18.0, 8.0)), + 'amplitude' : 1.0, + 'df_steps' : 10, + 'initFF' : True + } + + generator_kwargs = { + 'batch_size' : 30, + 'distAbove' : 5.3, + 'iZPPs' : [8, 54], + 'Qs' : [[ -10, 20, -10, 0 ], [ 30, -60, 30, 0 ]], + 'QZs' : [[ 0.1, 0, -0.1, 0 ], [ 0.1, 0, -0.1, 0 ]] + } + + # Set random seed for reproducibility + random.seed(0) + + # Initialize OpenCL environment on GPU + env = oclu.OCLEnvironment( i_platform = 0 ) + FFcl.init(env) + oclr.init(env) + + # Define AFMulator + afmulator = AFMulator(**afmulator_args) + afmulator.npbc = (0,0,0) + + # Define AuxMaps + aux_maps = [ + aux.ESMapConstant( + scan_dim = afmulator.scan_dim[:2], + scan_window = [afmulator.scan_window[0][:2], afmulator.scan_window[1][:2]], + height = 4.0, + vdW_cutoff = -2.0, + Rpp = 1.0 + ) + ] + + # Download molecules if not already there + download_dataset("ED-AFM-molecules", molecules_dir) + + # Define generator + xyz_paths = (molecules_dir / "test").glob("*.xyz") + trainer = InverseAFMtrainer(afmulator, aux_maps, xyz_paths, **generator_kwargs) + + # Pick samples + random.shuffle(trainer.molecules) + trainer.molecules = trainer.molecules[:n_samples] + + # Make model + model = EDAFMNet(device=device, pretrained_weights=model_type) + + # Calculate MSE at every height for every batch + start_time = time.time() + total_len = len(test_amplitudes)*len(trainer) + mses = [[] for _ in range(len(test_amplitudes))] + for ib, batch in enumerate(trainer): + + X, ref, _ = batch + X = [x[...,2:8] for x in X] + pp.add_norm(X) + ref = [torch.from_numpy(r).to(device) for r in ref] + + for ia, noise_amp in enumerate(test_amplitudes): + + X_ = [x.copy() for x in X] + pp.add_noise(X_, c=noise_amp, randomize_amplitude=False) + X_ = [torch.from_numpy(x).unsqueeze(1).to(device) for x in X_] + + with torch.no_grad(): + pred, _ = model(X) + pred = pred[0] + + diff = pred - ref[0] + for d in diff: + mses[ia].append((d**2).mean().cpu().numpy()) + + eta = (time.time() - start_time) * (len(trainer) / (ib+1) - 1) + print(f'Batch {ib+1}/{len(trainer)} - ETA: {eta:.1f}s') + + with open(save_file, 'w') as f: + for noise_amp, mse_amp in zip(test_amplitudes, mses): + f.write(f'{noise_amp:.2f},') + f.write(','.join([str(v) for v in mse_amp])) + f.write('\n') diff --git a/papers/ed-afm/figures/stats/stats_spring_constant_lat.py b/papers/ed-afm/figures/stats/stats_spring_constant_lat.py new file mode 100644 index 0000000..fa3e86e --- /dev/null +++ b/papers/ed-afm/figures/stats/stats_spring_constant_lat.py @@ -0,0 +1,135 @@ + +import random +import time +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import ppafm.ml.AuxMap as aux +import ppafm.ocl.field as FFcl +import ppafm.ocl.oclUtils as oclu +import ppafm.ocl.relax as oclr +import torch +from ppafm.ml.Generator import InverseAFMtrainer +from ppafm.ocl.AFMulator import AFMulator + +import mlspm.preprocessing as pp +from mlspm.datasets import download_dataset +from mlspm.models import EDAFMNet +from mlspm.visualization import plot_input + + +def apply_preprocessing(batch): + Xs, Ys, _ = batch + Xs = [x[...,2:8] for x in Xs] + pp.add_norm(Xs) + pp.add_noise(Xs, c=0.08, randomize_amplitude=False) + return Xs, Ys + +if __name__ == "__main__": + + model_type = "base" # Type of pretrained weights to use + save_file = Path("mse_spring_constants_lat.csv") # File to save MSE values into + device = "cuda" # Device to run inference on + molecules_dir = Path("../../molecules") # Path to molecule database + sim_save_dir = Path("./test_sim_lat") # Directory where to save test simulations for inspection + test_constants = np.linspace(0.2, 0.3, 21) # Test lateral spring constants to run + n_samples = 3000 # Number of samples to run + + if save_file.exists(): + raise RuntimeError("Save file already exists") + + afmulator_args = { + "pixPerAngstrome" : 20, + "scan_dim" : (128, 128, 19), + "scan_window" : ((2.0, 2.0, 6.0), (18.0, 18.0, 7.9)), + "df_steps" : 10, + "tipR0" : [0.0, 0.0, 4.0] + } + + generator_kwargs = { + "batch_size" : 30, + "distAbove" : 5.3, + "iZPPs" : [8, 54], + "Qs" : [[ -10, 20, -10, 0 ], [ 30, -60, 30, 0 ]], + "QZs" : [[ 0.1, 0, -0.1, 0 ], [ 0.1, 0, -0.1, 0 ]] + } + # Set random seed for reproducibility + random.seed(0) + + # Initialize OpenCL environment on GPU + env = oclu.OCLEnvironment( i_platform = 0 ) + FFcl.init(env) + oclr.init(env) + + # Define AFMulator + afmulator = AFMulator(**afmulator_args) + afmulator.npbc = (0,0,0) + + # Define AuxMaps + aux_maps = [ + aux.ESMapConstant( + scan_dim = afmulator.scan_dim[:2], + scan_window = [afmulator.scan_window[0][:2], afmulator.scan_window[1][:2]], + height = 4.0, + vdW_cutoff = -2.0, + Rpp = 1.0 + ) + ] + + # Download molecules if not already there + download_dataset("ED-AFM-molecules", molecules_dir) + + # Define generator + xyz_paths = (molecules_dir / "test").glob("*.xyz") + trainer = InverseAFMtrainer(afmulator, aux_maps, xyz_paths, **generator_kwargs) + + # Pick samples + random.shuffle(trainer.molecules) + trainer.molecules = trainer.molecules[:n_samples] + + # Make model + model = EDAFMNet(device=device, pretrained_weights=model_type) + + # Initialize save file + with open(save_file, "w") as f: + pass + sim_save_dir.mkdir(exist_ok=True, parents=True) + + # Calculate MSE at every height for every batch + start_time = time.time() + total_len = len(test_constants)*len(trainer) + for ih, k_lat in enumerate(test_constants): + + print(f"Lateral spring constant = {k_lat:.3f}") + afmulator.scanner.stiffness = np.array([k_lat, k_lat, 0.0, 30.0], dtype=np.float32) / -16.0217662 + + mses = [] + for ib, batch in enumerate(trainer): + + if ib == 0: + print("Saving example simulations...") + for s in range(10): + fig = plot_input(batch[0][0][s]) + plt.savefig(sim_save_dir / f"sim{s}_klat_{k_lat:.3f}.png") + plt.close() + + X, ref = apply_preprocessing(batch) + X = [torch.from_numpy(x).unsqueeze(1).to(device) for x in X] + ref = [torch.from_numpy(r).to(device) for r in ref] + + with torch.no_grad(): + pred, _ = model(X) + pred = pred[0] + + diff = pred - ref[0] + for d in diff: + mses.append((d**2).mean().cpu().numpy()) + + eta = (time.time() - start_time) * (total_len / (ih*len(trainer)+ib+1) - 1) + print(f"Batch {ib+1}/{len(trainer)} - ETA: {eta:.1f}s") + + with open(save_file, "a") as f: + f.write(f"{k_lat:.3f},") + f.write(",".join([str(v) for v in mses])) + f.write("\n") diff --git a/papers/ed-afm/figures/stats/stats_spring_constant_rad.py b/papers/ed-afm/figures/stats/stats_spring_constant_rad.py new file mode 100644 index 0000000..67071c6 --- /dev/null +++ b/papers/ed-afm/figures/stats/stats_spring_constant_rad.py @@ -0,0 +1,135 @@ + +import random +import time +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import ppafm.ml.AuxMap as aux +import ppafm.ocl.field as FFcl +import ppafm.ocl.oclUtils as oclu +import ppafm.ocl.relax as oclr +import torch +from ppafm.ml.Generator import InverseAFMtrainer +from ppafm.ocl.AFMulator import AFMulator + +import mlspm.preprocessing as pp +from mlspm.datasets import download_dataset +from mlspm.models import EDAFMNet +from mlspm.visualization import plot_input + + +def apply_preprocessing(batch): + Xs, Ys, _ = batch + Xs = [x[...,2:8] for x in Xs] + pp.add_norm(Xs) + pp.add_noise(Xs, c=0.08, randomize_amplitude=False) + return Xs, Ys + +if __name__ == "__main__": + + model_type = "base" # Type of pretrained weights to use + save_file = Path("mse_spring_constants_rad.csv") # File to save MSE values into + device = "cuda" # Device to run inference on + molecules_dir = Path("../../molecules") # Path to molecule database + sim_save_dir = Path("./test_sim_rad") # Directory where to save test simulations for inspection + test_constants = np.linspace(20, 40, 21) # Test radial spring constants to run + n_samples = 3000 # Number of samples to run + + if save_file.exists(): + raise RuntimeError("Save file already exists") + + afmulator_args = { + "pixPerAngstrome" : 20, + "scan_dim" : (128, 128, 19), + "scan_window" : ((2.0, 2.0, 6.0), (18.0, 18.0, 7.9)), + "df_steps" : 10, + "tipR0" : [0.0, 0.0, 4.0] + } + + generator_kwargs = { + "batch_size" : 30, + "distAbove" : 5.3, + "iZPPs" : [8, 54], + "Qs" : [[ -10, 20, -10, 0 ], [ 30, -60, 30, 0 ]], + "QZs" : [[ 0.1, 0, -0.1, 0 ], [ 0.1, 0, -0.1, 0 ]] + } + # Set random seed for reproducibility + random.seed(0) + + # Initialize OpenCL environment on GPU + env = oclu.OCLEnvironment( i_platform = 0 ) + FFcl.init(env) + oclr.init(env) + + # Define AFMulator + afmulator = AFMulator(**afmulator_args) + afmulator.npbc = (0,0,0) + + # Define AuxMaps + aux_maps = [ + aux.ESMapConstant( + scan_dim = afmulator.scan_dim[:2], + scan_window = [afmulator.scan_window[0][:2], afmulator.scan_window[1][:2]], + height = 4.0, + vdW_cutoff = -2.0, + Rpp = 1.0 + ) + ] + + # Download molecules if not already there + download_dataset("ED-AFM-molecules", molecules_dir) + + # Define generator + xyz_paths = (molecules_dir / "test").glob("*.xyz") + trainer = InverseAFMtrainer(afmulator, aux_maps, xyz_paths, **generator_kwargs) + + # Pick samples + random.shuffle(trainer.molecules) + trainer.molecules = trainer.molecules[:n_samples] + + # Make model + model = EDAFMNet(device=device, pretrained_weights=model_type) + + # Initialize save file + with open(save_file, "w") as f: + pass + sim_save_dir.mkdir(exist_ok=True, parents=True) + + # Calculate MSE at every height for every batch + start_time = time.time() + total_len = len(test_constants)*len(trainer) + for ih, k_rad in enumerate(test_constants): + + print(f"Radial spring constant = {k_rad:.1f}") + afmulator.scanner.stiffness = np.array([0.25, 0.25, 0.0, k_rad], dtype=np.float32) / -16.0217662 + + mses = [] + for ib, batch in enumerate(trainer): + + if ib == 0: + print("Saving example simulations...") + for s in range(10): + fig = plot_input(batch[0][0][s]) + plt.savefig(sim_save_dir / f"sim{s}_krad_{k_rad:.3f}.png") + plt.close() + + X, ref = apply_preprocessing(batch) + X = [torch.from_numpy(x).unsqueeze(1).to(device) for x in X] + ref = [torch.from_numpy(r).to(device) for r in ref] + + with torch.no_grad(): + pred, _ = model(X) + pred = pred[0] + + diff = pred - ref[0] + for d in diff: + mses.append((d**2).mean().cpu().numpy()) + + eta = (time.time() - start_time) * (total_len / (ih*len(trainer)+ib+1) - 1) + print(f"Batch {ib+1}/{len(trainer)} - ETA: {eta:.1f}s") + + with open(save_file, "a") as f: + f.write(f"{k_rad:.1f},") + f.write(",".join([str(v) for v in mses])) + f.write("\n")