diff --git a/.github/workflows/flare.yml b/.github/workflows/flare.yml index 3c2d0aa93..f4dd43aa9 100644 --- a/.github/workflows/flare.yml +++ b/.github/workflows/flare.yml @@ -116,7 +116,7 @@ jobs: run: | sudo apt-get update sudo apt-get install python3-sphinx python3-sphinx-rtd-theme python3-breathe python3-nbsphinx - + - name: Run Doxygen uses: mattnotmitt/doxygen-action@v1.1.0 with: @@ -127,11 +127,12 @@ jobs: - name: Run Sphinx run: | + export PYTHONPATH=$PYTHONPATH:$PWD/lammps/python cd docs pwd ls make html - + - name: Publish the docs uses: peaceiris/actions-gh-pages@v3 with: diff --git a/CMakeLists.txt b/CMakeLists.txt index 61fb33db6..e526e716e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,34 +84,12 @@ include_directories(${SOURCE_DIR}) # pybind11 ############################################################################### -ExternalProject_Add( - pybind11_project - SOURCE_DIR "${CMAKE_BINARY_DIR}/External/pybind11" - URL "https://github.com/pybind/pybind11/archive/v2.3.0.tar.gz" - URL_HASH MD5=e2120c5b0e3c20a93f2dfcdc55026ba8 - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" +set(PYBIND11_FINDPYTHON ON) +FetchContent_Declare( + pybind11 + GIT_REPOSITORY https://github.com/pybind/pybind11 ) -if (NOT FLARE_PYTHON_VERSION) - set(Python_ADDITIONAL_VERSIONS 3.8 3.7 3.6 3.5) -endif() -find_package(PythonLibsNew ${FLARE_PYTHON_VERSION} REQUIRED) - -add_library(pybind11 INTERFACE) -ExternalProject_Get_Property(pybind11_project SOURCE_DIR) -target_include_directories(pybind11 SYSTEM INTERFACE ${SOURCE_DIR}/include) -target_include_directories(pybind11 SYSTEM INTERFACE ${PYTHON_INCLUDE_DIRS}) - -if(APPLE) - TARGET_LINK_LIBRARIES(pybind11 INTERFACE "-undefined dynamic_lookup") - message(STATUS "Building in conda environment on MAC") -else() - target_link_libraries(pybind11 INTERFACE ${PYTHON_LIBRARIES}) -endif() - -# Greatly reduces the code bloat -target_compile_options(pybind11 INTERFACE "-fvisibility=hidden") +FetchContent_MakeAvailable(pybind11) ############################################################################### # Specify source files. @@ -204,11 +182,8 @@ else() endif() # Create pybind module. -add_library(flare_module SHARED ${PYBIND_SOURCES}) -target_link_libraries(flare_module PUBLIC flare pybind11) -set_target_properties(flare_module PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" - SUFFIX "${PYTHON_MODULE_EXTENSION}") -add_dependencies(flare_module pybind11_project) +pybind11_add_module(flare_module ${PYBIND_SOURCES}) +target_link_libraries(flare_module PUBLIC flare) set_target_properties(flare_module PROPERTIES OUTPUT_NAME "_C_flare") # Add test directory. diff --git a/README.md b/README.md index 7d66a6658..aed58f1a0 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@

-FLARE is an open-source Python package for creating fast and accurate interatomic potentials. +FLARE is an open-source Python package for creating fast and accurate interatomic potentials. ## Major Features @@ -20,7 +20,7 @@ Note: We implement Sparse GP, all the kernels and descriptors in C++ with Python interface. -We implement Full GP, Mapped GP, RBCM, Squared Exponential kernel and 2+3-body descriptors in Python. +We implement Full GP, Mapped GP, RBCM, Squared Exponential kernel and 2+3-body descriptors in Python. Please do NOT mix them. @@ -35,20 +35,16 @@ Documentation of the code can be accessed here: https://mir-group.github.io/flar [FLARE (ACE descriptors + sparse GP)](https://colab.research.google.com/drive/1rZ-p3kN5CJbPJgD8HuQHSc7ecmwZYse6). The tutorial shows how to run flare with ACE and SGP on energy and force data, demoing "offline" training on the MD17 dataset and "online" on-the-fly training of a simple aluminum force field. All the trainings use yaml files for configuration. +[FLARE (LAMMPS active learning)](https://bit.ly/flarelmpotf) +This tutorial demonstrates new functionality for running active learning all within LAMMPS, with LAMMPS running the dynamics to allow arbitrarily complex molecular dynamics workflows while maintaining a simple interface. This also demonstrates how to use the C++ API directly from Python through `pybind11`. Finally, there's a simple demonstration of phonon calculations with FLARE using `phonopy`. + [FLARE (ACE descriptors + sparse GP) with LAMMPS](https://colab.research.google.com/drive/1qgGlfu1BlXQgSrnolS4c4AYeZ-2TaX5Y). The tutorial shows how to compile LAMMPS with FLARE pair style and uncertainty compute code, and use LAMMPS for Bayesian active learning and uncertainty-aware molecular dynamics. -[FLARE (ACE descriptors + sparse GP) Python API](https://colab.research.google.com/drive/18_pTcWM19AUiksaRyCgg9BCpVyw744xv). -The tutorial shows how to do the offline and online trainings with python scripts. -A video walkthrough of the tutorial, including detailed discussion of expected outputs, is available [here](https://youtu.be/-FH_VqRQrso). - -[FLARE (2+3-body + GP)](https://colab.research.google.com/drive/1Q2NCCQWYQdTW9-e35v1W-mBlWTiQ4zfT). -The tutorial shows how to use flare 2+3 body descriptors and squared exponential kernel to train a Gaussian Process force field on-the-fly. - [Compute thermal conductivity from FLARE and Boltzmann transport equations](https://phoebe.readthedocs.io/en/develop/tutorials/mlPhononTransport.html). The tutorial shows how to use FLARE (LAMMPS) potential to compute lattice thermal conductivity from Boltzmann transport equation method, with [Phono3py](https://phonopy.github.io/phono3py/) for force constants calculations and [Phoebe](https://mir-group.github.io/phoebe/) for thermal conductivities. -[Using your own customized descriptors with FLARE](https://colab.research.google.com/drive/1VzbIPmx1z-uygKstOYTj2Nqr53AMC5NL?usp=sharing). +[Using your own customized descriptors with FLARE](https://colab.research.google.com/drive/1VzbIPmx1z-uygKstOYTj2Nqr53AMC5NL?usp=sharing). The tutorial shows how to attach your own descriptors with FLARE sparse GP model and do training and testing. All the tutorials take a few minutes to run on a normal desktop computer or laptop (excluding installation time). @@ -82,7 +78,7 @@ flare++ is tested on a Linux operating system (Ubuntu 20.04.3), but should also ### Hardware requirements There are no non-standard hardware requirements to download the software and train simple models—the introductory tutorial can be run on a single cpu. To train large models (10k+ sparse environments), we recommend using a compute node with at least 100GB of RAM. - + ## Tests We recommend running unit tests to confirm that FLARE is running properly on your machine. We have implemented our tests using the pytest suite. You can call `pytest` from the command line in the tests directory. @@ -94,11 +90,10 @@ pytest ``` ## References - If you use FLARE++ including B2 descriptors, NormalizedDotProduct kernel and Sparse GP, please cite the following paper: > [1] Vandermause, J., Xie, Y., Lim, J.S., Owen, C.J. and Kozinsky, B., 2021. *Active learning of reactive Bayesian force fields: Application to heterogeneous hydrogen-platinum catalysis dynamics.* Nature Communications 13.1 (2022): 5183. https://www.nature.com/articles/s41467-022-32294-0 - + If you use FLARE active learning workflow, full Gaussian process or 2-body/3-body kernel in your research, please cite the following paper: > [2] Vandermause, J., Torrisi, S. B., Batzner, S., Xie, Y., Sun, L., Kolpak, A. M. & Kozinsky, B. *On-the-fly active learning of interpretable Bayesian force fields for atomistic rare events.* npj Comput Mater 6, 20 (2020). https://doi.org/10.1038/s41524-020-0283-z diff --git a/docs/source/conf.py b/docs/source/conf.py index eb7f2ed98..3382bbc9a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -55,6 +55,8 @@ breathe_projects = {"flare_pp": "xml"} breathe_default_project = "flare_pp" breathe_default_members = ("members", "undoc-members") +napoleon_google_docstring = True +napoleon_numpy_docstring = True napoleon_use_param = False # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/flare/learners/learners.rst b/docs/source/flare/learners/learners.rst index 2691ee2ab..4b9d35a89 100644 --- a/docs/source/flare/learners/learners.rst +++ b/docs/source/flare/learners/learners.rst @@ -5,5 +5,6 @@ Bayesian Active Learning :maxdepth: 2 otf + lmpotf gp_from_aimd utils diff --git a/docs/source/flare/learners/lmpotf.rst b/docs/source/flare/learners/lmpotf.rst new file mode 100644 index 000000000..98ec224b5 --- /dev/null +++ b/docs/source/flare/learners/lmpotf.rst @@ -0,0 +1,5 @@ +On-the-Fly Training in LAMMPS +============================= + +.. automodule:: flare.learners.lmpotf + :members: diff --git a/flare/_version.py b/flare/_version.py index 4ded0a39f..3e8d9f946 100644 --- a/flare/_version.py +++ b/flare/_version.py @@ -1 +1 @@ -__version__ = "1.3.3" \ No newline at end of file +__version__ = "1.4.0" diff --git a/flare/learners/lmpotf.py b/flare/learners/lmpotf.py new file mode 100644 index 000000000..a6f7817a9 --- /dev/null +++ b/flare/learners/lmpotf.py @@ -0,0 +1,307 @@ +from lammps import lammps +import ase, os, numpy as np, sys +from typing import Union, Optional, Callable, Any, List +from flare.bffs.sgp._C_flare import Structure, SparseGP +from flare.bffs.sgp.sparse_gp import optimize_hyperparameters +from flare.bffs.sgp.calculator import sort_variances +import logging +import time + + +def transform_stress(stress: List[List[float]]) -> List[List[float]]: + return -np.array( + [ + stress[(0, 0)], + stress[(0, 1)], + stress[(0, 2)], + stress[(1, 1)], + stress[(1, 2)], + stress[(2, 2)], + ] + ) + + +class LMPOTF: + """ + Module for performing On-The-Fly (OTF) training, also known as active learning, + entirely within LAMMPS. + + Parameters + ---------- + sparse_gp + The :cpp:class:`SparseGP` object to train. + descriptors + A list of descriptor objects, or a single descriptor (most common), e.g. :cpp:class:`B2`. + rcut + The interaction cut-off radius. + type2number + The atomic numbers of all LAMMPS types. + dftcalc + An ASE calculator, e.g. Espresso. + energy_correction + Per-type corrections to the DFT potential energy. + dft_call_threshold + Uncertainty threshold for whether to call DFT. + dft_add_threshold + Uncertainty threshold for whether to add an atom to the training set. + dft_xyz_fname + Name of the file in which to save the DFT results. + Should contain '*', which will be replaced with the current step. + std_xyz_fname + Name of the file in which to save ASE Atoms with per-atom uncertainties as charges. + Should contain '*', which will be replaced with the current step. + model_fname + Name of the saved model, must correspond to `pair_coeff`. + hyperparameter_optimization + Boolean function that determines whether to run hyperparameter optimization, as a function of this LMPOTF + object, the LAMMPS instance and the current step. + opt_bounds + Bounds for the hyperparameter optimization. + opt_method + Algorithm for the hyperparameter optimization. + opt_iterations + Max number of iterations for the hyperparameter optimization. + post_dft_callback + A function that is called after every DFT call. Receives this LMPOTF object and the current step. + wandb + The wandb object, which should already be initialized. + log_fname + An output file to which logging info is written. + """ + + def __init__( + self, + sparse_gp: SparseGP, + descriptors: List, + rcut: float, + type2number: Union[(int, List[int])], + dftcalc: object, + energy_correction: List[float] = 0.0, + force_training=True, + energy_training=True, + stress_training=True, + dft_call_threshold: float = 0.005, + dft_add_threshold: float = 0.0025, + dft_xyz_fname: Optional[str] = None, + std_xyz_fname: Optional[str] = None, + model_fname: str = "otf.flare", + hyperparameter_optimization: Callable[ + (["LMPOTF", object, int], bool) + ] = lambda lmpotf, lmp, step: False, + opt_bounds: Optional[List[float]] = None, + opt_method: Optional[str] = "L-BFGS-B", + opt_iterations: Optional[int] = 50, + post_dft_callback: Callable[(["LMPOTF", int], None)] = lambda lmpotf, step: 0, + wandb: object = None, + log_fname: str = "otf.log", + ) -> object: + """ + + """ + self.sparse_gp = sparse_gp + self.descriptors = np.atleast_1d(descriptors) + self.rcut = rcut + self.type2number = np.atleast_1d(type2number) + self.ntypes = len(self.type2number) + self.energy_correction = np.atleast_1d(energy_correction) + assert len(self.energy_correction) == self.ntypes + self.dftcalc = dftcalc + self.dft_call_threshold = dft_call_threshold + self.dft_add_threshold = dft_add_threshold + self.post_dft_callback = post_dft_callback + self.force_training = force_training + self.energy_training = energy_training + self.stress_training = stress_training + self.dft_calls = 0 + self.last_dft_call = -100 + self.dft_xyz_fname = dft_xyz_fname + self.std_xyz_fname = std_xyz_fname + self.model_fname = model_fname + self.hyperparameter_optimization = hyperparameter_optimization + self.opt_bounds = opt_bounds + self.opt_method = opt_method + self.opt_iterations = opt_iterations + self.wandb = wandb + logging.basicConfig( + filename=log_fname, level=(logging.DEBUG), format="%(asctime)s: %(message)s" + ) + self.logger = logging.getLogger("lmpotf") + + self.time_dft = 0.0 + self.time_hyp_opt = 0.0 + self.time_training = 0.0 + self.time_predict_uncertainties = 0.0 + self.time_prediction = 0.0 + self.time_lammps = 0.0 + self.t0 = time.time() + + def save(self, fname): + self.sparse_gp.write_mapping_coefficients(fname, "LMPOTF", 0) + + def step(self, lmpptr, evflag=0): + """ + Function called by LAMMPS at every step. + This is the function that must be called by `fix python/invoke`. + + Parameters + ---------- + lmpptr : ptr + Pointer to running LAMMPS instance. + evflag : int + evflag given by LAMMPS, ignored. + """ + try: + self.time_lammps += time.time() - self.t0 + lmp = lammps(ptr=lmpptr) + natoms = lmp.get_natoms() + x = lmp.gather_atoms("x", 1, 3) + x = np.ctypeslib.as_array(x, shape=(natoms, 3)).reshape(natoms, 3) + step = int(lmp.get_thermo("step")) + boxlo, boxhi, xy, yz, xz, _, _ = lmp.extract_box() + cell = np.diag(np.array(boxhi) - np.array(boxlo)) + cell[(1, 0)] = xy + cell[(2, 0)] = xz + cell[(2, 1)] = yz + types = lmp.gather_atoms("type", 0, 1) + types = np.ctypeslib.as_array(types, shape=natoms) + structure = Structure(cell, types - 1, x, self.rcut, self.descriptors) + if self.dft_calls == 0: + self.logger.info("Initial step, calling DFT") + pe, F = self.run_dft(cell, x, types, step, structure) + t0 = time.time() + self.sparse_gp.add_training_structure(structure) + self.sparse_gp.add_random_environments(structure, [int(natoms/4)]) + self.sparse_gp.update_matrices_QR() + self.time_training += time.time() - t0 + self.save(self.model_fname) + else: + self.logger.info(f"Step {step}") + sigma = self.sparse_gp.hyperparameters[0] + t0 = time.time() + variances = sort_variances(structure, self.sparse_gp.compute_cluster_uncertainties(structure)[0]) + self.time_predict_uncertainties += time.time() - t0 + stds = np.sqrt(np.abs(variances)) / sigma + if self.std_xyz_fname is not None: + frame = ase.Atoms( + positions=x, + numbers=(self.type2number[types - 1]), + cell=cell, + pbc=True, + ) + frame.set_array("charges", stds) + ase.io.write(self.std_xyz_fname.replace("*", str(step)), frame, format="extxyz") + wandb_log = {"max_uncertainty": np.amax(stds)} + self.logger.info(f"Max uncertainty: {np.amax(stds)}") + call_dft = np.any(stds > self.dft_call_threshold) + if call_dft: + t0 = time.time() + self.sparse_gp.predict_DTC(structure) + self.time_prediction += time.time() - t0 + predE = structure.mean_efs[0] + predF = structure.mean_efs[1:-6].reshape((-1, 3)) + predS = structure.mean_efs[-6:] + Fstd = np.sqrt(np.abs(structure.variance_efs[1:-6])).reshape( + (-1, 3) + ) + Estd = np.sqrt(np.abs(structure.variance_efs[0])) + Sstd = np.sqrt(np.abs(structure.variance_efs[-6:])) + wandb_log["max_F_uncertainty"] = np.amax(Fstd) + self.logger.info(f"Max force uncertainty: {np.amax(Fstd)}") + self.logger.info(f"DFT call #{self.dft_calls}") + pe, F = self.run_dft(cell, x, types, step, structure) + atoms_to_be_added = np.arange(natoms)[stds > self.dft_add_threshold] + t0 = time.time() + self.sparse_gp.add_training_structure(structure) + self.sparse_gp.add_specific_environments( + structure, atoms_to_be_added + ) + self.sparse_gp.update_matrices_QR() + self.time_training += time.time() - t0 + if self.hyperparameter_optimization(self, lmp, step): + self.logger.info("Optimizing hyperparameters!") + self.sparse_gp.compute_likelihood_stable() + likelihood_before = self.sparse_gp.log_marginal_likelihood + t0 = time.time() + optimize_hyperparameters( + (self.sparse_gp), + bounds=(self.opt_bounds), + method=(self.opt_method), + max_iterations=(self.opt_iterations), + ) + self.time_hyp_opt += time.time() - t0 + likelihood_after = self.sparse_gp.log_marginal_likelihood + self.logger.info( + f"Likelihood before/after: {likelihood_before:.2e} {likelihood_after:.2e}" + ) + self.logger.info( + f"Likelihood gradient: {self.sparse_gp.likelihood_gradient}" + ) + self.logger.info( + f"Hyperparameters: {self.sparse_gp.hyperparameters}" + ) + self.save(self.model_fname) + lmp.command(f"pair_coeff * * {self.model_fname}") + wandb_log["Fmae"] = np.mean(np.abs(F - predF)) + wandb_log["Emae"] = np.abs(pe - predE) / natoms + wandb_log["n_added"] = len(atoms_to_be_added) + for qty in ("n_added", "Fmae", "Emae"): + self.logger.info(f"{qty}: {wandb_log[qty]}") + + if self.wandb is not None: + wandb_log["uncertainties"] = self.wandb.Histogram(stds) + wandb_log["Temp"] = lmp.get_thermo("temp") + wandb_log["Press"] = lmp.get_thermo("press") + wandb_log["PotEng"] = lmp.get_thermo("pe") + wandb_log["Vol"] = lmp.get_thermo("vol") + wandb_log["time_dft"] = self.time_dft + wandb_log["time_training"] = self.time_training + wandb_log["time_prediction"] = self.time_prediction + wandb_log["time_predict_uncertainties"] = self.time_predict_uncertainties + wandb_log["time_hyp_opt"] = self.time_hyp_opt + wandb_log["time_lammps"] = self.time_lammps + if call_dft: + wandb_log["Funcertainties"] = self.wandb.Histogram(Fstd.ravel()) + wandb_log["Ferror"] = self.wandb.Histogram( + np.abs(F - predF).ravel() + ) + wandb_log["logrelFerror"] = self.wandb.Histogram( + np.log10(np.abs(F - predF)/np.abs(F)).ravel() + ) + self.wandb.log(wandb_log, step=step) + self.t0 = time.time() + except Exception as err: + try: + self.logger.exception("LMPOTF ERROR") + raise err + finally: + err = None + del err + + def run_dft(self, cell, x, types, step, structure): + t0 = time.time() + atomic_numbers = self.type2number[types - 1] + frame = ase.Atoms( + positions=x, + numbers=atomic_numbers, + cell=cell, + calculator=(self.dftcalc), + pbc=True, + ) + pe = frame.get_potential_energy() + pe -= np.sum(self.energy_correction[types - 1]) + F = frame.get_forces() + stress = frame.get_stress(voigt=False) + if self.dft_xyz_fname is not None: + ase.io.write(self.dft_xyz_fname.replace("*", str(step)), frame, format="extxyz") + if self.force_training: + structure.forces = F.reshape(-1) + if self.energy_training: + structure.energy = np.array([pe]) + if self.stress_training: + structure.stresses = transform_stress(stress) + self.dft_calls += 1 + self.last_dft_call = step + self.post_dft_callback(self, step) + + self.time_dft += time.time() - t0 + return (pe, F) diff --git a/lammps_plugins/pair_flare.cpp b/lammps_plugins/pair_flare.cpp index 387c87800..a3949f45b 100644 --- a/lammps_plugins/pair_flare.cpp +++ b/lammps_plugins/pair_flare.cpp @@ -193,9 +193,14 @@ void PairFLARE::allocate() { memory->create(setflag, n + 1, n + 1, "pair:setflag"); - // Set the diagonal of setflag to 1 (otherwise pair.cpp will throw an error) + // Set the entire setflag to 1 (otherwise pair.cpp will throw an error) + // off-diagonal needed for hybrid/overlay (note: we only support pair_coeff * *) for (int i = 1; i <= n; i++) - setflag[i][i] = 1; + for (int j = 1; j <= n; j++) + setflag[i][j] = 1; + + // Create cutsq array (used in pair.cpp) + memory->create(cutsq, n + 1, n + 1, "pair:cutsq"); } /* ---------------------------------------------------------------------- diff --git a/requirements.txt b/requirements.txt index 4f6cf6233..2d1bf81ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ numpy >=1.18, <1.23 scipy memory_profiler numba -ase +ase==3.22 IPython pytest>=4.6 pytest-mock diff --git a/tests/test_lammps.py b/tests/test_lammps.py index fb7729678..917aea9fe 100644 --- a/tests/test_lammps.py +++ b/tests/test_lammps.py @@ -261,7 +261,6 @@ def test_lmp_calc(): param_dict = { "pair_style": "lj/cut 2.5", "pair_coeff": ["* * 1 1"], - "compute": ["1 all pair/local dist", "2 all reduce max c_1"], "velocity": ["all create 300 12345 dist gaussian rot yes mom yes"], "fix": ["1 all nvt temp 300 300 $(100.0*dt)"], "dump_period": 1,