Install files for FUSE sysimage and module on omega #804

Merged · 6 commits · Jan 28, 2025
40 changes: 40 additions & 0 deletions deploy/omega/base.lua
@@ -0,0 +1,40 @@
whatis("Name : fuse")
whatis("Version : " .. fuse_env)

depends_on("julia/1.11.2")
depends_on("env/gcc11.x")

-- FUSE environment uses its own conda install with custom jupyter kernels
conflict("python")
conflict("mamba")

local envdir = basedir .. "/environments/" .. fuse_env
local user_depot = os.getenv("JULIA_USER_DEPOT")
local base_depot = envdir .. "/.julia"

setenv("FUSE_HOME", basedir)
setenv("FUSE_ENVIRONMENT", fuse_env)

-- We put the user depot first so their own packages get installed there,
-- then the FUSE environment's depot after so it can find packages for the
-- precompiled sysimage
setenv("JULIA_DEPOT_PATH", user_depot .. ":" .. base_depot .. ":")

-- The FUSE sysimage environment is the last place Julia looks for packages
-- when a user does `using <package>`, but this allows Julia to automatically
-- find FUSE, Plots, and IJulia.
setenv("JULIA_LOAD_PATH", ":" .. envdir)

setenv("JULIA_CC", "gcc -O3")

-- This lets the compiled sysimage work on login and worker nodes,
-- modeled after how the Julia binaries are built
setenv("JULIA_CPU_TARGET", "generic;sandybridge,-xsaveopt,clone_all;haswell,-rdrnd,base(1)")


prepend_path("JUPYTER_PATH", envdir .. "/.jupyter")

prepend_path("PATH", basedir .. "/miniconda3/bin")

local fuse_sysimage = envdir .. "/sys_fuse.so"
set_alias("julia", "julia --sysimage=" .. fuse_sysimage)
67 changes: 67 additions & 0 deletions deploy/omega/install_fuse_environment.jl
@@ -0,0 +1,67 @@
@assert (Threads.nthreads() == 1) "Error: Installing FUSE sysimage requires running Julia with one thread"

@assert ("FUSE_ENVIRONMENT" in keys(ENV)) "Error: Must define FUSE_ENVIRONMENT environment variable"
fuse_env = ENV["FUSE_ENVIRONMENT"]
env_dir = joinpath(ENV["FUSE_HOME"], "environments", fuse_env)

import Pkg

# Setup main environment for installer
Pkg.activate()
Pkg.Registry.add(Pkg.RegistrySpec(url="https://github.com/ProjectTorreyPines/FuseRegistry.jl.git"))
Pkg.Registry.add("General")
Pkg.add("PackageCompiler")
using PackageCompiler

# Setup new environment
Pkg.activate(env_dir)
Pkg.add(["FUSE", "Plots", "IJulia", "WebIO", "Interact"])
Pkg.build("IJulia")

# Freeze Project and Manifest to read-only
chmod(joinpath(env_dir, "Project.toml"), 0o444)
chmod(joinpath(env_dir, "Manifest.toml"), 0o444)

# Create precompile script
precompile_execution_file = joinpath(env_dir, "precompile_script.jl")
precompile_cmds = """
using FUSE
include(joinpath(pkgdir(FUSE), "docs", "src", "tutorial.jl"))
include(joinpath(pkgdir(FUSE), "test", "runtests.jl"))
"""
write(precompile_execution_file, precompile_cmds)

# Precompile FUSE sys image
sysimage_path = joinpath(env_dir, "sys_fuse.so")
cpu_target = ENV["JULIA_CPU_TARGET"]
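# cpu_target comes from the fuse module file (see base.lua above), so the
# sysimage works on both login and worker nodes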
create_sysimage(["FUSE"]; sysimage_path, precompile_execution_file, cpu_target)

# Create IJulia kernels (10 threads for login, 40 for worker)
import IJulia
IJulia.installkernel("Julia+FUSE (login 10-threads)", "--sysimage=$sysimage_path"; env=Dict("JULIA_NUM_THREADS"=>"10"))
IJulia.installkernel("Julia+FUSE (worker 40-threads)", "--sysimage=$sysimage_path"; env=Dict("JULIA_NUM_THREADS"=>"40"))

# Create module file
module_file = joinpath(ENV["FUSE_HOME"], "modules", "fuse", fuse_env * ".lua")
header = """
local basedir = "/fusion/projects/codes/julia/fuse"
local fuse_env = "$fuse_env"

help([[
Module for julia with FUSE $fuse_env sysimage
Automatically created by FUSE install script:
`julia /fusion/projects/codes/julia/fuse/install/install_fuse_environment.jl`
Maintainers: B.C. Lyons, [email protected]
C.M. Clark, [email protected]
Physics Officers: O.M. Meneghini, [email protected]
B.C. Lyons, [email protected]
Known technical debt:
The first time a custom Jupyter kernel is used, it may hang.
Restarting (sometimes twice) normally resolves the issue.
]])

"""

base = read(joinpath(@__DIR__, "base.lua"), String)

write(module_file, header * base)
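
For reference, a hedged sketch of how this installer might be driven (the release tag is hypothetical; the `FUSE_HOME` and `JULIA_CPU_TARGET` values are taken from the files above; the script must run with a single thread):
```
ENV["FUSE_HOME"] = "/fusion/projects/codes/julia/fuse"
ENV["FUSE_ENVIRONMENT"] = "v1.0.0"  # hypothetical release tag
ENV["JULIA_CPU_TARGET"] = "generic;sandybridge,-xsaveopt,clone_all;haswell,-rdrnd,base(1)"
include("install_fuse_environment.jl")
```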
113 changes: 91 additions & 22 deletions docs/src/install_omega.md
@@ -1,31 +1,97 @@
# Users: Public FUSE installation on GA's Omega

If you only intend to use FUSE and don't plan to develop the source code, release versions of FUSE
have been installed on the Omega cluster. To load the latest version, do
```
module load fuse
```
All available versions can be found with `module avail fuse`.

These modules do several things to make running FUSE easier for Omega users:

1. The `julia` module is loaded, which gives you access to a public Julia installation with your
own private "depot" in which you can add or develop your own packages. The location
of this private depot is given by the environment variable `JULIA_USER_DEPOT`.

1. The FUSE codebase has been precompiled and made available to Julia via a
[sysimage](https://julialang.github.io/PackageCompiler.jl/dev/sysimages.html).
This greatly reduces the time-to-first-execution (TTFX) for many functions in the FUSE code suite,
at the expense of "locking" those packages and functions to the versions with which they
were compiled.

1. FUSE is already available when you launch Julia, so there's no need to do `Pkg.add("FUSE")`.
You can simply do `using FUSE` and begin working.

1. A custom conda installation is made available to you that has Jupyter notebooks with
precompiled Julia kernels that include the FUSE sysimage. You can just do `jupyter lab` to
start a Jupyter session and select the desired kernels. There is a kernel with 10 threads meant
for the login nodes and one with 40 threads meant for the worker nodes.
!!! warning
**Problem**: There's a bug that occurs when a new user first launches one of these
Julia + FUSE Jupyter kernels.
In your terminal, you will see output about precompiling IJulia, which is expected.
Once the precompilation is done, it will report `Starting kernel event loops` but then the
kernel may hang and your notebook may not work. It is unclear why this happens, but it only
happens the first time for each user.

**Solution**: Restart the kernel. Occasionally this needs to be done twice, perhaps if you
restart too quickly and the precompilation was not finished. In any case, if the problem
does not resolve after restarting the kernel twice, reach out to the FUSE developers.
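
As a quick check that the module did what the list above describes, run a few lines in a fresh `julia` session (illustrative; exact paths will differ):
```
ENV["FUSE_ENVIRONMENT"]                     # which FUSE release the module selected
first(Base.DEPOT_PATH)                      # your private depot (JULIA_USER_DEPOT)
unsafe_string(Base.JLOptions().image_file)  # should point at sys_fuse.so
using FUSE                                  # works immediately; no Pkg.add needed
```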


# Developing FUSE on GA's Omega cluster

## Setting up Jupyter Notebooks

!!! note
Omega does have a system version of conda (available via `module load conda`) and one can create
a custom environment as described in the
[Omega documentation](http://mkdocs.gat.com/Software_On_Omega/Conda/), but this
has not been tested yet. Two important caveats if one wants to try this:
- The `conda` module should be loaded _after_ the `julia` module, as `julia` purges all other
modules, as discussed below.
- The `conda` module requires use of the bash shell. No other shell is supported.

The following is a robust setup to make Jupyter notebooks compatible with Julia on Omega:

1. Install miniconda
```
cd # in your home folder
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
sh Miniconda3-latest-Linux-x86_64.sh
```
read and accept the license, and install under `$HOME/miniconda3`, answer questions, and restart
your shell.

1. Install `mamba` for faster package management
```
$HOME/miniconda3/bin/conda install -c conda-forge mamba
```
!!! note
We use the full `conda` path to avoid picking up the system `conda` install. There is no system-wide `mamba` executable, so that's not necessary when running `mamba`.

1. Install `jupyterlab`
```
mamba install -c conda-forge jupyterlab
```

1. Load the Julia module with
```
module load julia
```
This gives you access to a public Julia
installation with your own private "depot" in which you can add or develop your own
packages. The location of this private depot is given by the environment variable
`JULIA_USER_DEPOT`.
!!! warning
Julia is incompatible with some modules loaded by the `defaults` module, which typically
comes pre-loaded on Omega and is used to run experimental tools like review+, efit_viewer,
etc. The `julia` module automatically purges your modules to prevent this conflict.
If you do use your own installation of Julia, you'll want to do a `module purge` yourself
or remove `module load defaults` from your `~/.bashrc` file.

1. Follow the instructions under the [Install FUSE](https://fuse.help/dev/install.html) page,
ignoring the sections about _Julia installation_ and _Updating Julia_.

1. Set up a multi-threaded Jupyter Julia kernel that does not take over the whole login node
```
@@ -35,13 +101,13 @@
export JULIA_NUM_THREADS=40
fusebot install_IJulia
```
Omega login nodes are a shared resource. Each login node has 40 cores.
This will set up Jupyter Julia kernels with both 10 and 40 threads.
Use 10 threads on login nodes and 40 threads on worker nodes. A manual
alternative using `IJulia` directly is sketched below.
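
If `fusebot` is unavailable, a manual equivalent from Julia itself (a sketch; the kernel names are a free choice):
```
import Pkg; Pkg.add("IJulia")
import IJulia
# one kernel per thread count; the name is just the label shown in Jupyter
IJulia.installkernel("Julia (10 threads)"; env=Dict("JULIA_NUM_THREADS" => "10"))
IJulia.installkernel("Julia (40 threads)"; env=Dict("JULIA_NUM_THREADS" => "40"))
```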

## Distributed.jl on Omega

We have found issues when trying to run parallel jobs using `Distributed.jl` on Omega.
The fix for this is simple: don't use the `Main` environment; instead, activate a separate environment.

This can be done easily by putting the following in the first cell of your Jupyter notebook:
@@ -54,7 +120,7 @@ Pkg.add(("Plots", "FUSE"))
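
The collapsed cell amounts to a pattern like the following (the environment path is a hypothetical choice; any environment other than `Main` works):
```
import Pkg
Pkg.activate(joinpath(homedir(), "fuse_work"))  # hypothetical location
Pkg.add(["Plots", "FUSE"])                      # note: a vector of names, not a tuple
```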

## Three ways to run parallel jobs

Keep in mind that each worker node on Omega has 128 CPUs.

1. Screen + Jupyter on the login node, workers on the worker nodes

@@ -77,7 +143,7 @@
Here we will use the `FUSE.parallel_environment("omega", ...)` call.
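
A minimal `Distributed.jl` sketch of the pattern (the exact arguments of `FUSE.parallel_environment` are not shown here, so plain `addprocs` stands in for it):
```
using Distributed
addprocs(4)             # hypothetical worker count; on Omega this is what
                        # FUSE.parallel_environment("omega", ...) manages for you
@everywhere using FUSE  # make FUSE available on every worker
pmap(x -> x^2, 1:10)    # sanity check that the workers respond
```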


## FUSE on the Omega cluster

1. Connect to `omega` and launch `screen`

@@ -92,7 +158,8 @@
Use the queue, time, CPU, and memory limits that make the most sense for your application;
see these [instructions](https://fusionga.sharepoint.com/sites/Computing/SitePages/Omega.aspx#using-slurm-to-run-interactive-tasks%E2%80%8B%E2%80%8B%E2%80%8B%E2%80%8B%E2%80%8B%E2%80%8B%E2%80%8B) for help.

1. Then start the Jupyter lab server from the `screen` session (`screen` will keep `jupyter`
running even when you log out)
```
jupyter lab --no-browser --port 55667
```
@@ -123,11 +190,13 @@
ssh -N -L localhost:33445:localhost:55667 omegae
```
!!! note
Keep this terminal always open. You may need to re-issue this command whenever you put your
laptop to sleep.

1. On your computer open a web browser tab to `localhost:33445` to connect to the Jupyter-lab
session on `omega`. Use the token when prompted.

## Using Revise on Omega
When working on Omega, it seems one needs to manually trigger Revise to pick up code changes:
```
import Revise
@@ -136,10 +205,10 @@ Revise.revise() # manual trigger

This happens even when setting [`JULIA_REVISE_POLL=1`](https://timholy.github.io/Revise.jl/stable/config/#Polling-and-NFS-mounted-code-directories:-JULIA_REVISE_POLL).

## Using GACODE on Omega with Julia
Julia may be incompatible with some environments and will crash when launched.
This is the case for the GACODE environment on Omega.
To be able to run both GACODE and Julia on Omega (e.g., to run NEO and TGLF) do the following:
```
module load atom
module unload gcc