diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md
index 0f78879378..4c0e83fb08 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md
@@ -310,7 +310,7 @@ limitations under the License.
| [enable\_external\_prolog\_epilog](#input\_enable\_external\_prolog\_epilog) | Automatically enable a script that will execute prolog and epilog scripts
shared by NFS from the controller to compute nodes. Find more details at:
https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/tools/prologs-epilogs/README.md | `bool` | `null` | no |
| [enable\_oslogin](#input\_enable\_oslogin) | Enables Google Cloud os-login for user login and authentication for VMs.
See https://cloud.google.com/compute/docs/oslogin | `bool` | `true` | no |
| [enable\_shielded\_vm](#input\_enable\_shielded\_vm) | Enable the Shielded VM configuration. Note: the instance image must support option. | `bool` | `false` | no |
-| [enable\_slurm\_gcp\_plugins](#input\_enable\_slurm\_gcp\_plugins) | Enables calling hooks in scripts/slurm\_gcp\_plugins during cluster resume and suspend. | `any` | `false` | no |
+| [enable\_slurm\_gcp\_plugins](#input\_enable\_slurm\_gcp\_plugins) | DEPRECATED: Slurm GCP plugins have been deprecated.
Instead of the 'max\_hops' plugin, please use the 'placement\_max\_distance' nodeset property.
Instead of the 'enable\_vpmu' plugin, please use the 'advanced\_machine\_features.performance\_monitoring\_unit' nodeset property. | `any` | `null` | no |
| [enable\_smt](#input\_enable\_smt) | DEPRECATED: Use `advanced_machine_features.threads_per_core` instead. | `bool` | `null` | no |
| [endpoint\_versions](#input\_endpoint\_versions) | Version of the API to use (The compute service is the only API currently supported) |
object({
compute = string
})
| {
"compute": "beta"
}
| no |
| [epilog\_scripts](#input\_epilog\_scripts) | List of scripts to be used for Epilog. Programs for the slurmd to execute
on every node when a user's job completes.
See https://slurm.schedmd.com/slurm.conf.html#OPT_Epilog. | list(object({
filename = string
content = optional(string)
source = optional(string)
}))
| `[]` | no |
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/README.md
index 1b1db61cc4..4d60f7ded6 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/README.md
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/README.md
@@ -77,7 +77,6 @@ No modules.
| [enable\_debug\_logging](#input\_enable\_debug\_logging) | Enables debug logging mode. Not for production use. | `bool` | `false` | no |
| [enable\_external\_prolog\_epilog](#input\_enable\_external\_prolog\_epilog) | Automatically enable a script that will execute prolog and epilog scripts
shared by NFS from the controller to compute nodes. Find more details at:
https://github.com/GoogleCloudPlatform/slurm-gcp/blob/v5/tools/prologs-epilogs/README.md | `bool` | `false` | no |
| [enable\_hybrid](#input\_enable\_hybrid) | Enables use of hybrid controller mode. When true, controller\_hybrid\_config will
be used instead of controller\_instance\_config and will disable login instances. | `bool` | `false` | no |
-| [enable\_slurm\_gcp\_plugins](#input\_enable\_slurm\_gcp\_plugins) | Enables calling hooks in scripts/slurm\_gcp\_plugins during cluster resume and suspend. | `any` | `false` | no |
| [endpoint\_versions](#input\_endpoint\_versions) | Version of the API to use (The compute service is the only API currently supported) | object({
compute = string
})
| {
"compute": null
}
| no |
| [epilog\_scripts](#input\_epilog\_scripts) | List of scripts to be used for Epilog. Programs for the slurmd to execute
on every node when a user's job completes.
See https://slurm.schedmd.com/slurm.conf.html#OPT_Epilog. | list(object({
filename = string
content = optional(string)
source = optional(string)
}))
| `[]` | no |
| [extra\_logging\_flags](#input\_extra\_logging\_flags) | The only available flag is `trace_api` | `map(bool)` | `{}` | no |
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/main.tf
index 959d928176..f4ebaa00b2 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/main.tf
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/main.tf
@@ -43,15 +43,14 @@ locals {
tp = "${local.bucket_dir}/" # prefix to trim from the bucket path to get a "file name"
config = {
- enable_slurm_gcp_plugins = var.enable_slurm_gcp_plugins
- enable_bigquery_load = var.enable_bigquery_load
- cloudsql_secret = var.cloudsql_secret
- cluster_id = random_uuid.cluster_id.result
- project = var.project_id
- slurm_cluster_name = var.slurm_cluster_name
- bucket_path = local.bucket_path
- enable_debug_logging = var.enable_debug_logging
- extra_logging_flags = var.extra_logging_flags
+ enable_bigquery_load = var.enable_bigquery_load
+ cloudsql_secret = var.cloudsql_secret
+ cluster_id = random_uuid.cluster_id.result
+ project = var.project_id
+ slurm_cluster_name = var.slurm_cluster_name
+ bucket_path = local.bucket_path
+ enable_debug_logging = var.enable_debug_logging
+ extra_logging_flags = var.extra_logging_flags
# storage
disable_default_mounts = var.disable_default_mounts
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py
index 639c99c347..016ca14dc6 100755
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py
@@ -44,8 +44,6 @@
from util import lookup, NSDict
import tpu
-import slurm_gcp_plugins
-
log = logging.getLogger()
PLACEMENT_MAX_CNT = 1500
@@ -202,14 +200,6 @@ def create_instances_request(nodes: List[str], placement_group: Optional[str], e
targetShape = nodeset.zone_target_shape,
)
- if lookup().cfg.enable_slurm_gcp_plugins:
- slurm_gcp_plugins.pre_instance_bulk_insert(
- lkp=lookup(),
- nodes=nodes,
- placement_group=placement_group,
- request_body=body,
- )
-
req = api_method(
project=lookup().project,
body=body,
@@ -453,10 +443,7 @@ def create_placement_request(pg_name: str, region: str, max_distance: Optional[i
"maxDistance": max_distance
},
}
- if lookup().cfg.enable_slurm_gcp_plugins:
- slurm_gcp_plugins.pre_placement_group_insert(
- lkp=lookup(), pg_name=pg_name, region=region, request_body=config
- )
+
request = lookup().compute.resourcePolicies().insert(
project=lookup().project, region=region, body=config
)
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/README.md
deleted file mode 100644
index 57664beb14..0000000000
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/README.md
+++ /dev/null
@@ -1,107 +0,0 @@
-# Plugin mechanism for slurm-gcp
-
-## Introduction
-
-Slurm in general provides many hooks for customization of its various functions.
-In fact - slurm-gcp is using one of these customization points, PrologSlurmctld,
-to perform tasks related to VM instance creation as a response to job node
-allocation.
-
-The plugin mechanism in this directory similarly allows deployment specific
-customizations to slurm-gcp by dropping Python modules in
-`/scripts/slurm_gcp_plugins` and enabling plugins setting the
-configuration directive `enable_slurm_gcp_plugins = true` in
-`/scripts/config.yaml`
-
-A very basic `test_plugin`, is provided as an example.
-
-## Plugins
-
-Callbacks to registered plugins can be made from various places in resume.py and
-suspend.py. The following callbacks are currently made:
-
-### Callback function signature
-
-Callback functions in the plugins are recommended to be declared as follows:
-
-```python
-def post_main_resume_nodes(*pos_args, **keyword_args):
-...
-```
-
-and extract arguments from `keyword_args`. Check the callback sites to
-understand which values that are available.
-
-### Current callback sites
-
-Callbacks are currently performed from the following places:
-
-#### scripts/resume.py:main_resume_nodes
-
-At the end of main the following callback is called
-
-```python
-def post_main_resume_nodes(*pos_args, **keyword_args):
-```
-
-The primary intention is allow a plugin to record details about the instance
-and/or setup/change properties for which the VMs needs to be up and running.
-
-Currently the call is made regardless of if the the resume node operation
-succeeded or not.
-
-#### scripts/resume.py:create_instances_request
-
-In create_instances_request just before the bulk instance insert is called, the
-following callback is called
-
-```python
-def pre_instance_bulk_insert(*pos_args, **keyword_args):
-```
-
-The primary intention is allow a plugin to modify the instance creation request.
-
-#### scripts/resume.py:create_placement_request
-
-In create_instances_request just before the resource policy creation, the
-following callback is called
-
-```python
-def pre_placement_group_insert(*pos_args, **keyword_args):
-```
-
-The primary intention is allow a plugin to modify the resource policy creation
-request.
-
-#### scripts/suspend.py:main_suspend_nodes
-
-In main just before the VMs are deleted but while they still (should) exist, the
-following callback is called
-
-```python
-def pre_main_suspend_nodes(*pos_args, **keyword_args):
-```
-
-The primary intention is allow a plugin to cleanup or record details while the
-node still exists.
-
-#### scripts/util.py:instances
-
-Just before the per-instance information is requested the following callback is
-called:
-
-```python
-def register_instance_information_fields(*pos_args, **keyword_args):
-```
-
-The primary intention is allow a plugin to add information to the per instance
-lookup.
-
-### Logging and error handling
-
-Plugin functions are recommended to use `logging` to communicate information,
-warnings and errors. The `slurm_gcp_plugins` registry tries to isolate the
-caller of the callbacks (i.e. resume.py and suspend.py) from effects of errors
-with a general try-catch wrapper for each plugin callback. However - as the
-callback happens in the same process there are notable limits on how much
-isolation that can be achieved.
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/__init__.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/__init__.py
deleted file mode 100644
index dec7085994..0000000000
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/__init__.py
+++ /dev/null
@@ -1,140 +0,0 @@
-# Copyright 2024 "Google LLC"
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import importlib
-import pkgutil
-import logging
-import inspect
-
-# Only perform discovery at init
-discovered_plugins = {
- name.lstrip("."): importlib.import_module(name=name, package="slurm_gcp_plugins")
- for finder, name, ispkg in pkgutil.iter_modules(path=__path__, prefix=".")
- if name.lstrip(".") != "utils"
-}
-
-logging.info(
- (
- "slurm_gcp_plugins found:"
- + ", ".join(
- [
- "slurm_gcp_plugins" + plugin
- for plugin in sorted(discovered_plugins.keys())
- ]
- )
- )
-)
-
-
-def get_plugins():
- return discovered_plugins
-
-
-def get_plugins_function(function_name):
- plugins = get_plugins()
-
- return {
- plugin: function
- for plugin in sorted(plugins.keys())
- for name, function in inspect.getmembers(plugins[plugin], inspect.isfunction)
- if name == function_name
- }
-
-
-def run_plugins_for_function(plugin_function_name, pos_args, keyword_args):
- if "lkp" not in keyword_args:
- logging.error(
- (
- f"Plugin callback {plugin_function_name} called"
- + 'without a "lkp" argument need to get obtain deployment'
- + "information"
- )
- )
- return
-
- if not keyword_args["lkp"].cfg:
- logging.error(
- (
- f"Plugin callback {plugin_function_name} called"
- + 'with "lkp.cfg" unpopulated. lkp.cfg is needed'
- + "to argument need to get obtain deployment"
- + "information"
- )
- )
- return
-
- cfg = keyword_args["lkp"].cfg
- if cfg.enable_slurm_gcp_plugins:
- for plugin, function in get_plugins_function(plugin_function_name).items():
- if plugin in cfg.enable_slurm_gcp_plugins:
- logging.debug(f"Running {function} from plugin {plugin}")
- try:
- function(*pos_args, **keyword_args)
- except BaseException as e:
- logging.error(
- f"Plugin callback {plugin}:{function} caused an exception: {e}"
- )
- else:
- logging.debug(
- f"Not running {function} from non-enabled plugin {plugin}"
- )
-
-
-# Implement this function to add fields to the cached VM instance lookup
-def register_instance_information_fields(*pos_args, **keyword_args):
- run_plugins_for_function(
- plugin_function_name="register_instance_information_fields",
- pos_args=pos_args,
- keyword_args=keyword_args,
- )
-
-
-
-# Called just before VM instances are deleted should be still up
-# (NOTE: if a node has failed it might not be up or unresponsive)
-def pre_main_suspend_nodes(*pos_args, **keyword_args):
- run_plugins_for_function(
- plugin_function_name="pre_main_suspend_nodes",
- pos_args=pos_args,
- keyword_args=keyword_args,
- )
-
-
-# Called just before VM instances are created are created with
-# bulkInsert- this function can be implemented to inspect and/or
-# modify the insertion request.
-def pre_instance_bulk_insert(*pos_args, **keyword_args):
- run_plugins_for_function(
- plugin_function_name="pre_instance_bulk_insert",
- pos_args=pos_args,
- keyword_args=keyword_args,
- )
-
-
-# Called just before placement groups are created - this function can
-# be implemented to inspect and/or modify the insertion request.
-def pre_placement_group_insert(*pos_args, **keyword_args):
- run_plugins_for_function(
- plugin_function_name="pre_placement_group_insert",
- pos_args=pos_args,
- keyword_args=keyword_args,
- )
-
-
-__all__ = [
- "pre_main_suspend_nodes",
- "register_instance_information_fields",
- "pre_instance_bulk_insert",
- "pre_placement_group_insert",
-]
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/test_plugin/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/test_plugin/README.md
deleted file mode 100644
index c3a46ca420..0000000000
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/test_plugin/README.md
+++ /dev/null
@@ -1,16 +0,0 @@
-# Test slurm_gcp_plugin plugin
-
-## Overview
-
-This is a very basic but still useful test plugin that records the VM instance
-id of the nodes used for jobs (when dynamic nodes are used).
-
-## Callbacks used
-
-### post_main_resume_nodes
-
-Used to log the instance id of created VMs
-
-### register_instance_information_fields
-
-Used to add the instance id to the information collected for VM instances.
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/test_plugin/__init__.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/test_plugin/__init__.py
deleted file mode 100644
index b4b3be580d..0000000000
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/test_plugin/__init__.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright 2024 "Google LLC"
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-
-instance_information_fields = ["resourceStatus", "id"]
-
-
-def register_instance_information_fields(*pos_args, **keyword_args):
- logging.debug("register_instance_information_fields called from test_plugin")
- keyword_args["instance_information_fields"].extend(instance_information_fields)
-
-
-
-__all__ = [
- "register_instance_information_fields",
-]
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/utils/__init__.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/utils/__init__.py
deleted file mode 100644
index d24e38aa25..0000000000
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/utils/__init__.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# Copyright 2024 "Google LLC"
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import subprocess
-import logging
-
-# Various plugin utility functions
-
-# Plugin helper function to get plugin settings in the following order:
-#
-# 1. from job features with
-# 2. from slurm-gcp config
-# 3. If provided, the default
-# 4. None
-
-
-def get_plugin_setting(plugin, setting, lkp, job, default=None):
- features = get_job_features(job)
- if f"{plugin}.{setting}" in features:
- return features[f"{plugin}.{setting}"]
-
- if "enable_slurm_gcp_plugins" in lkp.cfg:
- if plugin in lkp.cfg.enable_slurm_gcp_plugins:
- try:
- iter(lkp.cfg.enable_slurm_gcp_plugins[plugin])
- except TypeError:
- # not iterable
- 1
- else:
- if setting in lkp.cfg.enable_slurm_gcp_plugins[plugin]:
- return lkp.cfg.enable_slurm_gcp_plugins[plugin][setting]
-
- return default
-
-
-# Plugin helper function to get job features
-def get_job_features(job):
- if job is None:
- return {}
-
- features = {}
- res, output = subprocess.getstatusoutput(f"squeue -h -o %f -j {job}")
- if res == 0:
- for feature in output.split("&"):
- kv = feature.split("=", 1)
- v = None
- if len(kv) == 2:
- v = kv[1]
- features[kv[0]] = v
- else:
- logging.error("Unable to retrieve features of job:{job}")
-
- return features
-
-
-__all__ = [
- "get_plugin_setting",
- "get_job_features",
-]
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py
index 7d6ae28f9f..c308df11c5 100755
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py
@@ -31,8 +31,6 @@
from util import lookup
import tpu
-import slurm_gcp_plugins
-
log = logging.getLogger()
TOT_REQ_CNT = 1000
@@ -105,8 +103,6 @@ def main(nodelist):
return
log.info(f"suspend {nodelist}")
- if lookup().cfg.enable_slurm_gcp_plugins:
- slurm_gcp_plugins.pre_main_suspend_nodes(lkp=lookup(), nodelist=nodelist)
suspend_nodes(pm_nodes)
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py
index 96955309d3..53f847d81d 100755
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py
@@ -43,8 +43,6 @@
from pathlib import Path
from time import sleep, time
-import slurm_gcp_plugins
-
from google.cloud import secretmanager
from google.cloud import storage
@@ -1525,14 +1523,7 @@ def instances(self) -> Dict[str, object]:
# "deletionProtection",
# "startRestricted",
]
- if lookup().cfg.enable_slurm_gcp_plugins:
- slurm_gcp_plugins.register_instance_information_fields(
- lkp=lookup(),
- project=self.project,
- slurm_cluster_name=self.cfg.slurm_cluster_name,
- instance_information_fields=instance_information_fields,
- )
-
+
# TODO: Merge this with all fields when upcoming maintenance is
# supported in beta.
if endpoint_version(ApiEndpoint.COMPUTE) == 'alpha':
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/variables.tf
index 653e7d74ca..0000c34b7d 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/variables.tf
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/variables.tf
@@ -58,18 +58,6 @@ variable "slurm_cluster_name" {
}
}
-variable "enable_slurm_gcp_plugins" {
- description = <