Create new test for create multiple device classes #11096

Open · wants to merge 2 commits into base: master
170 changes: 154 additions & 16 deletions ocs_ci/helpers/helpers.py
@@ -574,31 +574,51 @@ def default_ceph_block_pool():


def create_ceph_block_pool(
    pool_name=None,
    replica=3,
    compression=None,
    failure_domain=None,
    verify=True,
    namespace=None,
    device_class=None,
    yaml_file=None,
):
    """
    Create a Ceph block pool with optional parameters.

    Args:
        pool_name (str): The pool name to create (optional).
        replica (int): The replica size for the pool.
        compression (str): Compression type for the pool (optional).
        failure_domain (str): Failure domain name (optional).
        verify (bool): True to verify the pool exists after creation, False otherwise.
        namespace (str): The pool namespace (optional).
        device_class (str): The device class name (optional).
        yaml_file (str): The name of the YAML file for the Ceph block pool (optional).

    Returns:
        OCS: The OCS instance for the Ceph block pool.

    """
    # Load the YAML template
    if yaml_file:
        cbp_data = templating.load_yaml(yaml_file)
    elif device_class:
        # Use the appropriate yaml for the device class CephBlockPool
        cbp_data = templating.load_yaml(constants.DEVICECLASS_CEPHBLOCKPOOL_YAML)
        cbp_data["spec"]["deviceClass"] = device_class
    else:
        # Use the appropriate yaml for the CephBlockPool
        cbp_data = templating.load_yaml(constants.CEPHBLOCKPOOL_YAML)

    cbp_data["metadata"]["name"] = (
        pool_name if pool_name else create_unique_resource_name("test", "cbp")
    )
    cbp_data["metadata"]["namespace"] = (
        namespace or config.ENV_DATA["cluster_namespace"]
    )

    cbp_data["spec"]["replicated"]["size"] = replica
    cbp_data["spec"]["failureDomain"] = failure_domain or get_failure_domin()

    if compression:
@@ -612,6 +632,7 @@ def create_ceph_block_pool(
    assert verify_block_pool_exists(
        cbp_obj.name
    ), f"Block pool {cbp_obj.name} does not exist"

    return cbp_obj
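
A minimal usage sketch of the extended helper, assuming the usual ocs_ci import path; the pool name and device class below are hypothetical:

from ocs_ci.helpers import helpers

# Hypothetical: create a block pool backed by an "ssd" device class in a
# non-default namespace; verify=False skips the existence check.
cbp_obj = helpers.create_ceph_block_pool(
    pool_name="cbp-ssd-example",  # hypothetical name
    replica=2,
    device_class="ssd",  # assumes such a device class exists in the cluster
    namespace="openshift-storage",
    verify=False,
)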


@@ -5744,3 +5765,120 @@ def verify_reclaimspacecronjob_suspend_state_for_pvc(pvc_obj):

logger.info(f"ReclaimSpace operation is enabled for PVC '{pvc_obj.name}'")
return False


def create_lvs_resource(
    name, storageclass, worker_nodes=None, min_size=None, max_size=None
):
    """
    Create the LocalVolumeSet resource.

    Args:
        name (str): The name of the LocalVolumeSet CR
        storageclass (str): storageClassName value to be used in the
            LocalVolumeSet CR based on LOCAL_VOLUME_SET_YAML
        worker_nodes (list): The worker node names to be used in the LocalVolumeSet resource
        min_size (str): The min size to be used in the LocalVolumeSet resource
        max_size (str): The max size to be used in the LocalVolumeSet resource

    Returns:
        OCS: The OCS instance for the LocalVolumeSet resource

    """
    worker_nodes = worker_nodes or node.get_worker_nodes()

    # Pull local volume set yaml data
    logger.info("Pulling LocalVolumeSet CR data from yaml")
    lvs_data = templating.load_yaml(constants.LOCAL_VOLUME_SET_YAML)

    # Since our current VMware machines have no datastore with SSDs, the
    # LocalVolumeSet does not detect NonRotational disks. As a workaround, we
    # add "Rotational" to deviceMechanicalProperties so HDD disks are detected.
    if config.ENV_DATA.get(
        "local_storage_allow_rotational_disks"
    ) or config.ENV_DATA.get("odf_provider_mode_deployment"):
        logger.info(
            "Adding Rotational for deviceMechanicalProperties spec"
            " to detect HDD disk"
        )
        lvs_data["spec"]["deviceInclusionSpec"]["deviceMechanicalProperties"].append(
            "Rotational"
        )

    lvs_data["metadata"]["name"] = name

    if min_size:
        lvs_data["spec"]["deviceInclusionSpec"]["minSize"] = min_size
    if max_size:
        lvs_data["spec"]["deviceInclusionSpec"]["maxSize"] = max_size

    # Update local volume set data with the worker node names
    logger.info(
        "Updating LocalVolumeSet CR data with worker nodes Name: %s", worker_nodes
    )
    lvs_data["spec"]["nodeSelector"]["nodeSelectorTerms"][0]["matchExpressions"][0][
        "values"
    ] = worker_nodes

    # Set storage class
    logger.info(
        "Updating LocalVolumeSet CR data with LSO storageclass: %s", storageclass
    )
    lvs_data["spec"]["storageClassName"] = storageclass

    # Set volumeMode to Filesystem for MCG-only deployment
    if config.ENV_DATA["mcg_only_deployment"]:
        lvs_data["spec"]["volumeMode"] = constants.VOLUME_MODE_FILESYSTEM

    lvs_obj = create_resource(**lvs_data)
    lvs_obj.reload()
    return lvs_obj
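
A short usage sketch, assuming an LSO-enabled cluster; the CR name and size bounds are illustrative:

from ocs_ci.helpers.helpers import create_lvs_resource

# Hypothetical: a LocalVolumeSet that claims only disks between 70Gi and
# 110Gi; worker_nodes=None defaults to all worker nodes.
lvs_obj = create_lvs_resource(
    name="localvolume-example",  # hypothetical CR name
    storageclass="localvolume-example",
    min_size="70Gi",
    max_size="110Gi",
)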


def create_rbd_deviceclass_storageclass(
    pool_name,
    sc_name=None,
    cluster_id="openshift-storage",
    reclaim_policy="Delete",
    volume_binding_mode="WaitForFirstConsumer",
    image_features=None,
    encrypted="false",
    allow_volume_expansion=True,
):
    """
    Create an RBD StorageClass resource for a device class from the provided parameters.

    Args:
        pool_name (str): Name of the pool.
        sc_name (str): Name of the StorageClass. If not provided, a random name is generated.
        cluster_id (str): Cluster ID.
        reclaim_policy (str): Reclaim policy (e.g., "Delete" or "Retain").
        volume_binding_mode (str): Volume binding mode (e.g., "Immediate", "WaitForFirstConsumer").
        image_features (str): Image features for the pool.
        encrypted (str): Encryption flag ("true" or "false").
        allow_volume_expansion (bool): Allow volume expansion (True/False).

    Returns:
        OCS: The OCS instance for the StorageClass resource

    """
    suffix = "".join(random.choices("0123456789", k=5))
    sc_name = sc_name or f"ssd{suffix}"
    image_features = (
        image_features or "layering,deep-flatten,exclusive-lock,object-map,fast-diff"
    )

    sc_data = templating.load_yaml(constants.DEVICECLASS_STORAGECLASS_YAML)

    # Update the YAML with the provided parameters
    sc_data["metadata"]["name"] = sc_name
    sc_data["parameters"]["pool"] = pool_name
    sc_data["allowVolumeExpansion"] = allow_volume_expansion
    sc_data["reclaimPolicy"] = reclaim_policy
    sc_data["volumeBindingMode"] = volume_binding_mode
    sc_data["parameters"]["imageFeatures"] = image_features
    sc_data["parameters"]["clusterID"] = cluster_id
    sc_data["parameters"]["encrypted"] = encrypted

    sc_obj = create_resource(**sc_data)
    sc_obj.reload()
    return sc_obj
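
A sketch of how the two helpers above would plausibly be combined, with illustrative names:

from ocs_ci.helpers import helpers

# Hypothetical: create a device-class-backed pool, then expose it through
# an RBD StorageClass using the defaults defined above.
cbp_obj = helpers.create_ceph_block_pool(device_class="ssd")  # hypothetical class
sc_obj = helpers.create_rbd_deviceclass_storageclass(pool_name=cbp_obj.name)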
14 changes: 14 additions & 0 deletions ocs_ci/ocs/constants.py
@@ -105,6 +105,8 @@
AI_NETWORK_CONFIG_TEMPLATE = os.path.join(
    "ocp-deployment", "ai-host-network-config.yaml.j2"
)
MULTIPLE_DEVICECLASSES_DIR = os.path.join(TEMPLATE_DIR, "multiple-deviceclasses")

# Statuses
STATUS_READY = "Ready"
PEER_READY = "Peer ready"
@@ -243,6 +245,7 @@
EXTERNAL_CLUSTER_SCRIPT_CONFIG = "rook-ceph-external-cluster-script-config"
ENCRYPTIONKEYROTATIONCRONJOB = "encryptionkeyrotationcronjobs.csiaddons.openshift.io"
ENCRYPTIONKEYROTATIONJOB = "encryptionkeyrotationjobs.csiaddons.openshift.io"
DEFAULT_CEPH_DEVICECLASS = "defaultCephDeviceClass"

# Provisioners
AWS_EFS_PROVISIONER = "openshift.org/aws-efs"
@@ -3144,3 +3147,14 @@
"In-transit encryption": "intransit_encryption",
"Object storage": "object_storage",
}

# Multiple device classes YAML files
STORAGE_DEVICESET_YAML = os.path.join(
    MULTIPLE_DEVICECLASSES_DIR, "storage-device-set.yaml"
)
DEVICECLASS_CEPHBLOCKPOOL_YAML = os.path.join(
    MULTIPLE_DEVICECLASSES_DIR, "deviceclass-cephblockpool.yaml"
)
DEVICECLASS_STORAGECLASS_YAML = os.path.join(
    MULTIPLE_DEVICECLASSES_DIR, "deviceclass-storageclass.yaml"
)
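
The template files themselves are not part of this diff; inferred only from the keys that create_ceph_block_pool sets, deviceclass-cephblockpool.yaml would load into roughly this structure (a sketch, not the actual template):

# Sketch of the loaded template as a Python dict; apiVersion/kind follow the
# Rook CephBlockPool CRD, everything else is filled in by the helper.
deviceclass_cephblockpool_template = {
    "apiVersion": "ceph.rook.io/v1",
    "kind": "CephBlockPool",
    "metadata": {"name": "", "namespace": ""},  # set from pool_name/namespace
    "spec": {
        "deviceClass": "",          # set from the device_class argument
        "failureDomain": "",        # set from failure_domain or get_failure_domin()
        "replicated": {"size": 3},  # overridden by the replica argument
    },
}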
164 changes: 164 additions & 0 deletions ocs_ci/ocs/device_classes.py
@@ -0,0 +1,164 @@
import logging
import random

from ocs_ci.helpers.helpers import create_lvs_resource
from ocs_ci.ocs.cluster import check_ceph_osd_tree
from ocs_ci.ocs.exceptions import CephHealthException
from ocs_ci.ocs.node import add_disk_to_node, get_node_objs
from ocs_ci.ocs.resources.pod import get_ceph_tools_pod
from ocs_ci.ocs.resources.pvc import wait_for_pvcs_in_lvs_to_reach_status
from ocs_ci.ocs.resources.storage_cluster import (
    get_storage_size,
    get_device_class,
    verify_storage_device_class,
    verify_device_class_in_osd_tree,
    get_deviceset_sc_name_per_count,
)
from ocs_ci.utility.utils import sum_of_two_storage_sizes

from ocs_ci.ocs import constants, defaults
from ocs_ci.ocs.ocp import OCP
from ocs_ci.framework import config


log = logging.getLogger(__name__)


def create_new_lvs_for_new_deviceclass(
    worker_nodes, create_disks_for_lvs=True, ssd=True
):
    """
    Create a new LocalVolumeSet resource for a new device class.
    It performs the following steps:
    1. Update the old LocalVolumeSet with a maxSize, so it will not consume the new PVs.
    2. Create a new minSize that is higher than the maxSize of the existing LocalVolumeSets,
       so that the new LVS will consume the disks with the new size.
    3. Limit the max size of the new LVS, so it will consume only the newly added disks.
    4. Create a new LocalVolumeSet with the minSize and maxSize above.
    5. If the param 'create_disks_for_lvs' is True, add new disks to the worker nodes.
       The disk size will be between the minSize and maxSize above to match the new LVS.

    Args:
        worker_nodes (list): The worker node names to be used in the LocalVolumeSet resource.
        create_disks_for_lvs (bool): If True, create new disks for the new LocalVolumeSet resource.
        ssd (bool): If True, mark the disks as SSD.

    Returns:
        OCS: The OCS instance for the LocalVolumeSet resource

    """
    osd_size = get_storage_size()
    log.info(f"The OSD size is {osd_size}")
    # Limit the old LVS max size so it will not consume the new PVs
    old_lvs_max_size = sum_of_two_storage_sizes(osd_size, "30Gi")
    ocp_lvs_obj = OCP(
        kind=constants.LOCAL_VOLUME_SET,
        namespace=defaults.LOCAL_STORAGE_NAMESPACE,
        resource_name=constants.LOCAL_BLOCK_RESOURCE,
    )
    log.info(
        f"Update the old LocalVolumeSet {ocp_lvs_obj.resource_name} with the maxSize "
        f"{old_lvs_max_size} so it will not consume the new PVs"
    )
    params = (
        f'{{"spec": {{"deviceInclusionSpec": {{"maxSize": "{old_lvs_max_size}"}}}}}}'
    )
    lvs_result = ocp_lvs_obj.patch(params=params, format_type="merge")
    assert (
        lvs_result
    ), f"Failed to update the LocalVolumeSet {ocp_lvs_obj.resource_name}"

    lvs_items = OCP(
        kind=constants.LOCAL_VOLUME_SET,
        namespace=defaults.LOCAL_STORAGE_NAMESPACE,
    ).get()["items"]

    # Default to "0Gi" rather than 0 so the numeric comparison below stays
    # type-safe when an LVS has no maxSize set
    lvs_max_sizes = [
        lvs_data["spec"]["deviceInclusionSpec"].get("maxSize", "0Gi")
        for lvs_data in lvs_items
    ]
    # Pick the largest maxSize; the slice strips a two-letter unit suffix
    # such as "Gi" so the sizes compare numerically
    lvs_max_size = max(lvs_max_sizes, key=lambda size: int(size[0:-2]))

    log.info(
        f"Create a new minSize that is higher than the maxSize of the LocalVolumeSets "
        f"{lvs_max_size}, so that the new LVS will consume the disks with the new size"
    )
    min_size = sum_of_two_storage_sizes(lvs_max_size, "10Gi")
    log.info(
        "Limit the max size of the new LVS, so it will consume only the newly added disks"
    )
    max_size = sum_of_two_storage_sizes(min_size, "40Gi")
    suffix = "".join(random.choices("0123456789", k=5))
    sc_name = f"localvolume{suffix}"
    lvs_obj = create_lvs_resource(sc_name, sc_name, worker_nodes, min_size, max_size)

    if create_disks_for_lvs:
        # The disk size will be between the minSize and maxSize above to match the new LVS
        disk_size_in_gb = sum_of_two_storage_sizes(min_size, "10Gi")
        disk_size = int(disk_size_in_gb[:-2])
        worker_node_objs = get_node_objs(worker_nodes)
        for n in worker_node_objs:
            add_disk_to_node(n, disk_size=disk_size, ssd=ssd)

    return lvs_obj
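
A hedged usage sketch of this flow, assuming an LSO-backed cluster where disks can be attached to the workers:

from ocs_ci.ocs.device_classes import create_new_lvs_for_new_deviceclass
from ocs_ci.ocs.node import get_worker_nodes

# Hypothetical: prepare a new LVS (and matching disks) for a future device class.
worker_nodes = get_worker_nodes()
lvs_obj = create_new_lvs_for_new_deviceclass(worker_nodes)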


def check_ceph_state_post_add_deviceclass():
    """
    Check the Ceph state after adding a new device class.
    The function checks the Ceph device classes and the osd tree.

    Raises:
        CephHealthException: In case the Ceph device classes and osd tree checks
            didn't finish successfully

    """
    log.info("Check the Ceph device classes and osd tree")
    device_class = get_device_class()
    ct_pod = get_ceph_tools_pod()
    try:
        verify_storage_device_class(device_class, check_multiple_deviceclasses=True)
        verify_device_class_in_osd_tree(
            ct_pod, device_class, check_multiple_deviceclasses=True
        )
    except AssertionError as ex:
        raise CephHealthException(ex)
    if not check_ceph_osd_tree():
        raise CephHealthException("The ceph osd tree checks didn't finish successfully")


def verification_steps_after_adding_new_deviceclass():
    """
    The function verifies the following:
    1. Wait for the LocalVolumeSet PVCs to reach the Bound state.
    2. Wait for the OSD pods to reach the Running state.
    3. Check the Ceph state after adding a new device class, as defined in the function
       'check_ceph_state_post_add_deviceclass'.

    """
    deviceclass_name_per_count = get_deviceset_sc_name_per_count()
    log.info(f"deviceclass name per count = {deviceclass_name_per_count}")
    lvs_obj = OCP(
        kind=constants.LOCAL_VOLUME_SET, namespace=defaults.LOCAL_STORAGE_NAMESPACE
    )
    lvs_items = lvs_obj.data["items"]
    log.info("Wait for the LocalVolumeSet PVCs to reach the Bound state")
    for lvs_data in lvs_items:
        lvs_name = lvs_data["metadata"]["name"]
        pvc_count = deviceclass_name_per_count[lvs_name]
        wait_for_pvcs_in_lvs_to_reach_status(
            lvs_name, pvc_count, constants.STATUS_BOUND
        )

    osd_pods_count = sum(deviceclass_name_per_count.values())
    pod_obj = OCP(kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"])
    log.info("Waiting for the OSD pods to reach the Running state")
    pod_obj.wait_for_resource(
        condition=constants.STATUS_RUNNING,
        selector=constants.OSD_APP_LABEL,
        resource_count=osd_pods_count,
        timeout=180,
        sleep=10,
    )

    check_ceph_state_post_add_deviceclass()
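
Putting the new helpers together, a plausible end-to-end flow for the test this PR adds (the test module itself is not shown in this excerpt; names are illustrative):

from ocs_ci.helpers import helpers
from ocs_ci.ocs import device_classes
from ocs_ci.ocs.node import get_worker_nodes

# Hypothetical flow: add capacity for a new device class, verify the cluster
# picked it up, then build a pool and StorageClass on top of it.
worker_nodes = get_worker_nodes()
device_classes.create_new_lvs_for_new_deviceclass(worker_nodes)
device_classes.verification_steps_after_adding_new_deviceclass()

cbp_obj = helpers.create_ceph_block_pool(device_class="ssd")  # hypothetical class
sc_obj = helpers.create_rbd_deviceclass_storageclass(pool_name=cbp_obj.name)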