Skip to content

[Efs encryption][Draft] Support HeadNode Efs SharedStorage Encryption #6914

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 21 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
f36a98e
[Efs Encryption] Add HeadNode/SharedStorageSettings/Encrypted
Allenz5 Jul 7, 2025
230d6a8
Merge branch 'develop' of https://github.com/aws/aws-parallelcluster …
Allenz5 Jul 8, 2025
ac5f9fb
[Efs Encryption] Update SharedStorageEfsSettingsEncryptedValidator to…
Allenz5 Jul 8, 2025
df0da09
[Efs Encryption] Add unit tests for SharedStorageEfsSettingsEncrypted…
Allenz5 Jul 8, 2025
a942241
[Efs Encryption] Fix parameter name in cluster_schema.py
Allenz5 Jul 8, 2025
a4ae51e
Merge branch 'aws:develop' into efs-encryption
Allenz5 Jul 14, 2025
1931dc8
[Efs Encryption] Fix parameter naming
Allenz5 Jul 14, 2025
63bfa91
Merge branch 'develop' of https://github.com/aws/aws-parallelcluster …
Allenz5 Jul 14, 2025
cbb5415
Merge branch 'efs-encryption' of github.com:Allenz5/aws-parallelclust…
Allenz5 Jul 14, 2025
be5ee63
[Efs Encryption] Add more test cases in `test_shared_storage_efs_sett…
Allenz5 Jul 14, 2025
91047d8
[Efs Encryption] Update the comments of `SharedStorageEfsSettings` an…
Allenz5 Jul 15, 2025
ea3b32a
Merge branch 'aws:develop' into efs-encryption
Allenz5 Jul 15, 2025
bb4e1e4
Merge branch 'develop' of https://github.com/aws/aws-parallelcluster …
Allenz5 Jul 15, 2025
88b3781
[Efs Encryption] Move SharedStorageType Enum from cluster_config.py t…
Allenz5 Jul 15, 2025
7508cfc
[Efs Encryption] Change hardcoded ShareStorageType to an enum in the …
Allenz5 Jul 15, 2025
944762a
[Efs Encryption] Fix format issues
Allenz5 Jul 15, 2025
b7936e4
Merge branch 'efs-encryption' of github.com:Allenz5/aws-parallelclust…
Allenz5 Jul 15, 2025
90b2691
[Efs Encryption] Move the addition of internal EFS shared storage awa…
Allenz5 Jul 15, 2025
3fc1d57
[Efs Encryption] Add SharedStorageEfsSettings section to the test_clu…
Allenz5 Jul 15, 2025
5ba7bbd
[Efs Encryption] Fix SharedStorageEfsSettingsValidator type issue
Allenz5 Jul 15, 2025
c27abc0
Merge branch 'develop' into efs-encryption
himani2411 Jul 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions cli/src/pcluster/config/cluster_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from pcluster.config.common import Imds as TopLevelImds
from pcluster.config.common import (
Resource,
SharedStorageType,
)
from pcluster.constants import (
CIDR_ALL_IPS,
Expand Down Expand Up @@ -114,6 +115,7 @@
SchedulerValidator,
SharedEbsPerformanceBottleNeckValidator,
SharedFileCacheNotHomeValidator,
SharedStorageEfsSettingsValidator,
SharedStorageMountDirValidator,
SharedStorageNameValidator,
UnmanagedFsxMultiAzValidator,
Expand Down Expand Up @@ -291,15 +293,6 @@ def __init__(self, root_volume: RootVolume = None, ephemeral_volume: EphemeralVo
self.ephemeral_volume = ephemeral_volume


class SharedStorageType(Enum):
    """Define storage types to be used as shared storage."""

    # Each member value is the lowercase identifier of the storage type.
    EBS = "ebs"
    RAID = "raid"
    EFS = "efs"
    FSX = "fsx"


class SharedEbs(Ebs):
"""Represent a shared EBS, inherits from both _SharedStorage and Ebs classes."""

Expand Down Expand Up @@ -851,6 +844,14 @@ def __init__(self, allowed_ips: str = None, **kwargs):
self.allowed_ips = Resource.init_param(allowed_ips)


class SharedStorageEfsSettings(Resource):
    """Represent the settings of Efs shared storage used by HeadNode."""

    # Single source of truth for the default encryption setting: other components
    # should reference this constant instead of hard-coding their own fallback value.
    DEFAULT_ENCRYPTION = False

    def __init__(self, encrypted: bool = DEFAULT_ENCRYPTION):
        """
        Initialize the settings.

        :param encrypted: whether the Efs shared storage must be encrypted;
            defaults to DEFAULT_ENCRYPTION.
        """
        super().__init__()
        self.encrypted = encrypted


class Dcv(Resource):
"""Represent the DCV configuration."""

Expand Down Expand Up @@ -1434,6 +1435,7 @@ def __init__(
disable_simultaneous_multithreading: bool = None,
local_storage: LocalStorage = None,
shared_storage_type: str = None,
shared_storage_efs_settings: SharedStorageEfsSettings = None,
dcv: Dcv = None,
custom_actions: CustomActions = None,
iam: Iam = None,
Expand All @@ -1452,6 +1454,7 @@ def __init__(
shared_storage_type,
default="Ebs",
)
self.shared_storage_efs_settings = shared_storage_efs_settings
self.dcv = dcv
self.custom_actions = custom_actions
self.iam = iam or Iam(implied=True)
Expand All @@ -1461,6 +1464,11 @@ def __init__(

def _register_validators(self, context: ValidatorContext = None):  # noqa: D102 #pylint: disable=unused-argument
    # Validate the instance type configured on this resource.
    self._register_validator(InstanceTypeValidator, instance_type=self.instance_type)
    # Pass both the shared storage type and the Efs settings to the validator, which
    # verifies that SharedStorageEfsSettings is only used with the Efs SharedStorageType.
    self._register_validator(
        SharedStorageEfsSettingsValidator,
        shared_storage_type=self.shared_storage_type,
        shared_storage_efs_settings=self.shared_storage_efs_settings,
    )

@property
def architecture(self) -> str:
Expand Down
9 changes: 9 additions & 0 deletions cli/src/pcluster/config/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,3 +434,12 @@ def dump_json(self):
attribute_json = {"cluster": self._cluster_attributes}
attribute_json.update(self._extra_attributes)
return json.dumps(attribute_json, sort_keys=True)


class SharedStorageType(Enum):
    """Define storage types to be used as shared storage."""

    # Each member value is the lowercase identifier of the storage type.
    EBS = "ebs"
    RAID = "raid"
    EFS = "efs"
    FSX = "fsx"
15 changes: 15 additions & 0 deletions cli/src/pcluster/schemas/cluster_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
SharedEbs,
SharedEfs,
SharedFsxLustre,
SharedStorageEfsSettings,
SlurmClusterConfig,
SlurmComputeResource,
SlurmComputeResourceNetworking,
Expand Down Expand Up @@ -792,6 +793,17 @@ def make_resource(self, data, **kwargs):
return HeadNodeSsh(**data)


class SharedStorageEfsSettingsSchema(BaseSchema):
    """Represent the schema of SharedStorageEfsSettings for the HeadNode."""

    # Boolean flag for the encryption of the Efs shared storage; declared with the
    # UNSUPPORTED update policy.
    encrypted = fields.Bool(metadata={"update_policy": UpdatePolicy.UNSUPPORTED})

    @post_load
    def make_resource(self, data, **kwargs):
        """Generate the SharedStorageEfsSettings resource from the loaded data."""
        return SharedStorageEfsSettings(**data)


class DcvSchema(BaseSchema):
"""Represent the schema of DCV."""

Expand Down Expand Up @@ -1363,6 +1375,9 @@ class HeadNodeSchema(BaseSchema):
metadata={"update_policy": UpdatePolicy.UNSUPPORTED},
validate=validate.OneOf(["Ebs", "Efs"]),
)
shared_storage_efs_settings = fields.Nested(
SharedStorageEfsSettingsSchema, metadata={"update_policy": UpdatePolicy.UNSUPPORTED}
)
dcv = fields.Nested(DcvSchema, metadata={"update_policy": UpdatePolicy.UNSUPPORTED})
custom_actions = fields.Nested(HeadNodeCustomActionsSchema, metadata={"update_policy": UpdatePolicy.IGNORED})
iam = fields.Nested(HeadNodeIamSchema, metadata={"update_policy": UpdatePolicy.SUPPORTED})
Expand Down
3 changes: 2 additions & 1 deletion cli/src/pcluster/templates/awsbatch_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
from aws_cdk.aws_ec2 import CfnSecurityGroup
from aws_cdk.core import CfnOutput, CfnResource, Construct, Fn, Stack

from pcluster.config.cluster_config import AwsBatchClusterConfig, CapacityType, SharedStorageType
from pcluster.config.cluster_config import AwsBatchClusterConfig, CapacityType
from pcluster.config.common import SharedStorageType
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here and in other parts of the PR.
This change is motivated by a refactoring that is unrelated to the main goal of this PR.
A best practice is to keep the PR minimal and avoid refactoring components that are out of scope for the main goal. The refactoring you're suggesting here is correct, but I suggest doing it in a follow-up PR.

from pcluster.constants import AWSBATCH_CLI_REQUIREMENTS, CW_LOG_GROUP_NAME_PREFIX, IAM_ROLE_PATH
from pcluster.models.s3_bucket import S3Bucket
from pcluster.templates.cdk_builder_utils import (
Expand Down
2 changes: 1 addition & 1 deletion cli/src/pcluster/templates/cdk_builder_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@
BaseQueue,
HeadNode,
LoginNodesPool,
SharedStorageType,
SlurmClusterConfig,
SlurmComputeResource,
SlurmQueue,
)
from pcluster.config.common import SharedStorageType
from pcluster.constants import (
COOKBOOK_PACKAGES_VERSIONS,
CW_LOGS_RETENTION_DAYS_DEFAULT,
Expand Down
23 changes: 16 additions & 7 deletions cli/src/pcluster/templates/cluster_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,9 @@
SharedEbs,
SharedEfs,
SharedFsxLustre,
SharedStorageType,
SlurmClusterConfig,
)
from pcluster.config.common import DefaultUserHomeType
from pcluster.config.common import DefaultUserHomeType, SharedStorageType
from pcluster.constants import (
ALL_PORTS_RANGE,
CW_ALARM_DATAPOINTS_TO_ALARM_DEFAULT,
Expand Down Expand Up @@ -264,12 +263,9 @@ def _add_resources(self):
# Add the internal use shared storage to the stack
# This FS will be mounted, the shared dirs will be added,
# then it will be unmounted and the shared dirs will be
# mounted. We need to create the additional mount points first.
# mounted. We need to create the additional mount points first.
if self.config.head_node.shared_storage_type.lower() == SharedStorageType.EFS.value:
internal_efs_storage_shared = SharedEfs(
mount_dir="/opt/parallelcluster/init_shared", name="internal_pcluster_shared", throughput_mode="elastic"
)
self._add_shared_storage(internal_efs_storage_shared)
self._add_internal_efs_shared_storage()

# Add user configured shared storage
if self.config.shared_storage:
Expand Down Expand Up @@ -335,6 +331,19 @@ def _add_resources(self):
head_node_alarms=self.head_node_alarms,
)

def _add_internal_efs_shared_storage(self):
if self.config.head_node.shared_storage_efs_settings:
encrypted = self.config.head_node.shared_storage_efs_settings.encrypted
else:
encrypted = False
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Single Responsibility / Maintainability] The resource SharedStorageEfsSettings should be solely responsible for determining the default value of the encryption. To this end, you should define the default value in a constant SharedStorageEfsSettings.DEFAULT_ENCRYPTION and reference the constant here. This way we do not risk scattering default values, and the responsibility for them, across different places; it also makes the code easier to maintain, as it avoids code duplication and the risk of misalignment.

internal_efs_storage_shared = SharedEfs(
mount_dir="/opt/parallelcluster/init_shared",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why init_shared and not, for example, shared? I know this was already in the code, but do we have an understanding about it?

name="internal_pcluster_shared",
throughput_mode="elastic",
encrypted=encrypted,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Test] We should capture this property in a unit test. The unit test should verify that, when encrypted is specified in the cluster config, the cluster template has the EFS resource with that property set to the expected value.

See example:

def test_add_alarms(mocker, config_file_name):
mock_aws_api(mocker)
# mock bucket initialization parameters
mock_bucket(mocker)
mock_bucket_object_utils(mocker)
input_yaml, cluster = load_cluster_model_from_yaml(config_file_name)
generated_template, _ = CDKTemplateBuilder().build_cluster_template(
cluster_config=cluster, bucket=dummy_cluster_bucket(), stack_name="clustername"
)
simple_type = "AWS::CloudWatch::Alarm"
composite_type = "AWS::CloudWatch::CompositeAlarm"
head_node_alarms = [
{"name": "clustername-HeadNode", "type": composite_type},
{"name": "clustername-HeadNode-Health", "type": simple_type},
{"name": "clustername-HeadNode-Cpu", "type": simple_type},
{"name": "clustername-HeadNode-Mem", "type": simple_type},
{"name": "clustername-HeadNode-Disk", "type": simple_type},
]
if cluster.are_alarms_enabled:
for alarm in head_node_alarms:
matched_resources = get_resources(
generated_template, type=alarm["type"], properties={"AlarmName": alarm["name"]}
)
assert_that(matched_resources).is_length(1)
else:
matched_simple_alarms = get_resources(generated_template, type=simple_type)
matched_composite_alarms = get_resources(generated_template, type=composite_type)
assert_that(matched_simple_alarms).is_empty()
assert_that(matched_composite_alarms).is_empty()

)
self._add_shared_storage(internal_efs_storage_shared)

def _cw_metric_head_node(
self, namespace, metric_name, statistic="Maximum", period_seconds=CW_ALARM_PERIOD_DEFAULT, extra_dimensions=None
):
Expand Down
3 changes: 2 additions & 1 deletion cli/src/pcluster/templates/cw_dashboard_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
from aws_cdk.aws_cloudwatch import IAlarm
from aws_cdk.core import Construct, Duration, Stack

from pcluster.config.cluster_config import BaseClusterConfig, ExistingFileCache, SharedFsxLustre, SharedStorageType
from pcluster.config.cluster_config import BaseClusterConfig, ExistingFileCache, SharedFsxLustre
from pcluster.config.common import SharedStorageType
from pcluster.constants import Feature
from pcluster.utils import is_feature_supported

Expand Down
4 changes: 2 additions & 2 deletions cli/src/pcluster/templates/login_nodes_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from aws_cdk.core import CfnTag, Construct, Fn, NestedStack, Stack, Tags

from pcluster.aws.aws_api import AWSApi
from pcluster.config.cluster_config import LoginNodesPool, SharedStorageType, SlurmClusterConfig
from pcluster.config.common import DefaultUserHomeType
from pcluster.config.cluster_config import LoginNodesPool, SlurmClusterConfig
from pcluster.config.common import DefaultUserHomeType, SharedStorageType
from pcluster.constants import (
DEFAULT_EPHEMERAL_DIR,
NODE_BOOTSTRAP_TIMEOUT,
Expand Down
4 changes: 2 additions & 2 deletions cli/src/pcluster/templates/queues_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from constructs import Construct

from pcluster.aws.aws_api import AWSApi
from pcluster.config.cluster_config import SharedStorageType, SlurmClusterConfig, SlurmComputeResource, SlurmQueue
from pcluster.config.common import DefaultUserHomeType
from pcluster.config.cluster_config import SlurmClusterConfig, SlurmComputeResource, SlurmQueue
from pcluster.config.common import DefaultUserHomeType, SharedStorageType
from pcluster.constants import (
DEFAULT_EPHEMERAL_DIR,
NODE_BOOTSTRAP_TIMEOUT,
Expand Down
22 changes: 20 additions & 2 deletions cli/src/pcluster/validators/cluster_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from pcluster.aws.aws_resources import InstanceTypeInfo
from pcluster.aws.common import AWSClientError
from pcluster.cli.commands.dcv_util import get_supported_dcv_os
from pcluster.config.common import CapacityType
from pcluster.config.common import CapacityType, SharedStorageType
from pcluster.constants import (
CIDR_ALL_IPS,
DELETE_POLICY,
Expand Down Expand Up @@ -676,7 +676,7 @@ def _check_file_storage(self, security_groups_by_nodes, file_storages, subnet_id
self._add_failure(
f"The current security group settings on file storage '{file_storage_id}' does not"
" satisfy mounting requirement. The file storage must be associated to a security group"
f" that allows {direction } {protocol.upper()} traffic through ports {ports}. "
f" that allows {direction} {protocol.upper()} traffic through ports {ports}. "
f"Missing ports: {missing_ports}",
FailureLevel.ERROR,
)
Expand Down Expand Up @@ -1336,6 +1336,24 @@ def _validate(self, head_node_instance_type: str, total_max_compute_nodes: int):
)


class SharedStorageEfsSettingsValidator(Validator):
"""
HeadNode SharedStorageEfsSettings Validator.

Verify HeadNode SharedStorageEfsSettings can only be used with Efs SharedStorageType.
"""

def _validate(self, shared_storage_type: str, shared_storage_efs_settings):
if shared_storage_efs_settings and shared_storage_type.lower() != SharedStorageType.EFS.value:
self._add_failure(
"SharedStorageEfsSettings is specified "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error message can be simplified to SharedStorageEfsSettings can only be used when SharedStorageType is {SharedStorageType.EFS.value.capitalize()}.

I suggest using capitalize() because, even though we are able to handle both Efs and efs as values, our public documentation always uses capitalized strings.

f"but the SharedStorageType is set to {shared_storage_type}. "
"SharedStorageEfsSettings can only be used when SharedStorageType "
f"is specified as {SharedStorageType.EFS.value}.",
FailureLevel.ERROR,
)


class SharedEbsPerformanceBottleNeckValidator(Validator):
"""Warn potential performance bottleneck of using Shared EBS."""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ HeadNode:
DeleteOnTermination: true
EphemeralVolume:
MountDir: /test
SharedStorageEfsSettings:
Encrypted: false
SharedStorageType: Efs # Ebs
Dcv:
Enabled: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ HeadNode:
HttpProxyAddress: https://proxy-address:port
SecurityGroups: null
SubnetId: subnet-12345678
SharedStorageEfsSettings:
Encrypted: false
SharedStorageType: Efs
Ssh:
AllowedIps: 1.2.3.4/32
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import yaml
from assertpy import assert_that

from pcluster.config.cluster_config import SharedStorageType
from pcluster.config.common import SharedStorageType
from pcluster.constants import Feature
from pcluster.schemas.cluster_schema import ClusterSchema
from pcluster.templates.cdk_builder import CDKTemplateBuilder
Expand Down
7 changes: 7 additions & 0 deletions cli/tests/pcluster/validators/test_all_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@ def test_slurm_validators_are_called_with_correct_argument(test_datadir, mocker)
number_of_storage_validator = mocker.patch(
cluster_validators + ".NumberOfStorageValidator._validate", return_value=[]
)
shared_storage_efs_settings_validator = mocker.patch(
cluster_validators + ".SharedStorageEfsSettingsValidator._validate", return_value=[]
)
deletion_policy_validator = mocker.patch(cluster_validators + ".DeletionPolicyValidator._validate", return_value=[])
root_volume_encryption_consistency_validator = mocker.patch(
cluster_validators + ".RootVolumeEncryptionConsistencyValidator._validate", return_value=[]
Expand Down Expand Up @@ -392,6 +395,10 @@ def test_slurm_validators_are_called_with_correct_argument(test_datadir, mocker)
],
any_order=True,
)
shared_storage_efs_settings_validator.assert_has_calls(
[call(shared_storage_type="Ebs", shared_storage_efs_settings=None)],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Test] This unit test would make more sense if we used a configuration that injects both shared_storage_type and shared_storage_efs_settings into the validators; otherwise one could suspect that there exist cases where shared_storage_efs_settings is not injected.

My suggestion is to cover two scenarios:

  1. default case (the one you did): shared_storage_type=Ebs and shared_storage_efs_settings is not set
  2. case where shared_storage_efs_settings is injected: shared_storage_type=Efs and shared_storage_efs_settings is set.

any_order=True,
)
# capacity reservation validators
capacity_reservation_validator.assert_has_calls(
[
Expand Down
47 changes: 46 additions & 1 deletion cli/tests/pcluster/validators/test_cluster_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
SlurmSettings,
Tag,
)
from pcluster.config.common import CapacityType
from pcluster.config.common import CapacityType, SharedStorageType
from pcluster.constants import PCLUSTER_NAME_MAX_LENGTH, PCLUSTER_NAME_MAX_LENGTH_SLURM_ACCOUNTING
from pcluster.validators.cluster_validators import (
FSX_MESSAGES,
Expand Down Expand Up @@ -71,6 +71,7 @@
SchedulerDisableSudoAccessForDefaultUserValidator,
SchedulerOsValidator,
SharedFileCacheNotHomeValidator,
SharedStorageEfsSettingsValidator,
SharedStorageMountDirValidator,
SharedStorageNameValidator,
UnmanagedFsxMultiAzValidator,
Expand Down Expand Up @@ -2075,6 +2076,50 @@ def test_mixed_security_group_overwrite_validator(head_node_security_groups, que
assert_failure_messages(actual_failures, expected_message)


@pytest.mark.parametrize(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Test] It's a best practice to define test cases with a self-explanatory id. This improves readability and troubleshooting.
See example:

pytest.param(
{"clusterName": "cluster", "clusterConfiguration": CONFIG},
[ValidationResult("message", FailureLevel.WARNING, "type")],
None,
None,
"us-east-1",
None,
id="test with all errors",
),

"shared_storage_type, shared_storage_efs_settings, expected_message",
[
(
"Efs",
{"encrypted": True},
None,
),
(
"Efs",
{"encrypted": False},
None,
),
(
"Efs",
None,
None,
),
(
"Ebs",
{"encrypted": True},
f"SharedStorageEfsSettings is specified but the SharedStorageType is set to Ebs. "
"SharedStorageEfsSettings can only be used when SharedStorageType "
f"is specified as {SharedStorageType.EFS.value}.",
),
(
"Ebs",
{"encrypted": False},
f"SharedStorageEfsSettings is specified but the SharedStorageType is set to Ebs. "
"SharedStorageEfsSettings can only be used when SharedStorageType "
f"is specified as {SharedStorageType.EFS.value}.",
),
(
"Ebs",
None,
None,
),
],
)
def test_shared_storage_efs_settings_validator(shared_storage_type, shared_storage_efs_settings, expected_message):
actual_failures = SharedStorageEfsSettingsValidator().execute(shared_storage_type, shared_storage_efs_settings)
assert_failure_messages(actual_failures, expected_message)


@pytest.mark.parametrize(
"root_volume_size, ami_size, expected_message",
[
Expand Down
Loading