Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
3fe23d3
Add possibility to test kitchen the AMI with custom node package
lukeseawalker Apr 10, 2019
523f7c8
Remove double quotes from post install args variable
Apr 11, 2019
b25404b
Remove duplicated (and wrong) mount point for ebs volumes
Apr 11, 2019
992a1e7
Pin supervisord to version 3.4.0
lukeseawalker Apr 12, 2019
454f4cf
Bump version to 2.3.2 alpha 1
lukeseawalker Apr 8, 2019
52e9500
Fix variable name
lukeseawalker Apr 16, 2019
64dca67
jobwatcher.cfg: remove compute_instance_type config value
demartinofra Apr 15, 2019
6701f97
sqswatcher.cfg: remove unneeded max_queue_size config value
demartinofra Apr 17, 2019
7829815
Install slurm libpmi/libpmi2
lukeseawalker Apr 17, 2019
47b9832
Avoid failure when partition changes require a reboot
lukeseawalker Apr 19, 2019
fe3abdc
Delete previous network configuration left by cloud-init
lukeseawalker Apr 23, 2019
83c22f0
Execute cleanup script also for official released AMI
lukeseawalker Apr 23, 2019
4aba58c
torque: restarting pbs_mom only when hostname changes get applied
demartinofra Apr 23, 2019
da4eb01
ami_cleanup: remove network interface configs only if created by clou…
demartinofra Apr 24, 2019
e077674
torque: starting pbs_mom and restart on network/hostname changes
demartinofra Apr 24, 2019
e2cd225
ami_cleanup: remove ifcfg-eth0 in centos7
demartinofra Apr 24, 2019
0d95ba0
torque: use :immediately when subscribed to restart on service[network]
demartinofra Apr 24, 2019
ad4fe59
Skip test for custom AMI
lukeseawalker Apr 24, 2019
c0556ec
use full master FQDN when mounting nfs
demartinofra Apr 26, 2019
4f2585c
Use custom chef install URL to resolve Ubuntu dpkg lock issue
lukeseawalker Apr 30, 2019
184c037
Skip SGE installation on compute node
lukeseawalker Apr 30, 2019
838d75c
Skip Slurm installation on compute node
lukeseawalker Apr 30, 2019
fe0dff7
Slurm: decrease SlurmdTimeout to 120 seconds
demartinofra May 1, 2019
402947a
Add finalize recipe for supervisord and compute_ready
demartinofra Apr 30, 2019
99c3a28
Add PATH for aws cli when executing compute_ready
demartinofra May 13, 2019
b2755e0
Double the retry limit to cope with the throttling error "Request lim…
lukeseawalker May 14, 2019
619b3d1
Change download URL for CloudFormation Helper Scripts package
lukeseawalker May 23, 2019
46efa52
Bump version to 2.4.0
lukeseawalker May 23, 2019
f9977c7
Bump version to 2.4.0
lukeseawalker May 25, 2019
4d368b5
sge: configure scheduler behaviour for unknown hosts
demartinofra May 24, 2019
60bbf34
Test for EFA Install
May 15, 2019
b40535d
Fix for when ParallelCluster version contains letters e.g. (2.3.2a1)
lukeseawalker May 24, 2019
2cb0576
Install EFA drivers
Mar 21, 2019
347eb2c
Fix conflict between EFA package and openmpi-devel package
lukeseawalker May 30, 2019
3fc6170
Do not reinstall EFA package at runtime
lukeseawalker May 30, 2019
3fd5819
sge: set ENABLE_RESCHEDULE_KILL=1
demartinofra May 31, 2019
14b02a2
Install EFA at Runtime
May 30, 2019
8ffcf39
Enable EFA Kitchen Test
May 31, 2019
2ec3d12
Apply patch to fix issue in sge_edit_mod_attr script
demartinofra Jun 3, 2019
bffb47e
Revert "Enable EFA Kitchen Test"
May 31, 2019
00d9674
Revert "Install EFA at Runtime"
May 31, 2019
d1639f5
Enable EFA Limit Check at Runtime
May 31, 2019
344e14a
Unconditionally attempt fsx mount.
Jun 5, 2019
25ee19c
Enable EFA on Ubuntu1604
Jun 5, 2019
28c92ee
Add Ubuntu to Kitchen Tests
Jun 6, 2019
f92426d
Skip EFA test in China regions
lukeseawalker Jun 7, 2019
6afb480
Set ulimit nofile to be 1000
lukeseawalker Jun 6, 2019
e44d47e
Test soft ulimit nofile to be greater equal than 16384
lukeseawalker Jun 6, 2019
ea8a22f
Update changelog for v2.4.0
demartinofra Jun 6, 2019
4afb7d6
Set ulimits when EFA Enabled
Jun 10, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .kitchen.cloud.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
driver_config:
retryable_sleep: 15
retryable_tries: 20
retry_limit: 6
aws_ssh_key_id: <%= ENV['AWS_KEYPAIR_NAME'] %>
region: <%= ENV['AWS_DEFAULT_REGION'] %>
instance_type: <%= ENV['AWS_FLAVOR_ID'] %>
Expand All @@ -16,6 +17,8 @@ driver_config:
provisioner:
name: chef_zero
require_chef_omnibus: 14.2.0
# use custom chef install URL to cope with issue https://github.com/chef/bento/issues/609
chef_omnibus_url: https://raw.githubusercontent.com/aws/aws-parallelcluster-cookbook/develop/util/chef-install.sh
retry_on_exit_code:
- 35 # 35 is the exit code signaling that the node is rebooting
max_retries: 1
Expand Down
20 changes: 20 additions & 0 deletions .kitchen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,14 @@ suites:
cfn_ephemeral_dir: <%= ENV['CFN_EPHEMERAL_DIR'] %>
cfn_shared_dir: <%= ENV['CFN_SHARED_DIR'] %>
cfn_cluster_user: <%= ENV['CFN_CLUSTER_USER'] %>
custom_node_package: <%= ENV['PARALLELCLUSTER_NODE_URL'] %>
os: <%= ENV['OS'] %>

- name: sge_config_MasterServer
run_list:
- recipe[aws-parallelcluster::_prep_env]
- recipe[aws-parallelcluster::sge_config]
- recipe[aws-parallelcluster::finalize]
- recipe[aws-parallelcluster::tests]
attributes:
cfncluster:
Expand All @@ -87,11 +90,14 @@ suites:
cfn_cluster_user: <%= ENV['CFN_CLUSTER_USER'] %>
cfn_sqs_queue: <%= ENV['CFN_SQS_QUEUE'] %>
cfn_ddb_table: <%= ENV['CFN_DDB_TABLE'] %>
custom_node_package: <%= ENV['PARALLELCLUSTER_NODE_URL'] %>
os: <%= ENV['OS'] %>

- name: torque_config_MasterServer
run_list:
- recipe[aws-parallelcluster::_prep_env]
- recipe[aws-parallelcluster::torque_config]
- recipe[aws-parallelcluster::finalize]
- recipe[aws-parallelcluster::tests]
attributes:
cfncluster:
Expand All @@ -106,11 +112,14 @@ suites:
cfn_cluster_user: <%= ENV['CFN_CLUSTER_USER'] %>
cfn_sqs_queue: <%= ENV['CFN_SQS_QUEUE'] %>
cfn_ddb_table: <%= ENV['CFN_DDB_TABLE'] %>
custom_node_package: <%= ENV['PARALLELCLUSTER_NODE_URL'] %>
os: <%= ENV['OS'] %>

- name: slurm_config_MasterServer
run_list:
- recipe[aws-parallelcluster::_prep_env]
- recipe[aws-parallelcluster::slurm_config]
- recipe[aws-parallelcluster::finalize]
- recipe[aws-parallelcluster::tests]
attributes:
cfncluster:
Expand All @@ -125,11 +134,14 @@ suites:
cfn_cluster_user: <%= ENV['CFN_CLUSTER_USER'] %>
cfn_sqs_queue: <%= ENV['CFN_SQS_QUEUE'] %>
cfn_ddb_table: <%= ENV['CFN_DDB_TABLE'] %>
custom_node_package: <%= ENV['PARALLELCLUSTER_NODE_URL'] %>
os: <%= ENV['OS'] %>

- name: sge_config_ComputeFleet
run_list:
- recipe[aws-parallelcluster::_prep_env]
- recipe[aws-parallelcluster::sge_config]
- recipe[aws-parallelcluster::finalize]
- recipe[aws-parallelcluster::tests]
attributes:
cfncluster:
Expand All @@ -144,11 +156,14 @@ suites:
cfn_cluster_user: <%= ENV['CFN_CLUSTER_USER'] %>
cfn_sqs_queue: <%= ENV['CFN_SQS_QUEUE'] %>
cfn_master: <%= ENV['CFN_MASTER'] %>
custom_node_package: <%= ENV['PARALLELCLUSTER_NODE_URL'] %>
os: <%= ENV['OS'] %>

- name: torque_config_ComputeFleet
run_list:
- recipe[aws-parallelcluster::_prep_env]
- recipe[aws-parallelcluster::torque_config]
- recipe[aws-parallelcluster::finalize]
- recipe[aws-parallelcluster::tests]
attributes:
cfncluster:
Expand All @@ -163,11 +178,14 @@ suites:
cfn_cluster_user: <%= ENV['CFN_CLUSTER_USER'] %>
cfn_sqs_queue: <%= ENV['CFN_SQS_QUEUE'] %>
cfn_master: <%= ENV['CFN_MASTER'] %>
custom_node_package: <%= ENV['PARALLELCLUSTER_NODE_URL'] %>
os: <%= ENV['OS'] %>

- name: slurm_config_ComputeFleet
run_list:
- recipe[aws-parallelcluster::_prep_env]
- recipe[aws-parallelcluster::slurm_config]
- recipe[aws-parallelcluster::finalize]
- recipe[aws-parallelcluster::tests]
attributes:
cfncluster:
Expand All @@ -182,3 +200,5 @@ suites:
cfn_cluster_user: <%= ENV['CFN_CLUSTER_USER'] %>
cfn_sqs_queue: <%= ENV['CFN_SQS_QUEUE'] %>
cfn_master: <%= ENV['CFN_MASTER'] %>
custom_node_package: <%= ENV['PARALLELCLUSTER_NODE_URL'] %>
os: <%= ENV['OS'] %>
30 changes: 30 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,36 @@ aws-parallelcluster-cookbook CHANGELOG

This file is used to list changes made in each version of the AWS ParallelCluster cookbook.

2.4.0
-----

**ENHANCEMENTS**
- Add support for EFA on Centos 7, Amazon Linux and Ubuntu 1604
- Add support for Ubuntu in China region `cn-northwest-1`

**CHANGES**
- SGE: changed following parameters in global configuration
- `max_unheard 00:03:00`: allows a faster reaction in case of faulty nodes
- `reschedule_unknown 00:00:30`: enables rescheduling of jobs running on failing nodes
- `qmaster_params ENABLE_FORCED_QDEL_IF_UNKNOWN`: forces job deletion on unresponsive nodes
- `qmaster_params ENABLE_RESCHEDULE_KILL`: forces rescheduling or killing of jobs running on failing nodes
- Slurm: decrease SlurmdTimeout to 120 seconds to speed up replacement of faulty nodes
- Always use full master FQDN when mounting NFS on compute nodes. This solves some issues occurring with some networking
setups and custom DNS configurations
- Set soft and hard ulimit on open files to 10000 for all supported OSs
- Pin python `supervisor` version to 3.4.0
- Remove unused `compute_instance_type` from jobwatcher.cfg
- Remove unused `max_queue_size` from sqswatcher.cfg
- Remove double quoting of the post_install args

**BUG FIXES**
- Fix issue that was preventing Torque from being used on Centos 7
- Start node daemons at the end of instance initialization. The time spent for post-install script and node
initialization is not counted as part of node idletime anymore.
- Fix issue which was causing an additional and invalid EBS mount point to be added in case of multiple EBS volumes
- Install Slurm libpmi/libpmi2 that is distributed in a separate package since Slurm 17


2.3.1
-----

Expand Down
2 changes: 1 addition & 1 deletion amis/packer_alinux.json
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@
"pause_before": "2m",
"json" : {
"cfncluster" : {
"cfn_region": "{{user `region`}}",
"nvidia" : {
"enabled" : "{{user `nvidia_enabled`}}"
},
Expand Down Expand Up @@ -246,7 +247,6 @@
},
{
"type" : "shell",
"only": ["custom-alinux"],
"inline" : [
"sudo /usr/local/sbin/ami_cleanup.sh"
]
Expand Down
4 changes: 2 additions & 2 deletions amis/packer_centos6.json
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@
"pause_before": "2m",
"json" : {
"cfncluster" : {
"cfn_region": "{{user `region`}}",
"nvidia" : {
"enabled" : "{{user `nvidia_enabled`}}"
},
Expand Down Expand Up @@ -251,7 +252,7 @@
"inline" : [
"region=\"{{user `region`}}\"",
"bucket=\"s3.amazonaws.com\"",
"[[ ${region} =~ ^cn- ]] && bucket=\"s3.cn-north-1.amazonaws.com.cn\"",
"[[ ${region} =~ ^cn- ]] && bucket=\"s3.cn-north-1.amazonaws.com.cn/cn-north-1-aws-parallelcluster\"",
"curl --retry 3 -L -o /tmp/aws-cfn-bootstrap-latest.tar.gz https://${bucket}/cloudformation-examples/aws-cfn-bootstrap-latest.tar.gz",
"sudo pip install /tmp/aws-cfn-bootstrap-latest.tar.gz"
]
Expand All @@ -265,7 +266,6 @@
},
{
"type" : "shell",
"only": ["custom-centos6"],
"inline" : [
"sudo /usr/local/sbin/ami_cleanup.sh"
]
Expand Down
4 changes: 2 additions & 2 deletions amis/packer_centos7.json
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@
"pause_before": "2m",
"json" : {
"cfncluster" : {
"cfn_region": "{{user `region`}}",
"nvidia" : {
"enabled" : "{{user `nvidia_enabled`}}"
},
Expand Down Expand Up @@ -256,7 +257,7 @@
"inline" : [
"region=\"{{user `region`}}\"",
"bucket=\"s3.amazonaws.com\"",
"[[ ${region} =~ ^cn- ]] && bucket=\"s3.cn-north-1.amazonaws.com.cn\"",
"[[ ${region} =~ ^cn- ]] && bucket=\"s3.cn-north-1.amazonaws.com.cn/cn-north-1-aws-parallelcluster\"",
"curl --retry 3 -L -o /tmp/aws-cfn-bootstrap-latest.tar.gz https://${bucket}/cloudformation-examples/aws-cfn-bootstrap-latest.tar.gz",
"which pip2",
"if [ $? -eq 0 ]; then sudo pip2 install /tmp/aws-cfn-bootstrap-latest.tar.gz; else sudo pip install /tmp/aws-cfn-bootstrap-latest.tar.gz; fi"
Expand All @@ -277,7 +278,6 @@
},
{
"type" : "shell",
"only": ["custom-centos7"],
"inline" : [
"sudo /usr/local/sbin/ami_cleanup.sh"
]
Expand Down
4 changes: 2 additions & 2 deletions amis/packer_ubuntu1404.json
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@
"pause_before": "2m",
"json" : {
"cfncluster" : {
"cfn_region": "{{user `region`}}",
"nvidia" : {
"enabled" : "{{user `nvidia_enabled`}}"
},
Expand Down Expand Up @@ -257,7 +258,7 @@
"inline" : [
"region=\"{{user `region`}}\"",
"bucket=\"s3.amazonaws.com\"",
"[[ ${region} =~ ^cn- ]] && bucket=\"s3.cn-north-1.amazonaws.com.cn\"",
"[[ ${region} =~ ^cn- ]] && bucket=\"s3.cn-north-1.amazonaws.com.cn/cn-north-1-aws-parallelcluster\"",
"curl --retry 3 -L -o /tmp/aws-cfn-bootstrap-latest.tar.gz https://${bucket}/cloudformation-examples/aws-cfn-bootstrap-latest.tar.gz",
"sudo pip install /tmp/aws-cfn-bootstrap-latest.tar.gz"
]
Expand All @@ -271,7 +272,6 @@
},
{
"type" : "shell",
"only": ["custom-ubuntu1404"],
"inline" : [
"sudo /usr/local/sbin/ami_cleanup.sh"
]
Expand Down
4 changes: 2 additions & 2 deletions amis/packer_ubuntu1604.json
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@
"pause_before": "2m",
"json" : {
"cfncluster" : {
"cfn_region": "{{user `region`}}",
"nvidia" : {
"enabled" : "{{user `nvidia_enabled`}}"
},
Expand Down Expand Up @@ -260,7 +261,7 @@
"inline" : [
"region=\"{{user `region`}}\"",
"bucket=\"s3.amazonaws.com\"",
"[[ ${region} =~ ^cn- ]] && bucket=\"s3.cn-north-1.amazonaws.com.cn\"",
"[[ ${region} =~ ^cn- ]] && bucket=\"s3.cn-north-1.amazonaws.com.cn/cn-north-1-aws-parallelcluster\"",
"curl --retry 3 -L -o /tmp/aws-cfn-bootstrap-latest.tar.gz https://${bucket}/cloudformation-examples/aws-cfn-bootstrap-latest.tar.gz",
"sudo pip install /tmp/aws-cfn-bootstrap-latest.tar.gz"
]
Expand All @@ -274,7 +275,6 @@
},
{
"type" : "shell",
"only": ["custom-ubuntu1604"],
"inline" : [
"sudo /usr/local/sbin/ami_cleanup.sh"
]
Expand Down
4 changes: 2 additions & 2 deletions amis/packer_variables.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"parallelcluster_version": "2.3.1",
"parallelcluster_cookbook_version": "2.3.1",
"parallelcluster_version": "2.4.0",
"parallelcluster_cookbook_version": "2.4.0",
"chef_version": "14.2.0",
"ridley_version": "5.1.1",
"berkshelf_version": "7.0.4"
Expand Down
18 changes: 11 additions & 7 deletions attributes/default.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
default['cfncluster']['scripts_dir'] = "#{node['cfncluster']['base_dir']}/scripts"
default['cfncluster']['license_dir'] = "#{node['cfncluster']['base_dir']}/licenses"
# Python packages
default['cfncluster']['cfncluster-version'] = '2.3.1'
default['cfncluster']['cfncluster-node-version'] = '2.3.1'
default['cfncluster']['cfncluster-supervisor-version'] = '3.3.1'
default['cfncluster']['cfncluster-version'] = '2.4.0'
default['cfncluster']['cfncluster-node-version'] = '2.4.0'
default['cfncluster']['supervisor-version'] = '3.4.0'
# URLs to software packages used during install recipes
# Gridengine software
default['cfncluster']['sge']['version'] = '8.1.9'
Expand All @@ -45,6 +45,8 @@
default['cfncluster']['nvidia']['enabled'] = 'no'
default['cfncluster']['nvidia']['driver_url'] = 'http://download.nvidia.com/XFree86/Linux-x86_64/418.56/NVIDIA-Linux-x86_64-418.56.run'
default['cfncluster']['nvidia']['cuda_url'] = 'https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda_10.0.130_410.48_linux'
# EFA
default['cfncluster']['efa']['installer_url'] = 'https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-latest.tar.gz'

# Reboot after default_pre recipe
default['cfncluster']['default_pre_reboot'] = 'true'
Expand Down Expand Up @@ -83,7 +85,7 @@
if node['platform_version'].to_i >= 7
default['cfncluster']['base_packages'] = %w[vim ksh tcsh zsh openssl-devel ncurses-devel pam-devel net-tools openmotif-devel
libXmu-devel hwloc-devel libdb-devel tcl-devel automake autoconf pyparted libtool
httpd boost-devel redhat-lsb mlocate lvm2 mpich-devel openmpi-devel R atlas-devel
httpd boost-devel redhat-lsb mlocate lvm2 mpich-devel R atlas-devel
blas-devel fftw-devel libffi-devel openssl-devel dkms mariadb-devel libedit-devel
libical-devel postgresql-devel postgresql-server sendmail libxml2-devel libglvnd-devel mdadm]
if node['platform_version'].split('.')[1] == '6'
Expand All @@ -105,7 +107,7 @@
when 'amazon'
default['cfncluster']['base_packages'] = %w[vim ksh tcsh zsh openssl-devel ncurses-devel pam-devel net-tools openmotif-devel
libXmu-devel hwloc-devel db4-devel tcl-devel automake autoconf pyparted libtool
httpd boost-devel redhat-lsb mlocate mpich-devel openmpi-devel R atlas-devel fftw-devel
httpd boost-devel redhat-lsb mlocate mpich-devel R atlas-devel fftw-devel
libffi-devel openssl-devel dkms mysql-devel libedit-devel postgresql-devel postgresql-server
sendmail cmake byacc libglvnd-devel mdadm]
end
Expand All @@ -123,8 +125,11 @@
default['cfncluster']['base_packages'] = %w[vim ksh tcsh zsh libssl-dev ncurses-dev libpam-dev net-tools libhwloc-dev dkms
tcl-dev automake autoconf python-parted libtool librrd-dev libapr1-dev libconfuse-dev
apache2 libboost-dev libdb-dev tcsh libssl-dev libncurses5-dev libpam0g-dev libxt-dev
libmotif-dev libxmu-dev libxft-dev libhwloc-dev man-db lvm2 libmpich-dev libopenmpi-dev
libmotif-dev libxmu-dev libxft-dev libhwloc-dev man-db lvm2 libmpich-dev
r-base libatlas-dev libblas-dev libfftw3-dev libffi-dev libssl-dev libxml2-dev mdadm]
if node['platform_version'] == '14.04'
default['cfncluster']['base_packages'].push('libopenmpi-dev')
end
default['cfncluster']['kernel_generic_pkg'] = "linux-generic"
default['cfncluster']['kernel_extra_pkg'] = "linux-image-extra-#{node['kernel']['release']}"
default['cfncluster']['ganglia']['apache_user'] = 'www-data'
Expand Down Expand Up @@ -166,7 +171,6 @@
default['cfncluster']['cfn_shared_dir'] = '/shared'
default['cfncluster']['cfn_efs_shared_dir'] = 'NONE'
default['cfncluster']['cfn_efs'] = nil
default['cfncluster']['cfn_node_type'] = nil
default['cfncluster']['cfn_master'] = nil
default['cfncluster']['cfn_cluster_user'] = 'ec2-user'
default['cfncluster']['cfn_fsx_options'] = 'NONE'
Expand Down
10 changes: 9 additions & 1 deletion files/default/ami_cleanup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,13 @@ rm -rf /var/lib/cloud/instances/*
rm -f /var/lib/cloud/instance
rm -rf /etc/ssh/ssh_host_*
rm -f /etc/udev/rules.d/70-persistent-net.rules
grep -l "Created by cloud-init on instance boot automatically" /etc/sysconfig/network-scripts/ifcfg-* | xargs rm -f

# https://bugs.centos.org/view.php?id=13836#c33128
source /etc/os-release
if [ "${ID}${VERSION_ID}" == "centos7" ]; then
rm -f /etc/sysconfig/network-scripts/ifcfg-eth0
fi

find /var/log -type f -exec /bin/rm -v {} \;
touch /var/log/lastlog
touch /var/log/lastlog
1 change: 1 addition & 0 deletions files/default/compute_ready
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash
set -e

. /etc/parallelcluster/cfnconfig

Expand Down
17 changes: 12 additions & 5 deletions files/default/setup-ephemeral-drives.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ function error_exit () {
exit 1
}

# Run a command and flag a failure in the global RC variable.
#
# Arguments: the command line to execute. Callers pass it as a single string
#            (e.g. "parted -s /dev/xvdb mklabel gpt").
# Globals:   _command_output (written) - combined stdout/stderr of the command
#            _exit_code      (written) - exit status of the command
#            RC              (written) - set to 1 on a non-tolerated failure
# Returns:   always 0; failures are reported only through RC.
function exec_command() {
# $@ is intentionally left unquoted: the command arrives as one string and
# must be word-split by the shell before it can be executed.
# shellcheck disable=SC2068
_command_output=$($@ 2>&1)
_exit_code=$?

# Do not set RC=1 if error says that changes have been written but a reboot is required to inform the kernel
if [[ $_exit_code -ne 0 ]] && ! grep -qi "you should reboot now" <<< "${_command_output}"; then
RC=1
fi
}

# LVM stripe, format, mount ephemeral drives
function setup_ephemeral_drives () {
Expand Down Expand Up @@ -48,11 +55,11 @@ function setup_ephemeral_drives () {
for d in $DEVS; do
d=/dev/${d}
dd if=/dev/zero of=${d} bs=32k count=1 || RC=1
parted -s ${d} mklabel gpt || RC=1
parted -s ${d} || RC=1
parted -s -a optimal ${d} mkpart primary 1MB 100% || RC=1
partprobe
parted -s ${d} set 1 lvm on || RC=1
exec_command "parted -s ${d} mklabel gpt"
exec_command "parted -s ${d}"
exec_command "parted -s -a optimal ${d} mkpart primary 1MB 100%"
partprobe ${d}
exec_command "parted -s ${d} set 1 lvm on"
if [ $IS_NVME -eq 1 ]; then
PARTITIONS="${d}p1 $PARTITIONS"
else
Expand Down
Loading