From 1e1d3e4b1aa699716de6df8c10164f32c7cb1cf6 Mon Sep 17 00:00:00 2001 From: John Bowdre Date: Tue, 6 Dec 2022 16:42:44 -0600 Subject: [PATCH] use http check instead of ssh to detect when controlplane is ready --- terraform/scripts/initialize-controlplane.sh | 19 ++++++++++++++++--- terraform/scripts/join-workers.sh | 2 +- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/terraform/scripts/initialize-controlplane.sh b/terraform/scripts/initialize-controlplane.sh index 51823bf..8dbeee8 100644 --- a/terraform/scripts/initialize-controlplane.sh +++ b/terraform/scripts/initialize-controlplane.sh @@ -63,14 +63,27 @@ EOF exit 1 fi fi - echo ">> Waiting up to 15 minutes for all nodes to be Ready..." + echo ">> Waiting up to 10 minutes for all control-plane nodes to be Ready..." python3 -m http.server & PROC_ID=$! - attempts_max=90 + attempts_max=60 + attempt=0 + until [ "$(kubectl get nodes | grep -c ' control-plane ')" -eq "${K8S_CONTROLPLANE_COUNT}" ]; do + if [ ${attempt} -eq ${attempts_max} ]; then + echo ">> [ERROR] Timeout waiting for control-plane nodes to join! <<" + exit 1 + fi + attempt=$((attempt+1)) + sleep 10 + done + echo ">> Continuing after $((attempt*10)) seconds." + touch .k8s-controlplane-success + echo ">> Waiting up to 10 minutes for all worker nodes to be Ready..." + attempts_max=60 attempt=0 until [ "$(kubectl get nodes | grep -c ' Ready ')" == "${K8S_NODE_COUNT}" ]; do if [ ${attempt} -eq ${attempts_max} ]; then - echo ">> [ERROR] Timeout waiting for cluster online! <<" + echo ">> [ERROR] Timeout waiting for worker nodes to join! <<" exit 1 fi attempt=$((attempt+1)) diff --git a/terraform/scripts/join-workers.sh b/terraform/scripts/join-workers.sh index 00c15ac..8e10b7e 100644 --- a/terraform/scripts/join-workers.sh +++ b/terraform/scripts/join-workers.sh @@ -17,7 +17,7 @@ echo ">> Continuing after $((attempt*10)) seconds." echo ">> Waiting up to 10 minutes for all control-plane nodes..." 
attempts_max=60 attempt=0 -until [ "$(ssh -o StrictHostKeyChecking=no "${K8S_CONTROLPLANE_VIP}" kubectl get nodes | grep -c " control-plane ")" -eq "${K8S_CONTROLPLANE_COUNT}" ]; do +until wget -q -O /dev/null "http://${K8S_CONTROLPLANE_VIP}:8000/.k8s-controlplane-success"; do if [ ${attempt} -eq ${attempts_max} ]; then echo ">> [ERROR] Timeout waiting for control-plane nodes! <<" exit 1