improve timing/error handling in k8s bootstrap

This commit is contained in:
John Bowdre 2023-04-19 13:39:27 -05:00
parent 101ad08bd3
commit 05ab05a84a
2 changed files with 33 additions and 16 deletions

View file

@@ -53,7 +53,7 @@ EOF
sudo chown "$(id -u):$(id -g)" "${HOME}"/.kube/config sudo chown "$(id -u):$(id -g)" "${HOME}"/.kube/config
echo ">> Applying Calico networking..." echo ">> Applying Calico networking..."
kubectl apply -f https://docs.projectcalico.org/manifests/calico.yaml kubectl apply -f https://raw.githubusercontent.com/projectcalico/calico/master/manifests/calico.yaml
echo ">> Creating discovery config..." echo ">> Creating discovery config..."
kubectl -n kube-public get configmap cluster-info -o jsonpath='{.data.kubeconfig}' > discovery.yaml kubectl -n kube-public get configmap cluster-info -o jsonpath='{.data.kubeconfig}' > discovery.yaml
@@ -64,7 +64,7 @@ EOF
fi fi
fi fi
echo ">> Waiting up to 10 minutes for all control-plane nodes to be Ready..." echo ">> Waiting up to 10 minutes for all control-plane nodes to be Ready..."
python3 -m http.server & python3 -m http.server &>/dev/null &
PROC_ID=$! PROC_ID=$!
attempts_max=60 attempts_max=60
attempt=0 attempt=0
@@ -383,17 +383,25 @@ else
sleep 10 sleep 10
done done
echo ">> Continuing after $((attempt*10)) seconds." echo ">> Continuing after $((attempt*10)) seconds."
echo ">> Joining cluster..." echo ">> Retrieving cluster discovery config..."
attempts_max=6 attempts_max=6
attempt=0 attempt=0
until [ -f /etc/kubernetes/discovery.yaml ]; do until [ -f ~/discovery.yaml ] || [ ${attempt} -eq ${attempts_max} ]; do
wget "http://${K8S_CONTROLPLANE_VIP}:8000/discovery.yaml" 2>/dev/null wget "http://${K8S_CONTROLPLANE_VIP}:8000/discovery.yaml"
sudo install -o root -g root -m 600 discovery.yaml /etc/kubernetes/discovery.yaml 2>/dev/null sleep 2
if [ ! -f /etc/kubernetes/discovery.yaml ]; then if ! [ -f ~/discovery.yaml ]; then
echo ">> Unable to retrieve config..."
attempt=$((attempt+1)) attempt=$((attempt+1))
sleep 10 sleep 8
fi fi
done done
if ! [ -f ~/discovery.yaml ]; then
echo ">> Timeout reached while retrieving config!"
echo "Exiting."
exit 1
fi
sudo install -o root -g root -m 600 discovery.yaml /etc/kubernetes/discovery.yaml
echo ">> Successfully discovered cluster!"
cat << EOF > kubeadmjoin.yaml cat << EOF > kubeadmjoin.yaml
apiVersion: kubeadm.k8s.io/v1beta3 apiVersion: kubeadm.k8s.io/v1beta3
caCertPath: /etc/kubernetes/pki/ca.crt caCertPath: /etc/kubernetes/pki/ca.crt
@@ -409,6 +417,7 @@ nodeRegistration:
controlPlane: controlPlane:
certificateKey: ${KUBEADM_CERTKEY} certificateKey: ${KUBEADM_CERTKEY}
EOF EOF
echo ">> Joining cluster..."
if sudo kubeadm join "${K8S_CONTROLPLANE_VIP}":6443 --config kubeadmjoin.yaml; then if sudo kubeadm join "${K8S_CONTROLPLANE_VIP}":6443 --config kubeadmjoin.yaml; then
echo ">> Node ${HOSTNAME} successfully initialized!" echo ">> Node ${HOSTNAME} successfully initialized!"
touch .k8s-node-success touch .k8s-node-success

View file

@@ -17,7 +17,7 @@ echo ">> Continuing after $((attempt*10)) seconds."
echo ">> Waiting up to 10 minutes for all control-plane nodes..." echo ">> Waiting up to 10 minutes for all control-plane nodes..."
attempts_max=60 attempts_max=60
attempt=0 attempt=0
until "$(wget http://${K8S_CONTROLPLANE_VIP}:8000/.k8s-controlplane-success)" 2>/dev/null; do until curl --fail "http://${K8S_CONTROLPLANE_VIP}:8000/.k8s-controlplane-success" 2>/dev/null; do
if [ ${attempt} -eq ${attempts_max} ]; then if [ ${attempt} -eq ${attempts_max} ]; then
echo ">> [ERROR] Timeout waiting for control-plane nodes! <<" echo ">> [ERROR] Timeout waiting for control-plane nodes! <<"
exit 1 exit 1
@@ -26,18 +26,26 @@ until "$(wget http://${K8S_CONTROLPLANE_VIP}:8000/.k8s-controlplane-success)" 2>
sleep 10 sleep 10
done done
echo ">> Continuing after $((attempt*10)) seconds." echo ">> Continuing after $((attempt*10)) seconds."
echo ">> Joining cluster..." echo ">> Retrieving cluster discovery config..."
attempts_max=6 attempts_max=6
attempt=0 attempt=0
until [ -f /etc/kubernetes/discovery.yaml ]; do until [ -f ~/discovery.yaml ] || [ ${attempt} -eq ${attempts_max} ]; do
wget "http://${K8S_CONTROLPLANE_VIP}:8000/discovery.yaml" 2>/dev/null wget "http://${K8S_CONTROLPLANE_VIP}:8000/discovery.yaml"
sudo install -o root -g root -m 600 discovery.yaml /etc/kubernetes/discovery.yaml 2>/dev/null sleep 2
if [ ! -f /etc/kubernetes/discovery.yaml ]; then if ! [ -f ~/discovery.yaml ]; then
echo ">> Unable to retrieve config..."
attempt=$((attempt+1)) attempt=$((attempt+1))
sleep 10 sleep 8
fi fi
done done
if ! [ -f ~/discovery.yaml ]; then
echo ">> Timeout reached while retrieving config!"
echo "Exiting."
exit 1
fi
sudo install -o root -g root -m 600 discovery.yaml /etc/kubernetes/discovery.yaml
echo ">> Successfully discovered cluster!"
echo ">> Discovered cluster!"
cat << EOF > kubeadmjoin.yaml cat << EOF > kubeadmjoin.yaml
apiVersion: kubeadm.k8s.io/v1beta3 apiVersion: kubeadm.k8s.io/v1beta3
caCertPath: /etc/kubernetes/pki/ca.crt caCertPath: /etc/kubernetes/pki/ca.crt