fix(cd): drop retired supervisor build, add routing rollout verification
Plan 7 (2026-05-12) retired the supervisor pod, deleted cmd/supervisor/
and the root Dockerfile, but cd.yml still tried to:
- build a supervisor image with buildctl from the (non-existent) root Dockerfile
- sed-rewrite the gitea.d-ma.be/mathias/supervisor: image tag in
k3s/apps/supervisor/deployment.yaml
(also non-existent — k3s/apps/supervisor/ only ships ingestion-* files now)
- wait for and rollout-verify a supervisor Deployment that no longer exists
Result: every CD run since the retirement has been failing at 'Build and push
supervisor image', leaving ingestion + routing un-deployed despite the binaries
being built. The routing pod was last deployed at sha 189ff89c (weeks stale).
This commit:
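- Removes the supervisor build step, the supervisor sed/git add lines, the
supervisor 'Wait for Flux' and 'Verify rollout' steps, and the now-unused
SERVICE and IMAGE env vars.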
- Adds 'Wait for Flux to apply new routing image' + 'Verify routing rollout'
steps that mirror the ingestion equivalents, so a broken routing deploy fails
the CD run itself rather than surfacing 5 min later when something tries to
call the new tool.
- Updates the chore(deploy) commit message to 'ingestion+routing' to match
reality.
Unblocks deployment of feat: project_create (#10).
This commit is contained in:
@@ -13,8 +13,6 @@ jobs:
|
||||
if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' }}
|
||||
environment: staging
|
||||
env:
|
||||
SERVICE: supervisor
|
||||
IMAGE: gitea.d-ma.be/mathias/supervisor
|
||||
INGESTION_IMAGE: gitea.d-ma.be/mathias/ingestion
|
||||
ROUTING_IMAGE: gitea.d-ma.be/mathias/routing
|
||||
INFRA_REPO: git@gitea.d-ma.be:mathias/infra.git
|
||||
@@ -23,27 +21,6 @@ jobs:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build and push supervisor image
|
||||
run: |
|
||||
set -e
|
||||
trap 'rm -f /tmp/supervisor-image.tar' EXIT
|
||||
IMAGE_TAG="${{ github.sha }}"
|
||||
echo "Building ${IMAGE}:${IMAGE_TAG}"
|
||||
|
||||
buildctl --addr "${BUILDKIT_HOST}" build \
|
||||
--frontend dockerfile.v0 \
|
||||
--local context=. \
|
||||
--local dockerfile=. \
|
||||
--opt build-arg:VERSION="${IMAGE_TAG}" \
|
||||
--output type=oci,dest=/tmp/supervisor-image.tar
|
||||
|
||||
skopeo copy \
|
||||
oci-archive:/tmp/supervisor-image.tar \
|
||||
docker://${IMAGE}:${IMAGE_TAG} \
|
||||
--dest-creds "${{ secrets.REGISTRY_CREDS }}"
|
||||
|
||||
echo "Built and pushed ${IMAGE}:${IMAGE_TAG}"
|
||||
|
||||
- name: Build and push ingestion image
|
||||
run: |
|
||||
set -e
|
||||
@@ -101,25 +78,21 @@ jobs:
|
||||
|
||||
cd /tmp/infra-update
|
||||
|
||||
sed -i "s|gitea.d-ma.be/mathias/supervisor:.*|gitea.d-ma.be/mathias/supervisor:${IMAGE_TAG}|" \
|
||||
"k3s/apps/${SERVICE}/deployment.yaml"
|
||||
|
||||
sed -i "s|gitea.d-ma.be/mathias/ingestion:.*|gitea.d-ma.be/mathias/ingestion:${IMAGE_TAG}|" \
|
||||
"k3s/apps/${SERVICE}/ingestion-deployment.yaml"
|
||||
"k3s/apps/supervisor/ingestion-deployment.yaml"
|
||||
|
||||
sed -i "s|gitea.d-ma.be/mathias/routing:.*|gitea.d-ma.be/mathias/routing:${IMAGE_TAG}|" \
|
||||
"k3s/apps/routing/deployment.yaml"
|
||||
|
||||
git config user.email "cd-bot@d-ma.be"
|
||||
git config user.name "CD Bot"
|
||||
git add "k3s/apps/${SERVICE}/deployment.yaml" \
|
||||
"k3s/apps/${SERVICE}/ingestion-deployment.yaml" \
|
||||
git add "k3s/apps/supervisor/ingestion-deployment.yaml" \
|
||||
"k3s/apps/routing/deployment.yaml"
|
||||
git commit -m "chore(deploy): supervisor+ingestion+routing → ${IMAGE_TAG}"
|
||||
git commit -m "chore(deploy): ingestion+routing → ${IMAGE_TAG}"
|
||||
GIT_SSH_COMMAND="ssh -i ~/.ssh/infra_deploy_key -o IdentitiesOnly=yes" \
|
||||
git push
|
||||
|
||||
echo "Infra repo updated: ${SERVICE}+ingestion → ${IMAGE_TAG}"
|
||||
echo "Infra repo updated: ingestion+routing → ${IMAGE_TAG}"
|
||||
|
||||
- name: Trigger Flux reconcile (immediate)
|
||||
run: |
|
||||
@@ -128,23 +101,6 @@ jobs:
|
||||
kubectl -n flux-system annotate kustomization apps \
|
||||
reconcile.fluxcd.io/requestedAt="$(date +%s)" --overwrite
|
||||
|
||||
- name: Wait for Flux to apply new supervisor image
|
||||
run: |
|
||||
EXPECTED="gitea.d-ma.be/mathias/supervisor:${{ github.sha }}"
|
||||
for i in $(seq 1 60); do
|
||||
CURRENT=$(kubectl get deploy supervisor -n supervisor \
|
||||
-o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null || echo "")
|
||||
if [ "$CURRENT" = "$EXPECTED" ]; then
|
||||
echo "✓ Flux applied supervisor image after ${i}s"
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
kubectl get deploy supervisor -n supervisor \
|
||||
-o jsonpath='{.spec.template.spec.containers[0].image}' \
|
||||
| grep -qx "$EXPECTED" \
|
||||
|| { echo "✗ Flux did not apply supervisor image within 60s"; exit 1; }
|
||||
|
||||
- name: Wait for Flux to apply new ingestion image
|
||||
run: |
|
||||
EXPECTED="gitea.d-ma.be/mathias/ingestion:${{ github.sha }}"
|
||||
@@ -162,21 +118,6 @@ jobs:
|
||||
| grep -qx "$EXPECTED" \
|
||||
|| { echo "✗ Flux did not apply ingestion image within 60s"; exit 1; }
|
||||
|
||||
- name: Verify supervisor rollout
|
||||
run: |
|
||||
kubectl rollout status deployment/supervisor \
|
||||
--namespace supervisor \
|
||||
--timeout=120s \
|
||||
|| {
|
||||
echo "── pod status ──"
|
||||
kubectl get pods -n supervisor -o wide
|
||||
echo "── events ──"
|
||||
kubectl get events -n supervisor --sort-by='.lastTimestamp' | tail -20
|
||||
echo "── describe ──"
|
||||
kubectl describe pods -n supervisor -l app=supervisor | tail -40
|
||||
exit 1
|
||||
}
|
||||
|
||||
- name: Verify ingestion rollout
|
||||
run: |
|
||||
kubectl rollout status deployment/ingestion \
|
||||
@@ -191,3 +132,35 @@ jobs:
|
||||
kubectl describe pods -n supervisor -l app=ingestion | tail -40
|
||||
exit 1
|
||||
}
|
||||
|
||||
- name: Wait for Flux to apply new routing image
|
||||
run: |
|
||||
EXPECTED="gitea.d-ma.be/mathias/routing:${{ github.sha }}"
|
||||
for i in $(seq 1 60); do
|
||||
CURRENT=$(kubectl get deploy routing -n routing \
|
||||
-o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null || echo "")
|
||||
if [ "$CURRENT" = "$EXPECTED" ]; then
|
||||
echo "✓ Flux applied routing image after ${i}s"
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
kubectl get deploy routing -n routing \
|
||||
-o jsonpath='{.spec.template.spec.containers[0].image}' \
|
||||
| grep -qx "$EXPECTED" \
|
||||
|| { echo "✗ Flux did not apply routing image within 60s"; exit 1; }
|
||||
|
||||
- name: Verify routing rollout
|
||||
run: |
|
||||
kubectl rollout status deployment/routing \
|
||||
--namespace routing \
|
||||
--timeout=120s \
|
||||
|| {
|
||||
echo "── pod status ──"
|
||||
kubectl get pods -n routing -o wide
|
||||
echo "── events ──"
|
||||
kubectl get events -n routing --sort-by='.lastTimestamp' | tail -20
|
||||
echo "── describe ──"
|
||||
kubectl describe pods -n routing -l app=routing | tail -40
|
||||
exit 1
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user