Skip to content

Commit 6597a2d

Browse files
Parkreiner and johnstcn authored
chore: add updates to force redeployment on Vercel (#348)
## Changes made - Updated `check.sh` script to add support for automatic re-deploying in the event that the registry has a partial/full outage. --------- Co-authored-by: Cian Johnston <[email protected]>
1 parent 5101c27 commit 6597a2d

File tree

2 files changed

+80
-6
lines changed

2 files changed

+80
-6
lines changed

.github/scripts/check.sh

+78-5
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,17 @@
22
set -o pipefail
33
set -u
44

5+
VERBOSE="${VERBOSE:-0}"
6+
if [[ "${VERBOSE}" -ne "0" ]]; then
7+
set -x
8+
fi
9+
510
# List of required environment variables
611
required_vars=(
712
"INSTATUS_API_KEY"
813
"INSTATUS_PAGE_ID"
914
"INSTATUS_COMPONENT_ID"
15+
"VERCEL_API_KEY"
1016
)
1117

1218
# Check if each required variable is set
@@ -24,7 +30,7 @@ declare -a modules=()
2430
declare -a failures=()
2531

2632
# Collect all module directories containing a main.tf file
27-
for path in $(find . -not -path '*/.*' -type f -name main.tf -maxdepth 2 | cut -d '/' -f 2 | sort -u); do
33+
for path in $(find . -maxdepth 2 -not -path '*/.*' -type f -name main.tf | cut -d '/' -f 2 | sort -u); do
2834
modules+=("${path}")
2935
done
3036

@@ -45,7 +51,7 @@ create_incident() {
4551
local incident_name="Testing Instatus"
4652
local message="The following modules are experiencing issues:\n"
4753
for i in "${!failures[@]}"; do
48-
message+="$(($i + 1)). ${failures[$i]}\n"
54+
message+="$((i + 1)). ${failures[$i]}\n"
4955
done
5056

5157
component_status="PARTIALOUTAGE"
@@ -74,14 +80,77 @@ create_incident() {
7480
echo "$incident_id"
7581
}
7682

83+
#######################################
# Re-submit the most recent production deployment of the registry to Vercel,
# forcing a fresh build.
#
# A redeploy is only attempted when the latest deployment can be fetched, is
# at least 2 hours old, and is in the READY state. Every other outcome prints
# guidance for a human operator and terminates the whole script.
#
# Globals:   VERCEL_API_KEY (read) - bearer token for the Vercel REST API
# Arguments: none
# Outputs:   progress and operator guidance on stdout; curl also writes the
#            response body of the redeploy request to stdout
# Returns:   0 when the redeploy request completes without an HTTP error;
#            calls `exit 1` in every other case
#######################################
force_redeploy_registry () {
  # These are not secret values; safe to just expose directly in script
  local -r VERCEL_TEAM_SLUG="codercom"
  local -r VERCEL_TEAM_ID="team_tGkWfhEGGelkkqUUm9nXq17r"
  local -r VERCEL_APP="registry"
  local -r dashboard_url="https://vercel.com/${VERCEL_TEAM_SLUG}/${VERCEL_APP}/deployments"
  local -r max_redeploy_interval_seconds=7200 # 2 hours

  # NOTE(review): if the caller enabled `set -x`, the Authorization header is
  # echoed in the trace output - confirm the CI log masks the token.
  #
  # The request is checked explicitly: without this, a network/HTTP failure
  # would fall through with an empty response and be reported further down as
  # a deployment that "was not in READY state".
  local latest_res
  if ! latest_res=$(curl "https://api.vercel.com/v6/deployments?app=${VERCEL_APP}&limit=1&slug=${VERCEL_TEAM_SLUG}&teamId=${VERCEL_TEAM_ID}&target=production&state=BUILDING,INITIALIZING,QUEUED,READY" \
    --fail \
    --silent \
    --show-error \
    --header "Authorization: Bearer ${VERCEL_API_KEY}" \
    --header "Content-Type: application/json"
  ); then
    echo "Unable to query Vercel for the latest registry deployment."
    echo "Please redeploy the latest deployment manually in Vercel."
    echo "${dashboard_url}"
    exit 1
  fi

  # If we have zero deployments, something is VERY wrong. Make the whole
  # script exit with a non-zero status code. `// empty` maps a missing/null
  # uid to an empty string, which also covers an empty or unparsable response.
  local latest_id
  latest_id=$(jq -r '.deployments[0].uid // empty' <<<"${latest_res}")
  if [[ -z "${latest_id}" ]]; then
    echo "Unable to pull any previous deployments for redeployment"
    echo "Please redeploy the latest deployment manually in Vercel."
    echo "${dashboard_url}"
    exit 1
  fi

  # createdAt is divided by 1000 to compare against `date +%s` (seconds).
  # Anything that is not a plain integer is rejected here; an empty value
  # would otherwise evaluate to 0 in the arithmetic test below and silently
  # bypass the "deployed recently" guard.
  local latest_date_ts_seconds
  latest_date_ts_seconds=$(jq -r '.deployments[0].createdAt / 1000 | floor' <<<"${latest_res}" 2>/dev/null)
  if [[ ! "${latest_date_ts_seconds}" =~ ^[0-9]+$ ]]; then
    echo "Unable to determine when the registry was last deployed."
    echo "A human reading this message should decide if a redeployment is necessary."
    echo "Please check the Vercel dashboard for more information."
    echo "${dashboard_url}"
    exit 1
  fi

  local current_date_ts_seconds
  current_date_ts_seconds="$(date +%s)"
  if (( current_date_ts_seconds - latest_date_ts_seconds < max_redeploy_interval_seconds )); then
    echo "The registry was deployed less than 2 hours ago."
    echo "Not automatically re-deploying the registry."
    echo "A human reading this message should decide if a redeployment is necessary."
    echo "Please check the Vercel dashboard for more information."
    echo "${dashboard_url}"
    exit 1
  fi

  local latest_deployment_state
  latest_deployment_state="$(jq -r '.deployments[0].state' <<<"${latest_res}")"
  if [[ "${latest_deployment_state}" != "READY" ]]; then
    echo "Last deployment was not in READY state. Skipping redeployment."
    echo "A human reading this message should decide if a redeployment is necessary."
    echo "Please check the Vercel dashboard for more information."
    echo "${dashboard_url}"
    exit 1
  fi

  echo "============================================================="
  echo "!!! Redeploying registry with deployment ID: ${latest_id} !!!"
  echo "============================================================="

  # Build the request body with jq so the API-provided id is JSON-escaped
  # rather than spliced into a hand-written JSON string.
  local payload
  payload=$(jq -n -c \
    --arg id "${latest_id}" \
    --arg name "${VERCEL_APP}" \
    '{deploymentId: $id, name: $name, target: "production"}')

  if ! curl -X POST "https://api.vercel.com/v13/deployments?forceNew=1&skipAutoDetectionConfirmation=1&slug=${VERCEL_TEAM_SLUG}&teamId=${VERCEL_TEAM_ID}" \
    --fail \
    --header "Authorization: Bearer ${VERCEL_API_KEY}" \
    --header "Content-Type: application/json" \
    --data-raw "${payload}"; then
    echo "DEPLOYMENT FAILED! Please check the Vercel dashboard for more information."
    echo "${dashboard_url}"
    exit 1
  fi
}
146+
77147
# Check each module's accessibility
78148
for module in "${modules[@]}"; do
79149
# Trim leading/trailing whitespace from module name
80150
module=$(echo "${module}" | xargs)
81151
url="${REGISTRY_BASE_URL}/modules/${module}"
82152
printf "=== Checking module %s at %s\n" "${module}" "${url}"
83153
status_code=$(curl --output /dev/null --head --silent --fail --location "${url}" --retry 3 --write-out "%{http_code}")
84-
# shellcheck disable=SC2181
85154
if (( status_code != 200 )); then
86155
printf "==> FAIL(%s)\n" "${status_code}"
87156
status=1
@@ -94,11 +163,11 @@ done
94163
# Determine overall status and update Instatus component
95164
if (( status == 0 )); then
96165
echo "All modules are operational."
97-
# set to
166+
# set to
98167
update_component_status "OPERATIONAL"
99168
else
100169
echo "The following modules have issues: ${failures[*]}"
101-
# check if all modules are down
170+
# check if all modules are down
102171
if (( ${#failures[@]} == ${#modules[@]} )); then
103172
update_component_status "MAJOROUTAGE"
104173
else
@@ -108,6 +177,10 @@ else
108177
# Create a new incident
109178
incident_id=$(create_incident)
110179
echo "Created incident with ID: $incident_id"
180+
181+
# If a module is down, force a redeployment to try getting things back online
182+
# ASAP
183+
force_redeploy_registry
111184
fi
112185

113186
exit "${status}"

.github/workflows/check.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: Health
22
# Check modules health on registry.coder.com
33
on:
44
schedule:
5-
- cron: "*/13 * * * *" # Runs every 13th minute
5+
- cron: "0,15,30,45 * * * *" # Runs every 15 minutes
66
workflow_dispatch: # Allows manual triggering of the workflow if needed
77

88
jobs:
@@ -20,3 +20,4 @@ jobs:
2020
INSTATUS_API_KEY: ${{ secrets.INSTATUS_API_KEY }}
2121
INSTATUS_PAGE_ID: ${{ secrets.INSTATUS_PAGE_ID }}
2222
INSTATUS_COMPONENT_ID: ${{ secrets.INSTATUS_COMPONENT_ID }}
23+
VERCEL_API_KEY: ${{ secrets.VERCEL_API_KEY }}

0 commit comments

Comments
 (0)