Skip to content

Commit c885750

Browse files
committed
calculate-cache.sh: calculate cache size usage for new caching method versus previous method
This was created in response to the question at #2508 (comment). Signed-off-by: Norio Nomura <[email protected]> calculate-cache.sh: `shfmt -s` Signed-off-by: Norio Nomura <[email protected]> calculate-cache.sh: extract `runs_on` and `template` from workflow file Signed-off-by: Norio Nomura <[email protected]> calculate-cache.sh: add DEBUG=1 to save collected information as yaml Signed-off-by: Norio Nomura <[email protected]> calculate-cache.sh: add descriptions and output examples to functions Signed-off-by: Norio Nomura <[email protected]> calculate-cache.sh: use `--jq` instead of `| jq` Signed-off-by: Norio Nomura <[email protected]> calculate-cache.sh: add some descriptions Signed-off-by: Norio Nomura <[email protected]> calculate-cache.sh: resolve shfmt issue Signed-off-by: Norio Nomura <[email protected]> calculate-cache.sh: mention response cache file Signed-off-by: Norio Nomura <[email protected]>
1 parent 99c1247 commit c885750

File tree

1 file changed

+299
-0
lines changed

1 file changed

+299
-0
lines changed

hack/calculate-cache.sh

+299
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
#!/usr/bin/env bash
2+
# This script calculates the expected content size, actual cached size, and cache-keys used in caching method before and after
3+
# implementation in https://github.com/lima-vm/lima/pull/2508
4+
#
5+
# Answer to the question in https://github.com/lima-vm/lima/pull/2508#discussion_r1699798651
6+
7+
# usage: [DEBUG=1] ./hack/calculate-cache.sh
8+
# DEBUG=1 will save the collected information in .calculate-cache-collected-info-{before,after}.yaml
9+
#
10+
# This script does:
11+
# 1. extracts `runs_on` and `template` from workflow file (.github/workflows/test.yml)
12+
# 2. check each template for image and nerdctl
13+
# 3. detect size of image and nerdctl (responses from remote are cached for faster iteration)
14+
# save the response in .calculate-cache-response-cache.yaml
15+
# 4. print content size, actual cache size (if available), by cache key
16+
#
17+
# The major differences for reducing cache usage are as follows:
18+
# - it now caches only `~/.cache/lima/download/by-url-sha256/$sha256` instead of the whole `~/.cache/lima/download`
19+
# - the cache keys are now based on the image digest and nerdctl digest instead of the template file's hash
20+
# - enables the use of cache regardless of the operating system used to execute CI.
21+
#
22+
# The script requires the following commands:
23+
# - gh: GitHub CLI.
24+
# Used to get the cache information
25+
# - jq: Command-line JSON processor
26+
# Parse the workflow file and print runs-on and template.
27+
# Parse output from gh cache list
28+
# Calculate the expected content size, actual cached size, and cache-keys used.
29+
# - limactl: lima CLI.
30+
# Used to validate the template file in order to obtain the nerdctl location and digest.
31+
# - sha256sum: Print or check SHA256 (256-bit) checksums
32+
# - xxd: make a hexdump or do the reverse.
33+
# Used to simulate the 'hashFiles()' function in the workflow.
34+
# - yq: Command-line YAML processor.
35+
# Parse the template file for image and nerdctl location, digest, and size.
36+
# Parse the cache response file for the cache.
37+
# Convert the collected information to JSON.
38+
39+
# -u: referencing an unset variable is an error.
# pipefail: a pipeline fails when any of its stages fails.
# NOTE: -e is not enabled; per-template failures are handled explicitly
# (e.g. with `|| continue`) further down in this script.
set -u -o pipefail

# External commands invoked by this script. Abort early with a clear message
# when one of them is missing. curl is included because remote templates and
# the size probes in check_location are fetched with it.
required_commands=(curl gh jq limactl sha256sum xxd yq)
for cmd in "${required_commands[@]}"; do
	if ! command -v "${cmd}" &>/dev/null; then
		echo "${cmd} is required. Please install it" >&2
		exit 1
	fi
done

# current workflow uses x86_64 only
arch=x86_64

# Run limactl against a throw-away LIMA_HOME so that the user's own lima
# directory is not touched.
LIMA_HOME=$(mktemp -d) || {
	echo "Failed to create a temporary directory for LIMA_HOME" >&2
	exit 1
}
export LIMA_HOME
# Remove the temporary LIMA_HOME when the script exits.
# `:?` guards against an empty path ever reaching `rm -rf`.
trap 'rm -rf -- "${LIMA_HOME:?}"' EXIT
54+
55+
# parse the workflow file and print runs-on and template
56+
# e.g.
57+
# ```console
58+
# $ print_runs_on_template_from_workflow .github/workflows/test.yml
59+
# macos-12 templates/default.yaml
60+
# ubuntu-24.04 templates/alpine.yaml
61+
# ubuntu-24.04 templates/debian.yaml
62+
# ubuntu-24.04 templates/fedora.yaml
63+
# ubuntu-24.04 templates/archlinux.yaml
64+
# ubuntu-24.04 templates/opensuse.yaml
65+
# ubuntu-24.04 templates/experimental/net-user-v2.yaml
66+
# ubuntu-24.04 templates/experimental/9p.yaml
67+
# ubuntu-24.04 templates/docker.yaml
68+
# ubuntu-24.04 templates/../hack/test-templates/alpine-9p-writable.yaml
69+
# ubuntu-24.04 templates/../hack/test-templates/test-misc.yaml
70+
# macos-12 templates/vmnet.yaml
71+
# macos-12 https://raw.githubusercontent.com/lima-vm/lima/v0.15.1/examples/ubuntu-lts.yaml
72+
# macos-13 templates/experimental/vz.yaml
73+
# macos-13 templates/fedora.yaml
74+
# ```
75+
function print_runs_on_template_from_workflow() {
	# Print one "<runs-on><TAB><template>" line for every job of the workflow
	# file ($1) that has a step using ./.github/actions/setup_cache_for_template.
	#
	# How the jq program works:
	# - $pattern matches a `${{ <expr> }}` expression and captures <expr> as "path".
	# - For each job, an object {template, matrix} is built from the first step
	#   that uses the action and from `.strategy.matrix`; jobs without such a
	#   step are dropped by `select(.template)`.
	# - If the template contains an expression, <expr> is split on "." and looked
	#   up with getpath() in that object (so an expression starting with
	#   "matrix." resolves inside `.strategy.matrix`), and one line is emitted per
	#   item found there (assumes the looked-up value is a list -- TODO confirm).
	# - Otherwise the template is emitted as is.
	#
	# Returns non-zero (and prints nothing on stdout) if the file is not readable.
	local -r workflow_file=${1:-}
	if [[ ! -r ${workflow_file} ]]; then
		echo "Cannot read the workflow file: ${workflow_file}" >&2
		return 1
	fi
	yq -o=j "${workflow_file}" | jq -r '
		"./.github/actions/setup_cache_for_template" as $action |
		"\\$\\{\\{\\s*(?<path>\\S*)\\s*\\}\\}" as $pattern |
		.jobs | map_values(
			."runs-on" as $runs_on |
			{
				template: .steps | map_values(select(.uses == $action)) | first |.with.template,
				matrix: .strategy.matrix
			} | select(.template) |
			. + { path: .template | (if test($pattern) then sub(".*\($pattern).*";"\(.path)")|split(".") else null end) } |
			(
				.template as $template|
				if .path then
					getpath(.path)|map(. as $item|$template|sub($pattern;$item))
				else
					[$template]
				end
			) | map("\($runs_on)\t\(.)")

		) | flatten |.[]
	'
}
98+
99+
# returns the OS name from the runner equivalent to the expression `${{ runner.os }}` in the workflow
100+
# e.g.
101+
# ```console
102+
# $ runner_os_from_runner "macos-12"
103+
# macOS
104+
# $ runner_os_from_runner "ubuntu-24.04"
105+
# Linux
106+
# ```
107+
function runner_os_from_runner() {
	# Print the value that `${{ runner.os }}` has on the runner whose label is $1:
	#   macos*   -> macOS
	#   ubuntu*  -> Linux
	#   windows* -> Windows
	# Nothing is printed for any other label.
	case "$1" in
	macos*)
		echo macOS
		;;
	ubuntu*)
		echo Linux
		;;
	windows*)
		echo Windows
		;;
	*)
		# unknown runner label: print nothing
		;;
	esac
}
118+
119+
# check the remote location and return the http code and size.
120+
# The result is cached in .calculate-cache-response-cache.yaml
121+
# e.g.
122+
# ```console
123+
# $ check_location "https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img"
124+
# 200 585498624
125+
# ```
126+
function check_location() {
	# Print "<http_code> <content_length>" for the URL given as $1.
	#
	# Responses are memoized in ./.calculate-cache-response-cache.yaml (keyed by
	# URL) so that repeated runs do not hit the network again. Any HTTP response
	# is memoized, including non-200 ones; only a transport failure (curl itself
	# exiting non-zero) is not, so that a temporary network problem is not
	# remembered forever.
	#
	# All variables are local so the function can be called directly more than
	# once and does not clobber the caller's variables.
	local -r location=$1
	local -r cache_file="./.calculate-cache-response-cache.yaml"
	local cached http_code_and_size

	if [[ -f ${cache_file} ]]; then
		# The URL is handed to yq through the environment instead of being
		# pasted into the expression, so characters such as quotes cannot
		# alter the expression. -e makes yq fail when the key is absent.
		if cached=$(CACHE_KEY="${location}" yq -e eval '.[strenv(CACHE_KEY)]' "${cache_file}" 2>/dev/null); then
			echo "${cached}"
			return
		fi
	else
		touch "${cache_file}"
	fi

	# -I: HEAD request, -L: follow redirects; only the -w output is captured.
	if ! http_code_and_size=$(curl -sIL -w "%{http_code} %header{Content-Length}" "${location}" -o /dev/null); then
		# transport failure: report what curl printed, but do not memoize it
		echo "${http_code_and_size}"
		return
	fi

	CACHE_KEY="${location}" CACHE_VALUE="${http_code_and_size}" \
		yq eval '.[strenv(CACHE_KEY)] = strenv(CACHE_VALUE)' -i "${cache_file}" ||
		echo "Failed to update ${cache_file}" >&2
	echo "${http_code_and_size}"
}
139+
140+
# print image location, digest, size, hash, containerd, containerd_location, containerd_digest, containerd_size from the template
141+
# e.g.
142+
# ```console
143+
# $ print_location_digest_size_hash_from_template "templates/default.yaml"
144+
# https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img sha256:d2377667ea95222330ca2287817403c85178dad397e9fed768a9b4aec79d2a7f 585498624 49aa50a4872ded07ebf657c0eaf9e44ecc0c174d033a97c537ecd270f35b462f true https://github.com/containerd/nerdctl/releases/download/v1.7.6/nerdctl-full-1.7.6-linux-amd64.tar.gz sha256:2c841e097fcfb5a1760bd354b3778cb695b44cd01f9f271c17507dc4a0b25606 237465717
145+
# ```
146+
function print_location_digest_size_hash_from_template() {
	# For the template given as $1 (a local path or an http(s) URL) print one
	# space separated line:
	#   location digest size hash containerd containerd_location containerd_digest containerd_size
	#
	# - location/digest/size: the first of the first two images for ${arch}
	#   whose location answers with HTTP 200 (see check_location).
	# - hash: sha256 of the binary sha256 of the template text.
	# - containerd: result of `.containerd.system or .containerd.user`.
	# - containerd_*: first containerd archive for ${arch}.
	#
	# Sizes that could not be determined are printed as 0 and missing values as
	# "null", so that the number of columns is always the same for the caller
	# that splits the line with `read`.
	#
	# Globals:  arch (read)
	# Returns:  1 if the template cannot be read or no image location answers 200.
	local -r template=$1
	local template_yaml
	case "${template}" in
	http*)
		template_yaml=$(curl -sSL "${template}") || {
			echo "Failed to download the template ${template}" >&2
			return 1
		}
		;;
	*)
		template_yaml=$(<"${template}") || {
			echo "Failed to read the template ${template}" >&2
			return 1
		}
		;;
	esac

	# Emits 7 lines: 2 image locations, 2 image digests, the containerd flag,
	# the containerd archive location and the containerd archive digest.
	local -r yq_filter="
	[
		.images | map(select(.arch == \"${arch}\")) | [.[0,1].location, .[0,1].digest],
		.containerd|[.system or .user],
		.containerd.archives | map(select(.arch == \"${arch}\")) | [.[0].location, .[0].digest]
	]|flatten|.[]
	"
	local parsed
	if command -v limactl &>/dev/null; then
		# `limactl validate --fill` prints the template with defaults filled in
		parsed=$(limactl validate <(echo "${template_yaml}") --fill 2>/dev/null | yq eval "${yq_filter}")
	else
		parsed=$(yq eval "${yq_filter}" <<<"${template_yaml}")
	fi

	# macOS earlier than 15.0 uses bash 3.2.57, which does not support readarray -t
	local -a arr=()
	local line
	while IFS= read -r line; do arr+=("${line}"); done <<<"${parsed}"
	local -a locations=("${arr[@]:0:2}") digests=("${arr[@]:2:2}")
	local -r containerd="${arr[4]:-null}" containerd_location="${arr[5]:-null}" containerd_digest="${arr[6]:-null}"

	# Initialised to empty strings: a bare `declare`/`local` leaves the variable
	# unset, and testing it under `set -u` would abort with "unbound variable".
	local location="" digest="" size="" http_code="" http_code_and_size i
	for ((i = 0; i < ${#locations[@]}; i++)); do
		[[ ${locations[i]} != null ]] || continue
		http_code_and_size=$(check_location "${locations[i]}")
		read -r http_code size <<<"${http_code_and_size}"
		if [[ ${http_code} == 200 ]]; then
			location=${locations[i]}
			digest=${digests[i]:-null}
			break
		fi
	done
	if [[ -z ${location} ]]; then
		echo "Failed to get the image location for ${template}" >&2
		return 1
	fi

	# sha256 (hex) -> raw bytes -> sha256 again
	local hash
	hash=$(sha256sum <<<"${template_yaml}" | cut -d' ' -f1 | xxd -r -p | sha256sum | cut -d' ' -f1)

	local containerd_size="" containerd_http_code_and_size _containerd_http_code
	if [[ ${containerd_location} != null ]]; then
		containerd_http_code_and_size=$(check_location "${containerd_location}")
		read -r _containerd_http_code containerd_size <<<"${containerd_http_code_and_size}"
	fi

	echo "${location} ${digest} ${size:-0} ${hash} ${containerd} ${containerd_location} ${containerd_digest} ${containerd_size:-0}"
}
194+
195+
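# format the first column (and the second column, if present) as MiB; a second column that is zero or not a number is printed as <<missing>>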
196+
# e.g.
197+
# ```console
198+
# $ echo 585498624 | size_to_mib
199+
# 558.38 MiB
200+
# ```
201+
function size_to_mib() {
	# Filter: reads whitespace separated lines from stdin and rewrites the byte
	# counts in the first two columns as MiB.
	#   column 1: rewritten (with one extra leading space) when it is > 0
	#   column 2: only when the line has more than one column; rewritten when
	#             it is > 0, otherwise replaced with <<missing>>
	# Remaining columns are printed unchanged, separated by single spaces.
	#
	# The decision for column 2 is taken on the raw byte count, before the
	# column is rewritten, so sizes below 1 MiB are not mistaken for missing.
	awk '
		function mib(size) { return sprintf("%7.2f MiB", size / 1024 / 1024) }
		{
			content = $1
			actual = $2
			if (int(content) > 0) $1 = " " mib(content)
			if (NF > 1) $2 = (int(actual) > 0) ? mib(actual) : "<<missing>>"
			print
		}
	'
}
210+
211+
# actual_cache_sizes=$(gh cache list --json key,createdAt,sizeInBytes|jq '[.[]|{"key":.key,"value":.sizeInBytes}]|from_entries')
212+
# e.g.
213+
# ```console
214+
# $ echo "${actual_cache_sizes}"
215+
# {
216+
# "Linux-1c3b2791d52735d916dc44767c745c2319eb7cae74af71bbf45ddb268f42fc1d": 810758533,
217+
# "Linux-231c66957fc2cdb18ea10e63f60770049026e29051ecd6598fc390b60d6a4fa6": 633036717,
218+
# "Linux-3b906d46fa532e3bc348c35fc8e7ede6c69f0b27032046ee2cbb56d4022d1146": 574242367,
219+
# "Linux-69a547b760dbf1650007ed541408474237bc611704077214adcac292de556444": 70310855,
220+
# "Linux-7782f8b4ff8cd378377eb79f8d61c9559b94bbd0c11d19eb380ee7bda19af04e": 494141177,
221+
# "Linux-8812aedfe81b4456d421645928b493b1f2f88aff04b7f3171207492fd44cd189": 812730766,
222+
# "Linux-caa7d8af214d55ad8902e82d5918e61573f3d6795d2b5ad9a35305e26fa0e6a9": 754723892,
223+
# "Linux-colima-v0.6.5": 226350335,
224+
# "Linux-de83bce0608d787e3c68c7a31c5fab2b6d054320fd7bf633a031845e2ee03414": 810691197,
225+
# "Linux-eb88a19dfcf2fb98278e7c7e941c143737c6d7cd8950a88f58e04b4ee7cef1bc": 570625794,
226+
# "Linux-f88f0b3b678ff6432386a42bdd27661133c84a36ad29f393da407c871b0143eb": 68490954,
227+
# "golangci-lint.cache-Linux-2850-74615231540133417fd618c72e37be92c5d3b3ad": 2434144,
228+
# "macOS-231c66957fc2cdb18ea10e63f60770049026e29051ecd6598fc390b60d6a4fa6": 633020464,
229+
# "macOS-49aa50a4872ded07ebf657c0eaf9e44ecc0c174d033a97c537ecd270f35b462f": 813179462,
230+
# "macOS-8f37f663956af5f743f0f99ab973729b6a02f200ebfac7a3a036eff296550732": 810756770,
231+
# "macOS-ef5509b5d4495c8c3590442ee912ad1c9a33f872dc4a29421c524fc1e2103b59": 813179476,
232+
# "macOS-upgrade-v0.15.1": 1157814690,
233+
# "setup-go-Linux-ubuntu20-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1015518352,
234+
# "setup-go-Linux-ubuntu20-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 936433302,
235+
# "setup-go-Linux-ubuntu24-go-1.22.6-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1090001859,
236+
# "setup-go-Linux-ubuntu24-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 526146768,
237+
# "setup-go-Windows-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1155374040,
238+
# "setup-go-Windows-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 1056433137,
239+
# "setup-go-macOS-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1060919942,
240+
# "setup-go-macOS-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 982139209
241+
# }
242+
# JSON object mapping each cache key to its size in bytes, built from
# `gh cache list`. Entries are ordered newest first before `unique_by(.key)`
# so that (presumably) the most recent entry wins when a key occurs more
# than once -- TODO confirm against jq's unique_by semantics.
actual_cache_sizes=$(
	gh cache list --json key,createdAt,sizeInBytes \
		--jq 'sort_by(.createdAt)|reverse|unique_by(.key)|sort_by(.key)|map({"key":.key,"value":.sizeInBytes})|from_entries'
) || actual_cache_sizes=""
# Fall back to an empty object when gh failed or printed nothing, so that the
# value is still valid JSON for `jq --argjson` and all actual sizes are simply
# reported as missing.
if [[ -z ${actual_cache_sizes} ]]; then
	echo "warning: could not get the cache list from gh; actual sizes will be reported as missing" >&2
	actual_cache_sizes='{}'
fi
246+
247+
# workflow files to inspect
workflows=(
	.github/workflows/test.yml
)

echo "=> compare expected content size, actual cached size, and cache-keys used before and after the change in https://github.com/lima-vm/lima/pull/2508"
# iterate over before and after
for cache_method in before after; do
	echo "==> ${cache_method}"
	echo "content-size actual-size cache-key"
	# Collect one YAML list entry (key, template, location, digest, size) per
	# cache that the given caching method would create.
	output_yaml=$(
		for workflow in "${workflows[@]}"; do
			print_runs_on_template_from_workflow "${workflow}"
		done | while IFS=$'\t' read -r runner template; do
			runner_os=$(runner_os_from_runner "${runner}")
			# templates whose image cannot be resolved are skipped
			location_digest_size_hash=$(print_location_digest_size_hash_from_template "${template}") || continue
			read -r location digest size hash containerd containerd_location containerd_digest containerd_size <<<"${location_digest_size_hash}"
			if [[ ${cache_method} != after ]]; then
				# previous caching method: key is runner OS + hash of the template
				key=${runner_os}-${hash}
			elif [[ ${digest} == null ]]; then
				# new caching method without a digest: key is based on sha256 of the URL
				key=image:$(basename "${location}")-url-sha256:$(echo -n "${location}" | sha256sum | cut -d' ' -f1)
			else
				# new caching method: key is based on the image digest
				key=image:$(basename "${location}")-${digest}
			fi
			if [[ ${containerd} == true ]]; then
				if [[ ${cache_method} != after ]]; then
					# previous caching method packages the containerd archive with the image
					# (sizes default to 0 so an unknown size cannot break the arithmetic)
					size=$((${size:-0} + ${containerd_size:-0}))
				else
					# new caching method packages the containerd archive separately
					containerd_key=containerd:$(basename "${containerd_location}")-${containerd_digest}
					printf -- "- key: %s\n  template: %s\n  location: %s\n  digest: %s\n  size: %s\n" \
						"${containerd_key}" "${template}" "${containerd_location}" "${containerd_digest}" "${containerd_size}"
				fi
			fi
			printf -- "- key: %s\n  template: %s\n  location: %s\n  digest: %s\n  size: %s\n" \
				"${key}" "${template}" "${location}" "${digest}" "${size}"
		done
	)
	if [[ -z ${output_yaml} ]]; then
		echo "No cache entries could be collected for ${cache_method}" >&2
		echo ""
		continue
	fi
	output_json=$(yq -o=j . <<<"${output_yaml}")

	# print size key
	jq --argjson actual_size "${actual_cache_sizes}" -r 'unique_by(.key)|sort_by(.key)|.[]|[.size, $actual_size[.key] // 0, .key]|@tsv' <<<"${output_json}" | size_to_mib
	# total
	echo "------------"
	jq '[unique_by(.key)|.[]|.size]|add' <<<"${output_json}" | size_to_mib
	# save the collected information as yaml if DEBUG is set
	if [[ -n ${DEBUG:+1} ]]; then
		cat <<<"${output_yaml}" >".calculate-cache-collected-info-${cache_method}.yaml"
		echo "Saved the collected information in .calculate-cache-collected-info-${cache_method}.yaml"
	fi
	echo ""
done

0 commit comments

Comments
 (0)