Skip to content

Commit 0cb71c8

Browse files
committed
calculate-cache.sh: calculate cache size usage for new caching method versus previous method
This was created in response to the question at #2508 (comment). Signed-off-by: Norio Nomura <[email protected]>
1 parent b0b35dc commit 0cb71c8

File tree

1 file changed

+207
-0
lines changed

1 file changed

+207
-0
lines changed

hack/calculate-cache.sh

+207
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
#!/usr/bin/env bash
# Compare cache usage between the caching method used before and the one used
# after the implementation in https://github.com/lima-vm/lima/pull/2508.
#
# For both methods the script prints the expected content size, the actual
# cached size, and the cache keys.
#
# Answer to the question in https://github.com/lima-vm/lima/pull/2508#discussion_r1699798651
set -u -o pipefail

# External tools invoked by this script. curl is listed as well because the
# script calls it directly to download templates and to probe image URLs.
required_commands=(curl gh jq limactl sha256sum xxd yq)
for cmd in "${required_commands[@]}"; do
  if ! command -v "${cmd}" &>/dev/null; then
    echo "${cmd} is required. Please install it" >&2
    exit 1
  fi
done
# current workflow uses x86_64 only
arch=x86_64

# Point LIMA_HOME at a fresh temporary directory for the duration of the run
# (presumably so limactl does not touch the user's real Lima home - confirm).
LIMA_HOME=$(mktemp -d) || {
  echo "failed to create a temporary directory for LIMA_HOME" >&2
  exit 1
}
export LIMA_HOME
# Remove the temporary directory again when the script exits; ":?" guards
# against ever running rm -rf with an empty path.
trap 'rm -rf -- "${LIMA_HOME:?}"' EXIT
# Templates exercised per runner. Each array is named after a runner and is
# looked up by that name through the "runners" list below, which is why
# shellcheck cannot see the arrays being used (SC2034).

# shellcheck disable=SC2034
macos_12=(
  # integration
  examples/default.yaml
  # vmnet
  examples/vmnet.yaml
  # upgrade
  https://raw.githubusercontent.com/lima-vm/lima/v0.15.1/examples/ubuntu-lts.yaml
)

# shellcheck disable=SC2034
ubuntu_2204=(
  # integration-linux
  examples/alpine.yaml
  examples/debian.yaml
  examples/fedora.yaml
  examples/archlinux.yaml
  examples/opensuse.yaml
  examples/experimental/net-user-v2.yaml
  examples/experimental/9p.yaml
  examples/docker.yaml
  examples/../hack/test-templates/alpine-9p-writable.yaml
  examples/../hack/test-templates/test-misc.yaml
)

# shellcheck disable=SC2034
macos_13=(
  # vz
  examples/experimental/vz.yaml
  examples/fedora.yaml
)

# Names of the arrays above, in the order they are processed.
runners=(
  macos_12
  ubuntu_2204
  macos_13
)
# Map a runner name to the OS label that prefixes the cache key of the prior
# caching method.
# Arguments: $1 - runner name, e.g. macos_12 or ubuntu_2204
# Outputs:   "macOS" or "Linux" on stdout; nothing for any other name
function runner_os_from_runner() {
  # No default branch on purpose: unknown names produce no output.
  # shellcheck disable=SC2249
  case "$1" in
  macos*)
    echo macOS
    ;;
  ubuntu*)
    echo Linux
    ;;
  esac
}
# Print "<http_code> <Content-Length>" for a URL, memoised in a YAML file.
#
# Arguments: $1 - URL to probe with a HEAD request (redirects are followed)
# Outputs:   "<http_code> <Content-Length>" on stdout
# Files:     ./.calculate-cache-response-cache.yaml maps each URL to its result
function check_location() {
  local -r location="$1"
  local -r cache_file="./.calculate-cache-response-cache.yaml"
  local cached http_code_and_size

  # check response_cache.yaml for the cache
  if [[ -f "${cache_file}" ]]; then
    # The URL travels through the environment rather than being spliced into
    # the yq expression, so quotes or backslashes in it cannot break or alter
    # the expression. -e makes yq exit non-zero when the key is absent.
    if cached=$(CHECK_LOCATION_KEY="${location}" yq -e eval '.[strenv(CHECK_LOCATION_KEY)]' "${cache_file}" 2>/dev/null); then
      echo "${cached}"
      return
    fi
  else
    touch "${cache_file}"
  fi

  http_code_and_size=$(curl -sIL -w "%{http_code} %header{Content-Length}" "${location}" -o /dev/null)

  # Persist only real HTTP answers. An empty result or a 000 code (what curl
  # prints when it received no response at all - confirm against curl docs)
  # is likely transient and must not be served from the cache forever.
  if [[ -n ${http_code_and_size} && ${http_code_and_size} != 000* ]]; then
    CHECK_LOCATION_KEY="${location}" CHECK_LOCATION_VALUE="${http_code_and_size}" \
      yq eval '.[strenv(CHECK_LOCATION_KEY)] = strenv(CHECK_LOCATION_VALUE)' -i "${cache_file}"
  fi
  echo "${http_code_and_size}"
}
# Resolve which image (and containerd archive) a template refers to.
#
# Globals:   arch (read)
# Arguments: $1 - path to a template, or an http(s) URL of one
# Outputs:   one space-separated line on stdout:
#            location digest size hash containerd containerd_location
#            containerd_digest containerd_size
# Returns:   1 (with a message on stderr) when neither of the first two image
#            locations for ${arch} answers with HTTP 200
function print_location_digest_size_hash_from_template() {
  local -r template=$1
  local template_yaml parsed line
  case "${template}" in
  http*)
    template_yaml=$(curl -sSL "${template}")
    ;;
  *)
    template_yaml=$(<"${template}")
    ;;
  esac

  # Emits seven values, one per line: two image locations, two image digests,
  # the containerd flag, and the containerd archive location and digest.
  local -r yq_filter="
  [
    .images | map(select(.arch == \"${arch}\")) | [.[0,1].location, .[0,1].digest],
    .containerd|[.system or .user],
    .containerd.archives | map(select(.arch == \"${arch}\")) | [.[0].location, .[0].digest]
  ]|flatten|.[]
  "
  if command -v limactl &>/dev/null; then
    parsed=$(limactl validate <(echo "${template_yaml}") --fill 2>/dev/null | yq eval "${yq_filter}")
  else
    parsed=$(yq eval "${yq_filter}" <<<"${template_yaml}")
  fi

  # macOS earlier than 15.0 uses bash 3.2.57, which does not support readarray -t
  local -a arr=()
  while IFS= read -r line; do arr+=("${line}"); done <<<"${parsed}"
  local -a locations=("${arr[@]:0:2}") digests=("${arr[@]:2:2}")
  # ":-" keeps `set -u` from aborting when yq printed fewer lines than expected.
  local -r containerd="${arr[4]:-}" containerd_location="${arr[5]:-}" containerd_digest="${arr[6]:-}"

  # Initialise explicitly: a bare declaration leaves the names unset, and with
  # `set -u` the emptiness check after the loop would abort with "unbound
  # variable" instead of printing the intended error message.
  local location="" digest="" size="" hash=""
  local http_code="" http_code_and_size i
  for ((i = 0; i < ${#locations[@]}; i++)); do
    [[ ${locations[i]} != null ]] || continue
    http_code_and_size=$(check_location "${locations[i]}")
    read -r http_code size <<<"${http_code_and_size}"
    # String comparison: a numeric test would try to evaluate a non-numeric
    # code as an expression.
    if [[ ${http_code} == 200 ]]; then
      location=${locations[i]}
      digest=${digests[i]:-null}
      break
    fi
  done
  if [[ -z ${location} ]]; then
    echo "Failed to get the image location for ${template}" >&2
    return 1
  fi

  # SHA-256 of the raw bytes of the template's SHA-256 digest.
  # NOTE(review): presumably mirrors how the prior workflow derived its cache
  # key from the template file - confirm.
  hash=$(sha256sum <<<"${template_yaml}" | cut -d' ' -f1 | xxd -r -p | sha256sum | cut -d' ' -f1)

  # Probe the containerd archive only when the template names one; the literal
  # "null" is not a URL.
  local containerd_size="" _containerd_http_code=""
  if [[ -n ${containerd_location} && ${containerd_location} != null ]]; then
    read -r _containerd_http_code containerd_size <<<"$(check_location "${containerd_location}")"
  fi

  # The caller splits this line on whitespace, so an empty size (response
  # without Content-Length) is printed as 0 to keep the fields aligned.
  echo "${location} ${digest} ${size:-0} ${hash} ${containerd} ${containerd_location} ${containerd_digest} ${containerd_size}"
}
# Format byte counts read from stdin as MiB.
#
# Input:   whitespace-separated lines; column 1 is a byte count and, when a
#          line has more than one column, column 2 is a byte count as well
# Outputs: the same lines with column 1 rendered as " %7.2f MiB" when it is
#          positive, and column 2 rendered as "%7.2f MiB" when it is positive
#          or as "<<missing>>" otherwise
function size_to_mib() {
  awk '
    function mib(size) { return sprintf("%7.2f MiB", size / 1024 / 1024) }
    int($1) > 0 { $1 = " " mib($1) }
    # Decide on the raw byte count exactly once. Testing the formatted text
    # again would report every size below 1 MiB as missing, because the
    # integer part of such a formatted value is 0.
    NF > 1 { $2 = (int($2) > 0) ? mib($2) : "<<missing>>" }
    { print }
  '
}
# JSON object mapping each cache key to the size in bytes of one cache entry
# with that key. Entries are ordered newest-first before being de-duplicated
# by key, with the intent of keeping the most recent entry per key.
# NOTE(review): `gh cache list` may return only a limited number of entries by
# default - confirm whether a --limit is needed for this repository.
# actual_cache_sizes=$(gh cache list --json key,createdAt,sizeInBytes|jq '[.[]|{"key":.key,"value":.sizeInBytes}]|from_entries')
actual_cache_sizes=$(
  gh cache list --json key,createdAt,sizeInBytes |
    jq 'sort_by(.createdAt)|reverse|unique_by(.key)|sort_by(.key)|map({"key":.key,"value":.sizeInBytes})|from_entries'
) || actual_cache_sizes=""
# The value is later handed to `jq --argjson`, which rejects an empty string.
# Fall back to an empty object so the report is still produced.
if [[ -z ${actual_cache_sizes} ]]; then
  echo "Failed to list the caches with gh; actual sizes will not be available" >&2
  actual_cache_sizes='{}'
fi
# For each caching method, collect one record per cache entry (key, template,
# location, digest, size), save the records as YAML and JSON next to the
# script's working directory, and print a size table followed by the total.
# shellcheck disable=SC2016
for cache_method in prior after; do
  echo "==> expected content size, actual cached size, and cache-keys used in caching method ${cache_method} implementation in https://github.com/lima-vm/lima/pull/2508"
  echo "content-size actual-size cache-key"
  output_yaml=$(
    for runner in "${runners[@]}"; do
      runner_os=$(runner_os_from_runner "${runner}")
      # Indirect expansion instead of `declare -n`: namerefs need bash 4.3,
      # while the rest of the script deliberately avoids features that
      # bash 3.2 lacks.
      templates_ref="${runner}[@]"
      templates_used_in_test_yml=("${!templates_ref}")
      for template in "${templates_used_in_test_yml[@]}"; do
        location_digest_size_hash=$(print_location_digest_size_hash_from_template "${template}") || continue
        read -r location digest size hash containerd containerd_location containerd_digest containerd_size <<<"${location_digest_size_hash}"
        if [[ ${cache_method} = prior ]]; then
          key=${runner_os}-${hash}
        elif [[ ${digest} = null ]]; then
          # no digest available: key on the SHA-256 of the URL instead
          key=image:$(basename "${location}")-url-sha256:$(printf '%s' "${location}" | sha256sum | cut -d' ' -f1)
        else
          key=image:$(basename "${location}")-${digest}
        fi
        if [[ ${containerd} = true ]]; then
          if [[ ${cache_method} = prior ]]; then
            # previous caching method packages the containerd archive with the image
            # (":-0" keeps the arithmetic valid when a size is unknown)
            size=$((${size:-0} + ${containerd_size:-0}))
          else
            # new caching method packages the containerd archive separately
            containerd_key=containerd:$(basename "${containerd_location}")-${containerd_digest}
            # One YAML sequence item per record; printf is used instead of a
            # tab-stripping here-document so the layout does not depend on
            # how this file is indented.
            printf -- '- key: %s\n  template: %s\n  location: %s\n  digest: %s\n  size: %s\n' \
              "${containerd_key}" "${template}" "${containerd_location}" "${containerd_digest}" "${containerd_size}"
          fi
        fi
        printf -- '- key: %s\n  template: %s\n  location: %s\n  digest: %s\n  size: %s\n' \
          "${key}" "${template}" "${location}" "${digest}" "${size}"
      done
    done
  )
  printf '%s\n' "${output_yaml}" >".calculate-cache-collected-info-${cache_method}.yaml"
  # Without any record the jq filters below would fail on a non-array input.
  if [[ -z ${output_yaml} ]]; then
    echo "No cache entries could be collected for caching method ${cache_method}" >&2
    echo ""
    continue
  fi
  output_json=$(yq -o=j . <<<"${output_yaml}" | tee ".calculate-cache-collected-info-${cache_method}.json")

  # print size key
  jq --argjson actual_size "${actual_cache_sizes}" -r 'unique_by(.key)|sort_by(.key)|.[]|[.size, $actual_size[.key] // 0, .key]|@tsv' <<<"${output_json}" | size_to_mib
  # total
  echo "------------"
  jq '[unique_by(.key)|.[]|.size]|add' <<<"${output_json}" | size_to_mib
  echo ""
done

0 commit comments

Comments
 (0)