|
| 1 | +#!/usr/bin/env bash |
| 2 | +# This script calculates the expected content size, the actual cached size, and the cache keys used by the caching method before and after |
| 3 | +# the change implemented in https://github.com/lima-vm/lima/pull/2508 |
| 4 | +# |
| 5 | +# Answer to the question in https://github.com/lima-vm/lima/pull/2508#discussion_r1699798651 |
| 6 | + |
| 7 | +# usage: [DEBUG=1] ./hack/calculate-cache.sh |
| 8 | +# DEBUG=1 will save the collected information in .calculate-cache-collected-info-{before,after}.yaml |
| 9 | +# |
| 10 | +# This script does: |
| 11 | +# 1. extracts `runs-on` and `template` from the workflow file (.github/workflows/test.yml) |
| 12 | +# 2. checks each template for the image and nerdctl archive |
| 13 | +# 3. detects the size of the image and nerdctl archive (responses from remote are cached for faster iteration) |
| 14 | +# and saves the responses in .calculate-cache-response-cache.yaml |
| 15 | +# 4. prints the content size and the actual cache size (if available), by cache key |
| 16 | +# |
| 17 | +# The major differences for reducing cache usage are as follows: |
| 18 | +# - it now caches `~/.cache/lima/download/by-url-sha256/$sha256` instead of the whole `~/.cache/lima/download` |
| 19 | +# - the cache keys are now based on the image digest and nerdctl digest instead of the template file's hash |
| 20 | +# - this enables the use of the cache regardless of the operating system used to execute CI. |
| 21 | +# |
| 22 | +# The script requires the following commands: |
| 23 | +# - gh: GitHub CLI. |
| 24 | +# Used to get the cache information |
| 25 | +# - jq: Command-line JSON processor |
| 26 | +# Parse the workflow file and print runs-on and template. |
| 27 | +# Parse output from gh cache list |
| 28 | +# Calculate the expected content size, actual cached size, and cache-keys used. |
| 29 | +# - limactl: lima CLI. |
| 30 | +# Used to validate the template file for getting nerdctl location and digest. |
| 31 | +# - sha256sum: Print or check SHA256 (256-bit) checksums |
| 32 | +# - xxd: make a hexdump or do the reverse. |
| 33 | +# Used to simulate the 'hashFiles()' function in the workflow. |
| 34 | +# - yq: Command-line YAML processor. |
| 35 | +# Parse the template file for image and nerdctl location, digest, and size. |
| 36 | +# Parse the cache response file for the cache. |
| 37 | +# Convert the collected information to JSON. |
| 38 | + |
# -u: referencing an unset variable is an error.
# pipefail: a pipeline fails when any of its stages fails.
set -u -o pipefail

# Every external command the script invokes must be available up front.
# curl is needed by check_location and for templates given as URLs.
required_commands=(curl gh jq limactl sha256sum xxd yq)
for cmd in "${required_commands[@]}"; do
	if ! command -v "${cmd}" &>/dev/null; then
		echo "${cmd} is required. Please install it" >&2
		exit 1
	fi
done

# current workflow uses x86_64 only
arch=x86_64

# Use a throw-away LIMA_HOME (exported for the limactl invocations below;
# presumably so that limactl does not touch the user's real Lima directory).
if ! LIMA_HOME=$(mktemp -d); then
	echo "Failed to create a temporary directory for LIMA_HOME" >&2
	exit 1
fi
export LIMA_HOME
# Remove the temporary directory on every exit path of the main shell.
# ${LIMA_HOME:?} guards against ever running `rm -rf` on an empty path.
trap 'rm -rf -- "${LIMA_HOME:?}"' EXIT
| 54 | + |
# Lists every (runs-on, template) pair that a workflow file passes to the
# ./.github/actions/setup_cache_for_template action.
#
# For each job, the first step using that action is inspected. When its
# `with.template` value contains a `${{ <path> }}` expression, <path> is looked
# up in the job (e.g. `matrix.template` -> `.strategy.matrix.template`) and one
# line is printed per value found there; otherwise the template is printed as is.
#
# Arguments: $1 - path to the workflow file
# Outputs:   one "<runs-on><TAB><template>" line per pair on stdout
# e.g.
# ```console
# $ print_runs_on_template_from_workflow .github/workflows/test.yml
# macos-12 templates/default.yaml
# ubuntu-24.04 templates/alpine.yaml
# ubuntu-24.04 templates/debian.yaml
# ubuntu-24.04 templates/fedora.yaml
# ubuntu-24.04 templates/archlinux.yaml
# ubuntu-24.04 templates/opensuse.yaml
# ubuntu-24.04 templates/experimental/net-user-v2.yaml
# ubuntu-24.04 templates/experimental/9p.yaml
# ubuntu-24.04 templates/docker.yaml
# ubuntu-24.04 templates/../hack/test-templates/alpine-9p-writable.yaml
# ubuntu-24.04 templates/../hack/test-templates/test-misc.yaml
# macos-12 templates/vmnet.yaml
# macos-12 https://raw.githubusercontent.com/lima-vm/lima/v0.15.1/examples/ubuntu-lts.yaml
# macos-13 templates/experimental/vz.yaml
# macos-13 templates/fedora.yaml
# ```
function print_runs_on_template_from_workflow() {
	local -r workflow_file=$1
	# jq program applied to the workflow after yq has converted it to JSON.
	local -r jq_program='
		"./.github/actions/setup_cache_for_template" as $action |
		"\\$\\{\\{\\s*(?<path>\\S*)\\s*\\}\\}" as $pattern |
		.jobs | map_values(
			."runs-on" as $runs_on |
			{
				template: .steps | map_values(select(.uses == $action)) | first |.with.template,
				matrix: .strategy.matrix
			} | select(.template) |
			. + { path: .template | (if test($pattern) then sub(".*\($pattern).*";"\(.path)")|split(".") else null end) } |
			(
				.template as $template|
				if .path then
					getpath(.path)|map(. as $item|$template|sub($pattern;$item))
				else
					[$template]
				end
			) | map("\($runs_on)\t\(.)")

		) | flatten |.[]
	'
	yq -o=j "${workflow_file}" | jq -r "${jq_program}"
}
| 98 | + |
# returns the OS name from the runner equivalent to the expression `${{ runner.os }}` in the workflow
# (one of macOS, Linux, Windows).
# Prints nothing, and still returns 0, when the runner label is not recognized.
#
# Arguments: $1 - runner label as used in `runs-on`
# Outputs:   the OS name on stdout
# e.g.
# ```console
# $ runner_os_from_runner "macos-12"
# macOS
# $ runner_os_from_runner "ubuntu-24.04"
# Linux
# $ runner_os_from_runner "windows-2022"
# Windows
# ```
function runner_os_from_runner() {
	# No default arm on purpose: unknown labels produce no output.
	# shellcheck disable=SC2249
	case "${1-}" in
	macos*)
		echo macOS
		;;
	ubuntu*)
		echo Linux
		;;
	windows*)
		echo Windows
		;;
	esac
}
| 118 | + |
# check the remote location and return the http code and size.
# The result is cached in .calculate-cache-response-cache.yaml (relative to the
# current directory), keyed by the location, so repeated runs do not hit the network.
# A result is only cached when curl itself succeeded; a transport failure
# (which curl reports as http code 000) is printed but not remembered, so the
# next run retries it.
#
# Arguments: $1 - URL to probe
# Outputs:   "<http_code> <content-length>" on stdout
# e.g.
# ```console
# $ check_location "https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img"
# 200 585498624
# ```
function check_location() {
	# `local -r` instead of `readonly`: the latter creates global read-only
	# variables and makes a second call in the same shell fail.
	local -r location="$1"
	local -r cache_file="./.calculate-cache-response-cache.yaml"
	local cached http_code_and_size
	# check response_cache.yaml for the cache
	if [[ -f ${cache_file} ]]; then
		# `yq -e` exits non-zero when the key is absent, so a cache miss falls through.
		if cached=$(yq -e eval ".[\"${location}\"]" "${cache_file}" 2>/dev/null); then
			echo "${cached}"
			return
		fi
	else
		touch "${cache_file}"
	fi
	# HEAD request following redirects; only the final status code and Content-Length are kept.
	if http_code_and_size=$(curl -sIL -w "%{http_code} %header{Content-Length}" "${location}" -o /dev/null); then
		yq eval ".[\"${location}\"] = \"${http_code_and_size}\"" -i "${cache_file}"
	fi
	echo "${http_code_and_size}"
}
| 139 | + |
# print image location, digest, size, hash, containerd, containerd_location, containerd_digest, containerd_size from the template
#
# The template (local path or http(s) URL) is filled by `limactl validate --fill`
# when limactl is available and then queried with yq for the first two images and
# the first containerd archive matching ${arch}. The first image location that
# answers with HTTP 200 is reported. Missing sizes are printed as 0 and missing
# values as null so that the space-separated columns never shift.
#
# Globals:   arch (read)
# Arguments: $1 - template path or URL
# Outputs:   one space-separated line on stdout (see the example)
# Returns:   1, with a message on stderr, when no image location is reachable
# e.g.
# ```console
# $ print_location_digest_size_hash_from_template "templates/default.yaml"
# https://cloud-images.ubuntu.com/releases/24.04/release-20240725/ubuntu-24.04-server-cloudimg-amd64.img sha256:d2377667ea95222330ca2287817403c85178dad397e9fed768a9b4aec79d2a7f 585498624 49aa50a4872ded07ebf657c0eaf9e44ecc0c174d033a97c537ecd270f35b462f true https://github.com/containerd/nerdctl/releases/download/v1.7.6/nerdctl-full-1.7.6-linux-amd64.tar.gz sha256:2c841e097fcfb5a1760bd354b3778cb695b44cd01f9f271c17507dc4a0b25606 237465717
# ```
function print_location_digest_size_hash_from_template() {
	# All state is function-local, so the function can be called repeatedly in one shell.
	local -r template=$1
	local template_yaml
	case "${template}" in
	http*)
		template_yaml=$(curl -sSL "${template}")
		;;
	*)
		template_yaml=$(<"${template}")
		;;
	esac
	# Emits exactly seven lines: location 0, location 1, digest 0, digest 1,
	# containerd enabled (system or user), containerd archive location, containerd archive digest.
	local -r yq_filter="
		[
			.images | map(select(.arch == \"${arch}\")) | [.[0,1].location, .[0,1].digest],
			.containerd|[.system or .user],
			.containerd.archives | map(select(.arch == \"${arch}\")) | [.[0].location, .[0].digest]
		]|flatten|.[]
	"
	local parsed
	if command -v limactl &>/dev/null; then
		parsed=$(limactl validate <(echo "${template_yaml}") --fill 2>/dev/null | yq eval "${yq_filter}")
	else
		parsed=$(yq eval "${yq_filter}" <<<"${template_yaml}")
	fi
	# macOS earlier than 15.0 uses bash 3.2.57, which does not support readarray -t
	# readarray -t arr <<<"${parsed}"
	local -a arr=()
	local line
	while IFS= read -r line; do arr+=("${line}"); done <<<"${parsed}"
	# Default every field so that short parse output cannot trip `set -u`.
	local -a locations=("${arr[0]:-null}" "${arr[1]:-null}")
	local -a digests=("${arr[2]:-null}" "${arr[3]:-null}")
	local -r containerd="${arr[4]:-false}" containerd_location="${arr[5]:-null}" containerd_digest="${arr[6]:-null}"
	# Initialized to empty: a bare declaration leaves them unset, which `set -u` rejects.
	local location='' digest='' size='' hash
	local http_code http_code_and_size i
	for ((i = 0; i < ${#locations[@]}; i++)); do
		[[ -n ${locations[i]} && ${locations[i]} != null ]] || continue
		http_code_and_size=$(check_location "${locations[i]}")
		read -r http_code size <<<"${http_code_and_size}"
		if [[ ${http_code} == 200 ]]; then
			location=${locations[i]}
			digest=${digests[i]}
			break
		fi
	done
	if [[ -z ${location} ]]; then
		echo "Failed to get the image location for ${template}" >&2
		return 1
	fi
	# SHA-256 of the binary SHA-256 of the template; simulates the workflow's hash of the template file.
	hash=$(sha256sum <<<"${template_yaml}" | cut -d' ' -f1 | xxd -r -p | sha256sum | cut -d' ' -f1)
	local containerd_size='' containerd_http_code_and_size _containerd_http_code
	# Only probe a real archive location; "null" means the template has none.
	if [[ -n ${containerd_location} && ${containerd_location} != null ]]; then
		containerd_http_code_and_size=$(check_location "${containerd_location}")
		read -r _containerd_http_code containerd_size <<<"${containerd_http_code_and_size}"
	fi
	echo "${location} ${digest:-null} ${size:-0} ${hash} ${containerd} ${containerd_location} ${containerd_digest} ${containerd_size:-0}"
}
| 194 | + |
# format the byte counts in the first and second columns as MiB
# Reads whitespace-separated lines from stdin and prints them joined by single spaces:
#   column 1: replaced by its MiB value when it is a positive number
#   column 2: replaced by its MiB value when it is a positive number; when the
#             line has more than one column and column 2 is not positive, it is
#             replaced by "<<missing>>"
# The decision for column 2 is taken on the raw byte count, so sizes below
# 1 MiB are formatted (e.g. "0.50 MiB") rather than reported as missing.
# e.g.
# ```console
# $ echo 585498624 | size_to_mib
# 558.38 MiB
# ```
function size_to_mib() {
	awk '
		function mib(size) { return sprintf("%7.2f MiB", size / 1024 / 1024) }
		{
			# Evaluate column 2 before any field is rewritten.
			has_actual = (int($2) > 0)
			if (int($1) > 0) $1 = " " mib($1)
			if (has_actual) $2 = mib($2)
			else if (NF > 1) $2 = "<<missing>>"
			print
		}
	'
}
| 210 | + |
| 211 | +# actual_cache_sizes=$(gh cache list --json key,createdAt,sizeInBytes|jq '[.[]|{"key":.key,"value":.sizeInBytes}]|from_entries') |
| 212 | +# e.g. |
| 213 | +# ```console |
| 214 | +# $ echo "${actual_cache_sizes}" |
| 215 | +# { |
| 216 | +# "Linux-1c3b2791d52735d916dc44767c745c2319eb7cae74af71bbf45ddb268f42fc1d": 810758533, |
| 217 | +# "Linux-231c66957fc2cdb18ea10e63f60770049026e29051ecd6598fc390b60d6a4fa6": 633036717, |
| 218 | +# "Linux-3b906d46fa532e3bc348c35fc8e7ede6c69f0b27032046ee2cbb56d4022d1146": 574242367, |
| 219 | +# "Linux-69a547b760dbf1650007ed541408474237bc611704077214adcac292de556444": 70310855, |
| 220 | +# "Linux-7782f8b4ff8cd378377eb79f8d61c9559b94bbd0c11d19eb380ee7bda19af04e": 494141177, |
| 221 | +# "Linux-8812aedfe81b4456d421645928b493b1f2f88aff04b7f3171207492fd44cd189": 812730766, |
| 222 | +# "Linux-caa7d8af214d55ad8902e82d5918e61573f3d6795d2b5ad9a35305e26fa0e6a9": 754723892, |
| 223 | +# "Linux-colima-v0.6.5": 226350335, |
| 224 | +# "Linux-de83bce0608d787e3c68c7a31c5fab2b6d054320fd7bf633a031845e2ee03414": 810691197, |
| 225 | +# "Linux-eb88a19dfcf2fb98278e7c7e941c143737c6d7cd8950a88f58e04b4ee7cef1bc": 570625794, |
| 226 | +# "Linux-f88f0b3b678ff6432386a42bdd27661133c84a36ad29f393da407c871b0143eb": 68490954, |
| 227 | +# "golangci-lint.cache-Linux-2850-74615231540133417fd618c72e37be92c5d3b3ad": 2434144, |
| 228 | +# "macOS-231c66957fc2cdb18ea10e63f60770049026e29051ecd6598fc390b60d6a4fa6": 633020464, |
| 229 | +# "macOS-49aa50a4872ded07ebf657c0eaf9e44ecc0c174d033a97c537ecd270f35b462f": 813179462, |
| 230 | +# "macOS-8f37f663956af5f743f0f99ab973729b6a02f200ebfac7a3a036eff296550732": 810756770, |
| 231 | +# "macOS-ef5509b5d4495c8c3590442ee912ad1c9a33f872dc4a29421c524fc1e2103b59": 813179476, |
| 232 | +# "macOS-upgrade-v0.15.1": 1157814690, |
| 233 | +# "setup-go-Linux-ubuntu20-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1015518352, |
| 234 | +# "setup-go-Linux-ubuntu20-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 936433302, |
| 235 | +# "setup-go-Linux-ubuntu24-go-1.22.6-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1090001859, |
| 236 | +# "setup-go-Linux-ubuntu24-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 526146768, |
| 237 | +# "setup-go-Windows-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1155374040, |
| 238 | +# "setup-go-Windows-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 1056433137, |
| 239 | +# "setup-go-macOS-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1060919942, |
| 240 | +# "setup-go-macOS-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 982139209 |
| 241 | +# } |
# Latest size of each GitHub Actions cache entry as a JSON object {cache-key: sizeInBytes}.
# For duplicated keys the most recently created entry wins.
# --limit raises gh's default page of 30 entries so that fewer keys are reported as missing.
# When gh fails or prints nothing, an empty object is used: the report is still
# produced and every actual size is shown as missing.
actual_cache_sizes=$(
	gh cache list --limit 100 --json key,createdAt,sizeInBytes \
		--jq 'sort_by(.createdAt)|reverse|unique_by(.key)|sort_by(.key)|map({"key":.key,"value":.sizeInBytes})|from_entries'
) || actual_cache_sizes=
if [[ -z ${actual_cache_sizes} ]]; then
	echo "warning: failed to get the cache list from gh; actual sizes are reported as missing" >&2
	actual_cache_sizes='{}'
fi

workflows=(
	.github/workflows/test.yml
)

# shellcheck disable=SC2016
echo "=> compare expected content size, actual cached size, and cache-keys used before and after the change in https://github.com/lima-vm/lima/pull/2508"
# iterate over before and after
for cache_method in before after; do
	echo "==> ${cache_method}"
	echo "content-size actual-size cache-key"
	# Collect one YAML list item per cache entry that the given caching method would create.
	output_yaml=$(
		for workflow in "${workflows[@]}"; do
			print_runs_on_template_from_workflow "${workflow}"
		done | while IFS=$'\t' read -r runner template; do
			runner_os=$(runner_os_from_runner "${runner}")
			# templates whose image cannot be resolved are skipped
			location_digest_size_hash=$(print_location_digest_size_hash_from_template "${template}") || continue
			read -r location digest size hash containerd containerd_location containerd_digest containerd_size <<<"${location_digest_size_hash}"
			if [[ ${cache_method} != after ]]; then
				# previous caching method: one key per runner OS and template hash
				key=${runner_os}-${hash}
			elif [[ ${digest} == null ]]; then
				# no digest in the template: the key is derived from the sha256 of the URL
				key=image:$(basename "${location}")-url-sha256:$(echo -n "${location}" | sha256sum | cut -d' ' -f1)
			else
				key=image:$(basename "${location}")-${digest}
			fi
			if [[ ${containerd} == true ]]; then
				if [[ ${cache_method} != after ]]; then
					# previous caching method packages the containerd archive with the image
					# (an unknown archive size counts as 0 instead of breaking the arithmetic)
					size=$((size + ${containerd_size:-0}))
				else
					# new caching method packages the containerd archive separately
					containerd_key=containerd:$(basename "${containerd_location}")-${containerd_digest}
					printf -- "- key: %s\n  template: %s\n  location: %s\n  digest: %s\n  size: %s\n" \
						"${containerd_key}" "${template}" "${containerd_location}" "${containerd_digest}" "${containerd_size}"
				fi
			fi
			printf -- "- key: %s\n  template: %s\n  location: %s\n  digest: %s\n  size: %s\n" \
				"${key}" "${template}" "${location}" "${digest}" "${size}"
		done
	)
	# Nothing collected: use an empty list so that the jq filters below still work.
	[[ -n ${output_yaml} ]] || output_yaml='[]'
	output_json=$(yq -o=j . <<<"${output_yaml}")

	# print size key
	jq --argjson actual_size "${actual_cache_sizes}" -r 'unique_by(.key)|sort_by(.key)|.[]|[.size, $actual_size[.key] // 0, .key]|@tsv' <<<"${output_json}" | size_to_mib
	# total
	echo "------------"
	jq '[unique_by(.key)|.[]|.size]|add' <<<"${output_json}" | size_to_mib
	# save the collected information as yaml if DEBUG is set
	if [[ -n ${DEBUG:+1} ]]; then
		printf '%s\n' "${output_yaml}" >".calculate-cache-collected-info-${cache_method}.yaml"
		echo "Saved the collected information in .calculate-cache-collected-info-${cache_method}.yaml"
	fi
	echo ""
done
0 commit comments