add a script to get a flamegraph of collab in production (#8972)

Conrad Irwin created

Add `./script/collab-flamegraph` so you can profile collab in production
(or staging) and view the result as a flamegraph.

Release Notes:

- N/A

Change summary

Dockerfile               |  3 ++-
script/collab-flamegraph | 32 ++++++++++++++++++++++++++++++++
2 files changed, 34 insertions(+), 1 deletion(-)

Detailed changes

Dockerfile 🔗

@@ -22,7 +22,8 @@ RUN --mount=type=cache,target=./target \
 # Copy collab server binary to the runtime image
 FROM debian:bookworm-slim as runtime
 RUN apt-get update; \
-    apt-get install -y --no-install-recommends libcurl4-openssl-dev ca-certificates linux-perf
+    apt-get install -y --no-install-recommends libcurl4-openssl-dev ca-certificates \
+    linux-perf binutils
 WORKDIR app
 COPY --from=builder /app/collab /app/collab
 COPY --from=builder /app/crates/collab/migrations /app/migrations

script/collab-flamegraph 🔗

@@ -0,0 +1,32 @@
+#!/bin/bash
+
+# Notes for fixing this script if it's broken:
+# - if you see an error about "can't find perf_6.1" you need to install `linux-perf` from the
+#   version of Debian that matches the host (e.g. apt-get -t bookworm-backports install linux-perf)
+# - if you see an error about `addr2line` you may need to install binutils
+
+set -euo pipefail
+
+source script/lib/deploy-helpers.sh
+
+if [[ $# != 1 ]]; then
+  echo "Usage: $0 <production|staging>"
+  exit 1
+fi
+environment=$1
+
+target_zed_kube_cluster
+
+echo "Running perf on collab, collecting 30s of data..."
+
+kubectl -n $environment exec -it deployments/collab -- perf record -p 1 -g -m 64 --call-graph dwarf -- sleep 30
+
+run="collab-$environment-$(date -Iseconds)"
+echo "Processing data and downloading to '$run.perf'..."
+
+kubectl -n $environment exec -it deployments/collab -- perf --no-pager script > "$run.perf"
+
+which inferno-flamegraph 2>/dev/null || (echo "installing inferno..."; cargo install inferno)
+
+inferno-collapse-perf "$run.perf" | inferno-flamegraph > "$run.svg"
+open "$run.svg"