Deploy collab like nightly (#7174)

Conrad Irwin created

After this change we'll be able to push a tag to GitHub to deploy to
collab.

The advantage of this is that there's no longer a separate step to
first build the image and then deploy it.

In the future I'd like to make this happen more automatically (maybe as
part of bump nightly).

Release Notes:

- N/A

Change summary

.github/workflows/deploy_collab.yml        | 107 ++++++++++++++++++++++++
.github/workflows/publish_collab_image.yml |  49 ----------
Dockerfile                                 |   3 
crates/collab/README.md                    |  32 +++++++
crates/collab/src/main.rs                  |   5 
script/bump-collab-version                 |   8 -
script/deploy-collab                       |  25 ++---
script/lib/deploy-helpers.sh               |  41 ++++----
script/what-is-deployed                    |  25 ++---
9 files changed, 184 insertions(+), 111 deletions(-)

Detailed changes

.github/workflows/deploy_collab.yml 🔗

@@ -0,0 +1,107 @@
+name: Publish Collab Server Image
+
+on:
+  push:
+    tags:
+      - collab-production
+      - collab-staging
+
+env:
+  DOCKER_BUILDKIT: 1
+  DIGITALOCEAN_ACCESS_TOKEN: ${{ secrets.DIGITALOCEAN_ACCESS_TOKEN }}
+
+jobs:
+  style:
+    name: Check formatting and Clippy lints
+    runs-on:
+      - self-hosted
+      - test
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+        with:
+          clean: false
+          submodules: "recursive"
+          fetch-depth: 0
+
+      - name: Run style checks
+        uses: ./.github/actions/check_style
+
+  tests:
+    name: Run tests
+    runs-on:
+      - self-hosted
+      - test
+    needs: style
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+        with:
+          clean: false
+          submodules: "recursive"
+          fetch-depth: 0
+
+      - name: Run tests
+        uses: ./.github/actions/run_tests
+
+  publish:
+    name: Publish collab server image
+    needs:
+      - style
+      - tests
+    runs-on:
+      - self-hosted
+      - deploy
+    steps:
+      - name: Add Rust to the PATH
+        run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH
+
+      - name: Sign into DigitalOcean docker registry
+        run: doctl registry login
+
+      - name: Prune Docker system
+        run: docker system prune  --filter 'until=720h' -f
+
+      - name: Checkout repo
+        uses: actions/checkout@v4
+        with:
+          clean: false
+          submodules: "recursive"
+
+      - name: Build docker image
+        run: docker build . --build-arg GITHUB_SHA=$GITHUB_SHA --tag registry.digitalocean.com/zed/collab:$GITHUB_SHA
+
+      - name: Publish docker image
+        run: docker push registry.digitalocean.com/zed/collab:${GITHUB_SHA}
+
+  deploy:
+    name: Deploy new server image
+    needs:
+      - publish
+    runs-on:
+      - self-hosted
+      - deploy
+
+    steps:
+      - name: Sign into Kubernetes
+        run: doctl kubernetes cluster kubeconfig save --expiry-seconds 600 ${{ secrets.CLUSTER_NAME }}
+
+      - name: Determine namespace  # maps the pushed tag to KUBE_NAMESPACE for the rollout steps below
+        run: |
+          set -eu
+          if [[ $GITHUB_REF_NAME = "collab-production" ]]; then
+            echo "Deploying collab:$GITHUB_SHA to production"
+            echo "KUBE_NAMESPACE=production" >> "$GITHUB_ENV"
+          elif [[ $GITHUB_REF_NAME = "collab-staging" ]]; then
+            echo "Deploying collab:$GITHUB_SHA to staging"
+            echo "KUBE_NAMESPACE=staging" >> "$GITHUB_ENV"
+          else  # this workflow only triggers on tag pushes, so an unexpected ref is a tag
+            echo "cowardly refusing to deploy from an unknown tag"
+            exit 1
+          fi
+
+      - name: Start rollout
+        run: kubectl -n "$KUBE_NAMESPACE" set image deployment/collab collab=registry.digitalocean.com/zed/collab:${GITHUB_SHA}
+
+      - name: Wait for rollout to finish
+        run: kubectl -n "$KUBE_NAMESPACE" rollout status deployment/collab

.github/workflows/publish_collab_image.yml 🔗

@@ -1,49 +0,0 @@
-name: Publish Collab Server Image
-
-on:
-  push:
-    tags:
-      - collab-v*
-
-env:
-  DOCKER_BUILDKIT: 1
-  DIGITALOCEAN_ACCESS_TOKEN: ${{ secrets.DIGITALOCEAN_ACCESS_TOKEN }}
-
-jobs:
-  publish:
-    name: Publish collab server image
-    runs-on:
-      - self-hosted
-      - deploy
-    steps:
-      - name: Add Rust to the PATH
-        run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH
-
-      - name: Sign into DigitalOcean docker registry
-        run: doctl registry login
-
-      - name: Prune Docker system
-        run: docker system prune
-
-      - name: Checkout repo
-        uses: actions/checkout@v4
-        with:
-          clean: false
-          submodules: 'recursive'
-
-      - name: Determine version
-        run: |
-          set -eu
-          version=$(script/get-crate-version collab)
-          if [[ $GITHUB_REF_NAME != "collab-v${version}" ]]; then
-            echo "release tag ${GITHUB_REF_NAME} does not match version ${version}"
-            exit 1
-          fi
-          echo "Publishing collab version: ${version}"
-          echo "COLLAB_VERSION=${version}" >> $GITHUB_ENV
-
-      - name: Build docker image
-        run: docker build . --tag registry.digitalocean.com/zed/collab:v${COLLAB_VERSION}
-
-      - name: Publish docker image
-        run: docker push registry.digitalocean.com/zed/collab:v${COLLAB_VERSION}

Dockerfile 🔗

@@ -6,6 +6,9 @@ COPY . .
 
 # Compile collab server
 ARG CARGO_PROFILE_RELEASE_PANIC=abort
+ARG GITHUB_SHA
+
+ENV GITHUB_SHA=$GITHUB_SHA
 RUN --mount=type=cache,target=./script/node_modules \
     --mount=type=cache,target=/usr/local/cargo/registry \
     --mount=type=cache,target=./target \

crates/collab/README.md 🔗

@@ -3,3 +3,35 @@
 This crate is what we run at https://collab.zed.dev.
 
 It contains our back-end logic for collaboration, to which we connect from the Zed client via a websocket after authenticating via https://zed.dev, which is a separate repo running on Vercel.
+
+# Local Development
+
+Detailed instructions on getting started are [here](https://zed.dev/docs/local-collaboration).
+
+# Deployment
+
+We run two instances of collab:
+
+* Staging (https://staging-collab.zed.dev)
+* Production (https://collab.zed.dev)
+
+Both of these run on the Kubernetes cluster hosted in DigitalOcean.
+
+Deployment is triggered by pushing the `collab-staging` (or `collab-production`) tag to GitHub. The best way to do this is:
+
+* `./script/deploy-collab staging`
+* `./script/deploy-collab production`
+
+You can tell what is currently deployed with `./script/what-is-deployed`.
+
+# Database Migrations
+
+To create a new migration:
+
+```
+./script/sqlx migrate add <name>
+```
+
+Migrations are run automatically on service start, so run `foreman start` again. The service will crash if the migrations fail.
+
+When you create a new migration, you also need to update the [SQLite schema](./migrations.sqlite/20221109000000_test_schema.sql) that is used for testing.

crates/collab/src/main.rs 🔗

@@ -14,6 +14,7 @@ use tracing_subscriber::{filter::EnvFilter, fmt::format::JsonFields, Layer};
 use util::ResultExt;
 
 const VERSION: &'static str = env!("CARGO_PKG_VERSION");
+const REVISION: Option<&'static str> = option_env!("GITHUB_SHA");
 
 #[tokio::main]
 async fn main() -> Result<()> {
@@ -26,7 +27,7 @@ async fn main() -> Result<()> {
 
     match args().skip(1).next().as_deref() {
         Some("version") => {
-            println!("collab v{VERSION}");
+            println!("collab v{} ({})", VERSION, REVISION.unwrap_or("unknown"));
         }
         Some("migrate") => {
             run_migrations().await?;
@@ -105,7 +106,7 @@ async fn run_migrations() -> Result<()> {
 }
 
 async fn handle_root() -> String {
-    format!("collab v{VERSION}")
+    format!("collab v{} ({})", VERSION, REVISION.unwrap_or("unknown"))
 }
 
 async fn handle_liveness_probe(Extension(state): Extension<Arc<AppState>>) -> Result<String> {

script/bump-collab-version 🔗

@@ -1,8 +0,0 @@
-#!/bin/bash
-
-if [[ $# < 1 ]]; then
-  echo "Missing version increment (major, minor, or patch)" >&2
-  exit 1
-fi
-
-exec script/lib/bump-version.sh collab collab-v '' $1

script/deploy-collab 🔗

@@ -3,22 +3,19 @@
 set -eu
 source script/lib/deploy-helpers.sh
 
-if [[ $# < 2 ]]; then
-  echo "Usage: $0 <production|staging> <tag-name>"
+if [[ $# != 1 ]]; then
+  echo "Usage: $0 <production|staging>"
   exit 1
 fi
 environment=$1
-version=$2
+tag="$(tag_for_environment $environment)"
 
-export_vars_for_environment ${environment}
-image_id=$(image_id_for_version ${version})
-
-export ZED_DO_CERTIFICATE_ID=$(doctl compute certificate list --format ID --no-header)
-export ZED_KUBE_NAMESPACE=${environment}
-export ZED_IMAGE_ID=${image_id}
-
-target_zed_kube_cluster
-envsubst < crates/collab/k8s/collab.template.yml | kubectl apply -f -
-kubectl -n "$environment" rollout status deployment/collab --watch
+branch=$(git rev-parse --abbrev-ref HEAD)
+if [ "$branch" != "main" ]; then
+  echo "You must be on main to run this script"
+  exit 1
+fi
 
-echo "deployed collab v${version} to ${environment}"
+git pull --ff-only origin main  # fast-forward local main to origin before tagging
+git tag -f "$tag"  # move the environment's deploy tag to the current commit
+git push -f origin "$tag"  # pushing the tag is what triggers the deploy_collab workflow

script/lib/deploy-helpers.sh 🔗

@@ -8,33 +8,30 @@ function export_vars_for_environment {
   export $(cat $env_file)
 }
 
-function image_id_for_version {
-  local version=$1
-
-  # Check that version is valid
-  if [[ ! ${version} =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
-    echo "Invalid version number '${version}'" >&2
-    exit 1
+function target_zed_kube_cluster {
+  if [[ $(kubectl config current-context 2> /dev/null) != do-nyc1-zed-1 ]]; then
+    doctl kubernetes cluster kubeconfig save zed-1
   fi
+}
 
-  # Check that image exists for version
-  tag_names=$(doctl registry repository list-tags collab --no-header --format Tag)
-  if ! $(echo "${tag_names}" | grep -Fqx v${version}); then
-    echo "No docker image tagged for version '${version}'" >&2
-    echo "Found images with these tags:" ${tag_names} >&2
+function tag_for_environment {  # prints the git tag that deploys environment $1 (production|staging)
+  if [[ "$1" == "production" ]]; then
+    echo "collab-production"
+  elif [[ "$1" == "staging" ]]; then
+    echo "collab-staging"
+  else
+    echo "Invalid environment name '$1'" >&2  # report the argument itself, not a caller's global
     exit 1
   fi
-  
-  echo "registry.digitalocean.com/zed/collab:v${version}"
 }
 
-function version_for_image_id {
-  local image_id=$1
-  echo $image_id | cut -d: -f2
-}
-
-function target_zed_kube_cluster {
-  if [[ $(kubectl config current-context 2> /dev/null) != do-nyc1-zed-1 ]]; then
-    doctl kubernetes cluster kubeconfig save zed-1
+function url_for_environment {  # prints the base URL of the collab instance for environment $1
+  if [[ "$1" == "production" ]]; then
+    echo "https://collab.zed.dev"
+  elif [[ "$1" == "staging" ]]; then
+    echo "https://collab-staging.zed.dev"
+  else
+    echo "Invalid environment name '$1'" >&2  # report the argument itself, not a caller's global
+    exit 1
   fi
 }

script/what-is-deployed 🔗

@@ -3,13 +3,15 @@
 set -eu
 source script/lib/deploy-helpers.sh
 
-if [[ $# < 1 ]]; then
+if [[ $# != 1 ]]; then
   echo "Usage: $0 <production|staging>"
   exit 1
 fi
+
 environment=$1
+url="$(url_for_environment $environment)"
+tag="$(tag_for_environment $environment)"
 
-export_vars_for_environment ${environment}
 target_zed_kube_cluster
 
 deployed_image_id=$(
@@ -20,18 +22,9 @@ deployed_image_id=$(
     | cut -d: -f2
 )
 
-job_image_ids=$(
-  kubectl \
-    --namespace=${environment} \
-    get jobs \
-    -o 'jsonpath={range .items[0:5]}{.spec.template.spec.containers[0].image}{"\n"}{end}' \
-    2> /dev/null \
-    || true
-)
+echo "Deployed image version: $deployed_image_id"
 
-echo "Deployed image version:"
-echo "$deployed_image_id"
-echo
-echo "Migration job image versions:"
-echo "$job_image_ids"
-echo
+git fetch >/dev/null
+if [[ "$(git rev-parse tags/$tag)" != $deployed_image_id ]]; then
+    echo "NOTE: tags/$tag $(git rev-parse tags/$tag) is not yet deployed"
+fi;