Release failure visibility (#42572)

Conrad Irwin created

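Adds a `notify_on_failure` job to the release, nightly release, and after-release workflows that posts a link to the failed run to Slack.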

Release Notes:

- N/A

Change summary

.github/workflows/after_release.yml                  | 16 ++++++++++++
.github/workflows/release.yml                        | 14 ++++++++++
.github/workflows/release_nightly.yml                | 18 +++++++++++++
tooling/xtask/src/tasks/workflows/after_release.rs   | 10 ++++++
tooling/xtask/src/tasks/workflows/release.rs         | 16 ++++++++++++
tooling/xtask/src/tasks/workflows/release_nightly.rs |  4 ++
tooling/xtask/src/tasks/workflows/vars.rs            |  1 
7 files changed, 77 insertions(+), 2 deletions(-)

Detailed changes

.github/workflows/after_release.yml 🔗

@@ -86,3 +86,19 @@ jobs:
         SENTRY_ORG: zed-dev
         SENTRY_PROJECT: zed
         SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
+  notify_on_failure:
+    needs:
+    - rebuild_releases_page
+    - post_to_discord
+    - publish_winget
+    - create_sentry_release
+    if: failure()
+    runs-on: namespace-profile-2x4-ubuntu-2404
+    steps:
+    - name: release::notify_on_failure::notify_slack
+      run: |-
+        curl -X POST -H 'Content-type: application/json'\
+         --data '{"text":"${{ github.workflow }} failed:  ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' "$SLACK_WEBHOOK"
+      shell: bash -euxo pipefail {0}
+      env:
+        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_WORKFLOW_FAILURES }}

.github/workflows/release.yml 🔗

@@ -477,6 +477,20 @@ jobs:
       shell: bash -euxo pipefail {0}
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  notify_on_failure:
+    needs:
+    - upload_release_assets
+    - auto_release_preview
+    if: failure()
+    runs-on: namespace-profile-2x4-ubuntu-2404
+    steps:
+    - name: release::notify_on_failure::notify_slack
+      run: |-
+        curl -X POST -H 'Content-type: application/json'\
+         --data '{"text":"${{ github.workflow }} failed:  ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' "$SLACK_WEBHOOK"
+      shell: bash -euxo pipefail {0}
+      env:
+        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_WORKFLOW_FAILURES }}
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
   cancel-in-progress: true

.github/workflows/release_nightly.yml 🔗

@@ -490,3 +490,21 @@ jobs:
         SENTRY_PROJECT: zed
         SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
     timeout-minutes: 60
+  notify_on_failure:
+    needs:
+    - bundle_linux_aarch64
+    - bundle_linux_x86_64
+    - bundle_mac_aarch64
+    - bundle_mac_x86_64
+    - bundle_windows_aarch64
+    - bundle_windows_x86_64
+    if: failure()
+    runs-on: namespace-profile-2x4-ubuntu-2404
+    steps:
+    - name: release::notify_on_failure::notify_slack
+      run: |-
+        curl -X POST -H 'Content-type: application/json'\
+         --data '{"text":"${{ github.workflow }} failed:  ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' "$SLACK_WEBHOOK"
+      shell: bash -euxo pipefail {0}
+      env:
+        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_WORKFLOW_FAILURES }}

tooling/xtask/src/tasks/workflows/after_release.rs 🔗

@@ -1,7 +1,8 @@
 use gh_workflow::*;
 
 use crate::tasks::workflows::{
-    release, runners,
+    release::{self, notify_on_failure},
+    runners,
     steps::{NamedJob, checkout_repo, dependant_job, named},
     vars::{self, StepOutput},
 };
@@ -11,6 +12,12 @@ pub fn after_release() -> Workflow {
     let post_to_discord = post_to_discord(&[&refresh_zed_dev]);
     let publish_winget = publish_winget();
     let create_sentry_release = create_sentry_release();
+    let notify_on_failure = notify_on_failure(&[
+        &refresh_zed_dev,
+        &post_to_discord,
+        &publish_winget,
+        &create_sentry_release,
+    ]);
 
     named::workflow()
         .on(Event::default().release(Release::default().types(vec![ReleaseType::Published])))
@@ -18,6 +25,7 @@ pub fn after_release() -> Workflow {
         .add_job(post_to_discord.name, post_to_discord.job)
         .add_job(publish_winget.name, publish_winget.job)
         .add_job(create_sentry_release.name, create_sentry_release.job)
+        .add_job(notify_on_failure.name, notify_on_failure.job)
 }
 
 fn rebuild_releases_page() -> NamedJob {

tooling/xtask/src/tasks/workflows/release.rs 🔗

@@ -28,6 +28,7 @@ pub(crate) fn release() -> Workflow {
     let upload_release_assets = upload_release_assets(&[&create_draft_release], &bundle);
 
     let auto_release_preview = auto_release_preview(&[&upload_release_assets]);
+    let notify_on_failure = notify_on_failure(&[&upload_release_assets, &auto_release_preview]);
 
     named::workflow()
         .on(Event::default().push(Push::default().tags(vec!["v*".to_string()])))
@@ -47,6 +48,7 @@ pub(crate) fn release() -> Workflow {
         })
         .add_job(upload_release_assets.name, upload_release_assets.job)
         .add_job(auto_release_preview.name, auto_release_preview.job)
+        .add_job(notify_on_failure.name, notify_on_failure.job)
 }
 
 pub(crate) struct ReleaseBundleJobs {
@@ -177,3 +179,17 @@ fn create_draft_release() -> NamedJob {
             .add_step(create_release()),
     )
 }
+
+/// Builds a job that reports a failed workflow run to Slack.
+///
+/// The job is created with `dependant_job(deps)`, runs on `runners::LINUX_SMALL`, and is
+/// conditioned on the `failure()` expression, so it is meant to run only when something it
+/// depends on did not succeed. Its single step posts the workflow name and a link to the run.
+pub(crate) fn notify_on_failure(deps: &[&NamedJob]) -> NamedJob {
+    // POSTs `{"text": "<workflow> failed:  <run URL>"}` to the URL in `$SLACK_WEBHOOK`, which
+    // is supplied to the step's environment from `vars::SLACK_WEBHOOK_WORKFLOW_FAILURES`.
+    // NOTE(review): curl is called without `--fail`, so an HTTP error status from the webhook
+    // would not fail the step on its own — confirm this is intended.
+    fn notify_slack() -> Step<Run> {
+        named::bash(
+            "curl -X POST -H 'Content-type: application/json'\\\n --data '{\"text\":\"${{ github.workflow }} failed:  ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}\"}' \"$SLACK_WEBHOOK\""
+        ).add_env(("SLACK_WEBHOOK", vars::SLACK_WEBHOOK_WORKFLOW_FAILURES))
+    }
+
+    let job = dependant_job(deps)
+        .runs_on(runners::LINUX_SMALL)
+        .cond(Expression::new("failure()"))
+        .add_step(notify_slack());
+    named::job(job)
+}

tooling/xtask/src/tasks/workflows/release_nightly.rs 🔗

@@ -1,7 +1,7 @@
 use crate::tasks::workflows::{
     nix_build::build_nix,
     release::{
-        ReleaseBundleJobs, create_sentry_release, download_workflow_artifacts,
+        ReleaseBundleJobs, create_sentry_release, download_workflow_artifacts, notify_on_failure,
         prep_release_artifacts,
     },
     run_bundling::{bundle_linux, bundle_mac, bundle_windows},
@@ -44,6 +44,7 @@ pub fn release_nightly() -> Workflow {
         &[&style, &tests],
     );
     let update_nightly_tag = update_nightly_tag_job(&bundle);
+    let notify_on_failure = notify_on_failure(&bundle.jobs());
 
     named::workflow()
         .on(Event::default()
@@ -63,6 +64,7 @@ pub fn release_nightly() -> Workflow {
         .add_job(nix_linux_x86.name, nix_linux_x86.job)
         .add_job(nix_mac_arm.name, nix_mac_arm.job)
         .add_job(update_nightly_tag.name, update_nightly_tag.job)
+        .add_job(notify_on_failure.name, notify_on_failure.job)
 }
 
 fn check_style() -> NamedJob {

tooling/xtask/src/tasks/workflows/vars.rs 🔗

@@ -42,6 +42,7 @@ secret!(ZED_ZIPPY_APP_PRIVATE_KEY);
 secret!(DISCORD_WEBHOOK_RELEASE_NOTES);
 secret!(WINGET_TOKEN);
 secret!(VERCEL_TOKEN);
+secret!(SLACK_WEBHOOK_WORKFLOW_FAILURES);
 
 // todo(ci) make these secrets too...
 var!(AZURE_SIGNING_ACCOUNT_NAME);