collab: Remove user backfiller (#38291)

Marshall Bowers created

This PR removes the user backfiller from Collab.

Release Notes:

- N/A

Change summary

crates/collab/k8s/collab.template.yml  |   6 -
crates/collab/src/lib.rs               |   3 
crates/collab/src/main.rs              |   2 
crates/collab/src/tests/test_server.rs |   1 
crates/collab/src/user_backfiller.rs   | 165 ---------------------------
5 files changed, 177 deletions(-)

Detailed changes

crates/collab/k8s/collab.template.yml 🔗

@@ -226,12 +226,6 @@ spec:
                 secretKeyRef:
                   name: supermaven
                   key: api_key
-            - name: USER_BACKFILLER_GITHUB_ACCESS_TOKEN
-              valueFrom:
-                secretKeyRef:
-                  name: user-backfiller
-                  key: github_access_token
-                  optional: true
             - name: INVITE_LINK_PREFIX
               value: ${INVITE_LINK_PREFIX}
             - name: RUST_BACKTRACE

crates/collab/src/lib.rs 🔗

@@ -7,7 +7,6 @@ pub mod llm;
 pub mod migrations;
 pub mod rpc;
 pub mod seed;
-pub mod user_backfiller;
 
 #[cfg(test)]
 mod tests;
@@ -157,7 +156,6 @@ pub struct Config {
     pub slack_panics_webhook: Option<String>,
     pub auto_join_channel_id: Option<ChannelId>,
     pub supermaven_admin_api_key: Option<Arc<str>>,
-    pub user_backfiller_github_access_token: Option<Arc<str>>,
 }
 
 impl Config {
@@ -211,7 +209,6 @@ impl Config {
             migrations_path: None,
             seed_path: None,
             supermaven_admin_api_key: None,
-            user_backfiller_github_access_token: None,
             kinesis_region: None,
             kinesis_access_key: None,
             kinesis_secret_key: None,

crates/collab/src/main.rs 🔗

@@ -11,7 +11,6 @@ use collab::ServiceMode;
 use collab::api::CloudflareIpCountryHeader;
 use collab::llm::db::LlmDatabase;
 use collab::migrations::run_database_migrations;
-use collab::user_backfiller::spawn_user_backfiller;
 use collab::{
     AppState, Config, Result, api::fetch_extensions_from_blob_store_periodically, db, env,
     executor::Executor, rpc::ResultExt,
@@ -114,7 +113,6 @@ async fn main() -> Result<()> {
 
                 if mode.is_api() {
                     fetch_extensions_from_blob_store_periodically(state.clone());
-                    spawn_user_backfiller(state.clone());
 
                     app = app
                         .merge(collab::api::events::router())

crates/collab/src/tests/test_server.rs 🔗

@@ -604,7 +604,6 @@ impl TestServer {
                 migrations_path: None,
                 seed_path: None,
                 supermaven_admin_api_key: None,
-                user_backfiller_github_access_token: None,
                 kinesis_region: None,
                 kinesis_stream: None,
                 kinesis_access_key: None,

crates/collab/src/user_backfiller.rs 🔗

@@ -1,165 +0,0 @@
-use std::sync::Arc;
-
-use anyhow::{Context as _, Result};
-use chrono::{DateTime, Utc};
-use util::ResultExt;
-
-use crate::db::Database;
-use crate::executor::Executor;
-use crate::{AppState, Config};
-
-pub fn spawn_user_backfiller(app_state: Arc<AppState>) {
-    let Some(user_backfiller_github_access_token) =
-        app_state.config.user_backfiller_github_access_token.clone()
-    else {
-        log::info!("no USER_BACKFILLER_GITHUB_ACCESS_TOKEN set; not spawning user backfiller");
-        return;
-    };
-
-    let executor = app_state.executor.clone();
-    executor.spawn_detached({
-        let executor = executor.clone();
-        async move {
-            let user_backfiller = UserBackfiller::new(
-                app_state.config.clone(),
-                user_backfiller_github_access_token,
-                app_state.db.clone(),
-                executor,
-            );
-
-            log::info!("backfilling users");
-
-            user_backfiller
-                .backfill_github_user_created_at()
-                .await
-                .log_err();
-        }
-    });
-}
-
-const GITHUB_REQUESTS_PER_HOUR_LIMIT: usize = 5_000;
-const SLEEP_DURATION_BETWEEN_USERS: std::time::Duration = std::time::Duration::from_millis(
-    (GITHUB_REQUESTS_PER_HOUR_LIMIT as f64 / 60. / 60. * 1000.) as u64,
-);
-
-struct UserBackfiller {
-    config: Config,
-    github_access_token: Arc<str>,
-    db: Arc<Database>,
-    http_client: reqwest::Client,
-    executor: Executor,
-}
-
-impl UserBackfiller {
-    fn new(
-        config: Config,
-        github_access_token: Arc<str>,
-        db: Arc<Database>,
-        executor: Executor,
-    ) -> Self {
-        Self {
-            config,
-            github_access_token,
-            db,
-            http_client: reqwest::Client::new(),
-            executor,
-        }
-    }
-
-    async fn backfill_github_user_created_at(&self) -> Result<()> {
-        let initial_channel_id = self.config.auto_join_channel_id;
-
-        let users_missing_github_user_created_at =
-            self.db.get_users_missing_github_user_created_at().await?;
-
-        for user in users_missing_github_user_created_at {
-            match self
-                .fetch_github_user(&format!(
-                    "https://api.github.com/user/{}",
-                    user.github_user_id
-                ))
-                .await
-            {
-                Ok(github_user) => {
-                    self.db
-                        .update_or_create_user_by_github_account(
-                            &user.github_login,
-                            github_user.id,
-                            user.email_address.as_deref(),
-                            user.name.as_deref(),
-                            github_user.created_at,
-                            initial_channel_id,
-                        )
-                        .await?;
-
-                    log::info!("backfilled user: {}", user.github_login);
-                }
-                Err(err) => {
-                    log::error!("failed to fetch GitHub user {}: {err}", user.github_login);
-                }
-            }
-
-            self.executor.sleep(SLEEP_DURATION_BETWEEN_USERS).await;
-        }
-
-        Ok(())
-    }
-
-    async fn fetch_github_user(&self, url: &str) -> Result<GithubUser> {
-        let response = self
-            .http_client
-            .get(url)
-            .header(
-                "authorization",
-                format!("Bearer {}", self.github_access_token),
-            )
-            .header("user-agent", "zed")
-            .send()
-            .await
-            .with_context(|| format!("failed to fetch '{url}'"))?;
-
-        let rate_limit_remaining = response
-            .headers()
-            .get("x-ratelimit-remaining")
-            .and_then(|value| value.to_str().ok())
-            .and_then(|value| value.parse::<i32>().ok());
-        let rate_limit_reset = response
-            .headers()
-            .get("x-ratelimit-reset")
-            .and_then(|value| value.to_str().ok())
-            .and_then(|value| value.parse::<i64>().ok())
-            .and_then(|value| DateTime::from_timestamp(value, 0));
-
-        if rate_limit_remaining == Some(0)
-            && let Some(reset_at) = rate_limit_reset
-        {
-            let now = Utc::now();
-            if reset_at > now {
-                let sleep_duration = reset_at - now;
-                log::info!(
-                    "rate limit reached. Sleeping for {} seconds",
-                    sleep_duration.num_seconds()
-                );
-                self.executor.sleep(sleep_duration.to_std().unwrap()).await;
-            }
-        }
-
-        response
-            .error_for_status()
-            .context("fetching GitHub user")?
-            .json()
-            .await
-            .with_context(|| format!("failed to deserialize GitHub user from '{url}'"))
-    }
-}
-
-#[derive(serde::Deserialize)]
-struct GithubUser {
-    id: i32,
-    created_at: DateTime<Utc>,
-    #[expect(
-        unused,
-        reason = "This field was found to be unused with serde library bump; it's left as is due to insufficient context on PO's side, but it *may* be fine to remove"
-    )]
-    name: Option<String>,
-}