diff --git a/crates/collab/k8s/collab.template.yml b/crates/collab/k8s/collab.template.yml index 214b550ac20499b8b03cfafeefab9b45d51fcc24..1476e5890283c62cee3563a327fcdd5ee84842e7 100644 --- a/crates/collab/k8s/collab.template.yml +++ b/crates/collab/k8s/collab.template.yml @@ -226,12 +226,6 @@ spec: secretKeyRef: name: supermaven key: api_key - - name: USER_BACKFILLER_GITHUB_ACCESS_TOKEN - valueFrom: - secretKeyRef: - name: user-backfiller - key: github_access_token - optional: true - name: INVITE_LINK_PREFIX value: ${INVITE_LINK_PREFIX} - name: RUST_BACKTRACE diff --git a/crates/collab/src/lib.rs b/crates/collab/src/lib.rs index 191025df3770db78df3a12bc16d5c8f32d54571c..f1de0cdc7ff79cd25c8ef7b0b2b21d9e0b45d332 100644 --- a/crates/collab/src/lib.rs +++ b/crates/collab/src/lib.rs @@ -7,7 +7,6 @@ pub mod llm; pub mod migrations; pub mod rpc; pub mod seed; -pub mod user_backfiller; #[cfg(test)] mod tests; @@ -157,7 +156,6 @@ pub struct Config { pub slack_panics_webhook: Option, pub auto_join_channel_id: Option, pub supermaven_admin_api_key: Option>, - pub user_backfiller_github_access_token: Option>, } impl Config { @@ -211,7 +209,6 @@ impl Config { migrations_path: None, seed_path: None, supermaven_admin_api_key: None, - user_backfiller_github_access_token: None, kinesis_region: None, kinesis_access_key: None, kinesis_secret_key: None, diff --git a/crates/collab/src/main.rs b/crates/collab/src/main.rs index cb6f6cad1dd483c463bcda5d8a4ff914f4bf10aa..6b94459910647c1e48ee69f2b0dd38afd3723821 100644 --- a/crates/collab/src/main.rs +++ b/crates/collab/src/main.rs @@ -11,7 +11,6 @@ use collab::ServiceMode; use collab::api::CloudflareIpCountryHeader; use collab::llm::db::LlmDatabase; use collab::migrations::run_database_migrations; -use collab::user_backfiller::spawn_user_backfiller; use collab::{ AppState, Config, Result, api::fetch_extensions_from_blob_store_periodically, db, env, executor::Executor, rpc::ResultExt, @@ -114,7 +113,6 @@ async fn main() -> Result<()> { if mode.is_api() { fetch_extensions_from_blob_store_periodically(state.clone()); - spawn_user_backfiller(state.clone()); app = app .merge(collab::api::events::router()) diff --git a/crates/collab/src/tests/test_server.rs b/crates/collab/src/tests/test_server.rs index eb7df28478158a10a0c2d52c3560cad391937383..5e99cc192ad080c1a79913c79fbbaae9d8b6d951 100644 --- a/crates/collab/src/tests/test_server.rs +++ b/crates/collab/src/tests/test_server.rs @@ -604,7 +604,6 @@ impl TestServer { migrations_path: None, seed_path: None, supermaven_admin_api_key: None, - user_backfiller_github_access_token: None, kinesis_region: None, kinesis_stream: None, kinesis_access_key: None, diff --git a/crates/collab/src/user_backfiller.rs b/crates/collab/src/user_backfiller.rs deleted file mode 100644 index fdb9ef67c2f1d04bf0a1919045f91d75a14ef834..0000000000000000000000000000000000000000 --- a/crates/collab/src/user_backfiller.rs +++ /dev/null @@ -1,165 +0,0 @@ -use std::sync::Arc; - -use anyhow::{Context as _, Result}; -use chrono::{DateTime, Utc}; -use util::ResultExt; - -use crate::db::Database; -use crate::executor::Executor; -use crate::{AppState, Config}; - -pub fn spawn_user_backfiller(app_state: Arc) { - let Some(user_backfiller_github_access_token) = - app_state.config.user_backfiller_github_access_token.clone() - else { - log::info!("no USER_BACKFILLER_GITHUB_ACCESS_TOKEN set; not spawning user backfiller"); - return; - }; - - let executor = app_state.executor.clone(); - executor.spawn_detached({ - let executor = executor.clone(); - async move { - let user_backfiller = UserBackfiller::new( - app_state.config.clone(), - user_backfiller_github_access_token, - app_state.db.clone(), - executor, - ); - - log::info!("backfilling users"); - - user_backfiller - .backfill_github_user_created_at() - .await - .log_err(); - } - }); -} - -const GITHUB_REQUESTS_PER_HOUR_LIMIT: usize = 5_000; -const SLEEP_DURATION_BETWEEN_USERS: std::time::Duration = std::time::Duration::from_millis( - (GITHUB_REQUESTS_PER_HOUR_LIMIT as f64 / 60. / 60. * 1000.) as u64, -); - -struct UserBackfiller { - config: Config, - github_access_token: Arc, - db: Arc, - http_client: reqwest::Client, - executor: Executor, -} - -impl UserBackfiller { - fn new( - config: Config, - github_access_token: Arc, - db: Arc, - executor: Executor, - ) -> Self { - Self { - config, - github_access_token, - db, - http_client: reqwest::Client::new(), - executor, - } - } - - async fn backfill_github_user_created_at(&self) -> Result<()> { - let initial_channel_id = self.config.auto_join_channel_id; - - let users_missing_github_user_created_at = - self.db.get_users_missing_github_user_created_at().await?; - - for user in users_missing_github_user_created_at { - match self - .fetch_github_user(&format!( - "https://api.github.com/user/{}", - user.github_user_id - )) - .await - { - Ok(github_user) => { - self.db - .update_or_create_user_by_github_account( - &user.github_login, - github_user.id, - user.email_address.as_deref(), - user.name.as_deref(), - github_user.created_at, - initial_channel_id, - ) - .await?; - - log::info!("backfilled user: {}", user.github_login); - } - Err(err) => { - log::error!("failed to fetch GitHub user {}: {err}", user.github_login); - } - } - - self.executor.sleep(SLEEP_DURATION_BETWEEN_USERS).await; - } - - Ok(()) - } - - async fn fetch_github_user(&self, url: &str) -> Result { - let response = self - .http_client - .get(url) - .header( - "authorization", - format!("Bearer {}", self.github_access_token), - ) - .header("user-agent", "zed") - .send() - .await - .with_context(|| format!("failed to fetch '{url}'"))?; - - let rate_limit_remaining = response - .headers() - .get("x-ratelimit-remaining") - .and_then(|value| value.to_str().ok()) - .and_then(|value| value.parse::().ok()); - let rate_limit_reset = response - .headers() - .get("x-ratelimit-reset") - .and_then(|value| value.to_str().ok()) - .and_then(|value| value.parse::().ok()) - .and_then(|value| DateTime::from_timestamp(value, 0)); - - if rate_limit_remaining == Some(0) - && let Some(reset_at) = rate_limit_reset - { - let now = Utc::now(); - if reset_at > now { - let sleep_duration = reset_at - now; - log::info!( - "rate limit reached. Sleeping for {} seconds", - sleep_duration.num_seconds() - ); - self.executor.sleep(sleep_duration.to_std().unwrap()).await; - } - } - - response - .error_for_status() - .context("fetching GitHub user")? - .json() - .await - .with_context(|| format!("failed to deserialize GitHub user from '{url}'")) - } -} - -#[derive(serde::Deserialize)] -struct GithubUser { - id: i32, - created_at: DateTime, - #[expect( - unused, - reason = "This field was found to be unused with serde library bump; it's left as is due to insufficient context on PO's side, but it *may* be fine to remove" - )] - name: Option, -}