diff --git a/crates/collab/k8s/collab.template.yml b/crates/collab/k8s/collab.template.yml index 6d0cafc0acecdfb4c1d38e27ad8b8e99c97498da..3e0304c8be62b41ddc51bee5ec46ff2a0668745e 100644 --- a/crates/collab/k8s/collab.template.yml +++ b/crates/collab/k8s/collab.template.yml @@ -216,6 +216,11 @@ spec: secretKeyRef: name: supermaven key: api_key + - name: USER_BACKFILLER_GITHUB_ACCESS_TOKEN + valueFrom: + secretKeyRef: + name: user-backfiller + key: github_access_token - name: INVITE_LINK_PREFIX value: ${INVITE_LINK_PREFIX} - name: RUST_BACKTRACE diff --git a/crates/collab/src/db/queries/users.rs b/crates/collab/src/db/queries/users.rs index 251f5bec685d9266261549ef8e1bc537725efaa5..2aa5857560d8693a0ec642582fff6ff14e0dad8e 100644 --- a/crates/collab/src/db/queries/users.rs +++ b/crates/collab/src/db/queries/users.rs @@ -377,4 +377,14 @@ impl Database { }) .await } + + pub async fn get_users_missing_github_user_created_at(&self) -> Result> { + self.transaction(|tx| async move { + Ok(user::Entity::find() + .filter(user::Column::GithubUserCreatedAt.is_null()) + .all(&*tx) + .await?) + }) + .await + } } diff --git a/crates/collab/src/lib.rs b/crates/collab/src/lib.rs index 06d741e2cab9ae4c3d068846eda1f35286d98d80..11e93db66865eb8d2033090d006cd2d059878fa9 100644 --- a/crates/collab/src/lib.rs +++ b/crates/collab/src/lib.rs @@ -9,6 +9,7 @@ pub mod migrations; mod rate_limiter; pub mod rpc; pub mod seed; +pub mod user_backfiller; #[cfg(test)] mod tests; @@ -177,6 +178,7 @@ pub struct Config { pub stripe_api_key: Option, pub stripe_price_id: Option>, pub supermaven_admin_api_key: Option>, + pub user_backfiller_github_access_token: Option>, } impl Config { @@ -235,6 +237,7 @@ impl Config { supermaven_admin_api_key: None, qwen2_7b_api_key: None, qwen2_7b_api_url: None, + user_backfiller_github_access_token: None, } } } diff --git a/crates/collab/src/main.rs b/crates/collab/src/main.rs index a946d1662019b70198ca62ef019a36eb2857431b..50cb81934e11b4a0708cea73e880d3b739690a86 100644 --- a/crates/collab/src/main.rs +++ b/crates/collab/src/main.rs @@ -7,6 +7,7 @@ use axum::{ }; use collab::llm::{db::LlmDatabase, log_usage_periodically}; use collab::migrations::run_database_migrations; +use collab::user_backfiller::spawn_user_backfiller; use collab::{api::billing::poll_stripe_events_periodically, llm::LlmState, ServiceMode}; use collab::{ api::fetch_extensions_from_blob_store_periodically, db, env, executor::Executor, @@ -131,6 +132,7 @@ async fn main() -> Result<()> { if mode.is_api() { poll_stripe_events_periodically(state.clone()); fetch_extensions_from_blob_store_periodically(state.clone()); + spawn_user_backfiller(state.clone()); app = app .merge(collab::api::events::router()) diff --git a/crates/collab/src/tests/test_server.rs b/crates/collab/src/tests/test_server.rs index 6fde54ff1c23d7ef5e08eee268d995ebaff63ee7..6522eed3037dd6fb963f5eea2efc8f57ddb3e28c 100644 --- a/crates/collab/src/tests/test_server.rs +++ b/crates/collab/src/tests/test_server.rs @@ -682,6 +682,7 @@ impl TestServer { supermaven_admin_api_key: None, qwen2_7b_api_key: None, qwen2_7b_api_url: None, + user_backfiller_github_access_token: None, }, }) } diff --git a/crates/collab/src/user_backfiller.rs b/crates/collab/src/user_backfiller.rs new file mode 100644 index 0000000000000000000000000000000000000000..e56f53b831757ea47d1cd11683ec77acac8cb232 --- /dev/null +++ b/crates/collab/src/user_backfiller.rs @@ -0,0 +1,132 @@ +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use util::ResultExt; + +use crate::db::Database; +use crate::executor::Executor; +use crate::{AppState, Config}; + +pub fn spawn_user_backfiller(app_state: Arc) { + let Some(user_backfiller_github_access_token) = + app_state.config.user_backfiller_github_access_token.clone() + else { + log::info!("no USER_BACKFILLER_GITHUB_ACCESS_TOKEN set; not spawning user backfiller"); + return; + }; + + let executor = app_state.executor.clone(); + executor.spawn_detached({ + let executor = executor.clone(); + async move { + let user_backfiller = UserBackfiller::new( + app_state.config.clone(), + user_backfiller_github_access_token, + app_state.db.clone(), + executor, + ); + + log::info!("backfilling users"); + + user_backfiller + .backfill_github_user_created_at() + .await + .log_err(); + } + }); +} + +struct UserBackfiller { + config: Config, + github_access_token: Arc, + db: Arc, + http_client: reqwest::Client, + executor: Executor, +} + +impl UserBackfiller { + fn new( + config: Config, + github_access_token: Arc, + db: Arc, + executor: Executor, + ) -> Self { + Self { + config, + github_access_token, + db, + http_client: reqwest::Client::new(), + executor, + } + } + + async fn backfill_github_user_created_at(&self) -> Result<()> { + let initial_channel_id = self.config.auto_join_channel_id; + + let users_missing_github_user_created_at = + self.db.get_users_missing_github_user_created_at().await?; + + for user in users_missing_github_user_created_at { + match self + .fetch_github_user(&format!( + "https://api.github.com/users/{}", + user.github_login + )) + .await + { + Ok(github_user) => { + self.db + .get_or_create_user_by_github_account( + &user.github_login, + Some(github_user.id), + user.email_address.as_deref(), + Some(github_user.created_at), + initial_channel_id, + ) + .await?; + + log::info!("backfilled user: {}", user.github_login); + } + Err(err) => { + log::error!("failed to fetch GitHub user {}: {err}", user.github_login); + } + } + + self.executor + .sleep(std::time::Duration::from_millis(200)) + .await; + } + + Ok(()) + } + + async fn fetch_github_user(&self, url: &str) -> Result { + let response = self + .http_client + .get(url) + .header( + "authorization", + format!("Bearer {}", self.github_access_token), + ) + .header("user-agent", "zed") + .send() + .await + .with_context(|| format!("failed to fetch '{url}'"))?; + + let response = match response.error_for_status() { + Ok(response) => response, + Err(err) => return Err(anyhow!("failed to fetch GitHub user: {err}")), + }; + + response + .json() + .await + .with_context(|| format!("failed to deserialize GitHub user from '{url}'")) + } +} + +#[derive(serde::Deserialize)] +struct GithubUser { + id: i32, + created_at: chrono::DateTime, +}