Add OpenMetrics endpoint exposing the basic RPC store metrics as gauges

Max Brunsfeld and Antonio Scandurra created

Co-authored-by: Antonio Scandurra <me@as-cii.com>

Change summary

Cargo.lock               | 22 ++++++++++++++++++++++
crates/collab/Cargo.toml |  1 +
crates/collab/src/rpc.rs | 32 ++++++++++++++++++++++++++++++++
3 files changed, 55 insertions(+)

Detailed changes

Cargo.lock 🔗

@@ -870,6 +870,7 @@ dependencies = [
  "nanoid",
  "parking_lot 0.11.2",
  "project",
+ "prometheus",
  "rand 0.8.5",
  "reqwest",
  "rpc",
@@ -3401,6 +3402,21 @@ dependencies = [
  "workspace",
 ]
 
+[[package]]
+name = "prometheus"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cface98dfa6d645ea4c789839f176e4b072265d085bfcc48eaa8d137f58d3c39"
+dependencies = [
+ "cfg-if 1.0.0",
+ "fnv",
+ "lazy_static",
+ "memchr",
+ "parking_lot 0.12.1",
+ "protobuf",
+ "thiserror",
+]
+
 [[package]]
 name = "prost"
 version = "0.8.0"
@@ -3477,6 +3493,12 @@ dependencies = [
  "prost 0.9.0",
 ]
 
+[[package]]
+name = "protobuf"
+version = "2.27.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf7e6d18738ecd0902d30d1ad232c9125985a3422929b16c65517b38adc14f96"
+
 [[package]]
 name = "pulldown-cmark"
 version = "0.9.1"

crates/collab/Cargo.toml 🔗

@@ -31,6 +31,7 @@ lazy_static = "1.4"
 lipsum = { version = "0.8", optional = true }
 nanoid = "0.4"
 parking_lot = "0.11.1"
+prometheus = "0.13"
 rand = "0.8"
 reqwest = { version = "0.11", features = ["json"], optional = true }
 scrypt = "0.7"

crates/collab/src/rpc.rs 🔗

@@ -29,6 +29,7 @@ use futures::{
     FutureExt, SinkExt, StreamExt, TryStreamExt,
 };
 use lazy_static::lazy_static;
+use prometheus::{register_int_gauge, IntGauge};
 use rpc::{
     proto::{self, AnyTypedEnvelope, EntityMessage, EnvelopedMessage, RequestMessage},
     Connection, ConnectionId, Peer, Receipt, TypedEnvelope,
@@ -57,6 +58,18 @@ use tracing::{info_span, instrument, Instrument};
 
 pub use store::{Store, Worktree};
 
+lazy_static! {
+    static ref METRIC_CONNECTIONS: IntGauge =
+        register_int_gauge!("connections", "number of connections").unwrap();
+    static ref METRIC_PROJECTS: IntGauge =
+        register_int_gauge!("projects", "number of open projects").unwrap();
+    static ref METRIC_SHARED_PROJECTS: IntGauge = register_int_gauge!(
+        "shared_projects",
+        "number of open projects with one or more guests"
+    )
+    .unwrap();
+}
+
 type MessageHandler =
     Box<dyn Send + Sync + Fn(Arc<Server>, Box<dyn AnyTypedEnvelope>) -> BoxFuture<'static, ()>>;
 
@@ -1534,6 +1547,11 @@ impl<'a> Drop for StoreWriteGuard<'a> {
         self.check_invariants();
 
         let metrics = self.metrics();
+
+        METRIC_CONNECTIONS.set(metrics.connections as _);
+        METRIC_PROJECTS.set(metrics.registered_projects as _);
+        METRIC_SHARED_PROJECTS.set(metrics.shared_projects as _);
+
         tracing::info!(
             connections = metrics.connections,
             registered_projects = metrics.registered_projects,
@@ -1609,6 +1627,7 @@ pub fn routes(server: Arc<Server>) -> Router<Body> {
                 .layer(middleware::from_fn(auth::validate_header))
                 .layer(Extension(server)),
         )
+        .route("/metrics", get(handle_metrics))
 }
 
 pub async fn handle_websocket_request(
@@ -1642,6 +1661,19 @@ pub async fn handle_websocket_request(
     })
 }
 
+pub async fn handle_metrics() -> axum::response::Response {
+    let encoder = prometheus::TextEncoder::new();
+    let metric_families = prometheus::gather();
+    match encoder.encode_to_string(&metric_families) {
+        Ok(string) => (StatusCode::OK, string).into_response(),
+        Err(error) => (
+            StatusCode::INTERNAL_SERVER_ERROR,
+            format!("failed to encode metrics {:?}", error),
+        )
+            .into_response(),
+    }
+}
+
 fn to_axum_message(message: TungsteniteMessage) -> AxumMessage {
     match message {
         TungsteniteMessage::Text(payload) => AxumMessage::Text(payload),