From 9e1fed301d3fb76fee58582816b47dacab5fdd30 Mon Sep 17 00:00:00 2001 From: Ben Kunkle Date: Tue, 10 Feb 2026 10:44:16 -0600 Subject: [PATCH] ep_cli: Include `request_id` in rated events and tag rated examples with experiment name and prompt format (#48883) Closes #ISSUE - [ ] Tests or screenshots needed? - [ ] Code Reviewed - [ ] Manual QA Release Notes: - N/A *or* Added/Fixed/Improved ... Co-authored-by: Tom --- crates/edit_prediction/src/edit_prediction.rs | 1 + .../edit_prediction_cli/src/pull_examples.rs | 80 +++++++++++++------ .../src/sync_deployments.rs | 2 +- 3 files changed, 58 insertions(+), 25 deletions(-) diff --git a/crates/edit_prediction/src/edit_prediction.rs b/crates/edit_prediction/src/edit_prediction.rs index f24a39353e65ed5a99a527d62f1138dc6edf4f2f..1ec3c7ac44fc8f592fa094f668b3bfd84245eb5a 100644 --- a/crates/edit_prediction/src/edit_prediction.rs +++ b/crates/edit_prediction/src/edit_prediction.rs @@ -2206,6 +2206,7 @@ impl EditPredictionStore { self.rated_predictions.insert(prediction.id.clone()); telemetry::event!( "Edit Prediction Rated", + request_id = prediction.id.to_string(), rating, inputs = prediction.inputs, output = prediction diff --git a/crates/edit_prediction_cli/src/pull_examples.rs b/crates/edit_prediction_cli/src/pull_examples.rs index 96ca30ee5c0094f43b8477591d414df9a77fca64..b48cc09e13b02cac85033786e780533304fa6de4 100644 --- a/crates/edit_prediction_cli/src/pull_examples.rs +++ b/crates/edit_prediction_cli/src/pull_examples.rs @@ -14,6 +14,7 @@ use zeta_prompt::ZetaPromptInput; use crate::example::Example; use crate::progress::{InfoStyle, Progress, Step}; +use crate::sync_deployments::EDIT_PREDICTION_DEPLOYMENT_EVENT; use edit_prediction::example_spec::{ CapturedEvent, CapturedPromptInput, CapturedRelatedExcerpt, CapturedRelatedFile, ExampleSpec, TelemetrySource, @@ -759,29 +760,41 @@ pub async fn fetch_rated_examples_after( let statement = indoc! {r#" SELECT - event_properties:inputs AS inputs, - event_properties:output::string AS output, - event_properties:rating::string AS rating, - event_properties:feedback::string AS feedback, - device_id::string AS device_id, - time::string AS time - FROM events - WHERE event_type = ? - AND (? IS NULL OR event_properties:rating::string = ?) - AND time > TRY_TO_TIMESTAMP_NTZ(?) - AND event_properties:inputs IS NOT NULL - AND event_properties:inputs:cursor_excerpt IS NOT NULL - AND event_properties:output IS NOT NULL - ORDER BY time ASC + rated.event_properties:request_id::string AS request_id, + rated.event_properties:inputs AS inputs, + rated.event_properties:output::string AS output, + rated.event_properties:rating::string AS rating, + rated.event_properties:feedback::string AS feedback, + rated.device_id::string AS device_id, + rated.time::string AS time, + deploy.event_properties:experiment_name::string AS experiment_name, + deploy.event_properties:environment::string AS environment + FROM events rated + LEFT JOIN events req + ON rated.event_properties:request_id::string = req.event_properties:request_id::string + AND req.event_type = ? + LEFT JOIN events deploy + ON req.event_properties:headers:x_baseten_model_id::string = deploy.event_properties:model_id::string + AND req.event_properties:headers:x_baseten_model_version_id::string = deploy.event_properties:model_version_id::string + AND deploy.event_type = ? + WHERE rated.event_type = ? + AND (? IS NULL OR rated.event_properties:rating::string = ?) + AND rated.time > TRY_TO_TIMESTAMP_NTZ(?) + AND rated.event_properties:inputs IS NOT NULL + AND rated.event_properties:inputs:cursor_excerpt IS NOT NULL + AND rated.event_properties:output IS NOT NULL + ORDER BY rated.time ASC LIMIT ? "#}; let bindings = json!({ - "1": { "type": "TEXT", "value": EDIT_PREDICTION_RATED_EVENT }, - "2": { "type": "TEXT", "value": rating_value }, - "3": { "type": "TEXT", "value": rating_value }, - "4": { "type": "TEXT", "value": after_date }, - "5": { "type": "FIXED", "value": max_rows_per_timestamp.to_string() } + "1": { "type": "TEXT", "value": PREDICTIVE_EDIT_REQUESTED_EVENT }, + "2": { "type": "TEXT", "value": EDIT_PREDICTION_DEPLOYMENT_EVENT }, + "3": { "type": "TEXT", "value": EDIT_PREDICTION_RATED_EVENT }, + "4": { "type": "TEXT", "value": rating_value }, + "5": { "type": "TEXT", "value": rating_value }, + "6": { "type": "TEXT", "value": after_date }, + "7": { "type": "FIXED", "value": max_rows_per_timestamp.to_string() } }); let request = json!({ @@ -823,12 +836,15 @@ pub async fn fetch_rated_examples_after( let column_indices = get_column_indices( &response.result_set_meta_data, &[ + "request_id", "inputs", "output", "rating", "feedback", "device_id", "time", + "experiment_name", + "environment", ], ); @@ -908,6 +924,7 @@ fn rated_examples_from_response<'a>( } }; + let request_id = get_string("request_id"); let inputs_json = get_json("inputs"); let inputs: Option = match &inputs_json { Some(v) => match serde_json::from_value(v.clone()) { @@ -926,16 +943,21 @@ fn rated_examples_from_response<'a>( let feedback = get_string("feedback").unwrap_or_default(); let device_id = get_string("device_id"); let time = get_string("time"); + let experiment_name = get_string("experiment_name"); + let environment = get_string("environment"); match (inputs, output.clone(), rating.clone(), device_id.clone(), time.clone()) { (Some(inputs), Some(output), Some(rating), Some(device_id), Some(time)) => { Some(build_rated_example( + request_id, device_id, time, inputs, output, rating, feedback, + experiment_name, + environment, )) } _ => { @@ -956,12 +978,15 @@ fn rated_examples_from_response<'a>( } fn build_rated_example( + request_id: Option, device_id: String, time: String, input: ZetaPromptInput, output: String, rating: String, feedback: String, + experiment_name: Option, + environment: Option, ) -> Example { let parsed_rating = if rating == "Positive" { EditPredictionRating::Positive @@ -969,13 +994,20 @@ fn build_rated_example( EditPredictionRating::Negative }; let is_positive = parsed_rating == EditPredictionRating::Positive; - let request_id = format!("rated-{}-{}", device_id, time); + let request_id = request_id.unwrap_or_else(|| format!("rated-{}-{}", device_id, time)); - let tags = if is_positive { - vec!["rated:positive".to_string()] + let mut tags = Vec::with_capacity(3); + tags.push(if is_positive { + "rated:positive".to_string() } else { - vec!["rated:negative".to_string()] - }; + "rated:negative".to_string() + }); + if let Some(experiment) = experiment_name { + tags.push(format!("experiment:{experiment}")); + } + if let Some(env) = environment { + tags.push(format!("environment:{env}")); + } let mut example = build_example_from_snowflake(request_id, device_id, time, input, tags, None); diff --git a/crates/edit_prediction_cli/src/sync_deployments.rs b/crates/edit_prediction_cli/src/sync_deployments.rs index b3104c8462f5d7fbd6bca0c9cde7943d4664da62..b55923eda60fdf1c966f418d0f9d08762617987c 100644 --- a/crates/edit_prediction_cli/src/sync_deployments.rs +++ b/crates/edit_prediction_cli/src/sync_deployments.rs @@ -12,7 +12,7 @@ use crate::pull_examples::{ const DEFAULT_BASETEN_MODEL_NAME: &str = "zeta-2"; const DEFAULT_STATEMENT_TIMEOUT_SECONDS: u64 = 120; -const EDIT_PREDICTION_DEPLOYMENT_EVENT: &str = "Edit Prediction Deployment"; +pub(crate) const EDIT_PREDICTION_DEPLOYMENT_EVENT: &str = "Edit Prediction Deployment"; #[derive(Debug, Clone, Deserialize)] struct BasetenModelsResponse {