Improve visual test runner: consolidate setup, add constants, remove dead code (#46217)

Richard Feldman created 5 days ago

Just some code cleanup to improve the tests - make them better exemplars
for future use.

Release Notes:

- N/A

Change summary

crates/zed/src/visual_test_runner.rs | 532 ++++++++++-------------
plans/visual-test-improvements.md    | 661 +++++++++++++++++++++++++++++
2 files changed, 887 insertions(+), 306 deletions(-)

Detailed changes

crates/zed/src/visual_test_runner.rs 🔗

@@ -26,8 +26,8 @@
 
 use anyhow::{Context, Result};
 use gpui::{
-    App, AppContext as _, Application, Bounds, Window, WindowBounds, WindowHandle, WindowOptions,
-    point, px, size,
+    App, AppContext as _, Application, Bounds, Pixels, Size, Window, WindowBounds, WindowHandle,
+    WindowOptions, point, px,
 };
 use image::RgbaImage;
 use project_panel::ProjectPanel;
@@ -53,6 +53,144 @@ const EMBEDDED_TEST_IMAGE: &[u8] = include_bytes!("../resources/app-icon.png");
 /// Images must match at least this percentage to pass
 const MATCH_THRESHOLD: f64 = 0.99;
 
+/// Window size for workspace tests (project panel, editor)
+fn workspace_window_size() -> Size<Pixels> {
+    Size {
+        width: px(1280.0),
+        height: px(800.0),
+    }
+}
+
+/// Window size for agent panel tests
+fn agent_panel_window_size() -> Size<Pixels> {
+    Size {
+        width: px(500.0),
+        height: px(900.0),
+    }
+}
+
+/// Helper struct for setting up test workspaces
+struct TestWorkspace {
+    window: WindowHandle<Workspace>,
+}
+
+impl TestWorkspace {
+    async fn new(
+        app_state: Arc<AppState>,
+        window_size: Size<Pixels>,
+        project_path: &Path,
+        cx: &mut gpui::AsyncApp,
+    ) -> Result<Self> {
+        let project = cx.update(|cx| {
+            project::Project::local(
+                app_state.client.clone(),
+                app_state.node_runtime.clone(),
+                app_state.user_store.clone(),
+                app_state.languages.clone(),
+                app_state.fs.clone(),
+                None,
+                false,
+                cx,
+            )
+        })?;
+
+        let add_worktree_task = project.update(cx, |project, cx| {
+            project.find_or_create_worktree(project_path, true, cx)
+        })?;
+        add_worktree_task.await?;
+
+        let bounds = Bounds {
+            origin: point(px(0.0), px(0.0)),
+            size: window_size,
+        };
+
+        let window: WindowHandle<Workspace> = cx.update(|cx| {
+            cx.open_window(
+                WindowOptions {
+                    window_bounds: Some(WindowBounds::Windowed(bounds)),
+                    focus: false,
+                    show: false,
+                    ..Default::default()
+                },
+                |window, cx| {
+                    cx.new(|cx| {
+                        Workspace::new(None, project.clone(), app_state.clone(), window, cx)
+                    })
+                },
+            )
+        })??;
+
+        cx.background_executor()
+            .timer(std::time::Duration::from_millis(100))
+            .await;
+
+        Ok(Self { window })
+    }
+}
+
+async fn setup_project_panel(
+    workspace: &TestWorkspace,
+    cx: &mut gpui::AsyncApp,
+) -> Result<gpui::Entity<ProjectPanel>> {
+    let panel_task = workspace.window.update(cx, |_workspace, window, cx| {
+        let weak_workspace = cx.weak_entity();
+        let async_window_cx = window.to_async(cx);
+        window.spawn(cx, async move |_cx| {
+            ProjectPanel::load(weak_workspace, async_window_cx).await
+        })
+    })?;
+
+    let panel = panel_task.await?;
+
+    workspace.window.update(cx, |ws, window, cx| {
+        ws.add_panel(panel.clone(), window, cx);
+        ws.open_panel::<ProjectPanel>(window, cx);
+    })?;
+
+    cx.background_executor()
+        .timer(std::time::Duration::from_millis(100))
+        .await;
+
+    Ok(panel)
+}
+
+async fn open_file(
+    workspace: &TestWorkspace,
+    relative_path: &str,
+    cx: &mut gpui::AsyncApp,
+) -> Result<()> {
+    let open_file_task = workspace.window.update(cx, |ws, window, cx| {
+        let worktree = ws.project().read(cx).worktrees(cx).next();
+        if let Some(worktree) = worktree {
+            let worktree_id = worktree.read(cx).id();
+            let rel_path: std::sync::Arc<util::rel_path::RelPath> =
+                util::rel_path::rel_path(relative_path).into();
+            let project_path: project::ProjectPath = (worktree_id, rel_path).into();
+            Some(ws.open_path(project_path, None, true, window, cx))
+        } else {
+            None
+        }
+    })?;
+
+    if let Some(task) = open_file_task {
+        let item = task.await?;
+        workspace.window.update(cx, |ws, window, cx| {
+            let pane = ws.active_pane().clone();
+            pane.update(cx, |pane, cx| {
+                if let Some(index) = pane.index_for_item(item.as_ref()) {
+                    pane.activate_item(index, true, true, window, cx);
+                }
+            });
+        })?;
+    }
+
+    cx.background_executor()
+        .timer(std::time::Duration::from_millis(100))
+        .await;
+
+    Ok(())
+}
+
 fn main() {
     env_logger::builder()
         .filter_level(log::LevelFilter::Info)
@@ -60,13 +198,6 @@ fn main() {
 
     let update_baseline = std::env::var("UPDATE_BASELINE").is_ok();
 
-    if update_baseline {
-        println!("=== Visual Test Runner (UPDATE MODE) ===\n");
-        println!("Baseline images will be updated.\n");
-    } else {
-        println!("=== Visual Test Runner ===\n");
-    }
-
     // Create a temporary directory for test files
     let temp_dir = tempfile::tempdir().expect("Failed to create temp directory");
     let project_path = temp_dir.path().join("project");
@@ -107,200 +238,72 @@ fn main() {
                 language_model::init(app_state.client.clone(), cx);
                 language_models::init(app_state.user_store.clone(), app_state.client.clone(), cx);
 
-                // Open a real Zed workspace window
-                let window_size = size(px(1280.0), px(800.0));
-                // Window can be hidden since we use direct texture capture (reading pixels from
-                // Metal texture) instead of ScreenCaptureKit which requires visible windows.
-                let bounds = Bounds {
-                    origin: point(px(0.0), px(0.0)),
-                    size: window_size,
-                };
-
-                // Create a project for the workspace
-                let project = project::Project::local(
-                    app_state.client.clone(),
-                    app_state.node_runtime.clone(),
-                    app_state.user_store.clone(),
-                    app_state.languages.clone(),
-                    app_state.fs.clone(),
-                    None,
-                    false,
-                    cx,
-                );
-
-                let workspace_window: WindowHandle<Workspace> = cx
-                    .open_window(
-                        WindowOptions {
-                            window_bounds: Some(WindowBounds::Windowed(bounds)),
-                            focus: false,
-                            show: false,
-                            ..Default::default()
-                        },
-                        |window, cx| {
-                            cx.new(|cx| {
-                                Workspace::new(None, project.clone(), app_state.clone(), window, cx)
-                            })
-                        },
-                    )
-                    .expect("Failed to open workspace window");
-
-                // Add the test project as a worktree directly to the project
-                let add_worktree_task = workspace_window
-                    .update(cx, |workspace, _window, cx| {
-                        workspace.project().update(cx, |project, cx| {
-                            project.find_or_create_worktree(&project_path, true, cx)
-                        })
-                    })
-                    .expect("Failed to update workspace");
-
                 // Clone app_state for the async block
                 let app_state_for_tests = app_state.clone();
 
                 // Spawn async task to set up the UI and capture screenshot
                 cx.spawn(async move |mut cx| {
-                    // Wait for the worktree to be added
-                    if let Err(e) = add_worktree_task.await {
-                        eprintln!("Failed to add worktree: {:?}", e);
-                    }
-
-                    // Wait for UI to settle
-                    cx.background_executor()
-                        .timer(std::time::Duration::from_millis(500))
-                        .await;
+                    let project_path_clone = project_path.clone();
 
-                    // Create and add the project panel to the workspace
-                    let panel_task = cx.update(|cx| {
-                        workspace_window
-                            .update(cx, |_workspace, window, cx| {
-                                let weak_workspace = cx.weak_entity();
-                                window.spawn(cx, async move |cx| {
-                                    ProjectPanel::load(weak_workspace, cx.clone()).await
-                                })
-                            })
-                            .ok()
-                    });
-
-                    if let Ok(Some(task)) = panel_task {
-                        if let Ok(panel) = task.await {
-                            cx.update(|cx| {
-                                workspace_window
-                                    .update(cx, |workspace, window, cx| {
-                                        workspace.add_panel(panel, window, cx);
-                                    })
-                                    .ok();
-                            })
-                            .ok();
+                    // Create the test workspace
+                    let workspace = match TestWorkspace::new(
+                        app_state_for_tests.clone(),
+                        workspace_window_size(),
+                        &project_path_clone,
+                        &mut cx,
+                    )
+                    .await
+                    {
+                        Ok(ws) => ws,
+                        Err(e) => {
+                            log::error!("Failed to create workspace: {}", e);
+                            cx.update(|cx| cx.quit()).ok();
+                            std::process::exit(1);
                         }
-                    }
+                    };
 
-                    // Wait for panel to be added
-                    cx.background_executor()
-                        .timer(std::time::Duration::from_millis(500))
-                        .await;
-
-                    // Open the project panel
-                    cx.update(|cx| {
-                        workspace_window
-                            .update(cx, |workspace, window, cx| {
-                                workspace.open_panel::<ProjectPanel>(window, cx);
-                            })
-                            .ok();
-                    })
-                    .ok();
-
-                    // Wait for project panel to render
-                    cx.background_executor()
-                        .timer(std::time::Duration::from_millis(500))
-                        .await;
+                    // Set up project panel
+                    if let Err(e) = setup_project_panel(&workspace, &mut cx).await {
+                        log::error!("Failed to setup project panel: {}", e);
+                        cx.update(|cx| cx.quit()).ok();
+                        std::process::exit(1);
+                    }
 
                     // Open main.rs in the editor
-                    let open_file_task = cx.update(|cx| {
-                        workspace_window
-                            .update(cx, |workspace, window, cx| {
-                                let worktree = workspace.project().read(cx).worktrees(cx).next();
-                                if let Some(worktree) = worktree {
-                                    let worktree_id = worktree.read(cx).id();
-                                    let rel_path: std::sync::Arc<util::rel_path::RelPath> =
-                                        util::rel_path::rel_path("src/main.rs").into();
-                                    let project_path: project::ProjectPath =
-                                        (worktree_id, rel_path).into();
-                                    Some(workspace.open_path(project_path, None, true, window, cx))
-                                } else {
-                                    None
-                                }
-                            })
-                            .ok()
-                            .flatten()
-                    });
-
-                    if let Ok(Some(task)) = open_file_task {
-                        if let Ok(item) = task.await {
-                            // Focus the opened item to dismiss the welcome screen
-                            cx.update(|cx| {
-                                workspace_window
-                                    .update(cx, |workspace, window, cx| {
-                                        let pane = workspace.active_pane().clone();
-                                        pane.update(cx, |pane, cx| {
-                                            if let Some(index) = pane.index_for_item(item.as_ref())
-                                            {
-                                                pane.activate_item(index, true, true, window, cx);
-                                            }
-                                        });
-                                    })
-                                    .ok();
-                            })
-                            .ok();
-
-                            // Wait for item activation to render
-                            cx.background_executor()
-                                .timer(std::time::Duration::from_millis(500))
-                                .await;
-                        }
+                    if let Err(e) = open_file(&workspace, "src/main.rs", &mut cx).await {
+                        log::error!("Failed to open file: {}", e);
+                        cx.update(|cx| cx.quit()).ok();
+                        std::process::exit(1);
                     }
 
                     // Request a window refresh to ensure all pending effects are processed
                     cx.refresh().ok();
-
-                    // Wait for UI to fully stabilize
                     cx.background_executor()
-                        .timer(std::time::Duration::from_secs(2))
+                        .timer(std::time::Duration::from_millis(500))
                         .await;
 
-                    // Track test results
-                    let mut passed = 0;
-                    let mut failed = 0;
-                    let mut updated = 0;
+                    // Track if any test failed
+                    let mut any_failed = false;
 
                     // Run Test 1: Project Panel (with project panel visible)
-                    println!("\n--- Test 1: project_panel ---");
-                    let test_result = run_visual_test(
+                    if run_visual_test(
                         "project_panel",
-                        workspace_window.into(),
+                        workspace.window.into(),
                         &mut cx,
                         update_baseline,
                     )
-                    .await;
-
-                    match test_result {
-                        Ok(TestResult::Passed) => {
-                            println!("✓ project_panel: PASSED");
-                            passed += 1;
-                        }
-                        Ok(TestResult::BaselineUpdated(path)) => {
-                            println!("✓ project_panel: Baseline updated at {}", path.display());
-                            updated += 1;
-                        }
-                        Err(e) => {
-                            eprintln!("✗ project_panel: FAILED - {}", e);
-                            failed += 1;
-                        }
+                    .await
+                    .is_err()
+                    {
+                        any_failed = true;
                     }
 
                     // Close the project panel for the second test
                     cx.update(|cx| {
-                        workspace_window
-                            .update(cx, |workspace, window, cx| {
-                                workspace.close_panel::<ProjectPanel>(window, cx);
+                        workspace
+                            .window
+                            .update(cx, |ws, window, cx| {
+                                ws.close_panel::<ProjectPanel>(window, cx);
                             })
                             .ok();
                     })
@@ -309,77 +312,37 @@ fn main() {
                     // Refresh and wait for panel to close
                     cx.refresh().ok();
                     cx.background_executor()
-                        .timer(std::time::Duration::from_millis(500))
+                        .timer(std::time::Duration::from_millis(100))
                         .await;
 
                     // Run Test 2: Workspace with Editor (without project panel)
-                    println!("\n--- Test 2: workspace_with_editor ---");
-                    let test_result = run_visual_test(
+                    if run_visual_test(
                         "workspace_with_editor",
-                        workspace_window.into(),
+                        workspace.window.into(),
                         &mut cx,
                         update_baseline,
                     )
-                    .await;
-
-                    match test_result {
-                        Ok(TestResult::Passed) => {
-                            println!("✓ workspace_with_editor: PASSED");
-                            passed += 1;
-                        }
-                        Ok(TestResult::BaselineUpdated(path)) => {
-                            println!(
-                                "✓ workspace_with_editor: Baseline updated at {}",
-                                path.display()
-                            );
-                            updated += 1;
-                        }
-                        Err(e) => {
-                            eprintln!("✗ workspace_with_editor: FAILED - {}", e);
-                            failed += 1;
-                        }
+                    .await
+                    .is_err()
+                    {
+                        any_failed = true;
                     }
 
                     // Run Test 3: Agent Thread View with Image (collapsed and expanded)
-                    println!("\n--- Test 3: agent_thread_with_image (collapsed + expanded) ---");
-                    let test_result = run_agent_thread_view_test(
+                    if run_agent_thread_view_test(
                         app_state_for_tests.clone(),
                         &mut cx,
                         update_baseline,
                     )
-                    .await;
-
-                    match test_result {
-                        Ok(TestResult::Passed) => {
-                            println!("✓ agent_thread_with_image (collapsed + expanded): PASSED");
-                            passed += 1;
-                        }
-                        Ok(TestResult::BaselineUpdated(_)) => {
-                            println!(
-                                "✓ agent_thread_with_image: Baselines updated (collapsed + expanded)"
-                            );
-                            updated += 1;
-                        }
-                        Err(e) => {
-                            eprintln!("✗ agent_thread_with_image: FAILED - {}", e);
-                            failed += 1;
-                        }
-                    }
-
-                    // Print summary
-                    println!("\n=== Test Summary ===");
-                    println!("Passed: {}", passed);
-                    println!("Failed: {}", failed);
-                    if updated > 0 {
-                        println!("Baselines Updated: {}", updated);
+                    .await
+                    .is_err()
+                    {
+                        any_failed = true;
                     }
 
-                    if failed > 0 {
-                        eprintln!("\n=== Visual Tests FAILED ===");
+                    if any_failed {
                         cx.update(|cx| cx.quit()).ok();
                         std::process::exit(1);
-                    } else {
-                        println!("\n=== All Visual Tests PASSED ===");
                     }
 
                     cx.update(|cx| cx.quit()).ok();
@@ -423,7 +386,6 @@ async fn run_visual_test(
 
     // Save the actual screenshot
     screenshot.save(&actual_path)?;
-    println!("Screenshot saved to: {}", actual_path.display());
 
     if update_baseline {
         // Update the baseline
@@ -449,13 +411,6 @@ async fn run_visual_test(
 
     let comparison = compare_images(&baseline, &screenshot);
 
-    println!(
-        "Image comparison: {:.2}% match ({} different pixels out of {})",
-        comparison.match_percentage * 100.0,
-        comparison.diff_pixel_count,
-        comparison.total_pixels
-    );
-
     if comparison.match_percentage >= MATCH_THRESHOLD {
         Ok(TestResult::Passed)
     } else {
@@ -463,7 +418,6 @@ async fn run_visual_test(
         if let Some(diff_image) = comparison.diff_image {
             let diff_path = Path::new(&output_dir).join(format!("{}_diff.png", test_name));
             diff_image.save(&diff_path)?;
-            println!("Diff image saved to: {}", diff_path.display());
         }
 
         Err(anyhow::anyhow!(
@@ -495,8 +449,6 @@ fn get_baseline_path(test_name: &str) -> PathBuf {
 struct ImageComparison {
     match_percentage: f64,
     diff_image: Option<RgbaImage>,
-    diff_pixel_count: u64,
-    total_pixels: u64,
 }
 
 fn compare_images(baseline: &RgbaImage, actual: &RgbaImage) -> ImageComparison {
@@ -505,8 +457,6 @@ fn compare_images(baseline: &RgbaImage, actual: &RgbaImage) -> ImageComparison {
         return ImageComparison {
             match_percentage: 0.0,
             diff_image: None,
-            diff_pixel_count: baseline.width() as u64 * baseline.height() as u64,
-            total_pixels: baseline.width() as u64 * baseline.height() as u64,
         };
     }
 
@@ -520,7 +470,7 @@ fn compare_images(baseline: &RgbaImage, actual: &RgbaImage) -> ImageComparison {
             let baseline_pixel = baseline.get_pixel(x, y);
             let actual_pixel = actual.get_pixel(x, y);
 
-            if pixels_are_similar(baseline_pixel, actual_pixel) {
+            if pixels_match(baseline_pixel, actual_pixel) {
                 // Matching pixel - show as dimmed version of actual
                 diff_image.put_pixel(
                     x,
@@ -549,19 +499,11 @@ fn compare_images(baseline: &RgbaImage, actual: &RgbaImage) -> ImageComparison {
     ImageComparison {
         match_percentage,
         diff_image: Some(diff_image),
-        diff_pixel_count: diff_count,
-        total_pixels,
     }
 }
 
-fn pixels_are_similar(a: &image::Rgba<u8>, b: &image::Rgba<u8>) -> bool {
-    // Allow small differences due to anti-aliasing, font rendering, etc.
-    const TOLERANCE: i16 = 2;
-
-    (a[0] as i16 - b[0] as i16).abs() <= TOLERANCE
-        && (a[1] as i16 - b[1] as i16).abs() <= TOLERANCE
-        && (a[2] as i16 - b[2] as i16).abs() <= TOLERANCE
-        && (a[3] as i16 - b[3] as i16).abs() <= TOLERANCE
+fn pixels_match(a: &image::Rgba<u8>, b: &image::Rgba<u8>) -> bool {
+    a == b
 }
 
 fn capture_screenshot(window: gpui::AnyWindowHandle, cx: &mut gpui::App) -> Result<RgbaImage> {
@@ -571,12 +513,6 @@ fn capture_screenshot(window: gpui::AnyWindowHandle, cx: &mut gpui::App) -> Resu
         window.render_to_image()
     })??;
 
-    println!(
-        "Screenshot captured: {}x{} pixels",
-        screenshot.width(),
-        screenshot.height()
-    );
-
     Ok(screenshot)
 }
 
@@ -585,9 +521,20 @@ fn create_test_files(project_path: &Path) {
     let src_dir = project_path.join("src");
     std::fs::create_dir_all(&src_dir).expect("Failed to create src directory");
 
-    std::fs::write(
-        src_dir.join("main.rs"),
-        r#"fn main() {
+    std::fs::write(src_dir.join("main.rs"), MAIN_RS_CONTENT).expect("Failed to write main.rs");
+
+    std::fs::write(src_dir.join("lib.rs"), LIB_RS_CONTENT).expect("Failed to write lib.rs");
+
+    std::fs::write(src_dir.join("utils.rs"), UTILS_RS_CONTENT).expect("Failed to write utils.rs");
+
+    std::fs::write(project_path.join("Cargo.toml"), CARGO_TOML_CONTENT)
+        .expect("Failed to write Cargo.toml");
+
+    std::fs::write(project_path.join("README.md"), README_MD_CONTENT)
+        .expect("Failed to write README.md");
+}
+
+const MAIN_RS_CONTENT: &str = r#"fn main() {
     println!("Hello, world!");
 
     let message = greet("Zed");
@@ -607,13 +554,9 @@ mod tests {
         assert_eq!(greet("World"), "Welcome to World, the editor of the future!");
     }
 }
-"#,
-    )
-    .expect("Failed to write main.rs");
+"#;
 
-    std::fs::write(
-        src_dir.join("lib.rs"),
-        r#"//! A sample library for visual testing.
+const LIB_RS_CONTENT: &str = r#"//! A sample library for visual testing.
 
 pub mod utils;
 
@@ -641,13 +584,9 @@ mod tests {
         assert_eq!(subtract(5, 3), 2);
     }
 }
-"#,
-    )
-    .expect("Failed to write lib.rs");
+"#;
 
-    std::fs::write(
-        src_dir.join("utils.rs"),
-        r#"//! Utility functions for the sample project.
+const UTILS_RS_CONTENT: &str = r#"//! Utility functions for the sample project.
 
 /// Formats a greeting message.
 pub fn format_greeting(name: &str) -> String {
@@ -658,13 +597,9 @@ pub fn format_greeting(name: &str) -> String {
 pub fn format_farewell(name: &str) -> String {
     format!("Goodbye, {}!", name)
 }
-"#,
-    )
-    .expect("Failed to write utils.rs");
+"#;
 
-    std::fs::write(
-        project_path.join("Cargo.toml"),
-        r#"[package]
+const CARGO_TOML_CONTENT: &str = r#"[package]
 name = "test-project"
 version = "0.1.0"
 edition = "2021"
@@ -672,13 +607,9 @@ edition = "2021"
 [dependencies]
 
 [dev-dependencies]
-"#,
-    )
-    .expect("Failed to write Cargo.toml");
+"#;
 
-    std::fs::write(
-        project_path.join("README.md"),
-        r#"# Test Project
+const README_MD_CONTENT: &str = r#"# Test Project
 
 This is a test project for visual testing of Zed.
 
@@ -704,10 +635,7 @@ cargo build
 ```bash
 cargo test
 ```
-"#,
-    )
-    .expect("Failed to write README.md");
-}
+"#;
 
 /// Initialize AppState with real filesystem for visual testing.
 fn init_app_state(cx: &mut gpui::App) -> Arc<AppState> {
@@ -814,7 +742,6 @@ async fn run_agent_thread_view_test(
     // Wait for worktree to scan and find the image file
     let worktree_name = worktree.read_with(cx, |wt, _| wt.root_name_str().to_string())?;
 
-    // Wait for worktree to be fully scanned
     cx.background_executor()
         .timer(std::time::Duration::from_millis(100))
         .await;
@@ -900,14 +827,12 @@ async fn run_agent_thread_view_test(
 
     let stub_agent: Rc<dyn AgentServer> = Rc::new(StubAgentServer::new(connection.clone()));
 
-    // Create a window sized for the agent panel (500x900)
-    let window_size = size(px(500.0), px(900.0));
+    // Create a workspace window
     let bounds = Bounds {
         origin: point(px(0.0), px(0.0)),
-        size: window_size,
+        size: agent_panel_window_size(),
     };
 
-    // Create a workspace window
     let workspace_window: WindowHandle<Workspace> = cx.update(|cx| {
         cx.open_window(
             WindowOptions {
@@ -922,7 +847,6 @@ async fn run_agent_thread_view_test(
         )
     })??;
 
-    // Wait for workspace to initialize
     cx.background_executor()
         .timer(std::time::Duration::from_millis(100))
         .await;
@@ -943,9 +867,8 @@ async fn run_agent_thread_view_test(
         workspace.open_panel::<AgentPanel>(window, cx);
     })?;
 
-    // Wait for panel to be ready
     cx.background_executor()
-        .timer(std::time::Duration::from_millis(200))
+        .timer(std::time::Duration::from_millis(100))
         .await;
 
     // Inject the stub server and open the stub thread
@@ -955,9 +878,8 @@ async fn run_agent_thread_view_test(
         });
     })?;
 
-    // Wait for thread view to initialize
     cx.background_executor()
-        .timer(std::time::Duration::from_millis(200))
+        .timer(std::time::Duration::from_millis(100))
         .await;
 
     // Get the thread view and send a message
@@ -974,9 +896,8 @@ async fn run_agent_thread_view_test(
         .update(cx, |thread, cx| thread.send_raw("Show me the Zed logo", cx))?
         .await?;
 
-    // Wait for response to be processed
     cx.background_executor()
-        .timer(std::time::Duration::from_millis(500))
+        .timer(std::time::Duration::from_millis(200))
         .await;
 
     // Get the tool call ID for expanding later
@@ -1001,7 +922,7 @@ async fn run_agent_thread_view_test(
     )?;
 
     cx.background_executor()
-        .timer(std::time::Duration::from_millis(300))
+        .timer(std::time::Duration::from_millis(100))
         .await;
 
     // First, capture the COLLAPSED state (image tool call not expanded)
@@ -1018,9 +939,8 @@ async fn run_agent_thread_view_test(
         view.expand_tool_call(tool_call_id, cx);
     })?;
 
-    // Wait for UI to update
     cx.background_executor()
-        .timer(std::time::Duration::from_millis(300))
+        .timer(std::time::Duration::from_millis(100))
         .await;
 
     // Refresh window for expanded state
@@ -1032,7 +952,7 @@ async fn run_agent_thread_view_test(
     )?;
 
     cx.background_executor()
-        .timer(std::time::Duration::from_millis(300))
+        .timer(std::time::Duration::from_millis(100))
         .await;
 
     // Capture the EXPANDED state (image visible)

plans/visual-test-improvements.md 🔗

@@ -0,0 +1,661 @@
+# Visual Test Runner Improvements
+
+This document describes improvements to make the visual test runner in `crates/zed/src/visual_test_runner.rs` more deterministic, faster, and less hacky.
+
+## Background
+
+The visual test runner captures screenshots of Zed's UI and compares them against baseline images. It currently works but has several issues:
+
+1. **Non-deterministic timing**: Uses `timer().await` calls scattered throughout
+2. **Real filesystem I/O**: Uses `tempfile` and `RealFs` instead of `FakeFs`
+3. **Dead code**: Unused variables and counters from removed print statements
+4. **Code duplication**: Similar setup code repeated across tests
+5. **Limited production code coverage**: Some areas use stubs where real code could run
+
+## How to Run the Visual Tests
+
+```bash
+# Run the visual tests (compare against baselines)
+cargo run -p zed --bin zed_visual_test_runner --features visual-tests
+
+# Update baseline images (when UI intentionally changes)
+UPDATE_BASELINE=1 cargo run -p zed --bin zed_visual_test_runner --features visual-tests
+```
+
+The test runner is a separate binary, not a `#[test]` function. It uses `Application::new().run()` to create a real GPUI application context.
+
+---
+
+## Improvement 1: Replace Timer-Based Waits with `run_until_parked()`
+
+### Problem
+
+The code is littered with timing-based waits like:
+
+```rust
+cx.background_executor()
+    .timer(std::time::Duration::from_millis(500))
+    .await;
+```
+
+These appear ~15 times in the file. They are:
+- **Slow**: Adds up to several seconds of unnecessary waiting
+- **Non-deterministic**: Could flake on slow CI machines
+- **Arbitrary**: The durations (100ms, 200ms, 300ms, 500ms, 2s) were chosen by trial and error
+
+### Solution
+
+Use `run_until_parked()` which runs all pending async tasks until there's nothing left to do. This is:
+- **Instant**: Returns immediately when work is complete
+- **Deterministic**: Waits exactly as long as needed
+- **Standard**: Used throughout Zed's test suite
+
+### How to Implement
+
+The challenge is that the visual test runner uses `AsyncApp` (from `cx.spawn()`), not `TestAppContext`. The `run_until_parked()` method is on `BackgroundExecutor` which is accessible via `cx.background_executor()`.
+
+However, `run_until_parked()` is a **blocking** call that runs the executor synchronously, while the visual tests are currently written as async code. You'll need to restructure the code.
+
+**Option A: Keep async structure, call run_until_parked between awaits**
+
+```rust
+// Before:
+cx.background_executor()
+    .timer(std::time::Duration::from_millis(500))
+    .await;
+
+// After - run all pending work synchronously:
+cx.background_executor().run_until_parked();
+```
+
+But this won't work directly in async context because `run_until_parked()` blocks.
+
+**Option B: Restructure to use synchronous test pattern**
+
+The standard Zed test pattern uses `#[gpui::test]` with `TestAppContext`:
+
+```rust
+#[gpui::test]
+async fn test_something(cx: &mut TestAppContext) {
+    cx.run_until_parked();  // This works!
+}
+```
+
+For the visual test runner, you'd need to convert from `Application::new().run()` to the test harness. This is a bigger change but would be more idiomatic.
+
+**Option C: Use cx.refresh() + small delay for rendering**
+
+For purely rendering-related waits, `cx.refresh()` forces a repaint. A single small delay after refresh may be acceptable for GPU readback timing:
+
+```rust
+cx.refresh().ok();
+// Minimal delay just for GPU work, not async task completion
+cx.background_executor()
+    .timer(std::time::Duration::from_millis(16))  // ~1 frame
+    .await;
+```
+
+### Locations to Change
+
+Search for `timer(std::time::Duration` in the file. Each occurrence should be evaluated:
+
+| Line | Current Duration | Purpose | Replacement |
+|------|-----------------|---------|-------------|
+| 160-162 | 500ms | Wait for worktree | `run_until_parked()` or await the task |
+| 190-192 | 500ms | Wait for panel add | `run_until_parked()` |
+| 205-207 | 500ms | Wait for panel render | `cx.refresh()` |
+| 248-250 | 500ms | Wait for item activation | `run_until_parked()` |
+| 258-260 | 2000ms | Wait for UI to stabilize | `cx.refresh()` + minimal delay |
+| 294-296 | 500ms | Wait for panel close | `run_until_parked()` |
+| 752-754 | 100ms | Wait for worktree scan | `run_until_parked()` |
+| 860-862 | 100ms | Wait for workspace init | `run_until_parked()` |
+| 881-883 | 200ms | Wait for panel ready | `run_until_parked()` |
+| 893-895 | 200ms | Wait for thread view | `run_until_parked()` |
+| 912-914 | 500ms | Wait for response | `run_until_parked()` |
+| 937-939 | 300ms | Wait for refresh | `cx.refresh()` |
+| 956-958 | 300ms | Wait for UI update | `run_until_parked()` |
+| 968-970 | 300ms | Wait for refresh | `cx.refresh()` |
+
+### How to Verify
+
+After making changes:
+1. Run the tests: `cargo run -p zed --bin zed_visual_test_runner --features visual-tests`
+2. They should pass with the same baseline images
+3. They should run faster (measure before/after)
+
+---
+
+## Improvement 2: Use `FakeFs` Instead of Real Filesystem
+
+### Problem
+
+The code currently uses:
+```rust
+let fs = Arc::new(RealFs::new(None, cx.background_executor().clone()));
+let temp_dir = tempfile::tempdir().expect("Failed to create temp directory");
+```
+
+This is:
+- **Slow**: Real I/O is slower than in-memory operations
+- **Non-deterministic**: Filesystem timing varies
+- **Messy**: Leaves temp directories on failure
+
+### Solution
+
+Use `FakeFs` which is an in-memory filesystem used throughout Zed's tests:
+
+```rust
+use fs::FakeFs;
+
+let fs = FakeFs::new(cx.background_executor().clone());
+fs.insert_tree(
+    "/project",
+    json!({
+        "src": {
+            "main.rs": "fn main() { println!(\"Hello\"); }"
+        },
+        "Cargo.toml": "[package]\nname = \"test\""
+    }),
+).await;
+```
+
+### How to Implement
+
+1. **Add the dependency** in `crates/zed/Cargo.toml` if not present:
+   ```toml
+   fs = { workspace = true, features = ["test-support"] }
+   ```
+
+2. **Replace RealFs creation** in `init_app_state()` (around line 655):
+
+   ```rust
+   // Before:
+   let fs = Arc::new(RealFs::new(None, cx.background_executor().clone()));
+   
+   // After:
+   let fs = FakeFs::new(cx.background_executor().clone());
+   ```
+
+3. **Replace tempdir + file creation** (around lines 66-71 and 518-643):
+
+   ```rust
+   // Before:
+   let temp_dir = tempfile::tempdir().expect("...");
+   let project_path = temp_dir.path().join("project");
+   std::fs::create_dir_all(&project_path).expect("...");
+   create_test_files(&project_path);
+   
+   // After:
+   let fs = FakeFs::new(cx.background_executor().clone());
+   fs.insert_tree("/project", json!({
+       "src": {
+           "main.rs": include_str!("test_content/main.rs"),
+           "lib.rs": include_str!("test_content/lib.rs"),
+       },
+       "Cargo.toml": include_str!("test_content/Cargo.toml"),
+       "README.md": include_str!("test_content/README.md"),
+   })).await;
+   let project_path = Path::new("/project");
+   ```
+
+4. **For the test image** (around line 726):
+
+   ```rust
+   // Before:
+   let temp_dir = tempfile::tempdir()?;
+   let project_path = temp_dir.path().join("project");
+   std::fs::create_dir_all(&project_path)?;
+   let image_path = project_path.join("test-image.png");
+   std::fs::write(&image_path, EMBEDDED_TEST_IMAGE)?;
+   
+   // After:
+   fs.insert_file("/project/test-image.png", EMBEDDED_TEST_IMAGE.to_vec()).await;
+   let project_path = Path::new("/project");
+   ```
+
+5. **Update `init_app_state`** to accept `fs` as a parameter instead of creating it internally.
+
+### Reference Example
+
+See `crates/project_panel/src/project_panel_tests.rs` lines 17-62 for a complete example of using `FakeFs` with `insert_tree()`.
+
+### How to Verify
+
+1. Run the tests - they should produce identical screenshots
+2. Verify no temp directories are created in `/tmp` or equivalent
+3. Check that tests run faster
+
+---
+
+## Improvement 3: Remove Dead Code
+
+### Problem
+
+After removing print statements, there's leftover dead code:
+
+```rust
+let _ = update_baseline;  // Line 63 - was used in removed print
+
+let mut passed = 0;       // Lines 263-265
+let mut failed = 0;
+let mut updated = 0;
+// ... counters incremented but never read
+
+let _ = (passed, updated);  // Line 327 - silences warning
+```
+
+### Solution
+
+Remove the unused code and restructure to not need counters.
+
+### How to Implement
+
+1. **Remove the `let _ = update_baseline;`** on line 63 - `update_baseline` is already used later
+
+2. **Simplify test result handling**:
+
+   ```rust
+   // Before:
+   let mut passed = 0;
+   let mut failed = 0;
+   let mut updated = 0;
+   
+   match test_result {
+       Ok(TestResult::Passed) => passed += 1,
+       Ok(TestResult::BaselineUpdated(_)) => updated += 1,
+       Err(_) => failed += 1,
+   }
+   // ... repeat for each test ...
+   
+   let _ = (passed, updated);
+   
+   if failed > 0 {
+       std::process::exit(1);
+   }
+   
+   // After:
+   let mut any_failed = false;
+   
+   if run_visual_test("project_panel", ...).await.is_err() {
+       any_failed = true;
+   }
+   
+   if run_visual_test("workspace_with_editor", ...).await.is_err() {
+       any_failed = true;
+   }
+   
+   if run_agent_thread_view_test(...).await.is_err() {
+       any_failed = true;
+   }
+   
+   if any_failed {
+       std::process::exit(1);
+   }
+   ```
+
+3. **Or collect results into a Vec**:
+
+   ```rust
+   let results = vec![
+       run_visual_test("project_panel", ...).await,
+       run_visual_test("workspace_with_editor", ...).await,
+       run_agent_thread_view_test(...).await,
+   ];
+   
+   if results.iter().any(|r| r.is_err()) {
+       std::process::exit(1);
+   }
+   ```
+
+### How to Verify
+
+1. Run `cargo clippy -p zed --features visual-tests` - should have no warnings about unused variables
+2. Run the tests - behavior should be unchanged
+
+---
+
+## Improvement 4: Consolidate Test Setup Code
+
+### Problem
+
+There's significant duplication between:
+- Main workspace test setup (lines 106-260)
+- Agent panel test setup (lines 713-900)
+
+Both create:
+- Projects with `Project::local()`
+- Worktrees with `find_or_create_worktree()`
+- Windows with `cx.open_window()`
+- Wait for things to settle
+
+### Solution
+
+Extract common setup into helper functions.
+
+### How to Implement
+
+1. **Create a `TestWorkspace` struct**:
+
+   ```rust
+   struct TestWorkspace {
+       window: WindowHandle<Workspace>,
+       project: Entity<Project>,
+   }
+   
+   impl TestWorkspace {
+       async fn new(
+           app_state: Arc<AppState>,
+           size: Size<Pixels>,
+           project_path: &Path,
+           cx: &mut AsyncApp,
+       ) -> Result<Self> {
+           let project = cx.update(|cx| {
+               Project::local(
+                   app_state.client.clone(),
+                   app_state.node_runtime.clone(),
+                   app_state.user_store.clone(),
+                   app_state.languages.clone(),
+                   app_state.fs.clone(),
+                   None,
+                   false,
+                   cx,
+               )
+           })?;
+           
+           // Add worktree
+           let add_task = project.update(cx, |project, cx| {
+               project.find_or_create_worktree(project_path, true, cx)
+           })?;
+           add_task.await?;
+           
+           // Create window
+           let bounds = Bounds {
+               origin: point(px(0.0), px(0.0)),
+               size,
+           };
+           
+           let window = cx.update(|cx| {
+               cx.open_window(
+                   WindowOptions {
+                       window_bounds: Some(WindowBounds::Windowed(bounds)),
+                       focus: false,
+                       show: false,
+                       ..Default::default()
+                   },
+                   |window, cx| {
+                       cx.new(|cx| {
+                           Workspace::new(None, project.clone(), app_state.clone(), window, cx)
+                       })
+                   },
+               )
+           })??;
+           
+           cx.background_executor().run_until_parked();
+           
+           Ok(Self { window, project })
+       }
+   }
+   ```
+
+2. **Create a `setup_project_panel` helper**:
+
+   ```rust
+   async fn setup_project_panel(
+       workspace: &TestWorkspace,
+       cx: &mut AsyncApp,
+   ) -> Result<Entity<ProjectPanel>> {
+       let panel_task = workspace.window.update(cx, |_ws, window, cx| {
+           let weak = cx.weak_entity();
+           window.spawn(cx, async move |cx| ProjectPanel::load(weak, cx).await)
+       })?;
+       
+       let panel = panel_task.await?;
+       
+       workspace.window.update(cx, |ws, window, cx| {
+           ws.add_panel(panel.clone(), window, cx);
+           ws.open_panel::<ProjectPanel>(window, cx);
+       })?;
+       
+       cx.background_executor().run_until_parked();
+       
+       Ok(panel)
+   }
+   ```
+
+3. **Use helpers in tests**:
+
+   ```rust
+   // Test 1
+   let workspace = TestWorkspace::new(
+       app_state.clone(),
+       size(px(1280.0), px(800.0)),
+       &project_path,
+       &mut cx,
+   ).await?;
+   
+   setup_project_panel(&workspace, &mut cx).await?;
+   open_file(&workspace, "src/main.rs", &mut cx).await?;
+   
+   run_visual_test("project_panel", workspace.window.into(), &mut cx, update_baseline).await?;
+   ```
+
+### How to Verify
+
+1. Tests should produce identical screenshots
+2. Code should be shorter and more readable
+3. Adding new tests should be easier
+
+---
+
+## Improvement 5: Exercise More Production Code
+
+### Problem
+
+Some tests use minimal stubs where real production code could run deterministically.
+
+### Current State (Good)
+
+The agent thread test already runs the **real** `ReadFileTool`:
+```rust
+let tool = Arc::new(agent::ReadFileTool::new(...));
+tool.clone().run(input, event_stream, cx).await?;
+```
+
+This is great! It exercises real tool execution.
+
+### Opportunities for More Coverage
+
+1. **Syntax highlighting**: Register real language grammars so the editor shows colored code
+
+   ```rust
+   // Currently just:
+   let languages = Arc::new(LanguageRegistry::new(cx.background_executor().clone()));
+   
+   // Could add:
+   languages.register_native_grammars([
+       tree_sitter_rust::LANGUAGE.into(),
+       tree_sitter_markdown::LANGUAGE.into(),
+   ]);
+   ```
+
+2. **File icons**: The project panel could show real file icons by registering file types
+
+3. **Theme loading**: Currently uses `LoadThemes::JustBase`. Could load a full theme:
+   ```rust
+   theme::init(theme::LoadThemes::All, cx);
+   ```
+   (But this might make tests slower - evaluate trade-off)
+
+4. **More tool types**: Test other tools like `ListFilesTool`, `GrepTool` that don't need network
+
+### How to Implement Syntax Highlighting
+
+1. Add language dependencies to `Cargo.toml`:
+   ```toml
+   tree-sitter-rust = { workspace = true }
+   tree-sitter-markdown = { workspace = true }
+   ```
+
+2. In `init_app_state` or test setup:
+   ```rust
+   let languages = Arc::new(LanguageRegistry::new(cx.background_executor().clone()));
+   
+   // Register Rust grammar
+   languages.register_native_grammars([
+       ("rust", tree_sitter_rust::LANGUAGE.into()),
+   ]);
+   
+   // Register the Rust language config
+   languages.register_available_language(
+       LanguageConfig {
+           name: "Rust".into(),
+           grammar: Some("rust".into()),
+           matcher: LanguageMatcher {
+               path_suffixes: vec!["rs".into()],
+               ..Default::default()
+           },
+           ..Default::default()
+       },
+       tree_sitter_rust::LANGUAGE.into(),
+       vec![],  // No LSP adapters needed for visual tests
+   );
+   ```
+
+### How to Verify
+
+1. Update baselines after adding syntax highlighting
+2. Screenshots should show colored code instead of plain text
+3. Tests should still be deterministic (same colors every time)
+
+---
+
+## Improvement 6: Better Test Organization
+
+### Problem
+
+Tests are numbered in comments but the structure is ad-hoc:
+```rust
+// Run Test 1: Project Panel
+// Run Test 2: Workspace with Editor  
+// Run Test 3: Agent Thread View
+```
+
+### Solution
+
+Create a test registry or use a more structured approach.
+
+### How to Implement
+
+1. **Define tests as structs**:
+
+   ```rust
+   trait VisualTest {
+       fn name(&self) -> &'static str;
+       async fn setup(&self, cx: &mut AsyncApp) -> Result<AnyWindowHandle>;
+       async fn run(&self, window: AnyWindowHandle, cx: &mut AsyncApp, update_baseline: bool) -> Result<TestResult>;
+   }
+   
+   struct ProjectPanelTest;
+   struct WorkspaceEditorTest;
+   struct AgentThreadTest;
+   
+   impl VisualTest for ProjectPanelTest {
+       fn name(&self) -> &'static str { "project_panel" }
+       // ...
+   }
+   ```
+
+2. **Run all tests from a registry**:
+
+   ```rust
+   let tests: Vec<Box<dyn VisualTest>> = vec![
+       Box::new(ProjectPanelTest),
+       Box::new(WorkspaceEditorTest),
+       Box::new(AgentThreadTest),
+   ];
+   
+   let mut failed = false;
+   for test in tests {
+       match test.run(...).await {
+           Ok(_) => {},
+           Err(_) => failed = true,
+       }
+   }
+   ```
+
+This makes it easy to:
+- Add new tests (just add to the vec)
+- Run specific tests (filter by name)
+- See all tests in one place
+
+---
+
+## Improvement 7: Constants and Configuration
+
+### Problem
+
+Magic numbers scattered throughout:
+```rust
+size(px(1280.0), px(800.0))  // Window size for workspace tests
+size(px(500.0), px(900.0))   // Window size for agent panel
+Duration::from_millis(500)   // Various delays
+const MATCH_THRESHOLD: f64 = 0.99;  // Already a constant, good!
+```
+
+### Solution
+
+Move all configuration to constants at the top of the file.
+
+### How to Implement
+
+```rust
+// Window sizes
+const WORKSPACE_WINDOW_SIZE: Size<Pixels> = size(px(1280.0), px(800.0));
+const AGENT_PANEL_WINDOW_SIZE: Size<Pixels> = size(px(500.0), px(900.0));
+
+// Timing (if any delays are still needed after Improvement 1)
+const FRAME_DELAY: Duration = Duration::from_millis(16);
+
+// Image comparison
+const MATCH_THRESHOLD: f64 = 0.99;
+const PIXEL_TOLERANCE: i16 = 2;  // Currently hardcoded in pixels_are_similar()
+```
+
+---
+
+## Summary: Recommended Order of Implementation
+
+1. **Remove dead code** (Improvement 3) - Quick win, low risk
+2. **Add constants** (Improvement 7) - Quick win, improves readability
+3. **Use FakeFs** (Improvement 2) - Medium effort, big determinism win
+4. **Replace timers** (Improvement 1) - Medium effort, biggest determinism win
+5. **Consolidate setup** (Improvement 4) - Medium effort, maintainability win
+6. **Exercise more production code** (Improvement 5) - Lower priority, nice to have
+7. **Better test organization** (Improvement 6) - Lower priority, nice to have for many tests
+
+## Testing Your Changes
+
+After each change:
+
+```bash
+# Build to check for compile errors
+cargo build -p zed --features visual-tests
+
+# Run clippy for warnings
+cargo clippy -p zed --features visual-tests
+
+# Run the tests
+cargo run -p zed --bin zed_visual_test_runner --features visual-tests
+
+# If screenshots changed intentionally, update baselines
+UPDATE_BASELINE=1 cargo run -p zed --bin zed_visual_test_runner --features visual-tests
+```
+
+Baseline images are stored in `crates/zed/test_fixtures/visual_tests/`.
+
+## Questions?
+
+If you get stuck:
+1. Look at existing tests in `crates/project_panel/src/project_panel_tests.rs` for examples of `FakeFs` and `run_until_parked()`
+2. Look at `crates/gpui/src/app/test_context.rs` for `VisualTestContext` documentation
+3. The CLAUDE.md file at the repo root has Rust coding guidelines