diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index ef495d462b97b9994317fdb4025f9d5ef2c3a5cd..8bfc4efb1ffa00abe6dae4dec0427a4368e1bed2 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -129,6 +129,12 @@ where .unwrap(); parser }); + // Tree-sitter auto-resets the parser at the end of a successful parse, + // but the cancellation paths (progress callback returning `Break`, + // cancelled balancing) leave outstanding state on the parser. The next + // call to `parse_with_options` would then *resume* that cancelled parse + // instead of starting fresh. + parser.reset(); parser.set_included_ranges(&[]).unwrap(); let result = func(&mut parser); PARSERS.lock().push(parser); @@ -1677,6 +1683,82 @@ mod tests { ); } + #[test] + fn test_with_parser_resets_after_cancellation() { + use std::ops::ControlFlow; + use tree_sitter::{Language as TsLanguage, ParseOptions}; + + let rust_language: TsLanguage = tree_sitter_rust::LANGUAGE.into(); + + // Drain the shared pool so this test sees a deterministic LIFO order: + // the parser we push at the end of the first `with_parser` call is the + // one we pop at the start of the second call. + PARSERS.lock().clear(); + + // Large enough that tree-sitter invokes the progress callback before + // the parse completes; otherwise the cancellation never fires. + let large_input = format!("fn a() {{ {} }}", "b(c, d); e(f, g); ".repeat(5000)); + let small_input = "fn z() {}"; + + // Cancel a parse via the progress callback. Tree-sitter retains the + // in-progress parse state on the parser (its `canceled_balancing` flag + // and/or non-empty parse stack), and the next call to + // `parse_with_options` will *resume* that parse unless the parser is + // reset first. + let cancelled = with_parser(|parser| { + parser.set_language(&rust_language).unwrap(); + let bytes = large_input.as_bytes(); + let mut break_immediately = |_: &_| ControlFlow::Break(()); + parser.parse_with_options( + &mut |offset, _| { + if offset < bytes.len() { + &bytes[offset..] + } else { + &[] + } + }, + None, + Some(ParseOptions { + progress_callback: Some(&mut break_immediately), + }), + ) + }); + assert!( + cancelled.is_none(), + "first parse should be cancelled by the progress callback" + ); + + // Deliberately do NOT call `set_language` here: tree-sitter's + // `ts_parser_set_language` internally calls `ts_parser_reset`, which + // would mask the very bug we're checking for. Instead we rely on the + // language being preserved across `parser.reset()` (it is) and verify + // that `with_parser` itself produces a clean parser for the next user. + let tree = with_parser(|parser| { + let bytes = small_input.as_bytes(); + parser + .parse_with_options( + &mut |offset, _| { + if offset < bytes.len() { + &bytes[offset..] + } else { + &[] + } + }, + None, + None, + ) + .expect("parse of small_input should succeed") + }); + + assert_eq!(tree.root_node().byte_range(), 0..small_input.len()); + assert_eq!(tree.root_node().kind(), "source_file"); + assert!( + !tree.root_node().has_error(), + "tree should be error-free, got: {}", + tree.root_node().to_sexp() + ); + } + #[gpui::test(iterations = 10)] async fn test_language_loading(cx: &mut TestAppContext) {