use std::{collections::hash_map, ops::Range, slice::ChunksExact, sync::Arc};

use anyhow::Result;

use clock::Global;
use collections::HashMap;
use futures::{
    FutureExt as _,
    future::{Shared, join_all},
};
use gpui::{App, AppContext, AsyncApp, Context, Entity, ReadGlobal as _, SharedString, Task};
use language::{Buffer, LanguageName, language_settings::all_language_settings};
use lsp::{AdapterServerCapabilities, LanguageServerId};
use rpc::{TypedEnvelope, proto};
use settings::{
    DefaultSemanticTokenRules, SemanticTokenRule, SemanticTokenRules, Settings as _, SettingsStore,
};
use smol::future::yield_now;

use text::{Anchor, Bias, OffsetUtf16, PointUtf16, Unclipped};
use util::ResultExt as _;

use crate::{
    LanguageServerToQuery, LspStore, LspStoreEvent,
    lsp_command::{
        LspCommand, SemanticTokensDelta, SemanticTokensEdit, SemanticTokensFull,
        SemanticTokensResponse,
    },
    project_settings::ProjectSettings,
};

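/// Project-wide semantic token configuration: lazily built
/// [`SemanticTokenStylizer`]s cached per `(server, language)` pair, plus the
/// globally configured rules and mode.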
pub(super) struct SemanticTokenConfig {
    stylizers: HashMap<(LanguageServerId, Option<LanguageName>), SemanticTokenStylizer>,
    rules: SemanticTokenRules,
    global_mode: settings::SemanticTokens,
}

impl SemanticTokenConfig {
    pub(super) fn new(cx: &App) -> Self {
        Self {
            stylizers: HashMap::default(),
            rules: ProjectSettings::get_global(cx)
                .global_lsp_settings
                .semantic_token_rules
                .clone(),
            global_mode: all_language_settings(None, cx).defaults.semantic_tokens,
        }
    }

    pub(super) fn remove_server_data(&mut self, server_id: LanguageServerId) {
        self.stylizers.retain(|&(id, _), _| id != server_id);
    }

    pub(super) fn update_rules(&mut self, new_rules: SemanticTokenRules) -> bool {
        if new_rules != self.rules {
            self.rules = new_rules;
            self.stylizers.clear();
            true
        } else {
            false
        }
    }

    /// Clears all cached stylizers.
    ///
    /// This is called when settings change to ensure that any modifications to
    /// language-specific semantic token rules (e.g. from extension install/uninstall)
    /// are picked up. Stylizers are recreated lazily on next use.
    pub(super) fn clear_stylizers(&mut self) {
        self.stylizers.clear();
    }

    pub(super) fn update_global_mode(&mut self, new_mode: settings::SemanticTokens) -> bool {
        if new_mode != self.global_mode {
            self.global_mode = new_mode;
            true
        } else {
            false
        }
    }
}

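/// A server-initiated `workspace/semanticTokens/refresh` request. Only a
/// refresh with a `request_id` newer than the last one seen for its server
/// invalidates the cached tokens.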
#[derive(Debug, Clone, Copy)]
pub struct RefreshForServer {
    pub server_id: LanguageServerId,
    pub request_id: Option<usize>,
}

impl LspStore {
    pub fn semantic_tokens(
        &mut self,
        buffer: Entity<Buffer>,
        refresh: Option<RefreshForServer>,
        cx: &mut Context<Self>,
    ) -> SemanticTokensTask {
        let version_queried_for = buffer.read(cx).version();
        let latest_lsp_data = self.latest_lsp_data(&buffer, cx);
        let semantic_tokens_data = latest_lsp_data.semantic_tokens.get_or_insert_default();
        if let Some(refresh) = refresh {
            let mut invalidate_cache = true;
            match semantic_tokens_data
                .latest_invalidation_requests
                .entry(refresh.server_id)
            {
                hash_map::Entry::Occupied(mut o) => {
                    if refresh.request_id > *o.get() {
                        o.insert(refresh.request_id);
                    } else {
                        invalidate_cache = false;
                    }
                }
                hash_map::Entry::Vacant(v) => {
                    v.insert(refresh.request_id);
                }
            }

            if invalidate_cache {
                let SemanticTokensData {
                    raw_tokens,
                    latest_invalidation_requests: _,
                    update,
                } = semantic_tokens_data;
                *update = None;
                raw_tokens.servers.clear();
            }
        }

        if let Some((updating_for, task)) = &semantic_tokens_data.update
            && !version_queried_for.changed_since(updating_for)
        {
            return task.clone();
        }

        let new_tokens = self.fetch_semantic_tokens_for_buffer(
            &buffer,
            refresh.map(|refresh| refresh.server_id),
            cx,
        );

        let task_buffer = buffer.clone();
        let task_version_queried_for = version_queried_for.clone();
        let task = cx
            .spawn(async move |lsp_store, cx| {
                let buffer = task_buffer;
                let version_queried_for = task_version_queried_for;
                let res = if let Some(new_tokens) = new_tokens.await {
                    let (raw_tokens, buffer_snapshot) = lsp_store
                        .update(cx, |lsp_store, cx| {
                            let lsp_data = lsp_store.latest_lsp_data(&buffer, cx);
                            let semantic_tokens_data =
                                lsp_data.semantic_tokens.get_or_insert_default();

                            if version_queried_for == lsp_data.buffer_version {
                                for (server_id, new_tokens_response) in new_tokens {
                                    match new_tokens_response {
                                        SemanticTokensResponse::Full { data, result_id } => {
                                            semantic_tokens_data.raw_tokens.servers.insert(
                                                server_id,
                                                Arc::new(ServerSemanticTokens::from_full(
                                                    data, result_id,
                                                )),
                                            );
                                        }
                                        SemanticTokensResponse::Delta { edits, result_id } => {
                                            if let Some(tokens) = semantic_tokens_data
                                                .raw_tokens
                                                .servers
                                                .get_mut(&server_id)
                                            {
                                                let tokens = Arc::make_mut(tokens);
                                                tokens.result_id = result_id;
                                                tokens.apply(&edits);
                                            }
                                        }
                                    }
                                }
                            }
                            let buffer_snapshot =
                                buffer.read_with(cx, |buffer, _| buffer.snapshot());
                            (semantic_tokens_data.raw_tokens.clone(), buffer_snapshot)
                        })
                        .map_err(Arc::new)?;
                    Some(
                        cx.background_spawn(raw_to_buffer_semantic_tokens(
                            raw_tokens,
                            buffer_snapshot.text.clone(),
                        ))
                        .await,
                    )
                } else {
                    lsp_store.update(cx, |lsp_store, cx| {
                        if let Some(current_lsp_data) =
                            lsp_store.current_lsp_data(buffer.read(cx).remote_id())
                        {
                            if current_lsp_data.buffer_version == version_queried_for {
                                current_lsp_data.semantic_tokens = None;
                            }
                        }
                    })?;
                    None
                };
                Ok(BufferSemanticTokens { tokens: res })
            })
            .shared();

        self.latest_lsp_data(&buffer, cx)
            .semantic_tokens
            .get_or_insert_default()
            .update = Some((version_queried_for, task.clone()));

        task
    }

    pub(super) fn fetch_semantic_tokens_for_buffer(
        &mut self,
        buffer: &Entity<Buffer>,
        for_server: Option<LanguageServerId>,
        cx: &mut Context<Self>,
    ) -> Task<Option<HashMap<LanguageServerId, SemanticTokensResponse>>> {
        if let Some((client, upstream_project_id)) = self.upstream_client() {
            let request = SemanticTokensFull { for_server };
            if !self.is_capable_for_proto_request(buffer, &request, cx) {
                return Task::ready(None);
            }

            let request_timeout = ProjectSettings::get_global(cx)
                .global_lsp_settings
                .get_request_timeout();
            let request_task = client.request_lsp(
                upstream_project_id,
                None,
                request_timeout,
                cx.background_executor().clone(),
                request.to_proto(upstream_project_id, buffer.read(cx)),
            );
            let buffer = buffer.clone();
            cx.spawn(async move |weak_lsp_store, cx| {
                let lsp_store = weak_lsp_store.upgrade()?;
                let tokens = join_all(
                    request_task
                        .await
                        .log_err()
                        .flatten()
                        .map(|response| response.payload)
                        .unwrap_or_default()
                        .into_iter()
                        .map(|response| {
                            let server_id = LanguageServerId::from_proto(response.server_id);
                            let response = request.response_from_proto(
                                response.response,
                                lsp_store.clone(),
                                buffer.clone(),
                                cx.clone(),
                            );
                            async move {
                                match response.await {
                                    Ok(tokens) => Some((server_id, tokens)),
                                    Err(e) => {
                                        log::error!("Failed to query remote semantic tokens for server {server_id:?}: {e:#}");
                                        None
                                    }
                                }
                            }
                        }),
                )
                .await
                .into_iter()
                .flatten()
                .collect();
                Some(tokens)
            })
        } else {
            let token_tasks = self
                .local_lsp_servers_for_buffer(buffer, cx)
                .into_iter()
                .filter(|&server_id| {
                    for_server.is_none_or(|for_server_id| for_server_id == server_id)
                })
                .filter_map(|server_id| {
                    let capabilities = AdapterServerCapabilities {
                        server_capabilities: self.lsp_server_capabilities.get(&server_id)?.clone(),
                        code_action_kinds: None,
                    };
                    let request_task = match self.semantic_tokens_result_id(server_id, buffer, cx) {
                        Some(result_id) => {
                            let delta_request = SemanticTokensDelta {
                                previous_result_id: result_id,
                            };
                            if !delta_request.check_capabilities(capabilities.clone()) {
                                let full_request = SemanticTokensFull {
                                    for_server: Some(server_id),
                                };
                                if !full_request.check_capabilities(capabilities) {
                                    return None;
                                }

                                self.request_lsp(
                                    buffer.clone(),
                                    LanguageServerToQuery::Other(server_id),
                                    full_request,
                                    cx,
                                )
                            } else {
                                self.request_lsp(
                                    buffer.clone(),
                                    LanguageServerToQuery::Other(server_id),
                                    delta_request,
                                    cx,
                                )
                            }
                        }
                        None => {
                            let request = SemanticTokensFull {
                                for_server: Some(server_id),
                            };
                            if !request.check_capabilities(capabilities) {
                                return None;
                            }
                            self.request_lsp(
                                buffer.clone(),
                                LanguageServerToQuery::Other(server_id),
                                request,
                                cx,
                            )
                        }
                    };
                    Some(async move { (server_id, request_task.await) })
                })
                .collect::<Vec<_>>();
            if token_tasks.is_empty() {
                return Task::ready(None);
            }

            cx.background_spawn(async move {
                Some(
                    join_all(token_tasks)
                        .await
                        .into_iter()
                        .flat_map(|(server_id, response)| {
                            match response {
                                Ok(tokens) => Some((server_id, tokens)),
                                Err(e) => {
                                    log::error!("Failed to query semantic tokens for server {server_id:?}: {e:#}");
                                    None
                                }
                            }
                        })
                        .collect(),
                )
            })
        }
    }

    pub(crate) async fn handle_refresh_semantic_tokens(
        lsp_store: Entity<Self>,
        envelope: TypedEnvelope<proto::RefreshSemanticTokens>,
        mut cx: AsyncApp,
    ) -> Result<proto::Ack> {
        lsp_store.update(&mut cx, |_, cx| {
            cx.emit(LspStoreEvent::RefreshSemanticTokens {
                server_id: LanguageServerId::from_proto(envelope.payload.server_id),
                request_id: envelope.payload.request_id.map(|id| id as usize),
            });
        })?;
        Ok(proto::Ack {})
    }

    fn semantic_tokens_result_id(
        &mut self,
        server_id: LanguageServerId,
        buffer: &Entity<Buffer>,
        cx: &mut App,
    ) -> Option<SharedString> {
        self.latest_lsp_data(buffer, cx)
            .semantic_tokens
            .as_ref()?
            .raw_tokens
            .servers
            .get(&server_id)?
            .result_id
            .clone()
    }

    pub fn get_or_create_token_stylizer(
        &mut self,
        server_id: LanguageServerId,
        language: Option<&LanguageName>,
        cx: &mut App,
    ) -> Option<&SemanticTokenStylizer> {
        let stylizer = match self
            .semantic_token_config
            .stylizers
            .entry((server_id, language.cloned()))
        {
            hash_map::Entry::Occupied(o) => o.into_mut(),
            hash_map::Entry::Vacant(v) => {
                let tokens_provider = self
                    .lsp_server_capabilities
                    .get(&server_id)?
                    .semantic_tokens_provider
                    .as_ref()?;
                let legend = match tokens_provider {
                    lsp::SemanticTokensServerCapabilities::SemanticTokensOptions(opts) => {
                        &opts.legend
                    }
                    lsp::SemanticTokensServerCapabilities::SemanticTokensRegistrationOptions(
                        opts,
                    ) => &opts.semantic_tokens_options.legend,
                };
                let language_rules = language.and_then(|language| {
                    SettingsStore::global(cx).language_semantic_token_rules(language.as_ref())
                });
                let stylizer = SemanticTokenStylizer::new(server_id, legend, language_rules, cx);
                v.insert(stylizer)
            }
        };
        Some(stylizer)
    }
}

pub type SemanticTokensTask =
    Shared<Task<std::result::Result<BufferSemanticTokens, Arc<anyhow::Error>>>>;

#[derive(Debug, Default, Clone)]
pub struct BufferSemanticTokens {
    pub tokens: Option<HashMap<LanguageServerId, Arc<[BufferSemanticToken]>>>,
}

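/// An index into the `token_types` array of the server's `SemanticTokensLegend`.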
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TokenType(pub u32);

#[derive(Debug, Clone)]
pub struct BufferSemanticToken {
    /// The range of the token in the buffer.
    ///
    /// Guaranteed to contain a buffer id.
    pub range: Range<Anchor>,
    pub token_type: TokenType,
    pub token_modifiers: u32,
}

pub struct SemanticTokenStylizer {
    server_id: LanguageServerId,
    rules_by_token_type: HashMap<TokenType, Vec<SemanticTokenRule>>,
    token_type_names: HashMap<TokenType, SharedString>,
    modifier_mask: HashMap<SharedString, u32>,
}

impl SemanticTokenStylizer {
    pub fn new(
        server_id: LanguageServerId,
        legend: &lsp::SemanticTokensLegend,
        language_rules: Option<&SemanticTokenRules>,
        cx: &App,
    ) -> Self {
        let token_types: HashMap<TokenType, SharedString> = legend
            .token_types
            .iter()
            .enumerate()
            .map(|(i, token_type)| {
                (
                    TokenType(i as u32),
                    SharedString::from(token_type.as_str().to_string()),
                )
            })
            .collect();
        let modifier_mask: HashMap<SharedString, u32> = legend
            .token_modifiers
            .iter()
            .enumerate()
            .map(|(i, modifier)| (SharedString::from(modifier.as_str().to_string()), 1 << i))
            .collect();

        let global_rules = &ProjectSettings::get_global(cx)
            .global_lsp_settings
            .semantic_token_rules;
        let default_rules = cx.global::<DefaultSemanticTokenRules>();

        let rules_by_token_type = token_types
            .iter()
            .map(|(index, token_type_name)| {
                let filter = |rule: &&SemanticTokenRule| {
                    rule.token_type
                        .as_ref()
                        .is_none_or(|rule_token_type| rule_token_type == token_type_name.as_ref())
                };
                let matching_rules: Vec<SemanticTokenRule> = global_rules
                    .rules
                    .iter()
                    .chain(language_rules.into_iter().flat_map(|lr| &lr.rules))
                    .chain(default_rules.0.rules.iter())
                    .rev()
                    .filter(filter)
                    .cloned()
                    .collect();
                (*index, matching_rules)
            })
            .collect();

        SemanticTokenStylizer {
            server_id,
            rules_by_token_type,
            token_type_names: token_types,
            modifier_mask,
        }
    }

    pub fn server_id(&self) -> LanguageServerId {
        self.server_id
    }

    pub fn token_type_name(&self, token_type: TokenType) -> Option<&SharedString> {
        self.token_type_names.get(&token_type)
    }

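    /// Returns whether `token_modifiers` has the bit set for the modifier with
    /// the given name in this server's legend; unknown names return `false`.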
    pub fn has_modifier(&self, token_modifiers: u32, modifier: &str) -> bool {
        let Some(mask) = self.modifier_mask.get(modifier) else {
            return false;
        };
        (token_modifiers & mask) != 0
    }

    pub fn token_modifiers(&self, token_modifiers: u32) -> Option<String> {
        let modifiers: Vec<&str> = self
            .modifier_mask
            .iter()
            .filter(|(_, mask)| (token_modifiers & *mask) != 0)
            .map(|(name, _)| name.as_ref())
            .collect();
        if modifiers.is_empty() {
            None
        } else {
            Some(modifiers.join(", "))
        }
    }

    pub fn rules_for_token(&self, token_type: TokenType) -> Option<&[SemanticTokenRule]> {
        self.rules_by_token_type
            .get(&token_type)
            .map(|v| v.as_slice())
    }
}

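/// Converts each server's raw, delta-encoded tokens into buffer-anchored tokens.
///
/// LSP positions are UTF-16 based, so each token is clipped and translated
/// through the buffer's rope before being anchored. The work is chunked,
/// yielding to the executor between chunks, so huge token sets don't block
/// the thread this future runs on.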
async fn raw_to_buffer_semantic_tokens(
    raw_tokens: RawSemanticTokens,
    buffer_snapshot: text::BufferSnapshot,
) -> HashMap<LanguageServerId, Arc<[BufferSemanticToken]>> {
    let mut res = HashMap::default();
    for (&server_id, server_tokens) in &raw_tokens.servers {
        let mut last = 0;
        // We don't `collect` here because the filter map doesn't pre-allocate;
        // we'd rather over-allocate than not, since we have to re-allocate into an `Arc` slice anyway.
        let mut buffer_tokens = Vec::with_capacity(server_tokens.data.len() / 5);
        let mut tokens = server_tokens.tokens();
        // 5000 was chosen by profiling; on a decent machine each chunk takes about 1ms.
        // This avoids blocking the main thread for hundreds of milliseconds at a time on very big files.
        // If we ever change the code below to not query the underlying rope 6 times per token, we can bump this up.
        const CHUNK_LEN: usize = 5000;
        loop {
            let mut changed = false;
            let chunk = tokens
                .by_ref()
                .take(CHUNK_LEN)
                .inspect(|_| changed = true)
                .filter_map(|token| {
                    let start = Unclipped(PointUtf16::new(token.line, token.start));
                    let clipped_start = buffer_snapshot.clip_point_utf16(start, Bias::Left);
                    let start_offset = buffer_snapshot
                        .as_rope()
                        .point_utf16_to_offset_utf16(clipped_start);
                    let end_offset = start_offset + OffsetUtf16(token.length as usize);

                    let start = buffer_snapshot
                        .as_rope()
                        .offset_utf16_to_offset(start_offset);
                    if start < last {
                        return None;
                    }

                    let end = buffer_snapshot.as_rope().offset_utf16_to_offset(end_offset);
                    last = end;

                    if start == end {
                        return None;
                    }

                    Some(BufferSemanticToken {
                        range: buffer_snapshot.anchor_range_inside(start..end),
                        token_type: token.token_type,
                        token_modifiers: token.token_modifiers,
                    })
                });
            buffer_tokens.extend(chunk);

            if !changed {
                break;
            }
            yield_now().await;
        }

        res.insert(server_id, buffer_tokens.into());
    }
    res
}

#[derive(Default, Debug)]
pub struct SemanticTokensData {
    pub(super) raw_tokens: RawSemanticTokens,
    pub(super) latest_invalidation_requests: HashMap<LanguageServerId, Option<usize>>,
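    /// The buffer version the tokens were last requested for, plus the shared
    /// task computing them; reused as long as the buffer hasn't changed since
    /// that version.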
    update: Option<(Global, SemanticTokensTask)>,
}

impl SemanticTokensData {
    pub(super) fn remove_server_data(&mut self, server_id: LanguageServerId) {
        self.raw_tokens.servers.remove(&server_id);
        self.latest_invalidation_requests.remove(&server_id);
        self.update = None;
    }
}

/// All the semantic tokens for a buffer.
///
/// This aggregates semantic tokens from multiple language servers in a specific order.
/// Semantic tokens later in the list will override earlier ones in case of overlap.
#[derive(Default, Debug, Clone)]
pub(super) struct RawSemanticTokens {
    pub servers: HashMap<lsp::LanguageServerId, Arc<ServerSemanticTokens>>,
}

/// All the semantic tokens for a buffer, from a single language server.
#[derive(Debug, Clone)]
pub struct ServerSemanticTokens {
    /// Each value is:
    /// data[5*i] - deltaLine: token line number, relative to the line of the previous token
    /// data[5*i+1] - deltaStart: token start character, relative to the previous token (relative to 0, or to the previous token's start if they are on the same line)
    /// data[5*i+2] - length: the length of the token
    /// data[5*i+3] - tokenType: will be looked up in SemanticTokensLegend.tokenTypes. We currently ask that tokenType < 65536.
    /// data[5*i+4] - tokenModifiers: each set bit will be looked up in SemanticTokensLegend.tokenModifiers
    ///
    /// See <https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/> for more.
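    ///
    /// For example (the LSP spec sample, also used in `parses_sample_tokens`
    /// below), `[2,5,3,0,3, 0,5,4,1,0, 3,2,7,2,0]` decodes to three tokens at
    /// absolute positions (line 2, char 5), (line 2, char 10), and (line 5, char 2).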
    data: Vec<u32>,

    pub(crate) result_id: Option<SharedString>,
}

pub struct SemanticTokensIter<'a> {
    prev: Option<(u32, u32)>,
    data: ChunksExact<'a, u32>,
}

/// A single delta-encoded, five-value record from `data`.
struct SemanticTokenValue {
    delta_line: u32,
    delta_start: u32,
    length: u32,
    token_type: TokenType,
    token_modifiers: u32,
}

/// A semantic token with the delta encoding resolved into absolute line and
/// start positions.
#[derive(Debug, PartialEq, Eq)]
pub struct SemanticToken {
    pub line: u32,
    pub start: u32,
    pub length: u32,
    pub token_type: TokenType,
    pub token_modifiers: u32,
}

impl ServerSemanticTokens {
    pub fn from_full(data: Vec<u32>, result_id: Option<SharedString>) -> Self {
        ServerSemanticTokens { data, result_id }
    }

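    /// Applies the edits from a `textDocument/semanticTokens/full/delta`
    /// response to the raw data, in the order given. Out-of-range `start` and
    /// `delete_count` values are clamped to the data length instead of
    /// panicking (see `applies_out_of_bounds_delta_edit_without_panic`).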
    pub(crate) fn apply(&mut self, edits: &[SemanticTokensEdit]) {
        for edit in edits {
            let start = (edit.start as usize).min(self.data.len());
            let end = (start + edit.delete_count as usize).min(self.data.len());
            self.data.splice(start..end, edit.data.iter().copied());
        }
    }

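    /// Returns an iterator decoding `data` into [`SemanticToken`]s with
    /// absolute positions. A trailing partial record (fewer than five values)
    /// is ignored, since `data` is read in exact chunks of five.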
    pub fn tokens(&self) -> SemanticTokensIter<'_> {
        SemanticTokensIter {
            prev: None,
            data: self.data.chunks_exact(5),
        }
    }
}

impl Iterator for SemanticTokensIter<'_> {
    type Item = SemanticToken;

    fn next(&mut self) -> Option<Self::Item> {
        let chunk = self.data.next()?;
        let token = SemanticTokenValue {
            delta_line: chunk[0],
            delta_start: chunk[1],
            length: chunk[2],
            token_type: TokenType(chunk[3]),
            token_modifiers: chunk[4],
        };

        let (line, start) = if let Some((last_line, last_start)) = self.prev {
            let line = last_line + token.delta_line;
            let start = if token.delta_line == 0 {
                last_start + token.delta_start
            } else {
                token.delta_start
            };
            (line, start)
        } else {
            (token.delta_line, token.delta_start)
        };

        self.prev = Some((line, start));

        Some(SemanticToken {
            line,
            start,
            length: token.length,
            token_type: token.token_type,
            token_modifiers: token.token_modifiers,
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::lsp_command::SemanticTokensEdit;
    use lsp::SEMANTIC_TOKEN_MODIFIERS;

    fn modifier_names(bits: u32) -> String {
        if bits == 0 {
            return "-".to_string();
        }
        let names: Vec<&str> = SEMANTIC_TOKEN_MODIFIERS
            .iter()
            .enumerate()
            .filter(|(i, _)| bits & (1 << i) != 0)
            .map(|(_, m)| m.as_str())
            .collect();

        // Check for unknown bits
        let known_bits = (1u32 << SEMANTIC_TOKEN_MODIFIERS.len()) - 1;
        let unknown = bits & !known_bits;

        if unknown != 0 {
            let mut result = names.join("+");
            if !result.is_empty() {
                result.push('+');
            }
            result.push_str(&format!("?0x{:x}", unknown));
            result
        } else {
            names.join("+")
        }
    }

    /// Debug tool: parses semantic token JSON from LSP and prints human-readable output.
    ///
    /// Usage: Paste JSON into `json_input`, then run:
    ///     cargo test -p project debug_parse_tokens -- --nocapture --ignored
    ///
    /// Accepts either:
    /// - Full LSP response: `{"jsonrpc":"2.0","id":1,"result":{"data":[...]}}`
    /// - Just the data array: `[0,0,5,1,0,...]`
    ///
    /// For delta responses, paste multiple JSON messages (one per line) and they
    /// will be applied in sequence.
    ///
    /// Token encoding (5 values per token):
    ///     [deltaLine, deltaStart, length, tokenType, tokenModifiers]
    #[test]
    #[ignore] // Run with: cargo test -p project debug_parse_tokens -- --nocapture --ignored
    fn debug_parse_tokens() {
        // ============================================================
        // PASTE YOUR JSON HERE (one message per line for sequences)
        // Comments starting with // are ignored
        // ============================================================
        let json_input = r#"
// === EXAMPLE 1: Full response (LSP spec example) ===
// 3 tokens: property at line 2, type at line 2, class at line 5
{"jsonrpc":"2.0","id":1,"result":{"resultId":"1","data":[2,5,3,9,3,0,5,4,6,0,3,2,7,1,0]}}

// === EXAMPLE 2: Delta response ===
// User added empty line at start of file, so all tokens shift down by 1 line.
// This changes first token's deltaLine from 2 to 3 (edit at index 0).
{"jsonrpc":"2.0","id":2,"result":{"resultId":"2","edits":[{"start":0,"deleteCount":1,"data":[3]}]}}

// === EXAMPLE 3: Another delta ===
// User added a new token. Insert 5 values at position 5 (after first token).
// New token: same line as token 1, 2 chars after it ends, len 5, type=function(12), mods=definition(2)
{"jsonrpc":"2.0","id":3,"result":{"resultId":"3","edits":[{"start":5,"deleteCount":0,"data":[0,2,5,12,2]}]}}
        "#;
        // Accepted formats:
        // - Full response: {"result":{"data":[...]}}
        // - Delta response: {"result":{"edits":[{"start":N,"deleteCount":N,"data":[...]}]}}
        // - Just array: [0,0,5,1,0,...]

        // ============================================================
        // PROCESSING
        // ============================================================
        let mut current_data: Vec<u32> = Vec::new();
        let mut result_id: Option<String> = None;

        for line in json_input.lines() {
            let line = line.trim();
            if line.is_empty() || line.starts_with("//") {
                continue;
            }

            let parsed: serde_json::Value =
                serde_json::from_str(line).expect("Failed to parse JSON");

            // Try to extract data from various JSON shapes
            let (data, edits, new_result_id) = extract_semantic_tokens(&parsed);

            if let Some(new_id) = new_result_id {
                result_id = Some(new_id);
            }

            if let Some(full_data) = data {
                println!("\n{}", "=".repeat(70));
                println!("FULL RESPONSE (resultId: {:?})", result_id);
                current_data = full_data;
            } else if let Some(delta_edits) = edits {
                println!("\n{}", "=".repeat(70));
                println!(
                    "DELTA RESPONSE: {} edit(s) (resultId: {:?})",
                    delta_edits.len(),
                    result_id
                );
                for (i, edit) in delta_edits.iter().enumerate() {
                    println!(
                        "  [{}] start={}, delete={}, insert {} values",
                        i,
                        edit.start,
                        edit.delete_count,
                        edit.data.len()
                    );
                }
                let mut tokens = ServerSemanticTokens::from_full(current_data.clone(), None);
                tokens.apply(&delta_edits);
                current_data = tokens.data;
            }
        }

        // Print parsed tokens
        println!(
            "\nDATA: {} values = {} tokens",
            current_data.len(),
            current_data.len() / 5
        );
        println!("\nPARSED TOKENS:");
        println!("{:-<100}", "");
        println!(
            "{:>5} {:>6} {:>4} {:<15} {}",
            "LINE", "START", "LEN", "TYPE", "MODIFIERS"
        );
        println!("{:-<100}", "");

        let tokens = ServerSemanticTokens::from_full(current_data, None);
        for token in tokens.tokens() {
            println!(
                "{:>5} {:>6} {:>4} {:<15} {}",
                token.line,
                token.start,
                token.length,
                token.token_type.0,
                modifier_names(token.token_modifiers),
            );
        }
        println!("{:-<100}", "");
        println!("{}\n", "=".repeat(100));
    }

    fn extract_semantic_tokens(
        value: &serde_json::Value,
    ) -> (
        Option<Vec<u32>>,
        Option<Vec<SemanticTokensEdit>>,
        Option<String>,
    ) {
        // Try as array directly: [1,2,3,...]
        if let Some(arr) = value.as_array() {
            let data: Vec<u32> = arr
                .iter()
                .filter_map(|v| v.as_u64().map(|n| n as u32))
                .collect();
            return (Some(data), None, None);
        }

        // Try as LSP response: {"result": {"data": [...]}} or {"result": {"edits": [...]}}
        let result = value.get("result").unwrap_or(value);
        let result_id = result
            .get("resultId")
            .and_then(|v| v.as_str())
            .map(String::from);

        // Full response with data
        if let Some(data_arr) = result.get("data").and_then(|v| v.as_array()) {
            let data: Vec<u32> = data_arr
                .iter()
                .filter_map(|v| v.as_u64().map(|n| n as u32))
                .collect();
            return (Some(data), None, result_id);
        }

        // Delta response with edits
        if let Some(edits_arr) = result.get("edits").and_then(|v| v.as_array()) {
            let edits: Vec<SemanticTokensEdit> = edits_arr
                .iter()
                .filter_map(|e| {
                    Some(SemanticTokensEdit {
                        start: e.get("start")?.as_u64()? as u32,
                        delete_count: e.get("deleteCount")?.as_u64()? as u32,
                        data: e
                            .get("data")
                            .and_then(|d| d.as_array())
                            .map(|arr| {
                                arr.iter()
                                    .filter_map(|v| v.as_u64().map(|n| n as u32))
                                    .collect()
                            })
                            .unwrap_or_default(),
                    })
                })
                .collect();
            return (None, Some(edits), result_id);
        }

        (None, None, result_id)
    }

    #[test]
    fn parses_sample_tokens() {
        // Example from the spec: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_semanticTokens
        let tokens = ServerSemanticTokens::from_full(
            vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0, 3, 2, 7, 2, 0],
            None,
        )
        .tokens()
        .collect::<Vec<SemanticToken>>();

        // The spec uses 1-based line numbers, and 0-based character numbers. This test uses 0-based for both.
        assert_eq!(
            tokens,
            &[
                SemanticToken {
                    line: 2,
                    start: 5,
                    length: 3,
                    token_type: TokenType(0),
                    token_modifiers: 3
                },
                SemanticToken {
                    line: 2,
                    start: 10,
                    length: 4,
                    token_type: TokenType(1),
                    token_modifiers: 0
                },
                SemanticToken {
                    line: 5,
                    start: 2,
                    length: 7,
                    token_type: TokenType(2),
                    token_modifiers: 0
                }
            ]
        );
    }

    #[test]
    fn applies_delta_edit() {
        // Example from the spec: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_semanticTokens
        // After a user types a new empty line at the beginning of the file,
        // the tokens shift down by one line. The delta edit transforms
        // [2,5,3,0,3, 0,5,4,1,0, 3,2,7,2,0] into [3,5,3,0,3, 0,5,4,1,0, 3,2,7,2,0]
        // by replacing the first element (deltaLine of first token) from 2 to 3.

        let mut tokens = ServerSemanticTokens::from_full(
            vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0, 3, 2, 7, 2, 0],
            None,
        );

        tokens.apply(&[SemanticTokensEdit {
            start: 0,
            delete_count: 1,
            data: vec![3],
        }]);

        let result = tokens.tokens().collect::<Vec<SemanticToken>>();

        assert_eq!(
            result,
            &[
                SemanticToken {
                    line: 3,
                    start: 5,
                    length: 3,
                    token_type: TokenType(0),
                    token_modifiers: 3
                },
                SemanticToken {
                    line: 3,
                    start: 10,
                    length: 4,
                    token_type: TokenType(1),
                    token_modifiers: 0
                },
                SemanticToken {
                    line: 6,
                    start: 2,
                    length: 7,
                    token_type: TokenType(2),
                    token_modifiers: 0
                }
            ]
        );
    }

    #[test]
    fn applies_out_of_bounds_delta_edit_without_panic() {
        let mut tokens = ServerSemanticTokens::from_full(vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0], None);

        // start beyond data length
        tokens.apply(&[SemanticTokensEdit {
            start: 100,
            delete_count: 5,
            data: vec![1, 2, 3, 4, 5],
        }]);
        assert_eq!(
            tokens.data,
            vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0, 1, 2, 3, 4, 5]
        );

        // delete_count extends past data length
        let mut tokens = ServerSemanticTokens::from_full(vec![2, 5, 3, 0, 3], None);
        tokens.apply(&[SemanticTokensEdit {
            start: 3,
            delete_count: 100,
            data: vec![9, 9],
        }]);
        assert_eq!(tokens.data, vec![2, 5, 3, 9, 9]);

        // empty data
        let mut tokens = ServerSemanticTokens::from_full(Vec::new(), None);
        tokens.apply(&[SemanticTokensEdit {
            start: 0,
            delete_count: 5,
            data: vec![1, 2, 3, 4, 5],
        }]);
        assert_eq!(tokens.data, vec![1, 2, 3, 4, 5]);
    }
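
    // An extra decoding check (values made up for illustration, not from the
    // LSP spec sample): `deltaStart` is relative to the previous token's start
    // only when both tokens are on the same line; a nonzero `deltaLine` makes
    // it an absolute column again.
    #[test]
    fn decodes_same_line_delta_start() {
        let tokens = ServerSemanticTokens::from_full(
            vec![0, 4, 2, 0, 0, 0, 3, 2, 1, 0, 1, 1, 2, 2, 0],
            None,
        )
        .tokens()
        .collect::<Vec<SemanticToken>>();

        assert_eq!((tokens[0].line, tokens[0].start), (0, 4));
        // Same line as the previous token: start is 4 + 3 = 7.
        assert_eq!((tokens[1].line, tokens[1].start), (0, 7));
        // New line: start resets to the raw deltaStart.
        assert_eq!((tokens[2].line, tokens[2].start), (1, 1));
    }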
}