use std::{collections::hash_map, ops::Range, slice::ChunksExact, sync::Arc};

use anyhow::Result;

use clock::Global;
use collections::HashMap;
use futures::{
    FutureExt as _,
    future::{Shared, join_all},
};
use gpui::{App, AppContext, AsyncApp, Context, Entity, ReadGlobal as _, SharedString, Task};
use language::{Buffer, LanguageName, language_settings::all_language_settings};
use lsp::{AdapterServerCapabilities, LanguageServerId};
use rpc::{TypedEnvelope, proto};
use settings::{SemanticTokenRule, SemanticTokenRules, Settings as _, SettingsStore};
use smol::future::yield_now;
use text::{Anchor, Bias, OffsetUtf16, PointUtf16, Unclipped};
use util::ResultExt as _;

use crate::{
    LanguageServerToQuery, LspStore, LspStoreEvent,
    lsp_command::{
        LspCommand, SemanticTokensDelta, SemanticTokensEdit, SemanticTokensFull,
        SemanticTokensResponse,
    },
    project_settings::ProjectSettings,
};

pub(super) struct SemanticTokenConfig {
    stylizers: HashMap<(LanguageServerId, Option<LanguageName>), SemanticTokenStylizer>,
    rules: SemanticTokenRules,
    global_mode: settings::SemanticTokens,
}

impl SemanticTokenConfig {
    pub(super) fn new(cx: &App) -> Self {
        Self {
            stylizers: HashMap::default(),
            rules: ProjectSettings::get_global(cx)
                .global_lsp_settings
                .semantic_token_rules
                .clone(),
            global_mode: all_language_settings(None, cx).defaults.semantic_tokens,
        }
    }

    pub(super) fn remove_server_data(&mut self, server_id: LanguageServerId) {
        self.stylizers.retain(|&(id, _), _| id != server_id);
    }

    pub(super) fn update_rules(&mut self, new_rules: SemanticTokenRules) -> bool {
        if new_rules != self.rules {
            self.rules = new_rules;
            self.stylizers.clear();
            true
        } else {
            false
        }
    }

    pub(super) fn update_global_mode(&mut self, new_mode: settings::SemanticTokens) -> bool {
        if new_mode != self.global_mode {
            self.global_mode = new_mode;
            true
        } else {
            false
        }
    }
}

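/// A `workspace/semanticTokens/refresh` request from a language server.
///
/// `request_id` orders refreshes so that stale ones can be ignored once a newer
/// request has already invalidated the cached tokens.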
#[derive(Debug, Clone, Copy)]
pub struct RefreshForServer {
    pub server_id: LanguageServerId,
    pub request_id: Option<usize>,
}

impl LspStore {
    pub fn semantic_tokens(
        &mut self,
        buffer: Entity<Buffer>,
        refresh: Option<RefreshForServer>,
        cx: &mut Context<Self>,
    ) -> SemanticTokensTask {
        let version_queried_for = buffer.read(cx).version();
        let latest_lsp_data = self.latest_lsp_data(&buffer, cx);
        let semantic_tokens_data = latest_lsp_data.semantic_tokens.get_or_insert_default();
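        // A refresh only invalidates the cache if its request id is strictly
        // newer than the last one seen for that server; stale or repeated
        // refreshes leave the cached tokens in place.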
        if let Some(refresh) = refresh {
            let mut invalidate_cache = true;
            match semantic_tokens_data
                .latest_invalidation_requests
                .entry(refresh.server_id)
            {
                hash_map::Entry::Occupied(mut o) => {
                    if refresh.request_id > *o.get() {
                        o.insert(refresh.request_id);
                    } else {
                        invalidate_cache = false;
                    }
                }
                hash_map::Entry::Vacant(v) => {
                    v.insert(refresh.request_id);
                }
            }

            if invalidate_cache {
                let SemanticTokensData {
                    raw_tokens,
                    latest_invalidation_requests: _,
                    update,
                } = semantic_tokens_data;
                *update = None;
                raw_tokens.servers.clear();
            }
        }

        if let Some((updating_for, task)) = &semantic_tokens_data.update
            && !version_queried_for.changed_since(updating_for)
        {
            return task.clone();
        }

        let new_tokens = self.fetch_semantic_tokens_for_buffer(
            &buffer,
            refresh.map(|refresh| refresh.server_id),
            cx,
        );

        let task_buffer = buffer.clone();
        let task_version_queried_for = version_queried_for.clone();
        let task = cx
            .spawn(async move |lsp_store, cx| {
                let buffer = task_buffer;
                let version_queried_for = task_version_queried_for;
                let res = if let Some(new_tokens) = new_tokens.await {
                    let (raw_tokens, buffer_snapshot) = lsp_store
                        .update(cx, |lsp_store, cx| {
                            let lsp_data = lsp_store.latest_lsp_data(&buffer, cx);
                            let semantic_tokens_data =
                                lsp_data.semantic_tokens.get_or_insert_default();

                            if version_queried_for == lsp_data.buffer_version {
                                for (server_id, new_tokens_response) in new_tokens {
                                    match new_tokens_response {
                                        SemanticTokensResponse::Full { data, result_id } => {
                                            semantic_tokens_data.raw_tokens.servers.insert(
                                                server_id,
                                                Arc::new(ServerSemanticTokens::from_full(
                                                    data, result_id,
                                                )),
                                            );
                                        }
                                        SemanticTokensResponse::Delta { edits, result_id } => {
                                            if let Some(tokens) = semantic_tokens_data
                                                .raw_tokens
                                                .servers
                                                .get_mut(&server_id)
                                            {
                                                let tokens = Arc::make_mut(tokens);
                                                tokens.result_id = result_id;
                                                tokens.apply(&edits);
                                            }
                                        }
                                    }
                                }
                            }
                            let buffer_snapshot =
                                buffer.read_with(cx, |buffer, _| buffer.snapshot());
                            (semantic_tokens_data.raw_tokens.clone(), buffer_snapshot)
                        })
                        .map_err(Arc::new)?;
                    Some(
                        cx.background_spawn(raw_to_buffer_semantic_tokens(
                            raw_tokens,
                            buffer_snapshot.text.clone(),
                        ))
                        .await,
                    )
                } else {
                    lsp_store.update(cx, |lsp_store, cx| {
                        if let Some(current_lsp_data) =
                            lsp_store.current_lsp_data(buffer.read(cx).remote_id())
                        {
                            if current_lsp_data.buffer_version == version_queried_for {
                                current_lsp_data.semantic_tokens = None;
                            }
                        }
                    })?;
                    None
                };
                Ok(BufferSemanticTokens { tokens: res })
            })
            .shared();

        self.latest_lsp_data(&buffer, cx)
            .semantic_tokens
            .get_or_insert_default()
            .update = Some((version_queried_for, task.clone()));

        task
    }

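    /// Fetches fresh semantic tokens for the buffer: via the upstream client in
    /// remote projects, otherwise directly from each matching local server. A
    /// delta request is preferred when a previous `result_id` is cached and the
    /// server supports deltas; otherwise a full request is sent.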
    pub(super) fn fetch_semantic_tokens_for_buffer(
        &mut self,
        buffer: &Entity<Buffer>,
        for_server: Option<LanguageServerId>,
        cx: &mut Context<Self>,
    ) -> Task<Option<HashMap<LanguageServerId, SemanticTokensResponse>>> {
        if let Some((client, upstream_project_id)) = self.upstream_client() {
            let request = SemanticTokensFull { for_server };
            if !self.is_capable_for_proto_request(buffer, &request, cx) {
                return Task::ready(None);
            }

            let request_timeout = ProjectSettings::get_global(cx)
                .global_lsp_settings
                .get_request_timeout();
            let request_task = client.request_lsp(
                upstream_project_id,
                None,
                request_timeout,
                cx.background_executor().clone(),
                request.to_proto(upstream_project_id, buffer.read(cx)),
            );
            let buffer = buffer.clone();
            cx.spawn(async move |weak_lsp_store, cx| {
                let lsp_store = weak_lsp_store.upgrade()?;
                let tokens = join_all(
                    request_task
                        .await
                        .log_err()
                        .flatten()
                        .map(|response| response.payload)
                        .unwrap_or_default()
                        .into_iter()
                        .map(|response| {
                            let server_id = LanguageServerId::from_proto(response.server_id);
                            let response = request.response_from_proto(
                                response.response,
                                lsp_store.clone(),
                                buffer.clone(),
                                cx.clone(),
                            );
                            async move {
                                match response.await {
                                    Ok(tokens) => Some((server_id, tokens)),
                                    Err(e) => {
                                        log::error!("Failed to query remote semantic tokens for server {server_id:?}: {e:#}");
                                        None
                                    }
                                }
                            }
                        }),
                )
                .await
                .into_iter()
                .flatten()
                .collect();
                Some(tokens)
            })
        } else {
            let token_tasks = self
                .local_lsp_servers_for_buffer(&buffer, cx)
                .into_iter()
                .filter(|&server_id| {
                    for_server.is_none_or(|for_server_id| for_server_id == server_id)
                })
                .filter_map(|server_id| {
                    let capabilities = AdapterServerCapabilities {
                        server_capabilities: self.lsp_server_capabilities.get(&server_id)?.clone(),
                        code_action_kinds: None,
                    };
                    let request_task = match self.semantic_tokens_result_id(server_id, buffer, cx) {
                        Some(result_id) => {
                            let delta_request = SemanticTokensDelta {
                                previous_result_id: result_id,
                            };
                            if !delta_request.check_capabilities(capabilities.clone()) {
                                let full_request = SemanticTokensFull {
                                    for_server: Some(server_id),
                                };
                                if !full_request.check_capabilities(capabilities) {
                                    return None;
                                }

                                self.request_lsp(
                                    buffer.clone(),
                                    LanguageServerToQuery::Other(server_id),
                                    full_request,
                                    cx,
                                )
                            } else {
                                self.request_lsp(
                                    buffer.clone(),
                                    LanguageServerToQuery::Other(server_id),
                                    delta_request,
                                    cx,
                                )
                            }
                        }
                        None => {
                            let request = SemanticTokensFull {
                                for_server: Some(server_id),
                            };
                            if !request.check_capabilities(capabilities) {
                                return None;
                            }
                            self.request_lsp(
                                buffer.clone(),
                                LanguageServerToQuery::Other(server_id),
                                request,
                                cx,
                            )
                        }
                    };
                    Some(async move { (server_id, request_task.await) })
                })
                .collect::<Vec<_>>();
            if token_tasks.is_empty() {
                return Task::ready(None);
            }

            cx.background_spawn(async move {
                Some(
                    join_all(token_tasks)
                        .await
                        .into_iter()
                        .flat_map(|(server_id, response)| {
                            match response {
                                Ok(tokens) => Some((server_id, tokens)),
                                Err(e) => {
                                    log::error!("Failed to query semantic tokens for server {server_id:?}: {e:#}");
                                    None
                                }
                            }
                        })
                        .collect(),
                )
            })
        }
    }

    pub(crate) async fn handle_refresh_semantic_tokens(
        lsp_store: Entity<Self>,
        envelope: TypedEnvelope<proto::RefreshSemanticTokens>,
        mut cx: AsyncApp,
    ) -> Result<proto::Ack> {
        lsp_store.update(&mut cx, |_, cx| {
            cx.emit(LspStoreEvent::RefreshSemanticTokens {
                server_id: LanguageServerId::from_proto(envelope.payload.server_id),
                request_id: envelope.payload.request_id.map(|id| id as usize),
            });
        })?;
        Ok(proto::Ack {})
    }

    fn semantic_tokens_result_id(
        &mut self,
        server_id: LanguageServerId,
        buffer: &Entity<Buffer>,
        cx: &mut App,
    ) -> Option<SharedString> {
        self.latest_lsp_data(buffer, cx)
            .semantic_tokens
            .as_ref()?
            .raw_tokens
            .servers
            .get(&server_id)?
            .result_id
            .clone()
    }

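    /// Returns the stylizer for the given `(server, language)` pair, creating it
    /// on first use from the server's semantic token legend and the configured
    /// semantic token rules.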
    pub fn get_or_create_token_stylizer(
        &mut self,
        server_id: LanguageServerId,
        language: Option<&LanguageName>,
        cx: &mut App,
    ) -> Option<&SemanticTokenStylizer> {
        let stylizer = match self
            .semantic_token_config
            .stylizers
            .entry((server_id, language.cloned()))
        {
            hash_map::Entry::Occupied(o) => o.into_mut(),
            hash_map::Entry::Vacant(v) => {
                let tokens_provider = self
                    .lsp_server_capabilities
                    .get(&server_id)?
                    .semantic_tokens_provider
                    .as_ref()?;
                let legend = match tokens_provider {
                    lsp::SemanticTokensServerCapabilities::SemanticTokensOptions(opts) => {
                        &opts.legend
                    }
                    lsp::SemanticTokensServerCapabilities::SemanticTokensRegistrationOptions(
                        opts,
                    ) => &opts.semantic_tokens_options.legend,
                };
                let language_rules = language.and_then(|language| {
                    SettingsStore::global(cx).language_semantic_token_rules(language.as_ref())
                });
                let stylizer = SemanticTokenStylizer::new(server_id, legend, language_rules, cx);
                v.insert(stylizer)
            }
        };
        Some(stylizer)
    }
}

pub type SemanticTokensTask =
    Shared<Task<std::result::Result<BufferSemanticTokens, Arc<anyhow::Error>>>>;

#[derive(Debug, Default, Clone)]
pub struct BufferSemanticTokens {
    pub tokens: Option<HashMap<LanguageServerId, Arc<[BufferSemanticToken]>>>,
}

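/// An index into a server's `SemanticTokensLegend::token_types`.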
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TokenType(pub u32);

#[derive(Debug, Clone)]
pub struct BufferSemanticToken {
    /// The range of the token in the buffer.
    ///
    /// Guaranteed to contain a buffer id.
    pub range: Range<Anchor>,
    pub token_type: TokenType,
    pub token_modifiers: u32,
}

pub struct SemanticTokenStylizer {
    server_id: LanguageServerId,
    rules_by_token_type: HashMap<TokenType, Vec<SemanticTokenRule>>,
    token_type_names: HashMap<TokenType, SharedString>,
    modifier_mask: HashMap<SharedString, u32>,
}

impl SemanticTokenStylizer {
    pub fn new(
        server_id: LanguageServerId,
        legend: &lsp::SemanticTokensLegend,
        language_rules: Option<&SemanticTokenRules>,
        cx: &App,
    ) -> Self {
        let token_types: HashMap<TokenType, SharedString> = legend
            .token_types
            .iter()
            .enumerate()
            .map(|(i, token_type)| {
                (
                    TokenType(i as u32),
                    SharedString::from(token_type.as_str().to_string()),
                )
            })
            .collect();
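        // Each modifier in the legend owns one bit of a token's `token_modifiers`
        // bitset, in legend order (modifier `i` maps to bit `1 << i`).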
        let modifier_mask: HashMap<SharedString, u32> = legend
            .token_modifiers
            .iter()
            .enumerate()
            .map(|(i, modifier)| (SharedString::from(modifier.as_str().to_string()), 1 << i))
            .collect();

        let global_rules = &ProjectSettings::get_global(cx)
            .global_lsp_settings
            .semantic_token_rules;

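        // Global rules are chained before language rules and the combined list is
        // reversed, so language-specific (and later-defined) rules match first.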
        let rules_by_token_type = token_types
            .iter()
            .map(|(index, token_type_name)| {
                let filter = |rule: &&SemanticTokenRule| {
                    rule.token_type
                        .as_ref()
                        .is_none_or(|rule_token_type| rule_token_type == token_type_name.as_ref())
                };
                let matching_rules: Vec<SemanticTokenRule> = global_rules
                    .rules
                    .iter()
                    .chain(language_rules.into_iter().flat_map(|lr| &lr.rules))
                    .rev()
                    .filter(filter)
                    .cloned()
                    .collect();
                (*index, matching_rules)
            })
            .collect();

        SemanticTokenStylizer {
            server_id,
            rules_by_token_type,
            token_type_names: token_types,
            modifier_mask,
        }
    }

    pub fn server_id(&self) -> LanguageServerId {
        self.server_id
    }

    pub fn token_type_name(&self, token_type: TokenType) -> Option<&SharedString> {
        self.token_type_names.get(&token_type)
    }

    pub fn has_modifier(&self, token_modifiers: u32, modifier: &str) -> bool {
        let Some(mask) = self.modifier_mask.get(modifier) else {
            return false;
        };
        (token_modifiers & mask) != 0
    }

    pub fn token_modifiers(&self, token_modifiers: u32) -> Option<String> {
        let modifiers: Vec<&str> = self
            .modifier_mask
            .iter()
            .filter(|(_, mask)| (token_modifiers & *mask) != 0)
            .map(|(name, _)| name.as_ref())
            .collect();
        if modifiers.is_empty() {
            None
        } else {
            Some(modifiers.join(", "))
        }
    }

    pub fn rules_for_token(&self, token_type: TokenType) -> Option<&[SemanticTokenRule]> {
        self.rules_by_token_type
            .get(&token_type)
            .map(|v| v.as_slice())
    }
}
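
/// Converts each server's decoded tokens into anchored buffer ranges, dropping
/// tokens that are empty after clipping or that would move backwards, and
/// yielding between chunks so very large files don't hold the executor for long.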
async fn raw_to_buffer_semantic_tokens(
    raw_tokens: RawSemanticTokens,
    buffer_snapshot: text::BufferSnapshot,
) -> HashMap<LanguageServerId, Arc<[BufferSemanticToken]>> {
    let mut res = HashMap::default();
    for (&server_id, server_tokens) in &raw_tokens.servers {
        let mut last = 0;
        // We avoid `collect` here because the filter_map below cannot pre-allocate;
        // since we re-allocate into an `Arc` slice anyway, we'd rather over-allocate
        // than not.
        let mut buffer_tokens = Vec::with_capacity(server_tokens.data.len() / 5);
        let mut tokens = server_tokens.tokens();
        // 5000 was chosen by profiling: on a decent machine each chunk takes about 1ms.
        // This avoids blocking the main thread for hundreds of milliseconds at a time
        // on very big files. If we ever change the code below to query the underlying
        // rope fewer than 6 times per token, this can be bumped up.
        const CHUNK_LEN: usize = 5000;
        loop {
            let mut changed = false;
            let chunk = tokens
                .by_ref()
                .take(CHUNK_LEN)
                .inspect(|_| changed = true)
                .filter_map(|token| {
                    let start = Unclipped(PointUtf16::new(token.line, token.start));
                    let clipped_start = buffer_snapshot.clip_point_utf16(start, Bias::Left);
                    let start_offset = buffer_snapshot
                        .as_rope()
                        .point_utf16_to_offset_utf16(clipped_start);
                    let end_offset = start_offset + OffsetUtf16(token.length as usize);

                    let start = buffer_snapshot
                        .as_rope()
                        .offset_utf16_to_offset(start_offset);
                    if start < last {
                        return None;
                    }

                    let end = buffer_snapshot.as_rope().offset_utf16_to_offset(end_offset);
                    if end < last {
                        return None;
                    }
                    last = end;

                    if start == end {
                        return None;
                    }

                    Some(BufferSemanticToken {
                        range: buffer_snapshot.anchor_before(start)
                            ..buffer_snapshot.anchor_after(end),
                        token_type: token.token_type,
                        token_modifiers: token.token_modifiers,
                    })
                });
            buffer_tokens.extend(chunk);

            if !changed {
                break;
            }
            yield_now().await;
        }

        res.insert(server_id, buffer_tokens.into());
    }
    res
}

#[derive(Default, Debug)]
pub struct SemanticTokensData {
    pub(super) raw_tokens: RawSemanticTokens,
    pub(super) latest_invalidation_requests: HashMap<LanguageServerId, Option<usize>>,
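    /// The buffer version an update is currently in flight for, together with
    /// the shared task that resolves to the updated tokens.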
    update: Option<(Global, SemanticTokensTask)>,
}

/// All the semantic tokens for a buffer.
///
/// This aggregates the semantic tokens reported by each language server that has
/// responded for the buffer, keyed by server id.
#[derive(Default, Debug, Clone)]
pub(super) struct RawSemanticTokens {
    pub servers: HashMap<lsp::LanguageServerId, Arc<ServerSemanticTokens>>,
}

/// All the semantic tokens for a buffer, from a single language server.
#[derive(Debug, Clone)]
pub struct ServerSemanticTokens {
    /// Tokens are delta-encoded as five consecutive values each:
    /// - `data[5*i]` - deltaLine: the token's line, relative to the previous token's line
    /// - `data[5*i+1]` - deltaStart: the token's start character, relative to the previous
    ///   token's start if they are on the same line, otherwise relative to 0
    /// - `data[5*i+2]` - length: the length of the token
    /// - `data[5*i+3]` - tokenType: an index into `SemanticTokensLegend.tokenTypes`
    ///   (the spec currently asks that tokenType < 65536)
    /// - `data[5*i+4]` - tokenModifiers: each set bit is looked up in
    ///   `SemanticTokensLegend.tokenModifiers`
    ///
    /// See <https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/> for more.
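    ///
    /// For example, the spec's sample stream `[2,5,3,0,3, 0,5,4,1,0, 3,2,7,2,0]`
    /// decodes to three tokens: line 2, char 5, len 3 (type 0, modifier bits 0b11);
    /// line 2, char 10, len 4 (type 1); and line 5, char 2, len 7 (type 2).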
    data: Vec<u32>,

    pub(crate) result_id: Option<SharedString>,
}

pub struct SemanticTokensIter<'a> {
    prev: Option<(u32, u32)>,
    data: ChunksExact<'a, u32>,
}

// A single decoded token record (five values) from `data`.
struct SemanticTokenValue {
    delta_line: u32,
    delta_start: u32,
    length: u32,
    token_type: TokenType,
    token_modifiers: u32,
}

/// A decoded semantic token, with its position fully resolved rather than
/// delta-encoded.
#[derive(Debug, PartialEq, Eq)]
pub struct SemanticToken {
    pub line: u32,
    pub start: u32,
    pub length: u32,
    pub token_type: TokenType,
    pub token_modifiers: u32,
}

impl ServerSemanticTokens {
    pub fn from_full(data: Vec<u32>, result_id: Option<SharedString>) -> Self {
        ServerSemanticTokens { data, result_id }
    }

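    /// Applies delta edits to the flat token data, in order. Edit offsets index
    /// individual `u32` values, not whole tokens; out-of-range offsets are
    /// clamped to the current data length rather than panicking.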
    pub(crate) fn apply(&mut self, edits: &[SemanticTokensEdit]) {
        for edit in edits {
            let start = (edit.start as usize).min(self.data.len());
            let end = (start + edit.delete_count as usize).min(self.data.len());
            self.data.splice(start..end, edit.data.iter().copied());
        }
    }

    pub fn tokens(&self) -> SemanticTokensIter<'_> {
        SemanticTokensIter {
            prev: None,
            data: self.data.chunks_exact(5),
        }
    }
}

impl Iterator for SemanticTokensIter<'_> {
    type Item = SemanticToken;

    fn next(&mut self) -> Option<Self::Item> {
        let chunk = self.data.next()?;
        let token = SemanticTokenValue {
            delta_line: chunk[0],
            delta_start: chunk[1],
            length: chunk[2],
            token_type: TokenType(chunk[3]),
            token_modifiers: chunk[4],
        };

        let (line, start) = if let Some((last_line, last_start)) = self.prev {
            let line = last_line + token.delta_line;
            let start = if token.delta_line == 0 {
                last_start + token.delta_start
            } else {
                token.delta_start
            };
            (line, start)
        } else {
            (token.delta_line, token.delta_start)
        };

        self.prev = Some((line, start));

        Some(SemanticToken {
            line,
            start,
            length: token.length,
            token_type: token.token_type,
            token_modifiers: token.token_modifiers,
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::lsp_command::SemanticTokensEdit;
    use lsp::SEMANTIC_TOKEN_MODIFIERS;

    fn modifier_names(bits: u32) -> String {
        if bits == 0 {
            return "-".to_string();
        }
        let names: Vec<&str> = SEMANTIC_TOKEN_MODIFIERS
            .iter()
            .enumerate()
            .filter(|(i, _)| bits & (1 << i) != 0)
            .map(|(_, m)| m.as_str())
            .collect();

        // Check for unknown bits
        let known_bits = (1u32 << SEMANTIC_TOKEN_MODIFIERS.len()) - 1;
        let unknown = bits & !known_bits;

        if unknown != 0 {
            let mut result = names.join("+");
            if !result.is_empty() {
                result.push('+');
            }
            result.push_str(&format!("?0x{:x}", unknown));
            result
        } else {
            names.join("+")
        }
    }

    /// Debug tool: parses semantic token JSON from LSP and prints human-readable output.
    ///
    /// Usage: Paste JSON into `json_input`, then run:
    ///     cargo test -p project debug_parse_tokens -- --nocapture --ignored
    ///
    /// Accepts either:
    /// - Full LSP response: `{"jsonrpc":"2.0","id":1,"result":{"data":[...]}}`
    /// - Just the data array: `[0,0,5,1,0,...]`
    ///
    /// For delta responses, paste multiple JSON messages (one per line) and they
    /// will be applied in sequence.
    ///
    /// Token encoding (5 values per token):
    ///     [deltaLine, deltaStart, length, tokenType, tokenModifiers]
    #[test]
    #[ignore] // Run with: cargo test -p project debug_parse_tokens -- --nocapture --ignored
    fn debug_parse_tokens() {
        // ============================================================
        // PASTE YOUR JSON HERE (one message per line for sequences)
        // Comments starting with // are ignored
        // ============================================================
        let json_input = r#"
// === EXAMPLE 1: Full response (LSP spec example) ===
// 3 tokens: property at line 2, type at line 2, class at line 5
{"jsonrpc":"2.0","id":1,"result":{"resultId":"1","data":[2,5,3,9,3,0,5,4,6,0,3,2,7,1,0]}}

// === EXAMPLE 2: Delta response ===
// User added empty line at start of file, so all tokens shift down by 1 line.
// This changes first token's deltaLine from 2 to 3 (edit at index 0).
{"jsonrpc":"2.0","id":2,"result":{"resultId":"2","edits":[{"start":0,"deleteCount":1,"data":[3]}]}}

// === EXAMPLE 3: Another delta ===
// User added a new token. Insert 5 values at position 5 (after first token).
// New token: same line as token 1, 2 chars after it ends, len 5, type=function(12), mods=definition(2)
{"jsonrpc":"2.0","id":3,"result":{"resultId":"3","edits":[{"start":5,"deleteCount":0,"data":[0,2,5,12,2]}]}}
        "#;
        // Accepted formats:
        // - Full response: {"result":{"data":[...]}}
        // - Delta response: {"result":{"edits":[{"start":N,"deleteCount":N,"data":[...]}]}}
        // - Just array: [0,0,5,1,0,...]

        // ============================================================
        // PROCESSING
        // ============================================================
        let mut current_data: Vec<u32> = Vec::new();
        let mut result_id: Option<String> = None;

        for line in json_input.lines() {
            let line = line.trim();
            if line.is_empty() || line.starts_with("//") {
                continue;
            }

            let parsed: serde_json::Value =
                serde_json::from_str(line).expect("Failed to parse JSON");

            // Try to extract data from various JSON shapes
            let (data, edits, new_result_id) = extract_semantic_tokens(&parsed);

            if let Some(new_id) = new_result_id {
                result_id = Some(new_id);
            }

            if let Some(full_data) = data {
                println!("\n{}", "=".repeat(70));
                println!("FULL RESPONSE (resultId: {:?})", result_id);
                current_data = full_data;
            } else if let Some(delta_edits) = edits {
                println!("\n{}", "=".repeat(70));
                println!(
                    "DELTA RESPONSE: {} edit(s) (resultId: {:?})",
                    delta_edits.len(),
                    result_id
                );
                for (i, edit) in delta_edits.iter().enumerate() {
                    println!(
                        "  [{}] start={}, delete={}, insert {} values",
                        i,
                        edit.start,
                        edit.delete_count,
                        edit.data.len()
                    );
                }
                let mut tokens = ServerSemanticTokens::from_full(current_data.clone(), None);
                tokens.apply(&delta_edits);
                current_data = tokens.data;
            }
        }

        // Print parsed tokens
        println!(
            "\nDATA: {} values = {} tokens",
            current_data.len(),
            current_data.len() / 5
        );
        println!("\nPARSED TOKENS:");
        println!("{:-<100}", "");
        println!(
            "{:>5} {:>6} {:>4} {:<15} {}",
            "LINE", "START", "LEN", "TYPE", "MODIFIERS"
        );
        println!("{:-<100}", "");

        let tokens = ServerSemanticTokens::from_full(current_data, None);
        for token in tokens.tokens() {
            println!(
                "{:>5} {:>6} {:>4} {:<15} {}",
                token.line,
                token.start,
                token.length,
                token.token_type.0,
                modifier_names(token.token_modifiers),
            );
        }
        println!("{:-<100}", "");
        println!("{}\n", "=".repeat(100));
    }

    fn extract_semantic_tokens(
        value: &serde_json::Value,
    ) -> (
        Option<Vec<u32>>,
        Option<Vec<SemanticTokensEdit>>,
        Option<String>,
    ) {
        // Try as array directly: [1,2,3,...]
        if let Some(arr) = value.as_array() {
            let data: Vec<u32> = arr
                .iter()
                .filter_map(|v| v.as_u64().map(|n| n as u32))
                .collect();
            return (Some(data), None, None);
        }

        // Try as LSP response: {"result": {"data": [...]}} or {"result": {"edits": [...]}}
        let result = value.get("result").unwrap_or(value);
        let result_id = result
            .get("resultId")
            .and_then(|v| v.as_str())
            .map(String::from);

        // Full response with data
        if let Some(data_arr) = result.get("data").and_then(|v| v.as_array()) {
            let data: Vec<u32> = data_arr
                .iter()
                .filter_map(|v| v.as_u64().map(|n| n as u32))
                .collect();
            return (Some(data), None, result_id);
        }

        // Delta response with edits
        if let Some(edits_arr) = result.get("edits").and_then(|v| v.as_array()) {
            let edits: Vec<SemanticTokensEdit> = edits_arr
                .iter()
                .filter_map(|e| {
                    Some(SemanticTokensEdit {
                        start: e.get("start")?.as_u64()? as u32,
                        delete_count: e.get("deleteCount")?.as_u64()? as u32,
                        data: e
                            .get("data")
                            .and_then(|d| d.as_array())
                            .map(|arr| {
                                arr.iter()
                                    .filter_map(|v| v.as_u64().map(|n| n as u32))
                                    .collect()
                            })
                            .unwrap_or_default(),
                    })
                })
                .collect();
            return (None, Some(edits), result_id);
        }

        (None, None, result_id)
    }

    #[test]
    fn parses_sample_tokens() {
        // Example from the spec: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_semanticTokens
        let tokens = ServerSemanticTokens::from_full(
            vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0, 3, 2, 7, 2, 0],
            None,
        )
        .tokens()
        .collect::<Vec<SemanticToken>>();

        // The spec uses 1-based line numbers and 0-based character numbers. This test uses 0-based for both.
        assert_eq!(
            tokens,
            &[
                SemanticToken {
                    line: 2,
                    start: 5,
                    length: 3,
                    token_type: TokenType(0),
                    token_modifiers: 3
                },
                SemanticToken {
                    line: 2,
                    start: 10,
                    length: 4,
                    token_type: TokenType(1),
                    token_modifiers: 0
                },
                SemanticToken {
                    line: 5,
                    start: 2,
                    length: 7,
                    token_type: TokenType(2),
                    token_modifiers: 0
                }
            ]
        );
    }

    #[test]
    fn applies_delta_edit() {
        // Example from the spec: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_semanticTokens
        // After a user types a new empty line at the beginning of the file,
        // the tokens shift down by one line. The delta edit transforms
        // [2,5,3,0,3, 0,5,4,1,0, 3,2,7,2,0] into [3,5,3,0,3, 0,5,4,1,0, 3,2,7,2,0]
        // by changing the first element (the first token's deltaLine) from 2 to 3.

        let mut tokens = ServerSemanticTokens::from_full(
            vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0, 3, 2, 7, 2, 0],
            None,
        );

        tokens.apply(&[SemanticTokensEdit {
            start: 0,
            delete_count: 1,
            data: vec![3],
        }]);

        let result = tokens.tokens().collect::<Vec<SemanticToken>>();

        assert_eq!(
            result,
            &[
                SemanticToken {
                    line: 3,
                    start: 5,
                    length: 3,
                    token_type: TokenType(0),
                    token_modifiers: 3
                },
                SemanticToken {
                    line: 3,
                    start: 10,
                    length: 4,
                    token_type: TokenType(1),
                    token_modifiers: 0
                },
                SemanticToken {
                    line: 6,
                    start: 2,
                    length: 7,
                    token_type: TokenType(2),
                    token_modifiers: 0
                }
            ]
        );
    }

    #[test]
    fn applies_out_of_bounds_delta_edit_without_panic() {
        let mut tokens = ServerSemanticTokens::from_full(vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0], None);

        // start beyond data length
        tokens.apply(&[SemanticTokensEdit {
            start: 100,
            delete_count: 5,
            data: vec![1, 2, 3, 4, 5],
        }]);
        assert_eq!(
            tokens.data,
            vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0, 1, 2, 3, 4, 5]
        );

        // delete_count extends past data length
        let mut tokens = ServerSemanticTokens::from_full(vec![2, 5, 3, 0, 3], None);
        tokens.apply(&[SemanticTokensEdit {
            start: 3,
            delete_count: 100,
            data: vec![9, 9],
        }]);
        assert_eq!(tokens.data, vec![2, 5, 3, 9, 9]);

        // empty data
        let mut tokens = ServerSemanticTokens::from_full(Vec::new(), None);
        tokens.apply(&[SemanticTokensEdit {
            start: 0,
            delete_count: 5,
            data: vec![1, 2, 3, 4, 5],
        }]);
        assert_eq!(tokens.data, vec![1, 2, 3, 4, 5]);
    }
}