use std::{collections::hash_map, ops::Range, slice::ChunksExact, sync::Arc};

use anyhow::Result;

use clock::Global;
use collections::HashMap;
use futures::{
    FutureExt as _,
    future::{Shared, join_all},
};
use gpui::{App, AppContext, AsyncApp, Context, Entity, ReadGlobal as _, SharedString, Task};
use itertools::Itertools;
use language::{Buffer, LanguageName, language_settings::all_language_settings};
use lsp::{AdapterServerCapabilities, LanguageServerId};
use rpc::{TypedEnvelope, proto};
use settings::{SemanticTokenRule, SemanticTokenRules, Settings as _, SettingsStore};
use smol::future::yield_now;
use text::{Anchor, Bias, OffsetUtf16, PointUtf16, Unclipped};
use util::ResultExt as _;

use crate::{
    LanguageServerToQuery, LspStore, LspStoreEvent,
    lsp_command::{
        LspCommand, SemanticTokensDelta, SemanticTokensEdit, SemanticTokensFull,
        SemanticTokensResponse,
    },
    project_settings::ProjectSettings,
};

pub(super) struct SemanticTokenConfig {
    stylizers: HashMap<(LanguageServerId, Option<LanguageName>), SemanticTokenStylizer>,
    rules: SemanticTokenRules,
    global_mode: settings::SemanticTokens,
}

impl SemanticTokenConfig {
    pub(super) fn new(cx: &App) -> Self {
        Self {
            stylizers: HashMap::default(),
            rules: ProjectSettings::get_global(cx)
                .global_lsp_settings
                .semantic_token_rules
                .clone(),
            global_mode: all_language_settings(None, cx).defaults.semantic_tokens,
        }
    }

    pub(super) fn remove_server_data(&mut self, server_id: LanguageServerId) {
        self.stylizers.retain(|&(id, _), _| id != server_id);
    }

    pub(super) fn update_rules(&mut self, new_rules: SemanticTokenRules) -> bool {
        if new_rules != self.rules {
            self.rules = new_rules;
            self.stylizers.clear();
            true
        } else {
            false
        }
    }

    pub(super) fn update_global_mode(&mut self, new_mode: settings::SemanticTokens) -> bool {
        if new_mode != self.global_mode {
            self.global_mode = new_mode;
            true
        } else {
            false
        }
    }
}

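/// A server-initiated request to re-fetch semantic tokens, surfaced via
/// `LspStoreEvent::RefreshSemanticTokens`. The optional `request_id` orders refreshes
/// so stale or replayed requests do not invalidate the token cache again.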
#[derive(Debug, Clone, Copy)]
pub struct RefreshForServer {
    pub server_id: LanguageServerId,
    pub request_id: Option<usize>,
}

impl LspStore {
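    /// Returns the semantic tokens for `buffer`, querying language servers as needed.
    ///
    /// If a fetch is already in flight and the buffer has not changed since it was
    /// spawned, the existing shared task is returned instead of spawning a new request.
    /// A `refresh` carrying a newer request id invalidates the cached tokens first.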
    pub fn semantic_tokens(
        &mut self,
        buffer: Entity<Buffer>,
        refresh: Option<RefreshForServer>,
        cx: &mut Context<Self>,
    ) -> SemanticTokensTask {
        let version_queried_for = buffer.read(cx).version();
        let latest_lsp_data = self.latest_lsp_data(&buffer, cx);
        let semantic_tokens_data = latest_lsp_data.semantic_tokens.get_or_insert_default();
        if let Some(refresh) = refresh {
            let mut invalidate_cache = true;
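            // Only a strictly newer request id may invalidate the cache; out-of-order
            // or replayed refreshes leave the cached tokens untouched.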
            match semantic_tokens_data
                .latest_invalidation_requests
                .entry(refresh.server_id)
            {
                hash_map::Entry::Occupied(mut o) => {
                    if refresh.request_id > *o.get() {
                        o.insert(refresh.request_id);
                    } else {
                        invalidate_cache = false;
                    }
                }
                hash_map::Entry::Vacant(v) => {
                    v.insert(refresh.request_id);
                }
            }

            if invalidate_cache {
                let SemanticTokensData {
                    raw_tokens,
                    latest_invalidation_requests: _,
                    update,
                } = semantic_tokens_data;
                *update = None;
                raw_tokens.servers.clear();
            }
        }

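        // Reuse the in-flight fetch if the buffer has not changed since it was spawned.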
        if let Some((updating_for, task)) = &semantic_tokens_data.update
            && !version_queried_for.changed_since(updating_for)
        {
            return task.clone();
        }

        let new_tokens = self.fetch_semantic_tokens_for_buffer(
            &buffer,
            refresh.map(|refresh| refresh.server_id),
            cx,
        );

        let task_buffer = buffer.clone();
        let task_version_queried_for = version_queried_for.clone();
        let task = cx
            .spawn(async move |lsp_store, cx| {
                let buffer = task_buffer;
                let version_queried_for = task_version_queried_for;
                let res = if let Some(new_tokens) = new_tokens.await {
                    let (raw_tokens, buffer_snapshot) = lsp_store
                        .update(cx, |lsp_store, cx| {
                            let lsp_data = lsp_store.latest_lsp_data(&buffer, cx);
                            let semantic_tokens_data =
                                lsp_data.semantic_tokens.get_or_insert_default();

                            if version_queried_for == lsp_data.buffer_version {
                                for (server_id, new_tokens_response) in new_tokens {
                                    match new_tokens_response {
                                        SemanticTokensResponse::Full { data, result_id } => {
                                            semantic_tokens_data.raw_tokens.servers.insert(
                                                server_id,
                                                Arc::new(ServerSemanticTokens::from_full(
                                                    data, result_id,
                                                )),
                                            );
                                        }
                                        SemanticTokensResponse::Delta { edits, result_id } => {
                                            if let Some(tokens) = semantic_tokens_data
                                                .raw_tokens
                                                .servers
                                                .get_mut(&server_id)
                                            {
                                                let tokens = Arc::make_mut(tokens);
                                                tokens.result_id = result_id;
                                                tokens.apply(&edits);
                                            }
                                        }
                                    }
                                }
                            }
                            let buffer_snapshot =
                                buffer.read_with(cx, |buffer, _| buffer.snapshot());
                            (semantic_tokens_data.raw_tokens.clone(), buffer_snapshot)
                        })
                        .map_err(Arc::new)?;
                    Some(raw_to_buffer_semantic_tokens(raw_tokens, &buffer_snapshot).await)
                } else {
                    lsp_store.update(cx, |lsp_store, cx| {
                        if let Some(current_lsp_data) =
                            lsp_store.current_lsp_data(buffer.read(cx).remote_id())
                        {
                            if current_lsp_data.buffer_version == version_queried_for {
                                current_lsp_data.semantic_tokens = None;
                            }
                        }
                    })?;
                    None
                };
                Ok(BufferSemanticTokens { tokens: res })
            })
            .shared();

        self.latest_lsp_data(&buffer, cx)
            .semantic_tokens
            .get_or_insert_default()
            .update = Some((version_queried_for, task.clone()));

        task
    }

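    /// Fetches semantic tokens from the matching language servers, preferring a
    /// `textDocument/semanticTokens/full/delta` request when a previous `result_id`
    /// is cached and the server supports deltas. On remote projects, the request is
    /// proxied to the upstream client instead.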
    pub(super) fn fetch_semantic_tokens_for_buffer(
        &mut self,
        buffer: &Entity<Buffer>,
        for_server: Option<LanguageServerId>,
        cx: &mut Context<Self>,
    ) -> Task<Option<HashMap<LanguageServerId, SemanticTokensResponse>>> {
        if let Some((client, upstream_project_id)) = self.upstream_client() {
            let request = SemanticTokensFull { for_server };
            if !self.is_capable_for_proto_request(buffer, &request, cx) {
                return Task::ready(None);
            }

            let request_timeout = ProjectSettings::get_global(cx)
                .global_lsp_settings
                .get_request_timeout();
            let request_task = client.request_lsp(
                upstream_project_id,
                None,
                request_timeout,
                cx.background_executor().clone(),
                request.to_proto(upstream_project_id, buffer.read(cx)),
            );
            let buffer = buffer.clone();
            cx.spawn(async move |weak_lsp_store, cx| {
                let lsp_store = weak_lsp_store.upgrade()?;
                let tokens = join_all(
                    request_task
                        .await
                        .log_err()
                        .flatten()
                        .map(|response| response.payload)
                        .unwrap_or_default()
                        .into_iter()
                        .map(|response| {
                            let server_id = LanguageServerId::from_proto(response.server_id);
                            let response = request.response_from_proto(
                                response.response,
                                lsp_store.clone(),
                                buffer.clone(),
                                cx.clone(),
                            );
                            async move {
                                match response.await {
                                    Ok(tokens) => Some((server_id, tokens)),
                                    Err(e) => {
                                        log::error!("Failed to query remote semantic tokens for server {server_id:?}: {e:#}");
                                        None
                                    }
                                }
                            }
                        }),
                )
                .await
                .into_iter()
                .flatten()
                .collect();
                Some(tokens)
            })
        } else {
            let token_tasks = self
                .local_lsp_servers_for_buffer(&buffer, cx)
                .into_iter()
                .filter(|&server_id| {
                    for_server.is_none_or(|for_server_id| for_server_id == server_id)
                })
                .filter_map(|server_id| {
                    let capabilities = AdapterServerCapabilities {
                        server_capabilities: self.lsp_server_capabilities.get(&server_id)?.clone(),
                        code_action_kinds: None,
                    };
                    let request_task = match self.semantic_tokens_result_id(server_id, buffer, cx) {
                        Some(result_id) => {
                            let delta_request = SemanticTokensDelta {
                                previous_result_id: result_id,
                            };
                            if !delta_request.check_capabilities(capabilities.clone()) {
                                let full_request = SemanticTokensFull {
                                    for_server: Some(server_id),
                                };
                                if !full_request.check_capabilities(capabilities) {
                                    return None;
                                }

                                self.request_lsp(
                                    buffer.clone(),
                                    LanguageServerToQuery::Other(server_id),
                                    full_request,
                                    cx,
                                )
                            } else {
                                self.request_lsp(
                                    buffer.clone(),
                                    LanguageServerToQuery::Other(server_id),
                                    delta_request,
                                    cx,
                                )
                            }
                        }
                        None => {
                            let request = SemanticTokensFull {
                                for_server: Some(server_id),
                            };
                            if !request.check_capabilities(capabilities) {
                                return None;
                            }
                            self.request_lsp(
                                buffer.clone(),
                                LanguageServerToQuery::Other(server_id),
                                request,
                                cx,
                            )
                        }
                    };
                    Some(async move { (server_id, request_task.await) })
                })
                .collect::<Vec<_>>();
            if token_tasks.is_empty() {
                return Task::ready(None);
            }

            cx.background_spawn(async move {
                Some(
                    join_all(token_tasks)
                        .await
                        .into_iter()
                        .filter_map(|(server_id, response)| {
                            match response {
                                Ok(tokens) => Some((server_id, tokens)),
                                Err(e) => {
                                    log::error!("Failed to query local semantic tokens for server {server_id:?}: {e:#}");
                                    None
                                }
                            }
                        })
                        .collect(),
                )
            })
        }
    }

    pub(crate) async fn handle_refresh_semantic_tokens(
        lsp_store: Entity<Self>,
        envelope: TypedEnvelope<proto::RefreshSemanticTokens>,
        mut cx: AsyncApp,
    ) -> Result<proto::Ack> {
        lsp_store.update(&mut cx, |_, cx| {
            cx.emit(LspStoreEvent::RefreshSemanticTokens {
                server_id: LanguageServerId::from_proto(envelope.payload.server_id),
                request_id: envelope.payload.request_id.map(|id| id as usize),
            });
        })?;
        Ok(proto::Ack {})
    }

    fn semantic_tokens_result_id(
        &mut self,
        server_id: LanguageServerId,
        buffer: &Entity<Buffer>,
        cx: &mut App,
    ) -> Option<SharedString> {
        self.latest_lsp_data(buffer, cx)
            .semantic_tokens
            .as_ref()?
            .raw_tokens
            .servers
            .get(&server_id)?
            .result_id
            .clone()
    }

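    /// Returns the stylizer for this server/language pair, building and caching one
    /// from the server's `SemanticTokensLegend` and the configured token rules on
    /// first use. Returns `None` if the server's capabilities are unknown or it does
    /// not provide semantic tokens.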
    pub fn get_or_create_token_stylizer(
        &mut self,
        server_id: LanguageServerId,
        language: Option<&LanguageName>,
        cx: &mut App,
    ) -> Option<&SemanticTokenStylizer> {
        let stylizer = match self
            .semantic_token_config
            .stylizers
            .entry((server_id, language.cloned()))
        {
            hash_map::Entry::Occupied(o) => o.into_mut(),
            hash_map::Entry::Vacant(v) => {
                let tokens_provider = self
                    .lsp_server_capabilities
                    .get(&server_id)?
                    .semantic_tokens_provider
                    .as_ref()?;
                let legend = match tokens_provider {
                    lsp::SemanticTokensServerCapabilities::SemanticTokensOptions(opts) => {
                        &opts.legend
                    }
                    lsp::SemanticTokensServerCapabilities::SemanticTokensRegistrationOptions(
                        opts,
                    ) => &opts.semantic_tokens_options.legend,
                };
                let language_rules = language.and_then(|language| {
                    SettingsStore::global(cx).language_semantic_token_rules(language.as_ref())
                });
                let stylizer = SemanticTokenStylizer::new(server_id, legend, language_rules, cx);
                v.insert(stylizer)
            }
        };
        Some(stylizer)
    }
}

pub type SemanticTokensTask =
    Shared<Task<std::result::Result<BufferSemanticTokens, Arc<anyhow::Error>>>>;

#[derive(Debug, Default, Clone)]
pub struct BufferSemanticTokens {
    pub tokens: Option<HashMap<LanguageServerId, Arc<[BufferSemanticToken]>>>,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TokenType(pub u32);

#[derive(Debug, Clone)]
pub struct BufferSemanticToken {
    pub range: Range<Anchor>,
    pub token_type: TokenType,
    pub token_modifiers: u32,
}

pub struct SemanticTokenStylizer {
    server_id: LanguageServerId,
    rules_by_token_type: HashMap<TokenType, Vec<SemanticTokenRule>>,
    token_type_names: HashMap<TokenType, SharedString>,
    modifier_mask: HashMap<SharedString, u32>,
}

impl SemanticTokenStylizer {
    pub fn new(
        server_id: LanguageServerId,
        legend: &lsp::SemanticTokensLegend,
        language_rules: Option<&SemanticTokenRules>,
        cx: &App,
    ) -> Self {
        let token_types: HashMap<TokenType, SharedString> = legend
            .token_types
            .iter()
            .enumerate()
            .map(|(i, token_type)| {
                (
                    TokenType(i as u32),
                    SharedString::from(token_type.as_str().to_string()),
                )
            })
            .collect();
        let modifier_mask: HashMap<SharedString, u32> = legend
            .token_modifiers
            .iter()
            .enumerate()
            .map(|(i, modifier)| (SharedString::from(modifier.as_str().to_string()), 1 << i))
            .collect();

        let global_rules = &ProjectSettings::get_global(cx)
            .global_lsp_settings
            .semantic_token_rules;

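        // For each token type, collect the matching rules with language-specific rules
        // chained after the global ones and the combined sequence reversed, so the most
        // specific, most recently defined rules come first.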
        let rules_by_token_type = token_types
            .iter()
            .map(|(index, token_type_name)| {
                let filter = |rule: &&SemanticTokenRule| {
                    rule.token_type
                        .as_ref()
                        .is_none_or(|rule_token_type| rule_token_type == token_type_name.as_ref())
                };
                let matching_rules: Vec<SemanticTokenRule> = global_rules
                    .rules
                    .iter()
                    .chain(language_rules.into_iter().flat_map(|lr| &lr.rules))
                    .rev()
                    .filter(filter)
                    .cloned()
                    .collect();
                (*index, matching_rules)
            })
            .collect();

        SemanticTokenStylizer {
            server_id,
            rules_by_token_type,
            token_type_names: token_types,
            modifier_mask,
        }
    }

    pub fn server_id(&self) -> LanguageServerId {
        self.server_id
    }

    pub fn token_type_name(&self, token_type: TokenType) -> Option<&SharedString> {
        self.token_type_names.get(&token_type)
    }

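    /// Whether `token_modifiers` has the bit corresponding to `modifier` in this
    /// server's legend.
    ///
    /// A hypothetical sketch (assumes a `stylizer` whose legend lists
    /// `["declaration", "definition"]`, so "definition" maps to bit `1 << 1`):
    ///
    /// ```ignore
    /// assert!(stylizer.has_modifier(0b10, "definition"));
    /// assert!(!stylizer.has_modifier(0b10, "declaration"));
    /// ```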
    pub fn has_modifier(&self, token_modifiers: u32, modifier: &str) -> bool {
        let Some(mask) = self.modifier_mask.get(modifier) else {
            return false;
        };
        (token_modifiers & mask) != 0
    }

    pub fn token_modifiers(&self, token_modifiers: u32) -> Option<String> {
        let modifiers: Vec<&str> = self
            .modifier_mask
            .iter()
            .filter(|(_, mask)| (token_modifiers & *mask) != 0)
            .map(|(name, _)| name.as_ref())
            .collect();
        if modifiers.is_empty() {
            None
        } else {
            Some(modifiers.join(", "))
        }
    }

    pub fn rules_for_token(&self, token_type: TokenType) -> Option<&[SemanticTokenRule]> {
        self.rules_by_token_type
            .get(&token_type)
            .map(|v| v.as_slice())
    }
}

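/// Converts each server's raw, delta-encoded tokens into buffer-anchored tokens,
/// yielding to the executor between chunks so large files don't stall it.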
async fn raw_to_buffer_semantic_tokens(
    raw_tokens: RawSemanticTokens,
    buffer_snapshot: &text::BufferSnapshot,
) -> HashMap<LanguageServerId, Arc<[BufferSemanticToken]>> {
    let mut res = HashMap::default();
    for (&server_id, server_tokens) in &raw_tokens.servers {
        // We don't `collect` here because the filter map cannot pre-allocate; we'd
        // rather over-allocate, since we have to re-allocate into an arc slice anyway.
        let mut buffer_tokens = Vec::with_capacity(server_tokens.data.len() / 5);
        // 5000 was chosen by profiling: on a decent machine each chunk takes about 1ms.
        // This avoids blocking the main thread for hundreds of milliseconds at a time on
        // very big files. If we ever change the code below to not query the underlying
        // rope 6 times per token, we can bump this up.
        for chunk in server_tokens.tokens().chunks(5000).into_iter() {
            buffer_tokens.extend(chunk.filter_map(|token| {
                let start = Unclipped(PointUtf16::new(token.line, token.start));
                let clipped_start = buffer_snapshot.clip_point_utf16(start, Bias::Left);
                let start_offset = buffer_snapshot
                    .as_rope()
                    .point_utf16_to_offset_utf16(clipped_start);
                let end_offset = start_offset + OffsetUtf16(token.length as usize);

                let start = buffer_snapshot
                    .as_rope()
                    .offset_utf16_to_offset(start_offset);
                let end = buffer_snapshot.as_rope().offset_utf16_to_offset(end_offset);

                if start == end {
                    return None;
                }

                Some(BufferSemanticToken {
                    range: buffer_snapshot.anchor_before(start)..buffer_snapshot.anchor_after(end),
                    token_type: token.token_type,
                    token_modifiers: token.token_modifiers,
                })
            }));
            yield_now().await;
        }

        res.insert(server_id, buffer_tokens.into());
        yield_now().await;
    }
    res
}

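/// Cached semantic-token state for a single buffer: the raw per-server token data,
/// the newest invalidation request id seen from each server, and the in-flight
/// update task (if any), keyed by the buffer version it was spawned for.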
#[derive(Default, Debug)]
pub struct SemanticTokensData {
    pub(super) raw_tokens: RawSemanticTokens,
    pub(super) latest_invalidation_requests: HashMap<LanguageServerId, Option<usize>>,
    update: Option<(Global, SemanticTokensTask)>,
}

/// All the semantic tokens for a buffer.
///
/// This aggregates semantic tokens from multiple language servers in a specific order.
/// Semantic tokens later in the list will override earlier ones in case of overlap.
#[derive(Default, Debug, Clone)]
pub(super) struct RawSemanticTokens {
    pub servers: HashMap<lsp::LanguageServerId, Arc<ServerSemanticTokens>>,
}

/// All the semantic tokens for a buffer, from a single language server.
#[derive(Debug, Clone)]
pub struct ServerSemanticTokens {
    /// Each value is:
    /// data[5*i] - deltaLine: token line number, relative to the start of the previous token
    /// data[5*i+1] - deltaStart: token start character, relative to the start of the previous token (relative to 0 or the previous token's start if they are on the same line)
    /// data[5*i+2] - length: the length of the token.
    /// data[5*i+3] - tokenType: will be looked up in SemanticTokensLegend.tokenTypes. We currently ask that tokenType < 65536.
    /// data[5*i+4] - tokenModifiers: each set bit will be looked up in SemanticTokensLegend.tokenModifiers
    ///
    /// See https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/ for more.
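    ///
    /// For example (mirroring the LSP spec's sample), `[2, 5, 3, 0, 3]` encodes one
    /// token at line 2, character 5, of length 3, with token type index 0 and the
    /// two lowest modifier bits set.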
    data: Vec<u32>,

    pub(crate) result_id: Option<SharedString>,
}

pub struct SemanticTokensIter<'a> {
    prev: Option<(u32, u32)>,
    data: ChunksExact<'a, u32>,
}

/// A single item (five values) from `data`.
struct SemanticTokenValue {
    delta_line: u32,
    delta_start: u32,
    length: u32,
    token_type: TokenType,
    token_modifiers: u32,
}

/// A decoded semantic token, with an absolute position rather than the delta encoding.
#[derive(Debug, PartialEq, Eq)]
pub struct SemanticToken {
    pub line: u32,
    pub start: u32,
    pub length: u32,
    pub token_type: TokenType,
    pub token_modifiers: u32,
}

impl ServerSemanticTokens {
    pub fn from_full(data: Vec<u32>, result_id: Option<SharedString>) -> Self {
        ServerSemanticTokens { data, result_id }
    }

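    /// Applies a semantic tokens delta response by splicing each edit into the raw
    /// `data` array.
    ///
    /// A sketch of the behavior (mirrors the `applies_delta_edit` test below):
    ///
    /// ```ignore
    /// let mut tokens = ServerSemanticTokens::from_full(vec![2, 5, 3, 0, 3], None);
    /// // Replace the first value (the first token's deltaLine) with 3.
    /// tokens.apply(&[SemanticTokensEdit { start: 0, delete_count: 1, data: vec![3] }]);
    /// assert_eq!(tokens.tokens().next().unwrap().line, 3);
    /// ```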
    pub(crate) fn apply(&mut self, edits: &[SemanticTokensEdit]) {
        for edit in edits {
            let start = edit.start as usize;
            let end = start + edit.delete_count as usize;
            self.data.splice(start..end, edit.data.iter().copied());
        }
    }

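    /// Returns an iterator that decodes the relative (delta) encoding into
    /// `SemanticToken`s with absolute line and start positions.
    ///
    /// A sketch of the decoding (mirrors the `parses_sample_tokens` test below):
    ///
    /// ```ignore
    /// let tokens = ServerSemanticTokens::from_full(
    ///     vec![2, 5, 3, 0, 3, /* second token: */ 0, 5, 4, 1, 0],
    ///     None,
    /// );
    /// let decoded: Vec<SemanticToken> = tokens.tokens().collect();
    /// assert_eq!(decoded[1].line, 2); // deltaLine 0: same line as the first token
    /// assert_eq!(decoded[1].start, 10); // previous start (5) + deltaStart (5)
    /// ```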
    pub fn tokens(&self) -> SemanticTokensIter<'_> {
        SemanticTokensIter {
            prev: None,
            data: self.data.chunks_exact(5),
        }
    }
}

impl Iterator for SemanticTokensIter<'_> {
    type Item = SemanticToken;

    fn next(&mut self) -> Option<Self::Item> {
        let chunk = self.data.next()?;
        let token = SemanticTokenValue {
            delta_line: chunk[0],
            delta_start: chunk[1],
            length: chunk[2],
            token_type: TokenType(chunk[3]),
            token_modifiers: chunk[4],
        };

        let (line, start) = if let Some((last_line, last_start)) = self.prev {
            let line = last_line + token.delta_line;
            let start = if token.delta_line == 0 {
                last_start + token.delta_start
            } else {
                token.delta_start
            };
            (line, start)
        } else {
            (token.delta_line, token.delta_start)
        };

        self.prev = Some((line, start));

        Some(SemanticToken {
            line,
            start,
            length: token.length,
            token_type: token.token_type,
            token_modifiers: token.token_modifiers,
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::lsp_command::SemanticTokensEdit;
    use lsp::SEMANTIC_TOKEN_MODIFIERS;

    fn modifier_names(bits: u32) -> String {
        if bits == 0 {
            return "-".to_string();
        }
        let names: Vec<&str> = SEMANTIC_TOKEN_MODIFIERS
            .iter()
            .enumerate()
            .filter(|(i, _)| bits & (1 << i) != 0)
            .map(|(_, m)| m.as_str())
            .collect();

        // Check for unknown bits
        let known_bits = (1u32 << SEMANTIC_TOKEN_MODIFIERS.len()) - 1;
        let unknown = bits & !known_bits;

        if unknown != 0 {
            let mut result = names.join("+");
            if !result.is_empty() {
                result.push('+');
            }
            result.push_str(&format!("?0x{:x}", unknown));
            result
        } else {
            names.join("+")
        }
    }

    /// Debug tool: parses semantic token JSON from LSP and prints human-readable output.
    ///
    /// Usage: Paste JSON into `json_input`, then run:
    ///     cargo test -p project debug_parse_tokens -- --nocapture --ignored
    ///
    /// Accepts either:
    /// - Full LSP response: `{"jsonrpc":"2.0","id":1,"result":{"data":[...]}}`
    /// - Just the data array: `[0,0,5,1,0,...]`
    ///
    /// For delta responses, paste multiple JSON messages (one per line) and they
    /// will be applied in sequence.
    ///
    /// Token encoding (5 values per token):
    ///     [deltaLine, deltaStart, length, tokenType, tokenModifiers]
    #[test]
    #[ignore] // Run with: cargo test -p project debug_parse_tokens -- --nocapture --ignored
    fn debug_parse_tokens() {
        // ============================================================
        // PASTE YOUR JSON HERE (one message per line for sequences)
        // Comments starting with // are ignored
        // ============================================================
        let json_input = r#"
// === EXAMPLE 1: Full response (LSP spec example) ===
// 3 tokens: property at line 2, type at line 2, class at line 5
{"jsonrpc":"2.0","id":1,"result":{"resultId":"1","data":[2,5,3,9,3,0,5,4,6,0,3,2,7,1,0]}}

// === EXAMPLE 2: Delta response ===
// User added empty line at start of file, so all tokens shift down by 1 line.
// This changes first token's deltaLine from 2 to 3 (edit at index 0).
{"jsonrpc":"2.0","id":2,"result":{"resultId":"2","edits":[{"start":0,"deleteCount":1,"data":[3]}]}}

// === EXAMPLE 3: Another delta ===
// User added a new token. Insert 5 values at position 5 (after first token).
// New token: same line as token 1, 2 chars after it ends, len 5, type=function(12), mods=definition(2)
{"jsonrpc":"2.0","id":3,"result":{"resultId":"3","edits":[{"start":5,"deleteCount":0,"data":[0,2,5,12,2]}]}}
        "#;
        // Accepted formats:
        // - Full response: {"result":{"data":[...]}}
        // - Delta response: {"result":{"edits":[{"start":N,"deleteCount":N,"data":[...]}]}}
        // - Just an array: [0,0,5,1,0,...]

        // ============================================================
        // PROCESSING
        // ============================================================
        let mut current_data: Vec<u32> = Vec::new();
        let mut result_id: Option<String> = None;

        for line in json_input.lines() {
            let line = line.trim();
            if line.is_empty() || line.starts_with("//") {
                continue;
            }

            let parsed: serde_json::Value =
                serde_json::from_str(line).expect("Failed to parse JSON");

            // Try to extract data from various JSON shapes
            let (data, edits, new_result_id) = extract_semantic_tokens(&parsed);

            if let Some(new_id) = new_result_id {
                result_id = Some(new_id);
            }

            if let Some(full_data) = data {
                println!("\n{}", "=".repeat(70));
                println!("FULL RESPONSE (resultId: {:?})", result_id);
                current_data = full_data;
            } else if let Some(delta_edits) = edits {
                println!("\n{}", "=".repeat(70));
                println!(
                    "DELTA RESPONSE: {} edit(s) (resultId: {:?})",
                    delta_edits.len(),
                    result_id
                );
                for (i, edit) in delta_edits.iter().enumerate() {
                    println!(
                        "  [{}] start={}, delete={}, insert {} values",
                        i,
                        edit.start,
                        edit.delete_count,
                        edit.data.len()
                    );
                }
                let mut tokens = ServerSemanticTokens::from_full(current_data.clone(), None);
                tokens.apply(&delta_edits);
                current_data = tokens.data;
            }
        }

        // Print parsed tokens
        println!(
            "\nDATA: {} values = {} tokens",
            current_data.len(),
            current_data.len() / 5
        );
        println!("\nPARSED TOKENS:");
        println!("{:-<100}", "");
        println!(
            "{:>5} {:>6} {:>4} {:<15} {}",
            "LINE", "START", "LEN", "TYPE", "MODIFIERS"
        );
        println!("{:-<100}", "");

        let tokens = ServerSemanticTokens::from_full(current_data, None);
        for token in tokens.tokens() {
            println!(
                "{:>5} {:>6} {:>4} {:<15} {}",
                token.line,
                token.start,
                token.length,
                token.token_type.0,
                modifier_names(token.token_modifiers),
            );
        }
        println!("{:-<100}", "");
        println!("{}\n", "=".repeat(100));
    }

    fn extract_semantic_tokens(
        value: &serde_json::Value,
    ) -> (
        Option<Vec<u32>>,
        Option<Vec<SemanticTokensEdit>>,
        Option<String>,
    ) {
        // Try as an array directly: [1,2,3,...]
        if let Some(arr) = value.as_array() {
            let data: Vec<u32> = arr
                .iter()
                .filter_map(|v| v.as_u64().map(|n| n as u32))
                .collect();
            return (Some(data), None, None);
        }

        // Try as an LSP response: {"result": {"data": [...]}} or {"result": {"edits": [...]}}
        let result = value.get("result").unwrap_or(value);
        let result_id = result
            .get("resultId")
            .and_then(|v| v.as_str())
            .map(String::from);

        // Full response with data
        if let Some(data_arr) = result.get("data").and_then(|v| v.as_array()) {
            let data: Vec<u32> = data_arr
                .iter()
                .filter_map(|v| v.as_u64().map(|n| n as u32))
                .collect();
            return (Some(data), None, result_id);
        }

        // Delta response with edits
        if let Some(edits_arr) = result.get("edits").and_then(|v| v.as_array()) {
            let edits: Vec<SemanticTokensEdit> = edits_arr
                .iter()
                .filter_map(|e| {
                    Some(SemanticTokensEdit {
                        start: e.get("start")?.as_u64()? as u32,
                        delete_count: e.get("deleteCount")?.as_u64()? as u32,
                        data: e
                            .get("data")
                            .and_then(|d| d.as_array())
                            .map(|arr| {
                                arr.iter()
                                    .filter_map(|v| v.as_u64().map(|n| n as u32))
                                    .collect()
                            })
                            .unwrap_or_default(),
                    })
                })
                .collect();
            return (None, Some(edits), result_id);
        }

        (None, None, result_id)
    }

    #[test]
    fn parses_sample_tokens() {
        // Example from the spec: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_semanticTokens
        let tokens = ServerSemanticTokens::from_full(
            vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0, 3, 2, 7, 2, 0],
            None,
        )
        .tokens()
        .collect::<Vec<SemanticToken>>();

        // The spec uses 1-based line numbers, and 0-based character numbers. This test uses 0-based for both.
        assert_eq!(
            tokens,
            &[
                SemanticToken {
                    line: 2,
                    start: 5,
                    length: 3,
                    token_type: TokenType(0),
                    token_modifiers: 3
                },
                SemanticToken {
                    line: 2,
                    start: 10,
                    length: 4,
                    token_type: TokenType(1),
                    token_modifiers: 0
                },
                SemanticToken {
                    line: 5,
                    start: 2,
                    length: 7,
                    token_type: TokenType(2),
                    token_modifiers: 0
                }
            ]
        );
    }

    #[test]
    fn applies_delta_edit() {
        // Example from the spec: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_semanticTokens
        // After a user types a new empty line at the beginning of the file,
        // the tokens shift down by one line. The delta edit transforms
        // [2,5,3,0,3, 0,5,4,1,0, 3,2,7,2,0] into [3,5,3,0,3, 0,5,4,1,0, 3,2,7,2,0]
        // by changing the first value (the deltaLine of the first token) from 2 to 3.

        let mut tokens = ServerSemanticTokens::from_full(
            vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0, 3, 2, 7, 2, 0],
            None,
        );

        tokens.apply(&[SemanticTokensEdit {
            start: 0,
            delete_count: 1,
            data: vec![3],
        }]);

        let result = tokens.tokens().collect::<Vec<SemanticToken>>();

        assert_eq!(
            result,
            &[
                SemanticToken {
                    line: 3,
                    start: 5,
                    length: 3,
                    token_type: TokenType(0),
                    token_modifiers: 3
                },
                SemanticToken {
                    line: 3,
                    start: 10,
                    length: 4,
                    token_type: TokenType(1),
                    token_modifiers: 0
                },
                SemanticToken {
                    line: 6,
                    start: 2,
                    length: 7,
                    token_type: TokenType(2),
                    token_modifiers: 0
                }
            ]
        );
    }
}