//! semantic_tokens.rs — fetching, caching, and styling of LSP semantic tokens.

  1use std::{collections::hash_map, ops::Range, slice::ChunksExact, sync::Arc};
  2
  3use anyhow::Result;
  4
  5use clock::Global;
  6use collections::HashMap;
  7use futures::{
  8    FutureExt as _,
  9    future::{Shared, join_all},
 10};
 11use gpui::{App, AppContext, AsyncApp, Context, Entity, ReadGlobal as _, SharedString, Task};
 12use itertools::Itertools;
 13use language::{Buffer, LanguageName, language_settings::all_language_settings};
 14use lsp::{AdapterServerCapabilities, LanguageServerId};
 15use rpc::{TypedEnvelope, proto};
 16use settings::{SemanticTokenRule, SemanticTokenRules, Settings as _, SettingsStore};
 17use smol::future::yield_now;
 18use text::{Anchor, Bias, OffsetUtf16, PointUtf16, Unclipped};
 19use util::ResultExt as _;
 20
 21use crate::{
 22    LanguageServerToQuery, LspStore, LspStoreEvent,
 23    lsp_command::{
 24        LspCommand, SemanticTokensDelta, SemanticTokensEdit, SemanticTokensFull,
 25        SemanticTokensResponse,
 26    },
 27    project_settings::ProjectSettings,
 28};
 29
/// Per-store state for semantic token styling: cached stylizers plus the
/// settings they were derived from, so the caches can be invalidated when
/// settings change.
pub(super) struct SemanticTokenConfig {
    // Stylizers cached per language server and (optional) language.
    stylizers: HashMap<(LanguageServerId, Option<LanguageName>), SemanticTokenStylizer>,
    // Global semantic token rules from the project settings; compared against
    // incoming settings to detect changes.
    rules: SemanticTokenRules,
    // Default semantic tokens mode from the language settings.
    global_mode: settings::SemanticTokens,
}
 35
 36impl SemanticTokenConfig {
 37    pub(super) fn new(cx: &App) -> Self {
 38        Self {
 39            stylizers: HashMap::default(),
 40            rules: ProjectSettings::get_global(cx)
 41                .global_lsp_settings
 42                .semantic_token_rules
 43                .clone(),
 44            global_mode: all_language_settings(None, cx).defaults.semantic_tokens,
 45        }
 46    }
 47
 48    pub(super) fn remove_server_data(&mut self, server_id: LanguageServerId) {
 49        self.stylizers.retain(|&(id, _), _| id != server_id);
 50    }
 51
 52    pub(super) fn update_rules(&mut self, new_rules: SemanticTokenRules) -> bool {
 53        if new_rules != self.rules {
 54            self.rules = new_rules;
 55            self.stylizers.clear();
 56            true
 57        } else {
 58            false
 59        }
 60    }
 61
 62    pub(super) fn update_global_mode(&mut self, new_mode: settings::SemanticTokens) -> bool {
 63        if new_mode != self.global_mode {
 64            self.global_mode = new_mode;
 65            true
 66        } else {
 67            false
 68        }
 69    }
 70}
 71
/// Identifies a server-initiated `workspace/semanticTokens/refresh` request.
#[derive(Debug, Clone, Copy)]
pub struct RefreshForServer {
    /// The language server that requested the refresh.
    pub server_id: LanguageServerId,
    /// Request id used to deduplicate refreshes: only a request with a higher
    /// id than the last handled one invalidates cached tokens.
    pub request_id: Option<usize>,
}
 77
 78impl LspStore {
 79    pub fn semantic_tokens(
 80        &mut self,
 81        buffer: Entity<Buffer>,
 82        refresh: Option<RefreshForServer>,
 83        cx: &mut Context<Self>,
 84    ) -> SemanticTokensTask {
 85        let version_queried_for = buffer.read(cx).version();
 86        let latest_lsp_data = self.latest_lsp_data(&buffer, cx);
 87        let semantic_tokens_data = latest_lsp_data.semantic_tokens.get_or_insert_default();
 88        if let Some(refresh) = refresh {
 89            let mut invalidate_cache = true;
 90            match semantic_tokens_data
 91                .latest_invalidation_requests
 92                .entry(refresh.server_id)
 93            {
 94                hash_map::Entry::Occupied(mut o) => {
 95                    if refresh.request_id > *o.get() {
 96                        o.insert(refresh.request_id);
 97                    } else {
 98                        invalidate_cache = false;
 99                    }
100                }
101                hash_map::Entry::Vacant(v) => {
102                    v.insert(refresh.request_id);
103                }
104            }
105
106            if invalidate_cache {
107                let SemanticTokensData {
108                    raw_tokens,
109                    latest_invalidation_requests: _,
110                    update,
111                } = semantic_tokens_data;
112                *update = None;
113                raw_tokens.servers.clear();
114            }
115        }
116
117        if let Some((updating_for, task)) = &semantic_tokens_data.update
118            && !version_queried_for.changed_since(updating_for)
119        {
120            return task.clone();
121        }
122
123        let new_tokens = self.fetch_semantic_tokens_for_buffer(
124            &buffer,
125            refresh.map(|refresh| refresh.server_id),
126            cx,
127        );
128
129        let task_buffer = buffer.clone();
130        let task_version_queried_for = version_queried_for.clone();
131        let task = cx
132            .spawn(async move |lsp_store, cx| {
133                let buffer = task_buffer;
134                let version_queried_for = task_version_queried_for;
135                let res = if let Some(new_tokens) = new_tokens.await {
136                    let (raw_tokens, buffer_snapshot) = lsp_store
137                        .update(cx, |lsp_store, cx| {
138                            let lsp_data = lsp_store.latest_lsp_data(&buffer, cx);
139                            let semantic_tokens_data =
140                                lsp_data.semantic_tokens.get_or_insert_default();
141
142                            if version_queried_for == lsp_data.buffer_version {
143                                for (server_id, new_tokens_response) in new_tokens {
144                                    match new_tokens_response {
145                                        SemanticTokensResponse::Full { data, result_id } => {
146                                            semantic_tokens_data.raw_tokens.servers.insert(
147                                                server_id,
148                                                Arc::new(ServerSemanticTokens::from_full(
149                                                    data, result_id,
150                                                )),
151                                            );
152                                        }
153                                        SemanticTokensResponse::Delta { edits, result_id } => {
154                                            if let Some(tokens) = semantic_tokens_data
155                                                .raw_tokens
156                                                .servers
157                                                .get_mut(&server_id)
158                                            {
159                                                let tokens = Arc::make_mut(tokens);
160                                                tokens.result_id = result_id;
161                                                tokens.apply(&edits);
162                                            }
163                                        }
164                                    }
165                                }
166                            }
167                            let buffer_snapshot =
168                                buffer.read_with(cx, |buffer, _| buffer.snapshot());
169                            (semantic_tokens_data.raw_tokens.clone(), buffer_snapshot)
170                        })
171                        .map_err(Arc::new)?;
172                    Some(raw_to_buffer_semantic_tokens(raw_tokens, &buffer_snapshot).await)
173                } else {
174                    lsp_store.update(cx, |lsp_store, cx| {
175                        if let Some(current_lsp_data) =
176                            lsp_store.current_lsp_data(buffer.read(cx).remote_id())
177                        {
178                            if current_lsp_data.buffer_version == version_queried_for {
179                                current_lsp_data.semantic_tokens = None;
180                            }
181                        }
182                    })?;
183                    None
184                };
185                Ok(BufferSemanticTokens { tokens: res })
186            })
187            .shared();
188
189        self.latest_lsp_data(&buffer, cx)
190            .semantic_tokens
191            .get_or_insert_default()
192            .update = Some((version_queried_for, task.clone()));
193
194        task
195    }
196
197    pub(super) fn fetch_semantic_tokens_for_buffer(
198        &mut self,
199        buffer: &Entity<Buffer>,
200        for_server: Option<LanguageServerId>,
201        cx: &mut Context<Self>,
202    ) -> Task<Option<HashMap<LanguageServerId, SemanticTokensResponse>>> {
203        if let Some((client, upstream_project_id)) = self.upstream_client() {
204            let request = SemanticTokensFull { for_server };
205            if !self.is_capable_for_proto_request(buffer, &request, cx) {
206                return Task::ready(None);
207            }
208
209            let request_timeout = ProjectSettings::get_global(cx)
210                .global_lsp_settings
211                .get_request_timeout();
212            let request_task = client.request_lsp(
213                upstream_project_id,
214                None,
215                request_timeout,
216                cx.background_executor().clone(),
217                request.to_proto(upstream_project_id, buffer.read(cx)),
218            );
219            let buffer = buffer.clone();
220            cx.spawn(async move |weak_lsp_store, cx| {
221                let lsp_store = weak_lsp_store.upgrade()?;
222                let tokens = join_all(
223                    request_task
224                        .await
225                        .log_err()
226                        .flatten()
227                        .map(|response| response.payload)
228                        .unwrap_or_default()
229                        .into_iter()
230                        .map(|response| {
231                            let server_id = LanguageServerId::from_proto(response.server_id);
232                            let response = request.response_from_proto(
233                                response.response,
234                                lsp_store.clone(),
235                                buffer.clone(),
236                                cx.clone(),
237                            );
238                            async move {
239                                match response.await {
240                                    Ok(tokens) => Some((server_id, tokens)),
241                                    Err(e) => {
242                                        log::error!("Failed to query remote semantic tokens for server {server_id:?}: {e:#}");
243                                        None
244                                    }
245                                }
246                            }
247                        }),
248                )
249                .await
250                .into_iter()
251                .flatten()
252                .collect();
253                Some(tokens)
254            })
255        } else {
256            let token_tasks = self
257                .local_lsp_servers_for_buffer(&buffer, cx)
258                .into_iter()
259                .filter(|&server_id| {
260                    for_server.is_none_or(|for_server_id| for_server_id == server_id)
261                })
262                .filter_map(|server_id| {
263                    let capabilities = AdapterServerCapabilities {
264                        server_capabilities: self.lsp_server_capabilities.get(&server_id)?.clone(),
265                        code_action_kinds: None,
266                    };
267                    let request_task = match self.semantic_tokens_result_id(server_id, buffer, cx) {
268                        Some(result_id) => {
269                            let delta_request = SemanticTokensDelta {
270                                previous_result_id: result_id,
271                            };
272                            if !delta_request.check_capabilities(capabilities.clone()) {
273                                let full_request = SemanticTokensFull {
274                                    for_server: Some(server_id),
275                                };
276                                if !full_request.check_capabilities(capabilities) {
277                                    return None;
278                                }
279
280                                self.request_lsp(
281                                    buffer.clone(),
282                                    LanguageServerToQuery::Other(server_id),
283                                    full_request,
284                                    cx,
285                                )
286                            } else {
287                                self.request_lsp(
288                                    buffer.clone(),
289                                    LanguageServerToQuery::Other(server_id),
290                                    delta_request,
291                                    cx,
292                                )
293                            }
294                        }
295                        None => {
296                            let request = SemanticTokensFull {
297                                for_server: Some(server_id),
298                            };
299                            if !request.check_capabilities(capabilities) {
300                                return None;
301                            }
302                            self.request_lsp(
303                                buffer.clone(),
304                                LanguageServerToQuery::Other(server_id),
305                                request,
306                                cx,
307                            )
308                        }
309                    };
310                    Some(async move { (server_id, request_task.await) })
311                })
312                .collect::<Vec<_>>();
313            if token_tasks.is_empty() {
314                return Task::ready(None);
315            }
316
317            cx.background_spawn(async move {
318                Some(
319                    join_all(token_tasks)
320                        .await
321                        .into_iter()
322                        .flat_map(|(server_id, response)| {
323                            match response {
324                                Ok(tokens) => Some((server_id, tokens)),
325                                Err(e) => {
326                                    log::error!("Failed to query remote semantic tokens for server {server_id:?}: {e:#}");
327                                    None
328                                }
329                            }
330                        })
331                        .collect()
332                )
333            })
334        }
335    }
336
337    pub(crate) async fn handle_refresh_semantic_tokens(
338        lsp_store: Entity<Self>,
339        envelope: TypedEnvelope<proto::RefreshSemanticTokens>,
340        mut cx: AsyncApp,
341    ) -> Result<proto::Ack> {
342        lsp_store.update(&mut cx, |_, cx| {
343            cx.emit(LspStoreEvent::RefreshSemanticTokens {
344                server_id: LanguageServerId::from_proto(envelope.payload.server_id),
345                request_id: envelope.payload.request_id.map(|id| id as usize),
346            });
347        });
348        Ok(proto::Ack {})
349    }
350
351    fn semantic_tokens_result_id(
352        &mut self,
353        server_id: LanguageServerId,
354        buffer: &Entity<Buffer>,
355        cx: &mut App,
356    ) -> Option<SharedString> {
357        self.latest_lsp_data(buffer, cx)
358            .semantic_tokens
359            .as_ref()?
360            .raw_tokens
361            .servers
362            .get(&server_id)?
363            .result_id
364            .clone()
365    }
366
367    pub fn get_or_create_token_stylizer(
368        &mut self,
369        server_id: LanguageServerId,
370        language: Option<&LanguageName>,
371        cx: &mut App,
372    ) -> Option<&SemanticTokenStylizer> {
373        let stylizer = match self
374            .semantic_token_config
375            .stylizers
376            .entry((server_id, language.cloned()))
377        {
378            hash_map::Entry::Occupied(o) => o.into_mut(),
379            hash_map::Entry::Vacant(v) => {
380                let tokens_provider = self
381                    .lsp_server_capabilities
382                    .get(&server_id)?
383                    .semantic_tokens_provider
384                    .as_ref()?;
385                let legend = match tokens_provider {
386                    lsp::SemanticTokensServerCapabilities::SemanticTokensOptions(opts) => {
387                        &opts.legend
388                    }
389                    lsp::SemanticTokensServerCapabilities::SemanticTokensRegistrationOptions(
390                        opts,
391                    ) => &opts.semantic_tokens_options.legend,
392                };
393                let language_rules = language.and_then(|language| {
394                    SettingsStore::global(cx).language_semantic_token_rules(language.as_ref())
395                });
396                let stylizer = SemanticTokenStylizer::new(server_id, legend, language_rules, cx);
397                v.insert(stylizer)
398            }
399        };
400        Some(stylizer)
401    }
402}
403
/// A shareable task that resolves to the semantic tokens of one buffer.
pub type SemanticTokensTask =
    Shared<Task<std::result::Result<BufferSemanticTokens, Arc<anyhow::Error>>>>;

/// Anchored semantic tokens for a buffer, keyed by the producing language server.
#[derive(Debug, Default, Clone)]
pub struct BufferSemanticTokens {
    /// `None` when no server produced tokens for the buffer.
    pub tokens: Option<HashMap<LanguageServerId, Arc<[BufferSemanticToken]>>>,
}
411
/// Index into a server's `SemanticTokensLegend::token_types`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TokenType(pub u32);

/// A single semantic token resolved to anchors in a buffer.
#[derive(Debug, Clone)]
pub struct BufferSemanticToken {
    /// The range of the token in the buffer.
    ///
    /// Guaranteed to contain a buffer id.
    pub range: Range<Anchor>,
    /// Index into the producing server's token type legend.
    pub token_type: TokenType,
    /// Bitset of modifiers, interpreted via the producing server's modifier legend.
    pub token_modifiers: u32,
}
424
/// Resolves a server's numeric token types and modifiers through its legend
/// and matches them against user-configured semantic token rules.
pub struct SemanticTokenStylizer {
    // The server whose legend this stylizer was built from.
    server_id: LanguageServerId,
    // Pre-filtered rules that can apply to each token type.
    rules_by_token_type: HashMap<TokenType, Vec<SemanticTokenRule>>,
    // Token type index -> name, from the server's legend.
    token_type_names: HashMap<TokenType, SharedString>,
    // Modifier name -> bit mask, from the server's legend.
    modifier_mask: HashMap<SharedString, u32>,
}
431
432impl SemanticTokenStylizer {
433    pub fn new(
434        server_id: LanguageServerId,
435        legend: &lsp::SemanticTokensLegend,
436        language_rules: Option<&SemanticTokenRules>,
437        cx: &App,
438    ) -> Self {
439        let token_types: HashMap<TokenType, SharedString> = legend
440            .token_types
441            .iter()
442            .enumerate()
443            .map(|(i, token_type)| {
444                (
445                    TokenType(i as u32),
446                    SharedString::from(token_type.as_str().to_string()),
447                )
448            })
449            .collect();
450        let modifier_mask: HashMap<SharedString, u32> = legend
451            .token_modifiers
452            .iter()
453            .enumerate()
454            .map(|(i, modifier)| (SharedString::from(modifier.as_str().to_string()), 1 << i))
455            .collect();
456
457        let global_rules = &ProjectSettings::get_global(cx)
458            .global_lsp_settings
459            .semantic_token_rules;
460
461        let rules_by_token_type = token_types
462            .iter()
463            .map(|(index, token_type_name)| {
464                let filter = |rule: &&SemanticTokenRule| {
465                    rule.token_type
466                        .as_ref()
467                        .is_none_or(|rule_token_type| rule_token_type == token_type_name.as_ref())
468                };
469                let matching_rules: Vec<SemanticTokenRule> = global_rules
470                    .rules
471                    .iter()
472                    .chain(language_rules.into_iter().flat_map(|lr| &lr.rules))
473                    .rev()
474                    .filter(filter)
475                    .cloned()
476                    .collect();
477                (*index, matching_rules)
478            })
479            .collect();
480
481        SemanticTokenStylizer {
482            server_id,
483            rules_by_token_type,
484            token_type_names: token_types,
485            modifier_mask,
486        }
487    }
488
489    pub fn server_id(&self) -> LanguageServerId {
490        self.server_id
491    }
492
493    pub fn token_type_name(&self, token_type: TokenType) -> Option<&SharedString> {
494        self.token_type_names.get(&token_type)
495    }
496
497    pub fn has_modifier(&self, token_modifiers: u32, modifier: &str) -> bool {
498        let Some(mask) = self.modifier_mask.get(modifier) else {
499            return false;
500        };
501        (token_modifiers & mask) != 0
502    }
503
504    pub fn token_modifiers(&self, token_modifiers: u32) -> Option<String> {
505        let modifiers: Vec<&str> = self
506            .modifier_mask
507            .iter()
508            .filter(|(_, mask)| (token_modifiers & *mask) != 0)
509            .map(|(name, _)| name.as_ref())
510            .collect();
511        if modifiers.is_empty() {
512            None
513        } else {
514            Some(modifiers.join(", "))
515        }
516    }
517
518    pub fn rules_for_token(&self, token_type: TokenType) -> Option<&[SemanticTokenRule]> {
519        self.rules_by_token_type
520            .get(&token_type)
521            .map(|v| v.as_slice())
522    }
523}
524
/// Converts raw per-server token data into anchored [`BufferSemanticToken`]s.
///
/// Positions are clipped to the snapshot; tokens that become empty after
/// clipping, or that would end before an already-accepted token (out-of-order
/// data), are dropped. Yields between chunks so large files don't block the
/// executor.
async fn raw_to_buffer_semantic_tokens(
    raw_tokens: RawSemanticTokens,
    buffer_snapshot: &text::BufferSnapshot,
) -> HashMap<LanguageServerId, Arc<[BufferSemanticToken]>> {
    let mut res = HashMap::default();
    for (&server_id, server_tokens) in &raw_tokens.servers {
        // Offset of the end of the last accepted token; enforces monotonically
        // advancing output.
        let mut last = 0;
        // We don't do `collect` here due to the filter map not pre-allocating
        // we'd rather over allocate here than not since we have to re-allocate into an arc slice anyways
        let mut buffer_tokens = Vec::with_capacity(server_tokens.data.len() / 5);
        // 5000 was chosen by profiling, on a decent machine this will take about 1ms per chunk
        // This is to avoid blocking the main thread for hundreds of milliseconds at a time for very big files
        // If we ever change the below code to not query the underlying rope 6 times per token we can bump this up
        for chunk in server_tokens.tokens().chunks(5000).into_iter() {
            buffer_tokens.extend(chunk.filter_map(|token| {
                // Token positions are UTF-16 based; clip to the snapshot before
                // converting into plain offsets.
                let start = Unclipped(PointUtf16::new(token.line, token.start));
                let clipped_start = buffer_snapshot.clip_point_utf16(start, Bias::Left);
                let start_offset = buffer_snapshot
                    .as_rope()
                    .point_utf16_to_offset_utf16(clipped_start);
                let end_offset = start_offset + OffsetUtf16(token.length as usize);

                let start = buffer_snapshot
                    .as_rope()
                    .offset_utf16_to_offset(start_offset);
                if start < last {
                    return None;
                }

                let end = buffer_snapshot.as_rope().offset_utf16_to_offset(end_offset);
                if end < last {
                    return None;
                }
                last = end;

                // Skip tokens that are empty after clipping.
                if start == end {
                    return None;
                }

                Some(BufferSemanticToken {
                    range: buffer_snapshot.anchor_before(start)..buffer_snapshot.anchor_after(end),
                    token_type: token.token_type,
                    token_modifiers: token.token_modifiers,
                })
            }));
            // Let other tasks run between chunks.
            yield_now().await;
        }

        res.insert(server_id, buffer_tokens.into());
        yield_now().await;
    }
    res
}
578
/// Cached semantic token state for a single buffer.
#[derive(Default, Debug)]
pub struct SemanticTokensData {
    // Raw (still relative-encoded) tokens last received from each server.
    pub(super) raw_tokens: RawSemanticTokens,
    // Newest refresh request id handled per server; used to deduplicate
    // server-initiated cache invalidations.
    pub(super) latest_invalidation_requests: HashMap<LanguageServerId, Option<usize>>,
    // Buffer version an update was started for, and the shared task running it.
    update: Option<(Global, SemanticTokensTask)>,
}

/// All the semantic tokens for a buffer, keyed by the language server that
/// produced them.
///
/// NOTE(review): a `HashMap` has no inherent ordering, so any precedence
/// between overlapping tokens from different servers must be established by
/// the consumer — the earlier "later overrides earlier" wording did not match
/// this representation.
#[derive(Default, Debug, Clone)]
pub(super) struct RawSemanticTokens {
    pub servers: HashMap<lsp::LanguageServerId, Arc<ServerSemanticTokens>>,
}
594
/// All the semantic tokens for a buffer, from a single language server.
#[derive(Debug, Clone)]
pub struct ServerSemanticTokens {
    /// Relative-encoded token data, five `u32`s per token:
    /// data[5*i] - deltaLine: token line number, relative to the start of the previous token
    /// data[5*i+1] - deltaStart: token start character, relative to the start of the previous token (relative to 0 or the previous token’s start if they are on the same line)
    /// data[5*i+2] - length: the length of the token.
    /// data[5*i+3] - tokenType: will be looked up in SemanticTokensLegend.tokenTypes. We currently ask that tokenType < 65536.
    /// data[5*i+4] - tokenModifiers: each set bit will be looked up in SemanticTokensLegend.tokenModifiers
    ///
    /// See https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/ for more.
    data: Vec<u32>,

    /// Server-assigned id of this result; sent back when requesting a delta.
    pub(crate) result_id: Option<SharedString>,
}

/// Iterator that decodes relative-encoded token data into absolute positions.
pub struct SemanticTokensIter<'a> {
    /// `(line, start)` of the previously yielded token; `None` before the first.
    prev: Option<(u32, u32)>,
    data: ChunksExact<'a, u32>,
}

// A single item from `data`.
struct SemanticTokenValue {
    delta_line: u32,
    delta_start: u32,
    length: u32,
    token_type: TokenType,
    token_modifiers: u32,
}

/// A semantic token, independent of its position.
#[derive(Debug, PartialEq, Eq)]
pub struct SemanticToken {
    /// Absolute line of the token (first token's delta is taken from line 0).
    pub line: u32,
    /// Absolute start character on `line`, in the encoding the server uses
    /// (interpreted as UTF-16 by `raw_to_buffer_semantic_tokens`).
    pub start: u32,
    pub length: u32,
    pub token_type: TokenType,
    pub token_modifiers: u32,
}
634
635impl ServerSemanticTokens {
636    pub fn from_full(data: Vec<u32>, result_id: Option<SharedString>) -> Self {
637        ServerSemanticTokens { data, result_id }
638    }
639
640    pub(crate) fn apply(&mut self, edits: &[SemanticTokensEdit]) {
641        for edit in edits {
642            let start = edit.start as usize;
643            let end = start + edit.delete_count as usize;
644            self.data.splice(start..end, edit.data.iter().copied());
645        }
646    }
647
648    pub fn tokens(&self) -> SemanticTokensIter<'_> {
649        SemanticTokensIter {
650            prev: None,
651            data: self.data.chunks_exact(5),
652        }
653    }
654}
655
656impl Iterator for SemanticTokensIter<'_> {
657    type Item = SemanticToken;
658
659    fn next(&mut self) -> Option<Self::Item> {
660        let chunk = self.data.next()?;
661        let token = SemanticTokenValue {
662            delta_line: chunk[0],
663            delta_start: chunk[1],
664            length: chunk[2],
665            token_type: TokenType(chunk[3]),
666            token_modifiers: chunk[4],
667        };
668
669        let (line, start) = if let Some((last_line, last_start)) = self.prev {
670            let line = last_line + token.delta_line;
671            let start = if token.delta_line == 0 {
672                last_start + token.delta_start
673            } else {
674                token.delta_start
675            };
676            (line, start)
677        } else {
678            (token.delta_line, token.delta_start)
679        };
680
681        self.prev = Some((line, start));
682
683        Some(SemanticToken {
684            line,
685            start,
686            length: token.length,
687            token_type: token.token_type,
688            token_modifiers: token.token_modifiers,
689        })
690    }
691}
692
693#[cfg(test)]
694mod tests {
695    use super::*;
696    use crate::lsp_command::SemanticTokensEdit;
697    use lsp::SEMANTIC_TOKEN_MODIFIERS;
698
    /// Renders a modifier bitset as `+`-joined names from `SEMANTIC_TOKEN_MODIFIERS`.
    /// Returns `-` for an empty set; bits beyond the known modifiers are appended
    /// as a hex `?0x…` marker.
    fn modifier_names(bits: u32) -> String {
        if bits == 0 {
            return "-".to_string();
        }
        let names: Vec<&str> = SEMANTIC_TOKEN_MODIFIERS
            .iter()
            .enumerate()
            .filter(|(i, _)| bits & (1 << i) != 0)
            .map(|(_, m)| m.as_str())
            .collect();

        // Check for unknown bits
        let known_bits = (1u32 << SEMANTIC_TOKEN_MODIFIERS.len()) - 1;
        let unknown = bits & !known_bits;

        if unknown != 0 {
            let mut result = names.join("+");
            if !result.is_empty() {
                result.push('+');
            }
            result.push_str(&format!("?0x{:x}", unknown));
            result
        } else {
            names.join("+")
        }
    }
725
726    /// Debug tool: parses semantic token JSON from LSP and prints human-readable output.
727    ///
728    /// Usage: Paste JSON into `json_input`, then run:
729    ///   cargo test -p project debug_parse_tokens -- --nocapture --ignored
730    ///
731    /// Accepts either:
732    /// - Full LSP response: `{"jsonrpc":"2.0","id":1,"result":{"data":[...]}}`
733    /// - Just the data array: `[0,0,5,1,0,...]`
734    ///
735    /// For delta responses, paste multiple JSON messages (one per line) and they
736    /// will be applied in sequence.
737    ///
738    /// Token encoding (5 values per token):
739    ///   [deltaLine, deltaStart, length, tokenType, tokenModifiers]
740    #[test]
741    #[ignore] // Run with: cargo test -p project debug_parse_tokens -- --nocapture --ignored
742    fn debug_parse_tokens() {
743        // ============================================================
744        // PASTE YOUR JSON HERE (one message per line for sequences)
745        // Comments starting with // are ignored
746        // ============================================================
747        let json_input = r#"
748// === EXAMPLE 1: Full response (LSP spec example) ===
749// 3 tokens: property at line 2, type at line 2, class at line 5
750{"jsonrpc":"2.0","id":1,"result":{"resultId":"1","data":[2,5,3,9,3,0,5,4,6,0,3,2,7,1,0]}}
751
752// === EXAMPLE 2: Delta response ===
753// User added empty line at start of file, so all tokens shift down by 1 line.
754// This changes first token's deltaLine from 2 to 3 (edit at index 0).
755{"jsonrpc":"2.0","id":2,"result":{"resultId":"2","edits":[{"start":0,"deleteCount":1,"data":[3]}]}}
756
757// === EXAMPLE 3: Another delta ===
758// User added a new token. Insert 5 values at position 5 (after first token).
759// New token: same line as token 1, 2 chars after it ends, len 5, type=function(12), mods=definition(2)
760{"jsonrpc":"2.0","id":3,"result":{"resultId":"3","edits":[{"start":5,"deleteCount":0,"data":[0,2,5,12,2]}]}}
761        "#;
762        // Accepted formats:
763        // - Full response: {"result":{"data":[...]}}
764        // - Delta response: {"result":{"edits":[{"start":N,"deleteCount":N,"data":[...]}]}}
765        // - Just array: [0,0,5,1,0,...]
766
767        // ============================================================
768        // PROCESSING
769        // ============================================================
770        let mut current_data: Vec<u32> = Vec::new();
771        let mut result_id: Option<String> = None;
772
773        for line in json_input.lines() {
774            let line = line.trim();
775            if line.is_empty() || line.starts_with("//") {
776                continue;
777            }
778
779            let parsed: serde_json::Value =
780                serde_json::from_str(line).expect("Failed to parse JSON");
781
782            // Try to extract data from various JSON shapes
783            let (data, edits, new_result_id) = extract_semantic_tokens(&parsed);
784
785            if let Some(new_id) = new_result_id {
786                result_id = Some(new_id);
787            }
788
789            if let Some(full_data) = data {
790                println!("\n{}", "=".repeat(70));
791                println!("FULL RESPONSE (resultId: {:?})", result_id);
792                current_data = full_data;
793            } else if let Some(delta_edits) = edits {
794                println!("\n{}", "=".repeat(70));
795                println!(
796                    "DELTA RESPONSE: {} edit(s) (resultId: {:?})",
797                    delta_edits.len(),
798                    result_id
799                );
800                for (i, edit) in delta_edits.iter().enumerate() {
801                    println!(
802                        "  [{}] start={}, delete={}, insert {} values",
803                        i,
804                        edit.start,
805                        edit.delete_count,
806                        edit.data.len()
807                    );
808                }
809                let mut tokens = ServerSemanticTokens::from_full(current_data.clone(), None);
810                tokens.apply(&delta_edits);
811                current_data = tokens.data;
812            }
813        }
814
815        // Print parsed tokens
816        println!(
817            "\nDATA: {} values = {} tokens",
818            current_data.len(),
819            current_data.len() / 5
820        );
821        println!("\nPARSED TOKENS:");
822        println!("{:-<100}", "");
823        println!(
824            "{:>5} {:>6} {:>4}  {:<15} {}",
825            "LINE", "START", "LEN", "TYPE", "MODIFIERS"
826        );
827        println!("{:-<100}", "");
828
829        let tokens = ServerSemanticTokens::from_full(current_data, None);
830        for token in tokens.tokens() {
831            println!(
832                "{:>5} {:>6} {:>4}  {:<15} {}",
833                token.line,
834                token.start,
835                token.length,
836                token.token_type.0,
837                modifier_names(token.token_modifiers),
838            );
839        }
840        println!("{:-<100}", "");
841        println!("{}\n", "=".repeat(100));
842    }
843
844    fn extract_semantic_tokens(
845        value: &serde_json::Value,
846    ) -> (
847        Option<Vec<u32>>,
848        Option<Vec<SemanticTokensEdit>>,
849        Option<String>,
850    ) {
851        // Try as array directly: [1,2,3,...]
852        if let Some(arr) = value.as_array() {
853            let data: Vec<u32> = arr
854                .iter()
855                .filter_map(|v| v.as_u64().map(|n| n as u32))
856                .collect();
857            return (Some(data), None, None);
858        }
859
860        // Try as LSP response: {"result": {"data": [...]} } or {"result": {"edits": [...]}}
861        let result = value.get("result").unwrap_or(value);
862        let result_id = result
863            .get("resultId")
864            .and_then(|v| v.as_str())
865            .map(String::from);
866
867        // Full response with data
868        if let Some(data_arr) = result.get("data").and_then(|v| v.as_array()) {
869            let data: Vec<u32> = data_arr
870                .iter()
871                .filter_map(|v| v.as_u64().map(|n| n as u32))
872                .collect();
873            return (Some(data), None, result_id);
874        }
875
876        // Delta response with edits
877        if let Some(edits_arr) = result.get("edits").and_then(|v| v.as_array()) {
878            let edits: Vec<SemanticTokensEdit> = edits_arr
879                .iter()
880                .filter_map(|e| {
881                    Some(SemanticTokensEdit {
882                        start: e.get("start")?.as_u64()? as u32,
883                        delete_count: e.get("deleteCount")?.as_u64()? as u32,
884                        data: e
885                            .get("data")
886                            .and_then(|d| d.as_array())
887                            .map(|arr| {
888                                arr.iter()
889                                    .filter_map(|v| v.as_u64().map(|n| n as u32))
890                                    .collect()
891                            })
892                            .unwrap_or_default(),
893                    })
894                })
895                .collect();
896            return (None, Some(edits), result_id);
897        }
898
899        (None, None, result_id)
900    }
901
902    #[test]
903    fn parses_sample_tokens() {
904        // Example from the spec: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_semanticTokens
905        let tokens = ServerSemanticTokens::from_full(
906            vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0, 3, 2, 7, 2, 0],
907            None,
908        )
909        .tokens()
910        .collect::<Vec<SemanticToken>>();
911
912        // The spec uses 1-based line numbers, and 0-based character numbers. This test uses 0-based for both.
913        assert_eq!(
914            tokens,
915            &[
916                SemanticToken {
917                    line: 2,
918                    start: 5,
919                    length: 3,
920                    token_type: TokenType(0),
921                    token_modifiers: 3
922                },
923                SemanticToken {
924                    line: 2,
925                    start: 10,
926                    length: 4,
927                    token_type: TokenType(1),
928                    token_modifiers: 0
929                },
930                SemanticToken {
931                    line: 5,
932                    start: 2,
933                    length: 7,
934                    token_type: TokenType(2),
935                    token_modifiers: 0
936                }
937            ]
938        );
939    }
940
941    #[test]
942    fn applies_delta_edit() {
943        // Example from the spec: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_semanticTokens
944        // After a user types a new empty line at the beginning of the file,
945        // the tokens shift down by one line. The delta edit transforms
946        // [2,5,3,0,3, 0,5,4,1,0, 3,2,7,2,0] into [3,5,3,0,3, 0,5,4,1,0, 3,2,7,2,0]
947        // by replacing the first element (deltaLine of first token) from 2 to 3.
948
949        let mut tokens = ServerSemanticTokens::from_full(
950            vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0, 3, 2, 7, 2, 0],
951            None,
952        );
953
954        tokens.apply(&[SemanticTokensEdit {
955            start: 0,
956            delete_count: 1,
957            data: vec![3],
958        }]);
959
960        let result = tokens.tokens().collect::<Vec<SemanticToken>>();
961
962        assert_eq!(
963            result,
964            &[
965                SemanticToken {
966                    line: 3,
967                    start: 5,
968                    length: 3,
969                    token_type: TokenType(0),
970                    token_modifiers: 3
971                },
972                SemanticToken {
973                    line: 3,
974                    start: 10,
975                    length: 4,
976                    token_type: TokenType(1),
977                    token_modifiers: 0
978                },
979                SemanticToken {
980                    line: 6,
981                    start: 2,
982                    length: 7,
983                    token_type: TokenType(2),
984                    token_modifiers: 0
985                }
986            ]
987        );
988    }
989}