semantic_tokens.rs

use std::{collections::hash_map, ops::Range, slice::ChunksExact, sync::Arc};

use anyhow::Result;

use clock::Global;
use collections::HashMap;
use futures::{
    FutureExt as _,
    future::{Shared, join_all},
};
use gpui::{App, AppContext, AsyncApp, Context, Entity, ReadGlobal as _, SharedString, Task};
use language::{Buffer, LanguageName, language_settings::all_language_settings};
use lsp::{AdapterServerCapabilities, LanguageServerId};
use rpc::{TypedEnvelope, proto};
use settings::{SemanticTokenRule, SemanticTokenRules, Settings as _, SettingsStore};
use smol::future::yield_now;
use text::{Anchor, Bias, OffsetUtf16, PointUtf16, Unclipped};
use util::ResultExt as _;

use crate::{
    LanguageServerToQuery, LspStore, LspStoreEvent,
    lsp_command::{
        LspCommand, SemanticTokensDelta, SemanticTokensEdit, SemanticTokensFull,
        SemanticTokensResponse,
    },
    project_settings::ProjectSettings,
};

pub(super) struct SemanticTokenConfig {
    stylizers: HashMap<(LanguageServerId, Option<LanguageName>), SemanticTokenStylizer>,
    rules: SemanticTokenRules,
    global_mode: settings::SemanticTokens,
}

impl SemanticTokenConfig {
    pub(super) fn new(cx: &App) -> Self {
        Self {
            stylizers: HashMap::default(),
            rules: ProjectSettings::get_global(cx)
                .global_lsp_settings
                .semantic_token_rules
                .clone(),
            global_mode: all_language_settings(None, cx).defaults.semantic_tokens,
        }
    }

    pub(super) fn remove_server_data(&mut self, server_id: LanguageServerId) {
        self.stylizers.retain(|&(id, _), _| id != server_id);
    }

    pub(super) fn update_rules(&mut self, new_rules: SemanticTokenRules) -> bool {
        if new_rules != self.rules {
            self.rules = new_rules;
            self.stylizers.clear();
            true
        } else {
            false
        }
    }

    pub(super) fn update_global_mode(&mut self, new_mode: settings::SemanticTokens) -> bool {
        if new_mode != self.global_mode {
            self.global_mode = new_mode;
            true
        } else {
            false
        }
    }
}

#[derive(Debug, Clone, Copy)]
pub struct RefreshForServer {
    pub server_id: LanguageServerId,
    pub request_id: Option<usize>,
}

impl LspStore {
    pub fn semantic_tokens(
        &mut self,
        buffer: Entity<Buffer>,
        refresh: Option<RefreshForServer>,
        cx: &mut Context<Self>,
    ) -> SemanticTokensTask {
        let version_queried_for = buffer.read(cx).version();
        let latest_lsp_data = self.latest_lsp_data(&buffer, cx);
        let semantic_tokens_data = latest_lsp_data.semantic_tokens.get_or_insert_default();
        if let Some(refresh) = refresh {
            let mut invalidate_cache = true;
            match semantic_tokens_data
                .latest_invalidation_requests
                .entry(refresh.server_id)
            {
                hash_map::Entry::Occupied(mut o) => {
                    if refresh.request_id > *o.get() {
                        o.insert(refresh.request_id);
                    } else {
                        invalidate_cache = false;
                    }
                }
                hash_map::Entry::Vacant(v) => {
                    v.insert(refresh.request_id);
                }
            }

            if invalidate_cache {
                let SemanticTokensData {
                    raw_tokens,
                    latest_invalidation_requests: _,
                    update,
                } = semantic_tokens_data;
                *update = None;
                raw_tokens.servers.clear();
            }
        }

        if let Some((updating_for, task)) = &semantic_tokens_data.update
            && !version_queried_for.changed_since(updating_for)
        {
            return task.clone();
        }

        let new_tokens = self.fetch_semantic_tokens_for_buffer(
            &buffer,
            refresh.map(|refresh| refresh.server_id),
            cx,
        );

        let task_buffer = buffer.clone();
        let task_version_queried_for = version_queried_for.clone();
        let task = cx
            .spawn(async move |lsp_store, cx| {
                let buffer = task_buffer;
                let version_queried_for = task_version_queried_for;
                let res = if let Some(new_tokens) = new_tokens.await {
                    let (raw_tokens, buffer_snapshot) = lsp_store
                        .update(cx, |lsp_store, cx| {
                            let lsp_data = lsp_store.latest_lsp_data(&buffer, cx);
                            let semantic_tokens_data =
                                lsp_data.semantic_tokens.get_or_insert_default();

                            if version_queried_for == lsp_data.buffer_version {
                                for (server_id, new_tokens_response) in new_tokens {
                                    match new_tokens_response {
                                        SemanticTokensResponse::Full { data, result_id } => {
                                            semantic_tokens_data.raw_tokens.servers.insert(
                                                server_id,
                                                Arc::new(ServerSemanticTokens::from_full(
                                                    data, result_id,
                                                )),
                                            );
                                        }
                                        SemanticTokensResponse::Delta { edits, result_id } => {
                                            if let Some(tokens) = semantic_tokens_data
                                                .raw_tokens
                                                .servers
                                                .get_mut(&server_id)
                                            {
                                                let tokens = Arc::make_mut(tokens);
                                                tokens.result_id = result_id;
                                                tokens.apply(&edits);
                                            }
                                        }
                                    }
                                }
                            }
                            let buffer_snapshot =
                                buffer.read_with(cx, |buffer, _| buffer.snapshot());
                            (semantic_tokens_data.raw_tokens.clone(), buffer_snapshot)
                        })
                        .map_err(Arc::new)?;
                    Some(
                        cx.background_spawn(raw_to_buffer_semantic_tokens(
                            raw_tokens,
                            buffer_snapshot.text.clone(),
                        ))
                        .await,
                    )
                } else {
                    lsp_store.update(cx, |lsp_store, cx| {
                        if let Some(current_lsp_data) =
                            lsp_store.current_lsp_data(buffer.read(cx).remote_id())
                        {
                            if current_lsp_data.buffer_version == version_queried_for {
                                current_lsp_data.semantic_tokens = None;
                            }
                        }
                    })?;
                    None
                };
                Ok(BufferSemanticTokens { tokens: res })
            })
            .shared();

        self.latest_lsp_data(&buffer, cx)
            .semantic_tokens
            .get_or_insert_default()
            .update = Some((version_queried_for, task.clone()));

        task
    }

    pub(super) fn fetch_semantic_tokens_for_buffer(
        &mut self,
        buffer: &Entity<Buffer>,
        for_server: Option<LanguageServerId>,
        cx: &mut Context<Self>,
    ) -> Task<Option<HashMap<LanguageServerId, SemanticTokensResponse>>> {
        if let Some((client, upstream_project_id)) = self.upstream_client() {
            let request = SemanticTokensFull { for_server };
            if !self.is_capable_for_proto_request(buffer, &request, cx) {
                return Task::ready(None);
            }

            let request_timeout = ProjectSettings::get_global(cx)
                .global_lsp_settings
                .get_request_timeout();
            let request_task = client.request_lsp(
                upstream_project_id,
                None,
                request_timeout,
                cx.background_executor().clone(),
                request.to_proto(upstream_project_id, buffer.read(cx)),
            );
            let buffer = buffer.clone();
            cx.spawn(async move |weak_lsp_store, cx| {
                let lsp_store = weak_lsp_store.upgrade()?;
                let tokens = join_all(
                    request_task
                        .await
                        .log_err()
                        .flatten()
                        .map(|response| response.payload)
                        .unwrap_or_default()
                        .into_iter()
                        .map(|response| {
                            let server_id = LanguageServerId::from_proto(response.server_id);
                            let response = request.response_from_proto(
                                response.response,
                                lsp_store.clone(),
                                buffer.clone(),
                                cx.clone(),
                            );
                            async move {
                                match response.await {
                                    Ok(tokens) => Some((server_id, tokens)),
                                    Err(e) => {
                                        log::error!("Failed to query remote semantic tokens for server {server_id:?}: {e:#}");
                                        None
                                    }
                                }
                            }
                        }),
                )
                .await
                .into_iter()
                .flatten()
                .collect();
                Some(tokens)
            })
        } else {
            let token_tasks = self
                .local_lsp_servers_for_buffer(buffer, cx)
                .into_iter()
                .filter(|&server_id| {
                    for_server.is_none_or(|for_server_id| for_server_id == server_id)
                })
                .filter_map(|server_id| {
                    let capabilities = AdapterServerCapabilities {
                        server_capabilities: self.lsp_server_capabilities.get(&server_id)?.clone(),
                        code_action_kinds: None,
                    };
                    let request_task = match self.semantic_tokens_result_id(server_id, buffer, cx) {
                        Some(result_id) => {
                            let delta_request = SemanticTokensDelta {
                                previous_result_id: result_id,
                            };
                            if !delta_request.check_capabilities(capabilities.clone()) {
                                let full_request = SemanticTokensFull {
                                    for_server: Some(server_id),
                                };
                                if !full_request.check_capabilities(capabilities) {
                                    return None;
                                }

                                self.request_lsp(
                                    buffer.clone(),
                                    LanguageServerToQuery::Other(server_id),
                                    full_request,
                                    cx,
                                )
                            } else {
                                self.request_lsp(
                                    buffer.clone(),
                                    LanguageServerToQuery::Other(server_id),
                                    delta_request,
                                    cx,
                                )
                            }
                        }
                        None => {
                            let request = SemanticTokensFull {
                                for_server: Some(server_id),
                            };
                            if !request.check_capabilities(capabilities) {
                                return None;
                            }
                            self.request_lsp(
                                buffer.clone(),
                                LanguageServerToQuery::Other(server_id),
                                request,
                                cx,
                            )
                        }
                    };
                    Some(async move { (server_id, request_task.await) })
                })
                .collect::<Vec<_>>();
            if token_tasks.is_empty() {
                return Task::ready(None);
            }

            cx.background_spawn(async move {
                Some(
                    join_all(token_tasks)
                        .await
                        .into_iter()
                        .flat_map(|(server_id, response)| {
                            match response {
                                Ok(tokens) => Some((server_id, tokens)),
                                Err(e) => {
                                    log::error!("Failed to query semantic tokens for server {server_id:?}: {e:#}");
                                    None
                                }
                            }
                        })
                        .collect()
                )
            })
        }
    }

    pub(crate) async fn handle_refresh_semantic_tokens(
        lsp_store: Entity<Self>,
        envelope: TypedEnvelope<proto::RefreshSemanticTokens>,
        mut cx: AsyncApp,
    ) -> Result<proto::Ack> {
        lsp_store.update(&mut cx, |_, cx| {
            cx.emit(LspStoreEvent::RefreshSemanticTokens {
                server_id: LanguageServerId::from_proto(envelope.payload.server_id),
                request_id: envelope.payload.request_id.map(|id| id as usize),
            });
        })?;
        Ok(proto::Ack {})
    }

    fn semantic_tokens_result_id(
        &mut self,
        server_id: LanguageServerId,
        buffer: &Entity<Buffer>,
        cx: &mut App,
    ) -> Option<SharedString> {
        self.latest_lsp_data(buffer, cx)
            .semantic_tokens
            .as_ref()?
            .raw_tokens
            .servers
            .get(&server_id)?
            .result_id
            .clone()
    }

    pub fn get_or_create_token_stylizer(
        &mut self,
        server_id: LanguageServerId,
        language: Option<&LanguageName>,
        cx: &mut App,
    ) -> Option<&SemanticTokenStylizer> {
        let stylizer = match self
            .semantic_token_config
            .stylizers
            .entry((server_id, language.cloned()))
        {
            hash_map::Entry::Occupied(o) => o.into_mut(),
            hash_map::Entry::Vacant(v) => {
                let tokens_provider = self
                    .lsp_server_capabilities
                    .get(&server_id)?
                    .semantic_tokens_provider
                    .as_ref()?;
                let legend = match tokens_provider {
                    lsp::SemanticTokensServerCapabilities::SemanticTokensOptions(opts) => {
                        &opts.legend
                    }
                    lsp::SemanticTokensServerCapabilities::SemanticTokensRegistrationOptions(
                        opts,
                    ) => &opts.semantic_tokens_options.legend,
                };
                let language_rules = language.and_then(|language| {
                    SettingsStore::global(cx).language_semantic_token_rules(language.as_ref())
                });
                let stylizer = SemanticTokenStylizer::new(server_id, legend, language_rules, cx);
                v.insert(stylizer)
            }
        };
        Some(stylizer)
    }
}

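/// A shared, cloneable task that resolves to a buffer's decoded semantic tokens,
/// with errors wrapped in `Arc` so every clone of the task can observe them.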
pub type SemanticTokensTask =
    Shared<Task<std::result::Result<BufferSemanticTokens, Arc<anyhow::Error>>>>;

#[derive(Debug, Default, Clone)]
pub struct BufferSemanticTokens {
    pub tokens: Option<HashMap<LanguageServerId, Arc<[BufferSemanticToken]>>>,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TokenType(pub u32);

#[derive(Debug, Clone)]
pub struct BufferSemanticToken {
    /// The range of the token in the buffer.
    ///
    /// Guaranteed to contain a buffer id.
    pub range: Range<Anchor>,
    pub token_type: TokenType,
    pub token_modifiers: u32,
}

pub struct SemanticTokenStylizer {
    server_id: LanguageServerId,
    rules_by_token_type: HashMap<TokenType, Vec<SemanticTokenRule>>,
    token_type_names: HashMap<TokenType, SharedString>,
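    /// Maps each modifier name from the server's legend to its bit mask; bit `i`
    /// corresponds to `legend.token_modifiers[i]` (the masks are built as `1 << i`
    /// in `new` below).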
    modifier_mask: HashMap<SharedString, u32>,
}

impl SemanticTokenStylizer {
    pub fn new(
        server_id: LanguageServerId,
        legend: &lsp::SemanticTokensLegend,
        language_rules: Option<&SemanticTokenRules>,
        cx: &App,
    ) -> Self {
        let token_types: HashMap<TokenType, SharedString> = legend
            .token_types
            .iter()
            .enumerate()
            .map(|(i, token_type)| {
                (
                    TokenType(i as u32),
                    SharedString::from(token_type.as_str().to_string()),
                )
            })
            .collect();
        let modifier_mask: HashMap<SharedString, u32> = legend
            .token_modifiers
            .iter()
            .enumerate()
            .map(|(i, modifier)| (SharedString::from(modifier.as_str().to_string()), 1 << i))
            .collect();

        let global_rules = &ProjectSettings::get_global(cx)
            .global_lsp_settings
            .semantic_token_rules;

        let rules_by_token_type = token_types
            .iter()
            .map(|(index, token_type_name)| {
                let filter = |rule: &&SemanticTokenRule| {
                    rule.token_type
                        .as_ref()
                        .is_none_or(|rule_token_type| rule_token_type == token_type_name.as_ref())
                };
                let matching_rules: Vec<SemanticTokenRule> = global_rules
                    .rules
                    .iter()
                    .chain(language_rules.into_iter().flat_map(|lr| &lr.rules))
                    .rev()
                    .filter(filter)
                    .cloned()
                    .collect();
                (*index, matching_rules)
            })
            .collect();

        SemanticTokenStylizer {
            server_id,
            rules_by_token_type,
            token_type_names: token_types,
            modifier_mask,
        }
    }

    pub fn server_id(&self) -> LanguageServerId {
        self.server_id
    }

    pub fn token_type_name(&self, token_type: TokenType) -> Option<&SharedString> {
        self.token_type_names.get(&token_type)
    }

    pub fn has_modifier(&self, token_modifiers: u32, modifier: &str) -> bool {
        let Some(mask) = self.modifier_mask.get(modifier) else {
            return false;
        };
        (token_modifiers & mask) != 0
    }

    pub fn token_modifiers(&self, token_modifiers: u32) -> Option<String> {
        let modifiers: Vec<&str> = self
            .modifier_mask
            .iter()
            .filter(|(_, mask)| (token_modifiers & *mask) != 0)
            .map(|(name, _)| name.as_ref())
            .collect();
        if modifiers.is_empty() {
            None
        } else {
            Some(modifiers.join(", "))
        }
    }

    pub fn rules_for_token(&self, token_type: TokenType) -> Option<&[SemanticTokenRule]> {
        self.rules_by_token_type
            .get(&token_type)
            .map(|v| v.as_slice())
    }
}

async fn raw_to_buffer_semantic_tokens(
    raw_tokens: RawSemanticTokens,
    buffer_snapshot: text::BufferSnapshot,
) -> HashMap<LanguageServerId, Arc<[BufferSemanticToken]>> {
    let mut res = HashMap::default();
    for (&server_id, server_tokens) in &raw_tokens.servers {
        let mut last = 0;
        // We don't `collect` here because the filter-map can't pre-allocate;
        // we'd rather over-allocate than not, since we have to re-allocate into
        // an arc slice anyway.
        let mut buffer_tokens = Vec::with_capacity(server_tokens.data.len() / 5);
        let mut tokens = server_tokens.tokens();
        // 5000 was chosen by profiling; on a decent machine each chunk takes about 1ms.
        // Chunking avoids blocking an executor thread for hundreds of milliseconds at a
        // time on very big files. If we ever change the code below to not query the
        // underlying rope 6 times per token, we can bump this up.
        const CHUNK_LEN: usize = 5000;
        loop {
            let mut changed = false;
            let chunk = tokens
                .by_ref()
                .take(CHUNK_LEN)
                .inspect(|_| changed = true)
                .filter_map(|token| {
                    let start = Unclipped(PointUtf16::new(token.line, token.start));
                    let clipped_start = buffer_snapshot.clip_point_utf16(start, Bias::Left);
                    let start_offset = buffer_snapshot
                        .as_rope()
                        .point_utf16_to_offset_utf16(clipped_start);
                    let end_offset = start_offset + OffsetUtf16(token.length as usize);

                    let start = buffer_snapshot
                        .as_rope()
                        .offset_utf16_to_offset(start_offset);
                    if start < last {
                        return None;
                    }

                    let end = buffer_snapshot.as_rope().offset_utf16_to_offset(end_offset);
                    if end < last {
                        return None;
                    }
                    last = end;

                    if start == end {
                        return None;
                    }

                    Some(BufferSemanticToken {
                        range: buffer_snapshot.anchor_before(start)
                            ..buffer_snapshot.anchor_after(end),
                        token_type: token.token_type,
                        token_modifiers: token.token_modifiers,
                    })
                });
            buffer_tokens.extend(chunk);

            if !changed {
                break;
            }
            yield_now().await;
        }

        res.insert(server_id, buffer_tokens.into());
    }
    res
}

#[derive(Default, Debug)]
pub struct SemanticTokensData {
    pub(super) raw_tokens: RawSemanticTokens,
    pub(super) latest_invalidation_requests: HashMap<LanguageServerId, Option<usize>>,
    update: Option<(Global, SemanticTokensTask)>,
}

/// All the semantic tokens for a buffer.
///
/// This aggregates semantic tokens from multiple language servers in a specific order.
/// Semantic tokens later in the list will override earlier ones in case of overlap.
#[derive(Default, Debug, Clone)]
pub(super) struct RawSemanticTokens {
    pub servers: HashMap<lsp::LanguageServerId, Arc<ServerSemanticTokens>>,
}

/// All the semantic tokens for a buffer, from a single language server.
#[derive(Debug, Clone)]
pub struct ServerSemanticTokens {
    /// Tokens are delta-encoded, five values each:
    /// data[5*i] - deltaLine: token line number, relative to the previous token's line
    /// data[5*i+1] - deltaStart: token start character, relative to the previous token's start if they are on the same line, otherwise relative to 0
    /// data[5*i+2] - length: the length of the token
    /// data[5*i+3] - tokenType: looked up in SemanticTokensLegend.tokenTypes; the spec currently asks that tokenType < 65536
    /// data[5*i+4] - tokenModifiers: each set bit is looked up in SemanticTokensLegend.tokenModifiers
    ///
    /// See https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/ for more.
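    ///
    /// For example, the spec's sample payload
    /// `[2,5,3,0,3, 0,5,4,1,0, 3,2,7,2,0]` decodes to three tokens starting at
    /// (line 2, char 5), (line 2, char 10), and (line 5, char 2); see the
    /// `parses_sample_tokens` test below.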
    data: Vec<u32>,

    pub(crate) result_id: Option<SharedString>,
}

pub struct SemanticTokensIter<'a> {
    prev: Option<(u32, u32)>,
    data: ChunksExact<'a, u32>,
}

/// A single delta-encoded item from `data`.
struct SemanticTokenValue {
    delta_line: u32,
    delta_start: u32,
    length: u32,
    token_type: TokenType,
    token_modifiers: u32,
}

/// A decoded semantic token, with its absolute position resolved.
#[derive(Debug, PartialEq, Eq)]
pub struct SemanticToken {
    pub line: u32,
    pub start: u32,
    pub length: u32,
    pub token_type: TokenType,
    pub token_modifiers: u32,
}

impl ServerSemanticTokens {
    pub fn from_full(data: Vec<u32>, result_id: Option<SharedString>) -> Self {
        ServerSemanticTokens { data, result_id }
    }

    pub(crate) fn apply(&mut self, edits: &[SemanticTokensEdit]) {
        for edit in edits {
            let start = (edit.start as usize).min(self.data.len());
            let end = (start + edit.delete_count as usize).min(self.data.len());
            self.data.splice(start..end, edit.data.iter().copied());
        }
    }

    pub fn tokens(&self) -> SemanticTokensIter<'_> {
        SemanticTokensIter {
            prev: None,
            data: self.data.chunks_exact(5),
        }
    }
}

impl Iterator for SemanticTokensIter<'_> {
    type Item = SemanticToken;

    fn next(&mut self) -> Option<Self::Item> {
        let chunk = self.data.next()?;
        let token = SemanticTokenValue {
            delta_line: chunk[0],
            delta_start: chunk[1],
            length: chunk[2],
            token_type: TokenType(chunk[3]),
            token_modifiers: chunk[4],
        };

        let (line, start) = if let Some((last_line, last_start)) = self.prev {
            let line = last_line + token.delta_line;
            let start = if token.delta_line == 0 {
                last_start + token.delta_start
            } else {
                token.delta_start
            };
            (line, start)
        } else {
            (token.delta_line, token.delta_start)
        };

        self.prev = Some((line, start));

        Some(SemanticToken {
            line,
            start,
            length: token.length,
            token_type: token.token_type,
            token_modifiers: token.token_modifiers,
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::lsp_command::SemanticTokensEdit;
    use lsp::SEMANTIC_TOKEN_MODIFIERS;

    fn modifier_names(bits: u32) -> String {
        if bits == 0 {
            return "-".to_string();
        }
        let names: Vec<&str> = SEMANTIC_TOKEN_MODIFIERS
            .iter()
            .enumerate()
            .filter(|(i, _)| bits & (1 << i) != 0)
            .map(|(_, m)| m.as_str())
            .collect();

        // Check for unknown bits
        let known_bits = (1u32 << SEMANTIC_TOKEN_MODIFIERS.len()) - 1;
        let unknown = bits & !known_bits;

        if unknown != 0 {
            let mut result = names.join("+");
            if !result.is_empty() {
                result.push('+');
            }
            result.push_str(&format!("?0x{:x}", unknown));
            result
        } else {
            names.join("+")
        }
    }

    /// Debug tool: parses semantic token JSON from LSP and prints human-readable output.
    ///
    /// Usage: Paste JSON into `json_input`, then run:
    ///   cargo test -p project debug_parse_tokens -- --nocapture --ignored
    ///
    /// Accepts either:
    /// - Full LSP response: `{"jsonrpc":"2.0","id":1,"result":{"data":[...]}}`
    /// - Just the data array: `[0,0,5,1,0,...]`
    ///
    /// For delta responses, paste multiple JSON messages (one per line) and they
    /// will be applied in sequence.
    ///
    /// Token encoding (5 values per token):
    ///   [deltaLine, deltaStart, length, tokenType, tokenModifiers]
    #[test]
    #[ignore] // Run with: cargo test -p project debug_parse_tokens -- --nocapture --ignored
    fn debug_parse_tokens() {
        // ============================================================
        // PASTE YOUR JSON HERE (one message per line for sequences)
        // Comments starting with // are ignored
        // ============================================================
        let json_input = r#"
// === EXAMPLE 1: Full response (LSP spec example) ===
// 3 tokens: property at line 2, type at line 2, class at line 5
{"jsonrpc":"2.0","id":1,"result":{"resultId":"1","data":[2,5,3,9,3,0,5,4,6,0,3,2,7,1,0]}}

// === EXAMPLE 2: Delta response ===
// User added empty line at start of file, so all tokens shift down by 1 line.
// This changes first token's deltaLine from 2 to 3 (edit at index 0).
{"jsonrpc":"2.0","id":2,"result":{"resultId":"2","edits":[{"start":0,"deleteCount":1,"data":[3]}]}}

// === EXAMPLE 3: Another delta ===
// User added a new token. Insert 5 values at position 5 (after first token).
// New token: same line as token 1, 2 chars after it ends, len 5, type=function(12), mods=definition(2)
{"jsonrpc":"2.0","id":3,"result":{"resultId":"3","edits":[{"start":5,"deleteCount":0,"data":[0,2,5,12,2]}]}}
        "#;
        // Accepted formats:
        // - Full response: {"result":{"data":[...]}}
        // - Delta response: {"result":{"edits":[{"start":N,"deleteCount":N,"data":[...]}]}}
        // - Just array: [0,0,5,1,0,...]

        // ============================================================
        // PROCESSING
        // ============================================================
        let mut current_data: Vec<u32> = Vec::new();
        let mut result_id: Option<String> = None;

        for line in json_input.lines() {
            let line = line.trim();
            if line.is_empty() || line.starts_with("//") {
                continue;
            }

            let parsed: serde_json::Value =
                serde_json::from_str(line).expect("Failed to parse JSON");

            // Try to extract data from various JSON shapes
            let (data, edits, new_result_id) = extract_semantic_tokens(&parsed);

            if let Some(new_id) = new_result_id {
                result_id = Some(new_id);
            }

            if let Some(full_data) = data {
                println!("\n{}", "=".repeat(70));
                println!("FULL RESPONSE (resultId: {:?})", result_id);
                current_data = full_data;
            } else if let Some(delta_edits) = edits {
                println!("\n{}", "=".repeat(70));
                println!(
                    "DELTA RESPONSE: {} edit(s) (resultId: {:?})",
                    delta_edits.len(),
                    result_id
                );
                for (i, edit) in delta_edits.iter().enumerate() {
                    println!(
                        "  [{}] start={}, delete={}, insert {} values",
                        i,
                        edit.start,
                        edit.delete_count,
                        edit.data.len()
                    );
                }
                let mut tokens = ServerSemanticTokens::from_full(current_data.clone(), None);
                tokens.apply(&delta_edits);
                current_data = tokens.data;
            }
        }

        // Print parsed tokens
        println!(
            "\nDATA: {} values = {} tokens",
            current_data.len(),
            current_data.len() / 5
        );
        println!("\nPARSED TOKENS:");
        println!("{:-<100}", "");
        println!(
            "{:>5} {:>6} {:>4}  {:<15} {}",
            "LINE", "START", "LEN", "TYPE", "MODIFIERS"
        );
        println!("{:-<100}", "");

        let tokens = ServerSemanticTokens::from_full(current_data, None);
        for token in tokens.tokens() {
            println!(
                "{:>5} {:>6} {:>4}  {:<15} {}",
                token.line,
                token.start,
                token.length,
                token.token_type.0,
                modifier_names(token.token_modifiers),
            );
        }
        println!("{:-<100}", "");
        println!("{}\n", "=".repeat(100));
    }

    fn extract_semantic_tokens(
        value: &serde_json::Value,
    ) -> (
        Option<Vec<u32>>,
        Option<Vec<SemanticTokensEdit>>,
        Option<String>,
    ) {
        // Try as array directly: [1,2,3,...]
        if let Some(arr) = value.as_array() {
            let data: Vec<u32> = arr
                .iter()
                .filter_map(|v| v.as_u64().map(|n| n as u32))
                .collect();
            return (Some(data), None, None);
        }

        // Try as LSP response: {"result": {"data": [...]} } or {"result": {"edits": [...]}}
        let result = value.get("result").unwrap_or(value);
        let result_id = result
            .get("resultId")
            .and_then(|v| v.as_str())
            .map(String::from);

        // Full response with data
        if let Some(data_arr) = result.get("data").and_then(|v| v.as_array()) {
            let data: Vec<u32> = data_arr
                .iter()
                .filter_map(|v| v.as_u64().map(|n| n as u32))
                .collect();
            return (Some(data), None, result_id);
        }

        // Delta response with edits
        if let Some(edits_arr) = result.get("edits").and_then(|v| v.as_array()) {
            let edits: Vec<SemanticTokensEdit> = edits_arr
                .iter()
                .filter_map(|e| {
                    Some(SemanticTokensEdit {
                        start: e.get("start")?.as_u64()? as u32,
                        delete_count: e.get("deleteCount")?.as_u64()? as u32,
                        data: e
                            .get("data")
                            .and_then(|d| d.as_array())
                            .map(|arr| {
                                arr.iter()
                                    .filter_map(|v| v.as_u64().map(|n| n as u32))
                                    .collect()
                            })
                            .unwrap_or_default(),
                    })
                })
                .collect();
            return (None, Some(edits), result_id);
        }

        (None, None, result_id)
    }

    #[test]
    fn parses_sample_tokens() {
        // Example from the spec: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_semanticTokens
        let tokens = ServerSemanticTokens::from_full(
            vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0, 3, 2, 7, 2, 0],
            None,
        )
        .tokens()
        .collect::<Vec<SemanticToken>>();

        // The spec uses 1-based line numbers, and 0-based character numbers. This test uses 0-based for both.
        assert_eq!(
            tokens,
            &[
                SemanticToken {
                    line: 2,
                    start: 5,
                    length: 3,
                    token_type: TokenType(0),
                    token_modifiers: 3
                },
                SemanticToken {
                    line: 2,
                    start: 10,
                    length: 4,
                    token_type: TokenType(1),
                    token_modifiers: 0
                },
                SemanticToken {
                    line: 5,
                    start: 2,
                    length: 7,
                    token_type: TokenType(2),
                    token_modifiers: 0
                }
            ]
        );
    }

    #[test]
    fn applies_delta_edit() {
        // Example from the spec: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_semanticTokens
        // After a user types a new empty line at the beginning of the file,
        // the tokens shift down by one line. The delta edit transforms
        // [2,5,3,0,3, 0,5,4,1,0, 3,2,7,2,0] into [3,5,3,0,3, 0,5,4,1,0, 3,2,7,2,0]
        // by replacing the first element (deltaLine of first token) from 2 to 3.

        let mut tokens = ServerSemanticTokens::from_full(
            vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0, 3, 2, 7, 2, 0],
            None,
        );

        tokens.apply(&[SemanticTokensEdit {
            start: 0,
            delete_count: 1,
            data: vec![3],
        }]);

        let result = tokens.tokens().collect::<Vec<SemanticToken>>();

        assert_eq!(
            result,
            &[
                SemanticToken {
                    line: 3,
                    start: 5,
                    length: 3,
                    token_type: TokenType(0),
                    token_modifiers: 3
                },
                SemanticToken {
                    line: 3,
                    start: 10,
                    length: 4,
                    token_type: TokenType(1),
                    token_modifiers: 0
                },
                SemanticToken {
                    line: 6,
                    start: 2,
                    length: 7,
                    token_type: TokenType(2),
                    token_modifiers: 0
                }
            ]
        );
    }

    #[test]
    fn applies_out_of_bounds_delta_edit_without_panic() {
        let mut tokens = ServerSemanticTokens::from_full(vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0], None);

        // start beyond data length
        tokens.apply(&[SemanticTokensEdit {
            start: 100,
            delete_count: 5,
            data: vec![1, 2, 3, 4, 5],
        }]);
        assert_eq!(
            tokens.data,
            vec![2, 5, 3, 0, 3, 0, 5, 4, 1, 0, 1, 2, 3, 4, 5]
        );

        // delete_count extends past data length
        let mut tokens = ServerSemanticTokens::from_full(vec![2, 5, 3, 0, 3], None);
        tokens.apply(&[SemanticTokensEdit {
            start: 3,
            delete_count: 100,
            data: vec![9, 9],
        }]);
        assert_eq!(tokens.data, vec![2, 5, 3, 9, 9]);

        // empty data
        let mut tokens = ServerSemanticTokens::from_full(Vec::new(), None);
        tokens.apply(&[SemanticTokensEdit {
            start: 0,
            delete_count: 5,
            data: vec![1, 2, 3, 4, 5],
        }]);
        assert_eq!(tokens.data, vec![1, 2, 3, 4, 5]);
    }
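
    // `tokens()` walks `data` with `chunks_exact(5)`, so a malformed payload whose
    // length is not a multiple of five silently drops the trailing partial token.
    // This test pins down that behavior.
    #[test]
    fn ignores_trailing_partial_chunk() {
        let tokens = ServerSemanticTokens::from_full(vec![2, 5, 3, 0, 3, 9, 9], None);
        let decoded = tokens.tokens().collect::<Vec<SemanticToken>>();
        assert_eq!(decoded.len(), 1);
        assert_eq!(
            decoded[0],
            SemanticToken {
                line: 2,
                start: 5,
                length: 3,
                token_type: TokenType(0),
                token_modifiers: 3
            }
        );
    }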
}