doctor.rs

  1//! Diagnose and repair CRDT document integrity.
  2//!
  3//! Detects dangling references, dependency cycles, and parent-chain
  4//! cycles that can arise from concurrent CRDT edits across peers.
  5//! With `--fix`, applies deterministic, non-destructive repairs.
  6
  7use std::collections::{HashMap, HashSet};
  8use std::path::Path;
  9
 10use anyhow::{anyhow, Result};
 11use serde::Serialize;
 12
 13use crate::db;
 14
 15/// Categories of integrity issues that doctor can detect.
 16#[derive(Debug, Clone, Serialize)]
 17#[serde(rename_all = "snake_case")]
 18enum FindingKind {
 19    DanglingParent,
 20    DanglingBlocker,
 21    BlockerCycle,
 22    ParentCycle,
 23}
 24
 25/// A single integrity issue found during diagnosis.
 26#[derive(Debug, Clone, Serialize)]
 27struct Finding {
 28    kind: FindingKind,
 29    /// Full ULID of the primarily affected task.
 30    task: String,
 31    /// Human-readable description of the issue.
 32    detail: String,
 33    /// Whether this finding affects runtime behavior.  Blocker cycles
 34    /// involving closed or tombstoned tasks are inert because
 35    /// `partition_blockers` already treats them as resolved.
 36    active: bool,
 37    /// Whether `--fix` repaired this finding.
 38    fixed: bool,
 39}
 40
 41/// Aggregate counts for the doctor report.
 42#[derive(Debug, Clone, Default, Serialize)]
 43struct Summary {
 44    dangling_parents: usize,
 45    dangling_blockers: usize,
 46    blocker_cycles: usize,
 47    parent_cycles: usize,
 48    total: usize,
 49    fixed: usize,
 50}
 51
 52/// Full doctor report, serialised as JSON when `--json` is passed.
 53#[derive(Debug, Clone, Serialize)]
 54struct Report {
 55    findings: Vec<Finding>,
 56    summary: Summary,
 57}
 58
 59/// Repair action to apply when `--fix` is requested.
 60enum Repair {
 61    ClearParent(db::TaskId),
 62    RemoveBlocker(db::TaskId, db::TaskId),
 63}
 64
 65pub fn run(root: &Path, fix: bool, json: bool) -> Result<()> {
 66    let store = db::open(root)?;
 67    let tasks = store.list_tasks_unfiltered()?;
 68
 69    // Build lookup structures.
 70    let all_ids: HashSet<String> = tasks.iter().map(|t| t.id.as_str().to_string()).collect();
 71    let open_ids: HashSet<String> = tasks
 72        .iter()
 73        .filter(|t| t.status != db::Status::Closed && t.deleted_at.is_none())
 74        .map(|t| t.id.as_str().to_string())
 75        .collect();
 76
 77    let mut findings: Vec<Finding> = Vec::new();
 78    let mut repairs: Vec<Repair> = Vec::new();
 79
 80    check_dangling_parents(&tasks, &all_ids, &mut findings, &mut repairs);
 81    check_dangling_blockers(&tasks, &all_ids, &mut findings, &mut repairs);
 82    check_blocker_cycles(&tasks, &all_ids, &open_ids, &mut findings, &mut repairs)?;
 83    check_parent_cycles(&tasks, &all_ids, &mut findings, &mut repairs)?;
 84
 85    if fix && !repairs.is_empty() {
 86        store.apply_and_persist(|doc| {
 87            let tasks_map = doc.get_map("tasks");
 88            let ts = db::now_utc();
 89
 90            for repair in &repairs {
 91                match repair {
 92                    Repair::ClearParent(task_id) => {
 93                        let task = db::get_task_map(&tasks_map, task_id)?
 94                            .ok_or_else(|| anyhow!("task {} not found during repair", task_id))?;
 95                        task.insert("parent", "")?;
 96                        task.insert("updated_at", ts.clone())?;
 97                    }
 98                    Repair::RemoveBlocker(task_id, blocker_id) => {
 99                        let task = db::get_task_map(&tasks_map, task_id)?
100                            .ok_or_else(|| anyhow!("task {} not found during repair", task_id))?;
101                        let blockers = db::get_or_create_child_map(&task, "blockers")?;
102                        blockers.delete(blocker_id.as_str())?;
103                        task.insert("updated_at", ts.clone())?;
104                    }
105                }
106            }
107            Ok(())
108        })?;
109
110        for finding in &mut findings {
111            if finding.active {
112                finding.fixed = true;
113            }
114        }
115    }
116
117    let summary = build_summary(&findings);
118    let report = Report { findings, summary };
119
120    if json {
121        println!("{}", serde_json::to_string(&report)?);
122    } else {
123        print_human(&report, fix);
124    }
125
126    Ok(())
127}
128
129/// Detect parent fields that reference missing or tombstoned tasks.
130///
131/// Skips tombstoned tasks — their stale references are not actionable.
132fn check_dangling_parents(
133    tasks: &[db::Task],
134    all_ids: &HashSet<String>,
135    findings: &mut Vec<Finding>,
136    repairs: &mut Vec<Repair>,
137) {
138    let task_map: std::collections::HashMap<&str, &db::Task> =
139        tasks.iter().map(|t| (t.id.as_str(), t)).collect();
140
141    for task in tasks {
142        // Tombstoned tasks are already deleted; stale refs on them aren't actionable.
143        if task.deleted_at.is_some() {
144            continue;
145        }
146
147        let Some(parent) = &task.parent else {
148            continue;
149        };
150
151        if !all_ids.contains(parent.as_str()) {
152            findings.push(Finding {
153                kind: FindingKind::DanglingParent,
154                task: task.id.as_str().to_string(),
155                detail: format!(
156                    "parent references missing task {}",
157                    db::TaskId::display_id(parent.as_str()),
158                ),
159                active: true,
160                fixed: false,
161            });
162            repairs.push(Repair::ClearParent(task.id.clone()));
163            continue;
164        }
165
166        // Parent exists but is tombstoned — the subtask is orphaned.
167        if let Some(pt) = task_map.get(parent.as_str()) {
168            if pt.deleted_at.is_some() {
169                findings.push(Finding {
170                    kind: FindingKind::DanglingParent,
171                    task: task.id.as_str().to_string(),
172                    detail: format!(
173                        "parent references tombstoned task {}",
174                        db::TaskId::display_id(parent.as_str()),
175                    ),
176                    active: true,
177                    fixed: false,
178                });
179                repairs.push(Repair::ClearParent(task.id.clone()));
180            }
181        }
182    }
183}
184
185/// Detect blocker entries that reference tasks not present in the document.
186///
187/// Skips tombstoned tasks — their stale references are not actionable.
188fn check_dangling_blockers(
189    tasks: &[db::Task],
190    all_ids: &HashSet<String>,
191    findings: &mut Vec<Finding>,
192    repairs: &mut Vec<Repair>,
193) {
194    for task in tasks {
195        if task.deleted_at.is_some() {
196            continue;
197        }
198
199        for blocker in &task.blockers {
200            if !all_ids.contains(blocker.as_str()) {
201                findings.push(Finding {
202                    kind: FindingKind::DanglingBlocker,
203                    task: task.id.as_str().to_string(),
204                    detail: format!(
205                        "blocker references missing task {}",
206                        db::TaskId::display_id(blocker.as_str()),
207                    ),
208                    active: true,
209                    fixed: false,
210                });
211                repairs.push(Repair::RemoveBlocker(task.id.clone(), blocker.clone()));
212            }
213        }
214    }
215}
216
217/// Detect cycles in the blocker graph.
218///
219/// Iteratively finds one cycle at a time, records the deterministic
220/// edge to break (lowest blocker ULID in the cycle), removes that edge
221/// from the working graph, and repeats until acyclic.
222///
223/// Cycles where every node is open are "active" — they trap tasks out
224/// of `ready`/`next`.  Cycles containing any closed or tombstoned node
225/// are "inert" — `partition_blockers` already resolves them at runtime.
226/// Only active cycles are repaired by `--fix`.
227fn check_blocker_cycles(
228    tasks: &[db::Task],
229    all_ids: &HashSet<String>,
230    open_ids: &HashSet<String>,
231    findings: &mut Vec<Finding>,
232    repairs: &mut Vec<Repair>,
233) -> Result<()> {
234    // Build adjacency: task → set of blockers (only edges where both
235    // endpoints exist, to avoid mixing up dangling-ref findings).
236    let mut graph: HashMap<String, HashSet<String>> = HashMap::new();
237    for task in tasks {
238        for blocker in &task.blockers {
239            if all_ids.contains(blocker.as_str()) {
240                graph
241                    .entry(task.id.as_str().to_string())
242                    .or_default()
243                    .insert(blocker.as_str().to_string());
244            }
245        }
246    }
247
248    loop {
249        let Some(cycle) = find_cycle_dfs(&graph) else {
250            break;
251        };
252
253        // Determine which edge to break: the one whose blocker ULID
254        // is lexicographically lowest.  Edges in the cycle are
255        // (cycle[0] → cycle[1]), (cycle[1] → cycle[2]), etc.
256        let mut best: Option<(String, String)> = None;
257        for pair in cycle.windows(2) {
258            let blocker = &pair[1];
259            if best.as_ref().is_none_or(|(_, b)| blocker < b) {
260                best = Some((pair[0].clone(), blocker.clone()));
261            }
262        }
263        let (task_id, blocker_id) = best.expect("cycle must have at least one edge");
264
265        let active = cycle[..cycle.len() - 1]
266            .iter()
267            .all(|id| open_ids.contains(id));
268
269        // Build a human-readable cycle string using short IDs.
270        let display: Vec<String> = cycle.iter().map(|id| db::TaskId::display_id(id)).collect();
271        let cycle_str = display.join("");
272
273        findings.push(Finding {
274            kind: FindingKind::BlockerCycle,
275            task: task_id.clone(),
276            detail: cycle_str,
277            active,
278            fixed: false,
279        });
280
281        if active {
282            repairs.push(Repair::RemoveBlocker(
283                db::TaskId::parse(&task_id)?,
284                db::TaskId::parse(&blocker_id)?,
285            ));
286        }
287
288        // Remove the edge from the working graph so the next iteration
289        // can find further cycles (or terminate).
290        if let Some(set) = graph.get_mut(&task_id) {
291            set.remove(&blocker_id);
292        }
293    }
294
295    Ok(())
296}
297
298/// Detect cycles in the parent-child hierarchy.
299///
300/// Follows each task's parent chain; if we revisit a task already in the
301/// current path, we have a cycle.  Repairs clear the parent field on the
302/// task with the lexicographically lowest ULID in the cycle.
303fn check_parent_cycles(
304    tasks: &[db::Task],
305    all_ids: &HashSet<String>,
306    findings: &mut Vec<Finding>,
307    repairs: &mut Vec<Repair>,
308) -> Result<()> {
309    let parent_map: HashMap<String, String> = tasks
310        .iter()
311        .filter_map(|t| {
312            t.parent.as_ref().and_then(|p| {
313                if all_ids.contains(p.as_str()) {
314                    Some((t.id.as_str().to_string(), p.as_str().to_string()))
315                } else {
316                    None // dangling parents handled separately
317                }
318            })
319        })
320        .collect();
321
322    let mut globally_visited: HashSet<String> = HashSet::new();
323
324    for task in tasks {
325        let start = task.id.as_str().to_string();
326        if globally_visited.contains(&start) {
327            continue;
328        }
329
330        let mut path: Vec<String> = Vec::new();
331        let mut path_set: HashSet<String> = HashSet::new();
332        let mut current = Some(start.clone());
333
334        while let Some(node) = current {
335            if path_set.contains(&node) {
336                // Found a cycle.  Extract just the cycle portion.
337                let pos = path.iter().position(|n| *n == node).unwrap();
338                let mut cycle: Vec<String> = path[pos..].to_vec();
339                cycle.push(node); // close the loop
340
341                let display: Vec<String> =
342                    cycle.iter().map(|id| db::TaskId::display_id(id)).collect();
343                let cycle_str = display.join("");
344
345                // The task with the lowest ULID gets its parent cleared.
346                let lowest = cycle[..cycle.len() - 1]
347                    .iter()
348                    .min()
349                    .expect("cycle must have at least one node")
350                    .clone();
351
352                findings.push(Finding {
353                    kind: FindingKind::ParentCycle,
354                    task: lowest.clone(),
355                    detail: cycle_str,
356                    active: true,
357                    fixed: false,
358                });
359                repairs.push(Repair::ClearParent(db::TaskId::parse(&lowest)?));
360                break;
361            }
362            if globally_visited.contains(&node) {
363                break;
364            }
365            path_set.insert(node.clone());
366            path.push(node.clone());
367            current = parent_map.get(&node).cloned();
368        }
369
370        for node in &path {
371            globally_visited.insert(node.clone());
372        }
373    }
374
375    Ok(())
376}
377
378/// Find a single cycle in a directed graph using DFS.
379///
380/// Returns the cycle as a vec of node IDs where the first and last
381/// elements are the same (e.g. `[A, B, C, A]`), or `None` if acyclic.
382fn find_cycle_dfs(graph: &HashMap<String, HashSet<String>>) -> Option<Vec<String>> {
383    let mut visited: HashSet<String> = HashSet::new();
384
385    // Sort keys for deterministic traversal order.
386    let mut nodes: Vec<&String> = graph.keys().collect();
387    nodes.sort();
388
389    for start in nodes {
390        if visited.contains(start) {
391            continue;
392        }
393        let mut path: Vec<String> = Vec::new();
394        let mut path_set: HashSet<String> = HashSet::new();
395        if let Some(cycle) = dfs_visit(start, graph, &mut visited, &mut path, &mut path_set) {
396            return Some(cycle);
397        }
398    }
399    None
400}
401
402/// Recursive DFS helper for cycle detection.
403fn dfs_visit(
404    node: &str,
405    graph: &HashMap<String, HashSet<String>>,
406    visited: &mut HashSet<String>,
407    path: &mut Vec<String>,
408    path_set: &mut HashSet<String>,
409) -> Option<Vec<String>> {
410    visited.insert(node.to_string());
411    path_set.insert(node.to_string());
412    path.push(node.to_string());
413
414    if let Some(neighbors) = graph.get(node) {
415        // Sort neighbors for deterministic cycle detection.
416        let mut sorted: Vec<&String> = neighbors.iter().collect();
417        sorted.sort();
418
419        for neighbor in sorted {
420            if path_set.contains(neighbor.as_str()) {
421                let pos = path.iter().position(|n| n == neighbor).unwrap();
422                let mut cycle = path[pos..].to_vec();
423                cycle.push(neighbor.clone());
424                return Some(cycle);
425            }
426            if !visited.contains(neighbor.as_str()) {
427                if let Some(cycle) = dfs_visit(neighbor, graph, visited, path, path_set) {
428                    return Some(cycle);
429                }
430            }
431        }
432    }
433
434    path_set.remove(node);
435    path.pop();
436    None
437}
438
439fn build_summary(findings: &[Finding]) -> Summary {
440    let mut s = Summary::default();
441    for f in findings {
442        match f.kind {
443            FindingKind::DanglingParent => s.dangling_parents += 1,
444            FindingKind::DanglingBlocker => s.dangling_blockers += 1,
445            FindingKind::BlockerCycle => s.blocker_cycles += 1,
446            FindingKind::ParentCycle => s.parent_cycles += 1,
447        }
448        s.total += 1;
449        if f.fixed {
450            s.fixed += 1;
451        }
452    }
453    s
454}
455
456fn print_human(report: &Report, fix: bool) {
457    let c = crate::color::stderr_theme();
458
459    if report.findings.is_empty() {
460        eprintln!("{}info:{} no issues found", c.green, c.reset);
461        return;
462    }
463
464    for f in &report.findings {
465        let short = db::TaskId::display_id(&f.task);
466        let kind_label = match f.kind {
467            FindingKind::DanglingParent => "dangling parent",
468            FindingKind::DanglingBlocker => "dangling blocker",
469            FindingKind::BlockerCycle => {
470                if f.active {
471                    "blocker cycle (active)"
472                } else {
473                    "blocker cycle (inert)"
474                }
475            }
476            FindingKind::ParentCycle => "parent cycle",
477        };
478
479        if f.fixed {
480            eprintln!(
481                "{}fixed:{} {}: {}{}",
482                c.green, c.reset, kind_label, short, f.detail
483            );
484        } else {
485            eprintln!(
486                "{}issue:{} {}: {}{}",
487                c.yellow, c.reset, kind_label, short, f.detail
488            );
489        }
490    }
491
492    eprintln!();
493    let n = report.summary.total;
494    let issue_word = if n == 1 { "issue" } else { "issues" };
495    if fix {
496        eprintln!(
497            "{} {issue_word} found, {} fixed",
498            report.summary.total, report.summary.fixed,
499        );
500    } else {
501        eprintln!("{n} {issue_word} found. Run with --fix to repair.");
502    }
503}