Add meta description tag to docs pages (#35112)

Ben Kunkle and Katie Greer created

Closes #ISSUE

Adds basic front-matter support to `.md` files in docs. The only keys
currently supported are `description`, which becomes a `<meta
name="description" content="...">` tag, and `title`, which becomes a
normal `<title>` tag, with the title contents prefixed with the subject
of the file.

An example of the syntax can be found in `git.md`, as well as below:

```md
---
title: Some more detailed title for this page
description: A page-specific description
---

# Editor
```

The above will be transformed into (with irrelevant tags removed):

```html
<head>
    <title>Editor | Some more detailed title for this page</title>
    <meta name="description" content="A page-specific description">
</head>
<body>
<h1>Editor</h1>
</body>
```

If no front-matter is provided, or if one or both keys aren't provided,
the title and description will be set based on the `default-title` and
`default-description` keys in `book.toml` respectively.

## Implementation details

Unfortunately, `mdbook` does not support post-processing like it does
pre-processing, and only supports defining one description to put in the
meta tag per book rather than per file. So in order to apply
post-processing (necessary to modify the html head tags) the global book
description is set to a marker value `#description#` and the html
renderer is replaced with a sub-command of `docs_preprocessor` that
wraps the builtin `html` renderer and applies post-processing to the
`html` files, replacing the marker value and the `<title>(.*)</title>`
with the contents of the front-matter if there is one.

## Known limitations

The front-matter parsing is extremely simple, which avoids needing to
take on an additional dependency, or implement full yaml parsing.

* Double quotes and multi-line values are not supported, i.e. keys and
values must be entirely on the same line, with no double quotes around
the value.

The following will not work:

```md
---
title: Some
 Multi-line
 Title
---
```

* The front-matter must be at the top of the file, with only white-space
preceding it.

* The contents of the title and description will not be HTML-escaped.
They should be simple ASCII text with no Unicode or emoji characters.

Release Notes:

- N/A *or* Added/Fixed/Improved ...

---------

Co-authored-by: Katie Greer <katie@zed.dev>

Change summary

Cargo.lock                           |   2 
crates/docs_preprocessor/Cargo.toml  |   8 
crates/docs_preprocessor/src/main.rs | 246 +++++++++++++++++++++++++----
crates/zlog/src/sink.rs              |  23 ++
crates/zlog/src/zlog.rs              |   2 
docs/README.md                       |  61 +++++++
docs/book.toml                       |  20 ++
docs/src/git.md                      |   5 
docs/theme/index.hbs                 |   2 
9 files changed, 319 insertions(+), 50 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -4758,7 +4758,6 @@ name = "docs_preprocessor"
 version = "0.1.0"
 dependencies = [
  "anyhow",
- "clap",
  "command_palette",
  "gpui",
  "mdbook",
@@ -4769,6 +4768,7 @@ dependencies = [
  "util",
  "workspace-hack",
  "zed",
+ "zlog",
 ]
 
 [[package]]

crates/docs_preprocessor/Cargo.toml 🔗

@@ -7,17 +7,17 @@ license = "GPL-3.0-or-later"
 
 [dependencies]
 anyhow.workspace = true
-clap.workspace = true
+command_palette.workspace = true
+gpui.workspace = true
 mdbook = "0.4.40"
+regex.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 settings.workspace = true
-regex.workspace = true
 util.workspace = true
 workspace-hack.workspace = true
 zed.workspace = true
-gpui.workspace = true
-command_palette.workspace = true
+zlog.workspace = true
 
 [lints]
 workspace = true

crates/docs_preprocessor/src/main.rs 🔗

@@ -1,14 +1,15 @@
-use anyhow::Result;
-use clap::{Arg, ArgMatches, Command};
+use anyhow::{Context, Result};
 use mdbook::BookItem;
 use mdbook::book::{Book, Chapter};
 use mdbook::preprocess::CmdPreprocessor;
 use regex::Regex;
 use settings::KeymapFile;
-use std::collections::HashSet;
+use std::borrow::Cow;
+use std::collections::{HashMap, HashSet};
 use std::io::{self, Read};
 use std::process;
 use std::sync::LazyLock;
+use util::paths::PathExt;
 
 static KEYMAP_MACOS: LazyLock<KeymapFile> = LazyLock::new(|| {
     load_keymap("keymaps/default-macos.json").expect("Failed to load MacOS keymap")
@@ -20,60 +21,68 @@ static KEYMAP_LINUX: LazyLock<KeymapFile> = LazyLock::new(|| {
 
 static ALL_ACTIONS: LazyLock<Vec<ActionDef>> = LazyLock::new(dump_all_gpui_actions);
 
-pub fn make_app() -> Command {
-    Command::new("zed-docs-preprocessor")
-        .about("Preprocesses Zed Docs content to provide rich action & keybinding support and more")
-        .subcommand(
-            Command::new("supports")
-                .arg(Arg::new("renderer").required(true))
-                .about("Check whether a renderer is supported by this preprocessor"),
-        )
-}
+const FRONT_MATTER_COMMENT: &'static str = "<!-- ZED_META {} -->";
 
 fn main() -> Result<()> {
-    let matches = make_app().get_matches();
+    zlog::init();
+    zlog::init_output_stderr();
     // call a zed:: function so everything in `zed` crate is linked and
     // all actions in the actual app are registered
     zed::stdout_is_a_pty();
-
-    if let Some(sub_args) = matches.subcommand_matches("supports") {
-        handle_supports(sub_args);
-    } else {
-        handle_preprocessing()?;
+    let args = std::env::args().skip(1).collect::<Vec<_>>();
+
+    match args.get(0).map(String::as_str) {
+        Some("supports") => {
+            let renderer = args.get(1).expect("Required argument");
+            let supported = renderer != "not-supported";
+            if supported {
+                process::exit(0);
+            } else {
+                process::exit(1);
+            }
+        }
+        Some("postprocess") => handle_postprocessing()?,
+        _ => handle_preprocessing()?,
     }
 
     Ok(())
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-enum Error {
+enum PreprocessorError {
     ActionNotFound { action_name: String },
     DeprecatedActionUsed { used: String, should_be: String },
+    InvalidFrontmatterLine(String),
 }
 
-impl Error {
+impl PreprocessorError {
     fn new_for_not_found_action(action_name: String) -> Self {
         for action in &*ALL_ACTIONS {
             for alias in action.deprecated_aliases {
                 if alias == &action_name {
-                    return Error::DeprecatedActionUsed {
+                    return PreprocessorError::DeprecatedActionUsed {
                         used: action_name.clone(),
                         should_be: action.name.to_string(),
                     };
                 }
             }
         }
-        Error::ActionNotFound {
+        PreprocessorError::ActionNotFound {
             action_name: action_name.to_string(),
         }
     }
 }
 
-impl std::fmt::Display for Error {
+impl std::fmt::Display for PreprocessorError {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            Error::ActionNotFound { action_name } => write!(f, "Action not found: {}", action_name),
-            Error::DeprecatedActionUsed { used, should_be } => write!(
+            PreprocessorError::InvalidFrontmatterLine(line) => {
+                write!(f, "Invalid frontmatter line: {}", line)
+            }
+            PreprocessorError::ActionNotFound { action_name } => {
+                write!(f, "Action not found: {}", action_name)
+            }
+            PreprocessorError::DeprecatedActionUsed { used, should_be } => write!(
                 f,
                 "Deprecated action used: {} should be {}",
                 used, should_be
@@ -89,8 +98,9 @@ fn handle_preprocessing() -> Result<()> {
 
     let (_ctx, mut book) = CmdPreprocessor::parse_input(input.as_bytes())?;
 
-    let mut errors = HashSet::<Error>::new();
+    let mut errors = HashSet::<PreprocessorError>::new();
 
+    handle_frontmatter(&mut book, &mut errors);
     template_and_validate_keybindings(&mut book, &mut errors);
     template_and_validate_actions(&mut book, &mut errors);
 
@@ -108,19 +118,41 @@ fn handle_preprocessing() -> Result<()> {
     Ok(())
 }
 
-fn handle_supports(sub_args: &ArgMatches) -> ! {
-    let renderer = sub_args
-        .get_one::<String>("renderer")
-        .expect("Required argument");
-    let supported = renderer != "not-supported";
-    if supported {
-        process::exit(0);
-    } else {
-        process::exit(1);
-    }
+fn handle_frontmatter(book: &mut Book, errors: &mut HashSet<PreprocessorError>) {
+    let frontmatter_regex = Regex::new(r"(?s)^\s*---(.*?)---").unwrap();
+    for_each_chapter_mut(book, |chapter| {
+        let new_content = frontmatter_regex.replace(&chapter.content, |caps: &regex::Captures| {
+            let frontmatter = caps[1].trim();
+            let frontmatter = frontmatter.trim_matches(&[' ', '-', '\n']);
+            let mut metadata = HashMap::<String, String>::default();
+            for line in frontmatter.lines() {
+                let Some((name, value)) = line.split_once(':') else {
+                    errors.insert(PreprocessorError::InvalidFrontmatterLine(format!(
+                        "{}: {}",
+                        chapter_breadcrumbs(&chapter),
+                        line
+                    )));
+                    continue;
+                };
+                let name = name.trim();
+                let value = value.trim();
+                metadata.insert(name.to_string(), value.to_string());
+            }
+            FRONT_MATTER_COMMENT.replace(
+                "{}",
+                &serde_json::to_string(&metadata).expect("Failed to serialize metadata"),
+            )
+        });
+        match new_content {
+            Cow::Owned(content) => {
+                chapter.content = content;
+            }
+            Cow::Borrowed(_) => {}
+        }
+    });
 }
 
-fn template_and_validate_keybindings(book: &mut Book, errors: &mut HashSet<Error>) {
+fn template_and_validate_keybindings(book: &mut Book, errors: &mut HashSet<PreprocessorError>) {
     let regex = Regex::new(r"\{#kb (.*?)\}").unwrap();
 
     for_each_chapter_mut(book, |chapter| {
@@ -128,7 +160,9 @@ fn template_and_validate_keybindings(book: &mut Book, errors: &mut HashSet<Error
             .replace_all(&chapter.content, |caps: &regex::Captures| {
                 let action = caps[1].trim();
                 if find_action_by_name(action).is_none() {
-                    errors.insert(Error::new_for_not_found_action(action.to_string()));
+                    errors.insert(PreprocessorError::new_for_not_found_action(
+                        action.to_string(),
+                    ));
                     return String::new();
                 }
                 let macos_binding = find_binding("macos", action).unwrap_or_default();
@@ -144,7 +178,7 @@ fn template_and_validate_keybindings(book: &mut Book, errors: &mut HashSet<Error
     });
 }
 
-fn template_and_validate_actions(book: &mut Book, errors: &mut HashSet<Error>) {
+fn template_and_validate_actions(book: &mut Book, errors: &mut HashSet<PreprocessorError>) {
     let regex = Regex::new(r"\{#action (.*?)\}").unwrap();
 
     for_each_chapter_mut(book, |chapter| {
@@ -152,7 +186,9 @@ fn template_and_validate_actions(book: &mut Book, errors: &mut HashSet<Error>) {
             .replace_all(&chapter.content, |caps: &regex::Captures| {
                 let name = caps[1].trim();
                 let Some(action) = find_action_by_name(name) else {
-                    errors.insert(Error::new_for_not_found_action(name.to_string()));
+                    errors.insert(PreprocessorError::new_for_not_found_action(
+                        name.to_string(),
+                    ));
                     return String::new();
                 };
                 format!("<code class=\"hljs\">{}</code>", &action.human_name)
@@ -217,6 +253,13 @@ fn name_for_action(action_as_str: String) -> String {
         .unwrap_or(action_as_str)
 }
 
+fn chapter_breadcrumbs(chapter: &Chapter) -> String {
+    let mut breadcrumbs = Vec::with_capacity(chapter.parent_names.len() + 1);
+    breadcrumbs.extend(chapter.parent_names.iter().map(String::as_str));
+    breadcrumbs.push(chapter.name.as_str());
+    format!("[{:?}] {}", chapter.source_path, breadcrumbs.join(" > "))
+}
+
 fn load_keymap(asset_path: &str) -> Result<KeymapFile> {
     let content = util::asset_str::<settings::SettingsAssets>(asset_path);
     KeymapFile::parse(content.as_ref())
@@ -254,3 +297,126 @@ fn dump_all_gpui_actions() -> Vec<ActionDef> {
 
     return actions;
 }
+
+fn handle_postprocessing() -> Result<()> {
+    let logger = zlog::scoped!("render");
+    let mut ctx = mdbook::renderer::RenderContext::from_json(io::stdin())?;
+    let output = ctx
+        .config
+        .get_mut("output")
+        .expect("has output")
+        .as_table_mut()
+        .expect("output is table");
+    let zed_html = output.remove("zed-html").expect("zed-html output defined");
+    let default_description = zed_html
+        .get("default-description")
+        .expect("Default description not found")
+        .as_str()
+        .expect("Default description not a string")
+        .to_string();
+    let default_title = zed_html
+        .get("default-title")
+        .expect("Default title not found")
+        .as_str()
+        .expect("Default title not a string")
+        .to_string();
+
+    output.insert("html".to_string(), zed_html);
+    mdbook::Renderer::render(&mdbook::renderer::HtmlHandlebars::new(), &ctx)?;
+    let ignore_list = ["toc.html"];
+
+    let root_dir = ctx.destination.clone();
+    let mut files = Vec::with_capacity(128);
+    let mut queue = Vec::with_capacity(64);
+    queue.push(root_dir.clone());
+    while let Some(dir) = queue.pop() {
+        for entry in std::fs::read_dir(&dir).context(dir.to_sanitized_string())? {
+            let Ok(entry) = entry else {
+                continue;
+            };
+            let file_type = entry.file_type().context("Failed to determine file type")?;
+            if file_type.is_dir() {
+                queue.push(entry.path());
+            }
+            if file_type.is_file()
+                && matches!(
+                    entry.path().extension().and_then(std::ffi::OsStr::to_str),
+                    Some("html")
+                )
+            {
+                if ignore_list.contains(&&*entry.file_name().to_string_lossy()) {
+                    zlog::info!(logger => "Ignoring {}", entry.path().to_string_lossy());
+                } else {
+                    files.push(entry.path());
+                }
+            }
+        }
+    }
+
+    zlog::info!(logger => "Processing {} `.html` files", files.len());
+    let meta_regex = Regex::new(&FRONT_MATTER_COMMENT.replace("{}", "(.*)")).unwrap();
+    for file in files {
+        let contents = std::fs::read_to_string(&file)?;
+        let mut meta_description = None;
+        let mut meta_title = None;
+        let contents = meta_regex.replace(&contents, |caps: &regex::Captures| {
+            let metadata: HashMap<String, String> = serde_json::from_str(&caps[1]).with_context(|| format!("JSON Metadata: {:?}", &caps[1])).expect("Failed to deserialize metadata");
+            for (kind, content) in metadata {
+                match kind.as_str() {
+                    "description" => {
+                        meta_description = Some(content);
+                    }
+                    "title" => {
+                        meta_title = Some(content);
+                    }
+                    _ => {
+                        zlog::warn!(logger => "Unrecognized frontmatter key: {} in {:?}", kind, pretty_path(&file, &root_dir));
+                    }
+                }
+            }
+            String::new()
+        });
+        let meta_description = meta_description.as_ref().unwrap_or_else(|| {
+            zlog::warn!(logger => "No meta description found for {:?}", pretty_path(&file, &root_dir));
+            &default_description
+        });
+        let page_title = extract_title_from_page(&contents, pretty_path(&file, &root_dir));
+        let meta_title = meta_title.as_ref().unwrap_or_else(|| {
+            zlog::debug!(logger => "No meta title found for {:?}", pretty_path(&file, &root_dir));
+            &default_title
+        });
+        let meta_title = format!("{} | {}", page_title, meta_title);
+        zlog::trace!(logger => "Updating {:?}", pretty_path(&file, &root_dir));
+        let contents = contents.replace("#description#", meta_description);
+        let contents = TITLE_REGEX
+            .replace(&contents, |_: &regex::Captures| {
+                format!("<title>{}</title>", meta_title)
+            })
+            .to_string();
+        // let contents = contents.replace("#title#", &meta_title);
+        std::fs::write(file, contents)?;
+    }
+    return Ok(());
+
+    fn pretty_path<'a>(
+        path: &'a std::path::PathBuf,
+        root: &'a std::path::PathBuf,
+    ) -> &'a std::path::Path {
+        &path.strip_prefix(&root).unwrap_or(&path)
+    }
+    const TITLE_REGEX: std::cell::LazyCell<Regex> =
+        std::cell::LazyCell::new(|| Regex::new(r"<title>\s*(.*?)\s*</title>").unwrap());
+    fn extract_title_from_page(contents: &str, pretty_path: &std::path::Path) -> String {
+        let title_tag_contents = &TITLE_REGEX
+            .captures(&contents)
+            .with_context(|| format!("Failed to find title in {:?}", pretty_path))
+            .expect("Page has <title> element")[1];
+        let title = title_tag_contents
+            .trim()
+            .strip_suffix("- Zed")
+            .unwrap_or(title_tag_contents)
+            .trim()
+            .to_string();
+        title
+    }
+}

crates/zlog/src/sink.rs 🔗

@@ -21,6 +21,8 @@ const ANSI_MAGENTA: &str = "\x1b[35m";
 
 /// Whether stdout output is enabled.
 static mut ENABLED_SINKS_STDOUT: bool = false;
+/// Whether stderr output is enabled.
+static mut ENABLED_SINKS_STDERR: bool = false;
 
 /// Is Some(file) if file output is enabled.
 static ENABLED_SINKS_FILE: Mutex<Option<std::fs::File>> = Mutex::new(None);
@@ -45,6 +47,12 @@ pub fn init_output_stdout() {
     }
 }
 
+pub fn init_output_stderr() {
+    unsafe {
+        ENABLED_SINKS_STDERR = true;
+    }
+}
+
 pub fn init_output_file(
     path: &'static PathBuf,
     path_rotate: Option<&'static PathBuf>,
@@ -115,6 +123,21 @@ pub fn submit(record: Record) {
             },
             record.message
         );
+    } else if unsafe { ENABLED_SINKS_STDERR } {
+        let mut stdout = std::io::stderr().lock();
+        _ = writeln!(
+            &mut stdout,
+            "{} {ANSI_BOLD}{}{}{ANSI_RESET} {} {}",
+            chrono::Local::now().format("%Y-%m-%dT%H:%M:%S%:z"),
+            LEVEL_ANSI_COLORS[record.level as usize],
+            LEVEL_OUTPUT_STRINGS[record.level as usize],
+            SourceFmt {
+                scope: record.scope,
+                module_path: record.module_path,
+                ansi: true,
+            },
+            record.message
+        );
     }
     let mut file = ENABLED_SINKS_FILE.lock().unwrap_or_else(|handle| {
         ENABLED_SINKS_FILE.clear_poison();

crates/zlog/src/zlog.rs 🔗

@@ -5,7 +5,7 @@ mod env_config;
 pub mod filter;
 pub mod sink;
 
-pub use sink::{flush, init_output_file, init_output_stdout};
+pub use sink::{flush, init_output_file, init_output_stderr, init_output_stdout};
 
 pub const SCOPE_DEPTH_MAX: usize = 4;
 

docs/README.md 🔗

@@ -69,3 +69,64 @@ Templates are just functions that modify the source of the docs pages (usually w
 - Template Trait: crates/docs_preprocessor/src/templates.rs
 - Example template: crates/docs_preprocessor/src/templates/keybinding.rs
 - Client-side plugins: docs/theme/plugins.js
+
+## Postprocessor
+
+A postprocessor is implemented as a sub-command of `docs_preprocessor` that wraps the builtin `html` renderer and applies post-processing to the `html` files, to add support for page-specific title and meta description values.
+
+An example of the syntax can be found in `git.md`, as well as below
+
+```md
+---
+title: Some more detailed title for this page
+description: A page-specific description
+---
+
+# Editor
+```
+
+The above will be transformed into (with non-relevant tags removed)
+
+```html
+<head>
+  <title>Editor | Some more detailed title for this page</title>
+  <meta name="description" content="A page-specific description" />
+</head>
+<body>
+  <h1>Editor</h1>
+</body>
+```
+
+If no front-matter is provided, or if one or both keys aren't provided, the title and description will be set based on the `default-title` and `default-description` keys in `book.toml` respectively.
+
+### Implementation details
+
+Unfortunately, `mdbook` does not support post-processing like it does pre-processing, and only supports defining one description to put in the meta tag per book rather than per file. So in order to apply post-processing (necessary to modify the html head tags) the global book description is set to a marker value `#description#` and the html renderer is replaced with a sub-command of `docs_preprocessor` that wraps the builtin `html` renderer and applies post-processing to the `html` files, replacing the marker value and the `<title>(.*)</title>` with the contents of the front-matter if there is one.
+
+### Known limitations
+
+The front-matter parsing is extremely simple, which avoids needing to take on an additional dependency, or implement full yaml parsing.
+
+- Double quotes and multi-line values are not supported, i.e. keys and values must be entirely on the same line, with no double quotes around the value.
+
+The following will not work:
+
+```md
+---
+title: Some
+  Multi-line
+  Title
+---
+```
+
+And neither will:
+
+```md
+---
+title: "Some title"
+---
+```
+
+- The front-matter must be at the top of the file, with only white-space preceding it
+
+- The contents of the title and description will not be HTML-escaped. They should be simple ASCII text with no Unicode or emoji characters.

docs/book.toml 🔗

@@ -6,13 +6,27 @@ src = "src"
 title = "Zed"
 site-url = "/docs/"
 
-[output.html]
+[build]
+extra-watch-dirs = ["../crates/docs_preprocessor"]
+
+# zed-html is a "custom" renderer that just wraps the
+# builtin mdbook html renderer, and applies post-processing
+# as post-processing is not possible with mdbook in the same way
+# pre-processing is
+# The config is passed directly to the html renderer, so all config
+# options that apply to html apply to zed-html
+[output.zed-html]
+command = "cargo run -p docs_preprocessor -- postprocess"
+# Set here instead of above, as we only use it to replace the `#description#` marker we set in the template
+# when no front-matter value is provided
+default-description = "Learn how to use and customize Zed, the fast, collaborative code editor. Official docs on features, configuration, AI tools, and workflows."
+default-title = "Zed Code Editor Documentation"
 no-section-label = true
 preferred-dark-theme = "dark"
 additional-css = ["theme/page-toc.css", "theme/plugins.css", "theme/highlight.css"]
 additional-js  = ["theme/page-toc.js", "theme/plugins.js"]
 
-[output.html.print]
+[output.zed-html.print]
 enable = false
 
 # Redirects for `/docs` pages.
@@ -24,7 +38,7 @@ enable = false
 # The destination URLs are interpreted relative to `https://zed.dev`.
 # - Redirects to other docs pages should end in `.html`
 # - You can link to pages on the Zed site by omitting the `/docs` in front of it.
-[output.html.redirect]
+[output.zed-html.redirect]
 # AI
 "/ai.html" = "/docs/ai/overview.html"
 "/assistant-panel.html" = "/docs/ai/agent-panel.html"

docs/src/git.md 🔗

@@ -1,3 +1,8 @@
+---
+description: Zed is a text editor that supports lots of Git features
+title: Zed Editor Git integration documentation
+---
+
 # Git
 
 Zed currently offers a set of fundamental Git features, with support coming in the future for more advanced ones, like conflict resolution tools, line by line staging, and more.

docs/theme/index.hbs 🔗

@@ -15,7 +15,7 @@
         <!-- Custom HTML head -->
         {{> head}}
 
-        <meta name="description" content="{{ description }}">
+        <meta name="description" content="#description#">
         <meta name="viewport" content="width=device-width, initial-scale=1">
         <meta name="theme-color" content="#ffffff">