Replace environment variable parser with `zed --printenv` outputting JSON (#32637)

Peter Tripp and Conrad Irwin created 9 months ago

Closes: https://github.com/zed-industries/zed/issues/32445
Follow-up to: https://github.com/zed-industries/zed/pull/31799

Release Notes:

- Improved handling of environment variables

---------

Co-authored-by: Conrad Irwin <conrad@zed.dev>

Change summary

Cargo.lock                       |   2 
crates/askpass/Cargo.toml        |   1 
crates/askpass/src/askpass.rs    |  34 --
crates/remote_server/src/main.rs |   8 
crates/util/Cargo.toml           |   1 
crates/util/src/shell_env.rs     | 371 +++------------------------------
crates/util/src/util.rs          |  20 +
crates/zed/src/main.rs           |  11 +
8 files changed, 77 insertions(+), 371 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -491,7 +491,6 @@ dependencies = [
  "anyhow",
  "futures 0.3.31",
  "gpui",
- "shlex",
  "smol",
  "tempfile",
  "util",
@@ -17355,6 +17354,7 @@ dependencies = [
  "serde",
  "serde_json",
  "serde_json_lenient",
+ "shlex",
  "smol",
  "take-until",
  "tempfile",

crates/askpass/Cargo.toml 🔗

@@ -15,7 +15,6 @@ path = "src/askpass.rs"
 anyhow.workspace = true
 futures.workspace = true
 gpui.workspace = true
-shlex.workspace = true
 smol.workspace = true
 tempfile.workspace = true
 util.workspace = true

crates/askpass/src/askpass.rs 🔗

@@ -16,6 +16,8 @@ use smol::fs;
 use smol::{fs::unix::PermissionsExt as _, net::unix::UnixListener};
 #[cfg(unix)]
 use util::ResultExt as _;
+#[cfg(unix)]
+use util::get_shell_safe_zed_path;
 
 #[derive(PartialEq, Eq)]
 pub enum AskPassResult {
@@ -160,38 +162,6 @@ impl AskPassSession {
     }
 }
 
-#[cfg(unix)]
-fn get_shell_safe_zed_path() -> anyhow::Result<String> {
-    let zed_path = std::env::current_exe()
-        .context("Failed to determine current executable path for use in askpass")?
-        .to_string_lossy()
-        // see https://github.com/rust-lang/rust/issues/69343
-        .trim_end_matches(" (deleted)")
-        .to_string();
-
-    // NOTE: this was previously enabled, however, it caused errors when it shouldn't have
-    //       (see https://github.com/zed-industries/zed/issues/29819)
-    //       The zed path failing to execute within the askpass script results in very vague ssh
-    //       authentication failed errors, so this was done to try and surface a better error
-    //
-    // use std::os::unix::fs::MetadataExt;
-    // let metadata = std::fs::metadata(&zed_path)
-    //     .context("Failed to check metadata of Zed executable path for use in askpass")?;
-    // let is_executable = metadata.is_file() && metadata.mode() & 0o111 != 0;
-    // anyhow::ensure!(
-    //     is_executable,
-    //     "Failed to verify Zed executable path for use in askpass"
-    // );
-
-    // As of writing, this can only be fail if the path contains a null byte, which shouldn't be possible
-    // but shlex has annotated the error as #[non_exhaustive] so we can't make it a compile error if other
-    // errors are introduced in the future :(
-    let zed_path_escaped = shlex::try_quote(&zed_path)
-        .context("Failed to shell-escape Zed executable path for use in askpass")?;
-
-    return Ok(zed_path_escaped.to_string());
-}
-
 /// The main function for when Zed is running in netcat mode for use in askpass.
 /// Called from both the remote server binary and the zed binary in their respective main functions.
 #[cfg(unix)]

crates/remote_server/src/main.rs 🔗

@@ -12,6 +12,9 @@ struct Cli {
     /// by having Zed act like netcat communicating over a Unix socket.
     #[arg(long, hide = true)]
     askpass: Option<String>,
+    /// Used for loading the environment from the project.
+    #[arg(long, hide = true)]
+    printenv: bool,
 }
 
 #[derive(Subcommand)]
@@ -55,6 +58,11 @@ fn main() {
         return;
     }
 
+    if cli.printenv {
+        util::shell_env::print_env();
+        return;
+    }
+
     let result = match cli.command {
         Some(Commands::Run {
             log_file,

crates/util/Cargo.toml 🔗

@@ -33,6 +33,7 @@ rust-embed.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 serde_json_lenient.workspace = true
+shlex.workspace = true
 smol.workspace = true
 take-until.workspace = true
 tempfile.workspace = true

crates/util/src/shell_env.rs 🔗

@@ -1,7 +1,7 @@
 #![cfg_attr(not(unix), allow(unused))]
 
 use anyhow::{Context as _, Result};
-use std::borrow::Cow;
+use collections::HashMap;
 
 /// Capture all environment variables from the login shell.
 #[cfg(unix)]
@@ -9,44 +9,37 @@ pub fn capture(directory: &std::path::Path) -> Result<collections::HashMap<Strin
     use std::os::unix::process::CommandExt;
     use std::process::Stdio;
 
+    let zed_path = super::get_shell_safe_zed_path()?;
     let shell_path = std::env::var("SHELL").map(std::path::PathBuf::from)?;
     let shell_name = shell_path.file_name().and_then(std::ffi::OsStr::to_str);
 
+    let mut command_string = String::new();
     let mut command = std::process::Command::new(&shell_path);
+    // In some shells, file descriptors greater than 2 cannot be used in interactive mode,
+    // so file descriptor 0 (stdin) is used instead. [Citation Needed]
+    const ENV_OUTPUT_FD: std::os::fd::RawFd = 0;
     command.stdin(Stdio::null());
     command.stdout(Stdio::piped());
     command.stderr(Stdio::piped());
 
-    let mut command_string = String::new();
-
-    // What we're doing here is to spawn a shell and then `cd` into
-    // the project directory to get the env in there as if the user
-    // `cd`'d into it. We do that because tools like direnv, asdf, ...
-    // hook into `cd` and only set up the env after that.
-    command_string.push_str(&format!("cd '{}';", directory.display()));
-
-    // In certain shells we need to execute additional_command in order to
-    // trigger the behavior of direnv, etc.
-    command_string.push_str(match shell_name {
-        Some("fish") => "emit fish_prompt;",
-        _ => "",
-    });
-
-    // In some shells, file descriptors greater than 2 cannot be used in interactive mode,
-    // so file descriptor 0 is used instead.
-    const ENV_OUTPUT_FD: std::os::fd::RawFd = 0;
-    command_string.push_str(&format!("sh -c 'export -p >&{ENV_OUTPUT_FD}';"));
-
-    // For csh/tcsh, the login shell option is set by passing `-` as
-    // the 0th argument instead of using `-l`.
-    if let Some("tcsh" | "csh") = shell_name {
-        command.arg0("-");
-    } else {
-        command.arg("-l");
+    match shell_name {
+        Some("tcsh" | "csh") => {
+            // For csh/tcsh, login shell requires passing `-` as 0th argument (instead of `-l`)
+            command.arg0("-");
+        }
+        Some("fish") => {
+            // in fish, asdf, direnv attach to the `fish_prompt` event
+            command_string.push_str("emit fish_prompt;");
+            command.arg("-l");
+        }
+        _ => {
+            command.arg("-l");
+        }
     }
-
+    // cd into the directory, triggering directory specific side-effects (asdf, direnv, etc)
+    command_string.push_str(&format!("cd '{}';", directory.display()));
+    command_string.push_str(&format!("sh -c '{zed_path} --printenv >&{ENV_OUTPUT_FD}';"));
     command.args(["-i", "-c", &command_string]);
-
     super::set_pre_exec_to_start_new_session(&mut command);
 
     let (env_output, process_output) = spawn_and_read_fd(command, ENV_OUTPUT_FD)?;
@@ -60,12 +53,10 @@ pub fn capture(directory: &std::path::Path) -> Result<collections::HashMap<Strin
         String::from_utf8_lossy(&process_output.stderr),
     );
 
-    parse(&env_output)
-        .filter_map(|entry| match entry {
-            Ok((name, value)) => Some(Ok((name.into(), value?.into()))),
-            Err(err) => Some(Err(err)),
-        })
-        .collect::<Result<_>>()
+    // Parse the JSON output from zed --printenv
+    let env_map: collections::HashMap<String, String> = serde_json::from_str(&env_output)
+        .with_context(|| "Failed to deserialize environment variables from json")?;
+    Ok(env_map)
 }
 
 #[cfg(unix)]
@@ -92,306 +83,12 @@ fn spawn_and_read_fd(
     Ok((buffer, process.wait_with_output()?))
 }
 
-/// Parse the result of calling `sh -c 'export -p'`.
-///
-/// https://www.man7.org/linux/man-pages/man1/export.1p.html
-fn parse(mut input: &str) -> impl Iterator<Item = Result<(Cow<'_, str>, Option<Cow<'_, str>>)>> {
-    std::iter::from_fn(move || {
-        if input.is_empty() {
-            return None;
-        }
-        match parse_declaration(input) {
-            Ok((entry, rest)) => {
-                input = rest;
-                Some(Ok(entry))
-            }
-            Err(err) => Some(Err(err)),
-        }
-    })
-}
-
-fn parse_declaration(input: &str) -> Result<((Cow<'_, str>, Option<Cow<'_, str>>), &str)> {
-    let rest = input
-        .strip_prefix("export ")
-        .context("expected 'export ' prefix")?;
-
-    if let Some((name, rest)) = parse_name_and_terminator(rest, '\n') {
-        Ok(((name, None), rest))
-    } else {
-        let (name, rest) = parse_name_and_terminator(rest, '=').context("invalid name")?;
-        let (value, rest) = parse_literal_and_terminator(rest, '\n').context("invalid value")?;
-        Ok(((name, Some(value)), rest))
-    }
-}
-
-fn parse_name_and_terminator(input: &str, terminator: char) -> Option<(Cow<'_, str>, &str)> {
-    let (name, rest) = parse_literal_and_terminator(input, terminator)?;
-    (!name.is_empty() && !name.contains('=')).then_some((name, rest))
-}
-
-fn parse_literal_and_terminator(input: &str, terminator: char) -> Option<(Cow<'_, str>, &str)> {
-    if let Some((literal, rest)) = parse_literal_ansi_c_quoted(input) {
-        let rest = rest.strip_prefix(terminator)?;
-        Some((Cow::Owned(literal), rest))
-    } else if let Some((literal, rest)) = parse_literal_single_quoted(input) {
-        let rest = rest.strip_prefix(terminator)?;
-        Some((Cow::Borrowed(literal), rest))
-    } else if let Some((literal, rest)) = parse_literal_double_quoted(input) {
-        let rest = rest.strip_prefix(terminator)?;
-        Some((Cow::Owned(literal), rest))
-    } else {
-        let (literal, rest) = input.split_once(terminator)?;
-        (!literal.contains(|c: char| c.is_ascii_whitespace()))
-            .then_some((Cow::Borrowed(literal), rest))
-    }
-}
-
-/// https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html
-fn parse_literal_ansi_c_quoted(input: &str) -> Option<(String, &str)> {
-    let rest = input.strip_prefix("$'")?;
-
-    let mut char_indices = rest.char_indices();
-    let mut escaping = false;
-    let (literal, rest) = loop {
-        let (index, char) = char_indices.next()?;
-        if char == '\'' && !escaping {
-            break (&rest[..index], &rest[index + 1..]);
-        } else {
-            escaping = !escaping && char == '\\';
-        }
-    };
-
-    let mut result = String::new();
-    let mut chars = literal.chars();
-    while let Some(ch) = chars.next() {
-        if ch == '\\' {
-            match chars.next() {
-                Some('n') => result.push('\n'),
-                Some('t') => result.push('\t'),
-                Some('r') => result.push('\r'),
-                Some('\\') => result.push('\\'),
-                Some('\'') => result.push('\''),
-                Some('"') => result.push('"'),
-                Some('a') => result.push('\x07'), // bell
-                Some('b') => result.push('\x08'), // backspace
-                Some('f') => result.push('\x0C'), // form feed
-                Some('v') => result.push('\x0B'), // vertical tab
-                Some('0') => result.push('\0'),   // null
-                Some(other) => {
-                    // For unknown escape sequences, keep the backslash and character
-                    result.push('\\');
-                    result.push(other);
-                }
-                None => result.push('\\'), // trailing backslash
-            }
-        } else {
-            result.push(ch);
-        }
-    }
-
-    Some((result, rest))
-}
-
-/// https://www.gnu.org/software/bash/manual/html_node/Single-Quotes.html
-fn parse_literal_single_quoted(input: &str) -> Option<(&str, &str)> {
-    input.strip_prefix('\'')?.split_once('\'')
-}
-
-/// https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html
-fn parse_literal_double_quoted(input: &str) -> Option<(String, &str)> {
-    let rest = input.strip_prefix('"')?;
-
-    let mut char_indices = rest.char_indices();
-    let mut escaping = false;
-    let (literal, rest) = loop {
-        let (index, char) = char_indices.next()?;
-        if char == '"' && !escaping {
-            break (&rest[..index], &rest[index + 1..]);
-        } else {
-            escaping = !escaping && char == '\\';
-        }
-    };
-
-    let literal = literal
-        .replace("\\$", "$")
-        .replace("\\`", "`")
-        .replace("\\\"", "\"")
-        .replace("\\\n", "")
-        .replace("\\\\", "\\");
-
-    Some((literal, rest))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[cfg(unix)]
-    #[test]
-    fn test_spawn_and_read_fd() -> anyhow::Result<()> {
-        let mut command = std::process::Command::new("sh");
-        super::super::set_pre_exec_to_start_new_session(&mut command);
-        command.args(["-lic", "printf 'abc%.0s' $(seq 1 65536) >&0"]);
-        let (bytes, _) = spawn_and_read_fd(command, 0)?;
-        assert_eq!(bytes.len(), 65536 * 3);
-        Ok(())
-    }
-
-    #[test]
-    fn test_parse() {
-        let input = indoc::indoc! {r#"
-        export foo
-        export 'foo'
-        export "foo"
-        export foo=
-        export 'foo'=
-        export "foo"=
-        export foo=bar
-        export foo='bar'
-        export foo="bar"
-        export foo='b
-        a
-        z'
-        export foo="b
-        a
-        z"
-        export foo='b\
-        a\
-        z'
-        export foo="b\
-        a\
-        z"
-        export foo='\`Hello\`
-        \"wo\
-        rld\"\n!\\
-        !'
-        export foo="\`Hello\`
-        \"wo\
-        rld\"\n!\\
-        !"
-        export foo=$'hello\nworld'
-        "#};
-
-        let expected_values = [
-            None,
-            None,
-            None,
-            Some(""),
-            Some(""),
-            Some(""),
-            Some("bar"),
-            Some("bar"),
-            Some("bar"),
-            Some("b\na\nz"),
-            Some("b\na\nz"),
-            Some("b\\\na\\\nz"),
-            Some("baz"),
-            Some(indoc::indoc! {r#"
-            \`Hello\`
-            \"wo\
-            rld\"\n!\\
-            !"#}),
-            Some(indoc::indoc! {r#"
-            `Hello`
-            "world"\n!\!"#}),
-            Some("hello\nworld"),
-        ];
-        let expected = expected_values
-            .into_iter()
-            .map(|value| ("foo".into(), value.map(Into::into)))
-            .collect::<Vec<_>>();
-
-        let actual = parse(input).collect::<Result<Vec<_>>>().unwrap();
-        assert_eq!(expected, actual);
-    }
-
-    #[test]
-    fn test_parse_declaration() {
-        let ((name, value), rest) = parse_declaration("export foo\nrest").unwrap();
-        assert_eq!(name, "foo");
-        assert_eq!(value, None);
-        assert_eq!(rest, "rest");
-
-        let ((name, value), rest) = parse_declaration("export foo=bar\nrest").unwrap();
-        assert_eq!(name, "foo");
-        assert_eq!(value.as_deref(), Some("bar"));
-        assert_eq!(rest, "rest");
-    }
-
-    #[test]
-    fn test_parse_literal_single_quoted() {
-        let input = indoc::indoc! {r#"
-        '\`Hello\`
-        \"wo\
-        rld\"\n!\\
-        !'
-        rest"#};
-
-        let expected = indoc::indoc! {r#"
-        \`Hello\`
-        \"wo\
-        rld\"\n!\\
-        !"#};
-
-        let (actual, rest) = parse_literal_single_quoted(input).unwrap();
-        assert_eq!(expected, actual);
-        assert_eq!(rest, "\nrest");
-    }
-
-    #[test]
-    fn test_parse_literal_double_quoted() {
-        let input = indoc::indoc! {r#"
-        "\`Hello\`
-        \"wo\
-        rld\"\n!\\
-        !"
-        rest"#};
-
-        let expected = indoc::indoc! {r#"
-        `Hello`
-        "world"\n!\!"#};
-
-        let (actual, rest) = parse_literal_double_quoted(input).unwrap();
-        assert_eq!(expected, actual);
-        assert_eq!(rest, "\nrest");
-    }
-
-    #[test]
-    fn test_parse_literal_ansi_c_quoted() {
-        let (actual, rest) = parse_literal_ansi_c_quoted("$'hello\\nworld'\nrest").unwrap();
-        assert_eq!(actual, "hello\nworld");
-        assert_eq!(rest, "\nrest");
-
-        let (actual, rest) = parse_literal_ansi_c_quoted("$'tab\\there'\nrest").unwrap();
-        assert_eq!(actual, "tab\there");
-        assert_eq!(rest, "\nrest");
-
-        let (actual, rest) = parse_literal_ansi_c_quoted("$'quote\\'\\'end'\nrest").unwrap();
-        assert_eq!(actual, "quote''end");
-        assert_eq!(rest, "\nrest");
-
-        let (actual, rest) = parse_literal_ansi_c_quoted("$'backslash\\\\end'\nrest").unwrap();
-        assert_eq!(actual, "backslash\\end");
-        assert_eq!(rest, "\nrest");
-    }
-
-    #[test]
-    fn test_parse_buildphase_export() {
-        let input = r#"export buildPhase=$'{ echo "------------------------------------------------------------";\n  echo " WARNING: the existence of this path is not guaranteed.";\n  echo " It is an internal implementation detail for pkgs.mkShell.";\n  echo "------------------------------------------------------------";\n  echo;\n  # Record all build inputs as runtime dependencies\n  export;\n} >> "$out"\n'
-"#;
-
-        let expected_value = r#"{ echo "------------------------------------------------------------";
-  echo " WARNING: the existence of this path is not guaranteed.";
-  echo " It is an internal implementation detail for pkgs.mkShell.";
-  echo "------------------------------------------------------------";
-  echo;
-  # Record all build inputs as runtime dependencies
-  export;
-} >> "$out"
-"#;
-
-        let ((name, value), _rest) = parse_declaration(input).unwrap();
-        assert_eq!(name, "buildPhase");
-        assert_eq!(value.as_deref(), Some(expected_value));
-    }
+pub fn print_env() {
+    let env_vars: HashMap<String, String> = std::env::vars().collect();
+    let json = serde_json::to_string_pretty(&env_vars).unwrap_or_else(|err| {
+        eprintln!("Error serializing environment variables: {}", err);
+        std::process::exit(1);
+    });
+    println!("{}", json);
+    std::process::exit(0);
 }

crates/util/src/util.rs 🔗

@@ -262,6 +262,26 @@ fn load_shell_from_passwd() -> Result<()> {
     Ok(())
 }
 
+#[cfg(unix)]
+/// Returns a shell escaped path for the current zed executable
+pub fn get_shell_safe_zed_path() -> anyhow::Result<String> {
+    use anyhow::Context;
+
+    let zed_path = std::env::current_exe()
+        .context("Failed to determine current zed executable path.")?
+        .to_string_lossy()
+        .trim_end_matches(" (deleted)") // see https://github.com/rust-lang/rust/issues/69343
+        .to_string();
+
+    // As of writing, this can only be fail if the path contains a null byte, which shouldn't be possible
+    // but shlex has annotated the error as #[non_exhaustive] so we can't make it a compile error if other
+    // errors are introduced in the future :(
+    let zed_path_escaped =
+        shlex::try_quote(&zed_path).context("Failed to shell-escape Zed executable path.")?;
+
+    return Ok(zed_path_escaped.to_string());
+}
+
 #[cfg(unix)]
 pub fn load_login_shell_environment() -> Result<()> {
     load_shell_from_passwd().log_err();

crates/zed/src/main.rs 🔗

@@ -191,11 +191,18 @@ Error: Running Zed as root or via sudo is unsupported.
 
     let args = Args::parse();
 
+    // `zed --askpass` Makes zed operate in nc/netcat mode for use with askpass
     if let Some(socket) = &args.askpass {
         askpass::main(socket);
         return;
     }
 
+    // `zed --printenv` Outputs environment variables as JSON to stdout
+    if args.printenv {
+        util::shell_env::print_env();
+        return;
+    }
+
     if args.dump_all_actions {
         dump_all_gpui_actions();
         return;
@@ -1071,6 +1078,10 @@ struct Args {
 
     #[arg(long, hide = true)]
     dump_all_actions: bool,
+
+    /// Output current environment variables as JSON to stdout
+    #[arg(long, hide = true)]
+    printenv: bool,
 }
 
 #[derive(Clone, Debug)]