Properly load environment variables from the login shell (#31799)

Haru Kim created

Fixes #11647
Fixes #13888
Fixes #18771
Fixes #19779
Fixes #22437
Fixes #23649
Fixes #24200
Fixes #27601

Zed’s current method of loading environment variables from the login
shell has two issues:
1. Some shells—​fish in particular—​​write specific escape characters to
`stdout` right before they exit. When this happens, the tail end of the
last environment variable printed by `/usr/bin/env` becomes corrupted.
2. If a multi-line value contains an equals sign, that line is
mis-parsed as a separate name-value pair.

This PR addresses those problems by:
1. Redirecting the shell command's `stdout` directly to a temporary
file, eliminating any side effects caused by the shell itself.
2. Replacing `/usr/bin/env` with `sh -c 'export -p'`, which removes
ambiguity when handling multi-line values.

Additional changes:
- Correctly set the arguments used to launch a login shell under `csh`
or `tcsh`.
- Deduplicate code by sharing the implementation that loads environment
variables on first run with the logic that reloads them for a project.



Release Notes:

- N/A

Change summary

Cargo.lock                        |   1 
crates/project/src/environment.rs | 111 ++----------
crates/util/Cargo.toml            |   1 
crates/util/src/shell_env.rs      | 273 +++++++++++++++++++++++++++++++++
crates/util/src/util.rs           |  69 -------
5 files changed, 304 insertions(+), 151 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -17129,6 +17129,7 @@ dependencies = [
  "futures-lite 1.13.0",
  "git2",
  "globset",
+ "indoc",
  "itertools 0.14.0",
  "libc",
  "log",

crates/project/src/environment.rs 🔗

@@ -245,108 +245,39 @@ async fn load_shell_environment(
     Option<HashMap<String, String>>,
     Option<EnvironmentErrorMessage>,
 ) {
-    use crate::direnv::{DirenvError, load_direnv_environment};
-    use std::path::PathBuf;
-    use util::parse_env_output;
-
-    fn message<T>(with: &str) -> (Option<T>, Option<EnvironmentErrorMessage>) {
-        let message = EnvironmentErrorMessage::from_str(with);
-        (None, Some(message))
-    }
-
-    const MARKER: &str = "ZED_SHELL_START";
-    let Some(shell) = std::env::var("SHELL").log_err() else {
-        return message("Failed to get login environment. SHELL environment variable is not set");
+    use crate::direnv::load_direnv_environment;
+    use util::shell_env;
+
+    let dir_ = dir.to_owned();
+    let mut envs = match smol::unblock(move || shell_env::capture(Some(dir_))).await {
+        Ok(envs) => envs,
+        Err(err) => {
+            util::log_err(&err);
+            return (
+                None,
+                Some(EnvironmentErrorMessage::from_str(
+                    "Failed to load environment variables. See log for details",
+                )),
+            );
+        }
     };
-    let shell_path = PathBuf::from(&shell);
-    let shell_name = shell_path.file_name().and_then(|f| f.to_str());
-
-    // What we're doing here is to spawn a shell and then `cd` into
-    // the project directory to get the env in there as if the user
-    // `cd`'d into it. We do that because tools like direnv, asdf, ...
-    // hook into `cd` and only set up the env after that.
-    //
+
     // If the user selects `Direct` for direnv, it would set an environment
     // variable that later uses to know that it should not run the hook.
     // We would include in `.envs` call so it is okay to run the hook
     // even if direnv direct mode is enabled.
-    //
-    // In certain shells we need to execute additional_command in order to
-    // trigger the behavior of direnv, etc.
-
-    let command = match shell_name {
-        Some("fish") => format!(
-            "cd '{}'; emit fish_prompt; printf '%s' {MARKER}; /usr/bin/env;",
-            dir.display()
-        ),
-        _ => format!(
-            "cd '{}'; printf '%s' {MARKER}; /usr/bin/env;",
-            dir.display()
-        ),
-    };
-
-    // csh/tcsh only supports `-l` if it's the only flag. So this won't be a login shell.
-    // Users must rely on vars from `~/.tcshrc` or `~/.cshrc` and not `.login` as a result.
-    let args = match shell_name {
-        Some("tcsh") | Some("csh") => vec!["-i".to_string(), "-c".to_string(), command],
-        _ => vec![
-            "-l".to_string(),
-            "-i".to_string(),
-            "-c".to_string(),
-            command,
-        ],
-    };
-
-    let Some(output) = smol::unblock(move || {
-        util::set_pre_exec_to_start_new_session(std::process::Command::new(&shell).args(&args))
-            .output()
-    })
-    .await
-    .log_err() else {
-        return message(
-            "Failed to spawn login shell to source login environment variables. See logs for details",
-        );
-    };
-
-    if !output.status.success() {
-        log::error!("login shell exited with {}", output.status);
-        return message("Login shell exited with nonzero exit code. See logs for details");
-    }
-
-    let stdout = String::from_utf8_lossy(&output.stdout);
-    let Some(env_output_start) = stdout.find(MARKER) else {
-        let stderr = String::from_utf8_lossy(&output.stderr);
-        log::error!(
-            "failed to parse output of `env` command in login shell. stdout: {:?}, stderr: {:?}",
-            stdout,
-            stderr
-        );
-        return message("Failed to parse stdout of env command. See logs for the output");
-    };
-
-    let mut parsed_env = HashMap::default();
-    let env_output = &stdout[env_output_start + MARKER.len()..];
-
-    parse_env_output(env_output, |key, value| {
-        parsed_env.insert(key, value);
-    });
-
     let (direnv_environment, direnv_error) = match load_direnv {
         DirenvSettings::ShellHook => (None, None),
-        DirenvSettings::Direct => match load_direnv_environment(&parsed_env, dir).await {
+        DirenvSettings::Direct => match load_direnv_environment(&envs, dir).await {
             Ok(env) => (Some(env), None),
-            Err(err) => (
-                None,
-                <Option<EnvironmentErrorMessage> as From<DirenvError>>::from(err),
-            ),
+            Err(err) => (None, err.into()),
         },
     };
-
-    for (key, value) in direnv_environment.unwrap_or(HashMap::default()) {
-        parsed_env.insert(key, value);
+    if let Some(direnv_environment) = direnv_environment {
+        envs.extend(direnv_environment);
     }
 
-    (Some(parsed_env), direnv_error)
+    (Some(envs), direnv_error)
 }
 
 fn get_directory_env_impl(

crates/util/Cargo.toml 🔗

@@ -50,5 +50,6 @@ dunce = "1.0"
 
 [dev-dependencies]
 git2.workspace = true
+indoc.workspace = true
 rand.workspace = true
 util_macros.workspace = true

crates/util/src/shell_env.rs 🔗

@@ -0,0 +1,273 @@
+use anyhow::{Context as _, Result};
+use collections::HashMap;
+use std::borrow::Cow;
+use std::ffi::OsStr;
+use std::io::Read;
+use std::path::{Path, PathBuf};
+use std::process::Command;
+use tempfile::NamedTempFile;
+
+/// Capture all environment variables from the login shell.
+pub fn capture(change_dir: Option<impl AsRef<Path>>) -> Result<HashMap<String, String>> {
+    let shell_path = std::env::var("SHELL").map(PathBuf::from)?;
+    let shell_name = shell_path.file_name().and_then(OsStr::to_str);
+
+    let mut command_string = String::new();
+
+    // What we're doing here is to spawn a shell and then `cd` into
+    // the project directory to get the env in there as if the user
+    // `cd`'d into it. We do that because tools like direnv, asdf, ...
+    // hook into `cd` and only set up the env after that.
+    if let Some(dir) = change_dir {
+        let dir_str = dir.as_ref().to_string_lossy();
+        command_string.push_str(&format!("cd '{dir_str}';"));
+    }
+
+    // In certain shells we need to execute additional_command in order to
+    // trigger the behavior of direnv, etc.
+    command_string.push_str(match shell_name {
+        Some("fish") => "emit fish_prompt;",
+        _ => "",
+    });
+
+    let mut env_output_file = NamedTempFile::new()?;
+    command_string.push_str(&format!(
+        "sh -c 'export -p' > '{}';",
+        env_output_file.path().to_string_lossy(),
+    ));
+
+    let mut command = Command::new(&shell_path);
+
+    // For csh/tcsh, the login shell option is set by passing `-` as
+    // the 0th argument instead of using `-l`.
+    if let Some("tcsh" | "csh") = shell_name {
+        #[cfg(unix)]
+        std::os::unix::process::CommandExt::arg0(&mut command, "-");
+    } else {
+        command.arg("-l");
+    }
+
+    command.args(["-i", "-c", &command_string]);
+
+    let process_output = super::set_pre_exec_to_start_new_session(&mut command).output()?;
+    anyhow::ensure!(
+        process_output.status.success(),
+        "login shell exited with {}. stdout: {:?}, stderr: {:?}",
+        process_output.status,
+        String::from_utf8_lossy(&process_output.stdout),
+        String::from_utf8_lossy(&process_output.stderr),
+    );
+
+    let mut env_output = String::new();
+    env_output_file.read_to_string(&mut env_output)?;
+
+    parse(&env_output)
+        .filter_map(|entry| match entry {
+            Ok((name, value)) => Some(Ok((name.into(), value?.into()))),
+            Err(err) => Some(Err(err)),
+        })
+        .collect::<Result<HashMap<String, String>>>()
+}
+
+/// Parse the result of calling `sh -c 'export -p'`.
+///
+/// https://www.man7.org/linux/man-pages/man1/export.1p.html
+fn parse(mut input: &str) -> impl Iterator<Item = Result<(Cow<'_, str>, Option<Cow<'_, str>>)>> {
+    std::iter::from_fn(move || {
+        if input.is_empty() {
+            return None;
+        }
+        match parse_declaration(input) {
+            Ok((entry, rest)) => {
+                input = rest;
+                Some(Ok(entry))
+            }
+            Err(err) => Some(Err(err)),
+        }
+    })
+}
+
+fn parse_declaration(input: &str) -> Result<((Cow<'_, str>, Option<Cow<'_, str>>), &str)> {
+    let rest = input
+        .strip_prefix("export ")
+        .context("expected 'export ' prefix")?;
+
+    if let Some((name, rest)) = parse_name_and_terminator(rest, '\n') {
+        Ok(((name, None), rest))
+    } else {
+        let (name, rest) = parse_name_and_terminator(rest, '=').context("invalid name")?;
+        let (value, rest) = parse_literal_and_terminator(rest, '\n').context("invalid value")?;
+        Ok(((name, Some(value)), rest))
+    }
+}
+
+fn parse_name_and_terminator(input: &str, terminator: char) -> Option<(Cow<'_, str>, &str)> {
+    let (name, rest) = parse_literal_and_terminator(input, terminator)?;
+    (!name.is_empty() && !name.contains('=')).then_some((name, rest))
+}
+
+fn parse_literal_and_terminator(input: &str, terminator: char) -> Option<(Cow<'_, str>, &str)> {
+    if let Some((literal, rest)) = parse_literal_single_quoted(input) {
+        let rest = rest.strip_prefix(terminator)?;
+        Some((Cow::Borrowed(literal), rest))
+    } else if let Some((literal, rest)) = parse_literal_double_quoted(input) {
+        let rest = rest.strip_prefix(terminator)?;
+        Some((Cow::Owned(literal), rest))
+    } else {
+        let (literal, rest) = input.split_once(terminator)?;
+        (!literal.contains(|c: char| c.is_ascii_whitespace()))
+            .then_some((Cow::Borrowed(literal), rest))
+    }
+}
+
+/// https://www.gnu.org/software/bash/manual/html_node/Single-Quotes.html
+fn parse_literal_single_quoted(input: &str) -> Option<(&str, &str)> {
+    input.strip_prefix('\'')?.split_once('\'')
+}
+
+/// https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html
+fn parse_literal_double_quoted(input: &str) -> Option<(String, &str)> {
+    let rest = input.strip_prefix('"')?;
+
+    let mut char_indices = rest.char_indices();
+    let mut escaping = false;
+    let (literal, rest) = loop {
+        let (index, char) = char_indices.next()?;
+        if char == '"' && !escaping {
+            break (&rest[..index], &rest[index + 1..]);
+        } else {
+            escaping = !escaping && char == '\\';
+        }
+    };
+
+    let literal = literal
+        .replace("\\$", "$")
+        .replace("\\`", "`")
+        .replace("\\\"", "\"")
+        .replace("\\\n", "")
+        .replace("\\\\", "\\");
+
+    Some((literal, rest))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse() {
+        let input = indoc::indoc! {r#"
+        export foo
+        export 'foo'
+        export "foo"
+        export foo=
+        export 'foo'=
+        export "foo"=
+        export foo=bar
+        export foo='bar'
+        export foo="bar"
+        export foo='b
+        a
+        z'
+        export foo="b
+        a
+        z"
+        export foo='b\
+        a\
+        z'
+        export foo="b\
+        a\
+        z"
+        export foo='\`Hello\`
+        \"wo\
+        rld\"\n!\\
+        !'
+        export foo="\`Hello\`
+        \"wo\
+        rld\"\n!\\
+        !"
+        "#};
+
+        let expected_values = [
+            None,
+            None,
+            None,
+            Some(""),
+            Some(""),
+            Some(""),
+            Some("bar"),
+            Some("bar"),
+            Some("bar"),
+            Some("b\na\nz"),
+            Some("b\na\nz"),
+            Some("b\\\na\\\nz"),
+            Some("baz"),
+            Some(indoc::indoc! {r#"
+            \`Hello\`
+            \"wo\
+            rld\"\n!\\
+            !"#}),
+            Some(indoc::indoc! {r#"
+            `Hello`
+            "world"\n!\!"#}),
+        ];
+        let expected = expected_values
+            .into_iter()
+            .map(|value| ("foo".into(), value.map(Into::into)))
+            .collect::<Vec<_>>();
+
+        let actual = parse(input).collect::<Result<Vec<_>>>().unwrap();
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn test_parse_declaration() {
+        let ((name, value), rest) = parse_declaration("export foo\nrest").unwrap();
+        assert_eq!(name, "foo");
+        assert_eq!(value, None);
+        assert_eq!(rest, "rest");
+
+        let ((name, value), rest) = parse_declaration("export foo=bar\nrest").unwrap();
+        assert_eq!(name, "foo");
+        assert_eq!(value.as_deref(), Some("bar"));
+        assert_eq!(rest, "rest");
+    }
+
+    #[test]
+    fn test_parse_literal_single_quoted() {
+        let input = indoc::indoc! {r#"
+        '\`Hello\`
+        \"wo\
+        rld\"\n!\\
+        !'
+        rest"#};
+
+        let expected = indoc::indoc! {r#"
+        \`Hello\`
+        \"wo\
+        rld\"\n!\\
+        !"#};
+
+        let (actual, rest) = parse_literal_single_quoted(input).unwrap();
+        assert_eq!(expected, actual);
+        assert_eq!(rest, "\nrest");
+    }
+
+    #[test]
+    fn test_parse_literal_double_quoted() {
+        let input = indoc::indoc! {r#"
+        "\`Hello\`
+        \"wo\
+        rld\"\n!\\
+        !"
+        rest"#};
+
+        let expected = indoc::indoc! {r#"
+        `Hello`
+        "world"\n!\!"#};
+
+        let (actual, rest) = parse_literal_double_quoted(input).unwrap();
+        assert_eq!(expected, actual);
+        assert_eq!(rest, "\nrest");
+    }
+}

crates/util/src/util.rs 🔗

@@ -5,6 +5,7 @@ pub mod fs;
 pub mod markdown;
 pub mod paths;
 pub mod serde;
+pub mod shell_env;
 pub mod size;
 #[cfg(any(test, feature = "test-support"))]
 pub mod test;
@@ -27,9 +28,6 @@ use std::{
 };
 use unicase::UniCase;
 
-#[cfg(unix)]
-use anyhow::Context as _;
-
 pub use take_until::*;
 #[cfg(any(test, feature = "test-support"))]
 pub use util_macros::{line_endings, separator, uri};
@@ -312,46 +310,21 @@ fn load_shell_from_passwd() -> Result<()> {
 pub fn load_login_shell_environment() -> Result<()> {
     load_shell_from_passwd().log_err();
 
-    let marker = "ZED_LOGIN_SHELL_START";
-    let shell = env::var("SHELL").context(
-        "SHELL environment variable is not assigned so we can't source login environment variables",
-    )?;
-
     // If possible, we want to `cd` in the user's `$HOME` to trigger programs
     // such as direnv, asdf, mise, ... to adjust the PATH. These tools often hook
     // into shell's `cd` command (and hooks) to manipulate env.
     // We do this so that we get the env a user would have when spawning a shell
     // in home directory.
-    let shell_cmd_prefix = std::env::var_os("HOME")
-        .and_then(|home| home.into_string().ok())
-        .map(|home| format!("cd '{home}';"));
+    for (name, value) in shell_env::capture(Some(paths::home_dir()))? {
+        unsafe { env::set_var(&name, &value) };
+    }
 
-    let shell_cmd = format!(
-        "{}printf '%s' {marker}; /usr/bin/env;",
-        shell_cmd_prefix.as_deref().unwrap_or("")
+    log::info!(
+        "set environment variables from shell:{}, path:{}",
+        std::env::var("SHELL").unwrap_or_default(),
+        std::env::var("PATH").unwrap_or_default(),
     );
 
-    let output = set_pre_exec_to_start_new_session(
-        std::process::Command::new(&shell).args(["-l", "-i", "-c", &shell_cmd]),
-    )
-    .output()
-    .context("failed to spawn login shell to source login environment variables")?;
-    anyhow::ensure!(output.status.success(), "login shell exited with error");
-
-    let stdout = String::from_utf8_lossy(&output.stdout);
-
-    if let Some(env_output_start) = stdout.find(marker) {
-        let env_output = &stdout[env_output_start + marker.len()..];
-
-        parse_env_output(env_output, |key, value| unsafe { env::set_var(key, value) });
-
-        log::info!(
-            "set environment variables from shell:{}, path:{}",
-            shell,
-            env::var("PATH").unwrap_or_default(),
-        );
-    }
-
     Ok(())
 }
 
@@ -375,32 +348,6 @@ pub fn set_pre_exec_to_start_new_session(
     command
 }
 
-/// Parse the result of calling `usr/bin/env` with no arguments
-pub fn parse_env_output(env: &str, mut f: impl FnMut(String, String)) {
-    let mut current_key: Option<String> = None;
-    let mut current_value: Option<String> = None;
-
-    for line in env.split_terminator('\n') {
-        if let Some(separator_index) = line.find('=') {
-            if !line[..separator_index].is_empty() {
-                if let Some((key, value)) = Option::zip(current_key.take(), current_value.take()) {
-                    f(key, value)
-                }
-                current_key = Some(line[..separator_index].to_string());
-                current_value = Some(line[separator_index + 1..].to_string());
-                continue;
-            };
-        }
-        if let Some(value) = current_value.as_mut() {
-            value.push('\n');
-            value.push_str(line);
-        }
-    }
-    if let Some((key, value)) = Option::zip(current_key.take(), current_value.take()) {
-        f(key, value)
-    }
-}
-
 pub fn merge_json_lenient_value_into(
     source: serde_json_lenient::Value,
     target: &mut serde_json_lenient::Value,