Ensure sqlez build succeeds on Windows (#7072)

白山風露 created

On Windows, `OsStr` must be a valid
[WTF-8](https://simonsapin.github.io/wtf-8/) sequence, and there is no
safe way to convert from bytes to `OsStr` in std. So I added
`PathExt::try_from_bytes` and used it in `sqlez`.

Change summary

Cargo.lock                   | 35 +++++++++++++++++++++++++++++++++++
crates/sqlez/Cargo.toml      |  1 +
crates/sqlez/src/bindable.rs | 12 +++++-------
crates/util/Cargo.toml       |  3 +++
crates/util/src/paths.rs     | 28 +++++++++++++++++++++++++++-
5 files changed, 71 insertions(+), 8 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -2802,6 +2802,16 @@ version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
 
+[[package]]
+name = "futf"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
+dependencies = [
+ "mac",
+ "new_debug_unreachable",
+]
+
 [[package]]
 name = "futures"
 version = "0.1.31"
@@ -4128,6 +4138,12 @@ dependencies = [
  "url",
 ]
 
+[[package]]
+name = "mac"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
+
 [[package]]
 name = "mach2"
 version = "0.4.1"
@@ -4502,6 +4518,12 @@ dependencies = [
  "winapi 0.3.9",
 ]
 
+[[package]]
+name = "new_debug_unreachable"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
+
 [[package]]
 name = "nix"
 version = "0.23.2"
@@ -7281,6 +7303,7 @@ dependencies = [
  "parking_lot 0.11.2",
  "smol",
  "thread_local",
+ "util",
  "uuid 1.4.1",
 ]
 
@@ -7810,6 +7833,17 @@ dependencies = [
  "windows-sys 0.52.0",
 ]
 
+[[package]]
+name = "tendril"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
+dependencies = [
+ "futf",
+ "mac",
+ "utf-8",
+]
+
 [[package]]
 name = "termcolor"
 version = "1.1.3"
@@ -8998,6 +9032,7 @@ dependencies = [
  "smol",
  "take-until",
  "tempfile",
+ "tendril",
  "url",
 ]
 

crates/sqlez/Cargo.toml 🔗

@@ -14,4 +14,5 @@ thread_local = "1.1.4"
 lazy_static.workspace = true
 parking_lot.workspace = true
 futures.workspace = true
+util = { path = "../util" }
 uuid.workspace = true

crates/sqlez/src/bindable.rs 🔗

@@ -1,11 +1,10 @@
 use std::{
-    ffi::OsStr,
-    os::unix::prelude::OsStrExt,
     path::{Path, PathBuf},
     sync::Arc,
 };
 
 use anyhow::{Context, Result};
+use util::paths::PathExt;
 
 use crate::statement::{SqlType, Statement};
 
@@ -299,7 +298,9 @@ impl<T: Bind, const COUNT: usize> Bind for [T; COUNT] {
 impl StaticColumnCount for &Path {}
 impl Bind for &Path {
     fn bind(&self, statement: &Statement, start_index: i32) -> Result<i32> {
-        self.as_os_str().as_bytes().bind(statement, start_index)
+        self.as_os_str()
+            .as_encoded_bytes()
+            .bind(statement, start_index)
     }
 }
 
@@ -321,10 +322,7 @@ impl Column for PathBuf {
     fn column(statement: &mut Statement, start_index: i32) -> Result<(Self, i32)> {
         let blob = statement.column_blob(start_index)?;
 
-        Ok((
-            PathBuf::from(OsStr::from_bytes(blob).to_owned()),
-            start_index + 1,
-        ))
+        PathBuf::try_from_bytes(blob).map(|path| (path, start_index + 1))
     }
 }
 

crates/util/Cargo.toml 🔗

@@ -31,6 +31,9 @@ git2 = { workspace = true, optional = true }
 dirs = "3.0"
 take-until = "0.2.0"
 
+[target.'cfg(windows)'.dependencies]
+tendril = "0.4.3"
+
 [dev-dependencies]
 tempfile.workspace = true
 git2.workspace = true

crates/util/src/paths.rs 🔗

@@ -1,4 +1,7 @@
-use std::path::{Path, PathBuf};
+use std::{
+    ffi::OsStr,
+    path::{Path, PathBuf},
+};
 
 use globset::{Glob, GlobMatcher};
 use serde::{Deserialize, Serialize};
@@ -40,6 +43,29 @@ pub trait PathExt {
     fn compact(&self) -> PathBuf;
     fn icon_suffix(&self) -> Option<&str>;
     fn extension_or_hidden_file_name(&self) -> Option<&str>;
+    fn try_from_bytes<'a>(bytes: &'a [u8]) -> anyhow::Result<Self>
+    where
+        Self: From<&'a Path>,
+    {
+        #[cfg(unix)]
+        {
+            use std::os::unix::prelude::OsStrExt;
+            Ok(Self::from(Path::new(OsStr::from_bytes(bytes))))
+        }
+        #[cfg(windows)]
+        {
+            use anyhow::anyhow;
+            use tendril::fmt::{Format, WTF8};
+            WTF8::validate(bytes)
+                .then(|| {
+                    // Safety: bytes are valid WTF-8 sequence.
+                    Self::from(Path::new(unsafe {
+                        OsStr::from_encoded_bytes_unchecked(bytes)
+                    }))
+                })
+                .ok_or_else(|| anyhow!("Invalid WTF-8 sequence: {bytes:?}"))
+        }
+    }
 }
 
 impl<T: AsRef<Path>> PathExt for T {