diff --git a/Cargo.toml b/Cargo.toml
index 560873c..44c90b7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -33,6 +33,7 @@ futures-executor = { version = "0.3.5", optional = true }
 blocking = { version = "1.0.0", optional = true }
 tokio = { version = "1.0.0", features = ["rt-multi-thread", "macros", "fs", "net"], optional = true }
 tokio-util = { version = "0.6.0", features = ["compat"], optional = true }
+libc = "0.2.86"
 
 [features]
 async_support = ["async-trait", "futures-channel", "futures-core", "futures-io", "futures-util", "futures-executor"]
diff --git a/src/ffi/mod.rs b/src/ffi/mod.rs
index 9ad68d7..cac9dee 100644
--- a/src/ffi/mod.rs
+++ b/src/ffi/mod.rs
@@ -5,3 +5,110 @@
 mod generated;
 
 pub(crate) use crate::ffi::generated::*;
+
+/// Reads the next entry header while `LC_CTYPE` is temporarily switched to a
+/// UTF-8 locale, so that libarchive can handle UTF-8 filenames.
+///
+/// The previous locale is restored before libarchive's status code is
+/// returned unchanged.
+///
+/// # Safety
+///
+/// `archive` and `entry` are forwarded untouched to the generated
+/// `archive_read_next_header` binding and must be valid for that call.
+pub(crate) unsafe fn archive_read_next_header(
+    archive: *mut archive,
+    entry: *mut *mut archive_entry,
+) -> ::std::os::raw::c_int {
+    let old_locale = set_utf8_locale();
+    let ret = generated::archive_read_next_header(archive, entry);
+    restore_locale(old_locale);
+
+    ret
+}
+
+// Change from the C to C.UTF-8 locale, allowing libarchive to
+// handle filenames in UTF-8. Only LC_CTYPE is taken from the UTF-8
+// locale, since libarchive only needs the charset set.
+//
+// See the libarchive issue tracker and wiki for a more complete
+// description of the issue:
+//
+// https://github.com/libarchive/libarchive/issues/587
+// https://github.com/libarchive/libarchive/wiki/Filenames
+//
+// Returns the locale that was active before the switch; hand it to
+// `restore_locale` to undo the change and free the temporary locale.
+#[cfg(unix)]
+unsafe fn set_utf8_locale() -> libc::locale_t {
+    // Apple platforms call the locale "UTF-8"; every other Unix target
+    // falls back to the "C.UTF-8" name used on Linux.
+    #[cfg(any(target_os = "macos", target_os = "ios"))]
+    let locale = b"UTF-8\0";
+
+    #[cfg(not(any(target_os = "macos", target_os = "ios")))]
+    let locale = b"C.UTF-8\0";
+
+    let utf8_locale = libc::newlocale(
+        libc::LC_CTYPE_MASK,
+        locale.as_ptr().cast(),
+        std::ptr::null_mut(),
+    );
+
+    // If `newlocale` failed, `utf8_locale` is null and `uselocale` only
+    // reports the current locale without changing it, so the returned
+    // handle can still be passed to `restore_locale` safely.
+    libc::uselocale(utf8_locale)
+}
+
+// Restore the original LC_CTYPE after extraction to avoid side effects,
+// and free the temporary UTF-8 locale created by `set_utf8_locale`.
+#[cfg(unix)]
+unsafe fn restore_locale(old_locale: libc::locale_t) {
+    // A null handle would turn `uselocale` into a pure query, so there is
+    // nothing to restore and nothing we can safely free.
+    if old_locale.is_null() {
+        return;
+    }
+
+    // `uselocale` returns the locale that was active until now, i.e. the
+    // object allocated by `newlocale`; freeing it below avoids leaking
+    // one locale object per header read.
+    let utf8_locale = libc::uselocale(old_locale);
+
+    // When the switch never happened the returned handle is `old_locale`
+    // itself, which belongs to the caller and must not be freed.
+    if !utf8_locale.is_null() && utf8_locale != old_locale {
+        libc::freelocale(utf8_locale);
+    }
+}
+
+// Windows counterpart of `set_utf8_locale`: switches LC_CTYPE to the
+// ".UTF-8" locale and returns a copy of the previous locale name, or
+// `None` (leaving the locale untouched) when it cannot be queried.
+#[cfg(windows)]
+unsafe fn set_utf8_locale() -> Option<std::ffi::CString> {
+    let locale = b".UTF-8\0";
+
+    let current = libc::setlocale(libc::LC_CTYPE, std::ptr::null());
+    if current.is_null() {
+        // `CStr::from_ptr` must never see a null pointer, and without the
+        // current name the switch could not be undone afterwards.
+        return None;
+    }
+
+    // Copy the name before the next `setlocale` call may overwrite it.
+    let old_locale = std::ffi::CStr::from_ptr(current).to_owned();
+
+    libc::setlocale(libc::LC_CTYPE, locale.as_ptr().cast());
+
+    Some(old_locale)
+}
+
+// Restore the LC_CTYPE saved by `set_utf8_locale`, if there is one.
+#[cfg(windows)]
+unsafe fn restore_locale(old_locale: Option<std::ffi::CString>) {
+    if let Some(old_locale) = old_locale {
+        libc::setlocale(libc::LC_CTYPE, old_locale.as_ptr());
+    }
+}
diff --git a/tests/fixtures/utf8.tar b/tests/fixtures/utf8.tar
new file mode 100644
index 0000000..10c63fb
Binary files /dev/null and b/tests/fixtures/utf8.tar differ
diff --git a/tests/integration_test.rs b/tests/integration_test.rs
index 94248c1..5b553f1 100644
--- a/tests/integration_test.rs
+++ b/tests/integration_test.rs
@@ -309,6 +309,21 @@ fn uncompress_to_dir_not_preserve_owner() {
     );
 }
 
+#[test]
+fn uncompress_to_dir_with_utf8_pathname() {
+    let dir = tempfile::TempDir::new().expect("Failed to create the tmp directory");
+    let mut source = std::fs::File::open("tests/fixtures/utf8.tar").unwrap();
+
+    uncompress_archive(&mut source, dir.path(), Ownership::Ignore)
+        .expect("Failed to uncompress the file");
+
+    assert_eq!(
+        dir.path().join("utf-8-file-name-őúíá").exists(),
+        true,
+        "the path doesn't exist"
+    );
+}
+
 #[test]
 fn uncompress_same_file_not_preserve_owner() {
     uncompress_archive(