Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
Add UTF8LocaleGuard struct and use coarse grained locale setting

Missing:
- setlocale is not thread-safe, and hence on Windows the tests fail due to
  the wrong locale being restored

Signed-off-by: Jonathas-Conceicao <jonathas.conceicao@ossystems.com.br>
  • Loading branch information
Jonathas-Conceicao committed Mar 2, 2021
1 parent a4d0667 commit cda039d
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 71 deletions.
86 changes: 86 additions & 0 deletions src/ffi/locale.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright (C) 2021 O.S. Systems Software LTDA
//
// SPDX-License-Identifier: MIT OR Apache-2.0

/// Scope guard that changes from the C to the system locale, allowing
/// libarchive to handle filenames in UTF-8. Only `LC_CTYPE` is changed, since
/// libarchive only needs the charset set.
///
/// The locale that was active when the guard was created is put back when the
/// guard is dropped.
///
/// See the libarchive website for a more complete description of the issue:
///
/// https://github.com/libarchive/libarchive/issues/587
/// https://github.com/libarchive/libarchive/wiki/Filenames
#[must_use = "the previous locale is restored as soon as the guard is dropped"]
pub(crate) struct UTF8LocaleGuard {
    /// Locale handle returned by `uselocale` when the guard was installed,
    /// i.e. the locale to go back to on drop.
    #[cfg(unix)]
    save: libc::locale_t,

    /// Locale object allocated by `newlocale` for this guard; null when the
    /// allocation failed. Owned by the guard and released on drop.
    #[cfg(unix)]
    utf8_locale: libc::locale_t,

    /// Copy of the `LC_CTYPE` locale name reported by `setlocale` before the
    /// guard changed it, or `None` when `setlocale` returned null.
    #[cfg(windows)]
    save: Option<std::ffi::CString>,
}

#[cfg(unix)]
impl UTF8LocaleGuard {
    /// Creates a new locale object for `LC_CTYPE`, installs it with
    /// `uselocale`, and remembers both the previous locale and the new object
    /// so that `Drop` can restore the former and free the latter.
    pub(crate) fn new() -> Self {
        #[cfg(target_os = "linux")]
        let locale = b"\0";

        #[cfg(target_os = "macos")]
        let locale = b"UTF-8\0";

        // SAFETY: `locale` is a NUL-terminated byte string literal, and a null
        // base asks `newlocale` to allocate a fresh locale object.
        let utf8_locale = unsafe {
            libc::newlocale(
                libc::LC_CTYPE_MASK,
                locale.as_ptr() as *const libc::c_char,
                std::ptr::null_mut(),
            )
        };

        // SAFETY: `utf8_locale` is either a handle just returned by
        // `newlocale` or null; with a null argument `uselocale` only reports
        // the current locale, so a failed allocation leaves the locale as is.
        let save = unsafe { libc::uselocale(utf8_locale) };

        Self { save, utf8_locale }
    }
}

#[cfg(unix)]
impl Drop for UTF8LocaleGuard {
    /// Restores the saved locale and releases the locale object created in
    /// `new`, which would otherwise be leaked once per guard.
    fn drop(&mut self) {
        // SAFETY: `self.save` is the handle `uselocale` handed back in `new`.
        unsafe { libc::uselocale(self.save) };

        if !self.utf8_locale.is_null() {
            // SAFETY: the handle came from `newlocale`, is freed exactly once,
            // and the saved locale was re-installed just above, so the object
            // is no longer the one in use.
            unsafe { libc::freelocale(self.utf8_locale) };
        }
    }
}

#[cfg(windows)]
impl UTF8LocaleGuard {
    /// Records the current `LC_CTYPE` locale name (if `setlocale` reports one)
    /// and then requests the `.UTF-8` locale for `LC_CTYPE`.
    pub(crate) fn new() -> Self {
        const UTF8_LOCALE: &[u8] = b".UTF-8\0";

        // Ask for the current setting first; the returned string is copied
        // into an owned `CString` before the next `setlocale` call.
        let current = unsafe { libc::setlocale(libc::LC_CTYPE, std::ptr::null()) };
        let save = if current.is_null() {
            None
        } else {
            Some(unsafe { std::ffi::CStr::from_ptr(current) }.to_owned())
        };

        unsafe {
            // `UTF8_LOCALE` ends with its only NUL byte, as the unchecked
            // constructor requires.
            let requested = std::ffi::CStr::from_bytes_with_nul_unchecked(UTF8_LOCALE);
            libc::setlocale(libc::LC_CTYPE, requested.as_ptr());
        }

        Self { save }
    }
}

#[cfg(windows)]
impl Drop for UTF8LocaleGuard {
    /// Puts back the `LC_CTYPE` locale name saved in `new`, if there was one.
    ///
    /// Nothing is printed and the saved name is never decoded as UTF-8, so
    /// dropping the guard cannot panic on a locale name in another encoding.
    fn drop(&mut self) {
        if let Some(locale) = &self.save {
            // SAFETY: `locale` is an owned, NUL-terminated `CString` that
            // outlives the call.
            unsafe { libc::setlocale(libc::LC_CTYPE, locale.as_ptr()) };
        }
    }
}
73 changes: 2 additions & 71 deletions src/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,76 +3,7 @@
// SPDX-License-Identifier: MIT OR Apache-2.0

mod generated;
mod locale;

pub(crate) use crate::ffi::generated::*;

/// Calls the generated `archive_read_next_header` binding with the UTF-8
/// locale set, restoring the previous locale before returning the binding's
/// status code.
pub(crate) unsafe fn archive_read_next_header(
    archive: *mut archive,
    entry: *mut *mut archive_entry,
) -> ::std::os::raw::c_int {
    let saved_locale = set_utf8_locale();
    let status = generated::archive_read_next_header(archive, entry);
    restore_locale(saved_locale);
    status
}

// Change from the C to the system locale, allowing libarchive to handle
// filenames in UTF-8. We restrict to change LC_CTYPE only, since libarchive
// only needs the charset set.
//
// See the libarchive website for a more complete description
// of the issue:
//
// https://github.com/libarchive/libarchive/issues/587
// https://github.com/libarchive/libarchive/wiki/Filenames
/// Builds an `LC_CTYPE`-only locale object, makes it the current locale via
/// `uselocale`, and returns the handle `uselocale` reports as the previous
/// locale.
#[cfg(unix)]
unsafe fn set_utf8_locale() -> libc::locale_t {
    #[cfg(target_os = "linux")]
    let name: &[u8] = b"\0";

    #[cfg(target_os = "macos")]
    let name: &[u8] = b"UTF-8\0";

    let name_ptr = name.as_ptr() as *const libc::c_char;
    let ctype_locale = libc::newlocale(libc::LC_CTYPE_MASK, name_ptr, std::ptr::null_mut());

    libc::uselocale(ctype_locale)
}

// Restore the original LC_CTYPE after extraction to avoid side effects, and
// release the temporary locale object that was installed in the meantime so
// it is not leaked on every call.
#[cfg(unix)]
unsafe fn restore_locale(old_locale: libc::locale_t) {
    let replaced = libc::uselocale(old_locale);

    // Only free a handle that is distinct from the one just re-installed: if
    // creating the temporary locale failed, the locale never changed and
    // `replaced` is `old_locale` itself, which must stay alive.
    if !replaced.is_null() && replaced != old_locale {
        libc::freelocale(replaced);
    }
}

/// Requests the `.UTF-8` locale for `LC_CTYPE` and returns an owned copy of
/// the locale name that `setlocale` reported beforehand (`None` if it
/// returned null).
#[cfg(windows)]
unsafe fn set_utf8_locale() -> Option<ffi::CString> {
    const UTF8_LOCALE: &[u8] = b".UTF-8\0";

    // Copy the reported name before calling `setlocale` again.
    let current = libc::setlocale(libc::LC_CTYPE, ptr::null());
    let saved = if current.is_null() {
        None
    } else {
        Some(ffi::CStr::from_ptr(current).to_owned())
    };

    let requested = ffi::CStr::from_bytes_with_nul_unchecked(UTF8_LOCALE);
    libc::setlocale(libc::LC_CTYPE, requested.as_ptr());

    saved
}

/// Re-applies a previously saved `LC_CTYPE` locale name; does nothing when no
/// name was saved.
#[cfg(windows)]
unsafe fn restore_locale(old_locale: Option<ffi::CString>) {
    if let Some(saved) = old_locale.as_deref() {
        libc::setlocale(libc::LC_CTYPE, saved.as_ptr());
    }
}
pub(crate) use locale::UTF8LocaleGuard;
5 changes: 5 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ pub fn list_archive_files<R>(source: R) -> Result<Vec<String>>
where
R: Read + Seek,
{
let _utf8_guard = ffi::UTF8LocaleGuard::new();
run_with_seekable_archive(source, |archive_reader, _, mut entry| unsafe {
let mut file_list = Vec::new();
#[allow(clippy::vec_init_then_push)]
Expand Down Expand Up @@ -171,6 +172,7 @@ where
R: Read,
W: Write,
{
let _utf8_guard = ffi::UTF8LocaleGuard::new();
run_with_archive(
WriteMode::Buffer,
source,
Expand Down Expand Up @@ -206,6 +208,7 @@ pub fn uncompress_archive<R>(source: R, dest: &Path, ownership: Ownership) -> Re
where
R: Read + Seek,
{
let _utf8_guard = ffi::UTF8LocaleGuard::new();
run_with_archive(
WriteMode::Disk { ownership },
source,
Expand Down Expand Up @@ -273,6 +276,7 @@ where
R: Read + Seek,
W: Write,
{
let _utf8_guard = ffi::UTF8LocaleGuard::new();
run_with_seekable_archive(source, |archive_reader, _, mut entry| unsafe {
loop {
match ffi::archive_read_next_header(archive_reader, &mut entry) {
Expand Down Expand Up @@ -301,6 +305,7 @@ where
F: FnOnce(*mut ffi::archive, *mut ffi::archive, *mut ffi::archive_entry) -> Result<T>,
R: Read,
{
let _utf8_guard = ffi::UTF8LocaleGuard::new();
unsafe {
let archive_entry: *mut ffi::archive_entry = std::ptr::null_mut();
let archive_reader = ffi::archive_read_new();
Expand Down

0 comments on commit cda039d

Please sign in to comment.