Skip to content

Commit

Permalink
Auto merge of #130401 - matthiaskrgr:rollup-fri2j58, r=matthiaskrgr
Browse files Browse the repository at this point in the history
Rollup of 5 pull requests

Successful merges:

 - #129439 (Implement feature `string_from_utf8_lossy_owned` for lossy conversion from `Vec<u8>` to `String` methods)
 - #129828 (miri: treat non-memory local variables properly for data race detection)
 - #130110 (make dist vendoring configurable)
 - #130293 (Fix lint levels not getting overridden by attrs on `Stmt` nodes)
 - #130342 (interpret, miri: fix dealing with overflow during slice indexing and allocation)

Failed merges:

 - #130394 (const: don't ICE when encountering a mutable ref to immutable memory)

r? `@ghost`
`@rustbot` modify labels: rollup
  • Loading branch information
bors committed Sep 15, 2024
2 parents dde7d66 + 96195a5 commit 8c2c9a9
Show file tree
Hide file tree
Showing 37 changed files with 805 additions and 183 deletions.
16 changes: 8 additions & 8 deletions compiler/rustc_const_eval/src/interpret/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
self.copy_intrinsic(&args[0], &args[1], &args[2], /*nonoverlapping*/ false)?;
}
sym::write_bytes => {
self.write_bytes_intrinsic(&args[0], &args[1], &args[2])?;
self.write_bytes_intrinsic(&args[0], &args[1], &args[2], "write_bytes")?;
}
sym::compare_bytes => {
let result = self.compare_bytes_intrinsic(&args[0], &args[1], &args[2])?;
Expand Down Expand Up @@ -599,9 +599,8 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
let count = self.read_target_usize(count)?;
let layout = self.layout_of(src.layout.ty.builtin_deref(true).unwrap())?;
let (size, align) = (layout.size, layout.align.abi);
// `checked_mul` enforces a too small bound (the correct one would probably be target_isize_max),
// but no actual allocation can be big enough for the difference to be noticeable.
let size = size.checked_mul(count, self).ok_or_else(|| {

let size = self.compute_size_in_bytes(size, count).ok_or_else(|| {
err_ub_custom!(
fluent::const_eval_size_overflow,
name = if nonoverlapping { "copy_nonoverlapping" } else { "copy" }
Expand Down Expand Up @@ -635,11 +634,12 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
Ok(())
}

pub(crate) fn write_bytes_intrinsic(
pub fn write_bytes_intrinsic(
&mut self,
dst: &OpTy<'tcx, <M as Machine<'tcx>>::Provenance>,
byte: &OpTy<'tcx, <M as Machine<'tcx>>::Provenance>,
count: &OpTy<'tcx, <M as Machine<'tcx>>::Provenance>,
name: &'static str,
) -> InterpResult<'tcx> {
let layout = self.layout_of(dst.layout.ty.builtin_deref(true).unwrap())?;

Expand All @@ -649,9 +649,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {

// `checked_mul` enforces a too small bound (the correct one would probably be target_isize_max),
// but no actual allocation can be big enough for the difference to be noticeable.
let len = layout.size.checked_mul(count, self).ok_or_else(|| {
err_ub_custom!(fluent::const_eval_size_overflow, name = "write_bytes")
})?;
let len = self
.compute_size_in_bytes(layout.size, count)
.ok_or_else(|| err_ub_custom!(fluent::const_eval_size_overflow, name = name))?;

let bytes = std::iter::repeat(byte).take(len.bytes_usize());
self.write_bytes_ptr(dst, bytes)
Expand Down
21 changes: 20 additions & 1 deletion compiler/rustc_const_eval/src/interpret/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -540,10 +540,29 @@ pub trait Machine<'tcx>: Sized {
Ok(ReturnAction::Normal)
}

/// Called immediately after an "immediate" local variable is read
/// (i.e., this is called for reads that do not end up accessing addressable memory).
#[inline(always)]
fn after_local_read(_ecx: &InterpCx<'tcx, Self>, _local: mir::Local) -> InterpResult<'tcx> {
Ok(())
}

/// Called immediately after an "immediate" local variable is assigned a new value
/// (i.e., this is called for writes that do not end up in memory).
/// `storage_live` indicates whether this is the initial write upon `StorageLive`.
#[inline(always)]
fn after_local_write(
_ecx: &mut InterpCx<'tcx, Self>,
_local: mir::Local,
_storage_live: bool,
) -> InterpResult<'tcx> {
Ok(())
}

/// Called immediately after actual memory was allocated for a local
/// but before the local's stack frame is updated to point to that memory.
#[inline(always)]
fn after_local_allocated(
fn after_local_moved_to_memory(
_ecx: &mut InterpCx<'tcx, Self>,
_local: mir::Local,
_mplace: &MPlaceTy<'tcx, Self::Provenance>,
Expand Down
11 changes: 8 additions & 3 deletions compiler/rustc_const_eval/src/interpret/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
} else {
Allocation::try_uninit(size, align)?
};
self.allocate_raw_ptr(alloc, kind)
self.insert_allocation(alloc, kind)
}

pub fn allocate_bytes_ptr(
Expand All @@ -233,14 +233,15 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
mutability: Mutability,
) -> InterpResult<'tcx, Pointer<M::Provenance>> {
let alloc = Allocation::from_bytes(bytes, align, mutability);
self.allocate_raw_ptr(alloc, kind)
self.insert_allocation(alloc, kind)
}

pub fn allocate_raw_ptr(
pub fn insert_allocation(
&mut self,
alloc: Allocation<M::Provenance, (), M::Bytes>,
kind: MemoryKind<M::MemoryKind>,
) -> InterpResult<'tcx, Pointer<M::Provenance>> {
assert!(alloc.size() <= self.max_size_of_val());
let id = self.tcx.reserve_alloc_id();
debug_assert_ne!(
Some(kind),
Expand Down Expand Up @@ -1046,6 +1047,10 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
);
res
}

pub(super) fn validation_in_progress(&self) -> bool {
self.memory.validation_in_progress
}
}

#[doc(hidden)]
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_const_eval/src/interpret/operand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
if matches!(op, Operand::Immediate(_)) {
assert!(!layout.is_unsized());
}
M::after_local_read(self, local)?;
Ok(OpTy { op, layout })
}

Expand Down
17 changes: 16 additions & 1 deletion compiler/rustc_const_eval/src/interpret/operator.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use either::Either;
use rustc_apfloat::{Float, FloatConvert};
use rustc_middle::mir::interpret::{InterpResult, Scalar};
use rustc_middle::mir::interpret::{InterpResult, PointerArithmetic, Scalar};
use rustc_middle::mir::NullOp;
use rustc_middle::ty::layout::{LayoutOf, TyAndLayout};
use rustc_middle::ty::{self, FloatTy, ScalarInt, Ty};
use rustc_middle::{bug, mir, span_bug};
use rustc_span::symbol::sym;
use rustc_target::abi::Size;
use tracing::trace;

use super::{throw_ub, ImmTy, InterpCx, Machine, MemPlaceMeta};
Expand Down Expand Up @@ -287,6 +288,20 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
})
}

/// Computes the total size of this access, `count * elem_size`,
/// checking for overflow beyond isize::MAX.
pub fn compute_size_in_bytes(&self, elem_size: Size, count: u64) -> Option<Size> {
// `checked_mul` applies `u64` limits independent of the target pointer size... but the
// subsequent check for `max_size_of_val` means we also handle 32bit targets correctly.
// (We cannot use `Size::checked_mul` as that enforces `obj_size_bound` as the limit, which
// would be wrong here.)
elem_size
.bytes()
.checked_mul(count)
.map(Size::from_bytes)
.filter(|&total| total <= self.max_size_of_val())
}

fn binary_ptr_op(
&self,
bin_op: mir::BinOp,
Expand Down
31 changes: 22 additions & 9 deletions compiler/rustc_const_eval/src/interpret/place.rs
Original file line number Diff line number Diff line change
Expand Up @@ -500,15 +500,13 @@ where
&self,
local: mir::Local,
) -> InterpResult<'tcx, PlaceTy<'tcx, M::Provenance>> {
// Other parts of the system rely on `Place::Local` never being unsized.
// So we eagerly check here if this local has an MPlace, and if yes we use it.
let frame = self.frame();
let layout = self.layout_of_local(frame, local, None)?;
let place = if layout.is_sized() {
// We can just always use the `Local` for sized values.
Place::Local { local, offset: None, locals_addr: frame.locals_addr() }
} else {
// Unsized `Local` isn't okay (we cannot store the metadata).
// Other parts of the system rely on `Place::Local` never being unsized.
match frame.locals[local].access()? {
Operand::Immediate(_) => bug!(),
Operand::Indirect(mplace) => Place::Ptr(*mplace),
Expand Down Expand Up @@ -562,7 +560,10 @@ where
place: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<
'tcx,
Either<MPlaceTy<'tcx, M::Provenance>, (&mut Immediate<M::Provenance>, TyAndLayout<'tcx>)>,
Either<
MPlaceTy<'tcx, M::Provenance>,
(&mut Immediate<M::Provenance>, TyAndLayout<'tcx>, mir::Local),
>,
> {
Ok(match place.to_place().as_mplace_or_local() {
Left(mplace) => Left(mplace),
Expand All @@ -581,7 +582,7 @@ where
}
Operand::Immediate(local_val) => {
// The local still has the optimized representation.
Right((local_val, layout))
Right((local_val, layout, local))
}
}
}
Expand Down Expand Up @@ -643,9 +644,13 @@ where
assert!(dest.layout().is_sized(), "Cannot write unsized immediate data");

match self.as_mplace_or_mutable_local(&dest.to_place())? {
Right((local_val, local_layout)) => {
Right((local_val, local_layout, local)) => {
// Local can be updated in-place.
*local_val = src;
// Call the machine hook (the data race detector needs to know about this write).
if !self.validation_in_progress() {
M::after_local_write(self, local, /*storage_live*/ false)?;
}
// Double-check that the value we are storing and the local fit to each other.
if cfg!(debug_assertions) {
src.assert_matches_abi(local_layout.abi, self);
Expand Down Expand Up @@ -714,8 +719,12 @@ where
dest: &impl Writeable<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
match self.as_mplace_or_mutable_local(&dest.to_place())? {
Right((local_val, _local_layout)) => {
Right((local_val, _local_layout, local)) => {
*local_val = Immediate::Uninit;
// Call the machine hook (the data race detector needs to know about this write).
if !self.validation_in_progress() {
M::after_local_write(self, local, /*storage_live*/ false)?;
}
}
Left(mplace) => {
let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else {
Expand All @@ -734,8 +743,12 @@ where
dest: &impl Writeable<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
match self.as_mplace_or_mutable_local(&dest.to_place())? {
Right((local_val, _local_layout)) => {
Right((local_val, _local_layout, local)) => {
local_val.clear_provenance()?;
// Call the machine hook (the data race detector needs to know about this write).
if !self.validation_in_progress() {
M::after_local_write(self, local, /*storage_live*/ false)?;
}
}
Left(mplace) => {
let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else {
Expand Down Expand Up @@ -941,7 +954,7 @@ where
mplace.mplace,
)?;
}
M::after_local_allocated(self, local, &mplace)?;
M::after_local_moved_to_memory(self, local, &mplace)?;
// Now we can call `access_mut` again, asserting it goes well, and actually
// overwrite things. This points to the entire allocation, not just the part
// the place refers to, i.e. we do this before we apply `offset`.
Expand Down
8 changes: 6 additions & 2 deletions compiler/rustc_const_eval/src/interpret/projection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use rustc_target::abi::{self, Size, VariantIdx};
use tracing::{debug, instrument};

use super::{
throw_ub, throw_unsup, InterpCx, InterpResult, MPlaceTy, Machine, MemPlaceMeta, OpTy,
err_ub, throw_ub, throw_unsup, InterpCx, InterpResult, MPlaceTy, Machine, MemPlaceMeta, OpTy,
Provenance, Scalar,
};

Expand Down Expand Up @@ -229,7 +229,11 @@ where
// This can only be reached in ConstProp and non-rustc-MIR.
throw_ub!(BoundsCheckFailed { len, index });
}
let offset = stride * index; // `Size` multiplication
// With raw slices, `len` can be so big that this *can* overflow.
let offset = self
.compute_size_in_bytes(stride, index)
.ok_or_else(|| err_ub!(PointerArithOverflow))?;

// All fields have the same layout.
let field_layout = base.layout().field(self, 0);
(offset, field_layout)
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_const_eval/src/interpret/stack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -534,8 +534,11 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
let dest_place = self.allocate_dyn(layout, MemoryKind::Stack, meta)?;
Operand::Indirect(*dest_place.mplace())
} else {
assert!(!meta.has_meta()); // we're dropping the metadata
// Just make this an efficient immediate.
assert!(!meta.has_meta()); // we're dropping the metadata
// Make sure the machine knows this "write" is happening. (This is important so that
// races involving local variable allocation can be detected by Miri.)
M::after_local_write(self, local, /*storage_live*/ true)?;
// Note that not calling `layout_of` here does have one real consequence:
// if the type is too big, we'll only notice this when the local is actually initialized,
// which is a bit too late -- we should ideally notice this already here, when the memory
Expand Down
8 changes: 3 additions & 5 deletions compiler/rustc_lint/src/levels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,11 +255,9 @@ impl<'tcx> Visitor<'tcx> for LintLevelsBuilder<'_, LintLevelQueryMap<'tcx>> {
intravisit::walk_foreign_item(self, it);
}

fn visit_stmt(&mut self, e: &'tcx hir::Stmt<'tcx>) {
// We will call `add_id` when we walk
// the `StmtKind`. The outer statement itself doesn't
// define the lint levels.
intravisit::walk_stmt(self, e);
fn visit_stmt(&mut self, s: &'tcx hir::Stmt<'tcx>) {
self.add_id(s.hir_id);
intravisit::walk_stmt(self, s);
}

fn visit_expr(&mut self, e: &'tcx hir::Expr<'tcx>) {
Expand Down
3 changes: 3 additions & 0 deletions config.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -942,3 +942,6 @@
# Copy the linker, DLLs, and various libraries from MinGW into the Rust toolchain.
# Only applies when the host or target is pc-windows-gnu.
#include-mingw-linker = true

# Whether to vendor dependencies for the dist tarball.
#vendor = if "is a tarball source" || "is a git repository" { true } else { false }
74 changes: 74 additions & 0 deletions library/alloc/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,56 @@ impl String {
Cow::Owned(res)
}

/// Converts a [`Vec<u8>`] to a `String`, substituting invalid UTF-8
/// sequences with replacement characters.
///
/// See [`from_utf8_lossy`] for more details.
///
/// [`from_utf8_lossy`]: String::from_utf8_lossy
///
/// Note that this function does not guarantee reuse of the original `Vec`
/// allocation.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// #![feature(string_from_utf8_lossy_owned)]
/// // some bytes, in a vector
/// let sparkle_heart = vec![240, 159, 146, 150];
///
/// let sparkle_heart = String::from_utf8_lossy_owned(sparkle_heart);
///
/// assert_eq!(String::from("💖"), sparkle_heart);
/// ```
///
/// Incorrect bytes:
///
/// ```
/// #![feature(string_from_utf8_lossy_owned)]
/// // some invalid bytes
/// let input: Vec<u8> = b"Hello \xF0\x90\x80World".into();
/// let output = String::from_utf8_lossy_owned(input);
///
/// assert_eq!(String::from("Hello �World"), output);
/// ```
#[must_use]
#[cfg(not(no_global_oom_handling))]
#[unstable(feature = "string_from_utf8_lossy_owned", issue = "129436")]
pub fn from_utf8_lossy_owned(v: Vec<u8>) -> String {
if let Cow::Owned(string) = String::from_utf8_lossy(&v) {
string
} else {
// SAFETY: `String::from_utf8_lossy`'s contract ensures that if
// it returns a `Cow::Borrowed`, it is a valid UTF-8 string.
// Otherwise, it returns a new allocation of an owned `String`, with
// replacement characters for invalid sequences, which is returned
// above.
unsafe { String::from_utf8_unchecked(v) }
}
}

/// Decode a UTF-16–encoded vector `v` into a `String`, returning [`Err`]
/// if `v` contains any invalid data.
///
Expand Down Expand Up @@ -2010,6 +2060,30 @@ impl FromUtf8Error {
&self.bytes[..]
}

/// Converts the bytes into a `String` lossily, substituting invalid UTF-8
/// sequences with replacement characters.
///
/// See [`String::from_utf8_lossy`] for more details on replacement of
/// invalid sequences, and [`String::from_utf8_lossy_owned`] for the
/// `String` function which corresponds to this function.
///
/// # Examples
///
/// ```
/// #![feature(string_from_utf8_lossy_owned)]
/// // some invalid bytes
/// let input: Vec<u8> = b"Hello \xF0\x90\x80World".into();
/// let output = String::from_utf8(input).unwrap_or_else(|e| e.into_utf8_lossy());
///
/// assert_eq!(String::from("Hello �World"), output);
/// ```
#[must_use]
#[cfg(not(no_global_oom_handling))]
#[unstable(feature = "string_from_utf8_lossy_owned", issue = "129436")]
pub fn into_utf8_lossy(self) -> String {
String::from_utf8_lossy_owned(self.bytes)
}

/// Returns the bytes that were attempted to convert to a `String`.
///
/// This method is carefully constructed to avoid allocation. It will
Expand Down
Loading

0 comments on commit 8c2c9a9

Please sign in to comment.