From 96c4542dc3c7001d5a28b05d067701f2173e9eb4 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Thu, 12 Nov 2020 11:09:58 +0100 Subject: [PATCH] Avoid muliplications by 1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` Benchmark #1: ./raytracer_cg_clif_pre Time (mean ± σ): 9.553 s ± 0.129 s [User: 9.543 s, System: 0.008 s] Range (min … max): 9.438 s … 9.837 s 10 runs Benchmark #2: ./raytracer_cg_clif_post Time (mean ± σ): 9.463 s ± 0.055 s [User: 9.452 s, System: 0.008 s] Range (min … max): 9.387 s … 9.518 s 10 runs Summary './raytracer_cg_clif_post' ran 1.01 ± 0.01 times faster than './raytracer_cg_clif_pre' ``` --- src/intrinsics/mod.rs | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index ab16fabd348a5..f613d8b845036 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -497,12 +497,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( }; copy | copy_nonoverlapping, (v src, v dst, v count) { let elem_size: u64 = fx.layout_of(elem_ty).size.bytes(); - let elem_size = fx - .bcx - .ins() - .iconst(fx.pointer_type, elem_size as i64); assert_eq!(args.len(), 3); - let byte_amount = fx.bcx.ins().imul(count, elem_size); + let byte_amount = if elem_size != 1 { + fx.bcx.ins().imul_imm(count, elem_size as i64) + } else { + count + }; if intrinsic.contains("nonoverlapping") { // FIXME emit_small_memcpy @@ -515,12 +515,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( // NOTE: the volatile variants have src and dst swapped volatile_copy_memory | volatile_copy_nonoverlapping_memory, (v dst, v src, v count) { let elem_size: u64 = fx.layout_of(elem_ty).size.bytes(); - let elem_size = fx - .bcx - .ins() - .iconst(fx.pointer_type, elem_size as i64); assert_eq!(args.len(), 3); - let byte_amount = fx.bcx.ins().imul(count, elem_size); + let byte_amount = if elem_size != 1 { + fx.bcx.ins().imul_imm(count, elem_size as i64) + } else { + count + }; // FIXME make the copy actually volatile when using emit_small_mem{cpy,move} if intrinsic.contains("nonoverlapping") { @@ -676,7 +676,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( offset | arith_offset, (c base, v offset) { let pointee_ty = base.layout().ty.builtin_deref(true).unwrap().ty; let pointee_size = fx.layout_of(pointee_ty).size.bytes(); - let ptr_diff = fx.bcx.ins().imul_imm(offset, pointee_size as i64); + let ptr_diff = if pointee_size != 1 { + fx.bcx.ins().imul_imm(offset, pointee_size as i64) + } else { + offset + }; let base_val = base.load_scalar(fx); let res = fx.bcx.ins().iadd(base_val, ptr_diff); ret.write_cvalue(fx, CValue::by_val(res, base.layout())); @@ -688,7 +692,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( write_bytes | volatile_set_memory, (c dst, v val, v count) { let pointee_ty = dst.layout().ty.builtin_deref(true).unwrap().ty; let pointee_size = fx.layout_of(pointee_ty).size.bytes(); - let count = fx.bcx.ins().imul_imm(count, pointee_size as i64); + let count = if pointee_size != 1 { + fx.bcx.ins().imul_imm(count, pointee_size as i64) + } else { + count + }; let dst_ptr = dst.load_scalar(fx); // FIXME make the memset actually volatile when switching to emit_small_memset // FIXME use emit_small_memset