Skip to content

Commit

Permalink
Avoid multiplications by 1
Browse files Browse the repository at this point in the history
```
Benchmark #1: ./raytracer_cg_clif_pre
  Time (mean ± σ):      9.553 s ±  0.129 s    [User: 9.543 s, System: 0.008 s]
  Range (min … max):    9.438 s …  9.837 s    10 runs

Benchmark #2: ./raytracer_cg_clif_post
  Time (mean ± σ):      9.463 s ±  0.055 s    [User: 9.452 s, System: 0.008 s]
  Range (min … max):    9.387 s …  9.518 s    10 runs

Summary
  './raytracer_cg_clif_post' ran
    1.01 ± 0.01 times faster than './raytracer_cg_clif_pre'
```
  • Loading branch information
bjorn3 committed Nov 12, 2020
1 parent 4700926 commit 96c4542
Showing 1 changed file with 20 additions and 12 deletions.
32 changes: 20 additions & 12 deletions src/intrinsics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -497,12 +497,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
};
copy | copy_nonoverlapping, <elem_ty> (v src, v dst, v count) {
let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
let elem_size = fx
.bcx
.ins()
.iconst(fx.pointer_type, elem_size as i64);
assert_eq!(args.len(), 3);
let byte_amount = fx.bcx.ins().imul(count, elem_size);
let byte_amount = if elem_size != 1 {
fx.bcx.ins().imul_imm(count, elem_size as i64)
} else {
count
};

if intrinsic.contains("nonoverlapping") {
// FIXME emit_small_memcpy
Expand All @@ -515,12 +515,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
// NOTE: the volatile variants have src and dst swapped
volatile_copy_memory | volatile_copy_nonoverlapping_memory, <elem_ty> (v dst, v src, v count) {
let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
let elem_size = fx
.bcx
.ins()
.iconst(fx.pointer_type, elem_size as i64);
assert_eq!(args.len(), 3);
let byte_amount = fx.bcx.ins().imul(count, elem_size);
let byte_amount = if elem_size != 1 {
fx.bcx.ins().imul_imm(count, elem_size as i64)
} else {
count
};

// FIXME make the copy actually volatile when using emit_small_mem{cpy,move}
if intrinsic.contains("nonoverlapping") {
Expand Down Expand Up @@ -676,7 +676,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
offset | arith_offset, (c base, v offset) {
let pointee_ty = base.layout().ty.builtin_deref(true).unwrap().ty;
let pointee_size = fx.layout_of(pointee_ty).size.bytes();
let ptr_diff = fx.bcx.ins().imul_imm(offset, pointee_size as i64);
let ptr_diff = if pointee_size != 1 {
fx.bcx.ins().imul_imm(offset, pointee_size as i64)
} else {
offset
};
let base_val = base.load_scalar(fx);
let res = fx.bcx.ins().iadd(base_val, ptr_diff);
ret.write_cvalue(fx, CValue::by_val(res, base.layout()));
Expand All @@ -688,7 +692,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
write_bytes | volatile_set_memory, (c dst, v val, v count) {
let pointee_ty = dst.layout().ty.builtin_deref(true).unwrap().ty;
let pointee_size = fx.layout_of(pointee_ty).size.bytes();
let count = fx.bcx.ins().imul_imm(count, pointee_size as i64);
let count = if pointee_size != 1 {
fx.bcx.ins().imul_imm(count, pointee_size as i64)
} else {
count
};
let dst_ptr = dst.load_scalar(fx);
// FIXME make the memset actually volatile when switching to emit_small_memset
// FIXME use emit_small_memset
Expand Down

0 comments on commit 96c4542

Please sign in to comment.