
Commit aee21c3

[RISCV] AddEdge between mask producer and user of V0 (#146855)
If there are multiple mask producers followed by multiple masked consumers, a move (vmv* v0, vx) may be generated to save a mask. By moving each mask's producer to after the preceding mask's use, the spill can be eliminated and the move removed.
1 parent 9a805ba commit aee21c3
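
As a concrete illustration (drawn from the deinterleave6_0_i8 test updated below), the old schedule produced the second mask early and parked it in a scratch register, paying a vmv1r.v to restore it into v0:

    vmv.v.i v0, 2                    # first mask, written directly to v0
    vmv.v.i v8, 4                    # second mask, parked in v8
    ...
    vslidedown.vi v9, v9, 5, v0.t    # first masked use of v0
    vmv1r.v v0, v8                   # restore the parked mask into v0
    vrgather.vi v9, v10, 4, v0.t     # second masked use

With the added edges, the second producer is scheduled after the first masked use and can write v0 directly, so the move disappears:

    vslidedown.vi v8, v8, 5, v0.t    # first masked use of v0
    vmv.v.i v0, 4                    # second mask, now written directly to v0
    vrgather.vi v8, v9, 4, v0.t      # second masked use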

7 files changed: +190 -190 lines changed

llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp

Lines changed: 19 additions & 1 deletion
@@ -10,6 +10,10 @@
 // instructions and masked instructions, so that we can reduce the live range
 // overlaps of mask registers.
 //
+// If there are multiple masks producers followed by multiple masked
+// instructions, then at each masked instructions add dependency edges between
+// every producer and masked instruction.
+//
 // The reason why we need to do this:
 // 1. When tracking register pressure, we don't track physical registers.
 // 2. We have a RegisterClass for mask register (which is `VMV0`), but we don't
@@ -68,11 +72,25 @@ class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation {
 
   void apply(ScheduleDAGInstrs *DAG) override {
     SUnit *NearestUseV0SU = nullptr;
+    SmallVector<SUnit *, 2> DefMask;
     for (SUnit &SU : DAG->SUnits) {
       const MachineInstr *MI = SU.getInstr();
-      if (MI->findRegisterUseOperand(RISCV::V0, TRI))
+      if (isSoleUseCopyToV0(SU))
+        DefMask.push_back(&SU);
+
+      if (MI->findRegisterUseOperand(RISCV::V0, TRI)) {
         NearestUseV0SU = &SU;
 
+        // Copy may not be a real use, so skip it here.
+        if (DefMask.size() > 1 && !MI->isCopy())
+          for (SUnit *Def : DefMask)
+            if (DAG->canAddEdge(Def, &SU))
+              DAG->addEdge(Def, SDep(&SU, SDep::Artificial));
+
+        if (!DefMask.empty())
+          DefMask.erase(DefMask.begin());
+      }
+
       if (NearestUseV0SU && NearestUseV0SU != &SU && isSoleUseCopyToV0(SU) &&
           // For LMUL=8 cases, there will be more possibilities to spill.
          // FIXME: We should use RegPressureTracker to do fine-grained
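
For readers less familiar with the scheduling API, the core of the change is repeated below with explanatory comments (the comments are editorial, not part of the commit). ScheduleDAGInstrs::addEdge takes the successor SUnit plus an SDep naming the predecessor, so each call makes the masked instruction SU a predecessor of the pending mask producer Def, forcing the producer to be scheduled after the earlier mask's use:

    // SU is the current masked instruction (it reads V0); DefMask holds the
    // producers whose only use is a copy into V0 (per isSoleUseCopyToV0).
    for (SUnit *Def : DefMask)
      // canAddEdge() verifies that the new edge keeps the DAG acyclic.
      if (DAG->canAddEdge(Def, &SU))
        // SDep::Artificial is a pure ordering constraint; it models no data
        // or anti dependence, it only restricts the scheduler's ordering.
        DAG->addEdge(Def, SDep(&SU, SDep::Artificial));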

llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll

Lines changed: 10 additions & 8 deletions
@@ -313,12 +313,12 @@ define i32 @test_nxv128i1(<vscale x 128 x i1> %x) {
 ; CHECK-NEXT: vslidedown.vx v0, v6, a0
 ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
 ; CHECK-NEXT: vslidedown.vx v6, v7, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a0
 ; CHECK-NEXT: vslidedown.vx v5, v6, a0
-; CHECK-NEXT: vslidedown.vx v4, v7, a0
 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v4
 ; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v5
 ; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
@@ -425,13 +425,15 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
 ; CHECK-NEXT: vmerge.vim v16, v8, 1, v0
 ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v5, a1
-; CHECK-NEXT: vslidedown.vx v5, v7, a1
-; CHECK-NEXT: vslidedown.vx v4, v6, a1
-; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v4
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v6, a1
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
 ; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
-; CHECK-NEXT: vmv1r.v v0, v5
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a1
+; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
 ; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
 ; CHECK-NEXT: vadd.vv v8, v16, v8
 ; CHECK-NEXT: addi a2, sp, 16

llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll

Lines changed: 18 additions & 20 deletions
@@ -139,21 +139,20 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT: slli a3, a3, 4
 ; RV32-NEXT: sub sp, sp, a3
 ; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: addi a3, sp, 64
 ; RV32-NEXT: vl8r.v v8, (a0)
 ; RV32-NEXT: slli a2, a2, 3
 ; RV32-NEXT: add a0, a0, a2
-; RV32-NEXT: vl8r.v v24, (a0)
+; RV32-NEXT: vl8r.v v16, (a0)
 ; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma
 ; RV32-NEXT: vmseq.vi v0, v8, 0
-; RV32-NEXT: vmv.v.i v16, 0
-; RV32-NEXT: add a1, a3, a1
-; RV32-NEXT: add a2, a3, a2
-; RV32-NEXT: vmseq.vi v8, v24, 0
-; RV32-NEXT: vmerge.vim v24, v16, 1, v0
-; RV32-NEXT: vs8r.v v24, (a3)
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vmerge.vim v8, v16, 1, v0
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v24, v8, 1, v0
+; RV32-NEXT: vmseq.vi v0, v16, 0
+; RV32-NEXT: addi a0, sp, 64
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: vs8r.v v24, (a0)
+; RV32-NEXT: vmerge.vim v8, v8, 1, v0
 ; RV32-NEXT: vs8r.v v8, (a2)
 ; RV32-NEXT: lbu a0, 0(a1)
 ; RV32-NEXT: addi sp, s0, -80
@@ -179,21 +178,20 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT: slli a3, a3, 4
 ; RV64-NEXT: sub sp, sp, a3
 ; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: addi a3, sp, 64
 ; RV64-NEXT: vl8r.v v8, (a0)
 ; RV64-NEXT: slli a2, a2, 3
 ; RV64-NEXT: add a0, a0, a2
-; RV64-NEXT: vl8r.v v24, (a0)
+; RV64-NEXT: vl8r.v v16, (a0)
 ; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma
 ; RV64-NEXT: vmseq.vi v0, v8, 0
-; RV64-NEXT: vmv.v.i v16, 0
-; RV64-NEXT: add a1, a3, a1
-; RV64-NEXT: add a2, a3, a2
-; RV64-NEXT: vmseq.vi v8, v24, 0
-; RV64-NEXT: vmerge.vim v24, v16, 1, v0
-; RV64-NEXT: vs8r.v v24, (a3)
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vmerge.vim v8, v16, 1, v0
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vim v24, v8, 1, v0
+; RV64-NEXT: vmseq.vi v0, v16, 0
+; RV64-NEXT: addi a0, sp, 64
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: add a2, a0, a2
+; RV64-NEXT: vs8r.v v24, (a0)
+; RV64-NEXT: vmerge.vim v8, v8, 1, v0
 ; RV64-NEXT: vs8r.v v8, (a2)
 ; RV64-NEXT: lbu a0, 0(a1)
 ; RV64-NEXT: addi sp, s0, -80

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll

Lines changed: 52 additions & 56 deletions
@@ -324,24 +324,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
 ; RV32-NEXT: addi s0, sp, 384
 ; RV32-NEXT: andi sp, sp, -128
-; RV32-NEXT: zext.b a1, a1
-; RV32-NEXT: mv a2, sp
-; RV32-NEXT: li a3, 128
-; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; RV32-NEXT: vle8.v v8, (a0)
 ; RV32-NEXT: addi a0, a0, 128
 ; RV32-NEXT: vle8.v v16, (a0)
-; RV32-NEXT: add a1, a2, a1
 ; RV32-NEXT: vmseq.vi v0, v8, 0
-; RV32-NEXT: vmv.v.i v24, 0
-; RV32-NEXT: vmseq.vi v8, v16, 0
-; RV32-NEXT: vmerge.vim v16, v24, 1, v0
-; RV32-NEXT: vse8.v v16, (a2)
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vmerge.vim v8, v24, 1, v0
-; RV32-NEXT: addi a0, sp, 128
-; RV32-NEXT: vse8.v v8, (a0)
-; RV32-NEXT: lbu a0, 0(a1)
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vim v24, v8, 1, v0
+; RV32-NEXT: vmseq.vi v0, v16, 0
+; RV32-NEXT: zext.b a0, a1
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: vse8.v v24, (a1)
+; RV32-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-NEXT: addi a1, sp, 128
+; RV32-NEXT: vse8.v v8, (a1)
+; RV32-NEXT: lbu a0, 0(a0)
 ; RV32-NEXT: addi sp, s0, -384
 ; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
 ; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -355,24 +354,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
 ; RV64-NEXT: addi s0, sp, 384
 ; RV64-NEXT: andi sp, sp, -128
-; RV64-NEXT: zext.b a1, a1
-; RV64-NEXT: mv a2, sp
-; RV64-NEXT: li a3, 128
-; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; RV64-NEXT: vle8.v v8, (a0)
 ; RV64-NEXT: addi a0, a0, 128
 ; RV64-NEXT: vle8.v v16, (a0)
-; RV64-NEXT: add a1, a2, a1
 ; RV64-NEXT: vmseq.vi v0, v8, 0
-; RV64-NEXT: vmv.v.i v24, 0
-; RV64-NEXT: vmseq.vi v8, v16, 0
-; RV64-NEXT: vmerge.vim v16, v24, 1, v0
-; RV64-NEXT: vse8.v v16, (a2)
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vmerge.vim v8, v24, 1, v0
-; RV64-NEXT: addi a0, sp, 128
-; RV64-NEXT: vse8.v v8, (a0)
-; RV64-NEXT: lbu a0, 0(a1)
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vim v24, v8, 1, v0
+; RV64-NEXT: vmseq.vi v0, v16, 0
+; RV64-NEXT: zext.b a0, a1
+; RV64-NEXT: mv a1, sp
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: vse8.v v24, (a1)
+; RV64-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-NEXT: addi a1, sp, 128
+; RV64-NEXT: vse8.v v8, (a1)
+; RV64-NEXT: lbu a0, 0(a0)
 ; RV64-NEXT: addi sp, s0, -384
 ; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
 ; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
@@ -386,24 +384,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
 ; RV32ZBS-NEXT: addi s0, sp, 384
 ; RV32ZBS-NEXT: andi sp, sp, -128
-; RV32ZBS-NEXT: zext.b a1, a1
-; RV32ZBS-NEXT: mv a2, sp
-; RV32ZBS-NEXT: li a3, 128
-; RV32ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV32ZBS-NEXT: li a2, 128
+; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; RV32ZBS-NEXT: vle8.v v8, (a0)
 ; RV32ZBS-NEXT: addi a0, a0, 128
 ; RV32ZBS-NEXT: vle8.v v16, (a0)
-; RV32ZBS-NEXT: add a1, a2, a1
 ; RV32ZBS-NEXT: vmseq.vi v0, v8, 0
-; RV32ZBS-NEXT: vmv.v.i v24, 0
-; RV32ZBS-NEXT: vmseq.vi v8, v16, 0
-; RV32ZBS-NEXT: vmerge.vim v16, v24, 1, v0
-; RV32ZBS-NEXT: vse8.v v16, (a2)
-; RV32ZBS-NEXT: vmv1r.v v0, v8
-; RV32ZBS-NEXT: vmerge.vim v8, v24, 1, v0
-; RV32ZBS-NEXT: addi a0, sp, 128
-; RV32ZBS-NEXT: vse8.v v8, (a0)
-; RV32ZBS-NEXT: lbu a0, 0(a1)
+; RV32ZBS-NEXT: vmv.v.i v8, 0
+; RV32ZBS-NEXT: vmerge.vim v24, v8, 1, v0
+; RV32ZBS-NEXT: vmseq.vi v0, v16, 0
+; RV32ZBS-NEXT: zext.b a0, a1
+; RV32ZBS-NEXT: mv a1, sp
+; RV32ZBS-NEXT: add a0, a1, a0
+; RV32ZBS-NEXT: vse8.v v24, (a1)
+; RV32ZBS-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32ZBS-NEXT: addi a1, sp, 128
+; RV32ZBS-NEXT: vse8.v v8, (a1)
+; RV32ZBS-NEXT: lbu a0, 0(a0)
 ; RV32ZBS-NEXT: addi sp, s0, -384
 ; RV32ZBS-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
 ; RV32ZBS-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
@@ -417,24 +414,23 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
 ; RV64ZBS-NEXT: addi s0, sp, 384
 ; RV64ZBS-NEXT: andi sp, sp, -128
-; RV64ZBS-NEXT: zext.b a1, a1
-; RV64ZBS-NEXT: mv a2, sp
-; RV64ZBS-NEXT: li a3, 128
-; RV64ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; RV64ZBS-NEXT: li a2, 128
+; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; RV64ZBS-NEXT: vle8.v v8, (a0)
 ; RV64ZBS-NEXT: addi a0, a0, 128
 ; RV64ZBS-NEXT: vle8.v v16, (a0)
-; RV64ZBS-NEXT: add a1, a2, a1
 ; RV64ZBS-NEXT: vmseq.vi v0, v8, 0
-; RV64ZBS-NEXT: vmv.v.i v24, 0
-; RV64ZBS-NEXT: vmseq.vi v8, v16, 0
-; RV64ZBS-NEXT: vmerge.vim v16, v24, 1, v0
-; RV64ZBS-NEXT: vse8.v v16, (a2)
-; RV64ZBS-NEXT: vmv1r.v v0, v8
-; RV64ZBS-NEXT: vmerge.vim v8, v24, 1, v0
-; RV64ZBS-NEXT: addi a0, sp, 128
-; RV64ZBS-NEXT: vse8.v v8, (a0)
-; RV64ZBS-NEXT: lbu a0, 0(a1)
+; RV64ZBS-NEXT: vmv.v.i v8, 0
+; RV64ZBS-NEXT: vmerge.vim v24, v8, 1, v0
+; RV64ZBS-NEXT: vmseq.vi v0, v16, 0
+; RV64ZBS-NEXT: zext.b a0, a1
+; RV64ZBS-NEXT: mv a1, sp
+; RV64ZBS-NEXT: add a0, a1, a0
+; RV64ZBS-NEXT: vse8.v v24, (a1)
+; RV64ZBS-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64ZBS-NEXT: addi a1, sp, 128
+; RV64ZBS-NEXT: vse8.v v8, (a1)
+; RV64ZBS-NEXT: lbu a0, 0(a0)
 ; RV64ZBS-NEXT: addi sp, s0, -384
 ; RV64ZBS-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
 ; RV64ZBS-NEXT: ld s0, 368(sp) # 8-byte Folded Reload

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll

Lines changed: 12 additions & 14 deletions
@@ -143,16 +143,15 @@ define void @deinterleave6_0_i8(ptr %in, ptr %out) {
 ; CHECK-LABEL: deinterleave6_0_i8:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v8, (a0)
 ; CHECK-NEXT: vmv.v.i v0, 2
-; CHECK-NEXT: vmv.v.i v8, 4
 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 8
+; CHECK-NEXT: vslidedown.vi v9, v8, 8
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v9, v9, 5, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vrgather.vi v9, v10, 4, v0.t
-; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; CHECK-NEXT: vmv.v.i v0, 4
+; CHECK-NEXT: vrgather.vi v8, v9, 4, v0.t
+; CHECK-NEXT: vse8.v v8, (a1)
 ; CHECK-NEXT: ret
 entry:
   %0 = load <16 x i8>, ptr %in, align 1
@@ -188,16 +187,15 @@ define void @deinterleave7_0_i8(ptr %in, ptr %out) {
 ; CHECK-LABEL: deinterleave7_0_i8:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v8, (a0)
 ; CHECK-NEXT: vmv.v.i v0, 2
-; CHECK-NEXT: vmv.v.i v8, 4
 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v9, 8
+; CHECK-NEXT: vslidedown.vi v9, v8, 8
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v9, v9, 6, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vrgather.vi v9, v10, 6, v0.t
-; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: vslidedown.vi v8, v8, 6, v0.t
+; CHECK-NEXT: vmv.v.i v0, 4
+; CHECK-NEXT: vrgather.vi v8, v9, 6, v0.t
+; CHECK-NEXT: vse8.v v8, (a1)
 ; CHECK-NEXT: ret
 entry:
   %0 = load <16 x i8>, ptr %in, align 1
