Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22993,6 +22993,9 @@
   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);

   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
+    if (Chain->getMemoryVT().isScalableVector())
+      return false;
+
     // If the chain has more than one use, then we can't reorder the mem ops.
     if (!SDValue(Chain, 0)->hasOneUse())
       break;
Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7712,6 +7712,12 @@
   EVT IdxVT = Idx.getValueType();
   unsigned NElts = VecVT.getVectorMinNumElements();
   if (VecVT.isScalableVector()) {
+    // If this is a constant index and we know the value is less than the
+    // minimum number of elements then it's safe to return Idx.
+    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
+      if (IdxCst->getZExtValue() < NElts)
+        return Idx;
+
     SDValue VS = DAG.getVScale(dl, IdxVT,
                                APInt(IdxVT.getFixedSizeInBits(),
                                      NElts));
Index: llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
===================================================================
--- llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
+++ llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@@ -12,15 +12,11 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: rdvl x9, #1
-; CHECK-NEXT: sub x9, x9, #1 // =1
 ; CHECK-NEXT: ptrue p0.b
 ; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: cmp x9, #0 // =0
 ; CHECK-NEXT: st1b { z0.b }, p0, [sp]
 ; CHECK-NEXT: st1b { z1.b }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: csel x9, x9, xzr, lo
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9]
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp]
 ; CHECK-NEXT: addvl sp, sp, #2
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
@@ -33,16 +29,12 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: rdvl x9, #1
-; CHECK-NEXT: sub x9, x9, #1 // =1
 ; CHECK-NEXT: ptrue p0.b
 ; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: mov w10, #15
-; CHECK-NEXT: cmp x9, #15 // =15
 ; CHECK-NEXT: st1b { z0.b }, p0, [sp]
 ; CHECK-NEXT: st1b { z1.b }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: csel x9, x9, x10, lo
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9]
+; CHECK-NEXT: orr x8, x8, #0xf
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8]
 ; CHECK-NEXT: addvl sp, sp, #2
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
@@ -78,15 +70,11 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: cnth x9
-; CHECK-NEXT: sub x9, x9, #1 // =1
 ; CHECK-NEXT: ptrue p0.h
 ; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: cmp x9, #0 // =0
 ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
 ; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
-; CHECK-NEXT: csel x9, x9, xzr, lo
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
 ; CHECK-NEXT: addvl sp, sp, #2
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
@@ -99,16 +87,12 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x29, [sp, #-16]!
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cnth x10 -; CHECK-NEXT: sub x10, x10, #1 // =1 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: mov w9, #7 -; CHECK-NEXT: cmp x10, #7 // =7 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x10, x9, lo -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1] +; CHECK-NEXT: orr x8, x8, #0xe +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -144,15 +128,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntw x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, #0 // =0 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x9, xzr, lo -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -165,16 +145,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntw x10 -; CHECK-NEXT: sub x10, x10, #1 // =1 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: mov w9, #3 -; CHECK-NEXT: cmp x10, #3 // =3 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x10, x9, lo -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] +; CHECK-NEXT: orr x8, x8, #0xc +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -210,15 +186,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, #0 // =0 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x9, xzr, lo -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -231,15 +203,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, #1 // =1 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csinc x9, x9, xzr, lo -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] +; CHECK-NEXT: orr x8, x8, #0x8 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -275,15 +244,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cnth x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, #0 // =0 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x9, xzr, lo -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1] +; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -296,16 +261,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cnth x10 -; CHECK-NEXT: sub x10, x10, #1 // =1 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: mov w9, #7 -; CHECK-NEXT: cmp x10, #7 // =7 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x10, x9, lo -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1] +; CHECK-NEXT: orr x8, x8, #0xe +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -341,15 +302,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntw x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, #0 // =0 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x9, xzr, lo -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -362,16 +319,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntw x10 -; CHECK-NEXT: sub x10, x10, #1 // =1 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: mov w9, #3 -; CHECK-NEXT: cmp x10, #3 // =3 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x10, x9, lo -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] +; CHECK-NEXT: orr x8, x8, #0xc +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -407,15 +360,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, #0 // =0 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x9, xzr, lo -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -428,15 +377,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, #1 // =1 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csinc x9, x9, xzr, lo -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] +; CHECK-NEXT: orr x8, x8, #0x8 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -473,17 +419,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sub x9, x9, #1 // =1 -; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1 +; CHECK-NEXT: mov z1.d, p1/z, #1 // =0x1 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, #1 // =1 -; CHECK-NEXT: st1d { z0.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csinc x9, x9, xzr, lo -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: orr x8, x8, #0x8 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] ; CHECK-NEXT: and z0.d, z0.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: addvl sp, sp, #2 @@ -499,18 +442,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntw x10 ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sub x10, x10, #1 // =1 -; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1 +; CHECK-NEXT: mov z1.s, p1/z, #1 // =0x1 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: mov w9, #2 -; CHECK-NEXT: cmp x10, #2 // =2 -; CHECK-NEXT: st1w { z0.s }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x10, x9, lo -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] +; CHECK-NEXT: st1w { z0.s }, p0, [sp] +; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] +; CHECK-NEXT: orr x8, x8, #0x8 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] ; CHECK-NEXT: and z0.s, z0.s, #0x1 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: addvl sp, sp, #2 @@ -526,18 +465,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cnth x10 ; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: sub x10, x10, #1 // =1 -; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1 +; CHECK-NEXT: mov z1.h, p1/z, #1 // =0x1 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: mov w9, #4 -; CHECK-NEXT: cmp x10, #4 // =4 -; CHECK-NEXT: st1h { z0.h }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x10, x9, lo -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1] +; CHECK-NEXT: st1h { z0.h }, p0, [sp] +; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] +; CHECK-NEXT: orr x8, x8, #0x8 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8] ; CHECK-NEXT: and z0.h, z0.h, #0x1 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: addvl sp, sp, #2 @@ -553,18 +488,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: rdvl x9, #1 ; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: sub x9, x9, #1 // =1 -; CHECK-NEXT: st1b { z0.b }, p0, [sp] -; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 +; CHECK-NEXT: mov z1.b, p1/z, #1 // =0x1 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: mov w10, #8 -; CHECK-NEXT: cmp x9, #8 // =8 -; CHECK-NEXT: st1b { z0.b }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csel x9, x9, x10, lo -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9] +; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: st1b { z1.b }, p0, [x8, #1, mul vl] +; CHECK-NEXT: orr x8, x8, #0x8 +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8] ; CHECK-NEXT: and z0.b, z0.b, #0x1 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; CHECK-NEXT: addvl sp, sp, #2 @@ -580,15 +511,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cmp x9, #1 // =1 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: csinc x9, x9, xzr, lo -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] +; CHECK-NEXT: orr x8, x8, #0x8 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -602,18 +530,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: cnth x10 -; CHECK-NEXT: sub x10, x10, #1 // =1 -; CHECK-NEXT: mov w9, #2 -; CHECK-NEXT: cmp x10, #2 // =2 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: csel x9, x10, x9, lo ; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl] ; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl] -; CHECK-NEXT: orr x8, x8, x9, lsl #2 +; CHECK-NEXT: orr x8, x8, #0x8 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #4 Index: llvm/test/CodeGen/AArch64/split-vector-insert.ll =================================================================== --- llvm/test/CodeGen/AArch64/split-vector-insert.ll +++ llvm/test/CodeGen/AArch64/split-vector-insert.ll @@ -20,35 +20,31 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #0 // =0 -; CHECK-NEXT: csel x10, x8, xzr, lo ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: lsl x10, x10, #3 +; CHECK-NEXT: cntd x9 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x10] +; CHECK-NEXT: str q1, [sp] +; CHECK-NEXT: sub x9, x9, #1 // =1 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] -; CHECK-NEXT: mov w9, #2 -; CHECK-NEXT: cmp x8, #2 // =2 -; CHECK-NEXT: csel x9, x8, x9, lo +; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: cmp x9, #2 // =2 +; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: addvl x10, sp, #1 -; CHECK-NEXT: lsl x9, x9, #3 +; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: str q2, [x10, x9] +; CHECK-NEXT: str q2, [x10, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl] -; CHECK-NEXT: mov w9, #4 -; CHECK-NEXT: cmp x8, #4 // =4 -; CHECK-NEXT: csel x9, x8, x9, lo +; CHECK-NEXT: mov 
w8, #4
+; CHECK-NEXT: cmp x9, #4 // =4
+; CHECK-NEXT: csel x8, x9, x8, lo
 ; CHECK-NEXT: addvl x10, sp, #2
-; CHECK-NEXT: lsl x9, x9, #3
+; CHECK-NEXT: lsl x8, x8, #3
 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl]
-; CHECK-NEXT: str q3, [x10, x9]
+; CHECK-NEXT: str q3, [x10, x8]
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl]
-; CHECK-NEXT: mov w9, #6
-; CHECK-NEXT: cmp x8, #6 // =6
-; CHECK-NEXT: csel x8, x8, x9, lo
+; CHECK-NEXT: mov w8, #6
+; CHECK-NEXT: cmp x9, #6 // =6
+; CHECK-NEXT: csel x8, x9, x8, lo
 ; CHECK-NEXT: addvl x10, sp, #3
 ; CHECK-NEXT: lsl x8, x8, #3
 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #3, mul vl]
@@ -57,6 +53,7 @@
 ; CHECK-NEXT: addvl sp, sp, #4
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
+
  %r = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> %a, <8 x i64> %b, i64 0)
  ret <vscale x 2 x i64> %r
 }
@@ -73,35 +70,31 @@
 ; CHECK-NEXT: addvl sp, sp, #-4
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
 ; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: cntd x8
-; CHECK-NEXT: sub x8, x8, #1 // =1
-; CHECK-NEXT: cmp x8, #0 // =0
-; CHECK-NEXT: csel x10, x8, xzr, lo
 ; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: lsl x10, x10, #3
+; CHECK-NEXT: cntd x9
 ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: str q1, [x9, x10]
+; CHECK-NEXT: str q1, [sp]
+; CHECK-NEXT: sub x9, x9, #1 // =1
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
-; CHECK-NEXT: mov w9, #2
-; CHECK-NEXT: cmp x8, #2 // =2
-; CHECK-NEXT: csel x9, x8, x9, lo
+; CHECK-NEXT: mov w8, #2
+; CHECK-NEXT: cmp x9, #2 // =2
+; CHECK-NEXT: csel x8, x9, x8, lo
 ; CHECK-NEXT: addvl x10, sp, #1
-; CHECK-NEXT: lsl x9, x9, #3
+; CHECK-NEXT: lsl x8, x8, #3
 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
-; CHECK-NEXT: str q2, [x10, x9]
+; CHECK-NEXT: str q2, [x10, x8]
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl]
-; CHECK-NEXT: mov w9, #4
-; CHECK-NEXT: cmp x8, #4 // =4
-; CHECK-NEXT: csel x9, x8, x9, lo
+; CHECK-NEXT: mov w8, #4
+; CHECK-NEXT: cmp x9, #4 // =4
+; CHECK-NEXT: csel x8, x9, x8, lo
 ; CHECK-NEXT: addvl x10, sp, #2
-; CHECK-NEXT: lsl x9, x9, #3
+; CHECK-NEXT: lsl x8, x8, #3
 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl]
-; CHECK-NEXT: str q3, [x10, x9]
+; CHECK-NEXT: str q3, [x10, x8]
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl]
-; CHECK-NEXT: mov w9, #6
-; CHECK-NEXT: cmp x8, #6 // =6
-; CHECK-NEXT: csel x8, x8, x9, lo
+; CHECK-NEXT: mov w8, #6
+; CHECK-NEXT: cmp x9, #6 // =6
+; CHECK-NEXT: csel x8, x9, x8, lo
 ; CHECK-NEXT: addvl x10, sp, #3
 ; CHECK-NEXT: lsl x8, x8, #3
 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #3, mul vl]
@@ -110,6 +103,7 @@
 ; CHECK-NEXT: addvl sp, sp, #4
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
+
  %r = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> %a, <8 x double> %b, i64 0)
  ret <vscale x 2 x double> %r
 }
Index: llvm/test/CodeGen/AArch64/sve-extract-vector.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-extract-vector.ll
+++ llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -17,15 +17,9 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x29, [sp, #-16]!
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: csinc x8, x8, xzr, lo ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: lsl x8, x8, #3 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: ldur q0, [sp, #8] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -49,15 +43,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntw x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: csinc x8, x8, xzr, lo ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: lsl x8, x8, #2 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: ldur q0, [sp, #4] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -81,15 +69,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cnth x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: csinc x8, x8, xzr, lo ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: lsl x8, x8, #1 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: ldur q0, [sp, #2] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -113,14 +95,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: sub x8, x8, #1 // =1 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: cmp x8, #1 // =1 ; CHECK-NEXT: st1b { z0.b }, p0, [sp] -; CHECK-NEXT: csinc x8, x8, xzr, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: ldur q0, [sp, #1] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/sve-insert-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -6,15 +6,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #0 // =0 -; CHECK-NEXT: csel x8, x8, xzr, lo ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: lsl x8, x8, #3 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: str q1, [sp] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -28,15 +22,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 -; CHECK-NEXT: csinc x8, x8, xzr, lo ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: lsl x8, x8, #3 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: stur q1, [sp, #8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -50,15 +38,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntw x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #0 // =0 -; CHECK-NEXT: csel x8, x8, xzr, lo ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: lsl x8, x8, #2 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: str q1, [sp] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -72,15 +54,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntw x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 -; CHECK-NEXT: csinc x8, x8, xzr, lo ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: lsl x8, x8, #2 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: stur q1, [sp, #4] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -94,15 +70,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cnth x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #0 // =0 -; CHECK-NEXT: csel x8, x8, xzr, lo ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: lsl x8, x8, #1 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: str q1, [sp] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -116,15 +86,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cnth x8 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 -; CHECK-NEXT: csinc x8, x8, xzr, lo ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: lsl x8, x8, #1 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: stur q1, [sp, #2] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -138,14 +102,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #0 // =0 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: csel x8, x8, xzr, lo -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1b { z0.b }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: str q1, [sp] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -159,14 +118,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: sub x8, x8, #1 // =1 -; CHECK-NEXT: cmp x8, #1 // =1 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: csinc x8, x8, xzr, lo -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1b { z0.b }, p0, [sp] -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: stur q1, [sp, #1] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -615,22 +615,12 @@ ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: addi a0, sp, 80 +; CHECK-NEXT: vse64.v v25, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: slli a2, a0, 4 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: addi a3, zero, 8 -; CHECK-NEXT: bltu a2, a3, .LBB29_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addi a2, zero, 8 -; CHECK-NEXT: .LBB29_2: -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: add a2, a3, a2 -; CHECK-NEXT: vsetivli a4, 2, e64,m1,ta,mu -; CHECK-NEXT: vse64.v v25, (a2) -; CHECK-NEXT: slli a0, a0, 6 -; CHECK-NEXT: add a2, a3, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: add a2, a2, a0 ; CHECK-NEXT: vl8re64.v v8, (a2) ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vl8re64.v v16, (a2)
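A minimal, hypothetical illustration (not part of the patch) of the pattern the TargetLowering.cpp change targets, written in the style of the AArch64 tests above; the function name is invented, while the intrinsic is the one those tests already use. When a subvector extract or insert uses a compile-time constant index that is provably below the minimum element count of the scalable container, the index-clamping code modified above can return the index unchanged, which is why the cmp/csel and csinc clamping sequences disappear from the updated CHECK lines.

; Hypothetical example following the conventions of sve-extract-vector.ll.
define <4 x i32> @extract_v4i32_nxv4i32_idx0(<vscale x 4 x i32> %vec) {
  ; The constant index 0 is always less than the minimum element count (4)
  ; of <vscale x 4 x i32>, so no runtime clamping of the index is required.
  %ret = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
  ret <4 x i32> %ret
}

declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32>, i64)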