diff --git a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp --- a/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp +++ b/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -194,7 +194,7 @@ // Remove VPNOTs while there's still room in the block, so we can make the // largest block possible. - ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Then; + ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else; while (BlockSize < 4 && Iter != EndIter && Iter->getOpcode() == ARM::MVE_VPNOT) { @@ -222,28 +222,19 @@ DeadInstructions.push_back(&*Iter); ++Iter; - // Replace "then" by "elses" in the block until we find an instruction that - // defines VPR, then after that leave everything to "t". + // Replace the predicates of the instructions we're adding. // Note that we are using "Iter" to iterate over the block so we can update // it at the same time. - bool ChangeToElse = (CurrentPredicate == ARMVCC::Then); for (; Iter != VPNOTBlockEndIter; ++Iter) { // Find the register in which the predicate is int OpIdx = findFirstVPTPredOperandIdx(*Iter); assert(OpIdx != -1); - // Update the mask + change the predicate to an else if needed. - if (ChangeToElse) { - // Change the predicate and update the mask - Iter->getOperand(OpIdx).setImm(ARMVCC::Else); - BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Else); - // Reset back to a "then" predicate if this instruction defines VPR. - if (Iter->definesRegister(ARM::VPR)) - ChangeToElse = false; - } else - BlockMask = ExpandBlockMask(BlockMask, ARMVCC::Then); - - LLVM_DEBUG(dbgs() << " adding: "; Iter->dump()); + // Change the predicate and update the mask + Iter->getOperand(OpIdx).setImm(CurrentPredicate); + BlockMask = ExpandBlockMask(BlockMask, CurrentPredicate); + + LLVM_DEBUG(dbgs() << " adding : "; Iter->dump()); } CurrentPredicate = diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-not.ll b/llvm/test/CodeGen/Thumb2/mve-pred-not.ll --- a/llvm/test/CodeGen/Thumb2/mve-pred-not.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-not.ll @@ -405,12 +405,42 @@ define arm_aapcs_vfpcc <4 x i32> @vpttet_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: vpttet_v4i1: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vcmp.s32 ge, q0, q2 +; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill +; CHECK-NEXT: vpstt +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vpnot +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sge <4 x i32> %x, %z + %1 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %0, <4 x i32> %x) + %2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %0, <4 x i32> %1) + %3 = xor <4 x i1> %0, + %4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %3, <4 x i32> %2) + %5 = xor <4 x i1> %3, + %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %5, <4 x i32> %4) + ret <4 x i32> %6 +} + +define arm_aapcs_vfpcc <4 x i32> @vpttee_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: vpttee_v4i1: +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov q3, q2 -; CHECK-NEXT: vpttet.s32 ge, q0, q2 +; CHECK-NEXT: vpttee.s32 ge, q0, q2 ; CHECK-NEXT: vmaxt.s32 q3, q0, q1 ; CHECK-NEXT: vcmpt.s32 gt, q0, zr -; CHECK-NEXT: vcmpe.s32 gt, q1, zr -; CHECK-NEXT: vmovt q3, q2 +; CHECK-NEXT: vmove q3, q2 +; CHECK-NEXT: vmove q3, q2 ; CHECK-NEXT: vmov q0, q3 ; CHECK-NEXT: bx lr entry: @@ -419,20 +449,19 @@ %2 = icmp sgt <4 x i32> %x, zeroinitializer %3 = and <4 x i1> %0, %2 %4 = xor <4 x i1> %3, - %5 = icmp sgt <4 x i32> %y, zeroinitializer - %6 = and <4 x i1> %5, %4 - %7 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %6, <4 x i32> %1) - ret <4 x i32> %7 + %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %1) + %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %5) + ret <4 x i32> %6 } -define arm_aapcs_vfpcc <4 x i32> @vpttee_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { -; CHECK-LABEL: vpttee_v4i1: +define arm_aapcs_vfpcc <4 x i32> @vpttee2_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: vpttee2_v4i1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov q3, q2 ; CHECK-NEXT: vpttee.s32 ge, q0, q2 ; CHECK-NEXT: vmaxt.s32 q3, q0, q1 ; CHECK-NEXT: vcmpt.s32 gt, q0, zr -; CHECK-NEXT: vmove q3, q2 +; CHECK-NEXT: vcmpe.s32 gt, q1, zr ; CHECK-NEXT: vmove q3, q2 ; CHECK-NEXT: vmov q0, q3 ; CHECK-NEXT: bx lr @@ -442,9 +471,10 @@ %2 = icmp sgt <4 x i32> %x, zeroinitializer %3 = and <4 x i1> %0, %2 %4 = xor <4 x i1> %3, - %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %1) - %6 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %4, <4 x i32> %5) - ret <4 x i32> %6 + %5 = icmp sgt <4 x i32> %y, zeroinitializer + %6 = and <4 x i1> %5, %4 + %7 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %z, <4 x i32> %z, <4 x i1> %6, <4 x i32> %1) + ret <4 x i32> %7 } define arm_aapcs_vfpcc <4 x i32> @vpttte_v4i1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir --- a/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir +++ b/llvm/test/CodeGen/Thumb2/mve-vpt-block-elses.mir @@ -69,11 +69,11 @@ ; CHECK: liveins: $q0, $q1, $q2 ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit killed $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { - ; CHECK: MVE_VPTv4s32 5, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: MVE_VPTv4s32 7, renamable $q0, renamable $q2, 10, implicit-def $vpr ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 ; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q0, $zr, 12, 1, internal killed renamable $vpr ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr - ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal renamable $q3 ; CHECK: } ; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0 ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 @@ -87,20 +87,20 @@ ; CHECK: $q0 = MVE_VORR $q3, $q3, 0, $noreg, undef $q0 ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { - ; CHECK: MVE_VPTv4s32 13, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: MVE_VPTv4s32 15, renamable $q0, renamable $q2, 10, implicit-def $vpr ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr - ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3 ; CHECK: } ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { - ; CHECK: MVE_VPTv4s32 9, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: MVE_VPTv4s32 15, renamable $q0, renamable $q2, 10, implicit-def $vpr ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr - ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, internal renamable $q3 - ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3 ; CHECK: } ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 @@ -122,10 +122,10 @@ ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 ; CHECK: BUNDLE implicit-def dead $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit $q1, implicit killed $q3, implicit $zr { - ; CHECK: MVE_VPTv4s32 10, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: MVE_VPTv4s32 14, renamable $q0, renamable $q2, 10, implicit-def $vpr ; CHECK: renamable $q3 = MVE_VMAXs32 renamable $q0, renamable $q1, 1, internal renamable $vpr, killed renamable $q3 ; CHECK: renamable $vpr = MVE_VCMPs32r renamable $q1, $zr, 12, 2, internal killed renamable $vpr - ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal killed renamable $vpr, internal killed renamable $q3 + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal killed renamable $vpr, internal killed renamable $q3 ; CHECK: } ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 @@ -145,6 +145,14 @@ ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3 ; CHECK: } ; CHECK: $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + ; CHECK: $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + ; CHECK: BUNDLE implicit-def $vpr, implicit-def $q3, implicit-def $d6, implicit-def $s12, implicit-def $s13, implicit-def $d7, implicit-def $s14, implicit-def $s15, implicit $q0, implicit $q2, implicit killed $q3 { + ; CHECK: MVE_VPTv4s32 13, renamable $q0, renamable $q2, 10, implicit-def $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, internal renamable $vpr, killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 2, internal killed renamable $vpr + ; CHECK: renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 2, internal renamable $vpr, internal killed renamable $q3 + ; CHECK: renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 1, internal killed renamable $vpr + ; CHECK: } ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0 renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 @@ -226,6 +234,15 @@ renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 $q0 = MVE_VORR killed $q3, killed $q3, 0, $noreg, undef $q0 + $q3 = MVE_VORR $q2, $q2, 0, $noreg, undef $q3 + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 10, 0, $noreg + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 1, killed renamable $vpr + renamable $q3 = MVE_VORR renamable $q2, renamable $q2, 1, renamable $vpr, killed renamable $q3 + renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg + renamable $vpr = MVE_VCMPs32 renamable $q0, renamable $q2, 11, 1, killed renamable $vpr + tBX_RET 14, $noreg, implicit $q0 ...