Index: include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -121,6 +121,9 @@
   void extractParts(unsigned Reg, LLT Ty, int NumParts,
                     SmallVectorImpl<unsigned> &VRegs);
 
+  LegalizeResult
+  fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
+
   LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
 
   MachineRegisterInfo &MRI;
Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1220,15 +1220,82 @@
   }
 }
 
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
+                                        LLT NarrowTy) {
+  unsigned DstReg = MI.getOperand(0).getReg();
+  unsigned Src0Reg = MI.getOperand(2).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(Src0Reg);
+
+  unsigned NumParts;
+  LLT NarrowTy0, NarrowTy1;
+
+  if (TypeIdx == 0) {
+    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
+    unsigned OldElts = DstTy.getNumElements();
+
+    NarrowTy0 = NarrowTy;
+    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
+      DstTy.getNumElements();
+    NarrowTy1 = NarrowTy.isVector() ?
+      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
+      SrcTy.getElementType();
+
+  } else {
+    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
+    unsigned OldElts = SrcTy.getNumElements();
+
+    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
+      NarrowTy.getNumElements();
+    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
+                            DstTy.getScalarSizeInBits());
+    NarrowTy1 = NarrowTy;
+  }
+
+  // FIXME: Don't know how to handle the situation where the small vectors
+  // aren't all the same size yet.
+  if (NarrowTy1.isVector() &&
+      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
+    return UnableToLegalize;
+
+  CmpInst::Predicate Pred
+    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+  SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
+  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
+  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
+
+  for (unsigned I = 0; I < NumParts; ++I) {
+    unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+
+    if (MI.getOpcode() == TargetOpcode::G_ICMP)
+      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
+    else {
+      MachineInstr *NewCmp
+        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
+      NewCmp->setFlags(MI.getFlags());
+    }
+
+    DstRegs.push_back(DstReg);
+  }
+
+  if (NarrowTy1.isVector())
+    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+  else
+    MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
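The part bookkeeping above is easiest to follow on a concrete case. The standalone sketch below (not part of the patch; ToyLLT and pickCmpTypes are invented stand-ins for LLT and for the TypeIdx == 0 branch) traces a <4 x s1> compare of <4 x s32> operands narrowed with a scalar NarrowTy: it yields 4 pieces, each a scalar s32 compare producing one s1, which is exactly the shape the vector tests added below check for.

#include <cassert>
#include <cstdio>

// Toy stand-in for LLT: NumElts == 1 means scalar.
struct ToyLLT {
  unsigned NumElts;
  unsigned ScalarBits;
  bool isVector() const { return NumElts > 1; }
};

struct CmpSplit {
  unsigned NumParts;
  ToyLLT NarrowTy0; // per-piece result type
  ToyLLT NarrowTy1; // per-piece operand type
};

// Mirrors the TypeIdx == 0 branch of fewerElementsVectorCmp.
CmpSplit pickCmpTypes(ToyLLT DstTy, ToyLLT SrcTy, ToyLLT NarrowTy) {
  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.NumElts : 1;
  CmpSplit S;
  S.NarrowTy0 = NarrowTy;
  S.NumParts = NarrowTy.isVector() ? DstTy.NumElts / NewElts : DstTy.NumElts;
  // With a scalar NarrowTy, each piece compares one source element.
  S.NarrowTy1 = NarrowTy.isVector()
                    ? ToyLLT{NarrowTy.NumElts, SrcTy.ScalarBits}
                    : ToyLLT{1, SrcTy.ScalarBits};
  return S;
}

int main() {
  ToyLLT DstTy = {4, 1};    // <4 x s1>
  ToyLLT SrcTy = {4, 32};   // <4 x s32>
  ToyLLT NarrowTy = {1, 1}; // requested narrow result type: s1
  CmpSplit S = pickCmpTypes(DstTy, SrcTy, NarrowTy);
  assert(S.NumParts == 4 && !S.NarrowTy0.isVector());
  printf("%u pieces: s%u = cmp s%u\n", S.NumParts, S.NarrowTy0.ScalarBits,
         S.NarrowTy1.ScalarBits); // 4 pieces: s1 = cmp s32
}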
 LegalizerHelper::LegalizeResult
 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
-  // FIXME: Don't know how to handle secondary types yet.
-  if (TypeIdx != 0)
-    return UnableToLegalize;
+  using namespace TargetOpcode;
 
   MIRBuilder.setInstr(MI);
 
-  switch (MI.getOpcode()) {
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
   default:
     return UnableToLegalize;
   case TargetOpcode::G_IMPLICIT_DEF: {
@@ -1281,8 +1348,15 @@
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_ICMP:
+  case TargetOpcode::G_FCMP:
+    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_STORE: {
+    // FIXME: Don't know how to handle secondary types yet.
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+
     bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
     unsigned ValReg = MI.getOperand(0).getReg();
     unsigned AddrReg = MI.getOperand(1).getReg();
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -161,10 +161,6 @@
   // Must use fadd + fneg
   setAction({G_FSUB, S64}, Lower);
 
-  setAction({G_FCMP, S1}, Legal);
-  setAction({G_FCMP, 1, S32}, Legal);
-  setAction({G_FCMP, 1, S64}, Legal);
-
   getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
     .legalFor({{S64, S32}, {S32, S16}, {S64, S16}})
     .fewerElementsIf(
@@ -203,8 +199,14 @@
 
   setAction({G_BLOCK_ADDR, CodePtr}, Legal);
 
-  setAction({G_ICMP, S1}, Legal);
-  setAction({G_ICMP, 1, S32}, Legal);
+  getActionDefinitionsBuilder({G_ICMP, G_FCMP})
+    .legalFor({{S1, S32}, {S1, S64}})
+    .widenScalarToNextPow2(1)
+    .clampScalar(1, S32, S64)
+    .clampMaxNumElements(0, S1, 1)
+    .clampMaxNumElements(1, S32, 1);
+
   setAction({G_CTLZ, S32}, Legal);
   setAction({G_CTLZ_ZERO_UNDEF, S32}, Legal);
@@ -326,7 +328,12 @@
     .legalForCartesianProduct(AllS64Vectors, {S64})
     .clampNumElements(0, V16S32, V16S32)
     .clampNumElements(0, V2S64, V8S64)
-    .minScalarSameAs(1, 0);
+    .minScalarSameAs(1, 0)
+    // FIXME: Sort of a hack to make progress on other legalizations. Shouldn't
+    // really be valid for i1, but I guess RegBank constraints can make it work?
+    .legalIf([=](const LegalityQuery &Query) {
+      return Query.Types[0].getScalarSizeInBits() < 32;
+    });
 
   // TODO: Support any combination of v2s32
   getActionDefinitionsBuilder(G_CONCAT_VECTORS)
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
@@ -1,27 +1,153 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
 
 ---
-name: test_fcmp_f32
+name: test_fcmp_s32
 body: |
-  bb.0:
+  bb.0.entry:
     liveins: $vgpr0
 
-    ; CHECK-LABEL: name: test_fcmp_f32
+    ; CHECK-LABEL: name: test_fcmp_s32
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[COPY]]
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]]
+    ; CHECK: $vgpr0 = COPY [[SELECT]](s32)
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = COPY $vgpr0
-
-    %2:_(s1) = G_FCMP floatpred(uge), %0, %1
+    %2:_(s1) = G_FCMP floatpred(oeq), %0, %1
+    %3:_(s32) = G_SELECT %2, %0, %1
+    $vgpr0 = COPY %3
 ...
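With the ruleset above, {S1, S32} and {S1, S64} compares are matched by legalFor and survive unchanged, as in the test just above. Other operand widths are first rounded up to a power of 2 and then clamped into [S32, S64]; the s16 test below shows the resulting widening, realized for G_FCMP as a G_FPEXT of both operands to s32. A hedged sketch of that width computation follows, collapsing into one function what the legalizer really does one rule application at a time (nextPow2 and legalizedCmpOperandBits are invented helper names):

#include <algorithm>
#include <cstdio>
#include <initializer_list>

// Smallest power of 2 >= Bits, as widenScalarToNextPow2(1) would pick.
unsigned nextPow2(unsigned Bits) {
  unsigned P = 1;
  while (P < Bits)
    P *= 2;
  return P;
}

// Operand width after widenScalarToNextPow2(1) + clampScalar(1, S32, S64).
unsigned legalizedCmpOperandBits(unsigned Bits) {
  unsigned Widened = nextPow2(Bits);
  return std::min(std::max(Widened, 32u), 64u);
}

int main() {
  for (unsigned Bits : {8u, 16u, 32u, 48u, 64u})
    printf("s%u operand -> s%u compare\n", Bits, legalizedCmpOperandBits(Bits));
  // s8 -> s32, s16 -> s32, s32 -> s32, s48 -> s64, s64 -> s64
}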
+
 ---
-name: test_fcmp_f64
+name: test_fcmp_s64
 body: |
-  bb.0:
+  bb.0.entry:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_fcmp_f64
+    ; CHECK-LABEL: name: test_fcmp_s64
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s64), [[COPY]]
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]]
+    ; CHECK: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s64) = COPY $vgpr0_vgpr1
+    %2:_(s1) = G_FCMP floatpred(oeq), %0, %1
+    %3:_(s64) = G_SELECT %2, %0, %1
+    $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: test_fcmp_s16
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fcmp_s16
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+    ; CHECK: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
+    ; CHECK: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[FPEXT]](s32), [[FPEXT1]]
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[COPY1]], [[COPY2]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY3]](s32)
+    %0:_(s16) = G_CONSTANT i16 0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s16) = G_TRUNC %1
+    %3:_(s1) = G_FCMP floatpred(oeq), %0, %2
+    %4:_(s16) = G_SELECT %3, %0, %2
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_fcmp_v2s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_fcmp_v2s32
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[UV]]
+    ; CHECK: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[UV1]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s1>) = G_BUILD_VECTOR [[FCMP]](s1), [[FCMP1]](s1)
+    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s1>)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(<2 x s32>) = G_BUILD_VECTOR %0, %0
+    %2:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %3:_(<2 x s1>) = G_FCMP floatpred(oeq), %1, %2
+    S_NOP 0, implicit %3
+...
+
+---
+name: test_fcmp_v2s32_flags
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_fcmp_v2s32_flags
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK: %8:_(s1) = nnan G_FCMP floatpred(oeq), [[C]](s32), [[UV]]
+    ; CHECK: %9:_(s1) = nnan G_FCMP floatpred(oeq), [[C]](s32), [[UV1]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s1>) = G_BUILD_VECTOR %8(s1), %9(s1)
+    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s1>)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(<2 x s32>) = G_BUILD_VECTOR %0, %0
+    %2:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %3:_(<2 x s1>) = nnan G_FCMP floatpred(oeq), %1, %2
+    S_NOP 0, implicit %3
+...
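The flags test above pins down the NewCmp->setFlags(MI.getFlags()) line in the helper: nnan must reappear on every scalar piece. Note also that the FIXME guard in fewerElementsVectorCmp only fires when the pieces are themselves vectors; with a scalar piece type it is skipped entirely, which is why the v3s32 test below legalizes cleanly into three scalar compares despite the odd element count. A small sketch of that guard (blockedByGuard is an invented name):

#include <cassert>

// Mirrors: NarrowTy1.isVector() &&
//          NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements()
bool blockedByGuard(bool PieceIsVector, unsigned PieceElts, unsigned NumParts,
                    unsigned DstElts) {
  return PieceIsVector && PieceElts * NumParts != DstElts;
}

int main() {
  assert(!blockedByGuard(false, 1, 3, 3)); // v3 -> 3 scalar pieces: fine
  assert(blockedByGuard(true, 2, 1, 3));   // v3 -> <2 x ...> pieces: bail out
}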
+
+---
+name: test_fcmp_v3s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: test_fcmp_v3s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
+    ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV3]]
+    ; CHECK: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s32), [[UV4]]
+    ; CHECK: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV5]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s1>) = G_BUILD_VECTOR [[FCMP]](s1), [[FCMP1]](s1), [[FCMP2]](s1)
+    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s1>)
+    %0:_(<3 x s32>) = G_IMPLICIT_DEF
+    %1:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %2:_(<3 x s1>) = G_FCMP floatpred(oeq), %0, %1
+    S_NOP 0, implicit %2
+...
+
+---
+name: test_fcmp_v4s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_fcmp_v4s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+    ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load 16)
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV4]]
+    ; CHECK: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s32), [[UV5]]
+    ; CHECK: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV6]]
+    ; CHECK: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV3]](s32), [[UV7]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s1>) = G_BUILD_VECTOR [[FCMP]](s1), [[FCMP1]](s1), [[FCMP2]](s1), [[FCMP3]](s1)
+    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s1>)
+    %0:_(p1) = G_IMPLICIT_DEF
+    %1:_(<4 x s32>) = G_LOAD %0 :: (volatile load 16)
+    %2:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %3:_(<4 x s1>) = G_FCMP floatpred(oeq), %1, %2
+    S_NOP 0, implicit %3
-
-    %2:_(s1) = G_FCMP floatpred(uge), %0, %1
 ...
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
@@ -2,11 +2,11 @@
 # RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
 
 ---
-name: test_icmp
+name: test_icmp_s32
 body: |
   bb.0.entry:
     liveins: $vgpr0
 
-    ; CHECK-LABEL: name: test_icmp
+    ; CHECK-LABEL: name: test_icmp_s32
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]]
@@ -15,6 +15,152 @@
     %0:_(s32) = G_CONSTANT i32 0
     %1:_(s32) = COPY $vgpr0
     %2:_(s1) = G_ICMP intpred(ne), %0, %1
-    %3:_(s32) = G_SELECT %2(s1), %0(s32), %1(s32)
+    %3:_(s32) = G_SELECT %2, %0, %1
     $vgpr0 = COPY %3
 ...
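The scalar test above never reaches fewerElementsVectorCmp: a (s1, s32) query matches the legalFor list directly, so the G_ICMP is left alone and only its users are legalized. A toy sketch of that pair matching (cmpIsDirectlyLegal is an invented name; the real LegalizeRuleSet machinery is far more general):

#include <cstdio>
#include <utility>
#include <vector>

using TyPair = std::pair<unsigned, unsigned>; // (result bits, operand bits)

// Mirrors legalFor({{S1, S32}, {S1, S64}}).
bool cmpIsDirectlyLegal(TyPair Query) {
  const std::vector<TyPair> LegalPairs = {{1, 32}, {1, 64}};
  for (const TyPair &P : LegalPairs)
    if (P == Query)
      return true;
  return false;
}

int main() {
  printf("s1 = icmp s32: %d\n", cmpIsDirectlyLegal({1, 32})); // 1: unchanged
  printf("s1 = icmp s16: %d\n", cmpIsDirectlyLegal({1, 16})); // 0: widened
}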
+
+---
+name: test_icmp_s64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_icmp_s64
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s64), [[COPY]]
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[COPY]]
+    ; CHECK: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = COPY $vgpr0_vgpr1
+    %2:_(s1) = G_ICMP intpred(ne), %0, %1
+    %3:_(s64) = G_SELECT %2, %0, %1
+    $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: test_icmp_s16
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_icmp_s16
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND]](s32), [[AND1]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[COPY4]]
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY5]](s32)
+    %0:_(s16) = G_CONSTANT i16 0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s16) = G_TRUNC %1
+    %3:_(s1) = G_ICMP intpred(ne), %0, %2
+    %4:_(s16) = G_SELECT %3, %0, %2
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_icmp_s8
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_icmp_s8
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND]](s32), [[AND1]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[COPY3]], [[COPY4]]
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY5]](s32)
+    %0:_(s8) = G_CONSTANT i8 0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s8) = G_TRUNC %1
+    %3:_(s1) = G_ICMP intpred(ne), %0, %2
+    %4:_(s8) = G_SELECT %3, %0, %2
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
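The s16 and s8 checks above encode how an integer compare is widened: both operands are anyext'ed to s32 and masked back to the original width (the G_AND with 65535 or 255), so an unsigned or equality predicate on the narrow type gives the same answer on the wide type. A hedged sketch of the ne case (icmpNe8ViaS32 is an invented name):

#include <cassert>
#include <cstdint>

// s8 'ne' compare performed on s32, as in test_icmp_s8 above.
bool icmpNe8ViaS32(uint32_t A, uint32_t B) {
  uint32_t MaskedA = A & 0xFF; // G_AND %a, 255
  uint32_t MaskedB = B & 0xFF; // G_AND %b, 255
  return MaskedA != MaskedB;   // G_ICMP intpred(ne) on s32
}

int main() {
  assert(!icmpNe8ViaS32(0x100, 0x200)); // high bits are garbage and ignored
  assert(icmpNe8ViaS32(0x01, 0x02));
}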
+
+---
+name: test_icmp_v2s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_icmp_v2s32
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[UV1]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s1>) = G_BUILD_VECTOR [[ICMP]](s1), [[ICMP1]](s1)
+    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s1>)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(<2 x s32>) = G_BUILD_VECTOR %0, %0
+    %2:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %3:_(<2 x s1>) = G_ICMP intpred(ne), %1, %2
+    S_NOP 0, implicit %3
+...
+
+---
+name: test_icmp_v3s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: test_icmp_v3s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
+    ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
+    ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s1>) = G_BUILD_VECTOR [[ICMP]](s1), [[ICMP1]](s1), [[ICMP2]](s1)
+    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s1>)
+    %0:_(<3 x s32>) = G_IMPLICIT_DEF
+    %1:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %2:_(<3 x s1>) = G_ICMP intpred(ne), %0, %1
+    S_NOP 0, implicit %2
+...
+
+---
+name: test_icmp_v4s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_icmp_v4s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+    ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load 16)
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
+    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV4]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV5]]
+    ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV6]]
+    ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV3]](s32), [[UV7]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s1>) = G_BUILD_VECTOR [[ICMP]](s1), [[ICMP1]](s1), [[ICMP2]](s1), [[ICMP3]](s1)
+    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s1>)
+    %0:_(p1) = G_IMPLICIT_DEF
+    %1:_(<4 x s32>) = G_LOAD %0 :: (volatile load 16)
+    %2:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %3:_(<4 x s1>) = G_ICMP intpred(ne), %1, %2
+    S_NOP 0, implicit %3
+
+...
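Taken together, the v4s32 tests show the whole fewering pipeline: G_UNMERGE_VALUES splits each operand into scalar pieces, one compare is emitted per element, and G_BUILD_VECTOR reassembles the <4 x s1> mask. A last standalone sketch of that data flow, with plain arrays standing in for the register pieces:

#include <array>
#include <cstdint>
#include <cstdio>

int main() {
  std::array<uint32_t, 4> LHS = {0, 1, 2, 3}; // pieces of the loaded vector
  std::array<uint32_t, 4> RHS = {0, 9, 2, 9}; // pieces of the copied vector
  std::array<bool, 4> Mask;                   // the rebuilt <4 x s1>
  for (unsigned I = 0; I != 4; ++I)
    Mask[I] = LHS[I] != RHS[I]; // one G_ICMP intpred(ne) per element
  for (bool B : Mask)
    printf("%d", B); // prints 0101
  printf("\n");
}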