Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1065,38 +1065,73 @@
     return Legalized;
   }
   case TargetOpcode::G_ICMP: {
-    uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
-    if (NarrowSize * 2 != SrcSize)
+    Register LHS = MI.getOperand(2).getReg();
+    LLT SrcTy = MRI.getType(LHS);
+    uint64_t SrcSize = SrcTy.getSizeInBits();
+    CmpInst::Predicate Pred =
+        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+    // TODO: Handle the non-equality case for weird sizes.
+    if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
       return UnableToLegalize;
 
-    Observer.changingInstr(MI);
-    Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
-    Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
-    MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
+    LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
+    SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
+    if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
+                      LHSLeftoverRegs))
+      return UnableToLegalize;
 
-    Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
-    Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
-    MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
+    LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
+    SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
+    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
+                      RHSPartRegs, RHSLeftoverRegs))
+      return UnableToLegalize;
 
-    CmpInst::Predicate Pred =
-        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
-    LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
-
-    if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
-      MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
-      MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
-      MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
-      MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
-      MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
+    // We now have the LHS and RHS split into narrow-type registers plus some
+    // leftover type.
+    Register Dst = MI.getOperand(0).getReg();
+    LLT ResTy = MRI.getType(Dst);
+    if (ICmpInst::isEquality(Pred)) {
+      // For each part on the LHS and RHS, keep track of the result of XOR-ing
+      // them together. For each equal part, the result should be all 0s. For
+      // each non-equal part, we'll get at least one 1.
+      auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
+      SmallVector<Register, 4> Parts;
+      for (unsigned I = 0, E = LHSPartRegs.size(); I < E; ++I)
+        Parts.push_back(
+            MIRBuilder.buildXor(NarrowTy, LHSPartRegs[I], RHSPartRegs[I])
+                .getReg(0));
+      for (unsigned I = 0, E = LHSLeftoverRegs.size(); I < E; ++I) {
+        auto XorH = MIRBuilder.buildXor(LeftoverTy, LHSLeftoverRegs[I],
+                                        RHSLeftoverRegs[I]);
+        // Leftover parts are smaller than the desired narrow type, so we need
+        // to extend them to the desired size.
+        Parts.push_back(MIRBuilder.buildZExt(NarrowTy, XorH).getReg(0));
+      }
+
+      // Now we know if each part is equal. We can OR the parts together and
+      // check if the result is 0. If the result of ORing everything is 0, then
+      // the LHS and RHS are equal. Otherwise, at least one part differed, so
+      // the LHS and RHS are not equal.
+      Register Or = MIRBuilder.buildOr(NarrowTy, Parts[0], Parts[1]).getReg(0);
+      for (unsigned I = 2, E = Parts.size(); I < E; ++I)
+        Or = MIRBuilder.buildOr(NarrowTy, Or, Parts[I]).getReg(0);
+      MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
     } else {
+      // TODO: Handle non-power-of-two types.
+      assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
+      assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
+      Register LHSL = LHSPartRegs[0];
+      Register LHSH = LHSPartRegs[1];
+      Register RHSL = RHSPartRegs[0];
+      Register RHSH = RHSPartRegs[1];
       MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
       MachineInstrBuilder CmpHEQ =
           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
-      MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
+      MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
     }
-    Observer.changedInstr(MI);
     MI.eraseFromParent();
     return Legalized;
   }
Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
@@ -79,3 +79,164 @@
     RET_ReallyLR
 
 ...
+---
+name: test_s128_eq
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: test_s128_eq
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]]
+  ; CHECK: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]]
+  ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]]
+  ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
+  ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32)
+  ; CHECK: G_BRCOND %cmp(s1), %bb.1
+  ; CHECK: G_BR %bb.2
+  ; CHECK: bb.1:
+  ; CHECK: successors:
+  ; CHECK: bb.2:
+  ; CHECK: RET_ReallyLR
+  bb.1:
+    %lhs:_(s128) = G_IMPLICIT_DEF
+    %rhs:_(s128) = G_IMPLICIT_DEF
+    %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s128), %rhs
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+  bb.2:
+    successors:
+  bb.3:
+    RET_ReallyLR
+
+...
+---
+name: test_s88_eq
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: test_s88_eq
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64)
+  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8), [[UV2]](s8), [[UV3]](s8), [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8)
+  ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64)
+  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[UV15]](s8)
+  ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+  ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+  ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64)
+  ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+  ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
+  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
+  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32)
+  ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]]
+  ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16777215
+  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[XOR1]](s32)
+  ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
+  ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[AND]]
+  ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
+  ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32)
+  ; CHECK: G_BRCOND %cmp(s1), %bb.1
+  ; CHECK: G_BR %bb.2
+  ; CHECK: bb.1:
+  ; CHECK: successors:
+  ; CHECK: bb.2:
+  ; CHECK: RET_ReallyLR
+  bb.1:
+    %lhs:_(s88) = G_IMPLICIT_DEF
+    %rhs:_(s88) = G_IMPLICIT_DEF
+    %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s88), %rhs
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+  bb.2:
+    successors:
+  bb.3:
+    RET_ReallyLR
+
+...
+---
+name: test_s88_ne
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: test_s88_ne
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64)
+  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8), [[UV2]](s8), [[UV3]](s8), [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8)
+  ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64)
+  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[UV15]](s8)
+  ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+  ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+  ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64)
+  ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+  ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
+  ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
+  ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32)
+  ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]]
+  ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16777215
+  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[XOR1]](s32)
+  ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]]
+  ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[AND]]
+  ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR]](s64), [[C]]
+  ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32)
+  ; CHECK: G_BRCOND %cmp(s1), %bb.1
+  ; CHECK: G_BR %bb.2
+  ; CHECK: bb.1:
+  ; CHECK: successors:
+  ; CHECK: bb.2:
+  ; CHECK: RET_ReallyLR
+  bb.1:
+    %lhs:_(s88) = G_IMPLICIT_DEF
+    %rhs:_(s88) = G_IMPLICIT_DEF
+    %cmp:_(s1) = G_ICMP intpred(ne), %lhs(s88), %rhs
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+  bb.2:
+    successors:
+  bb.3:
+    RET_ReallyLR
+
+...
+---
+name: test_s96_eq
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: test_s96_eq
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
+  ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+  ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
+  ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32)
+  ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+  ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s64), 0
+  ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64)
+  ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s64), 0
+  ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
+  ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT]], [[EXTRACT1]]
+  ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[XOR1]](s32)
+  ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[ZEXT]]
+  ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]]
+  ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32)
+  ; CHECK: G_BRCOND %cmp(s1), %bb.1
+  ; CHECK: G_BR %bb.2
+  ; CHECK: bb.1:
+  ; CHECK: successors:
+  ; CHECK: bb.2:
+  ; CHECK: RET_ReallyLR
+  bb.1:
+    %lhs:_(s96) = G_IMPLICIT_DEF
+    %rhs:_(s96) = G_IMPLICIT_DEF
+    %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s96), %rhs
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+  bb.2:
+    successors:
+  bb.3:
+    RET_ReallyLR