Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1065,38 +1065,81 @@ return Legalized; } case TargetOpcode::G_ICMP: { - uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); - if (NarrowSize * 2 != SrcSize) + Register LHS = MI.getOperand(2).getReg(); + LLT SrcTy = MRI.getType(LHS); + uint64_t SrcSize = SrcTy.getSizeInBits(); + CmpInst::Predicate Pred = + static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + + // TODO: Handle the non-equality case for weird sizes. + if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred)) return UnableToLegalize; - Observer.changingInstr(MI); - Register LHSL = MRI.createGenericVirtualRegister(NarrowTy); - Register LHSH = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2)); + LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover) + SmallVector<Register> LHSPartRegs, LHSLeftoverRegs; + if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs, + LHSLeftoverRegs)) + return UnableToLegalize; + + LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type. + SmallVector<Register> RHSPartRegs, RHSLeftoverRegs; + if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused, + RHSPartRegs, RHSLeftoverRegs)) + return UnableToLegalize; - Register RHSL = MRI.createGenericVirtualRegister(NarrowTy); - Register RHSH = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3)); + // We now have the LHS and RHS of the compare split into narrow-type + // registers, plus potentially some leftover type. + Register Dst = MI.getOperand(0).getReg(); + LLT ResTy = MRI.getType(Dst); + if (ICmpInst::isEquality(Pred)) { + // For each part on the LHS and RHS, keep track of the result of XOR-ing + // them together. 
For each equal part, the result should be all 0s. For + // each non-equal part, we'll get at least one 1. + auto Zero = MIRBuilder.buildConstant(NarrowTy, 0); + SmallVector<Register> Xors; + for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) { + auto LHS = std::get<0>(LHSAndRHS); + auto RHS = std::get<1>(LHSAndRHS); + auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0); + Xors.push_back(Xor); + } - CmpInst::Predicate Pred = - static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); - LLT ResTy = MRI.getType(MI.getOperand(0).getReg()); - - if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { - MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL); - MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH); - MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH); - MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0); - MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero); + // Build a G_XOR for each leftover register. Each G_XOR must be widened + // to the desired narrow type so that we can OR them together later. + SmallVector<Register> WidenedXors; + for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) { + auto LHS = std::get<0>(LHSAndRHS); + auto RHS = std::get<1>(LHSAndRHS); + auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0); + LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor); + buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors, + /* PadStrategy = */ TargetOpcode::G_ZEXT); + Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end()); + } + + // Now, for each part we broke up, we know if they are equal/not equal + // based off the G_XOR. We can OR these all together and compare against + // 0 to get the result. 
+ assert(Xors.size() >= 2 && "Should have gotten at least two Xors?"); + auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]); + for (unsigned I = 2, E = Xors.size(); I < E; ++I) + Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]); + MIRBuilder.buildICmp(Pred, Dst, Or, Zero); } else { + // TODO: Handle non-power-of-two types. + assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?"); + assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?"); + Register LHSL = LHSPartRegs[0]; + Register LHSH = LHSPartRegs[1]; + Register RHSL = RHSPartRegs[0]; + Register RHSH = RHSPartRegs[1]; MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH); MachineInstrBuilder CmpHEQ = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH); MachineInstrBuilder CmpLU = MIRBuilder.buildICmp( ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL); - MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH); + MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH); } - Observer.changedInstr(MI); MI.eraseFromParent(); return Legalized; } Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir @@ -79,3 +79,169 @@ RET_ReallyLR ... 
+--- +name: test_s128_eq +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: test_s128_eq + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] + ; CHECK: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]] + ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK: G_BRCOND %cmp(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + %lhs:_(s128) = G_IMPLICIT_DEF + %rhs:_(s128) = G_IMPLICIT_DEF + %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s128), %rhs + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + bb.2: + successors: + bb.3: + RET_ReallyLR + +... +--- +name: test_s88_eq +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: test_s88_eq + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8), [[UV2]](s8), [[UV3]](s8), [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[UV15]](s8) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CHECK: 
[[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) + ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]] + ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[XOR1]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8) + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[MV2]] + ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[C]] + ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[C]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR2]](s64), [[C]] + ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK: G_BRCOND %cmp(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + %lhs:_(s88) = G_IMPLICIT_DEF + %rhs:_(s88) = G_IMPLICIT_DEF + %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s88), %rhs + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + bb.2: + successors: + bb.3: + RET_ReallyLR + +... 
+--- +name: test_s88_ne +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: test_s88_ne + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8), [[UV2]](s8), [[UV3]](s8), [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[UV15]](s8) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) + ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]] + ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[XOR1]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8) + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[MV2]] + ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[C]] + ; CHECK: [[OR2:%[0-9]+]]:_(s64) 
= G_OR [[OR1]], [[C]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR2]](s64), [[C]] + ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK: G_BRCOND %cmp(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + %lhs:_(s88) = G_IMPLICIT_DEF + %rhs:_(s88) = G_IMPLICIT_DEF + %cmp:_(s1) = G_ICMP intpred(ne), %lhs(s88), %rhs + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + bb.2: + successors: + bb.3: + RET_ReallyLR + +... +--- +name: test_s96_eq +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: test_s96_eq + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s64), 0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s64), 0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]] + ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT]], [[EXTRACT1]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[XOR1]](s32), [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[MV2]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]] + ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK: G_BRCOND %cmp(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + %lhs:_(s96) = G_IMPLICIT_DEF + %rhs:_(s96) = G_IMPLICIT_DEF + 
%cmp:_(s1) = G_ICMP intpred(eq), %lhs(s96), %rhs + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + bb.2: + successors: + bb.3: + RET_ReallyLR