Index: llvm/lib/Target/SystemZ/SystemZ.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZ.h +++ llvm/lib/Target/SystemZ/SystemZ.h @@ -55,7 +55,7 @@ // Condition-code mask assignments for logical operations. const unsigned CCMASK_LOGICAL_ZERO = CCMASK_0 | CCMASK_2; -const unsigned CCMASK_LOGICAL_NONZERO = CCMASK_1 | CCMASK_2; +const unsigned CCMASK_LOGICAL_NONZERO = CCMASK_1 | CCMASK_3; const unsigned CCMASK_LOGICAL_CARRY = CCMASK_2 | CCMASK_3; const unsigned CCMASK_LOGICAL_NOCARRY = CCMASK_0 | CCMASK_1; const unsigned CCMASK_LOGICAL_BORROW = CCMASK_LOGICAL_NOCARRY; Index: llvm/lib/Target/SystemZ/SystemZElimCompare.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZElimCompare.cpp +++ llvm/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -88,6 +88,8 @@ SmallVectorImpl &CCUsers); bool convertToLoadAndTest(MachineInstr &MI, MachineInstr &Compare, SmallVectorImpl &CCUsers); + bool convertToLogical(MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl &CCUsers); bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare, SmallVectorImpl &CCUsers, unsigned ConvOpc = 0); @@ -303,6 +305,52 @@ return true; } +// See if MI is an instruction with an equivalent "logical" opcode that can +// be used and replace MI. This is useful for EQ/NE comparisons where the +// "nsw" flag is missing since the "logical" opcode always sets CC to reflect +// the result being zero or non-zero. 
+bool SystemZElimCompare::convertToLogical( + MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl &CCUsers) { + + unsigned ConvOpc = 0; + switch (MI.getOpcode()) { + case SystemZ::AR: ConvOpc = SystemZ::ALR; break; + case SystemZ::ARK: ConvOpc = SystemZ::ALRK; break; + case SystemZ::AGR: ConvOpc = SystemZ::ALGR; break; + case SystemZ::AGRK: ConvOpc = SystemZ::ALGRK; break; + case SystemZ::A: ConvOpc = SystemZ::AL; break; + case SystemZ::AY: ConvOpc = SystemZ::ALY; break; + case SystemZ::AG: ConvOpc = SystemZ::ALG; break; + default: break; + } + if (!ConvOpc || !adjustCCMasksForInstr(MI, Compare, CCUsers, ConvOpc)) + return false; + + // Operands should be identical, so just change the opcode and remove the + // dead flag on CC. + MI.setDesc(TII->get(ConvOpc)); + MI.clearRegisterDeads(SystemZ::CC); + return true; +} + +#ifndef NDEBUG +static bool isAddWithImmediate(unsigned Opcode) { + switch(Opcode) { + case SystemZ::AHI: + case SystemZ::AHIK: + case SystemZ::AGHI: + case SystemZ::AGHIK: + case SystemZ::AFI: + case SystemZ::AIH: + case SystemZ::AGFI: + return true; + default: break; + } + return false; +} +#endif + // The CC users in CCUsers are testing the result of a comparison of some // value X against zero and we know that any CC value produced by MI would // also reflect the value of X. ConvOpc may be used to pass the transfomed @@ -313,6 +361,8 @@ MachineInstr &MI, MachineInstr &Compare, SmallVectorImpl &CCUsers, unsigned ConvOpc) { + unsigned CompareFlags = Compare.getDesc().TSFlags; + unsigned CompareCCValues = SystemZII::getCCValues(CompareFlags); int Opcode = (ConvOpc ? ConvOpc : MI.getOpcode()); const MCInstrDesc &Desc = TII->get(Opcode); unsigned MIFlags = Desc.TSFlags; @@ -330,60 +380,97 @@ } // See which compare-style condition codes are available. 
- unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags); - + unsigned CCValues = SystemZII::getCCValues(MIFlags); + unsigned ReusableCCMask = CCValues; // For unsigned comparisons with zero, only equality makes sense. - unsigned CompareFlags = Compare.getDesc().TSFlags; if (CompareFlags & SystemZII::IsLogical) ReusableCCMask &= SystemZ::CCMASK_CMP_EQ; - + unsigned OFImplies = 0; + bool LogicalMI = false; + bool MIEquivalentToCmp = false; + if (MI.getFlag(MachineInstr::NoSWrap) && + (MIFlags & SystemZII::CCIfNoSignedWrap)) { + // If MI has the NSW flag set in combination with the + // SystemZII::CCIfNoSignedWrap flag, all CCValues are valid. + } + else if ((MIFlags & SystemZII::CCIfNoSignedWrap) && + MI.getOperand(2).isImm()) { + // Signed addition of immediate. If adding a positive immediate + // overflows, the result must be less than zero. If adding a negative + // immediate overflows, the result must be larger than zero (except in + // the special case of adding the minimum value of the result range, in + // which case we cannot predict whether the result is larger than or + // equal to zero). + assert(isAddWithImmediate(Opcode) && "Expected an add with immediate."); + assert(!MI.mayLoadOrStore() && "Expected an immediate term."); + int64_t RHS = MI.getOperand(2).getImm(); + if (SystemZ::GRX32BitRegClass.contains(MI.getOperand(0).getReg()) && + RHS == INT32_MIN) + return false; + OFImplies = (RHS > 0 ? SystemZ::CCMASK_CMP_LT : SystemZ::CCMASK_CMP_GT); + } + else if ((MIFlags & SystemZII::IsLogical) && CCValues) { + // Use CCMASK_CMP_EQ to match with CCUsers. On success CCMasks will be + // converted to CCMASK_LOGICAL_ZERO or CCMASK_LOGICAL_NONZERO. 
+ LogicalMI = true; + ReusableCCMask = SystemZ::CCMASK_CMP_EQ; + } + else { + ReusableCCMask &= SystemZII::getCompareZeroCCMask(MIFlags); + assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); + MIEquivalentToCmp = + ReusableCCMask == CCValues && CCValues == CompareCCValues; + } if (ReusableCCMask == 0) return false; - unsigned CCValues = SystemZII::getCCValues(MIFlags); - assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); - - bool MIEquivalentToCmp = - (ReusableCCMask == CCValues && - CCValues == SystemZII::getCCValues(CompareFlags)); - if (!MIEquivalentToCmp) { // Now check whether these flags are enough for all users. SmallVector AlterMasks; for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) { - MachineInstr *MI = CCUsers[I]; + MachineInstr *CCUserMI = CCUsers[I]; // Fail if this isn't a use of CC that we understand. - unsigned Flags = MI->getDesc().TSFlags; + unsigned Flags = CCUserMI->getDesc().TSFlags; unsigned FirstOpNum; if (Flags & SystemZII::CCMaskFirst) FirstOpNum = 0; else if (Flags & SystemZII::CCMaskLast) - FirstOpNum = MI->getNumExplicitOperands() - 2; + FirstOpNum = CCUserMI->getNumExplicitOperands() - 2; else return false; // Check whether the instruction predicate treats all CC values // outside of ReusableCCMask in the same way. In that case it // doesn't matter what those CC values mean. 
- unsigned CCValid = MI->getOperand(FirstOpNum).getImm(); - unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm(); + unsigned CCValid = CCUserMI->getOperand(FirstOpNum).getImm(); + unsigned CCMask = CCUserMI->getOperand(FirstOpNum + 1).getImm(); + assert(CCValid == CompareCCValues && (CCMask & ~CCValid) == 0 && + "Corrupt CC operands of CCUser."); unsigned OutValid = ~ReusableCCMask & CCValid; unsigned OutMask = ~ReusableCCMask & CCMask; if (OutMask != 0 && OutMask != OutValid) return false; - AlterMasks.push_back(&MI->getOperand(FirstOpNum)); - AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1)); + AlterMasks.push_back(&CCUserMI->getOperand(FirstOpNum)); + AlterMasks.push_back(&CCUserMI->getOperand(FirstOpNum + 1)); } // All users are OK. Adjust the masks for MI. for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { AlterMasks[I]->setImm(CCValues); unsigned CCMask = AlterMasks[I + 1]->getImm(); - if (CCMask & ~ReusableCCMask) - AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) | - (CCValues & ~ReusableCCMask)); + if (LogicalMI) { + // Translate the CCMask into its "logical" value. + CCMask = (CCMask == SystemZ::CCMASK_CMP_EQ ? + SystemZ::CCMASK_LOGICAL_ZERO : SystemZ::CCMASK_LOGICAL_NONZERO); + CCMask &= CCValues; // Logical subtracts never set CC=0. + } else { + if (CCMask & ~ReusableCCMask) + CCMask = (CCMask & ReusableCCMask) | (CCValues & ~ReusableCCMask); + CCMask |= (CCMask & OFImplies) ? SystemZ::CCMASK_ARITH_OVERFLOW : 0; + } + AlterMasks[I + 1]->setImm(CCMask); } } @@ -460,7 +547,9 @@ } // Try to eliminate Compare by reusing a CC result from MI. 
if ((!CCRefs && convertToLoadAndTest(MI, Compare, CCUsers)) || - (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) { + (!CCRefs.Def && + (adjustCCMasksForInstr(MI, Compare, CCUsers) || + convertToLogical(MI, Compare, CCUsers)))) { EliminatedComparisons += 1; return true; } Index: llvm/lib/Target/SystemZ/SystemZInstrFormats.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -75,8 +75,9 @@ // SystemZ::CCMASK_*. bits<4> CCValues = 0; - // The subset of CCValues that have the same meaning as they would after - // a comparison of the first operand against zero. + // The subset of CCValues that have the same meaning as they would after a + // comparison of the first operand against zero. "Logical" instructions + // leave this blank as they set CC in a different way. bits<4> CompareZeroCCMask = 0; // True if the instruction is conditional and if the CC mask operand @@ -87,9 +88,16 @@ bit CCMaskLast = 0; // True if the instruction is the "logical" rather than "arithmetic" form, - // in cases where a distinction exists. + // in cases where a distinction exists. Except for logical compares, if the + // instruction sets this flag along with a non-zero CCValues field, it is + // assumed to set CC to either CCMASK_LOGICAL_ZERO or + // CCMASK_LOGICAL_NONZERO. bit IsLogical = 0; + // True if the (add or sub) instruction sets CC like a compare of the + // result against zero, but only if the 'nsw' flag is set. 
+ bit CCIfNoSignedWrap = 0; + let TSFlags{0} = SimpleBDXLoad; let TSFlags{1} = SimpleBDXStore; let TSFlags{2} = Has20BitOffset; @@ -101,6 +109,7 @@ let TSFlags{18} = CCMaskFirst; let TSFlags{19} = CCMaskLast; let TSFlags{20} = IsLogical; + let TSFlags{21} = CCIfNoSignedWrap; } //===----------------------------------------------------------------------===// Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -46,7 +46,8 @@ CompareZeroCCMaskShift = 14, CCMaskFirst = (1 << 18), CCMaskLast = (1 << 19), - IsLogical = (1 << 20) + IsLogical = (1 << 20), + CCIfNoSignedWrap = (1 << 21) }; static inline unsigned getAccessSize(unsigned int Flags) { Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -945,6 +945,12 @@ } } +static void transferMIFlag(MachineInstr *OldMI, MachineInstr *NewMI, + MachineInstr::MIFlag Flag) { + if (OldMI->getFlag(Flag)) + NewMI->setFlag(Flag); +} + MachineInstr *SystemZInstrInfo::convertToThreeAddress( MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const { MachineBasicBlock *MBB = MI.getParent(); @@ -1050,6 +1056,7 @@ .addImm(0) .addImm(MI.getOperand(2).getImm()); transferDeadCC(&MI, BuiltMI); + transferMIFlag(&MI, BuiltMI, MachineInstr::NoSWrap); return BuiltMI; } @@ -1200,6 +1207,7 @@ if (MemDesc.TSFlags & SystemZII::HasIndex) MIB.addReg(0); transferDeadCC(&MI, MIB); + transferMIFlag(&MI, MIB, MachineInstr::NoSWrap); return MIB; } } Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -915,7 +915,7 @@ 
//===----------------------------------------------------------------------===// // Addition producing a signed overflow flag. -let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { +let Defs = [CC], CCValues = 0xF, CCIfNoSignedWrap = 1 in { // Addition of a register. let isCommutable = 1 in { defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_sadd, GR32, GR32>; @@ -957,7 +957,7 @@ defm : SXB; // Addition producing a carry. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0xF, IsLogical = 1 in { // Addition of a register. let isCommutable = 1 in { defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, z_uadd, GR32, GR32>; @@ -997,7 +997,7 @@ defm : ZXB; // Addition producing and using a carry. -let Defs = [CC], Uses = [CC] in { +let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in { // Addition of a register. def ALCR : BinaryRRE<"alcr", 0xB998, z_addcarry, GR32, GR32>; def ALCGR : BinaryRRE<"alcgr", 0xB988, z_addcarry, GR64, GR64>; @@ -1017,7 +1017,8 @@ //===----------------------------------------------------------------------===// // Subtraction producing a signed overflow flag. -let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8, + CCIfNoSignedWrap = 1 in { // Subtraction of a register. defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, z_ssub, GR32, GR32>; def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; @@ -1066,7 +1067,7 @@ (SGR GR64:$src1, (LLILF imm64lf32n:$src2))>; // Subtraction producing a carry. -let Defs = [CC] in { +let Defs = [CC], CCValues = 0x7, IsLogical = 1 in { // Subtraction of a register. defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, z_usub, GR32, GR32>; def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; @@ -1104,7 +1105,7 @@ (SLGFI GR64:$src1, imm64zx32n:$src2)>; // Subtraction producing and using a carry. -let Defs = [CC], Uses = [CC] in { +let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in { // Subtraction of a register. 
def SLBR : BinaryRRE<"slbr", 0xB999, z_subcarry, GR32, GR32>; def SLBGR : BinaryRRE<"slbgr", 0xB989, z_subcarry, GR64, GR64>; Index: llvm/test/CodeGen/SystemZ/int-cmp-44.ll =================================================================== --- llvm/test/CodeGen/SystemZ/int-cmp-44.ll +++ llvm/test/CodeGen/SystemZ/int-cmp-44.ll @@ -6,15 +6,16 @@ declare void @foo() -; Addition provides enough for equality comparisons with zero. First teest -; the EQ case. +; Addition provides enough for comparisons with zero if we know no +; signed overflow happens, which is when the "nsw" flag is set. +; First test the EQ case. define i32 @f1(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f1: ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: ber %r14 ; CHECK: br %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 %cmp = icmp eq i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -30,10 +31,10 @@ define i32 @f2(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f2: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: bner %r14 +; CHECK-NEXT: blhr %r14 ; CHECK: br %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 %cmp = icmp ne i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -45,14 +46,13 @@ ret i32 %res } -; SLT requires a comparison. +; ...and again with SLT. define i32 @f3(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f3: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: cibl %r2, 0, 0(%r14) -; CHECK: br %r14 +; CHECK-NEXT: blr %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 %cmp = icmp slt i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -64,14 +64,13 @@ ret i32 %res } -; ...SLE too. +; ...and again with SLE. 
define i32 @f4(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f4: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: cible %r2, 0, 0(%r14) -; CHECK: br %r14 +; CHECK-NEXT: bler %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 %cmp = icmp sle i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -83,14 +82,13 @@ ret i32 %res } -; ...SGT too. +; ...and again with SGT. define i32 @f5(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f5: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: cibh %r2, 0, 0(%r14) -; CHECK: br %r14 +; CHECK-NEXT: bhr %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 %cmp = icmp sgt i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -102,14 +100,13 @@ ret i32 %res } -; ...SGE too. +; ...and again with SGE. define i32 @f6(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f6: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: cibhe %r2, 0, 0(%r14) -; CHECK: br %r14 +; CHECK-NEXT: bher %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 %cmp = icmp sge i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -121,7 +118,8 @@ ret i32 %res } -; Subtraction also provides enough for equality comparisons with zero. +; Subtraction additionally provides enough for equality comparisons with +; zero even without "nsw". define i32 @f7(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f7: ; CHECK: s %r2, 0(%r4) @@ -141,15 +139,14 @@ ret i32 %res } -; ...but not for ordered comparisons. +; ...and again with SLT, where "nsw" is required. 
define i32 @f8(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f8: ; CHECK: s %r2, 0(%r4) -; CHECK-NEXT: cibl %r2, 0, 0(%r14) -; CHECK: br %r14 +; CHECK-NEXT: blr %r14 entry: %cur = load i32, i32 *%dest - %res = sub i32 %a, %cur + %res = sub nsw i32 %a, %cur %cmp = icmp slt i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -445,10 +442,10 @@ ; CHECK-LABEL: f23: ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: st %r2, 0(%r4) -; CHECK-NEXT: bner %r14 +; CHECK-NEXT: blhr %r14 ; CHECK: br %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 store i32 %res, i32 *%dest1 %cmp = icmp ne i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -491,10 +488,10 @@ ; CHECK-NEXT: #APP ; CHECK-NEXT: blah ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: bner %r14 +; CHECK-NEXT: blhr %r14 ; CHECK: br %r14 entry: - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 call void asm sideeffect "blah", "r"(i32 %add) %cmp = icmp ne i32 %add, 0 br i1 %cmp, label %exit, label %store @@ -540,7 +537,7 @@ ; CHECK-NEXT: cibe %r2, 0, 0(%r14) ; CHECK: br %r14 entry: - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %sub = sub i32 %b, %add store i32 %sub, i32 *%dest1 %cmp = icmp eq i32 %add, 0 Index: llvm/test/CodeGen/SystemZ/int-cmp-45.ll =================================================================== --- llvm/test/CodeGen/SystemZ/int-cmp-45.ll +++ llvm/test/CodeGen/SystemZ/int-cmp-45.ll @@ -3,14 +3,15 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 -no-integrated-as | FileCheck %s -; Addition provides enough for equality comparisons with zero. First teest -; the EQ case with LOC. +; Addition provides enough for comparisons with zero if we know no +; signed overflow happens, which is when the "nsw" flag is set. +; First test the EQ case with LOC. 
define i32 @f1(i32 %a, i32 %b, i32 *%cptr) { ; CHECK-LABEL: f1: ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: loce %r3, 0(%r4) ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp eq i32 %add, 0 %c = load i32, i32 *%cptr %arg = select i1 %cmp, i32 %c, i32 %b @@ -24,7 +25,7 @@ ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: stoce %r3, 0(%r4) ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp eq i32 %add, 0 %c = load i32, i32 *%cptr %newval = select i1 %cmp, i32 %b, i32 %c @@ -36,9 +37,9 @@ define i32 @f3(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: f3: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: locrne %r3, %r4 +; CHECK-NEXT: locrlh %r3, %r4 ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp eq i32 %add, 0 %arg = select i1 %cmp, i32 %b, i32 %c call void asm sideeffect "blah $0", "{r3}"(i32 %arg) @@ -49,9 +50,9 @@ define i32 @f4(i32 %a, i32 %b, i32 *%cptr) { ; CHECK-LABEL: f4: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: locne %r3, 0(%r4) +; CHECK-NEXT: loclh %r3, 0(%r4) ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp eq i32 %add, 0 %c = load i32, i32 *%cptr %arg = select i1 %cmp, i32 %b, i32 %c @@ -63,9 +64,9 @@ define i32 @f5(i32 %a, i32 %b, i32 *%cptr) { ; CHECK-LABEL: f5: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: stocne %r3, 0(%r4) +; CHECK-NEXT: stoclh %r3, 0(%r4) ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp eq i32 %add, 0 %c = load i32, i32 *%cptr %newval = select i1 %cmp, i32 %c, i32 %b @@ -79,7 +80,7 @@ ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: locre %r3, %r4 ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp ne i32 %add, 0 %arg = select i1 %cmp, i32 %b, i32 %c call void asm sideeffect "blah $0", "{r3}"(i32 %arg) @@ -92,7 +93,7 @@ ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: loce %r3, 0(%r4) ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add 
nsw i32 %a, 1000000 %cmp = icmp ne i32 %add, 0 %c = load i32, i32 *%cptr %arg = select i1 %cmp, i32 %b, i32 %c @@ -106,7 +107,7 @@ ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: stoce %r3, 0(%r4) ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp ne i32 %add, 0 %c = load i32, i32 *%cptr %newval = select i1 %cmp, i32 %c, i32 %b Index: llvm/test/CodeGen/SystemZ/int-cmp-56.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/int-cmp-56.ll @@ -0,0 +1,163 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; +; Check that signed comparisons against 0 are only eliminated if the "nsw" +; flag is present on the defining add (with register) instruction. For an +; equality comparison, add logical can be used. + +define i32 @fun0(i32 %arg, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: fun0: +; CHECK: jle .LBB0_2{{$}} +; CHECK: je .LBB0_4{{$}} + +bb: + %tmp2 = add nsw i32 %arg, %arg2 + %tmp3 = icmp sgt i32 %tmp2, 0 + br i1 %tmp3, label %bb3, label %bb1 + +bb1: + %tmp4 = add nsw i32 %arg, %arg3 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb4, label %bb2 + +bb2: + ret i32 0 + +bb3: + ret i32 1 + +bb4: + ret i32 2 +} + +; No "nsw" flag +define i32 @fun1(i32 %arg, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: fun1: +; CHECK: cijle +; CHECK: alr +; CHECK: jhe + +bb: + %tmp2 = add i32 %arg, %arg2 + %tmp3 = icmp sgt i32 %tmp2, 0 + br i1 %tmp3, label %bb3, label %bb1 + +bb1: + %tmp4 = add i32 %arg, %arg3 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb4, label %bb2 + +bb2: + ret i32 0 + +bb3: + ret i32 1 + +bb4: + ret i32 2 +} + +; "nuw" flag +define i32 @fun2(i32 %arg, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: fun2: +; CHECK: cijle +; CHECK: alr +; CHECK: jhe + +bb: + %tmp2 = add nuw i32 %arg, %arg2 + %tmp3 = icmp sgt i32 %tmp2, 0 + br i1 %tmp3, label %bb3, label %bb1 + +bb1: + %tmp4 = add nuw i32 %arg, %arg3 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb4, 
label %bb2 + +bb2: + ret i32 0 + +bb3: + ret i32 1 + +bb4: + ret i32 2 +} + +; Subtraction does not produce the value of zero in case of overflow, so +; "nsw" is not needed for the equality check against zero. +define i32 @fun3(i32 %arg, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: fun3: +; CHECK: jle .LBB3_2{{$}} +; CHECK: je .LBB3_4{{$}} + +bb: + %tmp2 = sub nsw i32 %arg, %arg2 + %tmp3 = icmp sgt i32 %tmp2, 0 + br i1 %tmp3, label %bb3, label %bb1 + +bb1: + %tmp4 = sub nsw i32 %arg, %arg3 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb4, label %bb2 + +bb2: + ret i32 0 + +bb3: + ret i32 1 + +bb4: + ret i32 2 +} + +; No "nsw" flag +define i32 @fun4(i32 %arg, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: fun4: +; CHECK: cijle +; CHECK: je .LBB4_4{{$}} + +bb: + %tmp2 = sub i32 %arg, %arg2 + %tmp3 = icmp sgt i32 %tmp2, 0 + br i1 %tmp3, label %bb3, label %bb1 + +bb1: + %tmp4 = sub i32 %arg, %arg3 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb4, label %bb2 + +bb2: + ret i32 0 + +bb3: + ret i32 1 + +bb4: + ret i32 2 +} + +; "nuw" flag +define i32 @fun5(i32 %arg, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: fun5: +; CHECK: cijle +; CHECK: je .LBB5_4{{$}} + +bb: + %tmp2 = sub nuw i32 %arg, %arg2 + %tmp3 = icmp sgt i32 %tmp2, 0 + br i1 %tmp3, label %bb3, label %bb1 + +bb1: + %tmp4 = sub nuw i32 %arg, %arg3 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb4, label %bb2 + +bb2: + ret i32 0 + +bb3: + ret i32 1 + +bb4: + ret i32 2 +} Index: llvm/test/CodeGen/SystemZ/int-cmp-57.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/int-cmp-57.ll @@ -0,0 +1,103 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -disable-cgp | FileCheck %s +; +; Check that signed comparisons against 0 are eliminated if the defining +; instruction is an add with immediate. +; +; Addition of an immediate does not depend on the "nsw" flag, since the +; result can be predicted in case of overflow. 
For example, if adding a +; positive immediate gives overflow, the result must be negative. + +; Addition of a negative immediate gives a positive result in case of +; overflow (except for the case of the minimum value which may also result in +; a zero result). +define i32 @fun0(i32 %arg) { +; CHECK-LABEL: fun0: +; CHECK: ahik +; CHECK-NEXT: locre +bb: + %tmp = add i32 %arg, -1 + %tmp1 = icmp eq i32 %tmp, 0 + %res = select i1 %tmp1, i32 %tmp, i32 %arg + ret i32 %res +} + +define i32 @fun1(i32 %arg) { +; CHECK-LABEL: fun1: +; CHECK: ahik +; CHECK-NEXT: locrnle +bb: + %tmp = add i32 %arg, -1 + %tmp1 = icmp sgt i32 %tmp, 0 + %res = select i1 %tmp1, i32 %tmp, i32 %arg + ret i32 %res +} + +define i32 @fun2(i32 %arg) { +; CHECK-LABEL: fun2: +; CHECK: ahik +; CHECK-NEXT: locrl +bb: + %tmp = add i32 %arg, -1 + %tmp1 = icmp slt i32 %tmp, 0 + %res = select i1 %tmp1, i32 %tmp, i32 %arg + ret i32 %res +} + +; Addition of a positive immediate gives a negative result in case of overflow. +define i32 @fun3(i32 %arg) { +; CHECK-LABEL: fun3: +; CHECK: ahik +; CHECK-NEXT: locre +bb: + %tmp = add i32 %arg, 1 + %tmp1 = icmp eq i32 %tmp, 0 + %res = select i1 %tmp1, i32 %tmp, i32 %arg + ret i32 %res +} + +define i32 @fun4(i32 %arg) { +; CHECK-LABEL: fun4: +; CHECK: ahik +; CHECK-NEXT: locrh +bb: + %tmp = add i32 %arg, 1 + %tmp1 = icmp sgt i32 %tmp, 0 + %res = select i1 %tmp1, i32 %tmp, i32 %arg + ret i32 %res +} + +define i32 @fun5(i32 %arg) { +; CHECK-LABEL: fun5: +; CHECK: ahik +; CHECK-NEXT: locrnhe +bb: + %tmp = add i32 %arg, 1 + %tmp1 = icmp slt i32 %tmp, 0 + %res = select i1 %tmp1, i32 %tmp, i32 %arg + ret i32 %res +} + +; Addition of the minimum value gives a positive or zero result. 
+define i32 @fun6(i32 %arg) { +; CHECK-LABEL: fun6: +; CHECK: afi +; CHECK-NEXT: chi +; CHECK-NEXT: locrlh +bb: + %tmp = add i32 %arg, -2147483648 + %tmp1 = icmp eq i32 %tmp, 0 + %res = select i1 %tmp1, i32 %tmp, i32 %arg + ret i32 %res +} + +define i32 @fun7(i32 %arg) { +; CHECK-LABEL: fun7: +; CHECK: afi +; CHECK-NEXT: chi +; CHECK-NEXT: locrle +bb: + %tmp = add i32 %arg, -2147483648 + %tmp1 = icmp sgt i32 %tmp, 0 + %res = select i1 %tmp1, i32 %tmp, i32 %arg + ret i32 %res +} Index: llvm/test/CodeGen/SystemZ/int-cmp-58.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/int-cmp-58.mir @@ -0,0 +1,71 @@ +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z14 -start-before=postrapseudos %s \ +# RUN: -o - | FileCheck %s +# +# Test that the CC values of logical adds and subs can be used in compare +# elimination in the cases of EQ/NE. + +# CHECK-LABEL: fun0: +# CHECK: alr %r3, %r2 +# CHECK-NEXT: locrhe %r2, %r3 +# CHECK-NEXT: alr %r3, %r2 +# CHECK-NEXT: locrnhe %r2, %r3 +# CHECK-NEXT: alr %r3, %r2 +# CHECK-NEXT: chi %r3, 0 +# CHECK-NEXT: locrle %r2, %r3 +# CHECK-NEXT: alr %r3, %r2 +# CHECK-NEXT: chi %r3, 0 +# CHECK-NEXT: locrhe %r2, %r3 +# CHECK-NEXT: slrk %r3, %r2, %r3 +# CHECK-NEXT: locrh %r2, %r3 +# CHECK-NEXT: slrk %r3, %r2, %r3 +# CHECK-NEXT: locrnhe %r2, %r3 +# CHECK-NEXT: slrk %r3, %r2, %r3 +# CHECK-NEXT: chi %r3, 0 +# CHECK-NEXT: locrle %r2, %r3 +# CHECK-NEXT: slrk %r3, %r2, %r3 +# CHECK-NEXT: chi %r3, 0 +# CHECK-NEXT: locrhe %r2, %r3 + + +--- | + define i32 @fun0(i32 %arg1, i32 %arg2) { bb: ret i32 0 } +... 
+--- +name: fun0 +body: | + bb.0: + + renamable $r3l = ALRK renamable $r2l, killed renamable $r3l, implicit-def dead $cc + CHIMux renamable $r3l, 0, implicit-def $cc + renamable $r2l = LOCR killed renamable $r2l, killed renamable $r3l, 14, 8, implicit killed $cc + + renamable $r3l = ALRK renamable $r2l, killed renamable $r3l, implicit-def dead $cc + CHIMux renamable $r3l, 0, implicit-def $cc + renamable $r2l = LOCR killed renamable $r2l, killed renamable $r3l, 14, 6, implicit killed $cc + + renamable $r3l = ALRK renamable $r2l, killed renamable $r3l, implicit-def dead $cc + CHIMux renamable $r3l, 0, implicit-def $cc + renamable $r2l = LOCR killed renamable $r2l, killed renamable $r3l, 14, 12, implicit killed $cc + + renamable $r3l = ALRK renamable $r2l, killed renamable $r3l, implicit-def dead $cc + CHIMux renamable $r3l, 0, implicit-def $cc + renamable $r2l = LOCR killed renamable $r2l, killed renamable $r3l, 14, 10, implicit killed $cc + + renamable $r3l = SLRK renamable $r2l, killed renamable $r3l, implicit-def dead $cc + CHIMux renamable $r3l, 0, implicit-def $cc + renamable $r2l = LOCR killed renamable $r2l, killed renamable $r3l, 14, 8, implicit killed $cc + + renamable $r3l = SLRK renamable $r2l, killed renamable $r3l, implicit-def dead $cc + CHIMux renamable $r3l, 0, implicit-def $cc + renamable $r2l = LOCR killed renamable $r2l, killed renamable $r3l, 14, 6, implicit killed $cc + + renamable $r3l = SLRK renamable $r2l, killed renamable $r3l, implicit-def dead $cc + CHIMux renamable $r3l, 0, implicit-def $cc + renamable $r2l = LOCR killed renamable $r2l, killed renamable $r3l, 14, 12, implicit killed $cc + + renamable $r3l = SLRK renamable $r2l, killed renamable $r3l, implicit-def dead $cc + CHIMux renamable $r3l, 0, implicit-def $cc + renamable $r2l = LOCR killed renamable $r2l, killed renamable $r3l, 14, 10, implicit killed $cc + + Return implicit $r2l +...