Index: lib/Target/SystemZ/SystemZElimCompare.cpp =================================================================== --- lib/Target/SystemZ/SystemZElimCompare.cpp +++ lib/Target/SystemZ/SystemZElimCompare.cpp @@ -320,7 +320,14 @@ unsigned MIFlags = Desc.TSFlags; // See which compare-style condition codes are available. - unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags); + unsigned CCValues = SystemZII::getCCValues(MIFlags); + unsigned ReusableCCMask; + if (MI.getFlag(MachineInstr::NoSWrap) && + (MIFlags & SystemZII::CCIfNoSignedWrap)) { + CCValues &= ~SystemZ::CCMASK_ARITH_OVERFLOW; + ReusableCCMask = CCValues; + } else + ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags); // For unsigned comparisons with zero, only equality makes sense. unsigned CompareFlags = Compare.getDesc().TSFlags; @@ -330,7 +337,6 @@ if (ReusableCCMask == 0) return false; - unsigned CCValues = SystemZII::getCCValues(MIFlags); assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); bool MIEquivalentToCmp = Index: lib/Target/SystemZ/SystemZInstrFormats.td =================================================================== --- lib/Target/SystemZ/SystemZInstrFormats.td +++ lib/Target/SystemZ/SystemZInstrFormats.td @@ -90,6 +90,10 @@ // in cases where a distinction exists. bit IsLogical = 0; + // True if the (add or sub) instruction sets CC according to its CCValues, but + // only if the 'nsw' flag is set. 
+ bit CCIfNoSignedWrap = 0; + let TSFlags{0} = SimpleBDXLoad; let TSFlags{1} = SimpleBDXStore; let TSFlags{2} = Has20BitOffset; @@ -101,6 +105,7 @@ let TSFlags{18} = CCMaskFirst; let TSFlags{19} = CCMaskLast; let TSFlags{20} = IsLogical; + let TSFlags{21} = CCIfNoSignedWrap; } //===----------------------------------------------------------------------===// Index: lib/Target/SystemZ/SystemZInstrInfo.h =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.h +++ lib/Target/SystemZ/SystemZInstrInfo.h @@ -46,7 +46,8 @@ CompareZeroCCMaskShift = 14, CCMaskFirst = (1 << 18), CCMaskLast = (1 << 19), - IsLogical = (1 << 20) + IsLogical = (1 << 20), + CCIfNoSignedWrap = (1 << 21) }; static inline unsigned getAccessSize(unsigned int Flags) { Index: lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.cpp +++ lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -1036,6 +1036,12 @@ } } +static void transferMIFlag(MachineInstr *OldMI, MachineInstr *NewMI, + MachineInstr::MIFlag Flag) { + if (OldMI->getFlag(Flag)) + NewMI->setFlag(Flag); +} + MachineInstr *SystemZInstrInfo::convertToThreeAddress( MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const { MachineBasicBlock *MBB = MI.getParent(); @@ -1141,6 +1147,7 @@ .addImm(0) .addImm(MI.getOperand(2).getImm()); transferDeadCC(&MI, BuiltMI); + transferMIFlag(&MI, BuiltMI, MachineInstr::NoSWrap); return BuiltMI; } @@ -1291,6 +1298,7 @@ if (MemDesc.TSFlags & SystemZII::HasIndex) MIB.addReg(0); transferDeadCC(&MI, MIB); + transferMIFlag(&MI, MIB, MachineInstr::NoSWrap); return MIB; } } Index: lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.td +++ lib/Target/SystemZ/SystemZInstrInfo.td @@ -915,7 +915,7 @@ 
//===----------------------------------------------------------------------===// // Addition producing a signed overflow flag. -let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { +let Defs = [CC], CCValues = 0xF, CCIfNoSignedWrap = 1 in { // Addition of a register. let isCommutable = 1 in { defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_sadd, GR32, GR32>; @@ -1017,7 +1017,8 @@ //===----------------------------------------------------------------------===// // Subtraction producing a signed overflow flag. -let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { +let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8, + CCIfNoSignedWrap = 1 in { // Subtraction of a register. defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, z_ssub, GR32, GR32>; def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; Index: test/CodeGen/SystemZ/int-cmp-44.ll =================================================================== --- test/CodeGen/SystemZ/int-cmp-44.ll +++ test/CodeGen/SystemZ/int-cmp-44.ll @@ -6,15 +6,16 @@ declare void @foo() -; Addition provides enough for equality comparisons with zero. First teest -; the EQ case. +; Addition provides enough for comparisons with zero if we know no +; signed overflow happens, which is when the "nsw" flag is set. +; First test the EQ case. define i32 @f1(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f1: ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: ber %r14 ; CHECK: br %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 %cmp = icmp eq i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -30,10 +31,10 @@ define i32 @f2(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f2: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: bner %r14 +; CHECK-NEXT: blhr %r14 ; CHECK: br %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 %cmp = icmp ne i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -45,14 +46,13 @@ ret i32 %res } -; SLT requires a comparison. +; ...and again with SLT. 
define i32 @f3(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f3: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: cibl %r2, 0, 0(%r14) -; CHECK: br %r14 +; CHECK-NEXT: blr %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 %cmp = icmp slt i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -64,14 +64,13 @@ ret i32 %res } -; ...SLE too. +; ...and again with SLE. define i32 @f4(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f4: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: cible %r2, 0, 0(%r14) -; CHECK: br %r14 +; CHECK-NEXT: bler %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 %cmp = icmp sle i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -83,14 +82,13 @@ ret i32 %res } -; ...SGT too. +; ...and again with SGT. define i32 @f5(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f5: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: cibh %r2, 0, 0(%r14) -; CHECK: br %r14 +; CHECK-NEXT: bhr %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 %cmp = icmp sgt i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -102,14 +100,13 @@ ret i32 %res } -; ...SGE too. +; ...and again with SGE. define i32 @f6(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f6: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: cibhe %r2, 0, 0(%r14) -; CHECK: br %r14 +; CHECK-NEXT: bher %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 %cmp = icmp sge i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -121,7 +118,8 @@ ret i32 %res } -; Subtraction also provides enough for equality comparisons with zero. +; Subtraction additionally provides enough for equality comparisons with +; zero, even without "nsw". define i32 @f7(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f7: ; CHECK: s %r2, 0(%r4) @@ -141,15 +139,14 @@ ret i32 %res } -; ...but not for ordered comparisons. +; ...and again with SLT. 
define i32 @f8(i32 %a, i32 %b, i32 *%dest) { ; CHECK-LABEL: f8: ; CHECK: s %r2, 0(%r4) -; CHECK-NEXT: cibl %r2, 0, 0(%r14) -; CHECK: br %r14 +; CHECK-NEXT: blr %r14 entry: %cur = load i32, i32 *%dest - %res = sub i32 %a, %cur + %res = sub nsw i32 %a, %cur %cmp = icmp slt i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -445,10 +442,10 @@ ; CHECK-LABEL: f23: ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: st %r2, 0(%r4) -; CHECK-NEXT: bner %r14 +; CHECK-NEXT: blhr %r14 ; CHECK: br %r14 entry: - %res = add i32 %a, 1000000 + %res = add nsw i32 %a, 1000000 store i32 %res, i32 *%dest1 %cmp = icmp ne i32 %res, 0 br i1 %cmp, label %exit, label %store @@ -491,10 +488,10 @@ ; CHECK-NEXT: #APP ; CHECK-NEXT: blah ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: bner %r14 +; CHECK-NEXT: blhr %r14 ; CHECK: br %r14 entry: - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 call void asm sideeffect "blah", "r"(i32 %add) %cmp = icmp ne i32 %add, 0 br i1 %cmp, label %exit, label %store @@ -540,7 +537,7 @@ ; CHECK-NEXT: cibe %r2, 0, 0(%r14) ; CHECK: br %r14 entry: - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %sub = sub i32 %b, %add store i32 %sub, i32 *%dest1 %cmp = icmp eq i32 %add, 0 Index: test/CodeGen/SystemZ/int-cmp-45.ll =================================================================== --- test/CodeGen/SystemZ/int-cmp-45.ll +++ test/CodeGen/SystemZ/int-cmp-45.ll @@ -3,14 +3,15 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 -no-integrated-as | FileCheck %s -; Addition provides enough for equality comparisons with zero. First teest -; the EQ case with LOC. +; Addition provides enough for comparisons with zero if we know no +; signed overflow happens, which is when the "nsw" flag is set. +; First test the EQ case with LOC. 
define i32 @f1(i32 %a, i32 %b, i32 *%cptr) { ; CHECK-LABEL: f1: ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: loce %r3, 0(%r4) ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp eq i32 %add, 0 %c = load i32, i32 *%cptr %arg = select i1 %cmp, i32 %c, i32 %b @@ -24,7 +25,7 @@ ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: stoce %r3, 0(%r4) ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp eq i32 %add, 0 %c = load i32, i32 *%cptr %newval = select i1 %cmp, i32 %b, i32 %c @@ -36,9 +37,9 @@ define i32 @f3(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: f3: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: locrne %r3, %r4 +; CHECK-NEXT: locrlh %r3, %r4 ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp eq i32 %add, 0 %arg = select i1 %cmp, i32 %b, i32 %c call void asm sideeffect "blah $0", "{r3}"(i32 %arg) @@ -49,9 +50,9 @@ define i32 @f4(i32 %a, i32 %b, i32 *%cptr) { ; CHECK-LABEL: f4: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: locne %r3, 0(%r4) +; CHECK-NEXT: loclh %r3, 0(%r4) ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp eq i32 %add, 0 %c = load i32, i32 *%cptr %arg = select i1 %cmp, i32 %b, i32 %c @@ -63,9 +64,9 @@ define i32 @f5(i32 %a, i32 %b, i32 *%cptr) { ; CHECK-LABEL: f5: ; CHECK: afi %r2, 1000000 -; CHECK-NEXT: stocne %r3, 0(%r4) +; CHECK-NEXT: stoclh %r3, 0(%r4) ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp eq i32 %add, 0 %c = load i32, i32 *%cptr %newval = select i1 %cmp, i32 %c, i32 %b @@ -79,7 +80,7 @@ ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: locre %r3, %r4 ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp ne i32 %add, 0 %arg = select i1 %cmp, i32 %b, i32 %c call void asm sideeffect "blah $0", "{r3}"(i32 %arg) @@ -92,7 +93,7 @@ ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: loce %r3, 0(%r4) ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add 
nsw i32 %a, 1000000 %cmp = icmp ne i32 %add, 0 %c = load i32, i32 *%cptr %arg = select i1 %cmp, i32 %b, i32 %c @@ -106,7 +107,7 @@ ; CHECK: afi %r2, 1000000 ; CHECK-NEXT: stoce %r3, 0(%r4) ; CHECK: br %r14 - %add = add i32 %a, 1000000 + %add = add nsw i32 %a, 1000000 %cmp = icmp ne i32 %add, 0 %c = load i32, i32 *%cptr %newval = select i1 %cmp, i32 %c, i32 %b Index: test/CodeGen/SystemZ/int-cmp-56.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/int-cmp-56.ll @@ -0,0 +1,160 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; +; Check that signed comparisons against 0 are only eliminated if +; the "nsw" flag is present on the defining add instruction. + +define i32 @fun0(i32 %arg, i32 %arg2) { +; CHECK-LABEL: fun0: +; CHECK: jle .LBB0_2{{$}} +; CHECK: je .LBB0_4{{$}} + +bb: + %tmp2 = add nsw i32 %arg, -1 + %tmp3 = icmp sgt i32 %tmp2, 0 + br i1 %tmp3, label %bb3, label %bb1 + +bb1: + %tmp4 = add nsw i32 %arg, %arg2 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb4, label %bb2 + +bb2: + ret i32 0 + +bb3: + ret i32 1 + +bb4: + ret i32 2 +} + +; No "nsw" flag +define i32 @fun1(i32 %arg, i32 %arg2) { +; CHECK-LABEL: fun1: +; CHECK: cijle +; CHECK: cije + +bb: + %tmp2 = add i32 %arg, -1 + %tmp3 = icmp sgt i32 %tmp2, 0 + br i1 %tmp3, label %bb3, label %bb1 + +bb1: + %tmp4 = add i32 %arg, %arg2 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb4, label %bb2 + +bb2: + ret i32 0 + +bb3: + ret i32 1 + +bb4: + ret i32 2 +} + +; "nuw" flag +define i32 @fun2(i32 %arg, i32 %arg2) { +; CHECK-LABEL: fun2: +; CHECK: cijle +; CHECK: cije + +bb: + %tmp2 = add nuw i32 %arg, -1 + %tmp3 = icmp sgt i32 %tmp2, 0 + br i1 %tmp3, label %bb3, label %bb1 + +bb1: + %tmp4 = add nuw i32 %arg, %arg2 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb4, label %bb2 + +bb2: + ret i32 0 + +bb3: + ret i32 1 + +bb4: + ret i32 2 +} + +; Subtraction does not produce the value of zero in case of overflow, so +; 
"nsw" is not needed for the equality check against zero. +define i32 @fun3(i32 %arg, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: fun3: +; CHECK: jle .LBB3_2{{$}} +; CHECK: je .LBB3_4{{$}} + +bb: + %tmp2 = sub nsw i32 %arg, %arg2 + %tmp3 = icmp sgt i32 %tmp2, 0 + br i1 %tmp3, label %bb3, label %bb1 + +bb1: + %tmp4 = sub nsw i32 %arg, %arg3 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb4, label %bb2 + +bb2: + ret i32 0 + +bb3: + ret i32 1 + +bb4: + ret i32 2 +} + +; No "nsw" flag +define i32 @fun4(i32 %arg, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: fun4: +; CHECK: cijle +; CHECK: je .LBB4_4{{$}} + +bb: + %tmp2 = sub i32 %arg, %arg2 + %tmp3 = icmp sgt i32 %tmp2, 0 + br i1 %tmp3, label %bb3, label %bb1 + +bb1: + %tmp4 = sub i32 %arg, %arg3 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb4, label %bb2 + +bb2: + ret i32 0 + +bb3: + ret i32 1 + +bb4: + ret i32 2 +} + +; "nuw" flag +define i32 @fun5(i32 %arg, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: fun5: +; CHECK: cijle +; CHECK: je .LBB5_4{{$}} + +bb: + %tmp2 = sub nuw i32 %arg, %arg2 + %tmp3 = icmp sgt i32 %tmp2, 0 + br i1 %tmp3, label %bb3, label %bb1 + +bb1: + %tmp4 = sub nuw i32 %arg, %arg3 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb4, label %bb2 + +bb2: + ret i32 0 + +bb3: + ret i32 1 + +bb4: + ret i32 2 +} Index: test/CodeGen/SystemZ/loop-01.ll =================================================================== --- test/CodeGen/SystemZ/loop-01.ll +++ test/CodeGen/SystemZ/loop-01.ll @@ -94,7 +94,7 @@ ; CHECK: aghi [[REG:%r[0-5]]], -1 ; CHECK: lr [[REG2:%r[0-5]]], [[REG]] ; CHECK: stg [[REG2]], -; CHECK: jne {{\..*}} +; CHECK: cgijlh [[REG]], 0, {{\..*}} ; CHECK: br %r14 entry: br label %loop