Index: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h =================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h +++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.h @@ -215,6 +215,8 @@ bool expandPostRAPseudo(MachineInstr &MI) const override; + bool shouldSink(const MachineInstr &MI) const override; + void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr &Orig, Index: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2534,14 +2534,28 @@ } } +/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return +/// the condition code if we modify the instructions such that flags are +/// set by ADD(a,b,X). +inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) { + switch (CC) { + default: return ARMCC::AL; + case ARMCC::HS: return ARMCC::LO; + case ARMCC::LO: return ARMCC::HS; + case ARMCC::VS: return ARMCC::VS; + case ARMCC::VC: return ARMCC::VC; + } +} + /// isRedundantFlagInstr - check whether the first instruction, whose only /// purpose is to update flags, can be made redundant. /// CMPrr can be made redundant by SUBrr if the operands are the same. /// CMPri can be made redundant by SUBri if the operands are the same. +/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X). /// This function can be extended later on. -inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, - unsigned SrcReg2, int ImmValue, - MachineInstr *OI) { +inline static bool isRedundantFlagInstr(const MachineInstr *CmpI, + unsigned SrcReg, unsigned SrcReg2, + int ImmValue, const MachineInstr *OI) { if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && (OI->getOpcode() == ARM::SUBrr || @@ -2559,6 +2573,14 @@ OI->getOperand(1).getReg() == SrcReg && OI->getOperand(2).getImm() == ImmValue) return true; + + if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && + (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr || + OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) && + OI->getOperand(0).isReg() && OI->getOperand(1).isReg() && + OI->getOperand(0).getReg() == SrcReg && + OI->getOperand(1).getReg() == SrcReg2) + return true; return false; } @@ -2661,17 +2683,18 @@ if (I == B) return false; // There are two possible candidates which can be changed to set CPSR: - // One is MI, the other is a SUB instruction. - // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). + // One is MI, the other is a SUB or ADD instruction. + // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or + // ADDr[ri](r1, r2, X). // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). - MachineInstr *Sub = nullptr; + MachineInstr *SubAdd = nullptr; if (SrcReg2 != 0) // MI is not a candidate for CMPrr. MI = nullptr; else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) { // Conservatively refuse to convert an instruction which isn't in the same // BB as the comparison. - // For CMPri w/ CmpValue != 0, a Sub may still be a candidate. + // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate. // Thus we cannot return here. if (CmpInstr.getOpcode() == ARM::CMPri || CmpInstr.getOpcode() == ARM::t2CMPri) @@ -2713,38 +2736,43 @@ } I = CmpInstr; E = MI; + } else { + // Allow the loop below to search E (which was initially MI). Since MI and + // SubAdd have different tests, even if that instruction could not be MI, it + // could still potentially be SubAdd. + --E; } // Check that CPSR isn't set between the comparison instruction and the one we - // want to change. At the same time, search for Sub. + // want to change. At the same time, search for SubAdd. const TargetRegisterInfo *TRI = &getRegisterInfo(); --I; for (; I != E; --I) { const MachineInstr &Instr = *I; + // Check whether CmpInstr can be made redundant by the current instruction. + if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) { + SubAdd = &*I; + break; + } + if (Instr.modifiesRegister(ARM::CPSR, TRI) || Instr.readsRegister(ARM::CPSR, TRI)) // This instruction modifies or uses CPSR after the one we want to // change. We can't do this transformation. return false; - // Check whether CmpInstr can be made redundant by the current instruction. - if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) { - Sub = &*I; - break; - } - if (I == B) // The 'and' is below the comparison instruction. return false; } // Return false if no candidates exist. - if (!MI && !Sub) + if (!MI && !SubAdd) return false; // The single candidate is called MI. - if (!MI) MI = Sub; + if (!MI) MI = SubAdd; // We can't use a predicated instruction - it doesn't always write the flags. if (isPredicated(*MI)) @@ -2802,25 +2830,31 @@ break; } - if (Sub) { - ARMCC::CondCodes NewCC = getSwappedCondition(CC); - if (NewCC == ARMCC::AL) - return false; + if (SubAdd) { // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based // on CMP needs to be updated to be based on SUB. + // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also + // needs to be modified. // Push the condition code operands to OperandsToUpdate. // If it is safe to remove CmpInstr, the condition code of these // operands will be modified. - if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg) { + unsigned Opc = SubAdd->getOpcode(); + bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr || + Opc == ARM::SUBri || Opc == ARM::t2SUBri; + if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 && + SubAdd->getOperand(2).getReg() == SrcReg)) { // VSel doesn't support condition code update. if (IsInstrVSel) return false; + // Ensure we can swap the condition. + ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC)); + if (NewCC == ARMCC::AL) + return false; OperandsToUpdate.push_back( std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); } } else { - // No Sub, so this is x = y, z; cmp x, 0. + // No SubAdd, so this is x = y, z; cmp x, 0. switch (CC) { case ARMCC::EQ: // Z case ARMCC::NE: // Z @@ -2874,6 +2908,23 @@ return true; } +bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const { + // Do not sink MI if it might be used to optimize a redundant compare. + // We heuristically only look at the instruction immediately following MI to + // avoid potentially searching the entire basic block. + if (isPredicated(MI)) + return true; + MachineBasicBlock::const_iterator Next = &MI; + ++Next; + unsigned SrcReg, SrcReg2; + int CmpMask, CmpValue; + if (Next != MI.getParent()->end() && + analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) && + isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI)) + return false; + return true; +} + bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, MachineRegisterInfo *MRI) const { Index: llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll +++ llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll @@ -33,10 +33,10 @@ ; CHECK-LABEL: sadd_overflow: - ; ARM: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]] - ; ARM: mov r[[R1]], #1 - ; ARM: cmp r[[R2]], r[[R0]] - ; ARM: movvc r[[R1]], #0 + ; ARM: adds r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]] + ; ARM: mov r[[R0]], #1 + ; ARM: movvc r[[R0]], #0 + ; ARM: mov pc, lr ; THUMBV6: mov r[[R2:[0-9]+]], r[[R0:[0-9]+]] ; THUMBV6: adds r[[R3:[0-9]+]], r[[R2]], r[[R1:[0-9]+]] @@ -47,11 +47,10 @@ ; THUMBV6: mov r[[R0]], r[[R1]] ; THUMBV6: .L[[LABEL]]: - ; THUMBV7: movs r[[R1]], #1 - ; THUMBV7: cmp r[[R2]], r[[R0]] + ; THUMBV7: adds r[[R2:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] + ; THUMBV7: mov.w r[[R0:[0-9]+]], #1 ; THUMBV7: it vc - ; THUMBV7: movvc r[[R1]], #0 - ; THUMBV7: mov r[[R0]], r[[R1]] + ; THUMBV7: movvc r[[R0]], #0 } define i32 @usub_overflow(i32 %a, i32 %b) #0 { Index: llvm/trunk/test/CodeGen/ARM/su-addsub-overflow.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/su-addsub-overflow.ll +++ llvm/trunk/test/CodeGen/ARM/su-addsub-overflow.ll @@ -2,9 +2,7 @@ define i32 @sadd(i32 %a, i32 %b) local_unnamed_addr #0 { ; CHECK-LABEL: sadd: -; CHECK: mov r[[R0:[0-9]+]], r0 -; CHECK-NEXT: add r[[R1:[0-9]+]], r[[R0]], r1 -; CHECK-NEXT: cmp r[[R1]], r[[R0]] +; CHECK: adds r0, r0, r1 ; CHECK-NEXT: movvc pc, lr entry: %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) @@ -23,10 +21,8 @@ define i32 @uadd(i32 %a, i32 %b) local_unnamed_addr #0 { ; CHECK-LABEL: uadd: -; CHECK: mov r[[R0:[0-9]+]], r0 -; CHECK-NEXT: adds r[[R1:[0-9]+]], r[[R0]], r1 -; CHECK-NEXT: cmp r[[R1]], r[[R0]] -; CHECK-NEXT: movhs pc, lr +; CHECK: adds r0, r0, r1 +; CHECK-NEXT: movlo pc, lr entry: %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) %1 = extractvalue { i32, i1 } %0, 1 @@ -44,8 +40,7 @@ define i32 @ssub(i32 %a, i32 %b) local_unnamed_addr #0 { ; CHECK-LABEL: ssub: -; CHECK: cmp r0, r1 -; CHECK-NEXT: subvc r0, r0, r1 +; CHECK: subs r0, r0, r1 ; CHECK-NEXT: movvc pc, lr entry: %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) @@ -64,9 +59,7 @@ define i32 @usub(i32 %a, i32 %b) local_unnamed_addr #0 { ; CHECK-LABEL: usub: -; CHECK: mov r[[R0:[0-9]+]], r0 -; CHECK-NEXT: subs r[[R1:[0-9]+]], r[[R0]], r1 -; CHECK-NEXT: cmp r[[R0]], r1 +; CHECK: subs r0, r0, r1 ; CHECK-NEXT: movhs pc, lr entry: %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) @@ -87,11 +80,9 @@ ; CHECK-LABEL: sum: ; CHECK: ldr [[R0:r[0-9]+]], ; CHECK-NEXT: ldr [[R1:r[0-9]+|lr]], -; CHECK-NEXT: add [[R2:r[0-9]+]], [[R1]], [[R0]] -; CHECK-NEXT: cmp [[R2]], [[R1]] +; CHECK-NEXT: adds [[R2:r[0-9]+]], [[R1]], [[R0]] ; CHECK-NEXT: strvc [[R2]], -; CHECK-NEXT: addvc -; CHECK-NEXT: cmpvc +; CHECK-NEXT: addsvc ; CHECK-NEXT: bvs entry: %cmp7 = icmp eq i32 %n, 0 @@ -128,6 +119,46 @@ } +define void @extern_loop(i32 %n) local_unnamed_addr #0 { +; Do not replace the compare around the clobbering call. +; CHECK: add {{r[0-9]+}}, {{r[0-9]+}}, #1 +; CHECK-NEXT: bl external_fn +; CHECK: cmp +entry: + %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %n, i32 1) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont.lr.ph + +cont.lr.ph: + %2 = extractvalue { i32, i1 } %0, 0 + %cmp5 = icmp sgt i32 %2, 0 + br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + br label %for.body + +trap: + tail call void @llvm.trap() #2 + unreachable + +for.cond.cleanup: + ret void + +for.body: + %i.046 = phi i32 [ %5, %cont1 ], [ 0, %for.body.preheader ] + tail call void bitcast (void (...)* @external_fn to void ()*)() #4 + %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.046, i32 1) + %4 = extractvalue { i32, i1 } %3, 1 + br i1 %4, label %trap, label %cont1 + +cont1: + %5 = extractvalue { i32, i1 } %3, 0 + %cmp = icmp slt i32 %5, %2 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + +declare void @external_fn(...) local_unnamed_addr #0 + declare void @llvm.trap() #2 declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1