Index: llvm/trunk/lib/Target/PowerPC/PPCExpandISEL.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCExpandISEL.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCExpandISEL.cpp @@ -59,6 +59,8 @@ typedef SmallDenseMap ISELInstructionList; // A map of MBB numbers to their lists of contained ISEL instructions. + // Please note when we traverse this list and expand ISEL, we only remove + // the ISEL from the MBB not from this list. ISELInstructionList ISELInstructions; /// Initialize the object. @@ -124,9 +126,6 @@ #endif bool runOnMachineFunction(MachineFunction &MF) override { - if (!isExpandISELEnabled(MF)) - return false; - DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n"); initialize(MF); @@ -190,30 +189,71 @@ } void PPCExpandISEL::expandAndMergeISELs() { - for (auto &BlockList : ISELInstructions) { + bool ExpandISELEnabled = isExpandISELEnabled(*MF); - DEBUG(dbgs() << printMBBReference(*MF->getBlockNumbered(BlockList.first)) - << ":\n"); + for (auto &BlockList : ISELInstructions) { DEBUG(dbgs() << "Expanding ISEL instructions in " << printMBBReference(*MF->getBlockNumbered(BlockList.first)) << "\n"); - BlockISELList &CurrentISELList = BlockList.second; auto I = CurrentISELList.begin(); auto E = CurrentISELList.end(); while (I != E) { - BlockISELList SubISELList; - - SubISELList.push_back(*I++); - - // Collect the ISELs that can be merged together. - while (I != E && canMerge(SubISELList.back(), *I)) + assert(isISEL(**I) && "Expecting an ISEL instruction"); + MachineOperand &Dest = (*I)->getOperand(0); + MachineOperand &TrueValue = (*I)->getOperand(1); + MachineOperand &FalseValue = (*I)->getOperand(2); + + // Special case 1, all registers used by ISEL are the same one. + // The non-redundant isel 0, 0, 0, N would not satisfy these conditions + // as it would be ISEL %R0, %ZERO, %R0, %CRN. 
+ if (useSameRegister(Dest, TrueValue) && + useSameRegister(Dest, FalseValue)) { + DEBUG(dbgs() << "Remove redudant ISEL instruction: " << **I << "\n"); + // FIXME: if the CR field used has no other uses, we could eliminate the + // instruction that defines it. This would have to be done manually + // since this pass runs too late to run DCE after it. + NumRemoved++; + (*I)->eraseFromParent(); + I++; + } else if (useSameRegister(TrueValue, FalseValue)) { + // Special case 2, the two input registers used by ISEL are the same. + // Note: the non-foldable isel RX, 0, 0, N would not satisfy this + // condition as it would be ISEL %RX, %ZERO, %R0, %CRN, which makes it + // safe to fold ISEL to MR(OR) instead of ADDI. + MachineBasicBlock *MBB = (*I)->getParent(); + DEBUG(dbgs() << "Fold the ISEL instruction to an unconditonal copy:\n"); + DEBUG(dbgs() << "ISEL: " << **I << "\n"); + NumFolded++; + // Note: we're using both the TrueValue and FalseValue operands so as + // not to lose the kill flag if it is set on either of them. + BuildMI(*MBB, (*I), dl, TII->get(isISEL8(**I) ? PPC::OR8 : PPC::OR)) + .add(Dest) + .add(TrueValue) + .add(FalseValue); + (*I)->eraseFromParent(); + I++; + } else if (ExpandISELEnabled) { // Normal cases expansion enabled + DEBUG(dbgs() << "Expand ISEL instructions:\n"); + DEBUG(dbgs() << "ISEL: " << **I << "\n"); + BlockISELList SubISELList; SubISELList.push_back(*I++); - - expandMergeableISELs(SubISELList); - } - } + // Collect the ISELs that can be merged together. + // This will eat up ISEL instructions without considering whether they + // may be redundant or foldable to a register copy. So we still keep + // the handleSpecialCases() downstream to handle them. 
+ while (I != E && canMerge(SubISELList.back(), *I)) { + DEBUG(dbgs() << "ISEL: " << **I << "\n"); + SubISELList.push_back(*I++); + } + + expandMergeableISELs(SubISELList); + } else { // Normal cases expansion disabled + I++; // leave the ISEL as it is + } + } // end while + } // end for } void PPCExpandISEL::handleSpecialCases(BlockISELList &BIL, @@ -236,13 +276,15 @@ // Similarly, if at least one of the ISEL instructions satisfy the // following condition, we need the False Block: // The Dest Register and False Value Register are not the same. - bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue); bool IsORIInstRequired = !useSameRegister(Dest, FalseValue); // Special case 1, all registers used by ISEL are the same one. if (!IsADDIInstRequired && !IsORIInstRequired) { DEBUG(dbgs() << "Remove redudant ISEL instruction."); + // FIXME: if the CR field used has no other uses, we could eliminate the + // instruction that defines it. This would have to be done manually + // since this pass runs too late to run DCE after it. NumRemoved++; (*MI)->eraseFromParent(); // Setting MI to the erase result keeps the iterator valid and increased. @@ -257,14 +299,15 @@ // PPC::ZERO8 will be used for the first operand if the value is meant to // be zero. In this case, the useSameRegister method will return false, // thereby preventing this ISEL from being folded. - if (useSameRegister(TrueValue, FalseValue) && (BIL.size() == 1)) { DEBUG(dbgs() << "Fold the ISEL instruction to an unconditonal copy."); NumFolded++; - BuildMI(*MBB, (*MI), dl, TII->get(isISEL8(**MI) ? PPC::ADDI8 : PPC::ADDI)) + // Note: we're using both the TrueValue and FalseValue operands so as + // not to lose the kill flag if it is set on either of them. + BuildMI(*MBB, (*MI), dl, TII->get(isISEL8(**MI) ? PPC::OR8 : PPC::OR)) .add(Dest) .add(TrueValue) - .add(MachineOperand::CreateImm(0)); + .add(FalseValue); (*MI)->eraseFromParent(); // Setting MI to the erase result keeps the iterator valid and increased. 
MI = BIL.erase(MI); Index: llvm/trunk/test/CodeGen/PowerPC/expand-contiguous-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/expand-contiguous-isel.ll +++ llvm/trunk/test/CodeGen/PowerPC/expand-contiguous-isel.ll @@ -1,55 +1,66 @@ target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux-gnu" ; This file mainly tests that one of the ISEL instruction in the group uses the same register for operand RT, RA, RB +; This redundant ISEL is introduced during simple register coalescing stage. + +; Simple register coalescing first creates the foldable ISEL instruction as we have seen in expand-foldable-isel.ll: +; %vreg85 = ISEL8 %vreg83, %vreg83, %vreg33:sub_eq + +; Later the register coalescer figures out it could further coalesce %vreg85 with %vreg83: +; merge %vreg85:1@2288r into %vreg83:5@400B --> @400B +; erased: 2288r %vreg85 = COPY %vreg83 + +; After that we have: +; updated: 1504B %vreg83 = ISEL8 %vreg83, %vreg83, %vreg33:sub_eq + ; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=true < %s | FileCheck %s --check-prefix=CHECK-GEN-ISEL-TRUE ; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck %s --implicit-check-not isel -; Function Attrs: norecurse nounwind readnone + @.str = private unnamed_addr constant [3 x i8] c"]]\00", align 1 @.str.1 = private unnamed_addr constant [35 x i8] c"Index < Length && \22Invalid index!\22\00", align 1 @.str.2 = private unnamed_addr constant [50 x i8] c"/home/jtony/src/llvm/include/llvm/ADT/StringRef.h\00", align 1 @__PRETTY_FUNCTION__._ZNK4llvm9StringRefixEm = private unnamed_addr constant [47 x i8] c"char llvm::StringRef::operator[](size_t) const\00", align 1 @.str.3 = private unnamed_addr constant [95 x i8] c"(data || length == 0) && \22StringRef cannot be built from a NULL argument with non-null length\22\00", align 1
@__PRETTY_FUNCTION__._ZN4llvm9StringRefC2EPKcm = private unnamed_addr constant [49 x i8] c"llvm::StringRef::StringRef(const char *, size_t)\00", align 1 -; Function Attrs: nounwind -define i64 @_Z3fn1N4llvm9StringRefE([2 x i64] %Str.coerce) local_unnamed_addr #0 { +define i64 @_Z3fn1N4llvm9StringRefE([2 x i64] %Str.coerce) { entry: %Str.coerce.fca.0.extract = extractvalue [2 x i64] %Str.coerce, 0 %Str.coerce.fca.1.extract = extractvalue [2 x i64] %Str.coerce, 1 br label %while.cond.outer -while.cond.outer: ; preds = %_ZNK4llvm9StringRef6substrEmm.exit, %entry +while.cond.outer: %Str.sroa.0.0.ph = phi i64 [ %8, %_ZNK4llvm9StringRef6substrEmm.exit ], [ %Str.coerce.fca.0.extract, %entry ] %.sink.ph = phi i64 [ %sub.i, %_ZNK4llvm9StringRef6substrEmm.exit ], [ %Str.coerce.fca.1.extract, %entry ] %BracketDepth.0.ph = phi i64 [ %BracketDepth.1, %_ZNK4llvm9StringRef6substrEmm.exit ], [ undef, %entry ] %cmp65 = icmp eq i64 %BracketDepth.0.ph, 0 br i1 %cmp65, label %while.cond.us.preheader, label %while.cond.preheader -while.cond.us.preheader: ; preds = %while.cond.outer +while.cond.us.preheader: br label %while.cond.us -while.cond.preheader: ; preds = %while.cond.outer +while.cond.preheader: %cmp.i34129 = icmp eq i64 %.sink.ph, 0 br i1 %cmp.i34129, label %cond.false.i.loopexit135, label %_ZNK4llvm9StringRefixEm.exit.preheader -_ZNK4llvm9StringRefixEm.exit.preheader: ; preds = %while.cond.preheader +_ZNK4llvm9StringRefixEm.exit.preheader: br label %_ZNK4llvm9StringRefixEm.exit -while.cond.us: ; preds = %while.cond.us.preheader, %_ZNK4llvm9StringRef6substrEmm.exit50.us +while.cond.us: %Str.sroa.0.0.us = phi i64 [ %3, %_ZNK4llvm9StringRef6substrEmm.exit50.us ], [ %Str.sroa.0.0.ph, %while.cond.us.preheader ] %.sink.us = phi i64 [ %sub.i41.us, %_ZNK4llvm9StringRef6substrEmm.exit50.us ], [ %.sink.ph, %while.cond.us.preheader ] %cmp.i30.us = icmp ult i64 %.sink.us, 2 br i1 %cmp.i30.us, label %if.end.us, label %if.end.i.i.us -if.end.i.i.us: ; preds = %while.cond.us +if.end.i.i.us: 
%0 = inttoptr i64 %Str.sroa.0.0.us to i8* - %call.i.i.us = tail call signext i32 @memcmp(i8* %0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i64 2) #3 + %call.i.i.us = tail call signext i32 @memcmp(i8* %0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i64 2) %phitmp.i.us = icmp eq i32 %call.i.i.us, 0 br i1 %phitmp.i.us, label %if.then, label %_ZNK4llvm9StringRefixEm.exit.us -if.end.us: ; preds = %while.cond.us +if.end.us: %cmp.i34.us = icmp eq i64 %.sink.us, 0 br i1 %cmp.i34.us, label %cond.false.i.loopexit, label %_ZNK4llvm9StringRefixEm.exit.us -_ZNK4llvm9StringRefixEm.exit.us: ; preds = %if.end.i.i.us, %if.end.us +_ZNK4llvm9StringRefixEm.exit.us: %1 = inttoptr i64 %Str.sroa.0.0.us to i8* - %2 = load i8, i8* %1, align 1, !tbaa !2 + %2 = load i8, i8* %1, align 1 switch i8 %2, label %_ZNK4llvm9StringRef6substrEmm.exit.loopexit [ i8 92, label %if.then4.us i8 93, label %if.then9 ] -if.then4.us: ; preds = %_ZNK4llvm9StringRefixEm.exit.us +if.then4.us: %.sroa.speculated12.i38.us = select i1 %cmp.i30.us, i64 %.sink.us, i64 2 %add.ptr.i40.us = getelementptr inbounds i8, i8* %1, i64 %.sroa.speculated12.i38.us %sub.i41.us = sub i64 %.sink.us, %.sroa.speculated12.i38.us @@ -57,30 +68,30 @@ %cmp.i4.i45.us = icmp eq i64 %sub.i41.us, 0 %or.cond.i.i46.us = or i1 %tobool.i.i44.us, %cmp.i4.i45.us br i1 %or.cond.i.i46.us, label %_ZNK4llvm9StringRef6substrEmm.exit50.us, label %cond.false.i.i47.loopexit -_ZNK4llvm9StringRef6substrEmm.exit50.us: ; preds = %if.then4.us +_ZNK4llvm9StringRef6substrEmm.exit50.us: %3 = ptrtoint i8* %add.ptr.i40.us to i64 br label %while.cond.us -if.then: ; preds = %if.end.i.i.us +if.then: ret i64 undef -cond.false.i.loopexit: ; preds = %if.end.us +cond.false.i.loopexit: br label %cond.false.i -cond.false.i.loopexit134: ; preds = %_ZNK4llvm9StringRef6substrEmm.exit50 +cond.false.i.loopexit134: br label %cond.false.i -cond.false.i.loopexit135: ; preds = %while.cond.preheader +cond.false.i.loopexit135: br 
label %cond.false.i -cond.false.i: ; preds = %cond.false.i.loopexit135, %cond.false.i.loopexit134, %cond.false.i.loopexit - tail call void @__assert_fail(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str.1, i64 0, i64 0), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @.str.2, i64 0, i64 0), i32 zeroext 225, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @__PRETTY_FUNCTION__._ZNK4llvm9StringRefixEm, i64 0, i64 0)) #4 +cond.false.i: + tail call void @__assert_fail(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str.1, i64 0, i64 0), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @.str.2, i64 0, i64 0), i32 zeroext 225, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @__PRETTY_FUNCTION__._ZNK4llvm9StringRefixEm, i64 0, i64 0)) unreachable -_ZNK4llvm9StringRefixEm.exit: ; preds = %_ZNK4llvm9StringRefixEm.exit.preheader, %_ZNK4llvm9StringRef6substrEmm.exit50 +_ZNK4llvm9StringRefixEm.exit: %.sink131 = phi i64 [ %sub.i41, %_ZNK4llvm9StringRef6substrEmm.exit50 ], [ %.sink.ph, %_ZNK4llvm9StringRefixEm.exit.preheader ] %Str.sroa.0.0130 = phi i64 [ %6, %_ZNK4llvm9StringRef6substrEmm.exit50 ], [ %Str.sroa.0.0.ph, %_ZNK4llvm9StringRefixEm.exit.preheader ] %4 = inttoptr i64 %Str.sroa.0.0130 to i8* - %5 = load i8, i8* %4, align 1, !tbaa !2 + %5 = load i8, i8* %4, align 1 switch i8 %5, label %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132 [ i8 92, label %if.then4 i8 93, label %if.end10 ] -if.then4: ; preds = %_ZNK4llvm9StringRefixEm.exit +if.then4: %cmp.i.i37 = icmp ult i64 %.sink131, 2 %.sroa.speculated12.i38 = select i1 %cmp.i.i37, i64 %.sink131, i64 2 %add.ptr.i40 = getelementptr inbounds i8, i8* %4, i64 %.sroa.speculated12.i38 @@ -89,28 +100,28 @@ %cmp.i4.i45 = icmp eq i64 %sub.i41, 0 %or.cond.i.i46 = or i1 %tobool.i.i44, %cmp.i4.i45 br i1 %or.cond.i.i46, label %_ZNK4llvm9StringRef6substrEmm.exit50, label %cond.false.i.i47.loopexit133 -cond.false.i.i47.loopexit: ; preds = %if.then4.us +cond.false.i.i47.loopexit: br label %cond.false.i.i47 
-cond.false.i.i47.loopexit133: ; preds = %if.then4 +cond.false.i.i47.loopexit133: br label %cond.false.i.i47 -cond.false.i.i47: ; preds = %cond.false.i.i47.loopexit133, %cond.false.i.i47.loopexit - tail call void @__assert_fail(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @.str.2, i64 0, i64 0), i32 zeroext 90, i8* getelementptr inbounds ([49 x i8], [49 x i8]* @__PRETTY_FUNCTION__._ZN4llvm9StringRefC2EPKcm, i64 0, i64 0)) #4 +cond.false.i.i47: + tail call void @__assert_fail(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @.str.2, i64 0, i64 0), i32 zeroext 90, i8* getelementptr inbounds ([49 x i8], [49 x i8]* @__PRETTY_FUNCTION__._ZN4llvm9StringRefC2EPKcm, i64 0, i64 0)) unreachable -_ZNK4llvm9StringRef6substrEmm.exit50: ; preds = %if.then4 +_ZNK4llvm9StringRef6substrEmm.exit50: %6 = ptrtoint i8* %add.ptr.i40 to i64 %cmp.i34 = icmp eq i64 %sub.i41, 0 br i1 %cmp.i34, label %cond.false.i.loopexit134, label %_ZNK4llvm9StringRefixEm.exit -if.then9: ; preds = %_ZNK4llvm9StringRefixEm.exit.us - tail call void @exit(i32 signext 1) #4 +if.then9: + tail call void @exit(i32 signext 1) unreachable -if.end10: ; preds = %_ZNK4llvm9StringRefixEm.exit +if.end10: %dec = add i64 %BracketDepth.0.ph, -1 br label %_ZNK4llvm9StringRef6substrEmm.exit -_ZNK4llvm9StringRef6substrEmm.exit.loopexit: ; preds = %_ZNK4llvm9StringRefixEm.exit.us +_ZNK4llvm9StringRef6substrEmm.exit.loopexit: br label %_ZNK4llvm9StringRef6substrEmm.exit -_ZNK4llvm9StringRef6substrEmm.exit.loopexit132: ; preds = %_ZNK4llvm9StringRefixEm.exit +_ZNK4llvm9StringRef6substrEmm.exit.loopexit132: br label %_ZNK4llvm9StringRef6substrEmm.exit -_ZNK4llvm9StringRef6substrEmm.exit: ; preds = %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit, %if.end10 +_ZNK4llvm9StringRef6substrEmm.exit: %.sink76 = phi i64 [ %.sink131, %if.end10 
], [ %.sink.us, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit ], [ %.sink131, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132 ] %7 = phi i8* [ %4, %if.end10 ], [ %1, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit ], [ %4, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132 ] %BracketDepth.1 = phi i64 [ %dec, %if.end10 ], [ 0, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit ], [ %BracketDepth.0.ph, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132 ] @@ -120,7 +131,8 @@ br label %while.cond.outer ; CHECK-LABEL: @_Z3fn1N4llvm9StringRefE -; CHECK-GEN-ISEL-TRUE: isel [[SAME:r[0-9]+]], [[SAME]], [[SAME]] +; Unnecessary ISEL (all the registers are the same) is always removed +; CHECK-GEN-ISEL-TRUE-NOT: isel [[SAME:r[0-9]+]], [[SAME]], [[SAME]] ; CHECK-GEN-ISEL-TRUE: isel [[SAME:r[0-9]+]], {{r[0-9]+}}, [[SAME]] ; CHECK: bc 12, eq, [[TRUE:.LBB[0-9]+]] ; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] @@ -131,21 +143,6 @@ -; Function Attrs: noreturn nounwind -declare void @exit(i32 signext) local_unnamed_addr #1 -; Function Attrs: nounwind readonly -declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #2 -; Function Attrs: noreturn nounwind -declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*) local_unnamed_addr #1 -attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
"no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { nounwind readonly } -attributes #4 = { noreturn nounwind } -!llvm.module.flags = !{!0} -!llvm.ident = !{!1} -!0 = !{i32 1, !"PIC Level", i32 2} -!1 = !{!"clang version 4.0.0 (trunk 286863) (llvm/trunk 286967)"} -!2 = !{!3, !3, i64 0} -!3 = !{!"omnipotent char", !4, i64 0} -!4 = !{!"Simple C++ TBAA"} +declare void @exit(i32 signext) +declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) +declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*) Index: llvm/trunk/test/CodeGen/PowerPC/expand-foldable-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/expand-foldable-isel.ll +++ llvm/trunk/test/CodeGen/PowerPC/expand-foldable-isel.ll @@ -0,0 +1,71 @@ +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" +; This file mainly tests the case that the two input registers of the ISEL instruction are the same register. +; The foldable ISEL in this test case is introduced at simple register coalescing stage. 
+ +; Before that stage we have: +; %vreg18 = ISEL8 %vreg5, %vreg2, %vreg15; + +; At simple register coalescing stage, the register coalescer figures out it could remove the copy +; from %vreg2 to %vreg5, put the original value %X3 into %vreg5 directly +; erased: 336r %vreg5 = COPY %vreg2 +; updated: 288B %vreg5 = COPY %X3; + +; After that we have: +; updated: 416B %vreg18 = ISEL8 %vreg5, %vreg5, %vreg15; + +; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=true < %s | FileCheck %s --check-prefix=CHECK-GEN-ISEL-TRUE +; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck %s --implicit-check-not isel +%"struct.pov::ot_block_struct" = type { %"struct.pov::ot_block_struct"*, [3 x double], [3 x double], float, float, float, float, float, float, float, float, float, [3 x float], float, float, [3 x double], i16 } +%"struct.pov::ot_node_struct" = type { %"struct.pov::ot_id_struct", %"struct.pov::ot_block_struct"*, [8 x %"struct.pov::ot_node_struct"*] } +%"struct.pov::ot_id_struct" = type { i32, i32, i32, i32 } + +define void @_ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot_id_structE(%"struct.pov::ot_block_struct"* %new_block) { +; CHECK-GEN-ISEL-TRUE-LABEL: _ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot_id_structE: +; Note: the following line fold the original isel (isel r4, r3, r3) +; CHECK-GEN-ISEL-TRUE: mr r4, r3 +; CHECK-GEN-ISEL-TRUE: isel r29, r5, r6, 4*cr5+lt +; CHECK-GEN-ISEL-TRUE: blr +; +; CHECK-LABEL: _ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot_id_structE: +; CHECK: mr r4, r3 +; CHECK: bc 12, 4*cr5+lt, .LBB0_3 +; CHECK: # %bb.2: +; CHECK: ori r29, r6, 0 +; CHECK: b .LBB0_4 +; CHECK: .LBB0_3: +; CHECK: addi r29, r5, 0 +; CHECK: .LBB0_4: +; CHECK: blr +entry: + br label %while.cond11 + +while.cond11: + %this_node.0250 = phi %"struct.pov::ot_node_struct"* [ undef, %entry ], [ %1, %cond.false21.i156 ], [ %1, 
%cond.true18.i153 ] + %temp_id.sroa.21.1 = phi i32 [ undef, %entry ], [ %shr2039.i152, %cond.true18.i153 ], [ %div24.i155, %cond.false21.i156 ] + %0 = load i32, i32* undef, align 4 + %cmp17 = icmp eq i32 0, %0 + br i1 %cmp17, label %lor.rhs, label %while.body21 + +lor.rhs: + %Values = getelementptr inbounds %"struct.pov::ot_node_struct", %"struct.pov::ot_node_struct"* %this_node.0250, i64 0, i32 1 + store %"struct.pov::ot_block_struct"* %new_block, %"struct.pov::ot_block_struct"** %Values, align 8 + ret void + +while.body21: + %call.i84 = tail call i8* @ZN3pov10pov_callocEmmPKciS1_pov() + store i8* %call.i84, i8** undef, align 8 + %1 = bitcast i8* %call.i84 to %"struct.pov::ot_node_struct"* + br i1 undef, label %cond.true18.i153, label %cond.false21.i156 + +cond.true18.i153: + %shr2039.i152 = lshr i32 %temp_id.sroa.21.1, 1 + br label %while.cond11 + +cond.false21.i156: + %add23.i154 = add nsw i32 %temp_id.sroa.21.1, 1 + %div24.i155 = sdiv i32 %add23.i154, 2 + br label %while.cond11 +} + +declare i8* @ZN3pov10pov_callocEmmPKciS1_pov() Index: llvm/trunk/test/CodeGen/PowerPC/expand-isel-10.mir =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/expand-isel-10.mir +++ llvm/trunk/test/CodeGen/PowerPC/expand-isel-10.mir @@ -0,0 +1,54 @@ +# This file tests the scenario: ISEL RX, RX, RX, CR (X != 0), +# which is redundant and removed. +# RUN: llc -ppc-gen-isel=true -run-pass ppc-expand-isel -o - %s | FileCheck %s + +--- | + target datalayout = "E-m:e-i64:64-n32:64" + target triple = "powerpc64-unknown-linux-gnu" + define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) { + entry: + %cmp = icmp sgt i32 %i, 0 + %add = add nsw i32 %i, 1 + %cond = select i1 %cmp, i32 %add, i32 %j + ret i32 %cond + } + +...
+--- +name: testExpandISEL +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%x3' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %x3 + + %r5 = ADDI %r3, 1 + %cr0 = CMPWI %r3, 0 + %r3 = ISEL %r3, %r3, %cr0gt + %x3 = EXTSW_32_64 %r3 + ; CHECK: %r5 = ADDI %r3, 1 + ; CHECK: %cr0 = CMPWI %r3, 0 + ; CHECK-NOT: %r3 = ISEL %r3, %r3, %cr0gt + ; CHECK: %x3 = EXTSW_32_64 %r3 + +... Index: llvm/trunk/test/CodeGen/PowerPC/expand-isel-9.mir =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/expand-isel-9.mir +++ llvm/trunk/test/CodeGen/PowerPC/expand-isel-9.mir @@ -0,0 +1,54 @@ +# This file tests the scenario: ISEL RX, RY, RY, CR (X != 0 && Y != 0) +# It is folded into a copy (%RX = OR %RY, %RY) +# RUN: llc -ppc-gen-isel=true -run-pass ppc-expand-isel -o - %s | FileCheck %s + +--- | + target datalayout = "E-m:e-i64:64-n32:64" + target triple = "powerpc64-unknown-linux-gnu" + define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) { + entry: + %cmp = icmp sgt i32 %i, 0 + %add = add nsw i32 %i, 1 + %cond = select i1 %cmp, i32 %add, i32 %j + ret i32 %cond + } + +... 
+--- +name: testExpandISEL +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%x3' } + - { reg: '%x4' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %x3, %x4 + + %r5 = ADDI %r3, 1 + %cr0 = CMPWI %r3, 0 + %r3 = ISEL %r4, %r4, %cr0gt + ; Test fold ISEL to a copy + ; CHECK: %r3 = OR %r4, %r4 + + %x3 = EXTSW_32_64 %r3 + +... Index: llvm/trunk/test/CodeGen/PowerPC/expand-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/expand-isel.ll +++ llvm/trunk/test/CodeGen/PowerPC/expand-isel.ll @@ -1,7 +1,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" ; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck %s --implicit-check-not isel -; Function Attrs: norecurse nounwind readnone + define signext i32 @testExpandISELToIfElse(i32 signext %i, i32 signext %j) { entry: %cmp = icmp sgt i32 %i, 0 @@ -23,7 +23,6 @@ } -; Function Attrs: norecurse nounwind readnone define signext i32 @testExpandISELToIf(i32 signext %i, i32 signext %j) { entry: %cmp = icmp sgt i32 %i, 0 @@ -39,7 +38,6 @@ ; CHECK-NEXT: blr } -; Function Attrs: norecurse nounwind readnone define signext i32 @testExpandISELToElse(i32 signext %i, i32 signext %j) { entry: %cmp = icmp sgt i32 %i, 0 @@ -53,22 +51,7 @@ ; CHECK-NEXT: blr } -; Function Attrs: norecurse nounwind readnone -define signext i32 @testReplaceISELWithCopy(i32 signext %i, i32 signext %j) { -entry: - %cmp = icmp 
sgt i32 %i, 0 - %cond = select i1 %cmp, i32 %j, i32 %j - ret i32 %cond - -; CHECK-LABEL: @testReplaceISELWithCopy - -; Fix me should really check: addi r3, r4, 0 -; but for some reason it's optimized to mr r3, r4 -; CHECK: mr r3, r4 -; CHECK-NEXT: blr -} -; Function Attrs: norecurse nounwind readnone define signext i32 @testExpandISELToNull(i32 signext %i, i32 signext %j) { entry: %cmp = icmp sgt i32 %i, 0 @@ -81,7 +64,6 @@ ; CHECK: blr } -; Function Attrs: norecurse nounwind readnone define signext i32 @testExpandISELsTo2ORIs2ADDIs (i32 signext %a, i32 signext %b, i32 signext %d, i32 signext %f, i32 signext %g) { @@ -108,7 +90,6 @@ ; CHECK-NEXT: blr } -; Function Attrs: norecurse nounwind readnone define signext i32 @testExpandISELsTo2ORIs1ADDI (i32 signext %a, i32 signext %b, i32 signext %d, i32 signext %f, i32 signext %g) { @@ -133,7 +114,6 @@ ; CHECK-NEXT: blr } -; Function Attrs: norecurse nounwind readnone define signext i32 @testExpandISELsTo1ORI1ADDI (i32 signext %a, i32 signext %b, i32 signext %d, i32 signext %f, i32 signext %g) { @@ -160,7 +140,6 @@ ; CHECK-NEXT: blr } -; Function Attrs: norecurse nounwind readnone define signext i32 @testExpandISELsTo0ORI2ADDIs (i32 signext %a, i32 signext %b, i32 signext %d, i32 signext %f, i32 signext %g) {