Index: lib/Target/PowerPC/PPCTargetTransformInfo.h
===================================================================
--- lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -57,6 +57,9 @@
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
 
+  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                     TargetTransformInfo::LSRCost &C2);
+
   /// @}
 
   /// \name Vector TTI Implementations
Index: lib/Target/PowerPC/PPCTargetTransformInfo.cpp
===================================================================
--- lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -243,6 +243,14 @@
   return true;
 }
 
+bool PPCTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                               TargetTransformInfo::LSRCost &C2) {
+  // PowerPC specific: a cost with more instructions is never considered less.
+  if (C1.Insns > C2.Insns)
+    return false;
+  return BaseT::isLSRCostLess(C1, C2);
+}
+
 unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
   if (Vector && !ST->hasAltivec() && !ST->hasQPX())
     return 0;
Index: test/Transforms/LoopStrengthReduce/PowerPC/lsr-insns-3.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopStrengthReduce/PowerPC/lsr-insns-3.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -O2 -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%struct.myType2 = type <{ i32, i8, %struct.myType, [2 x i8] }>
+%struct.myType = type { i8 }
+
+define nonnull %struct.myType2* @_Z6myIniti(i32 signext %n) local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: _Z6myIniti:
+; CHECK: [[LABEL1:.[0-9A-Z_]+]]: # %arrayctor.loop
+; CHECK: mr {{[0-9]+}}, [[REG1:[0-9]+]]
+; CHECK-NEXT: bl _ZN6myTypeC1Ev
+; CHECK: addi [[REG2:[0-9]+]], [[REG2]], -8
+; CHECK-NEXT: addi [[REG1]], [[REG1]], 8
+; CHECK-NEXT: cmpldi [[REG2]], 0
+; CHECK-NEXT: bne 0, [[LABEL1]]
+
+entry:
+  %0 = sext i32 %n to i64
+  %1 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %0, i64 8)
+  %2 = extractvalue { i64, i1 } %1, 1
+  %3 = extractvalue { i64, i1 } %1, 0
+  %4 = select i1 %2, i64 -1, i64 %3
+  %call = tail call i8* @_Znam(i64 %4) #5
+  %5 = bitcast i8* %call to %struct.myType2*
+  %isempty = icmp eq i32 %n, 0
+  br i1 %isempty, label %arrayctor.cont, label %new.ctorloop
+
+new.ctorloop:                                     ; preds = %entry
+  %arrayctor.end = getelementptr inbounds %struct.myType2, %struct.myType2* %5, i64 %0
+  br label %arrayctor.loop
+
+arrayctor.loop:                                   ; preds = %invoke.cont, %new.ctorloop
+  %arrayctor.cur = phi %struct.myType2* [ %5, %new.ctorloop ], [ %arrayctor.next, %invoke.cont ]
+  %x.i = getelementptr inbounds %struct.myType2, %struct.myType2* %arrayctor.cur, i64 0, i32 2
+  invoke void @_ZN6myTypeC1Ev(%struct.myType* nonnull %x.i)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %arrayctor.loop
+  %arrayctor.next = getelementptr inbounds %struct.myType2, %struct.myType2* %arrayctor.cur, i64 1
+  %arrayctor.done = icmp eq %struct.myType2* %arrayctor.next, %arrayctor.end
+  br i1 %arrayctor.done, label %arrayctor.cont, label %arrayctor.loop
+
+arrayctor.cont:                                   ; preds = %invoke.cont, %entry
+  ret %struct.myType2* %5
+
+lpad:                                             ; preds = %arrayctor.loop
+  %6 = landingpad { i8*, i32 }
+          cleanup
+  tail call void @_ZdaPv(i8* nonnull %call) #6
+  resume { i8*, i32 } %6
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #1
+
+; Function Attrs: nobuiltin
+declare noalias nonnull i8* @_Znam(i64) local_unnamed_addr #2
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: nobuiltin nounwind
+declare void @_ZdaPv(i8*) local_unnamed_addr #3
+
+declare void @_ZN6myTypeC1Ev(%struct.myType*) unnamed_addr #4
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nobuiltin nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #5 = { builtin }
+attributes #6 = { builtin nounwind }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"PIC Level", i32 2}
+!2 = !{!"clang version 6.0.0 (git@github.ibm.com:llvm/clang.git 61f290578c0923263d4551e1f8287d4bcd71da95) (llvm/llvm.git 5f491ee27ef421710a0be7a6156d74ee99dbaa9a)"}