diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -95,6 +95,12 @@
   bool HasUnsafeFPMath = false;
   bool HasFP32Denormals = false;
 
+  /// Memoized results of canBreakPHINode(), keyed by PHI node.
+  DenseMap<const PHINode *, bool> BreakPhiNodesCache;
+
+  /// Returns true if the heuristics consider breaking PHI \p I worthwhile.
+  bool canBreakPHINode(const PHINode &I);
+
   /// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
   /// binary operation \p V.
   ///
@@ -1398,48 +1404,107 @@
   return Changed;
 }
 
+// Returns true when \p A and \p B are both instructions located in the same
+// basic block.
+static bool areInSameBB(const Value *A, const Value *B) {
+  const auto *IA = dyn_cast<Instruction>(A);
+  const auto *IB = dyn_cast<Instruction>(B);
+  return IA && IB && IA->getParent() == IB->getParent();
+}
+
 // Helper for breaking large PHIs that returns true when an extractelement on V
 // is likely to be folded away by the DAG combiner.
-static bool isInterestingPHIIncomingValue(Value *V, FixedVectorType *FVT) {
-  InsertElementInst *IE = dyn_cast<InsertElementInst>(V);
+static bool isInterestingPHIIncomingValue(const Value *V) {
+  const auto *FVT = dyn_cast<FixedVectorType>(V->getType());
+  if (!FVT)
+    return false;
 
-  // Constants & InsertElements chains are interesting.
-  if (!IE)
-    return isa<Constant>(V);
+  const Value *CurVal = V;
 
-  // Check if this is a simple chain of insertelement that fills the vector. If
-  // that's the case, we can break up this PHI node profitably because the
-  // extractelement we will insert will get folded out.
-  BasicBlock *BB = IE->getParent();
+  // Check for insertelements, keeping track of the elements covered.
   BitVector EltsCovered(FVT->getNumElements());
-  InsertElementInst *Next = IE;
-  while (Next && !EltsCovered.all()) {
-    ConstantInt *Idx = dyn_cast<ConstantInt>(Next->getOperand(2));
+  while (const auto *IE = dyn_cast<InsertElementInst>(CurVal)) {
+    const auto *Idx = dyn_cast<ConstantInt>(IE->getOperand(2));
 
     // Non constant index/out of bounds index -> folding is unlikely.
-    // Note that this is more of a sanity check - canonical IR should
-    // already have replaced those with poison.
-    if (!Idx || Idx->getSExtValue() >= FVT->getNumElements())
+    // The latter is more of a sanity check because canonical IR should just
+    // have replaced those with poison.
+    if (!Idx || Idx->getZExtValue() >= FVT->getNumElements())
       return false;
 
-    EltsCovered.set(Idx->getSExtValue());
+    const auto *VecSrc = IE->getOperand(0);
+
+    // If the vector source is another instruction, it must be in the same basic
+    // block. Otherwise, the DAGCombiner won't see the whole thing and is
+    // unlikely to be able to do anything interesting here.
+    if (isa<Instruction>(VecSrc) && !areInSameBB(VecSrc, IE))
+      return false;
+
+    CurVal = VecSrc;
+    EltsCovered.set(Idx->getZExtValue());
 
-    // If the insertelement chain ends with a constant, it's fine.
-    if (isa<Constant>(Next->getOperand(0)))
+    // All elements covered.
+    if (EltsCovered.all())
       return true;
+  }
 
-    Next = dyn_cast<InsertElementInst>(Next->getOperand(0));
+  // We either didn't find a single insertelement, or the insertelement chain
+  // ended before all elements were covered. Check for other interesting values.
 
-    // If the chain is spread across basic blocks, the DAG combiner
-    // won't see it in its entirety and is unlikely to be able to fold
-    // evevrything away.
-    if (Next && Next->getParent() != BB)
-      return false;
+  // Constants are always interesting because we can just constant fold the
+  // extractelements.
+  if (isa<Constant>(CurVal))
+    return true;
+
+  // shufflevector is likely to be profitable if either operand is a constant,
+  // or if either source is in the same block.
+  // This is because shufflevector is most often lowered as a series of
+  // insert/extract elements anyway.
+  if (const auto *SV = dyn_cast<ShuffleVectorInst>(CurVal)) {
+    return isa<Constant>(SV->getOperand(1)) ||
+           areInSameBB(SV, SV->getOperand(0)) ||
+           areInSameBB(SV, SV->getOperand(1));
+  }
+
+  return false;
+}
+
+bool AMDGPUCodeGenPrepare::canBreakPHINode(const PHINode &I) {
+  // Check in the cache, or add an entry for this node.
+  //
+  // We init with false because we consider all PHI nodes unbreakable until we
+  // reach a conclusion. Doing the opposite - assuming they're break-able until
+  // proven otherwise - can be harmful in some pathological cases so we're
+  // conservative for now.
+  const auto [It, DidInsert] = BreakPhiNodesCache.insert({&I, false});
+  if (!DidInsert)
+    return It->second;
+
+  // This function may recurse, so to guard against infinite looping, this PHI
+  // is conservatively considered unbreakable until we reach a conclusion.
+
+  // Don't break PHIs that have no interesting incoming values. That is, where
+  // there is no clear opportunity to fold the "extractelement" instructions we
+  // would add.
+  //
+  // Note: IC does not run after this pass, so we're only interested in the
+  // foldings that the DAG combiner can do.
+  if (none_of(I.incoming_values(),
+              [&](Value *V) { return isInterestingPHIIncomingValue(V); }))
+    return false;
+
+  // Now, check users for unbreakable PHI nodes. If we have an unbreakable PHI
+  // node as user, we don't want to break this PHI either because it's unlikely
+  // to be beneficial. We would just explode the vector and reassemble it
+  // directly, wasting instructions.
+  for (const Value *U : I.users()) {
+    if (const auto *PU = dyn_cast<PHINode>(U)) {
+      if (!canBreakPHINode(*PU))
+        return false;
+    }
   }
 
-  // All elements covered, all of the extract elements will likely be
-  // combined.
-  return EltsCovered.all();
+  return BreakPhiNodesCache[&I] = true;
 }
 
 bool AMDGPUCodeGenPrepare::visitPHINode(PHINode &I) {
@@ -1460,23 +1525,8 @@
   if (!FVT || DL->getTypeSizeInBits(FVT) <= ScalarizeLargePHIsThreshold)
     return false;
 
-  // Try to avoid unprofitable cases:
-  // - Don't break PHIs that have no interesting incoming values. That is, where
-  // there is no clear opportunity to fold the "extractelement" instructions we
-  // would add.
-  //   - Note: IC does not run after this pass, so we're only interested in the
-  //   folding that the DAG combiner can do.
-  // - For simplicity, don't break PHIs that are used by other PHIs because it'd
-  // require us to determine if the whole "chain" can be converted or not. e.g.
-  // if we broke this PHI but not its user, we would actually make things worse.
-  if (!ForceScalarizeLargePHIs) {
-    if (none_of(
-            I.incoming_values(),
-            [&](Value *V) { return isInterestingPHIIncomingValue(V, FVT); }) ||
-        any_of(I.users(), [&](User *U) { return isa<PHINode>(U); })) {
-      return false;
-    }
-  }
+  if (!ForceScalarizeLargePHIs && !canBreakPHINode(I))
+    return false;
 
   struct VectorSlice {
     Type *Ty = nullptr;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll @@ -138,6 +138,197 @@ ret void } +define amdgpu_kernel void @shufflevec_inc_with_cst_op(<5 x double> %in, ptr %out, i1 %cond) { +; CHECK-LABEL: @shufflevec_inc_with_cst_op( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4 +; CHECK-NEXT: br label [[FINALLY:%.*]] +; CHECK: else: +; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <5 x double> [[IN]], <5 x double> poison, <5 x i32> +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0 +; 
CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4 +; CHECK-NEXT: br label [[FINALLY]] +; CHECK: finally: +; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4 +; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: ret void +; +entry: + br i1 %cond, label %then, label %else + +then: + %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3 + br label %finally + +else: + %shuffled = shufflevector <5 x double> %in, <5 x double> poison, 
<5 x i32> + br label %finally + +finally: + %val = phi <5 x double> [ %x, %then ], [ %shuffled, %else ] + store <5 x double> %val, ptr %out, align 1 + ret void +} + +define amdgpu_kernel void @shufflevec_inc_with_local_lhs(<5 x double> %in, ptr %out, i1 %cond) { +; CHECK-LABEL: @shufflevec_inc_with_local_lhs( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4 +; CHECK-NEXT: br label [[FINALLY:%.*]] +; CHECK: else: +; CHECK-NEXT: [[LOCAL_SHUFFLE_SRC:%.*]] = insertelement <5 x double> [[IN]], double 3.250000e+00, i64 2 +; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <5 x double> [[LOCAL_SHUFFLE_SRC]], <5 x double> [[IN]], <5 x i32> +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4 +; CHECK-NEXT: br label [[FINALLY]] +; CHECK: finally: +; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ 
[[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4 +; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: ret void +; +entry: + br i1 %cond, label %then, label %else + +then: + %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3 + br label %finally + +else: + %local.shuffle.src = insertelement <5 x double> %in, double 3.250000e+00, i64 2 + %shuffled = shufflevector <5 x double> %local.shuffle.src, <5 x double> %in, <5 x i32> + br label %finally + +finally: + %val = phi <5 x double> [ %x, %then ], [ %shuffled, %else ] + store <5 x double> %val, ptr %out, align 1 + ret void +} + +define amdgpu_kernel void @shufflevec_inc_with_local_rhs(<5 x double> %in, ptr %out, i1 %cond) { +; CHECK-LABEL: @shufflevec_inc_with_local_rhs( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3 +; CHECK-NEXT: 
[[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4 +; CHECK-NEXT: br label [[FINALLY:%.*]] +; CHECK: else: +; CHECK-NEXT: [[LOCAL_SHUFFLE_SRC:%.*]] = insertelement <5 x double> [[IN]], double 3.250000e+00, i64 2 +; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <5 x double> [[IN]], <5 x double> [[LOCAL_SHUFFLE_SRC]], <5 x i32> +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[SHUFFLED]], i64 4 +; CHECK-NEXT: br label [[FINALLY]] +; CHECK: finally: +; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 +; CHECK-NEXT: 
[[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4 +; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: ret void +; +entry: + br i1 %cond, label %then, label %else + +then: + %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3 + br label %finally + +else: + %local.shuffle.src = insertelement <5 x double> %in, double 3.250000e+00, i64 2 + %shuffled = shufflevector <5 x double> %in, <5 x double> %local.shuffle.src, <5 x i32> + br label %finally + +finally: + %val = phi <5 x double> [ %x, %then ], [ %shuffled, %else ] + store <5 x double> %val, ptr %out, align 1 + ret void +} + +define amdgpu_kernel void @shufflevec_inc_with_nonlocal_ops(<5 x double> %in, ptr %out, i1 %cond) { +; CHECK-LABEL: @shufflevec_inc_with_nonlocal_ops( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SHUFFLE_SRC:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.250000e+00, i64 2 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN]], double 3.140000e+00, i64 3 +; CHECK-NEXT: br label [[FINALLY:%.*]] +; CHECK: else: +; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <5 x double> [[SHUFFLE_SRC]], <5 x double> [[IN]], <5 x i32> +; CHECK-NEXT: br label [[FINALLY]] +; CHECK: finally: +; CHECK-NEXT: [[VAL:%.*]] = phi <5 x double> [ [[X]], [[THEN]] ], [ [[SHUFFLED]], [[ELSE]] ] +; CHECK-NEXT: store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: ret void +; +entry: + %shuffle.src = insertelement <5 x double> %in, 
double 3.250000e+00, i64 2 + br i1 %cond, label %then, label %else + +then: + %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3 + br label %finally + +else: + %shuffled = shufflevector <5 x double> %shuffle.src, <5 x double> %in, <5 x i32> + br label %finally + +finally: + %val = phi <5 x double> [ %x, %then ], [ %shuffled, %else ] + store <5 x double> %val, ptr %out, align 1 + ret void +} + define amdgpu_kernel void @trivial_insertelt_chain(<5 x double> %in, ptr %out, i1 %cond, double %x, double %y, double %z) { ; CHECK-LABEL: @trivial_insertelt_chain( ; CHECK-NEXT: entry: @@ -246,20 +437,39 @@ ret void } -define amdgpu_kernel void @nontrivial_insertelt_chain(<5 x double> %in, ptr %out, i1 %cond, double %x, i32 %idx) { -; CHECK-LABEL: @nontrivial_insertelt_chain( +define amdgpu_kernel void @insertelt_shufflevec(<5 x double> %in, ptr %out, i1 %cond, double %x, i32 %idx) { +; CHECK-LABEL: @insertelt_shufflevec( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[X_1:%.*]] = insertelement <5 x double> , double [[X:%.*]], i32 [[IDX:%.*]] ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <5 x double> [[X_1]], <5 x double> , <5 x i32> ; CHECK-NEXT: [[X_4:%.*]] = insertelement <5 x double> [[TMP0]], double [[X]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X_4]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X_4]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X_4]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X_4]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X_4]], i64 4 ; CHECK-NEXT: br label [[FINALLY:%.*]] ; CHECK: else: +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[IN:%.*]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> 
[[IN]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[IN]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[IN]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[IN]], i64 4 ; CHECK-NEXT: br label [[FINALLY]] ; CHECK: finally: -; CHECK-NEXT: [[VAL:%.*]] = phi <5 x double> [ [[X_4]], [[THEN]] ], [ [[IN:%.*]], [[ELSE]] ] -; CHECK-NEXT: store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE01]], [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE12]], [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE23]], [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE34]], [[ELSE]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ [[LARGEPHI_EXTRACTSLICE45]], [[ELSE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP1]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP2]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP3]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP4]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP5]], i64 4 +; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1 ; CHECK-NEXT: ret void ; entry: @@ -354,38 +564,52 @@ ret void } -define amdgpu_kernel void @used_by_phi(<5 x double> %in, ptr %out, i1 %cond, i1 %cond2) { -; CHECK-LABEL: @used_by_phi( 
+define amdgpu_kernel void @used_by_breakable_phi(<5 x double> %in, ptr %out, i1 %cond, i1 %cond2) { +; CHECK-LABEL: @used_by_breakable_phi( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[X]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[X]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 4 ; CHECK-NEXT: br label [[FINALLY:%.*]] ; CHECK: else: ; CHECK-NEXT: br label [[FINALLY]] ; CHECK: finally: -; CHECK-NEXT: [[VAL:%.*]] = phi <5 x double> [ [[X]], [[THEN]] ], [ zeroinitializer, [[ELSE]] ] -; CHECK-NEXT: store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1 -; CHECK-NEXT: br i1 [[COND2:%.*]], label [[THEN1:%.*]], label [[END:%.*]] -; CHECK: then1: -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[VAL]], i64 0 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[VAL]], i64 1 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[VAL]], i64 2 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[VAL]], i64 3 -; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[VAL]], i64 4 -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] -; 
CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE1]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE3]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ] ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4 -; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT]], align 1 +; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: br i1 [[COND2:%.*]], label [[THEN1:%.*]], label [[END:%.*]] +; CHECK: then1: +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE12:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE23:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 2 +; CHECK-NEXT: 
[[LARGEPHI_EXTRACTSLICE34:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE45:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE01]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE12]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE23]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE34]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE45]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE06:%.*]] = insertelement <5 x double> poison, double [[TMP5]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE17:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE06]], double [[TMP6]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE28:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE17]], double [[TMP7]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE39:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE28]], double [[TMP8]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE410:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE39]], double [[TMP9]], i64 4 +; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE410]], ptr [[OUT]], align 1 ; CHECK-NEXT: ret void ; entry: @@ -411,3 +635,75 @@ store <5 x double> %endval, ptr %out, align 1 ret void } + +define amdgpu_kernel void @used_by_unbreakable_phi(<5 x double> %in, ptr %out, i1 %cond, i1 %cond2) { +; CHECK-LABEL: @used_by_unbreakable_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3 +; CHECK-NEXT: 
br label [[FINALLY:%.*]] +; CHECK: else: +; CHECK-NEXT: br label [[FINALLY]] +; CHECK: finally: +; CHECK-NEXT: [[VAL:%.*]] = phi <5 x double> [ [[X]], [[THEN]] ], [ zeroinitializer, [[ELSE]] ] +; CHECK-NEXT: store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: br i1 [[COND2:%.*]], label [[THEN1:%.*]], label [[END:%.*]] +; CHECK: then1: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[ENDVAL:%.*]] = phi <5 x double> [ [[VAL]], [[THEN1]] ], [ [[IN]], [[FINALLY]] ] +; CHECK-NEXT: store <5 x double> [[ENDVAL]], ptr [[OUT]], align 1 +; CHECK-NEXT: ret void +; +entry: + br i1 %cond, label %then, label %else + +then: + %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3 + br label %finally + +else: + br label %finally + +finally: + %val = phi <5 x double> [ %x, %then ], [ zeroinitializer, %else ] + store <5 x double> %val, ptr %out, align 1 + br i1 %cond2, label %then1, label %end + +then1: + br label %end + +end: + %endval = phi <5 x double> [ %val, %then1 ], [ %in, %finally ] + store <5 x double> %endval, ptr %out, align 1 + ret void +} + +; check for infinite recursion +define amdgpu_kernel void @used_by_phi_self(<5 x double> %in, ptr %out, i8 %count) { +; CHECK-LABEL: @used_by_phi_self( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[VAL:%.*]] = phi <5 x double> [ [[IN:%.*]], [[ENTRY:%.*]] ], [ [[VAL]], [[LOOP]] ] +; CHECK-NEXT: store <5 x double> [[VAL]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: [[COUNT_DEC:%.*]] = sub i8 [[COUNT:%.*]], 0 +; CHECK-NEXT: [[COND:%.*]] = icmp ne i8 [[COUNT]], 0 +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %val = phi <5 x double> [ %in, %entry ], [ %val, %loop ] + store <5 x double> %val, ptr %out, align 1 + %count.dec = sub i8 %count, 0 + %cond = icmp ne i8 %count, 0 + br i1 %cond, label %loop, label %end + +end: + ret void +}