Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -62,21 +62,31 @@
   return false;
 }
 
-// If we have a PHI node with a vector type that has only 2 uses: feed
+// If we have a PHI node with a vector type that is only used to feed
 // itself and be an operand of extractelement at a constant location,
 // try to replace the PHI of the vector type with a PHI of a scalar type.
 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
-  // Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
-  if (!PN->hasNUses(2))
-    return nullptr;
+  SmallVector<Instruction *, 2> Extracts;
+  // The users we want the PHI to have are:
+  // 1) The EI ExtractElement (we already know this)
+  // 2) Possibly more ExtractElements with the same index.
+  // 3) Another operand, which will feed back into the PHI.
+  Instruction *PHIUser = nullptr;
+  for (auto U : PN->users()) {
+    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
+      if (EI.getIndexOperand() == EU->getIndexOperand())
+        Extracts.push_back(EU);
+      else
+        return nullptr;
+    } else if (!PHIUser) {
+      PHIUser = cast<Instruction>(U);
+    } else {
+      return nullptr;
+    }
+  }
 
-  // If so, it's known at this point that one operand is PHI and the other is
-  // an extractelement node. Find the PHI user that is not the extractelement
-  // node.
-  auto iu = PN->user_begin();
-  Instruction *PHIUser = dyn_cast<Instruction>(*iu);
-  if (PHIUser == cast<Instruction>(&EI))
-    PHIUser = cast<Instruction>(*(++iu));
+  if (!PHIUser)
+    return nullptr;
 
   // Verify that this PHI user has one use, which is the PHI itself,
   // and that it is a binary operation which is cheap to scalarize.
@@ -126,7 +136,11 @@
       scalarPHI->addIncoming(newEI, inBB);
     }
   }
-  return replaceInstUsesWith(EI, scalarPHI);
+
+  for (auto E : Extracts)
+    replaceInstUsesWith(*E, scalarPHI);
+
+  return &EI;
 }
 
 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
Index: llvm/trunk/test/Transforms/InstCombine/vec_phi_extract.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/vec_phi_extract.ll
+++ llvm/trunk/test/Transforms/InstCombine/vec_phi_extract.ll
@@ -1,8 +1,9 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define void @f(i64 %val, i32 %limit, i32 *%ptr) {
-;CHECK: %0 = trunc i64
-;CHECK: %1 = phi i32
+; CHECK-LABEL: @f
+; CHECK: %0 = trunc i64 %val to i32
+; CHECK: %1 = phi i32 [ %0, %entry ], [ {{.*}}, %loop ]
 entry:
   %tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
   %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
@@ -25,18 +26,72 @@
   ret void
 }
 
+define void @copy(i64 %val, i32 %limit, i32 *%ptr) {
+; CHECK-LABEL: @copy
+; CHECK: %0 = trunc i64 %val to i32
+; CHECK: %1 = phi i32 [ %0, %entry ], [ {{.*}}, %loop ]
+entry:
+  %tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
+  %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
+  %0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+  %1 = trunc <16 x i64> %0 to <16 x i32>
+  br label %loop
+
+loop:
+  %2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
+  %elt = extractelement <16 x i32> %2, i32 0
+  %eltcopy = extractelement <16 x i32> %2, i32 0
+  %end = icmp ult i32 %elt, %limit
+  %3 = add i32 10, %eltcopy
+  %4 = sext i32 %elt to i64
+  %5 = getelementptr i32, i32* %ptr, i64 %4
+  store i32 %3, i32* %5
+  %inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  br i1 %end, label %loop, label %ret
+
+ret:
+  ret void
+}
+
+define void @nocopy(i64 %val, i32 %limit, i32 *%ptr) {
+; CHECK-LABEL: @nocopy
+; CHECK-NOT: phi i32
+; CHECK: phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
+entry:
+  %tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
+  %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
+  %0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+  %1 = trunc <16 x i64> %0 to <16 x i32>
+  br label %loop
+
+loop:
+  %2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
+  %elt = extractelement <16 x i32> %2, i32 0
+  %eltcopy = extractelement <16 x i32> %2, i32 1
+  %end = icmp ult i32 %elt, %limit
+  %3 = add i32 10, %eltcopy
+  %4 = sext i32 %elt to i64
+  %5 = getelementptr i32, i32* %ptr, i64 %4
+  store i32 %3, i32* %5
+  %inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  br i1 %end, label %loop, label %ret
+
+ret:
+  ret void
+}
+
 define i1 @g(<3 x i32> %input_2) {
-; CHECK: extractelement
+; CHECK-LABEL: @g
+; CHECK: extractelement <3 x i32> %input_2, i32 0
 entry:
   br label %for.cond
 
 for.cond:
-; CHECK: phi i32
   %input_2.addr.0 = phi <3 x i32> [ %input_2, %entry ], [ %div45, %for.body ]
   %input_1.addr.1 = phi <3 x i32> [ undef, %entry ], [ %dec43, %for.body ]
   br i1 undef, label %for.end, label %for.body
 
-; CHECK: extractelement
+; CHECK: extractelement <3 x i32> %{{.*}}, i32 0
 for.body:
   %dec43 = add <3 x i32> %input_1.addr.1, <i32 -1, i32 -1, i32 -1>
   %sub44 = sub <3 x i32> <i32 -1, i32 -1, i32 -1>, %dec43
Index: llvm/trunk/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
===================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
+++ llvm/trunk/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
@@ -43,7 +43,7 @@
 
 ; CHECK-LABEL: @s173
 ; CHECK: load <4 x float>, <4 x float>*
-; CHECK: add nsw i64 %.lhs, 16000
+; CHECK: add nsw i64 %1, 16000
 ; CHECK: ret i32 0
 }
 