Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -274,6 +274,31 @@
   return Instruction::CastOps(Res);
 }
 
+/// Return true if another cast user of \p PN (other than \p CI) feeds a
+/// binary operation that loop vectorization would consider a reduction
+/// (add/sub/mul/and/or/xor). Folding \p CI into the phi in that situation
+/// can break the widened-reduction pattern the vectorizer looks for.
+static bool castFeedsReduction(PHINode *PN, CastInst &CI) {
+  for (User *PhiUser : PN->users()) {
+    auto *UserCI = dyn_cast<CastInst>(PhiUser);
+    if (UserCI && UserCI != &CI) {
+      for (User *U : UserCI->users()) {
+        auto *BO = dyn_cast<BinaryOperator>(U);
+        if (!BO)
+          return false;
+        switch (BO->getOpcode()) {
+        case Instruction::Add:
+        case Instruction::Sub:
+        case Instruction::Mul:
+        case Instruction::And:
+        case Instruction::Or:
+        case Instruction::Xor:
+          return true;
+        default:
+          break;
+        }
+      }
+    }
+  }
+  return false;
+}
+
 /// Implement the transforms common to all CastInst visitors.
 Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
   Value *Src = CI.getOperand(0);
@@ -311,12 +336,18 @@
 
   // If we are casting a PHI, then fold the cast into the PHI.
   if (auto *PN = dyn_cast<PHINode>(Src)) {
-    // Don't do this if it would create a PHI node with an illegal type from a
-    // legal type.
-    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
-        shouldChangeType(CI.getSrcTy(), CI.getType()))
-      if (Instruction *NV = foldOpIntoPhi(CI, PN))
-        return NV;
+    // Don't do this if there is another user of this phi which is also a cast
+    // instruction which then feeds an instruction that vectorization would
+    // consider for reductions. We might be able to simplify the other cast in
+    // a way that does not hinder loop vectorization opportunities.
+    if (!castFeedsReduction(PN, CI)) {
+      // Don't do this if it would create a PHI node with an illegal type from a
+      // legal type.
+      if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
+          shouldChangeType(CI.getSrcTy(), CI.getType()))
+        if (Instruction *NV = foldOpIntoPhi(CI, PN))
+          return NV;
+    }
   }
 
   // Canonicalize a unary shuffle after the cast if neither operation changes
Index: llvm/test/Transforms/InstCombine/phi-multiple-zext.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/phi-multiple-zext.ll
@@ -0,0 +1,39 @@
+; RUN: opt -instcombine -S %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+
+@x = internal global [16384 x i8] zeroinitializer, align 16
+
+define internal fastcc void @do_one() {
+entry:
+  call void (i8*, ...) @obfuscate(i8* noundef getelementptr inbounds ([16384 x i8], [16384 x i8]* @x, i64 0, i64 0))
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %a.0 = phi i8 [ 0, %entry ], [ %conv2, %for.body ]
+  %cmp = icmp ult i64 %i.0, 16384
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  ; CHECK-NOT: %conv = zext i8 %0 to i32
+  ; CHECK-NOT: %conv1 = zext i8 %a.0 to i32
+  ; CHECK-NOT: %add = add nsw i32 %conv1, %conv
+  ; CHECK-NOT: %conv2 = trunc i32 %add to i8
+  ; CHECK: %add = add i8 %a.0, %0
+  %arrayidx = getelementptr inbounds [16384 x i8], [16384 x i8]* @x, i64 0, i64 %i.0
+  %0 = load i8, i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %conv1 = zext i8 %a.0 to i32
+  %add = add nsw i32 %conv1, %conv
+  %conv2 = trunc i32 %add to i8
+  %inc = add i64 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %conv3 = zext i8 %a.0 to i32
+  call void (i8*, ...) @obfuscate(i8* noundef getelementptr inbounds ([16384 x i8], [16384 x i8]* @x, i64 0, i64 0), i32 noundef signext %conv3)
+  ret void
+}
+
+declare void @obfuscate(i8* noundef, ...)