Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1280,52 +1280,83 @@ // The opcodes must be the same. Use a new name to make that clear. BinaryOperator::BinaryOps BOpc = Opc0; + // We are moving a binop after a shuffle. When a shuffle has an undefined + // mask element, the result is undefined, but it is not poison or undefined + // behavior. That is not necessarily true for div/rem/shift. + Constant *Mask = Shuf.getMask(); + bool MightCreatePoisonOrUB = + Mask->containsUndefElement() && + (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc)); + + Constant *NewC; Value *V; if (X == Y) { + NewC = ConstantExpr::getShuffleVector(C0, C1, Mask); + + // The new binop constant must not have any potential for extra poison/UB. + if (MightCreatePoisonOrUB) { + // TODO: Use getBinOpAbsorber for LHS replacement constants? + if (!ConstantsAreOp1) + return nullptr; + + Type *EltTy = Shuf.getType()->getVectorElementType(); + auto *IdC = ConstantExpr::getBinOpIdentity(BOpc, EltTy, true); + if (!IdC) + return nullptr; + + // Replace undef elements caused by the mask with identity constants. + NewC = ConstantExpr::getShuffleVector(C0, C1, Mask); + unsigned NumElts = Shuf.getType()->getVectorNumElements(); + SmallVector VectorOfNewC(NumElts); + for (unsigned i = 0; i != NumElts; i++) { + if (isa(Mask->getAggregateElement(i))) + VectorOfNewC[i] = IdC; + else + VectorOfNewC[i] = NewC->getAggregateElement(i); + } + NewC = ConstantVector::get(VectorOfNewC); + } + // Remove a binop and the shuffle by rearranging the constant: // shuffle (op V, C0), (op V, C1), M --> op V, C' // shuffle (op C0, V), (op C1, V), M --> op C', V V = X; - } else if (!Instruction::isIntDivRem(BOpc) && - (B0->hasOneUse() || B1->hasOneUse())) { + } else { // If there are 2 different variable operands, we must create a new shuffle // (select) first, so check uses to ensure that we don't end up with more // instructions than we started with. - // + if (!B0->hasOneUse() && !B1->hasOneUse()) + return nullptr; + + // Bail out if we can not guarantee safety of the transform. + // TODO: We could try harder by replacing the undef mask elements. + if (MightCreatePoisonOrUB) + return nullptr; + // Note: In general, we do not create new shuffles in InstCombine because we // do not know if a target can lower an arbitrary shuffle optimally. In this // case, the shuffle uses the existing mask, so there is no additional risk. - // - // TODO: We are disallowing div/rem because a shuffle with an undef mask - // element would propagate an undef value to the div/rem. That's not - // safe in general because div/rem allow for undefined behavior. We can - // loosen this restriction (eg, check if the mask has no undefs or replace - // undef elements). // Select the variable vectors first, then perform the binop: // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C' // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M) - V = Builder.CreateShuffleVector(X, Y, Shuf.getMask()); - } else { - return nullptr; + NewC = ConstantExpr::getShuffleVector(C0, C1, Mask); + V = Builder.CreateShuffleVector(X, Y, Mask); } - Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Shuf.getMask()); - - // If the shuffle mask contains undef elements, then the new constant - // vector will have undefs in those lanes. This could cause the entire - // binop to be undef. - if (Instruction::isIntDivRem(BOpc)) - NewC = getSafeVectorConstantForIntDivRem(NewC); - Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) : BinaryOperator::Create(BOpc, NewC, V); - // Flags are intersected from the 2 source binops. + // Flags are intersected from the 2 source binops. But there are 2 exceptions: + // 1. If we changed an opcode, poison conditions might have changed. + // 2. If the shuffle had undef mask elements, the new binop might have undefs + // where the original code did not. Drop all poison potential to be safe. NewBO->copyIRFlags(B0); NewBO->andIRFlags(B1); if (DropNSW) NewBO->setHasNoSignedWrap(false); + if (Mask->containsUndefElement()) + NewBO->dropPoisonGeneratingFlags(); return NewBO; } Index: test/Transforms/InstCombine/shuffle_select.ll =================================================================== --- test/Transforms/InstCombine/shuffle_select.ll +++ test/Transforms/InstCombine/shuffle_select.ll @@ -451,11 +451,11 @@ ret <4 x i32> %t3 } -; FIXME: Poison flags must be dropped or undef must be replaced with safe constant. +; Poison flags must be dropped or undef must be replaced with safe constant. define <4 x i32> @add_add_nsw_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @add_add_nsw_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = add nsw <4 x i32> [[V0:%.*]], +; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = add nsw <4 x i32> %v0, @@ -499,11 +499,11 @@ ret <4 x i32> %t3 } -; FIXME: Poison flags must be dropped or undef must be replaced with safe constant. +; Poison flags must be dropped or undef must be replaced with safe constant. define <4 x i32> @sub_sub_nuw_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @sub_sub_nuw_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = sub nuw <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = sub nuw <4 x i32> , %v0 @@ -550,11 +550,11 @@ ret <4 x i32> %t3 } -; FIXME: Shift by undef is poison. Undef must be replaced by safe constant. +; Shift by undef is poison. Undef must be replaced by safe constant. define <4 x i32> @shl_shl_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @shl_shl_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], +; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = shl <4 x i32> %v0, @@ -563,11 +563,12 @@ ret <4 x i32> %t3 } -; FIXME: Shift by undef is poison. Undef must be replaced by safe constant. +; Shift by undef is poison. Undef must be replaced by safe constant. +; TODO: We could have propagated 'nuw' here. define <4 x i32> @shl_shl_nuw_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @shl_shl_nuw_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]], +; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = shl nuw <4 x i32> %v0, @@ -695,9 +696,11 @@ ret <4 x i32> %t3 } +; TODO: 'exact' could have propagated. + define <4 x i32> @sdiv_sdiv_exact_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @sdiv_sdiv_exact_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], +; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = sdiv exact <4 x i32> %v0, @@ -717,9 +720,13 @@ ret <4 x i32> %t3 } +; TODO: This could be folded by using a safe constant. + define <4 x i32> @urem_urem_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @urem_urem_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T1:%.*]] = urem <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T2:%.*]] = urem <4 x i32> , [[V0]] +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = urem <4 x i32> , %v0 @@ -739,9 +746,13 @@ ret <4 x i32> %t3 } +; TODO: This could be folded by using a safe constant. + define <4 x i32> @srem_srem_undef_mask_elt(<4 x i32> %v0) { ; CHECK-LABEL: @srem_srem_undef_mask_elt( -; CHECK-NEXT: [[T3:%.*]] = srem <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T1:%.*]] = srem <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T2:%.*]] = srem <4 x i32> , [[V0]] +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = srem <4 x i32> , %v0 @@ -863,12 +874,12 @@ ret <4 x i32> %t3 } -; FIXME: Poison flags must be dropped or undef must be replaced with safe constant. +; Poison flags must be dropped or undef must be replaced with safe constant. define <4 x i32> @sub_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sub_2_vars_nsw_undef_mask_elt( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> -; CHECK-NEXT: [[T3:%.*]] = sub nsw <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = sub nsw <4 x i32> , %v0 @@ -916,12 +927,12 @@ ret <4 x i32> %t3 } -; FIXME: Poison flags must be dropped or undef must be replaced with safe constant. +; Poison flags must be dropped or undef must be replaced with safe constant. define <4 x i32> @mul_2_vars_nuw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @mul_2_vars_nuw_undef_mask_elt( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> -; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = mul nuw <4 x i32> %v0, @@ -956,12 +967,13 @@ ret <4 x i32> %t3 } -; FIXME: Shift by undef is poison. Undef must be replaced by safe constant. +; TODO: Shift by undef is poison. Undef must be replaced by safe constant. define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @shl_2_vars_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[TMP1]], +; CHECK-NEXT: [[T1:%.*]] = shl <4 x i32> [[V0:%.*]], +; CHECK-NEXT: [[T2:%.*]] = shl <4 x i32> [[V1:%.*]], +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = shl <4 x i32> %v0, @@ -970,12 +982,13 @@ ret <4 x i32> %t3 } -; FIXME: Shift by undef is poison. Undef must be replaced by safe constant. +; TODO: Shift by undef is poison. Undef must be replaced by safe constant. define <4 x i32> @shl_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @shl_2_vars_nsw_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> -; CHECK-NEXT: [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], +; CHECK-NEXT: [[T1:%.*]] = shl nsw <4 x i32> [[V0:%.*]], +; CHECK-NEXT: [[T2:%.*]] = shl nsw <4 x i32> [[V1:%.*]], +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = shl nsw <4 x i32> %v0, @@ -1010,13 +1023,13 @@ ret <4 x i32> %t3 } -; FIXME: This is not safe for the same reason that div/rem with undef mask is not safe. -; Analysis could determine that the shift has an undef shift amount creating poison. +; TODO: This could be transformed by using a safe constant. define <4 x i32> @lshr_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @lshr_2_vars_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> -; CHECK-NEXT: [[T3:%.*]] = lshr <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[T1:%.*]] = lshr <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> , [[V1:%.*]] +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = lshr <4 x i32> , %v0 @@ -1025,14 +1038,13 @@ ret <4 x i32> %t3 } -; FIXME: This is not safe for the same reason that div/rem with undef mask is not safe. -; Analysis could determine that the shift has an undef shift amount creating poison. -; The 'exact' could also result in poison by choosing a -1 shift value. +; TODO: This could be transformed by using a safe constant. define <4 x i32> @lshr_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @lshr_2_vars_exact_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> -; CHECK-NEXT: [[T3:%.*]] = lshr exact <4 x i32> , [[TMP1]] +; CHECK-NEXT: [[T1:%.*]] = lshr exact <4 x i32> , [[V0:%.*]] +; CHECK-NEXT: [[T2:%.*]] = lshr exact <4 x i32> , [[V1:%.*]] +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = lshr exact <4 x i32> , %v0 @@ -1107,9 +1119,8 @@ define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @udiv_2_vars( -; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> , [[V0:%.*]] -; CHECK-NEXT: [[T2:%.*]] = udiv <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = udiv <4 x i32> , [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = udiv <4 x i32> , %v0 @@ -1120,9 +1131,8 @@ define <4 x i32> @udiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @udiv_2_vars_exact( -; CHECK-NEXT: [[T1:%.*]] = udiv exact <4 x i32> , [[V0:%.*]] -; CHECK-NEXT: [[T2:%.*]] = udiv exact <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = udiv exact <4 x i32> , [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = udiv exact <4 x i32> , %v0 @@ -1131,6 +1141,8 @@ ret <4 x i32> %t3 } +; TODO: This could be transformed using a safe constant. + define <4 x i32> @udiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @udiv_2_vars_undef_mask_elt( ; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> , [[V0:%.*]] @@ -1144,6 +1156,8 @@ ret <4 x i32> %t3 } +; TODO: This could be transformed using a safe constant. + define <4 x i32> @udiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @udiv_2_vars_exact_undef_mask_elt( ; CHECK-NEXT: [[T1:%.*]] = udiv exact <4 x i32> , [[V0:%.*]] @@ -1157,13 +1171,12 @@ ret <4 x i32> %t3 } -; TODO: If the shuffle has no undefs, it's safe to shuffle the variables first. +; If the shuffle has no undefs, it's safe to shuffle the variables first. define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sdiv_2_vars( -; CHECK-NEXT: [[T1:%.*]] = sdiv <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = sdiv <4 x i32> [[V1:%.*]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = sdiv <4 x i32> %v0, @@ -1174,9 +1187,8 @@ define <4 x i32> @sdiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sdiv_2_vars_exact( -; CHECK-NEXT: [[T1:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = sdiv exact <4 x i32> [[V1:%.*]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = sdiv exact <4 x i32> %v0, @@ -1211,13 +1223,12 @@ ret <4 x i32> %t3 } -; TODO: If the shuffle has no undefs, it's safe to shuffle the variables first. +; If the shuffle has no undefs, it's safe to shuffle the variables first. define <4 x i32> @urem_2_vars(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @urem_2_vars( -; CHECK-NEXT: [[T1:%.*]] = urem <4 x i32> , [[V0:%.*]] -; CHECK-NEXT: [[T2:%.*]] = urem <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> , [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = urem <4 x i32> , %v0