diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1543,11 +1543,13 @@ // If this is a widening shuffle, we must be able to extend with undef // elements. If the original binop does not produce an undef in the high // lanes, then this transform is not safe. + // Similarly for undef lanes due to the shuffle mask, we can only + // transform binops that preserve undef. // TODO: We could shuffle those non-undef constant values into the // result by using a constant vector (rather than an undef vector) // as operand 1 of the new binop, but that might be too aggressive // for target-independent shuffle creation. - if (I >= SrcVecNumElts) { + if (I >= SrcVecNumElts || ShMask[I] < 0) { Constant *MaybeUndef = ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt) : ConstantExpr::get(Opcode, CElt, UndefScalar); diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -1004,10 +1004,13 @@ ret <2 x i32> %r } +; AND does not fold to undef for undef operands, we cannot move it +; across a shuffle with undef masks. define <4 x i16> @and_constant_mask_undef(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[AND]] ; entry: @@ -1016,10 +1019,13 @@ ret <4 x i16> %and } +; AND does not fold to undef for undef operands, we cannot move it +; across a shuffle with undef masks. define <4 x i16> @and_constant_mask_undef_2(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[AND]] ; entry: @@ -1028,10 +1034,13 @@ ret <4 x i16> %and } +; FIXME: We should be able to move the AND across the shuffle, as -1 (AND identity value) is used for undef lanes. define <4 x i16> @and_constant_mask_undef_3(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i16> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], +; CHECK-NEXT: ret <4 x i16> [[AND]] ; entry: %shuffle = shufflevector <4 x i16> %add, <4 x i16> undef, <4 x i32> @@ -1039,11 +1048,12 @@ ret <4 x i16> %and } +; FIXME: We should be able to move the AND across the shuffle, as -1 (AND identity value) is used for undef lanes. define <4 x i16> @and_constant_mask_undef_4(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i16> [[ADD:%.*]], -; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[AND]] ; entry: @@ -1052,7 +1062,6 @@ ret <4 x i16> %and } - define <4 x i16> @and_constant_mask_not_undef(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_not_undef( ; CHECK-NEXT: entry: @@ -1066,10 +1075,13 @@ ret <4 x i16> %and } +; OR does not fold to undef for undef operands, we cannot move it +; across a shuffle with undef masks. define <4 x i16> @or_constant_mask_undef(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[OR]] ; entry: @@ -1078,10 +1090,13 @@ ret <4 x i16> %or } +; OR does not fold to undef for undef operands, we cannot move it +; across a shuffle with undef masks. define <4 x i16> @or_constant_mask_undef_2(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[OR]] ; entry: @@ -1090,10 +1105,13 @@ ret <4 x i16> %or } +; FIXME: We should be able to move the OR across the shuffle, as 0 (OR identity value) is used for undef lanes. define <4 x i16> @or_constant_mask_undef_3(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret <4 x i16> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], +; CHECK-NEXT: ret <4 x i16> [[OR]] ; entry: %shuffle = shufflevector <4 x i16> %in, <4 x i16> undef, <4 x i32> @@ -1101,11 +1119,12 @@ ret <4 x i16> %or } +; FIXME: We should be able to move the OR across the shuffle, as 0 (OR identity value) is used for undef lanes. define <4 x i16> @or_constant_mask_undef_4(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef_4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i16> [[IN:%.*]], -; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[OR]] ; entry: @@ -1130,8 +1149,8 @@ define <4 x i16> @shl_constant_mask_undef(<4 x i16> %in) { ; CHECK-LABEL: @shl_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i16> [[IN:%.*]], -; CHECK-NEXT: [[SHL:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[SHL]] ; entry: