Index: lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCasts.cpp +++ lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1107,12 +1107,14 @@ Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); - // Attempt to extend the entire input expression tree to the destination - // type. Only do this if the dest type is a simple type, don't convert the + // Attempt to extend the entire input expression tree to the wide destination + // type. Only do this if the dest type is a simple type. Do not convert the // expression tree to something weird like i93 unless the source is also - // strange. + // strange. Do not attempt this with vectors because it might create + // unsupported operations that the backend cannot narrow back to the original + // type. unsigned BitsToClear; - if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) && + if (!DestTy->isVectorTy() && shouldChangeType(SrcTy, DestTy) && canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) { assert(BitsToClear <= SrcTy->getScalarSizeInBits() && "Can't clear more bits than in SrcTy"); @@ -1389,11 +1391,13 @@ return replaceInstUsesWith(CI, ZExt); } - // Attempt to extend the entire input expression tree to the destination - // type. Only do this if the dest type is a simple type, don't convert the + // Attempt to extend the entire input expression tree to the wide destination + // type. Only do this if the dest type is a simple type. Do not convert the // expression tree to something weird like i93 unless the source is also - // strange. - if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) && + // strange. Do not attempt this with vectors because it might create + // unsupported operations that the backend cannot narrow back to the original + // type. 
+ if (!DestTy->isVectorTy() && shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) { // Okay, we can transform this! Insert the new expression now. LLVM_DEBUG( Index: test/Transforms/InstCombine/cast.ll =================================================================== --- test/Transforms/InstCombine/cast.ll +++ test/Transforms/InstCombine/cast.ll @@ -592,9 +592,11 @@ define <2 x i64> @test46vec(<2 x i64> %A) { ; CHECK-LABEL: @test46vec( -; CHECK-NEXT: [[C:%.*]] = shl <2 x i64> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[C]], -; CHECK-NEXT: ret <2 x i64> [[D]] +; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> +; CHECK-NEXT: [[C:%.*]] = shl <2 x i32> [[B]], +; CHECK-NEXT: [[D:%.*]] = and <2 x i32> [[C]], +; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[D]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[E]] ; %B = trunc <2 x i64> %A to <2 x i32> %C = and <2 x i32> %B, @@ -749,9 +751,9 @@ define <2 x i64> @test56vec(<2 x i16> %A) { ; CHECK-LABEL: @test56vec( -; CHECK-NEXT: [[P353:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[P354:%.*]] = lshr <2 x i64> [[P353]], -; CHECK-NEXT: [[P355:%.*]] = and <2 x i64> [[P354]], +; CHECK-NEXT: [[P353:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32> +; CHECK-NEXT: [[P354:%.*]] = lshr <2 x i32> [[P353]], +; CHECK-NEXT: [[P355:%.*]] = zext <2 x i32> [[P354]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[P355]] ; %p353 = sext <2 x i16> %A to <2 x i32> @@ -774,8 +776,9 @@ define <2 x i64> @test57vec(<2 x i64> %A) { ; CHECK-LABEL: @test57vec( -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i64> [[A:%.*]], -; CHECK-NEXT: [[E:%.*]] = and <2 x i64> [[C]], +; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> +; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[C]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[E]] ; %B = trunc <2 x i64> %A to <2 x i32> Index: test/Transforms/InstCombine/select-bitext.ll 
=================================================================== --- test/Transforms/InstCombine/select-bitext.ll +++ test/Transforms/InstCombine/select-bitext.ll @@ -98,9 +98,8 @@ define <2 x i64> @trunc_sel_larger_sext_vec(<2 x i32> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_larger_sext_vec( -; CHECK-NEXT: [[TRUNC:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i64> [[TRUNC]], -; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i64> [[SEXT]], +; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i32> [[A:%.*]] to <2 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TRUNC]] to <2 x i64> ; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i64> [[TMP1]], <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[EXT]] ; @@ -125,9 +124,8 @@ define <2 x i32> @trunc_sel_smaller_sext_vec(<2 x i64> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_smaller_sext_vec( -; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[TRUNC]], -; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i32> [[SEXT]], +; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TRUNC]] to <2 x i32> ; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[EXT]] ; @@ -152,9 +150,9 @@ define <2 x i32> @trunc_sel_equal_sext_vec(<2 x i32> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_equal_sext_vec( -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i32> [[SEXT]], -; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = ashr exact <2 x i32> [[TMP1]], +; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[EXT]] ; %trunc = trunc <2 x i32> %a to <2 x i16> @@ -178,9 +176,9 @@ define <2 x i64> 
@trunc_sel_larger_zext_vec(<2 x i32> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_larger_zext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i64> [[TMP2]], <2 x i64> +; CHECK-NEXT: [[TRUNC_MASK:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[TRUNC_MASK]] to <2 x i64> +; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i64> [[TMP1]], <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[EXT]] ; %trunc = trunc <2 x i32> %a to <2 x i16> @@ -204,9 +202,9 @@ define <2 x i32> @trunc_sel_smaller_zext_vec(<2 x i64> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_smaller_zext_vec( -; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[TRUNC]], -; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[EXT]] ; %trunc = trunc <2 x i64> %a to <2 x i16> Index: test/Transforms/InstCombine/vector-casts.ll =================================================================== --- test/Transforms/InstCombine/vector-casts.ll +++ test/Transforms/InstCombine/vector-casts.ll @@ -163,8 +163,8 @@ define <2 x i65> @foo(<2 x i64> %t) { ; CHECK-LABEL: @foo( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[T:%.*]], -; CHECK-NEXT: [[B:%.*]] = zext <2 x i64> [[TMP1]] to <2 x i65> +; CHECK-NEXT: [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], +; CHECK-NEXT: [[B:%.*]] = zext <2 x i64> [[A_MASK]] to <2 x i65> ; CHECK-NEXT: ret <2 x i65> [[B]] ; %a = trunc <2 x i64> %t to <2 x i32> @@ -174,8 +174,8 @@ define <2 x i64> @bar(<2 x i65> %t) { ; CHECK-LABEL: @bar( -; CHECK-NEXT: [[A:%.*]] = trunc <2 x i65> [[T:%.*]] to 
<2 x i64> -; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[A]], +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64> +; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[TMP1]], ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i65> %t to <2 x i32> @@ -185,9 +185,8 @@ define <2 x i64> @bars(<2 x i65> %t) { ; CHECK-LABEL: @bars( -; CHECK-NEXT: [[A:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64> -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i64> [[A]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[SEXT]], +; CHECK-NEXT: [[A:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i32> +; CHECK-NEXT: [[B:%.*]] = sext <2 x i32> [[A]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i65> %t to <2 x i32> @@ -197,8 +196,8 @@ define <2 x i64> @quxs(<2 x i64> %t) { ; CHECK-LABEL: @quxs( -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i64> [[T:%.*]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[SEXT]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i64> %t to <2 x i32> @@ -384,9 +383,10 @@ define <2 x i64> @sext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @sext_less_casting_with_wideop( -; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i64> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i64> [[MUL]], -; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i64> [[SEXT]], +; CHECK-NEXT: [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32> +; CHECK-NEXT: [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32> +; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]] +; CHECK-NEXT: [[R:%.*]] = sext <2 x i32> [[MUL]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[R]] ; %xnarrow = trunc <2 x i64> %x to <2 x i32> @@ -398,8 +398,10 @@ define <2 x i64> @zext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @zext_less_casting_with_wideop( -; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i64> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = 
and <2 x i64> [[MUL]], +; CHECK-NEXT: [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32> +; CHECK-NEXT: [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32> +; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]] +; CHECK-NEXT: [[R:%.*]] = zext <2 x i32> [[MUL]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[R]] ; %xnarrow = trunc <2 x i64> %x to <2 x i32>