Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -277,13 +277,13 @@ /// Implement the transforms common to all CastInst visitors. Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); + auto *Ty = CI.getType(); // Try to eliminate a cast of a cast. if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) { // The first cast (CSrc) is eliminable so we need to fix up or replace // the second cast (CI). CSrc will then have a good chance of being dead. - auto *Ty = CI.getType(); auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty); // Point debug users of the dying cast to the new one. if (CSrc->hasOneUse()) @@ -319,6 +319,23 @@ return NV; } + // Canonicalize a unary shuffle after the cast if neither operation changes + // the size or element size of the input vector. + // TODO: We could allow size-changing ops if that doesn't harm codegen. 
+ // cast (shuffle X, Mask) --> shuffle (cast X), Mask + Value *X; + ArrayRef<int> Mask; + if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) { + auto *SrcTy = dyn_cast<FixedVectorType>(X->getType()); + auto *DestTy = dyn_cast<FixedVectorType>(Ty); + if (SrcTy && DestTy && + SrcTy->getNumElements() == DestTy->getNumElements() && + SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) { + Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy); + return new ShuffleVectorInst(CastX, UndefValue::get(DestTy), Mask); + } + } + return nullptr; } Index: llvm/test/Transforms/InstCombine/X86/x86-f16c-inseltpoison.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-f16c-inseltpoison.ll +++ llvm/test/Transforms/InstCombine/X86/x86-f16c-inseltpoison.ll @@ -24,8 +24,8 @@ ; All 8 elements required. define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) { ; CHECK-LABEL: @demand_vcvtph2ps_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <8 x half> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[A:%.*]] to <8 x half> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> undef, <8 x i32> ; CHECK-NEXT: [[CVTPH2PS:%.*]] = fpext <8 x half> [[TMP2]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[CVTPH2PS]] ; Index: llvm/test/Transforms/InstCombine/X86/x86-f16c.ll =================================================================== --- llvm/test/Transforms/InstCombine/X86/x86-f16c.ll +++ llvm/test/Transforms/InstCombine/X86/x86-f16c.ll @@ -24,8 +24,8 @@ ; All 8 elements required. 
define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) { ; CHECK-LABEL: @demand_vcvtph2ps_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <8 x half> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[A:%.*]] to <8 x half> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> undef, <8 x i32> ; CHECK-NEXT: [[CVTPH2PS:%.*]] = fpext <8 x half> [[TMP2]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[CVTPH2PS]] ; Index: llvm/test/Transforms/InstCombine/shuffle-cast-dist.ll =================================================================== --- llvm/test/Transforms/InstCombine/shuffle-cast-dist.ll +++ llvm/test/Transforms/InstCombine/shuffle-cast-dist.ll @@ -4,8 +4,8 @@ define <2 x float> @vtrn1(<2 x i32> %v) ; CHECK-LABEL: @vtrn1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[R_UNCASTED:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = bitcast <2 x i32> [[R_UNCASTED]] to <2 x float> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <2 x float> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x float> [[R]] ; { Index: llvm/test/Transforms/InstCombine/shufflevec-bitcast-inseltpoison.ll =================================================================== --- llvm/test/Transforms/InstCombine/shufflevec-bitcast-inseltpoison.ll +++ llvm/test/Transforms/InstCombine/shufflevec-bitcast-inseltpoison.ll @@ -56,9 +56,9 @@ define <4 x i32> @splat_bitcast_operand_same_size_src_elt(<4 x float> %x) { ; CHECK-LABEL: @splat_bitcast_operand_same_size_src_elt( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = bitcast <4 x float> [[S1]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[S2]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[X:%.*]] to <4 
x i32> +; CHECK-NEXT: [[BC:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[BC]] ; %s1 = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> %bc = bitcast <4 x float> %s1 to <4 x i32> Index: llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll =================================================================== --- llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll +++ llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll @@ -56,9 +56,9 @@ define <4 x i32> @splat_bitcast_operand_same_size_src_elt(<4 x float> %x) { ; CHECK-LABEL: @splat_bitcast_operand_same_size_src_elt( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = bitcast <4 x float> [[S1]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[S2]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[X:%.*]] to <4 x i32> +; CHECK-NEXT: [[BC:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[BC]] ; %s1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %bc = bitcast <4 x float> %s1 to <4 x i32> Index: llvm/test/Transforms/InstCombine/vector-casts.ll =================================================================== --- llvm/test/Transforms/InstCombine/vector-casts.ll +++ llvm/test/Transforms/InstCombine/vector-casts.ll @@ -413,8 +413,8 @@ define <4 x float> @sitofp_shuf(<4 x i32> %x) { ; CHECK-LABEL: @sitofp_shuf( -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = sitofp <4 x i32> [[S]] to <4 x float> +; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[X:%.*]] to <4 x float> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -424,8 +424,8 @@ define <3 x half> @uitofp_shuf(<3 x i16> %x) { ; CHECK-LABEL: @uitofp_shuf( -; 
CHECK-NEXT: [[S:%.*]] = shufflevector <3 x i16> [[X:%.*]], <3 x i16> poison, <3 x i32> -; CHECK-NEXT: [[R:%.*]] = uitofp <3 x i16> [[S]] to <3 x half> +; CHECK-NEXT: [[TMP1:%.*]] = uitofp <3 x i16> [[X:%.*]] to <3 x half> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x half> [[TMP1]], <3 x half> undef, <3 x i32> ; CHECK-NEXT: ret <3 x half> [[R]] ; %s = shufflevector <3 x i16> %x, <3 x i16> poison, <3 x i32> @@ -435,8 +435,8 @@ define <4 x i64> @fptosi_shuf(<4 x double> %x) { ; CHECK-LABEL: @fptosi_shuf( -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = fptosi <4 x double> [[S]] to <4 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = fptosi <4 x double> [[X:%.*]] to <4 x i64> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i64> [[R]] ; %s = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> @@ -446,8 +446,8 @@ define <2 x i32> @fptoui_shuf(<2 x float> %x) { ; CHECK-LABEL: @fptoui_shuf( -; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[R:%.*]] = fptoui <2 x float> [[S]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fptoui <2 x float> [[X:%.*]] to <2 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[R]] ; %s = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> @@ -455,6 +455,9 @@ ret <2 x i32> %r } +; negative test +; TODO: Should we reduce the width of the shuffle? 
+ define <4 x half> @narrowing_sitofp_shuf(<4 x i32> %x) { ; CHECK-LABEL: @narrowing_sitofp_shuf( ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> @@ -466,6 +469,8 @@ ret <4 x half> %r } +; negative test + define <4 x double> @widening_uitofp_shuf(<4 x i32> %x) { ; CHECK-LABEL: @widening_uitofp_shuf( ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> @@ -477,6 +482,8 @@ ret <4 x double> %r } +; negative test + define <3 x i64> @fptosi_narrowing_shuf(<4 x double> %x) { ; CHECK-LABEL: @fptosi_narrowing_shuf( ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <3 x i32> @@ -488,6 +495,9 @@ ret <3 x i64> %r } +; negative test +; TODO: Should we reduce the width of the cast? + define <3 x i32> @fptoui_widening_shuf(<2 x float> %x) { ; CHECK-LABEL: @fptoui_widening_shuf( ; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <3 x i32> @@ -499,6 +509,9 @@ ret <3 x i32> %r } +; negative test +; TODO: Should we reduce the width of the cast? + define <4 x half> @narrowing_sitofp_widening_shuf(<2 x i32> %x) { ; CHECK-LABEL: @narrowing_sitofp_widening_shuf( ; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> @@ -512,6 +525,8 @@ declare void @use(<4 x i32>) +; negative test + define <4 x float> @sitofp_shuf_extra_use(<4 x i32> %x) { ; CHECK-LABEL: @sitofp_shuf_extra_use( ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32>