Index: llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -468,8 +468,10 @@
 static Value *insertValues(IRBuilder<> &Builder,
                            Type *Ty,
                            SmallVectorImpl<Value *> &Values) {
-  if (Values.size() == 1)
+  if (!Ty->isVectorTy()) {
+    assert(Values.size() == 1);
     return Values[0];
+  }
 
   Value *NewVal = UndefValue::get(Ty);
   for (int I = 0, E = Values.size(); I != E; ++I)
Index: llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
+++ llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
@@ -56,6 +56,35 @@
   ret i32 %mul
 }
 
+define <2 x i8> @f(<1 x i16> %arg) {
+; SI-LABEL: @f(
+; SI-NEXT:  BB:
+; SI-NEXT:    [[TMP0:%.*]] = extractelement <1 x i16> [[ARG:%.*]], i64 0
+; SI-NEXT:    [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
+; SI-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 42)
+; SI-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
+; SI-NEXT:    [[MUL:%.*]] = insertelement <1 x i16> undef, i16 [[TMP3]], i64 0
+; SI-NEXT:    [[CAST:%.*]] = bitcast <1 x i16> [[MUL]] to <2 x i8>
+; SI-NEXT:    ret <2 x i8> [[CAST]]
+;
+; VI-LABEL: @f(
+; VI-NEXT:  BB:
+; VI-NEXT:    [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], <i16 42>
+; VI-NEXT:    [[CAST:%.*]] = bitcast <1 x i16> [[MUL]] to <2 x i8>
+; VI-NEXT:    ret <2 x i8> [[CAST]]
+;
+; DISABLED-LABEL: @f(
+; DISABLED-NEXT:  BB:
+; DISABLED-NEXT:    [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], <i16 42>
+; DISABLED-NEXT:    [[CAST:%.*]] = bitcast <1 x i16> [[MUL]] to <2 x i8>
+; DISABLED-NEXT:    ret <2 x i8> [[CAST]]
+;
+BB:
+  %mul = mul <1 x i16> %arg, <i16 42>
+  %cast = bitcast <1 x i16> %mul to <2 x i8>
+  ret <2 x i8> %cast
+}
+
 define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; SI-LABEL: @smul24_v2i32(
 ; SI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]],