diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3940,7 +3940,7 @@
     }
   }
 
-  if (DestVT.getSizeInBits() != 64 && !DestVT.isVector())
+  if (DestVT.getSizeInBits() != 64 || !DestVT.isVector())
     break;
 
   // Fold bitcasts of constants.
@@ -3949,14 +3949,12 @@
   // TODO: Generalize and move to DAGCombiner
   SDValue Src = N->getOperand(0);
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) {
-    if (Src.getValueType() == MVT::i64) {
-      SDLoc SL(N);
-      uint64_t CVal = C->getZExtValue();
-      SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
-                               DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
-                               DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
-      return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);
-    }
+    SDLoc SL(N);
+    uint64_t CVal = C->getZExtValue();
+    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+                             DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
+                             DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
+    return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);
   }
 
   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
@@ -298,3 +298,15 @@
   %div = udiv <2 x i64> %cast, %arg
   ret <2 x i64> %div
 }
+
+declare half @llvm.canonicalize.f16(half)
+
+; FUNC-LABEL: {{^}}bitcast_f32_to_v1i32:
+define amdgpu_kernel void @bitcast_f32_to_v1i32(i32 addrspace(1)* %out) {
+  %f16 = call arcp afn half @llvm.canonicalize.f16(half 0xH03F0)
+  %f32 = fpext half %f16 to float
+  %v = bitcast float %f32 to <1 x i32>
+  %v1 = extractelement <1 x i32> %v, i32 0
+  store i32 %v1, i32 addrspace(1)* %out
+  ret void
+}