diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -684,6 +684,15 @@ setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); } + + // Custom-legalize bitcasts from fixed-length vectors to scalar types. + setOperationAction(ISD::BITCAST, MVT::i8, Custom); + setOperationAction(ISD::BITCAST, MVT::i16, Custom); + setOperationAction(ISD::BITCAST, MVT::i32, Custom); + setOperationAction(ISD::BITCAST, MVT::i64, Custom); + setOperationAction(ISD::BITCAST, MVT::f16, Custom); + setOperationAction(ISD::BITCAST, MVT::f32, Custom); + setOperationAction(ISD::BITCAST, MVT::f64, Custom); } } @@ -1470,27 +1479,42 @@ case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false); case ISD::BITCAST: { + SDLoc DL(Op); SDValue Op0 = Op.getOperand(0); + MVT XLenVT = Subtarget.getXLenVT(); // We can handle fixed length vector bitcasts with a simple replacement // in isel. if (Op.getValueType().isFixedLengthVector()) { if (Op0.getValueType().isFixedLengthVector()) return Op; + if (!Op0.getValueType().isVector()) { + auto BVT = MVT::getVectorVT(Op0.getValueType().getSimpleVT(), 1); + return DAG.getBitcast(Op.getValueType(), + DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT, + DAG.getUNDEF(BVT), Op0, + DAG.getConstant(0, DL, XLenVT))); + } return SDValue(); } + if (!Op.getValueType().isVector() && + Op0.getValueType().isFixedLengthVector()) { + auto BV = DAG.getBitcast( + MVT::getVectorVT(Op.getValueType().getSimpleVT(), 1), Op0); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), BV, + DAG.getConstant(0, DL, XLenVT)); + } assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) || Subtarget.hasStdExtZfh()) && "Unexpected custom legalisation"); - SDLoc DL(Op); if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) { if (Op0.getValueType() != MVT::i16) return SDValue(); - SDValue NewOp0 = - DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0); + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0); SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0); return FPConv; - } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() && - Subtarget.hasStdExtF()) { + } + if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() && + Subtarget.hasStdExtF()) { if (Op0.getValueType() != MVT::i32) return SDValue(); SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); @@ -3954,18 +3978,27 @@ return; } case ISD::BITCAST: { - assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && - Subtarget.hasStdExtF()) || - (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && - "Unexpected custom legalisation"); + EVT VT = N->getValueType(0); SDValue Op0 = N->getOperand(0); - if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { + MVT XLenVT = Subtarget.getXLenVT(); + // Custom-legalize bitcasts from fixed-length vector types to illegal + // scalar types in order to improve codegen. + if (!VT.isVector() && Op0.getValueType().isFixedLengthVector()) { + LLVMContext &Context = *DAG.getContext(); + auto BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0); + Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec, + DAG.getConstant(0, DL, XLenVT))); + break; + } + assert(((VT == MVT::i32 && Subtarget.is64Bit() && Subtarget.hasStdExtF()) || + (VT == MVT::i16 && Subtarget.hasStdExtZfh())) && + "Unexpected custom legalisation"); + if (VT == MVT::i16 && Subtarget.hasStdExtZfh()) { if (Op0.getValueType() != MVT::f16) return; - SDValue FPConv = - DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0); + SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); - } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + } else if (VT == MVT::i32 && Subtarget.is64Bit() && Subtarget.hasStdExtF()) { if (Op0.getValueType() != MVT::f32) return; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV64 define <32 x i1> @bitcast_v4i8_v32i1(<4 x i8> %a, <32 x i1> %b) { ; CHECK-LABEL: bitcast_v4i8_v32i1: @@ -13,3 +13,367 @@ %d = xor <32 x i1> %b, %c ret <32 x i1> %d } + +define i8 @bitcast_v1i8_i8(<1 x i8> %a) { +; CHECK-LABEL: bitcast_v1i8_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <1 x i8> %a to i8 + ret i8 %b +} + +define i16 @bitcast_v2i8_i16(<2 x i8> %a) { +; CHECK-LABEL: bitcast_v2i8_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <2 x i8> %a to i16 + ret i16 %b +} + +define i16 @bitcast_v1i16_i16(<1 x i16> %a) { +; CHECK-LABEL: bitcast_v1i16_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <1 x i16> %a to i16 + ret i16 %b +} + +define i32 @bitcast_v4i8_i32(<4 x i8> %a) { +; CHECK-LABEL: bitcast_v4i8_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <4 x i8> %a to i32 + ret i32 %b +} + +define i32 @bitcast_v2i16_i32(<2 x i16> %a) { +; CHECK-LABEL: bitcast_v2i16_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <2 x i16> %a to i32 + ret i32 %b +} + +define i32 @bitcast_v1i32_i32(<1 x i32> %a) { +; CHECK-LABEL: bitcast_v1i32_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <1 x i32> %a to i32 + ret i32 %b +} + +define i64 @bitcast_v8i8_i64(<8 x i8> %a) { +; RV32-LABEL: bitcast_v8i8_i64: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v8, a0 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: bitcast_v8i8_i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret + %b = bitcast <8 x i8> %a to i64 + ret i64 %b +} + +define i64 @bitcast_v4i16_i64(<4 x i16> %a) { +; RV32-LABEL: bitcast_v4i16_i64: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v8, a0 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: bitcast_v4i16_i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret + %b = bitcast <4 x i16> %a to i64 + ret i64 %b +} + +define i64 @bitcast_v2i32_i64(<2 x i32> %a) { +; RV32-LABEL: bitcast_v2i32_i64: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v8, a0 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: bitcast_v2i32_i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret + %b = bitcast <2 x i32> %a to i64 + ret i64 %b +} + +define i64 @bitcast_v1i64_i64(<1 x i64> %a) { +; RV32-LABEL: bitcast_v1i64_i64: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v8, a0 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: bitcast_v1i64_i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret + %b = bitcast <1 x i64> %a to i64 + ret i64 %b +} + +define half @bitcast_v2i8_f16(<2 x i8> %a) { +; CHECK-LABEL: bitcast_v2i8_f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <2 x i8> %a to half + ret half %b +} + +define half @bitcast_v1i16_f16(<1 x i16> %a) { +; CHECK-LABEL: bitcast_v1i16_f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <1 x i16> %a to half + ret half %b +} + +define float @bitcast_v4i8_f32(<4 x i8> %a) { +; CHECK-LABEL: bitcast_v4i8_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <4 x i8> %a to float + ret float %b +} + +define float @bitcast_v2i16_f32(<2 x i16> %a) { +; CHECK-LABEL: bitcast_v2i16_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <2 x i16> %a to float + ret float %b +} + +define float @bitcast_v1i32_f32(<1 x i32> %a) { +; CHECK-LABEL: bitcast_v1i32_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <1 x i32> %a to float + ret float %b +} + +define double @bitcast_v8i8_f64(<8 x i8> %a) { +; RV32-LABEL: bitcast_v8i8_f64: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v8, a0 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: bitcast_v8i8_f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret + %b = bitcast <8 x i8> %a to double + ret double %b +} + +define double @bitcast_v4i16_f64(<4 x i16> %a) { +; RV32-LABEL: bitcast_v4i16_f64: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v8, a0 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: bitcast_v4i16_f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret + %b = bitcast <4 x i16> %a to double + ret double %b +} + +define double @bitcast_v2i32_f64(<2 x i32> %a) { +; RV32-LABEL: bitcast_v2i32_f64: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v8, a0 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: bitcast_v2i32_f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret + %b = bitcast <2 x i32> %a to double + ret double %b +} + +define double @bitcast_v1i64_f64(<1 x i64> %a) { +; RV32-LABEL: bitcast_v1i64_f64: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, 32 +; RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v8, a0 +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: bitcast_v1i64_f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: ret + %b = bitcast <1 x i64> %a to double + ret double %b +} + +define <1 x i16> @bitcast_i16_v1i16(i16 %a) { +; CHECK-LABEL: bitcast_i16_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %b = bitcast i16 %a to <1 x i16> + ret <1 x i16> %b +} + +define <2 x i16> @bitcast_i32_v2i16(i32 %a) { +; RV32-LABEL: bitcast_i32_v2i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; RV32-NEXT: vmv.s.x v8, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: bitcast_i32_v2i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: ret + %b = bitcast i32 %a to <2 x i16> + ret <2 x i16> %b +} + +define <1 x i32> @bitcast_i32_v1i32(i32 %a) { +; RV32-LABEL: bitcast_i32_v1i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; RV32-NEXT: vmv.s.x v8, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: bitcast_i32_v1i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: ret + %b = bitcast i32 %a to <1 x i32> + ret <1 x i32> %b +} + +define <4 x i16> @bitcast_i64_v4i16(i64 %a) { +; RV32-LABEL: bitcast_i64_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, 0 +; RV32-NEXT: vslide1up.vx v26, v25, a1 +; RV32-NEXT: vslide1up.vx v25, v26, a0 +; RV32-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-NEXT: vslideup.vi v8, v25, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: bitcast_i64_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-NEXT: vmv.s.x v8, a0 +; RV64-NEXT: ret + %b = bitcast i64 %a to <4 x i16> + ret <4 x i16> %b +} + +define <2 x i32> @bitcast_i64_v2i32(i64 %a) { +; RV32-LABEL: bitcast_i64_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, 0 +; RV32-NEXT: vslide1up.vx v26, v25, a1 +; RV32-NEXT: vslide1up.vx v25, v26, a0 +; RV32-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-NEXT: vslideup.vi v8, v25, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: bitcast_i64_v2i32: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-NEXT: vmv.s.x v8, a0 +; RV64-NEXT: ret + %b = bitcast i64 %a to <2 x i32> + ret <2 x i32> %b +} + +define <1 x i64> @bitcast_i64_v1i64(i64 %a) { +; RV32-LABEL: bitcast_i64_v1i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; RV32-NEXT: vmv.v.i v25, 0 +; RV32-NEXT: vslide1up.vx v26, v25, a1 +; RV32-NEXT: vslide1up.vx v25, v26, a0 +; RV32-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-NEXT: vslideup.vi v8, v25, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: bitcast_i64_v1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-NEXT: vmv.s.x v8, a0 +; RV64-NEXT: ret + %b = bitcast i64 %a to <1 x i64> + ret <1 x i64> %b +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll @@ -0,0 +1,525 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV32-FP +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV64-FP + +define i16 @bitcast_v1f16_i16(<1 x half> %a) { +; CHECK-LABEL: bitcast_v1f16_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <1 x half> %a to i16 + ret i16 %b +} + +define half @bitcast_v1f16_f16(<1 x half> %a) { +; CHECK-LABEL: bitcast_v1f16_f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fmv.x.h a0, ft0 +; CHECK-NEXT: ret + %b = bitcast <1 x half> %a to half + ret half %b +} + +define i32 @bitcast_v2f16_i32(<2 x half> %a) { +; CHECK-LABEL: bitcast_v2f16_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <2 x half> %a to i32 + ret i32 %b +} + +define i32 @bitcast_v1f32_i32(<1 x float> %a) { +; CHECK-LABEL: bitcast_v1f32_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret + %b = bitcast <1 x float> %a to i32 + ret i32 %b +} + +define float @bitcast_v2f16_f32(<2 x half> %a) { +; RV32-FP-LABEL: bitcast_v2f16_f32: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV32-FP-NEXT: vmv.x.s a0, v8 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_v2f16_f32: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV64-FP-NEXT: vfmv.f.s ft0, v8 +; RV64-FP-NEXT: fmv.x.w a0, ft0 +; RV64-FP-NEXT: ret + %b = bitcast <2 x half> %a to float + ret float %b +} + +define float @bitcast_v1f32_f32(<1 x float> %a) { +; RV32-FP-LABEL: bitcast_v1f32_f32: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV32-FP-NEXT: vmv.x.s a0, v8 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_v1f32_f32: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV64-FP-NEXT: vfmv.f.s ft0, v8 +; RV64-FP-NEXT: fmv.x.w a0, ft0 +; RV64-FP-NEXT: ret + %b = bitcast <1 x float> %a to float + ret float %b +} + +define i64 @bitcast_v4f16_i64(<4 x half> %a) { +; RV32-FP-LABEL: bitcast_v4f16_i64: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: addi a0, zero, 32 +; RV32-FP-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-FP-NEXT: vsrl.vx v25, v8, a0 +; RV32-FP-NEXT: vmv.x.s a1, v25 +; RV32-FP-NEXT: vmv.x.s a0, v8 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_v4f16_i64: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.x.s a0, v8 +; RV64-FP-NEXT: ret + %b = bitcast <4 x half> %a to i64 + ret i64 %b +} + +define i64 @bitcast_v2f32_i64(<2 x float> %a) { +; RV32-FP-LABEL: bitcast_v2f32_i64: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: addi a0, zero, 32 +; RV32-FP-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-FP-NEXT: vsrl.vx v25, v8, a0 +; RV32-FP-NEXT: vmv.x.s a1, v25 +; RV32-FP-NEXT: vmv.x.s a0, v8 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_v2f32_i64: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.x.s a0, v8 +; RV64-FP-NEXT: ret + %b = bitcast <2 x float> %a to i64 + ret i64 %b +} + +define i64 @bitcast_v1f64_i64(<1 x double> %a) { +; RV32-FP-LABEL: bitcast_v1f64_i64: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: addi a0, zero, 32 +; RV32-FP-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV32-FP-NEXT: vsrl.vx v25, v8, a0 +; RV32-FP-NEXT: vmv.x.s a1, v25 +; RV32-FP-NEXT: vmv.x.s a0, v8 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_v1f64_i64: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.x.s a0, v8 +; RV64-FP-NEXT: ret + %b = bitcast <1 x double> %a to i64 + ret i64 %b +} + +define double @bitcast_v4f16_f64(<4 x half> %a) { +; RV32-FP-LABEL: bitcast_v4f16_f64: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: addi sp, sp, -16 +; RV32-FP-NEXT: .cfi_def_cfa_offset 16 +; RV32-FP-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-FP-NEXT: vfmv.f.s ft0, v8 +; RV32-FP-NEXT: fsd ft0, 8(sp) +; RV32-FP-NEXT: lw a0, 8(sp) +; RV32-FP-NEXT: lw a1, 12(sp) +; RV32-FP-NEXT: addi sp, sp, 16 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_v4f16_f64: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.x.s a0, v8 +; RV64-FP-NEXT: ret + %b = bitcast <4 x half> %a to double + ret double %b +} + +define double @bitcast_v2f32_f64(<2 x float> %a) { +; RV32-FP-LABEL: bitcast_v2f32_f64: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: addi sp, sp, -16 +; RV32-FP-NEXT: .cfi_def_cfa_offset 16 +; RV32-FP-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-FP-NEXT: vfmv.f.s ft0, v8 +; RV32-FP-NEXT: fsd ft0, 8(sp) +; RV32-FP-NEXT: lw a0, 8(sp) +; RV32-FP-NEXT: lw a1, 12(sp) +; RV32-FP-NEXT: addi sp, sp, 16 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_v2f32_f64: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.x.s a0, v8 +; RV64-FP-NEXT: ret + %b = bitcast <2 x float> %a to double + ret double %b +} + +define double @bitcast_v1f64_f64(<1 x double> %a) { +; RV32-FP-LABEL: bitcast_v1f64_f64: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: addi sp, sp, -16 +; RV32-FP-NEXT: .cfi_def_cfa_offset 16 +; RV32-FP-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-FP-NEXT: vfmv.f.s ft0, v8 +; RV32-FP-NEXT: fsd ft0, 8(sp) +; RV32-FP-NEXT: lw a0, 8(sp) +; RV32-FP-NEXT: lw a1, 12(sp) +; RV32-FP-NEXT: addi sp, sp, 16 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_v1f64_f64: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.x.s a0, v8 +; RV64-FP-NEXT: ret + %b = bitcast <1 x double> %a to double + ret double %b +} + +define <1 x half> @bitcast_i16_v1f16(i16 %a) { +; CHECK-LABEL: bitcast_i16_v1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %b = bitcast i16 %a to <1 x half> + ret <1 x half> %b +} + +define <2 x half> @bitcast_i32_v2f16(i32 %a) { +; RV32-FP-LABEL: bitcast_i32_v2f16: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; RV32-FP-NEXT: vmv.s.x v8, a0 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_i32_v2f16: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; RV64-FP-NEXT: vmv.v.x v8, a0 +; RV64-FP-NEXT: ret + %b = bitcast i32 %a to <2 x half> + ret <2 x half> %b +} + +define <1 x float> @bitcast_i32_v1f32(i32 %a) { +; RV32-FP-LABEL: bitcast_i32_v1f32: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; RV32-FP-NEXT: vmv.s.x v8, a0 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_i32_v1f32: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; RV64-FP-NEXT: vmv.v.x v8, a0 +; RV64-FP-NEXT: ret + %b = bitcast i32 %a to <1 x float> + ret <1 x float> %b +} + +define <4 x half> @bitcast_i64_v4f16(i64 %a) { +; RV32-FP-LABEL: bitcast_i64_v4f16: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; RV32-FP-NEXT: vmv.v.i v25, 0 +; RV32-FP-NEXT: vslide1up.vx v26, v25, a1 +; RV32-FP-NEXT: vslide1up.vx v25, v26, a0 +; RV32-FP-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-FP-NEXT: vslideup.vi v8, v25, 0 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_i64_v4f16: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.s.x v8, a0 +; RV64-FP-NEXT: ret + %b = bitcast i64 %a to <4 x half> + ret <4 x half> %b +} + +define <2 x float> @bitcast_i64_v2f32(i64 %a) { +; RV32-FP-LABEL: bitcast_i64_v2f32: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; RV32-FP-NEXT: vmv.v.i v25, 0 +; RV32-FP-NEXT: vslide1up.vx v26, v25, a1 +; RV32-FP-NEXT: vslide1up.vx v25, v26, a0 +; RV32-FP-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-FP-NEXT: vslideup.vi v8, v25, 0 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_i64_v2f32: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.s.x v8, a0 +; RV64-FP-NEXT: ret + %b = bitcast i64 %a to <2 x float> + ret <2 x float> %b +} + +define <1 x double> @bitcast_i64_v1f64(i64 %a) { +; RV32-FP-LABEL: bitcast_i64_v1f64: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: vsetivli a2, 2, e32,m1,ta,mu +; RV32-FP-NEXT: vmv.v.i v25, 0 +; RV32-FP-NEXT: vslide1up.vx v26, v25, a1 +; RV32-FP-NEXT: vslide1up.vx v25, v26, a0 +; RV32-FP-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-FP-NEXT: vslideup.vi v8, v25, 0 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_i64_v1f64: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.s.x v8, a0 +; RV64-FP-NEXT: ret + %b = bitcast i64 %a to <1 x double> + ret <1 x double> %b +} + +define <1 x i16> @bitcast_f16_v1i16(half %a) { +; CHECK-LABEL: bitcast_f16_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu +; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: ret + %b = bitcast half %a to <1 x i16> + ret <1 x i16> %b +} + +define <1 x half> @bitcast_f16_v1f16(half %a) { +; CHECK-LABEL: bitcast_f16_v1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.h.x ft0, a0 +; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu +; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: ret + %b = bitcast half %a to <1 x half> + ret <1 x half> %b +} + +define <2 x i16> @bitcast_f32_v2i16(float %a) { +; RV32-FP-LABEL: bitcast_f32_v2i16: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; RV32-FP-NEXT: vmv.s.x v8, a0 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_f32_v2i16: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: fmv.w.x ft0, a0 +; RV64-FP-NEXT: vsetivli a0, 1, e32,m1,ta,mu +; RV64-FP-NEXT: vfmv.s.f v8, ft0 +; RV64-FP-NEXT: ret + %b = bitcast float %a to <2 x i16> + ret <2 x i16> %b +} + +define <2 x half> @bitcast_f32_v2f16(float %a) { +; RV32-FP-LABEL: bitcast_f32_v2f16: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; RV32-FP-NEXT: vmv.s.x v8, a0 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_f32_v2f16: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: fmv.w.x ft0, a0 +; RV64-FP-NEXT: vsetivli a0, 1, e32,m1,ta,mu +; RV64-FP-NEXT: vfmv.s.f v8, ft0 +; RV64-FP-NEXT: ret + %b = bitcast float %a to <2 x half> + ret <2 x half> %b +} + +define <1 x i32> @bitcast_f32_v1i32(float %a) { +; RV32-FP-LABEL: bitcast_f32_v1i32: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; RV32-FP-NEXT: vmv.s.x v8, a0 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_f32_v1i32: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: fmv.w.x ft0, a0 +; RV64-FP-NEXT: vsetivli a0, 1, e32,m1,ta,mu +; RV64-FP-NEXT: vfmv.s.f v8, ft0 +; RV64-FP-NEXT: ret + %b = bitcast float %a to <1 x i32> + ret <1 x i32> %b +} + +define <1 x float> @bitcast_f32_v1f32(float %a) { +; RV32-FP-LABEL: bitcast_f32_v1f32: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; RV32-FP-NEXT: vmv.s.x v8, a0 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_f32_v1f32: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: fmv.w.x ft0, a0 +; RV64-FP-NEXT: vsetivli a0, 1, e32,m1,ta,mu +; RV64-FP-NEXT: vfmv.s.f v8, ft0 +; RV64-FP-NEXT: ret + %b = bitcast float %a to <1 x float> + ret <1 x float> %b +} + +define <4 x i16> @bitcast_f64_v4i16(double %a) { +; RV32-FP-LABEL: bitcast_f64_v4i16: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: addi sp, sp, -16 +; RV32-FP-NEXT: .cfi_def_cfa_offset 16 +; RV32-FP-NEXT: sw a0, 8(sp) +; RV32-FP-NEXT: sw a1, 12(sp) +; RV32-FP-NEXT: fld ft0, 8(sp) +; RV32-FP-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-FP-NEXT: vfmv.s.f v8, ft0 +; RV32-FP-NEXT: addi sp, sp, 16 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_f64_v4i16: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.s.x v8, a0 +; RV64-FP-NEXT: ret + %b = bitcast double %a to <4 x i16> + ret <4 x i16> %b +} + +define <4 x half> @bitcast_f64_v4f16(double %a) { +; RV32-FP-LABEL: bitcast_f64_v4f16: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: addi sp, sp, -16 +; RV32-FP-NEXT: .cfi_def_cfa_offset 16 +; RV32-FP-NEXT: sw a0, 8(sp) +; RV32-FP-NEXT: sw a1, 12(sp) +; RV32-FP-NEXT: fld ft0, 8(sp) +; RV32-FP-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-FP-NEXT: vfmv.s.f v8, ft0 +; RV32-FP-NEXT: addi sp, sp, 16 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_f64_v4f16: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.s.x v8, a0 +; RV64-FP-NEXT: ret + %b = bitcast double %a to <4 x half> + ret <4 x half> %b +} + +define <2 x i32> @bitcast_f64_v2i32(double %a) { +; RV32-FP-LABEL: bitcast_f64_v2i32: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: addi sp, sp, -16 +; RV32-FP-NEXT: .cfi_def_cfa_offset 16 +; RV32-FP-NEXT: sw a0, 8(sp) +; RV32-FP-NEXT: sw a1, 12(sp) +; RV32-FP-NEXT: fld ft0, 8(sp) +; RV32-FP-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-FP-NEXT: vfmv.s.f v8, ft0 +; RV32-FP-NEXT: addi sp, sp, 16 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_f64_v2i32: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.s.x v8, a0 +; RV64-FP-NEXT: ret + %b = bitcast double %a to <2 x i32> + ret <2 x i32> %b +} + +define <2 x float> @bitcast_f64_v2f32(double %a) { +; RV32-FP-LABEL: bitcast_f64_v2f32: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: addi sp, sp, -16 +; RV32-FP-NEXT: .cfi_def_cfa_offset 16 +; RV32-FP-NEXT: sw a0, 8(sp) +; RV32-FP-NEXT: sw a1, 12(sp) +; RV32-FP-NEXT: fld ft0, 8(sp) +; RV32-FP-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-FP-NEXT: vfmv.s.f v8, ft0 +; RV32-FP-NEXT: addi sp, sp, 16 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_f64_v2f32: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.s.x v8, a0 +; RV64-FP-NEXT: ret + %b = bitcast double %a to <2 x float> + ret <2 x float> %b +} + +define <1 x i64> @bitcast_f64_v1i64(double %a) { +; RV32-FP-LABEL: bitcast_f64_v1i64: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: addi sp, sp, -16 +; RV32-FP-NEXT: .cfi_def_cfa_offset 16 +; RV32-FP-NEXT: sw a0, 8(sp) +; RV32-FP-NEXT: sw a1, 12(sp) +; RV32-FP-NEXT: fld ft0, 8(sp) +; RV32-FP-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-FP-NEXT: vfmv.s.f v8, ft0 +; RV32-FP-NEXT: addi sp, sp, 16 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_f64_v1i64: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.s.x v8, a0 +; RV64-FP-NEXT: ret + %b = bitcast double %a to <1 x i64> + ret <1 x i64> %b +} + +define <1 x double> @bitcast_f64_v1f64(double %a) { +; RV32-FP-LABEL: bitcast_f64_v1f64: +; RV32-FP: # %bb.0: +; RV32-FP-NEXT: addi sp, sp, -16 +; RV32-FP-NEXT: .cfi_def_cfa_offset 16 +; RV32-FP-NEXT: sw a0, 8(sp) +; RV32-FP-NEXT: sw a1, 12(sp) +; RV32-FP-NEXT: fld ft0, 8(sp) +; RV32-FP-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; RV32-FP-NEXT: vfmv.s.f v8, ft0 +; RV32-FP-NEXT: addi sp, sp, 16 +; RV32-FP-NEXT: ret +; +; RV64-FP-LABEL: bitcast_f64_v1f64: +; RV64-FP: # %bb.0: +; RV64-FP-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; RV64-FP-NEXT: vmv.s.x v8, a0 +; RV64-FP-NEXT: ret + %b = bitcast double %a to <1 x double> + ret <1 x double> %b +}