Index: llvm/lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -379,3 +379,6 @@ def gi_frameindex_to_targetframeindex : GICustomOperandRenderer<"renderFrameIndex">, GISDNodeXFormEquiv; + +def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">, + GISDNodeXFormEquiv; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -328,6 +328,9 @@ void renderFrameIndex(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const; + void renderFPPow2ToExponent(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; + bool isInlineImmediate16(int64_t Imm) const; bool isInlineImmediate32(int64_t Imm) const; bool isInlineImmediate64(int64_t Imm) const; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -5098,6 +5098,15 @@ MIB.addFrameIndex(MI.getOperand(1).getIndex()); } +void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF(); + int ExpVal = APF.getExactLog2(); + assert(ExpVal != INT_MIN); + MIB.addImm(ExpVal); +} + bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const { return AMDGPU::isInlinableLiteral16(Imm, STI.hasInv2PiInlineImm()); } Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3316,6 +3316,69 @@ defm : 
Int16Med3Pat; } // End Predicates = [isGFX9Plus] + +// Convert a floating-point power of 2 to the integer exponent. +def FPPow2ToExponentXForm : SDNodeXFormgetValueAPF(); + int Log2 = APF.getExactLog2(); + assert(Log2 != INT_MIN); + return CurDAG->getTargetConstant(Log2, SDLoc(N), MVT::i32); +}]>; + +// Check if a floating point value is a power of 2 floating-point +// immediate where it's preferable to emit a multiply by it as an +// ldexp. We skip over 0.5 to 4.0 as those are inline immediates +// anyway. +class fpimm_pow2_prefer_ldexp : FPImmLeaf= -1 && Exp <= 2) + return false; + + // For f32 and f16, prefer ldexp if the integer constant is an + // inline immediate. For f64 ldexp is always better than + // materializing a 64-bit constant. + return (Exp >= -16) && (Exp <= 64); + }], FPPow2ToExponentXForm +>; + +def fpimm_pow2_prefer_ldexp_f64 : FPImmLeaf 2)); + }], FPPow2ToExponentXForm +>; + +// TODO: It's not a clear win on code size to use ldexp in place of +// mul for f32/f16, since we have to use the vop3 form, but we could +// emit ldexp here. Are there power savings or some other reason to +// prefer ldexp over mul? +// +// class FmulToLdexpPat : GCNPat< +// (any_fmul (vt (VOP3Mods vt:$src0, i32:$src0_mods)), +// fpimm_pow2_prefer_ldexp:$src1), +// (ldexp_inst i32:$src0_mods, vt:$src0, +// 0, (i32 (FPPow2ToExponentXForm $src1))) +// >; + +// def : FmulToLdexpPat; +// def : FmulToLdexpPat; + +// f64 is different because we also want to handle cases that may +// require materialization of the exponent. 
+// TODO: If we know f64 ops are fast, prefer add (ldexp x, N), y over fma +def : GCNPat< + (any_fmul (f64 (VOP3Mods f64:$src0, i32:$src0_mods)), + fpimm_pow2_prefer_ldexp_f64:$src1), + (V_LDEXP_F64_e64 i32:$src0_mods, VSrc_b64:$src0, + 0, (S_MOV_B32 (i32 (FPPow2ToExponentXForm $src1)))) +>; + class AMDGPUGenericInstruction : GenericInstruction { let Namespace = "AMDGPU"; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir @@ -204,3 +204,101 @@ G_STORE %15, %2 :: (store (s32), addrspace 1) ... + +--- +name: fmul_to_ldexp_f32 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: fmul_to_ldexp_f32 + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1098907648, implicit $exec + ; GCN-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_MUL_F32_e64_]] + ; GCN-NEXT: SI_RETURN implicit $vgpr0 + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_FCONSTANT float 1.600000e+01 + %2:vgpr(s32) = G_FMUL %0, %1 + $vgpr0 = COPY %2 + SI_RETURN implicit $vgpr0 + +... 
+ +--- +name: fmul_to_ldexp_f64_0 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GCN-LABEL: name: fmul_to_ldexp_f64_0 + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 + ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[COPY]], 0, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_LDEXP_F64_e64_]] + ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_FCONSTANT double 1.600000e+01 + %2:vgpr(s64) = G_FMUL %0, %1 + $vgpr0_vgpr1 = COPY %2 + SI_RETURN implicit $vgpr0_vgpr1 + +... + +--- +name: fmul_to_ldexp_f64_1 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GCN-LABEL: name: fmul_to_ldexp_f64_1 + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65 + ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 0, [[COPY]], 0, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_LDEXP_F64_e64_]] + ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_FCONSTANT double 36893488147419103232.0 + %2:vgpr(s64) = G_FMUL %0, %1 + $vgpr0_vgpr1 = COPY %2 + SI_RETURN implicit $vgpr0_vgpr1 + +... 
+ +--- +name: fmul_to_ldexp_f64_2 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GCN-LABEL: name: fmul_to_ldexp_f64_2 + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1070596096, implicit $exec + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY]], 0, [[REG_SEQUENCE]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MUL_F64_e64_]] + ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_FCONSTANT double -16.0 + %2:vgpr(s64) = G_FMUL %0, %1 + $vgpr0_vgpr1 = COPY %2 + SI_RETURN implicit $vgpr0_vgpr1 + +... Index: llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll +++ llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll @@ -716,203 +716,334 @@ ; 0x1p-1031 define double @v_mul_0x1pn1031_f64(double %x) { -; GFX9-LABEL: v_mul_0x1pn1031_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_movk_i32 s5, 0x800 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_0x1pn1031_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xfbf9 +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_0x1pn1031_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_movk_i32 s5, 0x800 -; GFX10-NEXT: 
v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_0x1pn1031_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_movk_i32 s5, 0x800 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_0x1pn1031_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_movk_i32 s1, 0x800 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_0x1pn1031_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xfffffbf9 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_0x1pn1031_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_movk_i32 s5, 0x800 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_0x1pn1031_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xfffffbf9 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_0x1pn1031_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_movk_i32 s1, 0x800 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 4.34584737989687770135e-311 ret double %mul } ; 0x1p-1022 define double @v_mul_0x1pn1022_f64(double %x) { -; GFX9-LABEL: v_mul_0x1pn1022_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 
0x100000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_0x1pn1022_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xfc02 +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_0x1pn1022_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x100000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_0x1pn1022_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x100000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_0x1pn1022_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x100000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_0x1pn1022_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xfffffc02 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_0x1pn1022_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x100000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_0x1pn1022_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xfffffc02 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_0x1pn1022_f64: +; 
GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x100000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 2.22507385850720138309e-308 ret double %mul } ; 0x1p-1021 define double @v_mul_0x1pn1021_f64(double %x) { -; GFX9-LABEL: v_mul_0x1pn1021_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x200000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_0x1pn1021_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xfc03 +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_0x1pn1021_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x200000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_0x1pn1021_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x200000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_0x1pn1021_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x200000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_0x1pn1021_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xfffffc03 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: 
v_mul_0x1pn1021_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x200000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_0x1pn1021_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xfffffc03 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_0x1pn1021_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x200000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 4.45014771701440276618e-308 ret double %mul } ; 0x1p-64 define double @v_mul_0x1pn64_f64(double %x) { -; GFX9-LABEL: v_mul_0x1pn64_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x3bf00000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_0x1pn64_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xffc0 +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_0x1pn64_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x3bf00000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_0x1pn64_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x3bf00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: 
s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_0x1pn64_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x3bf00000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_0x1pn64_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xffffffc0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_0x1pn64_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x3bf00000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_0x1pn64_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xffffffc0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_0x1pn64_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x3bf00000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 5.42101086242752217004e-20 ret double %mul } ; 0x1p-17 define double @v_mul_0x1pn17_f64(double %x) { -; GFX9-LABEL: v_mul_0x1pn17_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x3ee00000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_0x1pn17_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xffef +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; 
GFX10-LABEL: v_mul_0x1pn17_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x3ee00000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_0x1pn17_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x3ee00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_0x1pn17_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x3ee00000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_0x1pn17_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xffffffef +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_0x1pn17_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x3ee00000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_0x1pn17_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0xffffffef +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_0x1pn17_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x3ee00000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 0.00000762939453125 ret double %mul } ; 0x1p-16 define double 
@v_mul_0x1pn16_f64(double %x) { -; GFX9-LABEL: v_mul_0x1pn16_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x3ef00000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_0x1pn16_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -16 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_0x1pn16_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x3ef00000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_0x1pn16_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x3ef00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_0x1pn16_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x3ef00000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_0x1pn16_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -16 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_0x1pn16_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x3ef00000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_0x1pn16_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -16 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_0x1pn16_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x3ef00000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 0.0000152587890625 ret double %mul } ; 0x1p-15 define double @v_mul_0x1pn15_f64(double %x) { -; GFX9-LABEL: v_mul_0x1pn15_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0.5 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_0x1pn15_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -15 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_0x1pn15_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0.5 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_0x1pn15_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0.5 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_0x1pn15_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0.5 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_0x1pn15_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -15 +; GFX10-SDAG-NEXT: s_setpc_b64 
s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_0x1pn15_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0.5 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_0x1pn15_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -15 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_0x1pn15_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0.5 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 0.000030517578125 ret double %mul } @@ -1194,29 +1325,47 @@ } define double @v_mul_quarter_f64(double %x) { -; GFX9-LABEL: v_mul_quarter_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x3fd00000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_quarter_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -2 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_quarter_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x3fd00000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_quarter_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x3fd00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; 
-; GFX11-LABEL: v_mul_quarter_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x3fd00000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_quarter_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -2 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_quarter_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x3fd00000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_quarter_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], -2 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_quarter_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x3fd00000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 0.25 ret double %mul } @@ -1291,256 +1440,419 @@ } define double @v_mul_8_f64(double %x) { -; GFX9-LABEL: v_mul_8_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40200000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_8_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 3 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_8_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40200000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_8_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40200000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_8_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40200000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_8_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 3 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_8_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40200000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_8_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 3 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_8_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40200000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 8.0 ret double %mul } define double @v_mul_16_f64(double %x) { -; GFX9-LABEL: v_mul_16_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40300000 -; 
GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_16_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_16_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40300000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_16_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40300000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_16_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40300000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_16_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 4 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_16_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40300000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_16_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 4 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_16_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; 
GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40300000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 16.0 ret double %mul } define double @v_mul_32_f64(double %x) { -; GFX9-LABEL: v_mul_32_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40400000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_32_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_32_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40400000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_32_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40400000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_32_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40400000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_32_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_32_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40400000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_32_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_32_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40400000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 32.0 ret double %mul } define double @v_mul_64_f64(double %x) { -; GFX9-LABEL: v_mul_64_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40500000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_64_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 6 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_64_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40500000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_64_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40500000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_64_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40500000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_64_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 6 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_64_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40500000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_64_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 6 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_64_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40500000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 64.0 ret double %mul } define double @v_mul_128_f64(double %x) { -; GFX9-LABEL: v_mul_128_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40600000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_128_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 7 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_128_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40600000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_128_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40600000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; 
GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_128_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40600000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_128_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 7 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_128_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40600000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_128_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 7 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_128_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40600000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 128.0 ret double %mul } define double @v_mul_256_f64(double %x) { -; GFX9-LABEL: v_mul_256_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40700000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_256_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 8 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_256_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40700000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_256_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40700000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_256_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40700000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_256_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 8 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_256_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40700000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_256_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 8 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_256_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40700000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 256.0 ret double %mul } ; 0x1p+63 define double @v_mul_0x1p63_f64(double %x) { -; GFX9-LABEL: v_mul_0x1p63_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: 
s_mov_b32 s5, 0x43e00000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_0x1p63_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 63 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_0x1p63_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x43e00000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_0x1p63_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x43e00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_0x1p63_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x43e00000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_0x1p63_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 63 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_0x1p63_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x43e00000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_0x1p63_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 63 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_0x1p63_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x43e00000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 9223372036854775808.0 ret double %mul } ; 0x1p+64 define double @v_mul_0x1p64_f64(double %x) { -; GFX9-LABEL: v_mul_0x1p64_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x43f00000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_0x1p64_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 64 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_0x1p64_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x43f00000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_0x1p64_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x43f00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_0x1p64_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x43f00000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_0x1p64_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 64 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_0x1p64_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; 
GFX10-GISEL-NEXT: s_mov_b32 s5, 0x43f00000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_0x1p64_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 64 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_0x1p64_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x43f00000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 18446744073709551616.0 ret double %mul } ; 0x1p+65 define double @v_mul_0x1p65_f64(double %x) { -; GFX9-LABEL: v_mul_0x1p65_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_brev_b32 s5, 34 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_0x1p65_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x41 +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_0x1p65_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_brev_b32 s5, 34 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_0x1p65_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_brev_b32 s5, 34 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_0x1p65_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_brev_b32 
s1, 34 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_0x1p65_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x41 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_0x1p65_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_brev_b32 s5, 34 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_0x1p65_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x41 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_0x1p65_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_brev_b32 s1, 34 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 36893488147419103232.0 ret double %mul } @@ -1548,9 +1860,8 @@ define amdgpu_ps <2 x i32> @s_mul_0x1p65_f64(double inreg %x, double inreg %y) { ; GFX9-SDAG-LABEL: s_mul_0x1p65_f64: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v1, 34 -; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1] +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x41 +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], v0 ; GFX9-SDAG-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-SDAG-NEXT: v_readfirstlane_b32 s1, v1 ; GFX9-SDAG-NEXT: ; return to shader part epilog @@ -1566,14 +1877,37 @@ ; GFX9-GISEL-NEXT: v_readfirstlane_b32 s1, v1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; -; GFX1011-LABEL: s_mul_0x1p65_f64: -; GFX1011: ; %bb.0: -; GFX1011-NEXT: s_mov_b32 s2, 0 -; GFX1011-NEXT: s_brev_b32 s3, 34 -; GFX1011-NEXT: v_mul_f64 
v[0:1], s[0:1], s[2:3] -; GFX1011-NEXT: v_readfirstlane_b32 s0, v0 -; GFX1011-NEXT: v_readfirstlane_b32 s1, v1 -; GFX1011-NEXT: ; return to shader part epilog +; GFX10-SDAG-LABEL: s_mul_0x1p65_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], 0x41 +; GFX10-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-SDAG-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-GISEL-LABEL: s_mul_0x1p65_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_mov_b32 s2, 0 +; GFX10-GISEL-NEXT: s_brev_b32 s3, 34 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], s[2:3] +; GFX10-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-GISEL-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-GISEL-NEXT: ; return to shader part epilog +; +; GFX11-SDAG-LABEL: s_mul_0x1p65_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], 0x41 +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-SDAG-NEXT: ; return to shader part epilog +; +; GFX11-GISEL-LABEL: s_mul_0x1p65_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_mov_b32 s2, 0 +; GFX11-GISEL-NEXT: s_brev_b32 s3, 34 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], s[2:3] +; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-GISEL-NEXT: ; return to shader part epilog %mul = fmul contract double %x, 36893488147419103232.0 %cast = bitcast double %mul to <2 x i32> %cast.0 = extractelement <2 x i32> %cast, i32 0 @@ -1587,87 +1921,144 @@ ; 0x1p+128 define double @v_mul_0x1p128_f64(double %x) { -; GFX9-LABEL: v_mul_0x1p128_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x47f00000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_0x1p128_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: 
s_movk_i32 s4, 0x80 +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_0x1p128_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x47f00000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_0x1p128_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x47f00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_0x1p128_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x47f00000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_0x1p128_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x80 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_0x1p128_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x47f00000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_0x1p128_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x80 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_0x1p128_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x47f00000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 
%mul = fmul double %x, 3.40282366920938463463e+38 ret double %mul } ; 0x1p+1022 define double @v_mul_0x1p1022_f64(double %x) { -; GFX9-LABEL: v_mul_0x1p1022_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x7fd00000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_0x1p1022_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x3fe +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_0x1p1022_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x7fd00000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_0x1p1022_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x7fd00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_0x1p1022_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x7fd00000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_0x1p1022_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x3fe +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_0x1p1022_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x7fd00000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 
s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_0x1p1022_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x3fe +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_0x1p1022_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x7fd00000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 4.49423283715578976932e+307 ret double %mul } ; 0x1p+1023 define double @v_mul_0x1p1023_f64(double %x) { -; GFX9-LABEL: v_mul_0x1p1023_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x7fe00000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_0x1p1023_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x3ff +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_mul_0x1p1023_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x7fe00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_mul_0x1p1023_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x3ff +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_0x1p1023_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x7fe00000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: 
s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_0x1p1023_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x7fe00000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: v_mul_0x1p1023_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 0x3ff +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_0x1p1023_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x7fe00000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-GISEL-LABEL: v_mul_0x1p1023_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x7fe00000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 8.98846567431157953865e+307 ret double %mul } @@ -1792,32 +2183,53 @@ } define double @v_mul_add_32_f64(double %x, double %y) { -; GFX9-LABEL: v_mul_add_32_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40400000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_add_32_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5 +; GFX9-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_add_32_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40400000 -; GFX10-NEXT: 
v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_add_32_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40400000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_add_32_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40400000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_add_32_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5 +; GFX10-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_add_32_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40400000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_add_32_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5 +; GFX11-SDAG-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_add_32_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40400000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] 
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, 32.0 %fma = fadd double %mul, %y ret double %fma @@ -1976,29 +2388,47 @@ } define double @v_mul_fabs_32_f64(double %x) { -; GFX9-LABEL: v_mul_fabs_32_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40400000 -; GFX9-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_fabs_32_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 5 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_fabs_32_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40400000 -; GFX10-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_fabs_32_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40400000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_fabs_32_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40400000 -; GFX11-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_fabs_32_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 5 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_fabs_32_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40400000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; 
GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_fabs_32_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 5 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_fabs_32_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40400000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %x.fabs = call double @llvm.fabs.f64(double %x) %mul = fmul double %x.fabs, 32.0 ret double %mul @@ -2035,32 +2465,53 @@ } define <2 x double> @v_mul_16_v2f64(<2 x double> %x) { -; GFX9-LABEL: v_mul_16_v2f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40300000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_16_v2f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 4 +; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], 4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_16_v2f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40300000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_16_v2f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40300000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_16_v2f64: 
-; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40300000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: v_mul_f64 v[2:3], v[2:3], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_16_v2f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 4 +; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], 4 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_16_v2f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40300000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_16_v2f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 4 +; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], 4 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_16_v2f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40300000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul <2 x double> %x, <double 16.0, double 16.0> ret <2 x double> %mul } @@ -2097,32 +2548,53 @@ } define <2 x double> @v_mul_fabs_16_v2f64(<2 x double> %x) { -; GFX9-LABEL: v_mul_fabs_16_v2f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40300000 -; GFX9-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] -; GFX9-NEXT: v_mul_f64 v[2:3], |v[2:3]|, s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: 
v_mul_fabs_16_v2f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 4 +; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], |v[2:3]|, 4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_fabs_16_v2f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40300000 -; GFX10-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] -; GFX10-NEXT: v_mul_f64 v[2:3], |v[2:3]|, s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_fabs_16_v2f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40300000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_fabs_16_v2f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40300000 -; GFX11-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] -; GFX11-NEXT: v_mul_f64 v[2:3], |v[2:3]|, s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_fabs_16_v2f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 4 +; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], |v[2:3]|, 4 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_fabs_16_v2f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40300000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_fabs_16_v2f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 4 +; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], |v[2:3]|, 4 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_fabs_16_v2f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40300000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %x.fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x) %mul = fmul <2 x double> %x.fabs, <double 16.0, double 16.0> ret <2 x double> %mul @@ -2131,9 +2603,7 @@ define amdgpu_ps <2 x i32> @s_mul_32_f64(double inreg %x, double inreg %y) { ; GFX9-SDAG-LABEL: s_mul_32_f64: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x40400000 -; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], s[0:1], v[0:1] +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], 5 ; GFX9-SDAG-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-SDAG-NEXT: v_readfirstlane_b32 s1, v1 ; GFX9-SDAG-NEXT: ; return to shader part epilog @@ -2149,14 +2619,37 @@ ; GFX9-GISEL-NEXT: v_readfirstlane_b32 s1, v1 ; GFX9-GISEL-NEXT: ; return to shader part epilog ; -; GFX1011-LABEL: s_mul_32_f64: -; GFX1011: ; %bb.0: -; GFX1011-NEXT: s_mov_b32 s2, 0 -; GFX1011-NEXT: s_mov_b32 s3, 0x40400000 -; GFX1011-NEXT: v_mul_f64 v[0:1], s[0:1], s[2:3] -; GFX1011-NEXT: v_readfirstlane_b32 s0, v0 -; GFX1011-NEXT: v_readfirstlane_b32 s1, v1 -; GFX1011-NEXT: ; return to shader part epilog +; GFX10-SDAG-LABEL: s_mul_32_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], 5 +; GFX10-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-SDAG-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-GISEL-LABEL: s_mul_32_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_mov_b32 s2, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x40400000 +; GFX10-GISEL-NEXT: 
v_mul_f64 v[0:1], s[0:1], s[2:3] +; GFX10-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-GISEL-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-GISEL-NEXT: ; return to shader part epilog +; +; GFX11-SDAG-LABEL: s_mul_32_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], s[0:1], 5 +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-SDAG-NEXT: ; return to shader part epilog +; +; GFX11-GISEL-LABEL: s_mul_32_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_mov_b32 s2, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x40400000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], s[0:1], s[2:3] +; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-GISEL-NEXT: ; return to shader part epilog %mul = fmul contract double %x, 32.0 %cast = bitcast double %mul to <2 x i32> %cast.0 = extractelement <2 x i32> %cast, i32 0 @@ -6236,57 +6729,93 @@ } define double @v_constrained_fmul_32_f64(double %x, double %y) #0 { -; GFX9-LABEL: v_constrained_fmul_32_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x40400000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_constrained_fmul_32_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_constrained_fmul_32_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40400000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_constrained_fmul_32_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40400000 +; GFX9-GISEL-NEXT: 
v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_constrained_fmul_32_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x40400000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_constrained_fmul_32_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_constrained_fmul_32_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40400000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_constrained_fmul_32_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 5 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_constrained_fmul_32_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40400000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double 32.0, metadata !"round.dynamic", metadata !"fpexcept.strict") ret double %val } define double @v_constrained_fmul_0x1p64_f64(double %x, double %y) #0 { -; GFX9-LABEL: v_constrained_fmul_0x1p64_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0x43f00000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_constrained_fmul_0x1p64_f64: +; GFX9-SDAG: ; 
%bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 64 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_constrained_fmul_0x1p64_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x43f00000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_constrained_fmul_0x1p64_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x43f00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_constrained_fmul_0x1p64_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0x43f00000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_constrained_fmul_0x1p64_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 64 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_constrained_fmul_0x1p64_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x43f00000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_constrained_fmul_0x1p64_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], 64 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_constrained_fmul_0x1p64_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; 
GFX11-GISEL-NEXT: s_mov_b32 s1, 0x43f00000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double 18446744073709551616.0, metadata !"round.dynamic", metadata !"fpexcept.strict") ret double %val } Index: llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll +++ llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll @@ -35,10 +35,9 @@ ; FUNC-LABEL: @fp_to_sint_i64_f64 ; CI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] ; CI-DAG: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]] -; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}} -; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000 +; CI-DAG: s_movk_i32 [[K0_EXP:s[0-9]+]], 0xffe0 -; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s[[[K0_LO]]:[[K0_HI]]] +; CI-DAG: v_ldexp_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], [[K0_EXP]] ; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]] ; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000 Index: llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll +++ llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll @@ -35,10 +35,9 @@ ; FUNC-LABEL: @fp_to_uint_i64_f64 ; CI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] ; CI-DAG: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]] -; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}} -; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000 +; CI-DAG: s_movk_i32 [[K0_EXP:s[0-9]+]], 0xffe0 -; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s[[[K0_LO]]:[[K0_HI]]] +; CI-DAG: v_ldexp_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], [[K0_EXP]] ; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]] ; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000