Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -5131,7 +5131,7 @@ const MachineInstr &MI, int OpIdx) const { const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF(); - int ExpVal = APF.getExactLog2(); + int ExpVal = APF.getExactLog2Abs(); assert(ExpVal != INT_MIN); MIB.addImm(ExpVal); } Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3328,7 +3328,7 @@ // Convert a floating-point power of 2 to the integer exponent. def FPPow2ToExponentXForm : SDNodeXFormgetValueAPF(); - int Log2 = APF.getExactLog2(); + int Log2 = APF.getExactLog2Abs(); assert(Log2 != INT_MIN); return CurDAG->getTargetConstant(Log2, SDLoc(N), MVT::i32); }]>; @@ -3337,8 +3337,24 @@ // immediate where it's preferable to emit a multiply by as an // ldexp. We skip over 0.5 to 4.0 as those are inline immediates // anyway. -def fpimm_pow2_prefer_ldexp_f64 : FPImmLeaf 2); + }], FPPow2ToExponentXForm +>; + +def fpimm_neg_pow2_prefer_ldexp_f64 : FPImmLeaf ldexp(-x, n) // TODO: For f32/f16, it's not a clear win on code size to use ldexp // in place of mul since we have to use the vop3 form. Are there power // savings or some other reason to prefer ldexp over mul? 
def : GCNPat< (any_fmul (f64 (VOP3Mods f64:$src0, i32:$src0_mods)), - fpimm_pow2_prefer_ldexp_f64:$src1), + fpimm_pos_pow2_prefer_ldexp_f64:$src1), (V_LDEXP_F64_e64 i32:$src0_mods, VSrc_b64:$src0, 0, (S_MOV_B32 (i32 (FPPow2ToExponentXForm $src1)))) >; +def : GCNPat< + (any_fmul f64:$src0, fpimm_neg_pow2_prefer_ldexp_f64:$src1), + (V_LDEXP_F64_e64 SRCMODS.NEG, VSrc_b64:$src0, + 0, (S_MOV_B32 (i32 (FPPow2ToExponentXForm $src1)))) +>; + +// We want to avoid using VOP3Mods which could pull in another fneg +// which we would need to be re-negated (which should never happen in +// practice). I don't see a way to apply an SDNodeXForm that accounts +// for a second operand. +def : GCNPat< + (any_fmul (fabs f64:$src0), fpimm_neg_pow2_prefer_ldexp_f64:$src1), + (V_LDEXP_F64_e64 SRCMODS.NEG_ABS, VSrc_b64:$src0, + 0, (S_MOV_B32 (i32 (FPPow2ToExponentXForm $src1)))) +>; + class AMDGPUGenericInstruction : GenericInstruction { let Namespace = "AMDGPU"; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir @@ -289,11 +289,9 @@ ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1070596096, implicit $exec - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY]], 0, [[REG_SEQUENCE]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MUL_F64_e64_]] + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 + ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 1, [[COPY]], 0, 
[[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_LDEXP_F64_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FCONSTANT double -16.0 @@ -315,11 +313,9 @@ ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1070596096, implicit $exec - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 2, [[COPY]], 0, [[REG_SEQUENCE]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MUL_F64_e64_]] + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 + ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 3, [[COPY]], 0, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_LDEXP_F64_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FABS %0 @@ -342,11 +338,14 @@ ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1070596096, implicit $exec - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 3, [[COPY]], 0, [[REG_SEQUENCE]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MUL_F64_e64_]] + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 + ; GCN-NEXT: 
[[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1 + ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 4 + ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 1, [[REG_SEQUENCE]], 0, [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_LDEXP_F64_e64_]] ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FABS %0 Index: llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll +++ llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll @@ -1049,169 +1049,277 @@ } define double @v_mul_neg256_f64(double %x) { -; GFX9-LABEL: v_mul_neg256_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0xc0700000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_neg256_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 8 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_neg256_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0xc0700000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_neg256_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0700000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_neg256_f64: -; 
GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0xc0700000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_neg256_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 8 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_neg256_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0700000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_neg256_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 8 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_neg256_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0700000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -256.0 ret double %mul } define double @v_mul_neg128_f64(double %x) { -; GFX9-LABEL: v_mul_neg128_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0xc0600000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_neg128_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 7 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_neg128_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 
s5, 0xc0600000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_neg128_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0600000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_neg128_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0xc0600000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_neg128_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 7 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_neg128_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0600000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_neg128_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 7 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_neg128_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0600000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -128.0 ret double %mul } define double @v_mul_neg64_f64(double %x) { -; GFX9-LABEL: v_mul_neg64_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0xc0500000 -; GFX9-NEXT: v_mul_f64 
v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_neg64_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 6 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_neg64_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0xc0500000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_neg64_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0500000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_neg64_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0xc0500000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_neg64_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 6 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_neg64_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0500000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_neg64_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 6 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_neg64_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; 
GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0500000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -64.0 ret double %mul } define double @v_mul_neg32_f64(double %x) { -; GFX9-LABEL: v_mul_neg32_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0xc0400000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_neg32_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 5 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_neg32_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0xc0400000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_neg32_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0400000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_neg32_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0xc0400000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_neg32_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 5 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_neg32_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0400000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] 
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_neg32_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 5 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_neg32_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0400000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -32.0 ret double %mul } define double @v_mul_neg16_f64(double %x) { -; GFX9-LABEL: v_mul_neg16_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0xc0300000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_neg16_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_neg16_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0xc0300000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_neg16_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0300000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_neg16_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0xc0300000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_neg16_f64: +; GFX10-SDAG: 
; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_neg16_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0300000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_neg16_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_neg16_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0300000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -16.0 ret double %mul } define double @v_mul_neg8_f64(double %x) { -; GFX9-LABEL: v_mul_neg8_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0xc0200000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_neg8_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 3 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_neg8_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0xc0200000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_neg8_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 
0xc0200000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_neg8_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0xc0200000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_neg8_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 3 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_neg8_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0200000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_neg8_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 3 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_neg8_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0200000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -8.0 ret double %mul } @@ -1297,29 +1405,47 @@ } define double @v_mul_neg_quarter_f64(double %x) { -; GFX9-LABEL: v_mul_neg_quarter_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0xbfd00000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_neg_quarter_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], -2 +; GFX9-SDAG-NEXT: s_setpc_b64 
s[30:31] ; -; GFX10-LABEL: v_mul_neg_quarter_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0xbfd00000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_neg_quarter_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xbfd00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_neg_quarter_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0xbfd00000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_neg_quarter_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], -2 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_neg_quarter_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xbfd00000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_neg_quarter_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], -2 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_neg_quarter_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xbfd00000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul double %x, -0.25 ret double %mul } @@ -2517,32 +2643,53 @@ } 
define <2 x double> @v_mul_neg16_v2f64(<2 x double> %x) { -; GFX9-LABEL: v_mul_neg16_v2f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0xc0300000 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_neg16_v2f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4 +; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], -v[2:3], 4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_neg16_v2f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0xc0300000 -; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] -; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_neg16_v2f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0300000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_neg16_v2f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0xc0300000 -; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] -; GFX11-NEXT: v_mul_f64 v[2:3], v[2:3], s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_neg16_v2f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4 +; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], -v[2:3], 4 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_neg16_v2f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0300000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] +; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_neg16_v2f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4 +; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], -v[2:3], 4 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_neg16_v2f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0300000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1] +; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %mul = fmul <2 x double> %x, ret <2 x double> %mul } @@ -6879,58 +7026,94 @@ } define double @v_mul_fabs_neg256_f64(double %x) { -; GFX9-LABEL: v_mul_fabs_neg256_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0xc0700000 -; GFX9-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_fabs_neg256_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 8 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_fabs_neg256_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0xc0700000 -; GFX10-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_fabs_neg256_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0700000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], 
|v[0:1]|, s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_fabs_neg256_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0xc0700000 -; GFX11-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_fabs_neg256_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 8 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_fabs_neg256_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0700000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_fabs_neg256_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 8 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_fabs_neg256_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0700000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.x = call double @llvm.fabs.f64(double %x) %mul = fmul double %fabs.x, -256.0 ret double %mul } define double @v_mul_fabs_neg8_f64(double %x) { -; GFX9-LABEL: v_mul_fabs_neg8_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0xc0200000 -; GFX9-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_fabs_neg8_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 3 +; 
GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_fabs_neg8_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0xc0200000 -; GFX10-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_fabs_neg8_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0200000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_fabs_neg8_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0xc0200000 -; GFX11-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_fabs_neg8_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 3 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_fabs_neg8_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0200000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_fabs_neg8_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 3 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_fabs_neg8_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0200000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.x = call double @llvm.fabs.f64(double %x) 
%mul = fmul double %fabs.x, -8.0 ret double %mul @@ -7011,29 +7194,47 @@ } define double @v_mul_fabs_negquarter_f64(double %x) { -; GFX9-LABEL: v_mul_fabs_negquarter_f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: s_mov_b32 s5, 0xbfd00000 -; GFX9-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_fabs_negquarter_f64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, -2 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_mul_fabs_negquarter_f64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0xbfd00000 -; GFX10-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_fabs_negquarter_f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xbfd00000 +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_mul_fabs_negquarter_f64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s1, 0xbfd00000 -; GFX11-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: v_mul_fabs_negquarter_f64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, -2 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_fabs_negquarter_f64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xbfd00000 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5] +; 
GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_fabs_negquarter_f64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, -2 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_fabs_negquarter_f64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xbfd00000 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %fabs.x = call double @llvm.fabs.f64(double %x) %mul = fmul double %fabs.x, -0.25 ret double %mul