Index: lib/Target/R600/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/R600/AMDGPUISelLowering.cpp
+++ lib/Target/R600/AMDGPUISelLowering.cpp
@@ -437,12 +437,12 @@
 
 bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
   assert(VT.isFloatingPoint());
-  return VT == MVT::f32;
+  return VT == MVT::f32 || VT == MVT::f64;
 }
 
 bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
   assert(VT.isFloatingPoint());
-  return VT == MVT::f32;
+  return VT == MVT::f32 || VT == MVT::f64;
 }
 
 bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
Index: lib/Target/R600/SIISelLowering.cpp
===================================================================
--- lib/Target/R600/SIISelLowering.cpp
+++ lib/Target/R600/SIISelLowering.cpp
@@ -221,11 +221,6 @@
     setOperationAction(ISD::FRINT, MVT::f64, Legal);
   }
 
-  // FIXME: These should be removed and handled the same was as f32 fneg. Source
-  // modifiers also work for the double instructions.
-  setOperationAction(ISD::FNEG, MVT::f64, Expand);
-  setOperationAction(ISD::FABS, MVT::f64, Expand);
-
   setOperationAction(ISD::FDIV, MVT::f32, Custom);
 
   setTargetDAGCombine(ISD::SELECT_CC);
@@ -588,10 +583,12 @@
     const SIInstrInfo *TII =
       static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
 
-    DebugLoc DL = MI->getDebugLoc();
     unsigned DestReg = MI->getOperand(0).getReg();
     unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
 
+    DebugLoc DL = MI->getDebugLoc();
+    // FIXME: We should use S_MOV / S_AND here and let it be fixed later once
+    // SALU instructions generally work.
     BuildMI(*BB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Reg)
             .addImm(0x7fffffff);
     BuildMI(*BB, I, DL, TII->get(AMDGPU::V_AND_B32_e32), DestReg)
@@ -600,6 +597,34 @@
     MI->eraseFromParent();
     break;
   }
+  case AMDGPU::FABS64_SI: {
+    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+    const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+
+    DebugLoc DL = MI->getDebugLoc();
+    unsigned SuperReg = MI->getOperand(0).getReg();
+    unsigned SrcReg = MI->getOperand(1).getReg();
+
+    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+    unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+
+    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), ImmReg)
+      .addImm(0x7fffffff);
+
+    // We only need to mask the upper half of the register pair.
+    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_AND_B32_e32), TmpReg)
+      .addReg(SrcReg, 0, AMDGPU::sub1)
+      .addReg(ImmReg);
+
+    BuildMI(*BB, I, DL, TII->get(AMDGPU::REG_SEQUENCE), SuperReg)
+      .addReg(SrcReg, 0, AMDGPU::sub0)
+      .addImm(AMDGPU::sub0)
+      .addReg(TmpReg)
+      .addImm(AMDGPU::sub1);
+    MI->eraseFromParent();
+    break;
+  }
   case AMDGPU::FNEG_SI: {
     MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
     const SIInstrInfo *TII =
       static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
@@ -609,6 +634,7 @@
     unsigned DestReg = MI->getOperand(0).getReg();
     unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
 
+    // FIXME: Should use SALU instructions
     BuildMI(*BB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Reg)
             .addImm(0x80000000);
     BuildMI(*BB, I, DL, TII->get(AMDGPU::V_XOR_B32_e32), DestReg)
@@ -617,6 +643,33 @@
     MI->eraseFromParent();
     break;
   }
+  case AMDGPU::FNEG64_SI: {
+    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+    const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+
+    DebugLoc DL = MI->getDebugLoc();
+    unsigned SrcReg = MI->getOperand(1).getReg();
+    unsigned DestReg = MI->getOperand(0).getReg();
+
+    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+    unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+
+    // FIXME: Should use SALU instructions
+    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), ImmReg)
+      .addImm(0x80000000);
+    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_XOR_B32_e32), TmpReg)
+      .addReg(SrcReg, 0, AMDGPU::sub1)
+      .addReg(ImmReg);
+
+    BuildMI(*BB, I, DL, TII->get(AMDGPU::REG_SEQUENCE), DestReg)
+      .addReg(SrcReg, 0, AMDGPU::sub0)
+      .addImm(AMDGPU::sub0)
+      .addReg(TmpReg)
+      .addImm(AMDGPU::sub1);
+    MI->eraseFromParent();
+    break;
+  }
   case AMDGPU::FCLAMP_SI: {
     const SIInstrInfo *TII =
       static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
Index: lib/Target/R600/SIInstructions.td
===================================================================
--- lib/Target/R600/SIInstructions.td
+++ lib/Target/R600/SIInstructions.td
@@ -2333,33 +2333,28 @@
   (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
 >;
 
-def FABS_SI : AMDGPUShaderInst <
-  (outs VReg_32:$dst),
-  (ins VSrc_32:$src0),
-  "FABS_SI $dst, $src0",
-  []
-> {
+class SIUnaryCustomInsertInst <string name, SDPatternOperator node,
+                               ValueType vt, RegisterClass dstrc,
+                               RegisterClass srcrc> :
+  AMDGPUShaderInst<
+    (outs dstrc:$dst),
+    (ins srcrc:$src0),
+    name#" $dst, $src0",
+    [(set vt:$dst, (node vt:$src0))]> {
   let usesCustomInserter = 1;
 }
 
-def : Pat <
-  (fabs f32:$src),
-  (FABS_SI f32:$src)
->;
+def FABS_SI : SIUnaryCustomInsertInst<"FABS_SI", fabs,
+                                      f32, VReg_32, VSrc_32>;
+def FNEG_SI : SIUnaryCustomInsertInst<"FNEG_SI", fneg,
+                                      f32, VReg_32, VSrc_32>;
 
-def FNEG_SI : AMDGPUShaderInst <
-  (outs VReg_32:$dst),
-  (ins VSrc_32:$src0),
-  "FNEG_SI $dst, $src0",
-  []
-> {
-  let usesCustomInserter = 1;
-}
+def FABS64_SI : SIUnaryCustomInsertInst<"FABS64_SI", fabs,
+                                        f64, VReg_64, VSrc_64>;
+def FNEG64_SI : SIUnaryCustomInsertInst<"FNEG64_SI", fneg,
+                                        f64, VReg_64, VSrc_64>;
 
-def : Pat <
-  (fneg f32:$src),
-  (FNEG_SI f32:$src)
->;
 
 /********** ================== **********/
 /********** Immediate Patterns **********/
Index: test/CodeGen/R600/fabs.f64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/fabs.f64.ll
@@ -0,0 +1,97 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+declare double @fabs(double) readnone
+declare double @llvm.fabs.f64(double) readnone
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
+declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
+
+; FUNC-LABEL: @v_fabs_f64
+; SI: V_AND_B32
+; SI: S_ENDPGM
+define void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tidext = sext i32 %tid to i64
+  %gep = getelementptr double addrspace(1)* %in, i64 %tidext
+  %val = load double addrspace(1)* %gep, align 8
+  %fabs = call double @llvm.fabs.f64(double %val)
+  store double %fabs, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fabs_f64
+; SI: V_AND_B32
+; SI-NOT: V_AND_B32
+; SI: S_ENDPGM
+define void @fabs_f64(double addrspace(1)* %out, double %in) {
+  %fabs = call double @llvm.fabs.f64(double %in)
+  store double %fabs, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fabs_v2f64
+; SI: V_AND_B32
+; SI: V_AND_B32
+; SI: S_ENDPGM
+define void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+  %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
+  store <2 x double> %fabs, <2 x double> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fabs_v4f64
+; SI: V_AND_B32
+; SI: V_AND_B32
+; SI: V_AND_B32
+; SI: V_AND_B32
+; SI: S_ENDPGM
+define void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+  %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
+  store <4 x double> %fabs, <4 x double> addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: @fabs_fold_f64
+; SI: S_LOAD_DWORDX2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NOT: AND
+; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
+; SI: S_ENDPGM
+define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
+  %fabs = call double @llvm.fabs.f64(double %in0)
+  %fmul = fmul double %fabs, %in1
+  store double %fmul, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: @fabs_fn_fold_f64
+; SI: S_LOAD_DWORDX2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NOT: AND
+; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
+; SI: S_ENDPGM
+define void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
+  %fabs = call double @fabs(double %in0)
+  %fmul = fmul double %fabs, %in1
+  store double %fmul, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fabs_free_f64
+; SI: V_AND_B32
+; SI: S_ENDPGM
+define void @fabs_free_f64(double addrspace(1)* %out, i64 %in) {
+  %bc = bitcast i64 %in to double
+  %fabs = call double @llvm.fabs.f64(double %bc)
+  store double %fabs, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fabs_fn_free_f64
+; SI: V_AND_B32
+; SI: S_ENDPGM
+define void @fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
+  %bc = bitcast i64 %in to double
+  %fabs = call double @fabs(double %bc)
+  store double %fabs, double addrspace(1)* %out
+  ret void
+}
Index: test/CodeGen/R600/fneg-fabs.f64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/fneg-fabs.f64.ll
@@ -0,0 +1,43 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FIXME: Check something here. Currently it seems fabs + fneg aren't folded
+; into 2 modifiers, although theoretically that should work.
+
+; FUNC-LABEL: @fneg_fabs_free_f64
+define void @fneg_fabs_free_f64(double addrspace(1)* %out, i64 %in) {
+  %bc = bitcast i64 %in to double
+  %fabs = call double @llvm.fabs.f64(double %bc)
+  %fsub = fsub double -0.000000e+00, %fabs
+  store double %fsub, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fneg_fabs_fn_free_f64
+define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
+  %bc = bitcast i64 %in to double
+  %fabs = call double @fabs(double %bc)
+  %fsub = fsub double -0.000000e+00, %fabs
+  store double %fsub, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fneg_fabs_v2f64
+define void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+  %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
+  %fsub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %fabs
+  store <2 x double> %fsub, <2 x double> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fneg_fabs_v4f64
+define void @fneg_fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+  %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
+  %fsub = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %fabs
+  store <4 x double> %fsub, <4 x double> addrspace(1)* %out
+  ret void
+}
+
+declare double @fabs(double) readnone
+declare double @llvm.fabs.f64(double) readnone
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
+declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
Index: test/CodeGen/R600/fneg.f64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/fneg.f64.ll
@@ -0,0 +1,59 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: @fneg_f64
+; SI: V_XOR_B32
+define void @fneg_f64(double addrspace(1)* %out, double %in) {
+  %fneg = fsub double -0.000000e+00, %in
+  store double %fneg, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fneg_v2f64
+; SI: V_XOR_B32
+; SI: V_XOR_B32
+define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
+  %fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
+  store <2 x double> %fneg, <2 x double> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fneg_v4f64
+; R600: -PV
+; R600: -T
+; R600: -PV
+; R600: -PV
+
+; SI: V_XOR_B32
+; SI: V_XOR_B32
+; SI: V_XOR_B32
+; SI: V_XOR_B32
+define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
+  %fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
+  store <4 x double> %fneg, <4 x double> addrspace(1)* %out
+  ret void
+}
+
+; DAGCombiner will transform:
+; (fneg (f64 bitcast (i64 a))) => (f64 bitcast (xor (i64 a), 0x8000000000000000))
+; unless the target returns true for isFNegFree()
+
+; FUNC-LABEL: @fneg_free_f64
+; FIXME: Unnecessary copy to VGPRs
+; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}, 0, 0
+define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
+  %bc = bitcast i64 %in to double
+  %fsub = fsub double 0.0, %bc
+  store double %fsub, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: @fneg_fold_f64
+; SI: S_LOAD_DWORDX2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NOT: XOR
+; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], {{v\[[0-9]+:[0-9]+\]}}
+define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
+  %fsub = fsub double -0.0, %in
+  %fmul = fmul double %fsub, %in
+  store double %fmul, double addrspace(1)* %out
+  ret void
+}