Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -92,6 +92,7 @@
   bool selectG_SZA_EXT(MachineInstr &I) const;
   bool selectG_CONSTANT(MachineInstr &I) const;
   bool selectG_FNEG(MachineInstr &I) const;
+  bool selectG_FABS(MachineInstr &I) const;
   bool selectG_AND_OR_XOR(MachineInstr &I) const;
   bool selectG_ADD_SUB(MachineInstr &I) const;
   bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const;
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1969,6 +1969,48 @@
   return true;
 }
 
+// FIXME: This is a workaround for the same tablegen problems as G_FNEG
+bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
+  Register Dst = MI.getOperand(0).getReg();
+  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
+  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
+      MRI->getType(Dst) != LLT::scalar(64))
+    return false;
+
+  Register Src = MI.getOperand(1).getReg();
+  MachineBasicBlock *BB = MI.getParent();
+  const DebugLoc &DL = MI.getDebugLoc();
+  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
+      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
+    return false;
+
+  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
+    .addReg(Src, 0, AMDGPU::sub0);
+  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
+    .addReg(Src, 0, AMDGPU::sub1);
+  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
+    .addImm(0x7fffffff);
+
+  // Clear sign bit.
+  // TODO: Should this use S_BITSET0_*?
+  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
+    .addReg(HiReg)
+    .addReg(ConstReg);
+  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
+    .addReg(LoReg)
+    .addImm(AMDGPU::sub0)
+    .addReg(OpReg)
+    .addImm(AMDGPU::sub1);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 static bool isConstant(const MachineInstr &MI) {
   return MI.getOpcode() == TargetOpcode::G_CONSTANT;
 }
@@ -2609,6 +2651,10 @@
     if (selectImpl(I, *CoverageInfo))
       return true;
     return selectG_FNEG(I);
+  case TargetOpcode::G_FABS:
+    if (selectImpl(I, *CoverageInfo))
+      return true;
+    return selectG_FABS(I);
   case TargetOpcode::G_EXTRACT:
     return selectG_EXTRACT(I);
   case TargetOpcode::G_MERGE_VALUES:
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1255,7 +1255,7 @@
   (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
 >;
 
-// FIXME: The implicit-def of scc from S_[X]OR_B32 is mishandled
+// FIXME: The implicit-def of scc from S_[X]OR/AND_B32 is mishandled
 // def : GCNPat <
 //   (fneg (f64 SReg_64:$src)),
 //   (REG_SEQUENCE SReg_64,
@@ -1276,6 +1276,17 @@
 //     sub1)
 // >;
 
+// FIXME: Use S_BITSET0_B32/B64?
+// def : GCNPat <
+//   (fabs (f64 SReg_64:$src)),
+//   (REG_SEQUENCE SReg_64,
+//     (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+//     sub0,
+//     (S_AND_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+//                (i32 (S_MOV_B32 (i32 0x7fffffff)))),
+//     sub1)
+// >;
+
 } // End let AddedComplexity = 1
 
 def : GCNPat <
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir
@@ -200,9 +200,13 @@
     liveins: $sgpr0_sgpr1
     ; GCN-LABEL: name: fabs_s64_ss
    ; GCN: liveins: $sgpr0_sgpr1
-    ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
-    ; GCN: [[FABS:%[0-9]+]]:sgpr(s64) = G_FABS [[COPY]]
-    ; GCN: S_ENDPGM 0, implicit [[FABS]](s64)
+    ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+    ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
+    ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(s64) = COPY $sgpr0_sgpr1
     %1:sgpr(s64) = G_FABS %0
     S_ENDPGM 0, implicit %1
@@ -249,3 +253,50 @@
     %1:vgpr(s64) = G_FABS %0
     S_ENDPGM 0, implicit %1
 ...
+
+# Make sure the source register is constrained
+---
+name: fabs_s64_vv_no_src_constraint
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; GCN-LABEL: name: fabs_s64_vv_no_src_constraint
+    ; GCN: liveins: $vgpr0_vgpr1
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
+    ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
+    ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    %0:vgpr(s64) = IMPLICIT_DEF
+    %1:vgpr(s64) = G_FABS %0:vgpr(s64)
+    S_ENDPGM 0, implicit %1
+...
+
+---
+name: fabs_s64_ss_no_src_constraint
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; GCN-LABEL: name: fabs_s64_ss_no_src_constraint
+    ; GCN: liveins: $sgpr0_sgpr1
+    ; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+    ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
+    ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    %0:sgpr(s64) = IMPLICIT_DEF
+    %1:sgpr(s64) = G_FABS %0:sgpr(s64)
+    S_ENDPGM 0, implicit %1
+...
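
Note (not part of the patch): selectG_FABS only modifies the high 32-bit half of the 64-bit value because the IEEE-754 double sign is bit 63, i.e. bit 31 of the sub1 dword; sub0 is copied through unchanged. The standalone C++ sketch below mirrors the COPY / S_MOV_B32 0x7fffffff / S_AND_B32 / REG_SEQUENCE expansion on the host purely to illustrate the bit manipulation; the function name is made up for this note and nothing here is emitted by the selector.

#include <cstdint>
#include <cstring>

// Host-side model of the expansion: split the double into two dwords,
// clear bit 31 of the high dword (bit 63 of the double), and reassemble.
double fabsViaHighDwordMask(double X) {
  uint64_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  uint32_t Lo = static_cast<uint32_t>(Bits);        // COPY of sub0, unchanged
  uint32_t Hi = static_cast<uint32_t>(Bits >> 32);  // COPY of sub1
  Hi &= 0x7fffffffu;                                // S_AND_B32 with the S_MOV_B32 constant
  Bits = (static_cast<uint64_t>(Hi) << 32) | Lo;    // REG_SEQUENCE sub0/sub1
  double Result;
  std::memcpy(&Result, &Bits, sizeof(Result));
  return Result;
}

This is also why 2147483647 (0x7fffffff) appears in the MIR checks above and why only the sub1 operand of the REG_SEQUENCE is a newly computed value.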