Index: llvm/lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -658,7 +658,7 @@ class RsqPat : AMDGPUPat < (AMDGPUrcp (fsqrt vt:$src)), - (RsqInst $src) + (RsqInst vt:$src) >; // Instructions which select to the same v_min_f* Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -825,12 +825,10 @@ // VOP1 Patterns //===----------------------------------------------------------------------===// -let OtherPredicates = [UnsafeFPMath] in { - -//defm : RsqPat; - def : RsqPat; +let OtherPredicates = [UnsafeFPMath] in { + // Convert (x - floor(x)) to fract(x) def : GCNPat < (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)), Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rsq.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rsq.mir @@ -0,0 +1,29 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: test +alignment: 1 +legalized: true +regBankSelected: true +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: test + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; CHECK: %3:vgpr_32 = afn nofpexcept V_RSQ_F32_e32 [[COPY1]], implicit $mode, implicit $exec + ; CHECK: $vgpr0 = COPY %3 + ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + %0:sgpr(s32) = COPY $sgpr0 + %4:vgpr(s32) = COPY %0(s32) + %2:vgpr(s32) = afn G_FSQRT %4 + %3:vgpr(s32) = afn G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %2(s32) + $vgpr0 = COPY %3(s32) + SI_RETURN_TO_EPILOG implicit $vgpr0 + +...