Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -786,11 +786,9 @@ let OtherPredicates = [UnsafeFPMath] in { -//defm : RsqPat; //defm : RsqPat; def : RsqPat; -def : RsqPat; // Convert (x - floor(x)) to fract(x) def : GCNPat < Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.amdgcn.rcp.f32(float) #0 declare double @llvm.amdgcn.rcp.f64(double) #0 @@ -132,8 +132,8 @@ } ; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64: -; SI: v_rsq_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}} -; SI-NOT: [[RESULT]] +; SI: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}} +; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SQRT]] ; SI: buffer_store_dwordx2 [[RESULT]] define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 { %sqrt = call double @llvm.sqrt.f64(double %src) Index: llvm/test/CodeGen/AMDGPU/rsq.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/rsq.ll +++ llvm/test/CodeGen/AMDGPU/rsq.ll @@ -17,8 +17,8 @@ } ; SI-LABEL: {{^}}rsq_f64: -; SI-UNSAFE: v_rsq_f64_e32 -; SI-SAFE: v_sqrt_f64_e32 +; SI: v_sqrt_f64 +; SI: v_rcp_f64 ; SI: s_endpgm define amdgpu_kernel void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 { %val = load double, double addrspace(1)* %in, align 4 @@ -97,8 +97,8 @@ ; SI-UNSAFE: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] ; SI-UNSAFE: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], [[VAL]] -; SI-UNSAFE: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], [[VAL]] -; SI-UNSAFE: v_fma_f64 {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}, [[RSQ]], 1.0 +; SI-UNSAFE: v_rcp_f64_e32 [[RCP:v\[[0-9]+:[0-9]+\]]], [[VAL]] +; SI-UNSAFE: v_fma_f64 {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}, [[RCP]], 1.0 ; SI-UNSAFE: v_fma_f64 ; SI-UNSAFE: v_fma_f64 ; SI-UNSAFE: v_fma_f64 @@ -135,8 +135,7 @@ ; SI-UNSAFE: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] ; SI-UNSAFE-DAG: v_sqrt_f64_e64 [[SQRT:v\[[0-9]+:[0-9]+\]]], -[[VAL]] -; SI-UNSAFE-DAG: v_xor_b32_e32 v[[HI:[0-9]+]], 0x80000000, v{{[0-9]+}} -; SI-UNSAFE: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+}}:[[HI]]{{\]}} +; SI-UNSAFE: v_rcp_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], [[SQRT]] ; SI-UNSAFE: v_fma_f64 {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}, [[RSQ]], 1.0 ; SI-UNSAFE: v_fma_f64 ; SI-UNSAFE: v_fma_f64