Index: lib/Analysis/InstructionSimplify.cpp
===================================================================
--- lib/Analysis/InstructionSimplify.cpp
+++ lib/Analysis/InstructionSimplify.cpp
@@ -4149,6 +4149,16 @@
   return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
 }
 
+static Constant *simplifyFPBinop(Value *Op0, Value *Op1) {
+  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
+    return ConstantFP::getNaN(Op0->getType());
+
+  if (match(Op0, m_NaN()) || match(Op1, m_NaN()))
+    return ConstantFP::getNaN(Op0->getType());
+
+  return nullptr;
+}
+
 /// Given operands for an FAdd, see if we can fold the result. If not, this
 /// returns null.
 static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
@@ -4156,8 +4166,8 @@
   if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q))
     return C;
 
-  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
-    return ConstantFP::getNaN(Op0->getType());
+  if (Constant *C = simplifyFPBinop(Op0, Op1))
+    return C;
 
   // fadd X, -0 ==> X
   if (match(Op1, m_NegZero()))
@@ -4189,8 +4199,8 @@
   if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q))
     return C;
 
-  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
-    return ConstantFP::getNaN(Op0->getType());
+  if (Constant *C = simplifyFPBinop(Op0, Op1))
+    return C;
 
   // fsub X, 0 ==> X
   if (match(Op1, m_Zero()))
@@ -4224,8 +4234,8 @@
   if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
     return C;
 
-  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
-    return ConstantFP::getNaN(Op0->getType());
+  if (Constant *C = simplifyFPBinop(Op0, Op1))
+    return C;
 
   // fmul X, 1.0 ==> X
   if (match(Op1, m_FPOne()))
@@ -4265,8 +4275,8 @@
   if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q))
     return C;
 
-  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
-    return ConstantFP::getNaN(Op0->getType());
+  if (Constant *C = simplifyFPBinop(Op0, Op1))
+    return C;
 
   // X / 1.0 -> X
   if (match(Op1, m_FPOne()))
@@ -4312,8 +4322,8 @@
   if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q))
     return C;
 
-  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
-    return ConstantFP::getNaN(Op0->getType());
+  if (Constant *C = simplifyFPBinop(Op0, Op1))
+    return C;
 
   // Unlike fdiv, the result of frem always matches the sign of the dividend.
 // The constant match may include undef elements in a vector, so return a full
Index: test/CodeGen/AMDGPU/imm.ll
===================================================================
--- test/CodeGen/AMDGPU/imm.ll
+++ test/CodeGen/AMDGPU/imm.ll
@@ -287,18 +287,17 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_1_f32:
-; GCN: s_load_dword [[VAL:s[0-9]+]]
-; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -1{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
-  %y = fadd float %x, 0xffffffffe0000000
+  %c = bitcast i32 -1 to float
+  %y = fadd float %x, %c
   store float %y, float addrspace(1)* %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_2_f32:
-; GCN: s_load_dword [[VAL:s[0-9]+]]
-; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -2{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0xffffffffc0000000
@@ -307,8 +306,7 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_16_f32:
-; GCN: s_load_dword [[VAL:s[0-9]+]]
-; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -16
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0xfffffffe00000000
@@ -495,10 +493,9 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_1_f64:
-; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -1
-; GCN: buffer_store_dwordx2 [[REG]]
+; GCN: v_mov_b32_e32 v0, 0
+; GCN: v_mov_b32_e32 v1, 0x7ff80000
+; GCN: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
   %y = fadd double %x, 0xffffffffffffffff
   store double %y, double addrspace(1)* %out
@@ -506,10 +503,9 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_2_f64:
-; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -2
-; GCN: buffer_store_dwordx2 [[REG]]
+; GCN: v_mov_b32_e32 v0, 0
+; GCN: v_mov_b32_e32 v1, 0x7ff80000
+; GCN: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
   %y = fadd double %x, 0xfffffffffffffffe
   store double %y, double addrspace(1)* %out
@@ -517,10 +513,9 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_16_f64:
-; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -16
-; GCN: buffer_store_dwordx2 [[REG]]
+; GCN: v_mov_b32_e32 v0, 0
+; GCN: v_mov_b32_e32 v1, 0x7ff80000
+; GCN: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
   %y = fadd double %x, 0xfffffffffffffff0
   store double %y, double addrspace(1)* %out
Index: test/CodeGen/AMDGPU/imm16.ll
===================================================================
--- test/CodeGen/AMDGPU/imm16.ll
+++ test/CodeGen/AMDGPU/imm16.ll
@@ -266,9 +266,8 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_1_f16:
-; VI: buffer_load_ushort [[VAL:v[0-9]+]]
-; VI: v_add_f16_e32 [[REG:v[0-9]+]], -1, [[VAL]]{{$}}
-; VI: buffer_store_short [[REG]]
+; VI: v_mov_b32_e32 v0, 0x7e00
+; VI: buffer_store_short v0, off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, half %x) {
   %y = fadd half %x, 0xHFFFF
   store half %y, half addrspace(1)* %out
@@ -276,9 +275,8 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_2_f16:
-; VI: buffer_load_ushort [[VAL:v[0-9]+]]
-; VI: v_add_f16_e32 [[REG:v[0-9]+]], -2, [[VAL]]{{$}}
-; VI: buffer_store_short [[REG]]
+; VI: v_mov_b32_e32 v0, 0x7e00
+; VI: buffer_store_short v0, off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, half %x) {
   %y = fadd half %x, 0xHFFFE
   store half %y, half addrspace(1)* %out
@@ -286,9 +284,8 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_16_f16:
-; VI: buffer_load_ushort [[VAL:v[0-9]+]]
-; VI: v_add_f16_e32 [[REG:v[0-9]+]], -16, [[VAL]]{{$}}
-; VI: buffer_store_short [[REG]]
+; VI: v_mov_b32_e32 v0, 0x7e00
+; VI: buffer_store_short v0, off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, half %x) {
   %y = fadd half %x, 0xHFFF0
   store half %y, half addrspace(1)* %out
Index: test/CodeGen/AMDGPU/immv216.ll
===================================================================
--- test/CodeGen/AMDGPU/immv216.ll
+++ test/CodeGen/AMDGPU/immv216.ll
@@ -371,17 +371,8 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_1_v2f16:
-; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1{{$}}
-; GFX9: buffer_store_dword [[REG]]
-
-; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
-; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1, [[VAL0]]
-; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xffff
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI: v_or_b32
-; VI: buffer_store_dword
+; GCN: v_mov_b32_e32 v0, 0x7e007e00
+; GCN: buffer_store_dword v0, off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
   %y = fadd <2 x half> %x, <half 0xHFFFF, half 0xHFFFF>
   store <2 x half> %y, <2 x half> addrspace(1)* %out
@@ -389,17 +380,8 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_2_v2f16:
-; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2{{$}}
-; GFX9: buffer_store_dword [[REG]]
-
-; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
-; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2, [[VAL0]]
-; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xfffe
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI: v_or_b32
-; VI: buffer_store_dword
+; GCN: v_mov_b32_e32 v0, 0x7e007e00
+; GCN: buffer_store_dword v0, off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
   %y = fadd <2 x half> %x, <half 0xHFFFE, half 0xHFFFE>
   store <2 x half> %y, <2 x half> addrspace(1)* %out
@@ -407,17 +389,8 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_16_v2f16:
-; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -16{{$}}
-; GFX9: buffer_store_dword [[REG]]
-
-; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
-; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -16, [[VAL0]]
-; VI-DAG: v_mov_b32_e32 [[CONSTM16:v[0-9]+]], 0xfff0
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI: v_or_b32
-; VI: buffer_store_dword
+; GCN: v_mov_b32_e32 v0, 0x7e007e00
+; GCN: buffer_store_dword v0, off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
   %y = fadd <2 x half> %x, <half 0xHFFF0, half 0xHFFF0>
   store <2 x half> %y, <2 x half> addrspace(1)* %out
Index: test/CodeGen/AMDGPU/jump-address.ll
===================================================================
--- test/CodeGen/AMDGPU/jump-address.ll
+++ test/CodeGen/AMDGPU/jump-address.ll
@@ -43,7 +43,7 @@
   %20 = load <4 x float>, <4 x float> addrspace(8)* null
   %21 = extractelement <4 x float> %20, i32 0
   %22 = fsub float -0.000000e+00, %21
-  %23 = fadd float 0xFFF8000000000000, %22
+  %23 = fadd float 0x3FF8000000000000, %22
   br label %ENDIF
 }
 
Index: test/Transforms/InstSimplify/fp-nan.ll
===================================================================
--- test/Transforms/InstSimplify/fp-nan.ll
+++ test/Transforms/InstSimplify/fp-nan.ll
@@ -3,8 +3,7 @@
 
 define float @fadd_nan_op0(float %x) {
 ; CHECK-LABEL: @fadd_nan_op0(
-; CHECK-NEXT: [[R:%.*]] = fadd float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fadd float 0x7FF8000000000000, %x
   ret float %r
@@ -12,8 +11,7 @@
 
 define float @fadd_nan_op1(float %x) {
 ; CHECK-LABEL: @fadd_nan_op1(
-; CHECK-NEXT: [[R:%.*]] = fadd float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fadd float %x, 0x7FF8000000000000
   ret float %r
@@ -21,8 +19,7 @@
 
 define float @fsub_nan_op0(float %x) {
 ; CHECK-LABEL: @fsub_nan_op0(
-; CHECK-NEXT: [[R:%.*]] = fsub float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fsub float 0x7FF8000000000000, %x
   ret float %r
@@ -30,8 +27,7 @@
 
 define float @fsub_nan_op1(float %x) {
 ; CHECK-LABEL: @fsub_nan_op1(
-; CHECK-NEXT: [[R:%.*]] = fsub float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fsub float %x, 0x7FF8000000000000
   ret float %r
@@ -39,8 +35,7 @@
 
 define float @fmul_nan_op0(float %x) {
 ; CHECK-LABEL: @fmul_nan_op0(
-; CHECK-NEXT: [[R:%.*]] = fmul float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fmul float 0x7FF8000000000000, %x
   ret float %r
@@ -48,8 +43,7 @@
 
 define float @fmul_nan_op1(float %x) {
 ; CHECK-LABEL: @fmul_nan_op1(
-; CHECK-NEXT: [[R:%.*]] = fmul float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fmul float %x, 0x7FF8000000000000
   ret float %r
@@ -57,8 +51,7 @@
 
 define float @fdiv_nan_op0(float %x) {
 ; CHECK-LABEL: @fdiv_nan_op0(
-; CHECK-NEXT: [[R:%.*]] = fdiv float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fdiv float 0x7FF8000000000000, %x
   ret float %r
@@ -66,8 +59,7 @@
 
 define float @fdiv_nan_op1(float %x) {
 ; CHECK-LABEL: @fdiv_nan_op1(
-; CHECK-NEXT: [[R:%.*]] = fdiv float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fdiv float %x, 0x7FF8000000000000
   ret float %r
@@ -75,8 +67,7 @@
 
 define float @frem_nan_op0(float %x) {
 ; CHECK-LABEL: @frem_nan_op0(
-; CHECK-NEXT: [[R:%.*]] = frem float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = frem float 0x7FF8000000000000, %x
   ret float %r
@@ -84,8 +75,7 @@
 
 define float @frem_nan_op1(float %x) {
 ; CHECK-LABEL: @frem_nan_op1(
-; CHECK-NEXT: [[R:%.*]] = frem float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = frem float %x, 0x7FF8000000000000
   ret float %r
@@ -95,8 +85,7 @@
 
 define float @fadd_nan_op0_nnan(float %x) {
 ; CHECK-LABEL: @fadd_nan_op0_nnan(
-; CHECK-NEXT: [[R:%.*]] = fadd nnan float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fadd nnan float 0x7FF8000000000000, %x
   ret float %r
@@ -104,8 +93,7 @@
 
 define float @fadd_nan_op1_fast(float %x) {
 ; CHECK-LABEL: @fadd_nan_op1_fast(
-; CHECK-NEXT: [[R:%.*]] = fadd fast float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fadd fast float %x, 0x7FF8000000000000
   ret float %r
@@ -113,8 +101,7 @@
 
 define float @fsub_nan_op0_fast(float %x) {
 ; CHECK-LABEL: @fsub_nan_op0_fast(
-; CHECK-NEXT: [[R:%.*]] = fsub fast float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fsub fast float 0x7FF8000000000000, %x
   ret float %r
@@ -122,8 +109,7 @@
 
 define float @fsub_nan_op1_nnan(float %x) {
 ; CHECK-LABEL: @fsub_nan_op1_nnan(
-; CHECK-NEXT: [[R:%.*]] = fsub nnan float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fsub nnan float %x, 0x7FF8000000000000
   ret float %r
@@ -131,8 +117,7 @@
 
 define float @fmul_nan_op0_nnan(float %x) {
 ; CHECK-LABEL: @fmul_nan_op0_nnan(
-; CHECK-NEXT: [[R:%.*]] = fmul nnan float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fmul nnan float 0x7FF8000000000000, %x
   ret float %r
@@ -140,8 +125,7 @@
 
 define float @fmul_nan_op1_fast(float %x) {
 ; CHECK-LABEL: @fmul_nan_op1_fast(
-; CHECK-NEXT: [[R:%.*]] = fmul fast float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fmul fast float %x, 0x7FF8000000000000
   ret float %r
@@ -149,8 +133,7 @@
 
 define float @fdiv_nan_op0_fast(float %x) {
 ; CHECK-LABEL: @fdiv_nan_op0_fast(
-; CHECK-NEXT: [[R:%.*]] = fdiv fast float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fdiv fast float 0x7FF8000000000000, %x
   ret float %r
@@ -158,8 +141,7 @@
 
 define float @fdiv_nan_op1_nnan(float %x) {
 ; CHECK-LABEL: @fdiv_nan_op1_nnan(
-; CHECK-NEXT: [[R:%.*]] = fdiv nnan float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = fdiv nnan float %x, 0x7FF8000000000000
   ret float %r
@@ -167,8 +149,7 @@
 
 define float @frem_nan_op0_nnan(float %x) {
 ; CHECK-LABEL: @frem_nan_op0_nnan(
-; CHECK-NEXT: [[R:%.*]] = frem nnan float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = frem nnan float 0x7FF8000000000000, %x
   ret float %r
@@ -176,8 +157,7 @@
 
 define float @frem_nan_op1_fast(float %x) {
 ; CHECK-LABEL: @frem_nan_op1_fast(
-; CHECK-NEXT: [[R:%.*]] = frem fast float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT: ret float [[R]]
+; CHECK-NEXT: ret float 0x7FF8000000000000
 ;
   %r = frem fast float %x, 0x7FF8000000000000
   ret float %r