Index: lib/Analysis/InstructionSimplify.cpp
===================================================================
--- lib/Analysis/InstructionSimplify.cpp
+++ lib/Analysis/InstructionSimplify.cpp
@@ -4163,6 +4163,28 @@
   return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
 }
 
+static Constant *propagateNaN(Constant *In) {
+  // If the input is a vector with undef elements, just return a default NaN.
+  if (!In->isNaN())
+    return ConstantFP::getNaN(In->getType());
+
+  // Propagate the existing NaN constant when possible.
+  // TODO: Should we quiet a signaling NaN?
+  return In;
+}
+
+static Constant *simplifyFPBinop(Value *Op0, Value *Op1) {
+  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
+    return ConstantFP::getNaN(Op0->getType());
+
+  if (match(Op0, m_NaN()))
+    return propagateNaN(cast<Constant>(Op0));
+  if (match(Op1, m_NaN()))
+    return propagateNaN(cast<Constant>(Op1));
+
+  return nullptr;
+}
+
 /// Given operands for an FAdd, see if we can fold the result. If not, this
 /// returns null.
 static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
@@ -4170,8 +4192,8 @@
   if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q))
     return C;
 
-  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
-    return ConstantFP::getNaN(Op0->getType());
+  if (Constant *C = simplifyFPBinop(Op0, Op1))
+    return C;
 
   // fadd X, -0 ==> X
   if (match(Op1, m_NegZero()))
@@ -4203,8 +4225,8 @@
   if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q))
     return C;
 
-  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
-    return ConstantFP::getNaN(Op0->getType());
+  if (Constant *C = simplifyFPBinop(Op0, Op1))
+    return C;
 
   // fsub X, 0 ==> X
   if (match(Op1, m_Zero()))
@@ -4238,8 +4260,8 @@
   if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
     return C;
 
-  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
-    return ConstantFP::getNaN(Op0->getType());
+  if (Constant *C = simplifyFPBinop(Op0, Op1))
+    return C;
 
   // fmul X, 1.0 ==> X
   if (match(Op1, m_FPOne()))
@@ -4279,8 +4301,8 @@
   if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q))
     return C;
 
-  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
-    return ConstantFP::getNaN(Op0->getType());
+  if (Constant *C = simplifyFPBinop(Op0, Op1))
+    return C;
 
   // X / 1.0 -> X
   if (match(Op1, m_FPOne()))
@@ -4326,8 +4348,8 @@
   if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q))
     return C;
 
-  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
-    return ConstantFP::getNaN(Op0->getType());
+  if (Constant *C = simplifyFPBinop(Op0, Op1))
+    return C;
 
   // Unlike fdiv, the result of frem always matches the sign of the dividend.
   // The constant match may include undef elements in a vector, so return a full
Index: test/CodeGen/AMDGPU/imm.ll
===================================================================
--- test/CodeGen/AMDGPU/imm.ll
+++ test/CodeGen/AMDGPU/imm.ll
@@ -287,8 +287,7 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_1_f32:
-; GCN: s_load_dword [[VAL:s[0-9]+]]
-; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -1{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -1
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0xffffffffe0000000
@@ -297,8 +296,7 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_2_f32:
-; GCN: s_load_dword [[VAL:s[0-9]+]]
-; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -2{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -2
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0xffffffffc0000000
@@ -307,8 +305,7 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_16_f32:
-; GCN: s_load_dword [[VAL:s[0-9]+]]
-; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -16
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -16
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0xfffffffe00000000
@@ -495,10 +492,9 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_1_f64:
-; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -1
-; GCN: buffer_store_dwordx2 [[REG]]
+; GCN: v_mov_b32_e32 v0, -1
+; GCN: v_mov_b32_e32 v1, v0
+; GCN: buffer_store_dwordx2 v[0:1]
 define amdgpu_kernel void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
   %y = fadd double %x, 0xffffffffffffffff
   store double %y, double addrspace(1)* %out
@@ -506,10 +502,9 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_2_f64:
-; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -2
-; GCN: buffer_store_dwordx2 [[REG]]
+; GCN: v_mov_b32_e32 v0, -2
+; GCN: v_mov_b32_e32 v1, -1
+; GCN: buffer_store_dwordx2 v[0:1]
 define amdgpu_kernel void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
   %y = fadd double %x, 0xfffffffffffffffe
   store double %y, double addrspace(1)* %out
@@ -517,10 +512,9 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_16_f64:
-; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
-; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -16
-; GCN: buffer_store_dwordx2 [[REG]]
+; GCN: v_mov_b32_e32 v0, -16
+; GCN: v_mov_b32_e32 v1, -1
+; GCN: buffer_store_dwordx2 v[0:1]
 define amdgpu_kernel void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
   %y = fadd double %x, 0xfffffffffffffff0
   store double %y, double addrspace(1)* %out
Index: test/CodeGen/AMDGPU/imm16.ll
===================================================================
--- test/CodeGen/AMDGPU/imm16.ll
+++ test/CodeGen/AMDGPU/imm16.ll
@@ -266,9 +266,8 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_1_f16:
-; VI: buffer_load_ushort [[VAL:v[0-9]+]]
-; VI: v_add_f16_e32 [[REG:v[0-9]+]], -1, [[VAL]]{{$}}
-; VI: buffer_store_short [[REG]]
+; VI: v_mov_b32_e32 v0, -1
+; VI: buffer_store_short v0, off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, half %x) {
   %y = fadd half %x, 0xHFFFF
   store half %y, half addrspace(1)* %out
@@ -276,9 +275,8 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_2_f16:
-; VI: buffer_load_ushort [[VAL:v[0-9]+]]
-; VI: v_add_f16_e32 [[REG:v[0-9]+]], -2, [[VAL]]{{$}}
-; VI: buffer_store_short [[REG]]
+; VI: v_mov_b32_e32 v0, -2
+; VI: buffer_store_short v0, off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, half %x) {
   %y = fadd half %x, 0xHFFFE
   store half %y, half addrspace(1)* %out
@@ -286,9 +284,8 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_16_f16:
-; VI: buffer_load_ushort [[VAL:v[0-9]+]]
-; VI: v_add_f16_e32 [[REG:v[0-9]+]], -16, [[VAL]]{{$}}
-; VI: buffer_store_short [[REG]]
+; VI: v_mov_b32_e32 v0, -16
+; VI: buffer_store_short v0, off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, half %x) {
   %y = fadd half %x, 0xHFFF0
   store half %y, half addrspace(1)* %out
Index: test/CodeGen/AMDGPU/immv216.ll
===================================================================
--- test/CodeGen/AMDGPU/immv216.ll
+++ test/CodeGen/AMDGPU/immv216.ll
@@ -371,17 +371,8 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_1_v2f16:
-; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1{{$}}
-; GFX9: buffer_store_dword [[REG]]
-
-; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
-; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1, [[VAL0]]
-; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xffff
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI: v_or_b32
-; VI: buffer_store_dword
+; GCN: v_mov_b32_e32 v0, -1
+; GCN: buffer_store_dword v0, off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
   %y = fadd <2 x half> %x, <half 0xHFFFF, half 0xHFFFF>
   store <2 x half> %y, <2 x half> addrspace(1)* %out
@@ -389,17 +380,8 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_2_v2f16:
-; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2{{$}}
-; GFX9: buffer_store_dword [[REG]]
-
-; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
-; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2, [[VAL0]]
-; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xfffe
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI: v_or_b32
-; VI: buffer_store_dword
+; GCN: v_mov_b32_e32 v0, 0xfffefffe
+; GCN: buffer_store_dword v0, off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
   %y = fadd <2 x half> %x, <half 0xHFFFE, half 0xHFFFE>
   store <2 x half> %y, <2 x half> addrspace(1)* %out
@@ -407,17 +389,8 @@
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_16_v2f16:
-; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -16{{$}}
-; GFX9: buffer_store_dword [[REG]]
-
-; VI: buffer_load_ushort [[VAL0:v[0-9]+]]
-; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
-; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -16, [[VAL0]]
-; VI-DAG: v_mov_b32_e32 [[CONSTM16:v[0-9]+]], 0xfff0
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI: v_or_b32
-; VI: buffer_store_dword
+; GCN: v_mov_b32_e32 v0, 0xfff0fff0
+; GCN: buffer_store_dword v0, off, s[0:3], 0
 define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
   %y = fadd <2 x half> %x, <half 0xHFFF0, half 0xHFFF0>
   store <2 x half> %y, <2 x half> addrspace(1)* %out
Index: test/CodeGen/AMDGPU/jump-address.ll
===================================================================
--- test/CodeGen/AMDGPU/jump-address.ll
+++ test/CodeGen/AMDGPU/jump-address.ll
@@ -43,7 +43,7 @@
   %20 = load <4 x float>, <4 x float> addrspace(8)* null
   %21 = extractelement <4 x float> %20, i32 0
   %22 = fsub float -0.000000e+00, %21
-  %23 = fadd float 0xFFF8000000000000, %22
+  %23 = fadd float 0x3FF8000000000000, %22
   br label %ENDIF
 }
 
Index: test/Transforms/InstSimplify/fp-nan.ll
===================================================================
--- test/Transforms/InstSimplify/fp-nan.ll
+++ test/Transforms/InstSimplify/fp-nan.ll
@@ -5,8 +5,7 @@
 
 define double @fadd_nan_op0(double %x) {
 ; CHECK-LABEL: @fadd_nan_op0(
-; CHECK-NEXT:    [[R:%.*]] = fadd double 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT:    ret double [[R]]
+; CHECK-NEXT:    ret double 0x7FF8000000000000
 ;
   %r = fadd double 0x7FF8000000000000, %x
   ret double %r
@@ -16,8 +15,7 @@
 
 define double @fadd_nan_op1(double %x) {
 ; CHECK-LABEL: @fadd_nan_op1(
-; CHECK-NEXT:    [[R:%.*]] = fadd double [[X:%.*]], 0xFFF8000000000000
-; CHECK-NEXT:    ret double [[R]]
+; CHECK-NEXT:    ret double 0xFFF8000000000000
 ;
   %r = fadd double %x, 0xFFF8000000000000
   ret double %r
@@ -27,8 +25,7 @@
 
 define float @fsub_nan_op0(float %x) {
 ; CHECK-LABEL: @fsub_nan_op0(
-; CHECK-NEXT:    [[R:%.*]] = fsub float 0x7FFFFF0000000000, [[X:%.*]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FFFFF0000000000
 ;
   %r = fsub float 0x7FFFFF0000000000, %x
   ret float %r
@@ -38,8 +35,7 @@
 
 define float @fsub_nan_op1(float %x) {
 ; CHECK-LABEL: @fsub_nan_op1(
-; CHECK-NEXT:    [[R:%.*]] = fsub float [[X:%.*]], 0x7FF1000000000000
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF1000000000000
 ;
   %r = fsub float %x, 0x7FF1000000000000
   ret float %r
@@ -49,8 +45,7 @@
 
 define double @fmul_nan_op0(double %x) {
 ; CHECK-LABEL: @fmul_nan_op0(
-; CHECK-NEXT:    [[R:%.*]] = fmul double 0xFFF0000000000001, [[X:%.*]]
-; CHECK-NEXT:    ret double [[R]]
+; CHECK-NEXT:    ret double 0xFFF0000000000001
 ;
   %r = fmul double 0xFFF0000000000001, %x
   ret double %r
@@ -60,8 +55,7 @@
 
 define <2 x float> @fmul_nan_op1(<2 x float> %x) {
 ; CHECK-LABEL: @fmul_nan_op1(
-; CHECK-NEXT:    [[R:%.*]] = fmul <2 x float> [[X:%.*]], <float 0x7FF8000000000000, float 0x7FF8000000000000>
-; CHECK-NEXT:    ret <2 x float> [[R]]
+; CHECK-NEXT:    ret <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>
 ;
   %r = fmul <2 x float> %x, <float 0x7FF8000000000000, float 0x7FF8000000000000>
   ret <2 x float> %r
@@ -71,8 +65,7 @@
 
 define <2 x double> @fdiv_nan_op0(<2 x double> %x) {
 ; CHECK-LABEL: @fdiv_nan_op0(
-; CHECK-NEXT:    [[R:%.*]] = fdiv <2 x double> <double 0x7FF800000ABCD000, double 0x7FF8000000000000>, [[X:%.*]]
-; CHECK-NEXT:    ret <2 x double> [[R]]
+; CHECK-NEXT:    ret <2 x double> <double 0x7FF800000ABCD000, double 0x7FF8000000000000>
 ;
   %r = fdiv <2 x double> <double 0x7FF800000ABCD000, double 0x7FF8000000000000>, %x
   ret <2 x double> %r
@@ -82,8 +75,7 @@
 
 define <2 x half> @fdiv_nan_op1(<2 x half> %x) {
 ; CHECK-LABEL: @fdiv_nan_op1(
-; CHECK-NEXT:    [[R:%.*]] = fdiv <2 x half> [[X:%.*]], <half 0xH7FFF, half 0xH7FFF>
-; CHECK-NEXT:    ret <2 x half> [[R]]
+; CHECK-NEXT:    ret <2 x half> <half 0xH7FFF, half 0xH7FFF>
 ;
   %r = fdiv <2 x half> %x, <half 0xH7FFF, half 0xH7FFF>
   ret <2 x half> %r
@@ -93,8 +85,7 @@
 
 define <2 x double> @frem_nan_op0(<2 x double> %x) {
 ; CHECK-LABEL: @frem_nan_op0(
-; CHECK-NEXT:    [[R:%.*]] = frem <2 x double> <double 0xFFFF000000000000, double 0xFFF8000000000000>, [[X:%.*]]
-; CHECK-NEXT:    ret <2 x double> [[R]]
+; CHECK-NEXT:    ret <2 x double> <double 0xFFFF000000000000, double 0xFFF8000000000000>
 ;
   %r = frem <2 x double> <double 0xFFFF000000000000, double 0xFFF8000000000000>, %x
   ret <2 x double> %r
@@ -102,8 +93,7 @@
 
 define float @frem_nan_op1(float %x) {
 ; CHECK-LABEL: @frem_nan_op1(
-; CHECK-NEXT:    [[R:%.*]] = frem float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %r = frem float %x, 0x7FF8000000000000
   ret float %r
@@ -131,8 +121,7 @@
 
 define float @fadd_nan_op0_nnan(float %x) {
 ; CHECK-LABEL: @fadd_nan_op0_nnan(
-; CHECK-NEXT:    [[R:%.*]] = fadd nnan float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %r = fadd nnan float 0x7FF8000000000000, %x
   ret float %r
@@ -140,8 +129,7 @@
 
 define float @fadd_nan_op1_fast(float %x) {
 ; CHECK-LABEL: @fadd_nan_op1_fast(
-; CHECK-NEXT:    [[R:%.*]] = fadd fast float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %r = fadd fast float %x, 0x7FF8000000000000
   ret float %r
@@ -149,8 +137,7 @@
 
 define float @fsub_nan_op0_fast(float %x) {
 ; CHECK-LABEL: @fsub_nan_op0_fast(
-; CHECK-NEXT:    [[R:%.*]] = fsub fast float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %r = fsub fast float 0x7FF8000000000000, %x
   ret float %r
@@ -158,8 +145,7 @@
 
 define float @fsub_nan_op1_nnan(float %x) {
 ; CHECK-LABEL: @fsub_nan_op1_nnan(
-; CHECK-NEXT:    [[R:%.*]] = fsub nnan float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %r = fsub nnan float %x, 0x7FF8000000000000
   ret float %r
@@ -167,8 +153,7 @@
 
 define float @fmul_nan_op0_nnan(float %x) {
 ; CHECK-LABEL: @fmul_nan_op0_nnan(
-; CHECK-NEXT:    [[R:%.*]] = fmul nnan float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %r = fmul nnan float 0x7FF8000000000000, %x
   ret float %r
@@ -176,8 +161,7 @@
 
 define float @fmul_nan_op1_fast(float %x) {
 ; CHECK-LABEL: @fmul_nan_op1_fast(
-; CHECK-NEXT:    [[R:%.*]] = fmul fast float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %r = fmul fast float %x, 0x7FF8000000000000
   ret float %r
@@ -185,8 +169,7 @@
 
 define float @fdiv_nan_op0_fast(float %x) {
 ; CHECK-LABEL: @fdiv_nan_op0_fast(
-; CHECK-NEXT:    [[R:%.*]] = fdiv fast float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %r = fdiv fast float 0x7FF8000000000000, %x
   ret float %r
@@ -194,8 +177,7 @@
 
 define float @fdiv_nan_op1_nnan(float %x) {
 ; CHECK-LABEL: @fdiv_nan_op1_nnan(
-; CHECK-NEXT:    [[R:%.*]] = fdiv nnan float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %r = fdiv nnan float %x, 0x7FF8000000000000
   ret float %r
@@ -203,8 +185,7 @@
 
 define float @frem_nan_op0_nnan(float %x) {
 ; CHECK-LABEL: @frem_nan_op0_nnan(
-; CHECK-NEXT:    [[R:%.*]] = frem nnan float 0x7FF8000000000000, [[X:%.*]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %r = frem nnan float 0x7FF8000000000000, %x
   ret float %r
@@ -212,8 +193,7 @@
 
 define float @frem_nan_op1_fast(float %x) {
 ; CHECK-LABEL: @frem_nan_op1_fast(
-; CHECK-NEXT:    [[R:%.*]] = frem fast float [[X:%.*]], 0x7FF8000000000000
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    ret float 0x7FF8000000000000
 ;
   %r = frem fast float %x, 0x7FF8000000000000
   ret float %r