Patch summary (describes only what the hunks below show; text before the
first "Index:" line is ignored by patch tools):

* llvm/lib/Target/AMDGPU/SIISelLowering.cpp
  - Adds the GlobalISel MIPatternMatch.h include.
  - In the constant check (the enclosing function lies outside the hunk
    context), the G_FCONSTANT-only opcode test is replaced by
    mi_match(Reg, MRI, m_GFCstOrSplat(FCR)); per the in-code comment this
    matches a scalar constant or a constant splat that may be padded with
    undef.
  - A signaling value yields false. Otherwise the result is true unless the
    value is denormal and denormals are not enabled for the type of
    FCR->VReg.

* llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
  - test_splat_padded_with_undef: the expected output no longer contains the
    G_FCANONICALIZE of %zero_undef and %one_undef; the min/max instructions
    use those vectors directly.
  - test_splat_SNaN_and_QNaN_padded_with_undef: the expected output keeps the
    G_FCANONICALIZE of %snan_undef but uses %qnan_undef directly.

Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -9680,11 +9681,13 @@
   if (Opcode == AMDGPU::G_FCANONICALIZE)
     return true;
 
-  if (Opcode == AMDGPU::G_FCONSTANT) {
-    auto F = MI->getOperand(1).getFPImm()->getValueAPF();
-    if (F.isNaN() && F.isSignaling())
+  Optional<FPValueAndVReg> FCR;
+  // Constant splat (can be padded with undef) or scalar constant.
+  if (mi_match(Reg, MRI, MIPatternMatch::m_GFCstOrSplat(FCR))) {
+    if (FCR->Value.isSignaling())
       return false;
-    return !F.isDenormal() || denormalsEnabledForType(MRI.getType(Reg), MF);
+    return !FCR->Value.isDenormal() ||
+           denormalsEnabledForType(MRI.getType(FCR->VReg), MF);
   }
 
   if (MaxDepth == 0)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
@@ -221,3 +221,104 @@
     %3:_(s32) = G_FMAXNUM_IEEE %4, %5
     $vgpr0 = COPY %3(s32)
 ...
+
+---
+name: test_splat_padded_with_undef
+tracksRegLiveness: true
+legalized: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+body: |
+  bb.0 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_splat_padded_with_undef
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: %two:_(s16) = G_FCONSTANT half 0xH4000
+    ; CHECK: %two_s32:_(s32) = G_ANYEXT %two(s16)
+    ; CHECK: %two_splat:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %two_s32(s32), %two_s32(s32)
+    ; CHECK: %zero:_(s16) = G_FCONSTANT half 0xH0000
+    ; CHECK: %zero_s32:_(s32) = G_ANYEXT %zero(s16)
+    ; CHECK: %undef:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: %zero_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %zero_s32(s32), %undef(s32)
+    ; CHECK: %one:_(s16) = G_FCONSTANT half 0xH3C00
+    ; CHECK: %one_s32:_(s32) = G_ANYEXT %one(s16)
+    ; CHECK: %one_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %one_s32(s32), %undef(s32)
+    ; CHECK: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], %two_splat
+    ; CHECK: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[FMUL]]
+    ; CHECK: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE %zero_undef, [[FCANONICALIZE]]
+    ; CHECK: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE %one_undef, [[FMAXNUM_IEEE]]
+    ; CHECK: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %two:_(s16) = G_FCONSTANT half 0xH4000
+    %two_s32:_(s32) = G_ANYEXT %two(s16)
+    %two_splat:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %two_s32(s32), %two_s32(s32)
+    %zero:_(s16) = G_FCONSTANT half 0xH0000
+    %zero_s32:_(s32) = G_ANYEXT %zero(s16)
+    %undef:_(s32) = G_IMPLICIT_DEF
+    %zero_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %zero_s32(s32), %undef(s32)
+    %one:_(s16) = G_FCONSTANT half 0xH3C00
+    %one_s32:_(s32) = G_ANYEXT %one(s16)
+    %one_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %one_s32(s32), %undef(s32)
+    %4:_(<2 x s16>) = G_FMUL %0, %two_splat
+    %zero_undef_fcan:_(<2 x s16>) = G_FCANONICALIZE %zero_undef
+    %16:_(<2 x s16>) = G_FCANONICALIZE %4
+    %8:_(<2 x s16>) = G_FMAXNUM_IEEE %zero_undef_fcan, %16
+    %one_undef_fcan:_(<2 x s16>) = G_FCANONICALIZE %one_undef
+    %14:_(<2 x s16>) = G_FCANONICALIZE %8
+    %11:_(<2 x s16>) = G_FMINNUM_IEEE %one_undef_fcan, %14
+    $vgpr0 = COPY %11(<2 x s16>)
+...
+
+---
+name: test_splat_SNaN_and_QNaN_padded_with_undef
+tracksRegLiveness: true
+legalized: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+body: |
+  bb.0 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_splat_SNaN_and_QNaN_padded_with_undef
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: %two:_(s16) = G_FCONSTANT half 0xH4000
+    ; CHECK: %two_s32:_(s32) = G_ANYEXT %two(s16)
+    ; CHECK: %two_splat:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %two_s32(s32), %two_s32(s32)
+    ; CHECK: %snan:_(s16) = G_FCONSTANT half 0xH7C01
+    ; CHECK: %snan_s32:_(s32) = G_ANYEXT %snan(s16)
+    ; CHECK: %undef:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: %snan_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %snan_s32(s32), %undef(s32)
+    ; CHECK: %qnan:_(s16) = G_FCONSTANT half 0xH7E01
+    ; CHECK: %qnan_s32:_(s32) = G_ANYEXT %qnan(s16)
+    ; CHECK: %qnan_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %qnan_s32(s32), %undef(s32)
+    ; CHECK: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], %two_splat
+    ; CHECK: %snan_undef_fcan:_(<2 x s16>) = G_FCANONICALIZE %snan_undef
+    ; CHECK: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[FMUL]]
+    ; CHECK: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE %snan_undef_fcan, [[FCANONICALIZE]]
+    ; CHECK: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE %qnan_undef, [[FMAXNUM_IEEE]]
+    ; CHECK: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %two:_(s16) = G_FCONSTANT half 0xH4000
+    %two_s32:_(s32) = G_ANYEXT %two(s16)
+    %two_splat:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %two_s32(s32), %two_s32(s32)
+    %snan:_(s16) = G_FCONSTANT half 0xH7C01
+    %snan_s32:_(s32) = G_ANYEXT %snan(s16)
+    %undef:_(s32) = G_IMPLICIT_DEF
+    %snan_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %snan_s32(s32), %undef(s32)
+    %qnan:_(s16) = G_FCONSTANT half 0xH7E01
+    %qnan_s32:_(s32) = G_ANYEXT %qnan(s16)
+    %qnan_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %qnan_s32(s32), %undef(s32)
+    %4:_(<2 x s16>) = G_FMUL %0, %two_splat
+    %snan_undef_fcan:_(<2 x s16>) = G_FCANONICALIZE %snan_undef
+    %16:_(<2 x s16>) = G_FCANONICALIZE %4
+    %8:_(<2 x s16>) = G_FMAXNUM_IEEE %snan_undef_fcan, %16
+    %qnan_undef_fcan:_(<2 x s16>) = G_FCANONICALIZE %qnan_undef
+    %14:_(<2 x s16>) = G_FCANONICALIZE %8
+    %11:_(<2 x s16>) = G_FMINNUM_IEEE %qnan_undef_fcan, %14
+    $vgpr0 = COPY %11(<2 x s16>)
+...