Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IntrinsicInst.h" @@ -9714,6 +9715,11 @@ return !F.isDenormal() || denormalsEnabledForType(MRI.getType(Reg), MF); } + Register SplatReg; + // Constant splat padded with undef. + if (mi_match(Reg, MRI, MIPatternMatch::FCstOrSplatFCstRegMatch(SplatReg))) + return isKnownNeverSNaN(SplatReg, MRI); + if (MaxDepth == 0) return false; Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir @@ -221,3 +221,103 @@ %3:_(s32) = G_FMAXNUM_IEEE %4, %5 $vgpr0 = COPY %3(s32) ... 
+ +--- +name: test_splat_padded_with_undef +tracksRegLiveness: true +legalized: true +machineFunctionInfo: + mode: + ieee: true +body: | + bb.0 : + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_splat_padded_with_undef + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4000 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) + ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[C1]](s16) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](s32), [[DEF]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16) + ; CHECK: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT2]](s32) + ; CHECK: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], [[BUILD_VECTOR_TRUNC]] + ; CHECK: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[FMUL]] + ; CHECK: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[BUILD_VECTOR_TRUNC1]], [[FCANONICALIZE]] + ; CHECK: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[BUILD_VECTOR_TRUNC2]], [[FMAXNUM_IEEE]] + ; CHECK: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %3:_(s16) = G_FCONSTANT half 0xH4000 + %16:_(s32) = G_ANYEXT %3(s16) + %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %16(s32), %16(s32) + %6:_(s16) = G_FCONSTANT half 0xH0000 + %17:_(s32) = G_ANYEXT %6(s16) + %18:_(s32) = G_IMPLICIT_DEF + %5:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %17(s32), %18(s32) + %10:_(s16) = G_FCONSTANT half 0xH3C00 + %19:_(s32) = G_ANYEXT %10(s16) + %9:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %19(s32), %19(s32) + %4:_(<2 x s16>) = G_FMUL %0, %2 + %14:_(<2 x s16>) = 
G_FCANONICALIZE %5 + %15:_(<2 x s16>) = G_FCANONICALIZE %4 + %8:_(<2 x s16>) = G_FMAXNUM_IEEE %14, %15 + %13:_(<2 x s16>) = G_FCANONICALIZE %8 + %11:_(<2 x s16>) = G_FMINNUM_IEEE %9, %13 + $vgpr0 = COPY %11(<2 x s16>) +... + +--- +# 0xH7C01 is SNaN and 0xH7E01 is QNaN +name: test_splat_SNaN_and_QNaN_padded_with_undef +tracksRegLiveness: true +legalized: true +machineFunctionInfo: + mode: + ieee: true +body: | + bb.0 : + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_splat_SNaN_and_QNaN_padded_with_undef + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4000 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) + ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH7C01 + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[C1]](s16) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](s32), [[DEF]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH7E01 + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16) + ; CHECK: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT2]](s32) + ; CHECK: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], [[BUILD_VECTOR_TRUNC]] + ; CHECK: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]] + ; CHECK: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[FMUL]] + ; CHECK: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; CHECK: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[BUILD_VECTOR_TRUNC2]], [[FMAXNUM_IEEE]] + ; CHECK: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %3:_(s16) = G_FCONSTANT half 0xH4000 + %16:_(s32) = G_ANYEXT %3(s16) + %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
%16(s32), %16(s32) + %6:_(s16) = G_FCONSTANT half 0xH7C01 + %17:_(s32) = G_ANYEXT %6(s16) + %18:_(s32) = G_IMPLICIT_DEF + %5:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %17(s32), %18(s32) + %10:_(s16) = G_FCONSTANT half 0xH7E01 + %19:_(s32) = G_ANYEXT %10(s16) + %9:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %19(s32), %19(s32) + %4:_(<2 x s16>) = G_FMUL %0, %2 + %14:_(<2 x s16>) = G_FCANONICALIZE %5 + %15:_(<2 x s16>) = G_FCANONICALIZE %4 + %8:_(<2 x s16>) = G_FMAXNUM_IEEE %14, %15 + %13:_(<2 x s16>) = G_FCANONICALIZE %8 + %11:_(<2 x s16>) = G_FMINNUM_IEEE %9, %13 + $vgpr0 = COPY %11(<2 x s16>) +...