diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -250,6 +250,20 @@ return true; } + // trunc(trunc) -> trunc + Register TruncSrc; + if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { + // Always combine trunc(trunc) since the eventual resulting trunc must be + // legal anyway as it must be legal for all outputs of the consumer type + // set. + LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI); + + Builder.buildTrunc(DstReg, TruncSrc); + UpdatedDefs.push_back(DstReg); + markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts); + return true; + } + return false; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir @@ -12,9 +12,8 @@ ; CHECK-LABEL: name: test_load_trunc ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (load 2) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s10) = G_TRUNC [[LOAD]](s16) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[TRUNC]](s10) - ; CHECK: RET_ReallyLR implicit [[TRUNC1]](s1) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD]](s16) + ; CHECK: RET_ReallyLR implicit [[TRUNC]](s1) %0:_(p0) = G_FRAME_INDEX %stack.0 %1:_(s10) = G_LOAD %0(p0) :: (load 2) %2:_(s1) = G_TRUNC %1(s10) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir @@ -118,15 +118,13 @@ body: | bb.0: - ; Test that trunc(merge) with trunc-size > merge-source-size is not combined - ; if trunc-size % merge-source-size != 0 + ; Test that trunc(merge) with trunc-size > merge-source-size and + ; trunc-size % merge-source-size != 0 can be combined after the G_TRUNCs + ; have been combined. ; CHECK-LABEL: name: trunc_s68_merge_s128_s32 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C]](s32), [[C1]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s68) = G_TRUNC [[MV]](s128) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[TRUNC]](s68) - ; CHECK: $vgpr0 = COPY [[TRUNC1]](s32) + ; CHECK: $vgpr0 = COPY [[C]](s32) %0:_(s32) = G_CONSTANT i32 0 %1:_(s32) = G_CONSTANT i32 1 %2:_(s128) = G_MERGE_VALUES %0, %1, %0, %1 @@ -134,3 +132,19 @@ %4:_(s32) = G_TRUNC %3 $vgpr0 = COPY %4 ... + +--- +name: trunc_trunc + +body: | + bb.0: + ; Test that trunc(trunc) is combined to a single trunc + ; CHECK-LABEL: name: trunc_trunc + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) + ; CHECK: $vgpr0 = COPY [[TRUNC]](s32) + %0:_(s64) = G_IMPLICIT_DEF + %1:_(s48) = G_TRUNC %0 + %2:_(s32) = G_TRUNC %1 + $vgpr0 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -340,8 +340,7 @@ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] @@ -355,10 +354,10 @@ ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s48) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -56,8 +56,7 @@ ; CHECK-LABEL: name: test_extract_s16_s31_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK: $vgpr0 = COPY [[TRUNC]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s31) = G_TRUNC %0 %2:_(s16) = G_EXTRACT %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir @@ -188,9 +188,8 @@ ; CHECK-LABEL: name: test_inttoptr_s33_to_p3 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[TRUNC]](s33) - ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[TRUNC1]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[TRUNC]](s32) ; CHECK: S_ENDPGM 0, implicit [[INTTOPTR]](p3) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s33) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir @@ -215,9 +215,8 @@ ; CHECK-LABEL: name: test_trunc_s16_to_s1 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[TRUNC]](s16) - ; CHECK: S_ENDPGM 0, implicit [[TRUNC1]](s1) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s1) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s1) = G_TRUNC %1