diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1468,8 +1468,13 @@ MachineIRBuilder &MIRBuilder) { // If we're bitcasting to the source type, we can reuse the source vreg. if (getLLTForType(*U.getOperand(0)->getType(), *DL) == - getLLTForType(*U.getType(), *DL)) + getLLTForType(*U.getType(), *DL)) { + // If the source is a ConstantInt then it was probably created by + // ConstantHoisting and we should leave it alone. + if (isa<ConstantInt>(U.getOperand(0))) + return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder); return translateCopy(U, *U.getOperand(0), MIRBuilder); + } return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder); } diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1174,9 +1174,6 @@ if (SrcTy.getSizeInBits() != DstTy.getSizeInBits()) report("bitcast sizes must match", MI); - if (SrcTy == DstTy) - report("bitcast must change the type", MI); - break; } case TargetOpcode::G_INTTOPTR: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -20,6 +20,7 @@ //===----------------------------------------------------------------------===// #include "AArch64TargetMachine.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" @@ -431,7 +432,27 @@ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig()); Combiner C(PCInfo, TPC); - return C.combineMachineInstrs(MF, CSEInfo); + bool Changed = 
C.combineMachineInstrs(MF, CSEInfo); + + auto &MRI = MF.getRegInfo(); + // Eliminate any no-op bitcasts that we created to preserve hoisted constants. + // We wait until all the combiners have run otherwise the constants may get + // re-folded with others during the main combiner loop. + for (auto &MBB : MF) { + for (auto &MI : make_early_inc_range(MBB)) { + if (MI.getOpcode() == TargetOpcode::G_BITCAST) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + if (MRI.getType(Dst) != MRI.getType(Src)) + continue; + MRI.replaceRegWith(Dst, Src); + MI.eraseFromParent(); + Changed = true; + } + } + } + + return Changed; } char AArch64PostLegalizerCombiner::ID = 0; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-bitcast.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-bitcast.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-bitcast.ll @@ -24,7 +24,7 @@ ; At this point we mapped 46 values. The 'i32 100' constant will grow the map. 
; CHECK: %46:_(s32) = G_CONSTANT i32 100 -; CHECK: $w0 = COPY %46(s32) +; CHECK: $w0 = COPY %47(s32) %res = bitcast i32 100 to i32 ret i32 %res } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-apple-ios -global-isel -stop-after=irtranslator %s -o - | FileCheck %s --check-prefix=TRANSLATED +; RUN: llc -mtriple=aarch64-apple-ios -global-isel -stop-after=aarch64-postlegalizer-combiner %s -o - | FileCheck %s --check-prefix=PRESELECTION + +; Check we don't elide no-op bitcasts of constants since they're used by constant +; hoisting to prevent constant folding/propagation. + +define i32 @test(i32 %a) { + ; TRANSLATED-LABEL: name: test + ; TRANSLATED: bb.1.entry: + ; TRANSLATED-NEXT: liveins: $w0 + ; TRANSLATED-NEXT: {{ $}} + ; TRANSLATED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; TRANSLATED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 100000 + ; TRANSLATED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[C]](s32) + ; TRANSLATED-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[BITCAST]] + ; TRANSLATED-NEXT: $w0 = COPY [[ADD]](s32) + ; TRANSLATED-NEXT: RET_ReallyLR implicit $w0 + ; PRESELECTION-LABEL: name: test + ; PRESELECTION: bb.1.entry: + ; PRESELECTION-NEXT: liveins: $w0 + ; PRESELECTION-NEXT: {{ $}} + ; PRESELECTION-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; PRESELECTION-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 100000 + ; PRESELECTION-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]] + ; PRESELECTION-NEXT: $w0 = COPY [[ADD]](s32) + ; PRESELECTION-NEXT: RET_ReallyLR implicit $w0 +entry: + %hc = bitcast i32 100000 to i32 + br label %cont +cont: + %add = add i32 %a, %hc + ret i32 %add +} + + +define 
<2 x i32> @test_vector(<2 x i32> %a) { + ; TRANSLATED-LABEL: name: test_vector + ; TRANSLATED: bb.1.entry: + ; TRANSLATED-NEXT: liveins: $d0 + ; TRANSLATED-NEXT: {{ $}} + ; TRANSLATED-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; TRANSLATED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 100000 + ; TRANSLATED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; TRANSLATED-NEXT: [[ADD:%[0-9]+]]:_(<2 x s32>) = G_ADD [[COPY]], [[BUILD_VECTOR]] + ; TRANSLATED-NEXT: $d0 = COPY [[ADD]](<2 x s32>) + ; TRANSLATED-NEXT: RET_ReallyLR implicit $d0 + ; PRESELECTION-LABEL: name: test_vector + ; PRESELECTION: bb.1.entry: + ; PRESELECTION-NEXT: liveins: $d0 + ; PRESELECTION-NEXT: {{ $}} + ; PRESELECTION-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; PRESELECTION-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 100000 + ; PRESELECTION-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; PRESELECTION-NEXT: [[ADD:%[0-9]+]]:_(<2 x s32>) = G_ADD [[COPY]], [[BUILD_VECTOR]] + ; PRESELECTION-NEXT: $d0 = COPY [[ADD]](<2 x s32>) + ; PRESELECTION-NEXT: RET_ReallyLR implicit $d0 +entry: + %hc = bitcast <2 x i32> <i32 100000, i32 100000> to <2 x i32> + br label %cont +cont: + %add = add <2 x i32> %a, %hc + ret <2 x i32> %add +} + diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll @@ -118,12 +118,13 @@ ; CHECK: bb.1.entry: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2228259 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2 ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[GV2:%[0-9]+]]:_(p0) = 
G_GLOBAL_VALUE @var1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2228259 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]] ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4 @@ -133,19 +134,19 @@ ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2 - ; CHECK-NEXT: G_STORE [[C]](s32), [[GV3]](p0) :: (store (s32) into @var2) + ; CHECK-NEXT: G_STORE [[BITCAST]](s32), [[GV3]](p0) :: (store (s32) into @var2) ; CHECK-NEXT: G_BR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.if.then2: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1 - ; CHECK-NEXT: G_STORE [[C]](s32), [[GV4]](p0) :: (store (s32) into @var1) + ; CHECK-NEXT: G_STORE [[BITCAST]](s32), [[GV4]](p0) :: (store (s32) into @var1) ; CHECK-NEXT: G_BR %bb.4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4.if.end: ; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3 - ; CHECK-NEXT: G_STORE [[C]](s32), [[GV5]](p0) :: (store (s32) into @var3) + ; CHECK-NEXT: G_STORE [[BITCAST]](s32), [[GV5]](p0) :: (store (s32) into @var3) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: $w0 = COPY [[C3]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 @@ -173,12 +174,13 @@ ; CHECK: bb.1.entry: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -2228259 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64 ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE 
@var1_64 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -2228259 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[C1]](s64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]] ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4 @@ -188,19 +190,19 @@ ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64 - ; CHECK-NEXT: G_STORE [[C]](s64), [[GV3]](p0) :: (store (s64) into @var2_64) + ; CHECK-NEXT: G_STORE [[BITCAST]](s64), [[GV3]](p0) :: (store (s64) into @var2_64) ; CHECK-NEXT: G_BR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.if.then2: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64 - ; CHECK-NEXT: G_STORE [[C]](s64), [[GV4]](p0) :: (store (s64) into @var1_64) + ; CHECK-NEXT: G_STORE [[BITCAST]](s64), [[GV4]](p0) :: (store (s64) into @var1_64) ; CHECK-NEXT: G_BR %bb.4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4.if.end: ; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64 - ; CHECK-NEXT: G_STORE [[C]](s64), [[GV5]](p0) :: (store (s64) into @var3_64) + ; CHECK-NEXT: G_STORE [[BITCAST]](s64), [[GV5]](p0) :: (store (s64) into @var3_64) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: $x0 = COPY [[C3]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 diff --git a/llvm/test/MachineVerifier/test_g_bitcast.mir b/llvm/test/MachineVerifier/test_g_bitcast.mir --- a/llvm/test/MachineVerifier/test_g_bitcast.mir +++ b/llvm/test/MachineVerifier/test_g_bitcast.mir @@ -34,6 +34,4 @@ %10:_(p1) = G_IMPLICIT_DEF %11:_(p3) = G_BITCAST %8 - ; CHECK: Bad machine code: bitcast must change the type - %12:_(s64) = G_BITCAST %0 ...