diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1468,8 +1468,13 @@
                                     MachineIRBuilder &MIRBuilder) {
   // If we're bitcasting to the source type, we can reuse the source vreg.
   if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
-      getLLTForType(*U.getType(), *DL))
+      getLLTForType(*U.getType(), *DL)) {
+    // If the source is a ConstantInt then it was probably created by
+    // ConstantHoisting and we should leave it alone.
+    if (isa<ConstantInt>(U.getOperand(0)))
+      return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
     return translateCopy(U, *U.getOperand(0), MIRBuilder);
+  }
 
   return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
 }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -20,6 +20,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
@@ -431,7 +432,29 @@
       getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
   auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
   Combiner C(PCInfo, TPC);
-  return C.combineMachineInstrs(MF, CSEInfo);
+  bool Changed = C.combineMachineInstrs(MF, CSEInfo);
+
+  auto &MRI = MF.getRegInfo();
+  // Eliminate any no-op bitcasts that we created to preserve hoisted constants.
+  // We wait until all the combiners have run otherwise the constants may get
+  // re-folded with others during the main combiner loop.
+  for (auto &MBB : MF) {
+    for (auto &MI : make_early_inc_range(MBB)) {
+      if (MI.getOpcode() == TargetOpcode::G_BITCAST) {
+        Register Dst = MI.getOperand(0).getReg();
+        Register Src = MI.getOperand(1).getReg();
+        // A bitcast is only a no-op when source and destination types match;
+        // one that changes the type must be left in place.
+        if (MRI.getType(Dst) != MRI.getType(Src))
+          continue;
+        MRI.replaceRegWith(Dst, Src);
+        MI.eraseFromParent();
+        Changed = true;
+      }
+    }
+  }
+
+  return Changed;
 }
 
 char AArch64PostLegalizerCombiner::ID = 0;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-apple-ios -global-isel -stop-after=irtranslator %s -o - | FileCheck %s --check-prefix=TRANSLATED
+; RUN: llc -mtriple=aarch64-apple-ios -global-isel -stop-after=aarch64-postlegalizer-combiner %s -o - | FileCheck %s --check-prefix=PRESELECTION
+
+; Check we don't elide no-op bitcasts of constants since they're used by constant
+; hoisting to prevent constant folding/propagation.
+
+define i32 @test(i32 %a) {
+  ; TRANSLATED-LABEL: name: test
+  ; TRANSLATED: bb.1.entry:
+  ; TRANSLATED-NEXT: liveins: $w0
+  ; TRANSLATED-NEXT: {{ $}}
+  ; TRANSLATED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; TRANSLATED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 100000
+  ; TRANSLATED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[C]](s32)
+  ; TRANSLATED-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[BITCAST]]
+  ; TRANSLATED-NEXT: $w0 = COPY [[ADD]](s32)
+  ; TRANSLATED-NEXT: RET_ReallyLR implicit $w0
+  ; PRESELECTION-LABEL: name: test
+  ; PRESELECTION: bb.1.entry:
+  ; PRESELECTION-NEXT: liveins: $w0
+  ; PRESELECTION-NEXT: {{ $}}
+  ; PRESELECTION-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; PRESELECTION-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 100000
+  ; PRESELECTION-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]]
+  ; PRESELECTION-NEXT: $w0 = COPY [[ADD]](s32)
+  ; PRESELECTION-NEXT: RET_ReallyLR implicit $w0
+entry:
+  %hc = bitcast i32 100000 to i32
+  br label %cont
+cont:
+  %add = add i32 %a, %hc
+  ret i32 %add
+}
+