diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -18,6 +18,7 @@
 #include "ARMMachineFunctionInfo.h"
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
 #include "Thumb2InstrInfo.h"
 #include "Utils/ARMBaseInfo.h"
 #include "llvm/ADT/DenseMap.h"
@@ -260,7 +261,7 @@
     bool removeUnusedCPEntries();
     bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
                           MachineInstr *CPEMI, unsigned Disp, bool NegOk,
-                          bool DoDump = false);
+                          bool IsSoImm, bool DoDump = false);
     bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
                         CPUser &U, unsigned &Growth);
     bool fixupImmediateBr(ImmBranch &Br);
@@ -280,12 +281,26 @@
     void dumpBBs();
     void verify();
 
+    /// PC offset handed to isOffsetInRange for a user with the given opcode:
+    /// 8 for LEApcrel, 4 for t2LEApcrel, and 0 for every other opcode.
+    unsigned getPCOffset(unsigned Opcode) {
+      switch (Opcode) {
+      case ARM::LEApcrel:
+        return 8;
+      case ARM::t2LEApcrel:
+        return 4;
+      }
+      return 0;
+    }
+
     bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
-                         unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+                         unsigned PCOffset, unsigned Disp, bool NegativeOK,
+                         bool IsSoImm);
     bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
                          const CPUser &U) {
       return isOffsetInRange(UserOffset, TrialOffset,
-                             U.getMaxDisp(), U.NegOk, U.IsSoImm);
+                             getPCOffset(U.MI->getOpcode()), U.getMaxDisp(),
+                             U.NegOk, U.IsSoImm);
     }
   };
 
@@ -309,8 +324,8 @@
     unsigned UserOffset = getUserOffset(U);
     // Verify offset using the real max displacement without the safety
     // adjustment.
-    if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk,
-                         /* DoDump = */ true)) {
+    if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp() + 2, U.NegOk,
+                         U.IsSoImm, /* DoDump = */ true)) {
       LLVM_DEBUG(dbgs() << "OK\n");
       continue;
     }
@@ -990,17 +1005,23 @@
 /// the mod 4 alignment of UserOffset is not known, the uncertainty must be
 /// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that.
 bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset,
-                                         unsigned TrialOffset, unsigned MaxDisp,
+                                         unsigned TrialOffset,
+                                         unsigned PCOffset, unsigned MaxDisp,
                                          bool NegativeOK, bool IsSoImm) {
-  if (UserOffset <= TrialOffset) {
+  // Signed 64-bit: the difference cannot wrap even where 'long' is 32 bits.
+  int64_t Delta = int64_t(TrialOffset) - (int64_t(PCOffset) + UserOffset);
+  if (Delta >= 0) {
     // User before the Trial.
-    if (TrialOffset - UserOffset <= MaxDisp)
+    if (IsSoImm) // Range is decided by encodability, not by MaxDisp.
+      return ARM_AM::getSOImmVal(static_cast<unsigned>(Delta)) != -1;
+    if (Delta <= MaxDisp)
       return true;
-    // FIXME: Make use full range of soimm values.
   } else if (NegativeOK) {
-    if (UserOffset - TrialOffset <= MaxDisp)
+    Delta = -Delta; // Work with the magnitude of the backward distance.
+    if (IsSoImm)
+      return ARM_AM::getSOImmVal(static_cast<unsigned>(Delta)) != -1;
+    if (Delta <= MaxDisp)
       return true;
-    // FIXME: Make use full range of soimm values.
   }
   return false;
 }
@@ -1051,13 +1072,15 @@
 /// isCPEntryInRange - Returns true if the distance between specific MI and
 /// specific ConstPool entry instruction can fit in MI's displacement field.
 bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
-                                      MachineInstr *CPEMI, unsigned MaxDisp,
-                                      bool NegOk, bool DoDump) {
+                                          MachineInstr *CPEMI, unsigned MaxDisp,
+                                          bool NegOk, bool IsSoImm,
+                                          bool DoDump) {
   unsigned CPEOffset = BBUtils->getOffsetOf(CPEMI);
+  unsigned PCOffset = getPCOffset(MI->getOpcode()); // 0 unless MI is a LEApcrel.
 
   if (DoDump) {
     LLVM_DEBUG({
       BBInfoVector &BBInfo = BBUtils->getBBInfo();
       unsigned Block = MI->getParent()->getNumber();
       const BasicBlockInfo &BBI = BBInfo[Block];
       dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
@@ -1066,11 +1089,12 @@
              << printMBBReference(*MI->getParent()) << ": "
              << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
              << format("CPE address=%#x offset=%+d: ", CPEOffset,
-                       int(CPEOffset - UserOffset));
+                       int(CPEOffset - (UserOffset + PCOffset)));
     });
   }
 
-  return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+  return isOffsetInRange(UserOffset, CPEOffset, PCOffset, MaxDisp, NegOk,
+                         IsSoImm);
 }
 
 #ifndef NDEBUG
@@ -1127,7 +1151,7 @@
 
   // Check to see if the CPE is already in-range.
   if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
-                       true)) {
+                       U.IsSoImm, /*DoDump=*/true)) {
     LLVM_DEBUG(dbgs() << "In range\n");
     return 1;
   }
@@ -1143,7 +1167,7 @@
     if (CPEs[i].CPEMI == nullptr)
       continue;
     if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
-                     U.NegOk)) {
+                         /*NegOk=*/U.NegOk, /*IsSoImm=*/U.IsSoImm)) {
       LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
                         << CPEs[i].CPI << "\n");
       // Point the CPUser node to the replacement
@@ -1765,7 +1789,8 @@
       MaxOffs -= 2;
 
     // FIXME: Check if offset is multiple of scale if scale is not 4.
-    if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+    if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, /*NegOk=*/false,
+                         /*IsSoImm=*/false, /*DoDump=*/true)) {
       LLVM_DEBUG(dbgs() << "Shrink: " << *U.MI);
       U.MI->setDesc(TII->get(NewOpc));
       MachineBasicBlock *MBB = U.MI->getParent();
diff --git a/llvm/test/CodeGen/ARM/constant-island-SOImm.mir b/llvm/test/CodeGen/ARM/constant-island-SOImm.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/constant-island-SOImm.mir
@@ -0,0 +1,252 @@
+# RUN: llc --filetype=obj -start-before=arm-cp-islands -o - %s | llvm-objdump --arch=armv8a --disassemble - | FileCheck %s
+# CHECK: add r{{[1-9]}}, pc, #252
+# CHECK: add r{{[1-9]}}, pc, #260
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "armv8.2a-arm-none-eabi"
+
+  define dso_local void @a(half %0, half %1) #0 {
+    %3 = alloca half, align 2
+    %4 = alloca half, align 2
+    store half %0, half* %3, align 2
+    store half %1, half* %4, align 2
+    ret void
+  }
+
+  define dso_local i32 @main() #0 {
+    %1 = alloca i32, align 4
+    %2 = alloca <16 x i8>, align 8
+    %3 = alloca <16 x i8>, align 8
+    %4 = alloca [16 x i8], align 1
+    %5 = alloca <16 x i8>, align 8
+    %6 = alloca i32, align 4
+    store i32 0, i32* %1, align 4
+    call void @a(half 0xH5440, half 0xH61B0)
+    %7 = getelementptr inbounds [16 x i8], [16 x i8]* %4, i32 0, i32 0
+    store i8 1, i8* %7, align 1
+    %8 = getelementptr inbounds i8, i8* %7, i32 1
+    store i8 9, i8* %8, align 1
+    %9 = getelementptr inbounds i8, i8* %8, i32 1
+    store i8 31, i8* %9, align 1
+    %10 = getelementptr inbounds i8, i8* %9, i32 1
+    store i8 30, i8* %10, align 1
+    %11 = getelementptr inbounds i8, i8* %10, i32 1
+    store i8 2, i8* %11, align 1
+    %12 = getelementptr inbounds i8, i8* %11, i32 1
+    store i8 5, i8* %12, align 1
+    %13 = getelementptr inbounds i8, i8* %12, i32 1
+    store i8 4, i8* %13, align 1
+    %14 = getelementptr inbounds i8, i8* %13, i32 1
+    store i8 7, i8* %14, align 1
+    %15 = getelementptr inbounds i8, i8* %14, i32 1
+    store i8 35, i8* %15, align 1
+    %16 = getelementptr inbounds i8, i8* %15, i32 1
+    store i8 29, i8* %16, align 1
+    %17 = getelementptr inbounds i8, i8* %16, i32 1
+    store i8 3, i8* %17, align 1
+    %18 = getelementptr inbounds i8, i8* %17, i32 1
+    store i8 85, i8* %18, align 1
+    %19 = getelementptr inbounds i8, i8* %18, i32 1
+    store i8 60, i8* %19, align 1
+    %20 = getelementptr inbounds i8, i8* %19, i32 1
+    store i8 50, i8* %20, align 1
+    %21 = getelementptr inbounds i8, i8* %20, i32 1
+    store i8 8, i8* %21, align 1
+    %22 = getelementptr inbounds i8, i8* %21, i32 1
+    store i8 14, i8* %22, align 1
+    %23 = getelementptr inbounds [16 x i8], [16 x i8]* %4, i32 0, i32 0
+    %24 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %23, i32 1)
+    store <16 x i8> %24, <16 x i8>* %3, align 8
+    %25 = load <16 x i8>, <16 x i8>* %3, align 8
+    store <16 x i8> %25, <16 x i8>* %5, align 8
+    %26 = load <16 x i8>, <16 x i8>* %5, align 8
+    store <16 x i8> %26, <16 x i8>* %2, align 8
+    call void asm sideeffect "nop", ""() #2, !srcloc !3
+    call void asm sideeffect "nop", ""() #2, !srcloc !4
+    store i32 0, i32* %6, align 4
+    br label %27
+
+  27:                                               ; preds = %31, %0
+    %28 = load i32, i32* %6, align 4
+    %29 = icmp slt i32 %28, 1051
+    br i1 %29, label %30, label %34
+
+  30:                                               ; preds = %27
+    br label %31
+
+  31:                                               ; preds = %30
+    %32 = load i32, i32* %6, align 4
+    %33 = add nsw i32 %32, 1
+    store i32 %33, i32* %6, align 4
+    br label %27
+
+  34:                                               ; preds = %27
+    ret i32 0
+  }
+  declare <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8*, i32) #1
+  declare void @llvm.stackprotector(i8*, i8**) #2
+  attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.2-a,+crc,+crypto,+d32,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+fullfp16,+hwdiv,+hwdiv-arm,+neon,+ras,+strict-align,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #1 = { argmemonly nounwind readonly }
+  attributes #2 = { nounwind }
+  !llvm.module.flags = !{!0, !1}
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{i32 1, !"min_enum_size", i32 4}
+  !3 = !{i32 194}
+  !4 = !{i32 210}
+
+...
+---
+name:            a
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$r0' }
+  - { reg: '$r1' }
+frameInfo:
+  stackSize:       4
+  maxAlignment:    2
+  maxCallFrameSize: 0
+  localFrameSize:  4
+stack:
+  - { id: 0, offset: -2, size: 2, alignment: 2, local-offset: -2 }
+  - { id: 1, offset: -4, size: 2, alignment: 2, local-offset: -4 }
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.2):
+    liveins: $r0, $r1
+
+    $sp = frame-setup SUBri killed $sp, 4, 14, $noreg, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa_offset 4
+    renamable $s0 = VMOVHR killed renamable $r1, 14, $noreg
+    renamable $s2 = VMOVHR killed renamable $r0, 14, $noreg
+    VSTRH killed renamable $s2, $sp, 1, 14, $noreg :: (store 2 into %ir.3)
+    VSTRH killed renamable $s0, $sp, 0, 14, $noreg :: (store 2 into %ir.4)
+    $sp = frame-destroy ADDri killed $sp, 4, 14, $noreg, $noreg
+    BX_RET 14, $noreg
+
+...
+---
+name:            main
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  stackSize:       88
+  offsetAdjustment: -80
+  maxAlignment:    8
+  adjustsStack:    true
+  hasCalls:        true
+  maxCallFrameSize: 0
+  localFrameSize:  76
+stack:
+  - { id: 0, offset: -12, size: 4, alignment: 4, local-offset: -4 }
+  - { id: 1, offset: -32, size: 16, alignment: 8, local-offset: -24 }
+  - { id: 2, offset: -48, size: 16, alignment: 8, local-offset: -40 }
+  - { id: 3, offset: -64, size: 16, alignment: 1, local-offset: -56 }
+  - { id: 4, offset: -80, size: 16, alignment: 8, local-offset: -72 }
+  - { id: 5, offset: -84, size: 4, alignment: 4, local-offset: -76 }
+  - { id: 6, type: spill-slot, offset: -88, size: 4, alignment: 4 }
+  - { id: 7, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '$lr',
+      callee-saved-restored: false }
+  - { id: 8, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '$r11' }
+constants:       # two half-typed pool entries, referenced in the body as %const.0 and %const.1
+  - id:              0
+    value:           half 0xH5440
+    alignment:       2
+  - id:              1
+    value:           half 0xH61B0
+    alignment:       2
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: $lr
+
+    $sp = frame-setup STMDB_UPD $sp, 14, $noreg, killed $r11, killed $lr
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r11, -8
+    $r11 = frame-setup MOVr killed $sp, 14, $noreg, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa_register $r11
+    $sp = frame-setup SUBri killed $sp, 80, 14, $noreg, $noreg
+    renamable $r0 = MOVi 0, 14, $noreg, $noreg
+    STRi12 renamable $r0, $r11, -4, 14, $noreg :: (store 4 into %ir.1)
+    renamable $r1 = LEApcrel %const.0, 14, $noreg ; user of %const.0; presumably one of the 'add rN, pc, #imm' lines the CHECKs match - confirm
+    renamable $r1 = LDRH killed renamable $r1, $noreg, 0, 14, $noreg :: (load 2 from constant-pool)
+    renamable $r2 = LEApcrel %const.1, 14, $noreg ; user of %const.1; presumably the other CHECK-matched 'add rN, pc, #imm' - confirm
+    renamable $r2 = LDRH killed renamable $r2, $noreg, 0, 14, $noreg :: (load 2 from constant-pool)
+    STRi12 killed $r0, $sp, 0, 14, $noreg :: (store 4 into %stack.6)
+    $r0 = MOVr killed $r1, 14, $noreg, $noreg
+    $r1 = MOVr killed $r2, 14, $noreg, $noreg
+    BL_pred @a, 14, $noreg, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit-def $sp
+    renamable $r0 = MOVi 1, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 24, 14, $noreg :: (store 1 into %ir.7)
+    renamable $r0 = MOVi 9, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 25, 14, $noreg :: (store 1 into %ir.8)
+    renamable $r0 = MOVi 31, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 26, 14, $noreg :: (store 1 into %ir.9)
+    renamable $r0 = MOVi 30, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 27, 14, $noreg :: (store 1 into %ir.10)
+    renamable $r0 = MOVi 2, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 28, 14, $noreg :: (store 1 into %ir.11)
+    renamable $r0 = MOVi 5, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 29, 14, $noreg :: (store 1 into %ir.12)
+    renamable $r0 = MOVi 4, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 30, 14, $noreg :: (store 1 into %ir.13)
+    renamable $r0 = MOVi 7, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 31, 14, $noreg :: (store 1 into %ir.14)
+    renamable $r0 = MOVi 35, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 32, 14, $noreg :: (store 1 into %ir.15)
+    renamable $r0 = MOVi 29, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 33, 14, $noreg :: (store 1 into %ir.16)
+    renamable $r0 = MOVi 3, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 34, 14, $noreg :: (store 1 into %ir.17)
+    renamable $r0 = MOVi 85, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 35, 14, $noreg :: (store 1 into %ir.18)
+    renamable $r0 = MOVi 60, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 36, 14, $noreg :: (store 1 into %ir.19)
+    renamable $r0 = MOVi 50, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 37, 14, $noreg :: (store 1 into %ir.20)
+    renamable $r0 = MOVi 8, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 38, 14, $noreg :: (store 1 into %ir.21)
+    renamable $r0 = MOVi 14, 14, $noreg, $noreg
+    STRBi12 killed renamable $r0, $sp, 39, 14, $noreg :: (store 1 into %ir.22)
+    renamable $r0 = ADDri $sp, 24, 14, $noreg, $noreg
+    $q8 = VLD1q8 killed renamable $r0, 0, 14, $noreg :: (load 16 from %ir.23, align 1)
+    renamable $r0 = ADDri $sp, 40, 14, $noreg, $noreg
+    VST1q64 $r0, 0, killed $q8, 14, $noreg :: (store 16 into %ir.3, align 8)
+    $q8 = VLD1q64 killed renamable $r0, 0, 14, $noreg :: (dereferenceable load 16 from %ir.3, align 8)
+    renamable $r0 = ADDri $sp, 8, 14, $noreg, $noreg
+    VST1q64 $r0, 0, killed $q8, 14, $noreg :: (store 16 into %ir.5, align 8)
+    $q8 = VLD1q64 killed renamable $r0, 0, 14, $noreg :: (dereferenceable load 16 from %ir.5, align 8)
+    renamable $r0 = SUBri $r11, 24, 14, $noreg, $noreg
+    VST1q64 killed $r0, 0, killed $q8, 14, $noreg :: (store 16 into %ir.2, align 8)
+    INLINEASM &nop, 1, !3
+    INLINEASM &nop, 1, !4
+    $r0 = LDRi12 $sp, 0, 14, $noreg :: (load 4 from %stack.6)
+    STRi12 renamable $r0, $sp, 4, 14, $noreg :: (store 4 into %ir.6)
+    B %bb.1
+
+  bb.1 (%ir-block.27):
+    successors: %bb.2, %bb.4
+
+    renamable $r0 = LDRi12 $sp, 4, 14, $noreg :: (dereferenceable load 4 from %ir.6)
+    renamable $r1 = MOVi16 1050, 14, $noreg
+    CMPrr killed renamable $r0, killed renamable $r1, 14, $noreg, implicit-def $cpsr
+    Bcc %bb.4, 12, $cpsr
+    B %bb.2
+
+  bb.2 (%ir-block.30):
+    B %bb.3
+
+  bb.3 (%ir-block.31):
+    renamable $r0 = LDRi12 $sp, 4, 14, $noreg :: (dereferenceable load 4 from %ir.6)
+    renamable $r0 = nsw ADDri killed renamable $r0, 1, 14, $noreg, $noreg
+    STRi12 killed renamable $r0, $sp, 4, 14, $noreg :: (store 4 into %ir.6)
+    B %bb.1
+
+  bb.4 (%ir-block.34):
+    renamable $r0 = MOVi 0, 14, $noreg, $noreg
+    $sp = frame-destroy MOVr $r11, 14, $noreg, $noreg
+    $sp = frame-destroy LDMIA_RET $sp, 14, $noreg, def $r11, def $pc, implicit killed $r0
+
+...