Index: llvm/trunk/lib/Target/AArch64/AArch64.h
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.h
+++ llvm/trunk/lib/Target/AArch64/AArch64.h
@@ -57,6 +57,7 @@
                                  AArch64Subtarget &, AArch64RegisterBankInfo &);
 FunctionPass *createAArch64PreLegalizeCombiner();
 FunctionPass *createAArch64StackTaggingPass(bool MergeInit);
+FunctionPass *createAArch64StackTaggingPreRAPass();
 
 void initializeAArch64A53Fix835769Pass(PassRegistry&);
 void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
@@ -80,6 +81,7 @@
 void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
 void initializeLDTLSCleanupPass(PassRegistry&);
 void initializeAArch64StackTaggingPass(PassRegistry&);
+void initializeAArch64StackTaggingPreRAPass(PassRegistry&);
 } // end namespace llvm
 
 #endif
Index: llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -447,11 +447,14 @@
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
   const AArch64InstrInfo *TII =
       MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
   const AArch64FrameLowering *TFI = getFrameLowering(MF);
 
   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+  bool Tagged =
+      MI.getOperand(FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED;
   unsigned FrameReg;
 
   // Special handling of dbg_value, stackmap and patchpoint instructions.
@@ -477,12 +480,39 @@
   StackOffset Offset;
   if (MI.getOpcode() == AArch64::TAGPstack) {
     // TAGPstack must use the virtual frame register in its 3rd operand.
-    const MachineFrameInfo &MFI = MF.getFrameInfo();
     const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     FrameReg = MI.getOperand(3).getReg();
     Offset = {MFI.getObjectOffset(FrameIndex) +
                   AFI->getTaggedBasePointerOffset(),
               MVT::i8};
+  } else if (Tagged) {
+    StackOffset SPOffset = {
+        MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(), MVT::i8};
+    if (MFI.hasVarSizedObjects() ||
+        isAArch64FrameOffsetLegal(MI, SPOffset, nullptr, nullptr, nullptr) !=
+            (AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) {
+      // Can't update to SP + offset in place. Precalculate the tagged pointer
+      // in a scratch register.
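+      // LDG reloads the allocation tag for the slot's memory and inserts it
+      // into bits 59:56 of ScratchReg, leaving the address bits unchanged, so
+      // ScratchReg ends up holding the correctly tagged slot address.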
+      Offset = TFI->resolveFrameIndexReference(
+          MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
+      Register ScratchReg =
+          MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+      emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset,
+                      TII);
+      BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::LDG), ScratchReg)
+          .addReg(ScratchReg)
+          .addReg(ScratchReg)
+          .addImm(0);
+      MI.getOperand(FIOperandNum)
+          .ChangeToRegister(ScratchReg, false, false, true);
+      return;
+    }
+    FrameReg = AArch64::SP;
+    Offset = {MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(),
+              MVT::i8};
   } else {
     Offset = TFI->resolveFrameIndexReference(
         MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
Index: llvm/trunk/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
@@ -0,0 +1,211 @@
+//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64InstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
+
+enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };
+
+cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
+    "stack-tagging-unchecked-ld-st", cl::Hidden,
+    cl::init(UncheckedSafe),
+    cl::desc(
+        "Unconditionally apply unchecked-ld-st optimization (even for large "
+        "stack frames, or in the presence of variable sized allocas)."),
+    cl::values(
+        clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
+        clEnumValN(
+            UncheckedSafe, "safe",
+            "apply unchecked-ld-st when the target is definitely within range"),
+        clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
+
+namespace {
+
+class AArch64StackTaggingPreRA : public MachineFunctionPass {
+  MachineFunction *MF;
+  AArch64FunctionInfo *AFI;
+  MachineFrameInfo *MFI;
+  MachineRegisterInfo *MRI;
+  const AArch64RegisterInfo *TRI;
+  const AArch64InstrInfo *TII;
+
+  SmallVector<MachineInstr*, 16> ReTags;
+
+public:
+  static char ID;
+  AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
+    initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool mayUseUncheckedLoadStore();
+  void uncheckUsesOf(unsigned TaggedReg, int FI);
+  void uncheckLoadsAndStores();
+
+  bool runOnMachineFunction(MachineFunction &Func) override;
+  StringRef getPassName() const override {
+    return "AArch64 Stack Tagging PreRA";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+} // end anonymous namespace
+
+char AArch64StackTaggingPreRA::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
+                      "AArch64 Stack Tagging PreRA Pass", false, false)
+INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
+                    "AArch64 Stack Tagging PreRA Pass", false, false)
+
+FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
+  return new AArch64StackTaggingPreRA();
+}
+
+static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  case AArch64::LDRWui:
+  case AArch64::LDRSHWui:
+  case AArch64::LDRXui:
+  case AArch64::LDRBui:
+  case AArch64::LDRBBui:
+  case AArch64::LDRHui:
+  case AArch64::LDRSui:
+  case AArch64::LDRDui:
+  case AArch64::LDRQui:
+  case AArch64::STRWui:
+  case AArch64::STRXui:
+  case AArch64::STRBui:
+  case AArch64::STRBBui:
+  case AArch64::STRHui:
+  case AArch64::STRSui:
+  case AArch64::STRDui:
+  case AArch64::STRQui:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
+  if (ClUncheckedLdSt == UncheckedNever)
+    return false;
+  else if (ClUncheckedLdSt == UncheckedAlways)
+    return true;
+
+  // This estimate could be improved if we had stronger guarantees about stack
+  // frame layout. With LocalStackAllocation we can estimate SP offset to any
+  // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged
+  // objects ahead of non-tagged ones, but that's not always desirable.
+  //
+  // Underestimating SP offset here may require the use of LDG to materialize
+  // the tagged address of the stack slot, along with a scratch register
+  // allocation (post-regalloc!).
+  //
+  // For now we do the safe thing here and require that the entire stack frame
+  // is within range of the shortest of the unchecked instructions.
+  unsigned FrameSize = 0;
+  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
+    FrameSize += MFI->getObjectSize(i);
+  bool EntireFrameReachableFromSP = FrameSize < 0xf00;
+  return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
+}
+
+void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
+  for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end();
+       UI != E;) {
+    MachineInstr *UseI = &*(UI++);
+    if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) {
+      // FI operand is always the one before the immediate offset.
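+      // (For LDRWui, e.g., the operands are "dst, base, imm", so the base
+      // register sits one slot before the immediate's operand index.)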
+      unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1;
+      if (UseI->getOperand(OpIdx).isReg() &&
+          UseI->getOperand(OpIdx).getReg() == TaggedReg) {
+        UseI->getOperand(OpIdx).ChangeToFrameIndex(FI);
+        UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
+      }
+    } else if (UseI->isCopy() &&
+               Register::isVirtualRegister(UseI->getOperand(0).getReg())) {
+      uncheckUsesOf(UseI->getOperand(0).getReg(), FI);
+    }
+  }
+}
+
+void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
+  for (auto *I : ReTags) {
+    unsigned TaggedReg = I->getOperand(0).getReg();
+    int FI = I->getOperand(1).getIndex();
+    uncheckUsesOf(TaggedReg, FI);
+  }
+}
+
+bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
+  MF = &Func;
+  MRI = &MF->getRegInfo();
+  AFI = MF->getInfo<AArch64FunctionInfo>();
+  TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
+  TRI = static_cast<const AArch64RegisterInfo *>(
+      MF->getSubtarget().getRegisterInfo());
+  MFI = &MF->getFrameInfo();
+  ReTags.clear();
+
+  assert(MRI->isSSA());
+
+  LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
+                    << "********** Function: " << MF->getName() << '\n');
+
+  SmallSetVector<int, 8> TaggedSlots;
+  for (auto &BB : *MF) {
+    for (auto &I : BB) {
+      if (I.getOpcode() == AArch64::TAGPstack) {
+        ReTags.push_back(&I);
+        int FI = I.getOperand(1).getIndex();
+        TaggedSlots.insert(FI);
+        // There should be no offsets in TAGP yet.
+        assert(I.getOperand(2).getImm() == 0);
+      }
+    }
+  }
+
+  if (ReTags.empty())
+    return false;
+
+  if (mayUseUncheckedLoadStore())
+    uncheckLoadsAndStores();
+
+  return true;
+}
Index: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -180,6 +180,7 @@
   initializeLDTLSCleanupPass(*PR);
   initializeAArch64SpeculationHardeningPass(*PR);
   initializeAArch64StackTaggingPass(*PR);
+  initializeAArch64StackTaggingPreRAPass(*PR);
 }
 
 //===----------------------------------------------------------------------===//
@@ -541,6 +542,8 @@
   if (EnableStPairSuppress)
     addPass(createAArch64StorePairSuppressPass());
   addPass(createAArch64SIMDInstrOptPass());
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createAArch64StackTaggingPreRAPass());
   return true;
 }
Index: llvm/trunk/lib/Target/AArch64/CMakeLists.txt
===================================================================
--- llvm/trunk/lib/Target/AArch64/CMakeLists.txt
+++ llvm/trunk/lib/Target/AArch64/CMakeLists.txt
@@ -56,6 +56,7 @@
   AArch64SelectionDAGInfo.cpp
   AArch64SpeculationHardening.cpp
   AArch64StackTagging.cpp
+  AArch64StackTaggingPreRA.cpp
   AArch64StorePairSuppress.cpp
   AArch64Subtarget.cpp
   AArch64TargetMachine.cpp
Index: llvm/trunk/lib/Target/AArch64/Utils/AArch64BaseInfo.h
===================================================================
--- llvm/trunk/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ llvm/trunk/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -635,6 +635,13 @@
   /// MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag
   /// in bits 56-63.
+  /// On a FrameIndex operand, indicates that the underlying memory is tagged
+  /// with an unknown tag value (MTE); this needs to be lowered either to an
+  /// SP-relative load or store instruction (which does not check tags), or to
+  /// an LDG instruction to obtain the tag value.
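+  /// For example, a load through a tagged FrameIndex is rewritten by
+  /// eliminateFrameIndex to use SP as its base register when the offset is in
+  /// range, and otherwise to use a scratch register re-tagged with LDG.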
   MO_TAGGED = 0x400,
 };
 
 } // end namespace AArch64II
Index: llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
+++ llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
@@ -97,6 +97,7 @@
 ; CHECK-NEXT: Early If-Conversion
 ; CHECK-NEXT: AArch64 Store Pair Suppression
 ; CHECK-NEXT: AArch64 SIMD instructions optimization pass
+; CHECK-NEXT: AArch64 Stack Tagging PreRA
 ; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: Machine Natural Loop Construction
 ; CHECK-NEXT: Early Machine Loop Invariant Code Motion
Index: llvm/trunk/test/CodeGen/AArch64/stack-tagging-unchecked-ld-st.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/stack-tagging-unchecked-ld-st.ll
+++ llvm/trunk/test/CodeGen/AArch64/stack-tagging-unchecked-ld-st.ll
@@ -0,0 +1,144 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s --check-prefixes=DEFAULT,COMMON
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -stack-tagging-unchecked-ld-st=never | FileCheck %s --check-prefixes=NEVER,COMMON
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -stack-tagging-unchecked-ld-st=always | FileCheck %s --check-prefixes=ALWAYS,COMMON
+
+declare void @use8(i8*)
+declare void @use32(i32*)
+declare void @use2x64([2 x i64]*)
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+define i32 @CallLd() sanitize_memtag {
+entry:
+  %x = alloca i32, align 4
+  call void @use32(i32* %x)
+  %a = load i32, i32* %x
+  ret i32 %a
+}
+
+; COMMON: CallLd:
+; COMMON: bl use32
+
+; ALWAYS: ldr w0, [sp]
+; DEFAULT: ldr w0, [sp]
+; NEVER: ldr w0, [x{{.*}}]
+
+; COMMON: ret
+
+define void @CallStCall() sanitize_memtag {
+entry:
+  %x = alloca i32, align 4
+  call void @use32(i32* %x)
+  store i32 42, i32* %x
+  call void @use32(i32* %x)
+  ret void
+}
+
+; COMMON: CallStCall:
+; COMMON: bl use32
+
+; ALWAYS: str w{{.*}}, [sp]
+; DEFAULT: str w{{.*}}, [sp]
+; NEVER: str w{{.*}}, [x{{.*}}]
+
+; COMMON: bl use32
+; COMMON: ret
+
+define void @CallStPair(i64 %z) sanitize_memtag {
+entry:
+  %x = alloca [2 x i64], align 8
+  call void @use2x64([2 x i64]* %x)
+  %x0 = getelementptr inbounds [2 x i64], [2 x i64]* %x, i64 0, i64 0
+  store i64 %z, i64* %x0, align 8
+  %x1 = getelementptr inbounds [2 x i64], [2 x i64]* %x, i64 0, i64 1
+  store i64 %z, i64* %x1, align 8
+  call void @use2x64([2 x i64]* %x)
+  ret void
+}
+
+; COMMON: CallStPair:
+; COMMON: bl use2x64
+
+; ALWAYS: stp {{.*}}, [sp]
+; DEFAULT: stp {{.*}}, [sp]
+; NEVER: stp {{.*}}, [x{{.*}}]
+
+; COMMON: bl use2x64
+; COMMON: ret
+
+; One of the two allocas will end up out of range of ldrb [sp].
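+; (The frame here is ~8k: the scaled unsigned-immediate ldrb reaches at most
+; 4095 bytes from SP, and the whole frame exceeds the 0xf00-byte limit of the
+; "safe" mode, so DEFAULT keeps both accesses checked.)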
+define dso_local i8 @LargeFrame() sanitize_memtag {
+entry:
+  %x = alloca [4096 x i8], align 4
+  %y = alloca [4096 x i8], align 4
+  %0 = getelementptr inbounds [4096 x i8], [4096 x i8]* %x, i64 0, i64 0
+  %1 = getelementptr inbounds [4096 x i8], [4096 x i8]* %y, i64 0, i64 0
+  call void @use8(i8* %0)
+  call void @use8(i8* %1)
+  %2 = load i8, i8* %0, align 4
+  %3 = load i8, i8* %1, align 4
+  %add = add i8 %3, %2
+  ret i8 %add
+}
+
+; COMMON: LargeFrame:
+; COMMON: bl use8
+; COMMON: bl use8
+
+; NEVER: ldrb [[A:w.*]], [x{{.*}}]
+; NEVER: ldrb [[B:w.*]], [x{{.*}}]
+
+; DEFAULT: ldrb [[A:w.*]], [x{{.*}}]
+; DEFAULT: ldrb [[B:w.*]], [x{{.*}}]
+
+; ALWAYS: ldg [[PA:x.*]], [x{{.*}}]
+; ALWAYS: ldrb [[B:w.*]], [sp]
+; ALWAYS: ldrb [[A:w.*]], {{\[}}[[PA]]{{\]}}
+
+; COMMON: add w0, [[B]], [[A]]
+; COMMON: ret
+
+; One of these allocas is closer to FP than to SP, and within 256 bytes
+; of the former (see hardcoded limit in resolveFrameOffsetReference).
+; It could be lowered to an FP-relative load, but not when doing an
+; unchecked access to tagged memory!
+define i8 @FPOffset() "frame-pointer"="all" sanitize_memtag {
+  %x = alloca [200 x i8], align 4
+  %y = alloca [200 x i8], align 4
+  %z = alloca [200 x i8], align 4
+  %x0 = getelementptr inbounds [200 x i8], [200 x i8]* %x, i64 0, i64 0
+  %y0 = getelementptr inbounds [200 x i8], [200 x i8]* %y, i64 0, i64 0
+  %z0 = getelementptr inbounds [200 x i8], [200 x i8]* %z, i64 0, i64 0
+  call void @use8(i8* %x0)
+  call void @use8(i8* %y0)
+  call void @use8(i8* %z0)
+  %x1 = load i8, i8* %x0, align 4
+  %y1 = load i8, i8* %y0, align 4
+  %z1 = load i8, i8* %z0, align 4
+  %a = add i8 %x1, %y1
+  %b = add i8 %a, %z1
+  ret i8 %b
+}
+
+; COMMON: FPOffset:
+; COMMON: bl use8
+; COMMON: bl use8
+; COMMON: bl use8
+
+; All three loads are SP-based.
+; ALWAYS-DAG: ldrb w{{.*}}, [sp, #416]
+; ALWAYS-DAG: ldrb w{{.*}}, [sp, #208]
+; ALWAYS-DAG: ldrb w{{.*}}, [sp]
+
+; DEFAULT-DAG: ldrb w{{.*}}, [sp, #416]
+; DEFAULT-DAG: ldrb w{{.*}}, [sp, #208]
+; DEFAULT-DAG: ldrb w{{.*}}, [sp]
+
+; NEVER-DAG: ldrb w{{.*}}, [x{{.*}}]
+; NEVER-DAG: ldrb w{{.*}}, [x{{.*}}]
+; NEVER-DAG: ldrb w{{.*}}, [x{{.*}}]
+
+; COMMON: ret