Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.h =================================================================== --- llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -640,13 +640,16 @@ switch (Opc) { // indirect calls: case ARM::BLX: + case ARM::BLX_noip: case ARM::BLX_pred: + case ARM::BLX_pred_noip: case ARM::BX_CALL: case ARM::BMOVPCRX_CALL: case ARM::TCRETURNri: case ARM::TAILJMPr: case ARM::TAILJMPr4: case ARM::tBLXr: + case ARM::tBLXr_noip: case ARM::tBLXNSr: case ARM::tBLXNS_CALL: case ARM::tBX_CALL: @@ -908,6 +911,10 @@ return isGather(IntInst) || isScatter(IntInst); } +unsigned getBLXOpcode(const MachineFunction &MF); +unsigned gettBLXrOpcode(const MachineFunction &MF); +unsigned getBLXpredOpcode(const MachineFunction &MF); + } // end namespace llvm #endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -5803,7 +5803,9 @@ NumBytesToCreateFrame = Costs.FrameTailCall; SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall); } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX || - LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr || + LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL || + LastInstrOpcode == ARM::tBLXr || + LastInstrOpcode == ARM::tBLXr_noip || LastInstrOpcode == ARM::tBLXi) { FrameID = MachineOutlinerThunk; NumBytesToCreateFrame = Costs.FrameThunk; @@ -6051,7 +6053,8 @@ // we don't get unexpected results with call pseudo-instructions. 
auto UnknownCallOutlineType = outliner::InstrType::Illegal; if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX || - Opc == ARM::tBLXr || Opc == ARM::tBLXi) + Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip || + Opc == ARM::tBLXi) UnknownCallOutlineType = outliner::InstrType::LegalTerminator; if (!Callee) @@ -6343,3 +6346,19 @@ // spill/restore and VPT predication. return isVCTP(&MI) && !isPredicated(MI); } + +unsigned llvm::getBLXOpcode(const MachineFunction &MF) { + return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip + : ARM::BLX; +} + +unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) { + return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip + : ARM::tBLXr; +} + +unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) { + return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip + : ARM::BLX_pred; +} + Index: llvm/lib/Target/ARM/ARMCallLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMCallLowering.cpp +++ llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -480,15 +480,16 @@ }; // FIXME: This should move to the ARMSubtarget when it supports all the opcodes. -unsigned getCallOpcode(const ARMSubtarget &STI, bool isDirect) { +unsigned getCallOpcode(const MachineFunction &MF, const ARMSubtarget &STI, + bool isDirect) { if (isDirect) return STI.isThumb() ? ARM::tBL : ARM::BL; if (STI.isThumb()) - return ARM::tBLXr; + return gettBLXrOpcode(MF); if (STI.hasV5TOps()) - return ARM::BLX; + return getBLXOpcode(MF); if (STI.hasV4TOps()) return ARM::BX_CALL; @@ -516,7 +517,7 @@ // Create the call instruction so we can add the implicit uses of arg // registers, but don't insert it yet. 
bool IsDirect = !Info.Callee.isReg(); - auto CallOpcode = getCallOpcode(STI, IsDirect); + auto CallOpcode = getCallOpcode(MF, STI, IsDirect); auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode); bool IsThumb = STI.isThumb(); Index: llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp =================================================================== --- llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -2304,8 +2304,9 @@ MIB.addImm(0); MIB.add(predOps(ARMCC::AL)); - MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(Thumb ? ARM::tBLXr : ARM::BLX)); + MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF))); if (Thumb) MIB.add(predOps(ARMCC::AL)); MIB.addReg(Reg, RegState::Kill); Index: llvm/lib/Target/ARM/ARMFastISel.cpp =================================================================== --- llvm/lib/Target/ARM/ARMFastISel.cpp +++ llvm/lib/Target/ARM/ARMFastISel.cpp @@ -2173,7 +2173,7 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { if (UseReg) - return isThumb2 ? ARM::tBLXr : ARM::BLX; + return isThumb2 ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF); else return isThumb2 ? ARM::tBL : ARM::BL; } @@ -2264,9 +2264,11 @@ // BL / BLX don't take a predicate, but tBL / tBLX do. if (isThumb2) MIB.add(predOps(ARMCC::AL)); - if (Subtarget->genLongCalls()) + if (Subtarget->genLongCalls()) { + CalleeReg = + constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0); MIB.addReg(CalleeReg); - else + } else MIB.addExternalSymbol(TLI.getLibcallName(Call)); // Add implicit physical register uses to the call. @@ -2404,9 +2406,11 @@ // ARM calls don't take a predicate, but tBL / tBLX do. if(isThumb2) MIB.add(predOps(ARMCC::AL)); - if (UseReg) + if (UseReg) { + CalleeReg = + constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 
2 : 0); MIB.addReg(CalleeReg); - else if (!IntrMemName) + } else if (!IntrMemName) MIB.addGlobalAddress(GV, 0, 0); else MIB.addExternalSymbol(IntrMemName, 0); Index: llvm/lib/Target/ARM/ARMFeatures.h =================================================================== --- llvm/lib/Target/ARM/ARMFeatures.h +++ llvm/lib/Target/ARM/ARMFeatures.h @@ -75,6 +75,7 @@ // there are some "conditionally deprecated" opcodes case ARM::tADDspr: case ARM::tBLXr: + case ARM::tBLXr_noip: return Instr->getOperand(2).getReg() != ARM::PC; // ADD PC, SP and BLX PC were always unpredictable, // now on top of it they're deprecated Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -10886,7 +10886,7 @@ BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) .addExternalSymbol("__chkstk"); - BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) + BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent()))) .add(predOps(ARMCC::AL)) .addReg(Reg, RegState::Kill) .addReg(ARM::R4, RegState::Implicit | RegState::Kill) Index: llvm/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrInfo.td +++ llvm/lib/Target/ARM/ARMInstrInfo.td @@ -2492,23 +2492,29 @@ } // ARMv5T and above - def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm, - IIC_Br, "blx\t$func", - [(ARMcall GPR:$func)]>, + def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx\t$func", []>, Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { bits<4> func; let Inst{31-4} = 0b1110000100101111111111110011; let Inst{3-0} = func; } + def BLX_noip : ARMPseudoExpand<(outs), (ins GPRnoip:$func), + 4, IIC_Br, [], (BLX GPR:$func)>, + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]>; + def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm, - IIC_Br, "blx", "\t$func", - [(ARMcall_pred GPR:$func)]>, + IIC_Br, "blx", "\t$func", []>, 
Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { bits<4> func; let Inst{27-4} = 0b000100101111111111110011; let Inst{3-0} = func; } + def BLX_pred_noip : ARMPseudoExpand<(outs), (ins GPRnoip:$func), + 4, IIC_Br, [], + (BLX_pred GPR:$func, (ops 14, zero_reg))>, + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]>; + // ARMv4T // Note: Restrict $func to the tGPR regclass to prevent it being in LR. @@ -2534,6 +2540,16 @@ Requires<[IsARM]>, Sched<[WriteBr]>; } +def : ARMPat<(ARMcall GPR:$func), (BLX $func)>, + Requires<[IsARM, HasV5T, NoSLSBLRMitigation]>; +def : ARMPat<(ARMcall GPRnoip:$func), (BLX_noip $func)>, + Requires<[IsARM, HasV5T, SLSBLRMitigation]>; +def : ARMPat<(ARMcall_pred GPR:$func), (BLX_pred $func)>, + Requires<[IsARM, HasV5T, NoSLSBLRMitigation]>; +def : ARMPat<(ARMcall_pred GPRnoip:$func), (BLX_pred_noip $func)>, + Requires<[IsARM, HasV5T, SLSBLRMitigation]>; + + let isBranch = 1, isTerminator = 1 in { // FIXME: should be able to write a pattern for ARMBrcond, but can't use // a two-value operand where a dag node expects two operands. 
:( Index: llvm/lib/Target/ARM/ARMInstrThumb.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrThumb.td +++ llvm/lib/Target/ARM/ARMInstrThumb.td @@ -548,14 +548,18 @@ // Also used for Thumb2 def tBLXr : TI<(outs), (ins pred:$p, GPR:$func), IIC_Br, - "blx${p}\t$func", - [(ARMcall GPR:$func)]>, + "blx${p}\t$func", []>, Requires<[IsThumb, HasV5T]>, T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { // A6.2.3 & A8.6.24; bits<4> func; let Inst{6-3} = func; let Inst{2-0} = 0b000; } + def tBLXr_noip : ARMPseudoExpand<(outs), (ins pred:$p, GPRnoip:$func), + 2, IIC_Br, [], (tBLXr pred:$p, GPR:$func)>, + Requires<[IsThumb, HasV5T]>, + Sched<[WriteBrL]>; + // ARMv8-M Security Extensions def tBLXNSr : TI<(outs), (ins pred:$p, GPRnopc:$func), IIC_Br, @@ -586,6 +590,11 @@ Requires<[IsThumb]>, Sched<[WriteBr]>; } +def : ARMPat<(ARMcall GPR:$func), (tBLXr $func)>, + Requires<[IsThumb, HasV5T, NoSLSBLRMitigation]>; +def : ARMPat<(ARMcall GPRnoip:$func), (tBLXr_noip $func)>, + Requires<[IsThumb, HasV5T, SLSBLRMitigation]>; + let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br, Index: llvm/lib/Target/ARM/ARMPredicates.td =================================================================== --- llvm/lib/Target/ARM/ARMPredicates.td +++ llvm/lib/Target/ARM/ARMPredicates.td @@ -189,6 +189,9 @@ def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&" " TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||" "Subtarget->hasMinSize())">; + def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<ARMSubtarget>().hardenSlsBlr() }]>; + def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<ARMSubtarget>().hardenSlsBlr() }]>; + } def UseMulOps : Predicate<"Subtarget->useMulOps()">; Index: llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -156,10 
+156,10 @@ "Subclass not added?"); assert(RBGPR.covers(*TRI.getRegClass(ARM::tcGPRRegClassID)) && "Subclass not added?"); - assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) && + assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRnoip_and_tcGPRRegClassID)) && "Subclass not added?"); - assert(RBGPR.covers( - *TRI.getRegClass(ARM::tGPREven_and_tGPR_and_tcGPRRegClassID)) && + assert(RBGPR.covers(*TRI.getRegClass( + ARM::tGPREven_and_GPRnoip_and_tcGPRRegClassID)) && "Subclass not added?"); assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPROdd_and_tcGPRRegClassID)) && "Subclass not added?"); @@ -182,10 +182,12 @@ switch (RC.getID()) { case GPRRegClassID: case GPRwithAPSRRegClassID: + case GPRnoipRegClassID: case GPRnopcRegClassID: + case GPRnoip_and_GPRnopcRegClassID: case rGPRRegClassID: case GPRspRegClassID: - case tGPR_and_tcGPRRegClassID: + case GPRnoip_and_tcGPRRegClassID: case tcGPRRegClassID: case tGPRRegClassID: case tGPREvenRegClassID: @@ -193,7 +195,7 @@ case tGPR_and_tGPREvenRegClassID: case tGPR_and_tGPROddRegClassID: case tGPREven_and_tcGPRRegClassID: - case tGPREven_and_tGPR_and_tcGPRRegClassID: + case tGPREven_and_GPRnoip_and_tcGPRRegClassID: case tGPROdd_and_tcGPRRegClassID: return getRegBank(ARM::GPRRegBankID); case HPRRegClassID: Index: llvm/lib/Target/ARM/ARMRegisterInfo.td =================================================================== --- llvm/lib/Target/ARM/ARMRegisterInfo.td +++ llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -235,6 +235,23 @@ let DiagnosticString = "operand must be a register in range [r0, r15]"; } +// Register set that excludes registers that are reserved for procedure calls. +// This is used for pseudo-instructions that are actually implemented using a +// procedure call. +def GPRnoip : RegisterClass<"ARM", [i32], 32, (sub GPR, R12, LR)> { + // Allocate LR as the first CSR since it is always saved anyway. + // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't + // know how to spill them. 
If we make our prologue/epilogue code smarter at + // some point, we can go back to using the above allocation orders for the + // Thumb1 instructions that know how to use hi regs. + let AltOrders = [(add GPRnoip, GPRnoip), (trunc GPRnoip, 8), + (add (trunc GPRnoip, 8), (shl GPRnoip, 8))]; + let AltOrderSelect = [{ + return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF); + }]; + let DiagnosticString = "operand must be a register in range [r0, r14]"; +} + // GPRs without the PC. Some ARM instructions do not allow the PC in // certain operand slots, particularly as the destination. Primarily // useful for disassembly. Index: llvm/lib/Target/ARM/ARMSLSHardening.cpp =================================================================== --- llvm/lib/Target/ARM/ARMSLSHardening.cpp +++ llvm/lib/Target/ARM/ARMSLSHardening.cpp @@ -143,9 +143,7 @@ {"__llvm_slsblr_thunk_arm_r9", ARM::R9, false}, {"__llvm_slsblr_thunk_arm_r10", ARM::R10, false}, {"__llvm_slsblr_thunk_arm_r11", ARM::R11, false}, - {"__llvm_slsblr_thunk_arm_r12", ARM::R12, false}, {"__llvm_slsblr_thunk_arm_sp", ARM::SP, false}, - {"__llvm_slsblr_thunk_arm_lr", ARM::LR, false}, {"__llvm_slsblr_thunk_arm_pc", ARM::PC, false}, {"__llvm_slsblr_thunk_thumb_r0", ARM::R0, true}, {"__llvm_slsblr_thunk_thumb_r1", ARM::R1, true}, @@ -159,9 +157,7 @@ {"__llvm_slsblr_thunk_thumb_r9", ARM::R9, true}, {"__llvm_slsblr_thunk_thumb_r10", ARM::R10, true}, {"__llvm_slsblr_thunk_thumb_r11", ARM::R11, true}, - {"__llvm_slsblr_thunk_thumb_r12", ARM::R12, true}, {"__llvm_slsblr_thunk_thumb_sp", ARM::SP, true}, - {"__llvm_slsblr_thunk_thumb_lr", ARM::LR, true}, {"__llvm_slsblr_thunk_thumb_pc", ARM::PC, true}, }; @@ -252,25 +248,18 @@ // SLSBLRThunkInserter. // This function merely needs to transform an indirect call to a direct call // to __llvm_slsblr_thunk_xN. - // - // Since linkers are allowed to clobber R12 on function calls, the above - // mitigation only works if the original indirect call instruction was not - // using R12. 
Code generation before must make sure that no indirect call - // using R12 was produced if the mitigation is enabled. - // Also, the transformation is incorrect if the indirect call uses LR, so - // also have to avoid that. - // FIXME: that will be done in a follow-on patch. - MachineInstr &IndirectCall = *MBBI; assert(isIndirectCall(IndirectCall) && !IndirectCall.isReturn()); int RegOpIdxOnIndirectCall = -1; bool isThumb; switch (IndirectCall.getOpcode()) { - case ARM::BLX: // !isThumb2 + case ARM::BLX: // !isThumb2 + case ARM::BLX_noip: // !isThumb2 isThumb = false; RegOpIdxOnIndirectCall = 0; break; case ARM::tBLXr: // isThumb2 + case ARM::tBLXr_noip: // isThumb2 isThumb = true; RegOpIdxOnIndirectCall = 2; break; @@ -279,6 +268,12 @@ } Register Reg = IndirectCall.getOperand(RegOpIdxOnIndirectCall).getReg(); + // Since linkers are allowed to clobber R12 on function calls, the above + // mitigation only works if the original indirect call instruction was not + // using R12. Code generation before must make sure that no indirect call + // using R12 was produced if the mitigation is enabled. + // Also, the transformation is incorrect if the indirect call uses LR, so + // also have to avoid that. assert(Reg != ARM::R12 && Reg != ARM::LR); bool RegIsKilled = IndirectCall.getOperand(RegOpIdxOnIndirectCall).isKill(); Index: llvm/test/CodeGen/ARM/speculation-hardening-sls.ll =================================================================== --- llvm/test/CodeGen/ARM/speculation-hardening-sls.ll +++ llvm/test/CodeGen/ARM/speculation-hardening-sls.ll @@ -186,10 +186,61 @@ ; CHECK: .Lfunc_end } -; HARDEN-label: __llvm_slsblr_thunk_(arm|thumb)_r5: +; Verify that neither r12 nor lr are used as registers in indirect call +; instructions when the sls-hardening-blr mitigation is enabled, as +; (a) a linker is allowed to clobber r12 on calls, and +; (b) the hardening transformation isn't correct if lr is the register holding +; the address of the function called. 
+define i32 @check_r12(i32 ()** %fp) { +entry: +; CHECK-LABEL: check_r12: + %f = load i32 ()*, i32 ()** %fp, align 4 + ; Force f to be moved into r12 + %r12_f = tail call i32 ()* asm "add $0, $1, #0", "={r12},{r12}"(i32 ()* %f) nounwind + %call = call i32 %r12_f() +; NOHARDENARM: blx r12 +; NOHARDENTHUMB: blx r12 +; HARDEN-NOT: bl {{__llvm_slsblr_thunk_(arm|thumb)_r12}} + ret i32 %call +; CHECK: .Lfunc_end +} + +define i32 @check_lr(i32 ()** %fp) { +entry: +; CHECK-LABEL: check_lr: + %f = load i32 ()*, i32 ()** %fp, align 4 + ; Force f to be moved into lr + %lr_f = tail call i32 ()* asm "add $0, $1, #0", "={lr},{lr}"(i32 ()* %f) nounwind + %call = call i32 %lr_f() +; NOHARDENARM: blx lr +; NOHARDENTHUMB: blx lr +; HARDEN-NOT: bl {{__llvm_slsblr_thunk_(arm|thumb)_lr}} + ret i32 %call +; CHECK: .Lfunc_end +} + +; Verify that even when sls-harden-blr is enabled, "blx r12" is still an +; instruction that is accepted by the inline assembler +define void @verify_inline_asm_blx_r12(void ()* %g) { +entry: +; CHECK-LABEL: verify_inline_asm_blx_r12: + %0 = bitcast void ()* %g to i8* + tail call void asm sideeffect "blx $0", "{r12}"(i8* %0) nounwind +; CHECK: blx r12 + ret void +; CHECK: {{bx lr$}} +; ISBDSB-NEXT: dsb sy +; ISBDSB-NEXT: isb +; SB-NEXT: {{ sb$}} +; CHECK: .Lfunc_end +} + +; HARDEN-label: {{__llvm_slsblr_thunk_(arm|thumb)_r5}}: ; HARDEN: bx r5 ; ISBDSB-NEXT: dsb sy ; ISBDSB-NEXT: isb ; SB-NEXT: dsb sy ; SB-NEXT: isb ; HARDEN-NEXT: .Lfunc_end + +