Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -17,6 +17,7 @@
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/ConstantRange.h"
@@ -44,6 +45,8 @@
     "x86-promote-anyext-load", cl::init(true),
     cl::desc("Enable promoting aligned anyext load to wider load"),
     cl::Hidden);
+extern cl::opt<bool> IndirectBranchTracking;
+
 //===----------------------------------------------------------------------===//
 //                      Pattern Matcher Implementation
 //===----------------------------------------------------------------------===//
@@ -203,6 +206,7 @@
 
   private:
     void Select(SDNode *N) override;
+    bool obscureEndbrOpcodeImmediate(SDNode *N);
 
     bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
     bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
     bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
@@ -1331,6 +1335,218 @@
   return true;
 }
 
+static bool isEndbrImm64(uint64_t Imm) {
+  // There may be other prefix bytes between 0xF3 and 0x0F1EFA,
+  // e.g. 0xF3660F1EFA, 0xF3670F1EFA.
+  if ((Imm & 0x00FFFFFF) != 0x0F1EFA)
+    return false;
+
+  uint8_t OptionalPrefixBytes[] = {0x26, 0x2e, 0x36, 0x3e, 0x64,
+                                   0x65, 0x66, 0x67, 0xf0, 0xf2};
+  int i = 24; // The low 24 bits, 0x0F1EFA, have already matched.
+  while (i < 64) {
+    uint8_t Byte = (Imm >> i) & 0xFF;
+    if (Byte == 0xF3)
+      return true;
+    if (!llvm::is_contained(OptionalPrefixBytes, Byte))
+      return false;
+    i += 8;
+  }
+
+  return false;
+}
+
+// This function is for the CET enhancement.
+//
+// ENDBR32 and ENDBR64 have specific opcodes:
+// ENDBR32: F3 0F 1E FB
+// ENDBR64: F3 0F 1E FA
+// We do not want attackers to find unintended ENDBR32/64
+// opcode matches in the binary.
+// Here is an example:
+// If the compiler had to generate asm for the following code:
+//   a = 0xF30F1EFA
+// it could, for example, generate:
+//   mov dword ptr [a], 0xF30F1EFA
+// In such a case, the binary would include a gadget that starts
+// with a fake ENDBR64 opcode. Therefore, we split such generation
+// into multiple operations so that the immediate never appears
+// in the binary.
+bool X86DAGToDAGISel::obscureEndbrOpcodeImmediate(SDNode *N) {
+  unsigned Opc = N->getMachineOpcode();
+  MachineSDNode *N0 = nullptr;
+  MachineSDNode *N1 = nullptr;
+  int Idx = -1;
+  unsigned MOVOpc = X86::MOV32ri64; // Move an i64i32imm into a 64-bit reg.
+  unsigned NOTOpc = X86::NOT64r;
+  EVT VT = MVT::i64;
+  SDLoc dl(N);
+  switch (Opc) {
+  default: break;
+  case X86::MOV32ri: // *32ri
+    Idx = 0;
+    LLVM_FALLTHROUGH;
+  case X86::ADC32ri:
+  case X86::ADD32ri:
+  case X86::AND32ri:
+  case X86::CMP32ri:
+  case X86::OR32ri:
+  case X86::SBB32ri:
+  case X86::SUB32ri:
+  case X86::TEST32ri:
+  case X86::XOR32ri:
+    if (Idx < 0)
+      Idx = 1;
+    LLVM_FALLTHROUGH;
+  case X86::MOV32mi: // *32mi
+  case X86::ADC32mi:
+  case X86::ADD32mi:
+  case X86::AND32mi:
+  case X86::CMP32mi:
+  case X86::OR32mi:
+  case X86::SBB32mi:
+  case X86::SUB32mi:
+  case X86::TEST32mi:
+  case X86::XOR32mi:
+    if (Idx < 0)
+      Idx = 5;
+    MOVOpc = X86::MOV32ri;
+    NOTOpc = X86::NOT32r;
+    VT = MVT::i32;
+    LLVM_FALLTHROUGH;
+  case X86::MOV64ri32: // *64ri32
+    if (Idx < 0)
+      Idx = 0;
+    LLVM_FALLTHROUGH;
+  case X86::ADC64ri32:
+  case X86::ADD64ri32:
+  case X86::AND64ri32:
+  case X86::CMP64ri32:
+  case X86::OR64ri32:
+  case X86::SBB64ri32:
+  case X86::SUB64ri32:
+  case X86::TEST64ri32:
+  case X86::XOR64ri32:
+    if (Idx < 0)
+      Idx = 1;
+    LLVM_FALLTHROUGH;
+  case X86::MOV64mi32: // *64mi32
+  case X86::ADC64mi32:
+  case X86::ADD64mi32:
+  case X86::AND64mi32:
+  case X86::CMP64mi32:
+  case X86::OR64mi32:
+  case X86::SBB64mi32:
+  case X86::SUB64mi32:
+  case X86::TEST64mi32:
+  case X86::XOR64mi32: {
+    if (Idx < 0)
+      Idx = 5;
+    assert(Idx >= 0 && "Immediate operand index was not set!");
+    if (!isa<ConstantSDNode>(N->getOperand(Idx)))
+      return false;
+    uint32_t Imm = N->getConstantOperandVal(Idx);
+    uint32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB;
+    if (Imm != EndbrImm)
+      return false;
+
+    SDValue Complement = MOVOpc == X86::MOV32ri
+                             ? getI32Imm(~EndbrImm, dl)
+                             : getI64Imm((uint64_t)(~EndbrImm), dl);
+
+    // ~0xF30F1EFA = 0x0CF0E105
+    // ~0xF30F1EFB = 0x0CF0E104
+    // First, move the complement of the endbr immediate into a register.
+    N0 = CurDAG->getMachineNode(MOVOpc, dl, VT, Complement);
+
+    // ~~0xF30F1EFA = 0xF30F1EFA
+    // ~~0xF30F1EFB = 0xF30F1EFB
+    // Second, complement it again to recover the original value. The NOT
+    // instruction does not affect the status flags.
+    N0 = CurDAG->getMachineNode(NOTOpc, dl, VT, SDValue(N0, 0));
+
+    unsigned NewOpc;
+    switch (Opc) {
+    default: llvm_unreachable("Unexpected opcode!");
+    case X86::ADC32ri:    NewOpc = X86::ADC32rr;   break; // *32ri
+    case X86::ADD32ri:    NewOpc = X86::ADD32rr;   break;
+    case X86::AND32ri:    NewOpc = X86::AND32rr;   break;
+    case X86::CMP32ri:    NewOpc = X86::CMP32rr;   break;
+    case X86::MOV32ri:    NewOpc = X86::MOV32rr;   break;
+    case X86::OR32ri:     NewOpc = X86::OR32rr;    break;
+    case X86::SBB32ri:    NewOpc = X86::SBB32rr;   break;
+    case X86::SUB32ri:    NewOpc = X86::SUB32rr;   break;
+    case X86::TEST32ri:   NewOpc = X86::TEST32rr;  break;
+    case X86::XOR32ri:    NewOpc = X86::XOR32rr;   break;
+    case X86::ADC32mi:    NewOpc = X86::ADC32mr;   break; // *32mi
+    case X86::ADD32mi:    NewOpc = X86::ADD32mr;   break;
+    case X86::AND32mi:    NewOpc = X86::AND32mr;   break;
+    case X86::CMP32mi:    NewOpc = X86::CMP32mr;   break;
+    case X86::MOV32mi:    NewOpc = X86::MOV32mr;   break;
+    case X86::OR32mi:     NewOpc = X86::OR32mr;    break;
+    case X86::SBB32mi:    NewOpc = X86::SBB32mr;   break;
+    case X86::SUB32mi:    NewOpc = X86::SUB32mr;   break;
+    case X86::TEST32mi:   NewOpc = X86::TEST32mr;  break;
+    case X86::XOR32mi:    NewOpc = X86::XOR32mr;   break;
+    case X86::ADC64ri32:  NewOpc = X86::ADC64rr;   break; // *64ri32
+    case X86::ADD64ri32:  NewOpc = X86::ADD64rr;   break;
+    case X86::AND64ri32:  NewOpc = X86::AND64rr;   break;
+    case X86::CMP64ri32:  NewOpc = X86::CMP64rr;   break;
+    case X86::MOV64ri32:  NewOpc = X86::MOV64rr;   break;
+    case X86::OR64ri32:   NewOpc = X86::OR64rr;    break;
+    case X86::SBB64ri32:  NewOpc = X86::SBB64rr;   break;
+    case X86::SUB64ri32:  NewOpc = X86::SUB64rr;   break;
+    case X86::TEST64ri32: NewOpc = X86::TEST64rr;  break;
+    case X86::XOR64ri32:  NewOpc = X86::XOR64rr;   break;
+    case X86::ADC64mi32:  NewOpc = X86::ADC64mr;   break; // *64mi32
+    case X86::ADD64mi32:  NewOpc = X86::ADD64mr;   break;
+    case X86::AND64mi32:  NewOpc = X86::AND64mr;   break;
+    case X86::CMP64mi32:  NewOpc = X86::CMP64mr;   break;
+    case X86::MOV64mi32:  NewOpc = X86::MOV64mr;   break;
+    case X86::OR64mi32:   NewOpc = X86::OR64mr;    break;
+    case X86::SBB64mi32:  NewOpc = X86::SBB64mr;   break;
+    case X86::SUB64mi32:  NewOpc = X86::SUB64mr;   break;
+    case X86::TEST64mi32: NewOpc = X86::TEST64mr;  break;
+    case X86::XOR64mi32:  NewOpc = X86::XOR64mr;   break;
+    }
+
+    if (Idx == 0)
+      N1 = CurDAG->getMachineNode(NewOpc, dl, VT, SDValue(N0, 0));
+    else if (Idx == 1)
+      N1 = CurDAG->getMachineNode(NewOpc, dl, VT,
+                                  {N->getOperand(0), SDValue(N0, 0)});
+    else if (Idx == 5) { // *mi
+      // A memory operand always has a chain.
+      N1 = CurDAG->getMachineNode(NewOpc, dl, N->getVTList(),
+                                  {N->getOperand(0), N->getOperand(1),
+                                   N->getOperand(2), N->getOperand(3),
+                                   N->getOperand(4), SDValue(N0, 0),
+                                   N->getOperand(6) /*chain*/});
+      CurDAG->setNodeMemRefs(N1, cast<MachineSDNode>(N)->memoperands());
+    } else
+      llvm_unreachable("Unexpected index!");
+
+    ReplaceUses(N, N1);
+    return true;
+  }
+  case X86::MOV32ri64:
+  case X86::MOV64ri: {
+    // Some address computations use *ri* opcodes. Since such an address may
+    // change during assembly and linking, we do not handle it here.
+    if (!isa<ConstantSDNode>(N->getOperand(0)))
+      return false;
+    uint64_t Imm = N->getConstantOperandVal(0);
+    if (!isEndbrImm64(Imm))
+      return false;
+
+    N0 = CurDAG->getMachineNode(X86::MOV64ri, dl, VT, getI64Imm(~Imm, dl));
+    N1 = CurDAG->getMachineNode(X86::NOT64r, dl, VT, SDValue(N0, 0));
+    ReplaceUses(N, N1);
+    return true;
+  }
+  }
+  return false;
+}
+
 void X86DAGToDAGISel::PostprocessISelDAG() {
   // Skip peepholes at -O0.
   if (TM.getOptLevel() == CodeGenOpt::None)
@@ -1433,6 +1649,15 @@
     }
   }
 
+  // Check whether the cf-protection-branch module flag is enabled.
+  Metadata *CFProtectionBranch =
+      MF->getMMI().getModule()->getModuleFlag("cf-protection-branch");
+  if ((CFProtectionBranch || IndirectBranchTracking) &&
+      obscureEndbrOpcodeImmediate(N)) {
+    MadeChange = true;
+    continue;
+  }
+
   // Attempt to remove vectors moves that were inserted to zero upper bits.
   if (Opc != TargetOpcode::SUBREG_TO_REG)
     continue;
Index: llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
===================================================================
--- llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -28,7 +28,7 @@
 
 #define DEBUG_TYPE "x86-indirect-branch-tracking"
 
-static cl::opt<bool> IndirectBranchTracking(
+cl::opt<bool> IndirectBranchTracking(
     "x86-indirect-branch-tracking", cl::init(false), cl::Hidden,
     cl::desc("Enable X86 indirect branch tracking pass."));
 
Index: llvm/test/CodeGen/X86/cet_endbr_imm_enhance.ll
===================================================================
--- llvm/test/CodeGen/X86/cet_endbr_imm_enhance.ll
+++ llvm/test/CodeGen/X86/cet_endbr_imm_enhance.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -x86-indirect-branch-tracking | FileCheck %s
 
-; TBD: This test is for CET enhancement, we should replace the endbr imm.
+; This test is for the CET enhancement.
 ;
 ; ENDBR32 and ENDBR64 have specific opcodes:
 ; ENDBR32: F3 0F 1E FB
 ; ENDBR64: F3 0F 1E FA
@@ -27,7 +27,8 @@
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    endbr64
 ; CHECK-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movabsq $321002333478650, %rax # imm = 0x123F32E0F1EFA
+; CHECK-NEXT:    movabsq $-321002333478651, %rax # imm = 0xFFFEDC0CD1F0E105
+; CHECK-NEXT:    notq %rax
 ; CHECK-NEXT:    andq %rax, (%rdi)
 ; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT:    movq (%rax), %rax
@@ -52,9 +53,11 @@
 ; CHECK-LABEL: foo2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    endbr64
-; CHECK-NEXT:    movl {{.*}}(%rip), %eax
-; CHECK-NEXT:    addl %eax, %eax
-; CHECK-NEXT:    andl $-217112838, %eax # imm = 0xF30F1EFA
+; CHECK-NEXT:    movl {{.*}}(%rip), %ecx
+; CHECK-NEXT:    addl %ecx, %ecx
+; CHECK-NEXT:    movl $217112837, %eax # imm = 0xCF0E105
+; CHECK-NEXT:    notl %eax
+; CHECK-NEXT:    andl %ecx, %eax
 ; CHECK-NEXT:    retq
 entry:
   %0 = load i32, i32* @bzx, align 4
@@ -71,7 +74,9 @@
 ; CHECK-LABEL: foo3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    endbr64
-; CHECK-NEXT:    andl $-217112838, {{.*}}(%rip) # imm = 0xF30F1EFA
+; CHECK-NEXT:    movl $217112837, %eax # imm = 0xCF0E105
+; CHECK-NEXT:    notl %eax
+; CHECK-NEXT:    andl %eax, {{.*}}(%rip)
 ; CHECK-NEXT:    movl $czx, %eax
 ; CHECK-NEXT:    retq
 entry:
@@ -86,8 +91,10 @@
 ; CHECK-LABEL: foo4:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    endbr64
-; CHECK-NEXT:    movl $-217112838, -{{[0-9]+}}(%rsp) # imm = 0xF30F1EFA
-; CHECK-NEXT:    movl $-217112838, %eax # imm = 0xF30F1EFA
+; CHECK-NEXT:    movl $217112837, %eax # imm = 0xCF0E105
+; CHECK-NEXT:    notl %eax
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl %eax, %eax
 ; CHECK-NEXT:    retq
 entry:
   %dzx = alloca i32, align 4
@@ -100,9 +107,9 @@
 ; CHECK-LABEL: foo5:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    endbr64
-; CHECK-NEXT:    movl $4077854458, %eax # imm = 0xF30F1EFA
+; CHECK-NEXT:    movabsq $-4077854459, %rax # imm = 0xFFFFFFFF0CF0E105
+; CHECK-NEXT:    notq %rax
 ; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movl $4077854458, %eax # imm = 0xF30F1EFA
 ; CHECK-NEXT:    retq
 entry:
   %ezx = alloca i64, align 8
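As a quick standalone sanity check of the byte-level reasoning above, here is a minimal C++ sketch. It is not part of the patch: checkEndbrImm64 merely re-implements the scan that the patch adds as isEndbrImm64, and main with its test constants is illustrative only. It verifies that the complement of an ENDBR-matching immediate no longer matches the pattern, and that a second complement (the NOT emitted at run time) restores the original value.

// endbr_imm_check.cpp -- standalone sketch, not part of the patch.
// Build: clang++ -std=c++17 endbr_imm_check.cpp -o endbr_imm_check
#include <cassert>
#include <cstdint>
#include <cstdio>

// Re-implementation of the patch's isEndbrImm64 scan: the low three
// bytes must be 0x0F1EFA, and every byte above them must be an
// optional legacy prefix until the mandatory 0xF3 prefix is found.
static bool checkEndbrImm64(uint64_t Imm) {
  if ((Imm & 0x00FFFFFF) != 0x0F1EFA)
    return false;
  static const uint8_t OptionalPrefixBytes[] = {0x26, 0x2e, 0x36, 0x3e, 0x64,
                                                0x65, 0x66, 0x67, 0xf0, 0xf2};
  for (int Shift = 24; Shift < 64; Shift += 8) {
    uint8_t Byte = (Imm >> Shift) & 0xFF;
    if (Byte == 0xF3)
      return true;
    bool IsPrefix = false;
    for (uint8_t P : OptionalPrefixBytes)
      IsPrefix |= (Byte == P);
    if (!IsPrefix)
      return false;
  }
  return false;
}

int main() {
  const uint64_t Cases[] = {
      0xF30F1EFAull,         // plain ENDBR64 opcode bytes
      0xF3660F1EFAull,       // 0x66 prefix between 0xF3 and 0x0F1EFA
      0x000123F32E0F1EFAull, // the foo1 constant from the test above
  };
  for (uint64_t Imm : Cases) {
    assert(checkEndbrImm64(Imm));
    uint64_t Emitted = ~Imm; // the constant the compiler now emits
    // The emitted complement no longer matches the ENDBR pattern...
    assert(!checkEndbrImm64(Emitted));
    // ...and the runtime NOT restores the original value.
    assert(~Emitted == Imm);
    printf("imm=0x%016llx emitted=0x%016llx\n", (unsigned long long)Imm,
           (unsigned long long)Emitted);
  }
  return 0;
}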