Index: llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp
+++ llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
@@ -53,6 +54,17 @@
 static inline bool isIdenticalOp(const MachineOperand &MO1,
                                  const MachineOperand &MO2);

+/// \brief Returns true if two address displacement operands are of the same
+/// type and use the same symbol/index/address regardless of the offset.
+static bool isSimilarDispOp(const MachineOperand &MO1,
+                            const MachineOperand &MO2);
+
+/// \brief Returns true if the type of \p MO is valid for an address
+/// displacement operand. According to X86DAGToDAGISel::getAddressOperands,
+/// allowed types are: MO_Immediate, MO_ConstantPoolIndex, MO_JumpTableIndex,
+/// MO_ExternalSymbol, MO_GlobalAddress, MO_BlockAddress or MO_MCSymbol.
+static inline bool isValidDispOp(const MachineOperand &MO);
+
 /// \brief Returns true if the instruction is LEA.
 static inline bool isLEA(const MachineInstr &MI);

@@ -75,19 +87,11 @@
       if (!isIdenticalOp(*Operands[i], *Other.Operands[i]))
         return false;

-    assert((Disp->isImm() || Disp->isGlobal()) &&
-           (Other.Disp->isImm() || Other.Disp->isGlobal()) &&
-           "Address displacement operand is always an immediate or a global");
-
-    // Addresses' displacements must be either immediates or the same global.
-    // Immediates' and offsets' values don't matter for the operator since the
-    // difference will be taken care of during instruction substitution.
-    if ((Disp->isImm() && Other.Disp->isImm()) ||
-        (Disp->isGlobal() && Other.Disp->isGlobal() &&
-         Disp->getGlobal() == Other.Disp->getGlobal()))
-      return true;
-
-    return false;
+    // Addresses' displacements don't have to be exactly the same. It only
+    // matters that they use the same symbol/index/address. Immediates' or
+    // offsets' differences will be taken care of during instruction
+    // substitution.
+    return isSimilarDispOp(*Disp, *Other.Disp);
   }

   // Address' base, scale, index and segment operands.
@@ -126,10 +130,30 @@

     // If the address displacement is an immediate, it should not affect the
     // hash so that memory operands which differ only by immediate displacement
-    // would have the same hash. If the address displacement is a global, we
-    // should reflect this global in the hash.
-    if (Val.Disp->isGlobal())
+    // would have the same hash. If the address displacement is something else,
+    // we should reflect symbol/index/address in the hash.
+    switch (Val.Disp->getType()) {
+    case MachineOperand::MO_Immediate:
+      break;
+    case MachineOperand::MO_ConstantPoolIndex:
+    case MachineOperand::MO_JumpTableIndex:
+      Hash = hash_combine(Hash, Val.Disp->getIndex());
+      break;
+    case MachineOperand::MO_ExternalSymbol:
+      Hash = hash_combine(Hash, Val.Disp->getSymbolName());
+      break;
+    case MachineOperand::MO_GlobalAddress:
       Hash = hash_combine(Hash, Val.Disp->getGlobal());
+      break;
+    case MachineOperand::MO_BlockAddress:
+      Hash = hash_combine(Hash, Val.Disp->getBlockAddress());
+      break;
+    case MachineOperand::MO_MCSymbol:
+      Hash = hash_combine(Hash, Val.Disp->getMCSymbol());
+      break;
+    default:
+      llvm_unreachable("Invalid address displacement operand");
+    }

     return (unsigned)Hash;
   }
@@ -163,6 +187,28 @@
           !TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
 }

+static bool isSimilarDispOp(const MachineOperand &MO1,
+                            const MachineOperand &MO2) {
+  assert(isValidDispOp(MO1) && isValidDispOp(MO2) &&
+         "Address displacement operand is not valid");
+  return (MO1.isImm() && MO2.isImm()) ||
+         (MO1.isCPI() && MO2.isCPI() && MO1.getIndex() == MO2.getIndex()) ||
+         (MO1.isJTI() && MO2.isJTI() && MO1.getIndex() == MO2.getIndex()) ||
+         (MO1.isSymbol() && MO2.isSymbol() &&
+          MO1.getSymbolName() == MO2.getSymbolName()) ||
+         (MO1.isGlobal() && MO2.isGlobal() &&
+          MO1.getGlobal() == MO2.getGlobal()) ||
+         (MO1.isBlockAddress() && MO2.isBlockAddress() &&
+          MO1.getBlockAddress() == MO2.getBlockAddress()) ||
+         (MO1.isMCSymbol() && MO2.isMCSymbol() &&
+          MO1.getMCSymbol() == MO2.getMCSymbol());
+}
+
+static inline bool isValidDispOp(const MachineOperand &MO) {
+  return MO.isImm() || MO.isCPI() || MO.isJTI() || MO.isSymbol() ||
+         MO.isGlobal() || MO.isBlockAddress() || MO.isMCSymbol();
+}
+
 static inline bool isLEA(const MachineInstr &MI) {
   unsigned Opcode = MI.getOpcode();
   return Opcode == X86::LEA16r || Opcode == X86::LEA32r ||
@@ -317,16 +363,19 @@
 int64_t OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1, unsigned N1,
                                           const MachineInstr &MI2,
                                           unsigned N2) const {
-  // Address displacement operands may differ by a constant.
   const MachineOperand &Op1 = MI1.getOperand(N1 + X86::AddrDisp);
   const MachineOperand &Op2 = MI2.getOperand(N2 + X86::AddrDisp);
-  if (Op1.isImm() && Op2.isImm())
-    return Op1.getImm() - Op2.getImm();
-  else if (Op1.isGlobal() && Op2.isGlobal() &&
-           Op1.getGlobal() == Op2.getGlobal())
-    return Op1.getOffset() - Op2.getOffset();
-  else
-    llvm_unreachable("Invalid address displacement operand");
+
+  assert(isSimilarDispOp(Op1, Op2) &&
+         "Address displacement operands are not compatible");
+
+  // After the assert above we can be sure that both operands are of the same
+  // valid type and use the same symbol/index/address, thus displacement shift
+  // calculation is rather simple.
+  if (Op1.isJTI())
+    return 0;
+  return Op1.isImm() ? Op1.getImm() - Op2.getImm()
+                     : Op1.getOffset() - Op2.getOffset();
 }

 // Check that the Last LEA can be replaced by the First LEA. To be so,
@@ -434,11 +483,6 @@

     MemOpNo += X86II::getOperandBias(Desc);

-    // Address displacement must be an immediate or a global.
-    MachineOperand &Disp = MI.getOperand(MemOpNo + X86::AddrDisp);
-    if (!Disp.isImm() && !Disp.isGlobal())
-      continue;
-
     // Get the best LEA instruction to replace address calculation.
     MachineInstr *DefMI;
     int64_t AddrDispShift;
@@ -536,13 +580,11 @@
         MO.setReg(First.getOperand(0).getReg());

         // Update address disp.
-        MachineOperand *Op = &MI.getOperand(MemOpNo + X86::AddrDisp);
-        if (Op->isImm())
-          Op->setImm(Op->getImm() + AddrDispShift);
-        else if (Op->isGlobal())
-          Op->setOffset(Op->getOffset() + AddrDispShift);
-        else
-          llvm_unreachable("Invalid address displacement operand");
+        MachineOperand &Op = MI.getOperand(MemOpNo + X86::AddrDisp);
+        if (Op.isImm())
+          Op.setImm(Op.getImm() + AddrDispShift);
+        else if (!Op.isJTI())
+          Op.setOffset(Op.getOffset() + AddrDispShift);
       }

       // Since we can possibly extend register lifetime, clear kill flags.
Index: llvm/trunk/test/CodeGen/X86/lea-opt-memop-check.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/lea-opt-memop-check.ll
+++ llvm/trunk/test/CodeGen/X86/lea-opt-memop-check.ll
@@ -12,7 +12,8 @@
 declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
 declare <4 x float> @_mm_castsi128_ps(<2 x i64>) optsize

-define void @test(i8* nocapture readonly %src, i32 %len) #0 {
+; Check that the LEA optimization pass works with CPI address displacements.
+define void @test1(i8* nocapture readonly %src, i32 %len) #0 {
   %parts = alloca [4 x i32], align 4
   %part0 = bitcast [4 x i32]* %parts to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %part0, i8* %src, i32 %len, i32 1, i1 false)
@@ -20,7 +21,7 @@
   %tmp0 = tail call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> undef, <2 x i64> , i8 16)
   %call1 = tail call <4 x float> @_mm_castsi128_ps(<2 x i64> %tmp0)
   ret void
-; CHECK-LABEL: test:
+; CHECK-LABEL: test1:
 ; CHECK: leal{{.*}}
 ; CHECK: calll _memcpy
 ; CHECK: movaps __xmm@{{[0-9a-f]+}}, %xmm1
@@ -29,4 +30,71 @@
 ; CHECK: jmp __mm_castsi128_ps
 }

+declare i32 @GetLastError(...)
+declare void @IsolationAwareDeactivateActCtx(i32, i32)
+declare i8* @llvm.localaddress()
+declare void @llvm.localescape(...)
+declare i8* @llvm.localrecover(i8*, i8*, i32)
+
+@IsolationAwarePrivateT_SqbjaYRiRY = common global i32 0, align 4
+
+; Check that the MCSymbol objects are created to be used in "\01?fin$0@0@test2@@".
+define void @test2() #0 {
+entry:
+  %fActivateActCtxSuccess = alloca i32, align 4
+  %proc = alloca i32, align 4
+  %ulpCookie = alloca i32, align 4
+  call void (...) @llvm.localescape(i32* nonnull %fActivateActCtxSuccess, i32* nonnull %proc, i32* nonnull %ulpCookie)
+  %tmp0 = tail call i8* @llvm.localaddress()
+  call fastcc void @"\01?fin$0@0@test2@@"(i8* %tmp0)
+  ret void
+; CHECK-LABEL: test2:
+; CHECK: Ltest2$frame_escape_0 = 8
+; CHECK: Ltest2$frame_escape_1 = 4
+; CHECK: Ltest2$frame_escape_2 = 0
+; CHECK: calll "?fin$0@0@test2@@"
+}
+
+; Check that the LEA optimization pass works with MCSymbol address displacements.
+define internal fastcc void @"\01?fin$0@0@test2@@"(i8* readonly %frame_pointer) unnamed_addr noinline nounwind optsize {
+entry:
+  %tmp0 = tail call i8* @llvm.localrecover(i8* bitcast (void ()* @test2 to i8*), i8* %frame_pointer, i32 1)
+  %proc = bitcast i8* %tmp0 to i32*
+  %tmp1 = tail call i8* @llvm.localrecover(i8* bitcast (void ()* @test2 to i8*), i8* %frame_pointer, i32 2)
+  %ulpCookie = bitcast i8* %tmp1 to i32*
+  %tmp2 = load i32, i32* @IsolationAwarePrivateT_SqbjaYRiRY, align 4
+  %tobool = icmp eq i32 %tmp2, 0
+  br i1 %tobool, label %if.end, label %land.lhs.true
+
+land.lhs.true:
+  %tmp3 = tail call i8* @llvm.localrecover(i8* bitcast (void ()* @test2 to i8*), i8* %frame_pointer, i32 0)
+  %fActivateActCtxSuccess = bitcast i8* %tmp3 to i32*
+  %tmp4 = load i32, i32* %fActivateActCtxSuccess, align 4
+  %tobool1 = icmp eq i32 %tmp4, 0
+  br i1 %tobool1, label %if.end, label %if.then
+
+if.then:
+  %tmp5 = load i32, i32* %proc, align 4
+  %tobool2 = icmp eq i32 %tmp5, 0
+  br i1 %tobool2, label %cond.end, label %cond.true
+
+cond.true:
+  %call = tail call i32 bitcast (i32 (...)* @GetLastError to i32 ()*)()
+  br label %cond.end
+
+cond.end:
+  %tmp6 = load i32, i32* %ulpCookie, align 4
+  tail call void @IsolationAwareDeactivateActCtx(i32 0, i32 %tmp6)
+  br label %if.end
+
+if.end:
+  ret void
+; CHECK-LABEL: "?fin$0@0@test2@@":
+; CHECK: cmpl $0, Ltest2$frame_escape_0([[REG1:%[a-z]+]])
+; CHECK: leal Ltest2$frame_escape_1([[REG1]]), [[REG2:%[a-z]+]]
+; CHECK: leal Ltest2$frame_escape_2([[REG1]]), [[REG3:%[a-z]+]]
+; CHECK: cmpl $0, ([[REG2]])
+; CHECK: pushl ([[REG3]])
+}
+
 attributes #0 = { nounwind optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
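
The following standalone C++ sketch is not part of the patch and does not use LLVM APIs; it is only a simplified model of the idea implemented above in isSimilarDispOp() and getAddrDispShift(): displacements count as similar when they have the same kind and refer to the same symbol or index, the numeric offset is ignored when grouping, and the offset difference is applied later during substitution. The names DispKind, Disp, similarDisp and dispShift are hypothetical.

// Simplified, hypothetical model of the displacement-similarity logic; the
// real pass operates on llvm::MachineOperand rather than this Disp struct.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>

enum class DispKind { Imm, CPI, JTI, Symbol, Global, BlockAddr, MCSym };

struct Disp {
  DispKind Kind;
  std::string Sym; // symbol/global/block-address identity (unused for Imm/CPI/JTI)
  int64_t Index;   // constant-pool or jump-table index (unused otherwise)
  int64_t Off;     // immediate value or symbol offset
};

// Same kind and same symbol/index; the offset is deliberately ignored so that
// accesses differing only by offset fall into the same bucket.
static bool similarDisp(const Disp &A, const Disp &B) {
  if (A.Kind != B.Kind)
    return false;
  switch (A.Kind) {
  case DispKind::Imm:
    return true;
  case DispKind::CPI:
  case DispKind::JTI:
    return A.Index == B.Index;
  default: // Symbol, Global, BlockAddr, MCSym: compare the referenced entity.
    return A.Sym == B.Sym;
  }
}

// Offset difference between two similar displacements; jump-table entries
// carry no offset, so the shift is zero for them.
static int64_t dispShift(const Disp &A, const Disp &B) {
  assert(similarDisp(A, B) && "displacements are not compatible");
  return A.Kind == DispKind::JTI ? 0 : A.Off - B.Off;
}

int main() {
  Disp A{DispKind::Global, "g_var", 0, 16};
  Disp B{DispKind::Global, "g_var", 0, 4};
  std::cout << similarDisp(A, B) << ' ' << dispShift(A, B) << '\n'; // prints: 1 12
}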