diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -62,6 +62,11 @@ // Otherwise, the default basic cost is used. return TTI::TCC_Basic; + case Instruction::Freeze: + // Freeze operation is free because it should be lowered into a register + // use without any register copy in assembly code. + return TTI::TCC_Free; + case Instruction::FDiv: case Instruction::FRem: case Instruction::SDiv: diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -534,6 +534,7 @@ bool selectCall(const User *I); bool selectIntrinsicCall(const IntrinsicInst *II); bool selectBitCast(const User *I); + bool selectFreeze(const User *I); bool selectCast(const User *I, unsigned Opcode); bool selectExtractValue(const User *U); bool selectInsertValue(const User *I); diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -178,6 +178,11 @@ /// UNDEF - An undefined node. UNDEF, + // FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or + // is evaluated to UNDEF), or returns VAL otherwise. Note that each + // read of UNDEF can yield different value, but FREEZE(UNDEF) cannot. + FREEZE, + /// EXTRACT_ELEMENT - This is used to get the lower or upper (determined by /// a Constant, which is required to be operand #1) half of the integer or /// float value specified as operand #0. 
This is only for use before diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -324,6 +324,8 @@ void Select_UNDEF(SDNode *N); void CannotYetSelect(SDNode *N); + void Select_FREEZE(SDNode *N); + private: void DoInstructionSelection(); SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -53,9 +53,58 @@ /// anyext operations on target architectures which support it. HANDLE_TARGET_OPCODE(INSERT_SUBREG) -/// IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef. +/// IMPLICIT_DEF - This instruction simulates LLVM IR's undef/poison value by +/// creating a register that contains a value of an arbitrary bit pattern. +/// The register's value can be changed by any instruction that is executed +/// after IMPLICIT_DEF, except FREEZE. This implies that different instructions +/// using the IMPLICIT_DEF can see different values. +/// To constrain different instructions to see the same value of this register, +/// FREEZE operation can be used. +/// %1 = IMPLICIT_DEF +/// read(%1) ; may have implicitly changed the value of register %1 +/// read(%1) ; can read a different value +/// %2 = FREEZE %1 +/// read(%2) +/// read(%2) ; these two read the same value from register %2. +/// +/// Some instructions may have IMPLICIT_DEF-like output register, if inputs are +/// IMPLICIT_DEF. IMPLICIT_DEF-like register means it works exactly as +/// IMPLICIT_DEF. +/// - COPY and COPY_TO_REGCLASS of IMPLICIT_DEF has IMPLICIT_DEF-like output +/// register, so its value can change by execution of its following +/// instructions. This allows optimizing COPY(IMPLICIT_DEF) to IMPLICIT_DEF. 
+/// - PHI having an IMPLICIT_DEF operand has IMPLICIT_DEF-like output register +/// if control came from the corresponding predecessor block. This allows +/// optimizing PHI(IMPLICIT_DEF, ..., IMPLICIT_DEF) to IMPLICIT_DEF. +/// - A register can have IMPLICIT_DEF subregisters via REG_SEQUENCE or +/// INSERT_SUBREG. If EXTRACT_SUBREG extracts one of these subregisters, the +/// output register is also IMPLICIT_DEF-like. +/// +/// Except for these operations and IMPLICIT_DEF, all other instructions' output +/// registers behave as expected. HANDLE_TARGET_OPCODE(IMPLICIT_DEF) +/// FREEZE - This is the MachineInstr-level equivalent of freeze. It copies the +/// value of the register operand, but unlike IMPLICIT_DEF, the output +/// register's value is preserved over the execution of following instructions. +/// Note that COPY(IMPLICIT_DEF) is different from FREEZE(IMPLICIT_DEF), because +/// COPY's output register works as an IMPLICIT_DEF register again (see the +/// description of IMPLICIT_DEF). +/// %1 = IMPLICIT_DEF +/// %2 = FREEZE %1 +/// read(%2) +/// read(%2) ; these two read the same value. +/// +/// Unlike other instructions, FREEZE does not change the value of IMPLICIT_DEF +/// registers, meaning that consecutive FREEZEs on the same operand yield the +/// same value. If they are separated, they might return different values. +/// %1 = IMPLICIT_DEF +/// %2 = FREEZE %1 +/// read(%2) ; this may have changed the value of %1. +/// %3 = FREEZE %1 ; %3 and %2 may have different values. +/// %4 = FREEZE %1 ; %3 and %4 have the same value. +HANDLE_TARGET_OPCODE(FREEZE) + /// SUBREG_TO_REG - Assert the value of bits in a super register. /// The result of this instruction is the value of the second operand inserted /// into the subregister specified by the third operand.
All other bits are diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1059,6 +1059,15 @@ let isReMaterializable = 1; let isAsCheapAsAMove = 1; } +def FREEZE : StandardPseudoInstruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$src); + let AsmString = "FREEZE"; + let hasSideEffects = 0; + let isAsCheapAsAMove = 1; + let hasNoSchedulingInfo = 1; + let isNotDuplicable = 1; +} def SUBREG_TO_REG : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$implsrc, unknown:$subsrc, i32imm:$subidx); diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp --- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines a pass that expands COPY and SUBREG_TO_REG pseudo +// This file defines a pass that expands COPY, SUBREG_TO_REG, and FREEZE pseudo // instructions after register allocation. // //===----------------------------------------------------------------------===// @@ -211,6 +211,7 @@ case TargetOpcode::SUBREG_TO_REG: MadeChange |= LowerSubregToReg(&MI); break; + case TargetOpcode::FREEZE: case TargetOpcode::COPY: MadeChange |= LowerCopy(&MI); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1571,6 +1571,27 @@ return true; } +bool FastISel::selectFreeze(const User *I) { + Register Reg = getRegForValue(I->getOperand(0)); + if (!Reg) + // Unhandled operand: getRegForValue produced no register for it. + return false; + // Only operands whose IR type maps to a legal value type are handled. + EVT ETy = TLI.getValueType(DL, I->getOperand(0)->getType()); + if (ETy == MVT::Other || !TLI.isTypeLegal(ETy)) + // Unhandled type, bail out.
+ return false; + // Define a fresh register of the type's register class as FREEZE of Reg. + MVT Ty = ETy.getSimpleVT(); + const TargetRegisterClass *TyRegClass = TLI.getRegClassFor(Ty); + Register ResultReg = createResultReg(TyRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::FREEZE), ResultReg).addReg(Reg); + + updateValueMap(I, ResultReg); + return true; +} + // Remove local value instructions starting from the instruction after // SavedLastLocalValue to the current function insert point. void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) @@ -1912,6 +1933,9 @@ case Instruction::ExtractValue: return selectExtractValue(I); + case Instruction::Freeze: + return selectFreeze(I); + case Instruction::PHI: llvm_unreachable("FastISel shouldn't visit PHI nodes!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -199,6 +199,10 @@ case ISD::VECREDUCE_UMIN: Res = PromoteIntRes_VECREDUCE(N); break; + + case ISD::FREEZE: + Res = PromoteIntRes_FREEZE(N); + break; } // If the result is null then the sub-method took care of registering it.
@@ -396,6 +400,13 @@ return ShiftVT; } +/// Legalize FREEZE with a promoted result: freeze the promoted operand. +SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) { + SDValue V = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::FREEZE, SDLoc(N), + V.getValueType(), V); +} + SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); EVT OVT = N->getValueType(0); @@ -1792,6 +1803,7 @@ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + case ISD::FREEZE: SplitRes_FREEZE(N, Lo, Hi); break; case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -304,6 +304,7 @@ SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); + SDValue PromoteIntRes_FREEZE(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); @@ -919,6 +920,7 @@ void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVSETCC(const SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -557,3 +557,13 @@ Lo = DAG.getUNDEF(LoVT); Hi = DAG.getUNDEF(HiVT); } + +/// Split a FREEZE result: freeze the low and high parts of the split operand. +void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue L, H; +
SDLoc dl(N); + GetSplitOp(N->getOperand(0), L, H); + + Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L); + Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -88,6 +88,7 @@ case ISD::FLOG2: case ISD::FNEARBYINT: case ISD::FNEG: + case ISD::FREEZE: case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: @@ -876,6 +877,7 @@ case ISD::FLOG2: case ISD::FNEARBYINT: case ISD::FNEG: + case ISD::FREEZE: case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FP_TO_SINT: @@ -2831,6 +2833,7 @@ case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::FNEG: + case ISD::FREEZE: case ISD::FCANONICALIZE: Res = WidenVecRes_Unary(N); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -10665,6 +10665,24 @@ } void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) { - SDValue N = getValue(I.getOperand(0)); - setValue(&I, N); + // Freeze every flattened member of the operand separately. The number of + // members and their types are taken from the IR type, because the operand + // count of the node that produces Op need not match it. + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), + ValueVTs); + const unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; // Empty aggregate: there is nothing to freeze. + + SDValue Op = getValue(I.getOperand(0)); + SmallVector<SDValue, 4> Values; + Values.reserve(NumValues); + for (unsigned i = 0; i != NumValues; ++i) { + // Op may itself be a non-zero result of its node, so index relative to it. + SDValue Arg(Op.getNode(), Op.getResNo() + i); + Values.push_back( + DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i], Arg)); + } + setValue(&I, DAG.getMergeValues(Values, getCurSDLoc())); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -388,6 +388,7 @@ case ISD::GC_TRANSITION_START: return "gc_transition.start"; case ISD::GC_TRANSITION_END: return "gc_transition.end"; case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; + case ISD::FREEZE: return "freeze"; // Bit manipulation case ISD::ABS: return "abs"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2285,6 +2285,11 @@ CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); } +void SelectionDAGISel::Select_FREEZE(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::FREEZE, N->getValueType(0), + N->getOperand(0)); // Operand 0 is the value being frozen. +} + /// GetVBR - decode a vbr encoding whose top bit is set. LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { @@ -2821,6 +2826,9 @@ case ISD::UNDEF: Select_UNDEF(NodeToMatch); return; + case ISD::FREEZE: + Select_FREEZE(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1645,7 +1645,7 @@ case ExtractValue: return ISD::MERGE_VALUES; case InsertValue: return ISD::MERGE_VALUES; case LandingPad: return 0; - case Freeze: return 0; + case Freeze: return ISD::FREEZE; } llvm_unreachable("Unknown instruction type encountered!"); diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -927,6 +927,16 @@
addMachineLateOptimization(); // Expand pseudo instructions before second scheduling pass. + // After this pass, IMPLICIT_DEF cannot yield different values per use. + // For example, following transformation is not valid anymore: + // eax = IMPLICIT_DEF + // use(eax) + // use(eax) + // => + // eax = IMPLICIT_DEF + // use(eax) + // ebx = IMPLICIT_DEF + // use(ebx) addPass(&ExpandPostRAPseudosID); // Run pre-sched2 passes. diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -485,6 +485,7 @@ case TargetOpcode::EH_LABEL: case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: + case TargetOpcode::FREEZE: case TargetOpcode::DBG_VALUE: return 0; case TargetOpcode::INLINEASM: diff --git a/llvm/test/CodeGen/X86/fast-isel-freeze.ll b/llvm/test/CodeGen/X86/fast-isel-freeze.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/fast-isel-freeze.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=SDAG +; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=FAST + +define i32 @freeze(i32 %t) { +; SDAG: movl $10, %eax +; SDAG-NEXT: xorl %ecx, %eax +; SDAG-NEXT: retq +; FAST: movl $10, %eax +; FAST-NEXT: xorl %ecx, %eax +; FAST-NEXT: retq + %1 = freeze i32 %t + %2 = freeze i32 10 + %3 = xor i32 %1, %2 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/X86/fast-isel.ll b/llvm/test/CodeGen/X86/fast-isel.ll --- a/llvm/test/CodeGen/X86/fast-isel.ll +++ b/llvm/test/CodeGen/X86/fast-isel.ll @@ -99,6 +99,11 @@ ret void } +define void @freeze_i32(i32 %x) { + %t = freeze i32 %x + ret void +} + @crash_test1x = external global <2 x i32>, align 8 define void @crash_test1() nounwind ssp { diff --git a/llvm/test/CodeGen/X86/freeze-call.ll b/llvm/test/CodeGen/X86/freeze-call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/freeze-call.ll @@ -0,0 +1,35 
@@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s 2>&1 | FileCheck %s --check-prefix=X86ASM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -optimize-regalloc=false < %s 2>&1 | FileCheck %s --check-prefix=X86ASM_NORAOPT + +declare i32 @g() + +define i32 @foo() nounwind { +; X86ASM-LABEL: foo: +; X86ASM: # %bb.0: +; X86ASM-NEXT: pushq %rbx +; X86ASM-NEXT: # kill: def $ebx killed $eax def $rbx +; X86ASM-NEXT: callq g +; X86ASM-NEXT: leal 30(%rbx,%rbx), %eax +; X86ASM-NEXT: popq %rbx +; X86ASM-NEXT: retq +; +; X86ASM_NORAOPT-LABEL: foo: +; X86ASM_NORAOPT: # %bb.0: +; X86ASM_NORAOPT-NEXT: pushq %rax +; X86ASM_NORAOPT-NEXT: # implicit-def: $eax +; X86ASM_NORAOPT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X86ASM_NORAOPT-NEXT: callq g +; X86ASM_NORAOPT-NEXT: # implicit-def: $rcx +; X86ASM_NORAOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload +; X86ASM_NORAOPT-NEXT: movl %edx, %ecx +; X86ASM_NORAOPT-NEXT: leal 30(%rcx,%rcx), %eax +; X86ASM_NORAOPT-NEXT: popq %rcx +; X86ASM_NORAOPT-NEXT: retq + %y1 = freeze i32 undef + %k = add i32 %y1, 10 + call i32 @g() + %k2 = add i32 %y1, 20 + %res = add i32 %k, %k2 + ret i32 %res +} diff --git a/llvm/test/CodeGen/X86/freeze-legalize.ll b/llvm/test/CodeGen/X86/freeze-legalize.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/freeze-legalize.ll @@ -0,0 +1,93 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Make sure that seldag legalization works correctly for freeze instruction. 
+; RUN: llc -mtriple=i386-apple-darwin < %s 2>&1 | FileCheck %s + +define i64 @expand(i32 %x) { +; CHECK-LABEL: expand: +; CHECK: ## %bb.0: +; CHECK-NEXT: movl $303174162, %eax ## imm = 0x12121212 +; CHECK-NEXT: movl $875836468, %ecx ## imm = 0x34343434 +; CHECK-NEXT: movl $1448498774, %edx ## imm = 0x56565656 +; CHECK-NEXT: xorl %eax, %edx +; CHECK-NEXT: movl $2021161080, %eax ## imm = 0x78787878 +; CHECK-NEXT: xorl %ecx, %eax +; CHECK-NEXT: retl + %y1 = freeze i64 1302123111658042420 ; 0x1212121234343434 + %y2 = freeze i64 6221254864647256184 ; 0x5656565678787878 + %t2 = xor i64 %y1, %y2 + ret i64 %t2 +} + + +define <2 x i64> @expand_vec(i32 %x) nounwind { +; CHECK-LABEL: expand_vec: +; CHECK: ## %bb.0: +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl $16843009, %ecx ## imm = 0x1010101 +; CHECK-NEXT: movl $589505315, %edx ## imm = 0x23232323 +; CHECK-NEXT: movl $303174162, %esi ## imm = 0x12121212 +; CHECK-NEXT: movl $875836468, %edi ## imm = 0x34343434 +; CHECK-NEXT: movl $1162167621, %ebx ## imm = 0x45454545 +; CHECK-NEXT: xorl %ecx, %ebx +; CHECK-NEXT: movl $1734829927, %ecx ## imm = 0x67676767 +; CHECK-NEXT: xorl %edx, %ecx +; CHECK-NEXT: movl $1448498774, %edx ## imm = 0x56565656 +; CHECK-NEXT: xorl %esi, %edx +; CHECK-NEXT: movl $2021161080, %esi ## imm = 0x78787878 +; CHECK-NEXT: xorl %edi, %esi +; CHECK-NEXT: movl %ebx, 12(%eax) +; CHECK-NEXT: movl %ecx, 8(%eax) +; CHECK-NEXT: movl %edx, 4(%eax) +; CHECK-NEXT: movl %esi, (%eax) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: retl $4 + ; <0x1212121234343434, 0x101010123232323> + %y1 = freeze <2 x i64> + ; <0x5656565678787878, 0x4545454567676767> + %y2 = freeze <2 x i64> + %t2 = xor <2 x i64> %y1, %y2 + ret <2 x i64> %t2 +} + +define i10 @promote() { +; CHECK-LABEL: promote: +; CHECK: ## %bb.0: +; CHECK-NEXT: movw $682, %ax ## imm = 0x2AA +; CHECK-NEXT: movl %eax, %ecx +; 
CHECK-NEXT: movw $992, %ax ## imm = 0x3E0 +; CHECK-NEXT: ## kill: def $ax killed $ax def $eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retl + %a = freeze i10 682 + %b = freeze i10 992 + %res = add i10 %a, %b + ret i10 %res +} + +define <2 x i10> @promote_vec() { +; CHECK-LABEL: promote_vec: +; CHECK: ## %bb.0: +; CHECK-NEXT: movw $125, %ax +; CHECK-NEXT: ## kill: def $ax killed $ax def $eax +; CHECK-NEXT: movw $682, %cx ## imm = 0x2AA +; CHECK-NEXT: ## kill: def $cx killed $cx def $ecx +; CHECK-NEXT: movw $393, %dx ## imm = 0x189 +; CHECK-NEXT: ## kill: def $dx killed $dx def $edx +; CHECK-NEXT: addl %eax, %edx +; CHECK-NEXT: movw $992, %ax ## imm = 0x3E0 +; CHECK-NEXT: ## kill: def $ax killed $ax def $eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax +; CHECK-NEXT: ## kill: def $dx killed $dx killed $edx +; CHECK-NEXT: retl + %a = freeze <2 x i10> + %b = freeze <2 x i10> + %res = add <2 x i10> %a, %b + ret <2 x i10> %res +} diff --git a/llvm/test/CodeGen/X86/freeze-mir.ll b/llvm/test/CodeGen/X86/freeze-mir.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/freeze-mir.ll @@ -0,0 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs -o - -stop-after=finalize-isel %s 2>&1 | FileCheck %s --check-prefix=X86MIR + +%struct.T = type { i32, i32 } + +define i32 @freeze_int() { + ; X86MIR-LABEL: name: freeze_int + ; X86MIR: bb.0 (%ir-block.0): + ; X86MIR: [[DEF:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X86MIR: [[FREEZE:%[0-9]+]]:gr32 = FREEZE killed [[DEF]] + ; X86MIR: [[IMUL32rr:%[0-9]+]]:gr32 = IMUL32rr [[FREEZE]], [[FREEZE]], implicit-def dead $eflags + ; X86MIR: [[XOR32rr:%[0-9]+]]:gr32 = XOR32rr [[IMUL32rr]], [[FREEZE]], implicit-def dead $eflags + ; X86MIR: $eax = COPY [[XOR32rr]] + ; X86MIR: RET 0, $eax + %y1 = freeze i32 undef + %t1 = mul i32 %y1, %y1 + %t2 = 
xor i32 %t1, %y1 + ret i32 %t2 +} + +define i5 @freeze_int2() { + ; X86MIR-LABEL: name: freeze_int2 + ; X86MIR: bb.0 (%ir-block.0): + ; X86MIR: [[DEF:%[0-9]+]]:gr8 = IMPLICIT_DEF + ; X86MIR: [[FREEZE:%[0-9]+]]:gr8 = FREEZE killed [[DEF]] + ; X86MIR: $al = COPY [[FREEZE]] + ; X86MIR: MUL8r [[FREEZE]], implicit-def $al, implicit-def dead $eflags, implicit-def $ax, implicit $al + ; X86MIR: [[COPY:%[0-9]+]]:gr8 = COPY $al + ; X86MIR: [[XOR8rr:%[0-9]+]]:gr8 = XOR8rr [[COPY]], [[FREEZE]], implicit-def dead $eflags + ; X86MIR: $al = COPY [[XOR8rr]] + ; X86MIR: RET 0, $al + %y1 = freeze i5 undef + %t1 = mul i5 %y1, %y1 + %t2 = xor i5 %t1, %y1 + ret i5 %t2 +} + +define float @freeze_float() { + ; X86MIR-LABEL: name: freeze_float + ; X86MIR: bb.0 (%ir-block.0): + ; X86MIR: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF + ; X86MIR: [[FREEZE:%[0-9]+]]:fr32 = FREEZE killed [[DEF]] + ; X86MIR: %2:fr32 = nofpexcept ADDSSrr [[FREEZE]], [[FREEZE]], implicit $mxcsr + ; X86MIR: $xmm0 = COPY %2 + ; X86MIR: RET 0, $xmm0 + %y1 = freeze float undef + %t1 = fadd float %y1, %y1 + ret float %t1 +} + +define <2 x i32> @freeze_ivec() { + ; X86MIR-LABEL: name: freeze_ivec + ; X86MIR: bb.0 (%ir-block.0): + ; X86MIR: [[DEF:%[0-9]+]]:vr128 = IMPLICIT_DEF + ; X86MIR: [[FREEZE:%[0-9]+]]:vr128 = FREEZE killed [[DEF]] + ; X86MIR: [[PADDDrr:%[0-9]+]]:vr128 = PADDDrr [[FREEZE]], [[FREEZE]] + ; X86MIR: $xmm0 = COPY [[PADDDrr]] + ; X86MIR: RET 0, $xmm0 + %y1 = freeze <2 x i32> undef + %t1 = add <2 x i32> %y1, %y1 + ret <2 x i32> %t1 +} + +define i8* @freeze_ptr() { + ; X86MIR-LABEL: name: freeze_ptr + ; X86MIR: bb.0 (%ir-block.0): + ; X86MIR: [[DEF:%[0-9]+]]:gr64 = IMPLICIT_DEF + ; X86MIR: [[FREEZE:%[0-9]+]]:gr64 = FREEZE killed [[DEF]] + ; X86MIR: [[ADD64ri8_:%[0-9]+]]:gr64 = ADD64ri8 [[FREEZE]], 4, implicit-def dead $eflags + ; X86MIR: $rax = COPY [[ADD64ri8_]] + ; X86MIR: RET 0, $rax + %y1 = freeze i8* undef + %t1 = getelementptr i8, i8* %y1, i64 4 + ret i8* %t1 +} + +define i32 @freeze_struct() { + ; 
X86MIR-LABEL: name: freeze_struct + ; X86MIR: bb.0 (%ir-block.0): + ; X86MIR: [[DEF:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X86MIR: [[FREEZE:%[0-9]+]]:gr32 = FREEZE killed [[DEF]] + ; X86MIR: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr [[FREEZE]], [[FREEZE]], implicit-def dead $eflags + ; X86MIR: $eax = COPY [[ADD32rr]] + ; X86MIR: RET 0, $eax + %y1 = freeze %struct.T undef + %v1 = extractvalue %struct.T %y1, 0 + %v2 = extractvalue %struct.T %y1, 1 + %t1 = add i32 %v1, %v2 + ret i32 %t1 +} + +define i32 @freeze_anonstruct() { + ; X86MIR-LABEL: name: freeze_anonstruct + ; X86MIR: bb.0 (%ir-block.0): + ; X86MIR: [[DEF:%[0-9]+]]:gr32 = IMPLICIT_DEF + ; X86MIR: [[FREEZE:%[0-9]+]]:gr32 = FREEZE killed [[DEF]] + ; X86MIR: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr [[FREEZE]], [[FREEZE]], implicit-def dead $eflags + ; X86MIR: $eax = COPY [[ADD32rr]] + ; X86MIR: RET 0, $eax + %y1 = freeze {i32, i32} undef + %v1 = extractvalue {i32, i32} %y1, 0 + %v2 = extractvalue {i32, i32} %y1, 1 + %t1 = add i32 %v1, %v2 + ret i32 %t1 +} + +define i64 @freeze_array() { + ; X86MIR-LABEL: name: freeze_array + ; X86MIR: bb.0 (%ir-block.0): + ; X86MIR: [[DEF:%[0-9]+]]:gr64 = IMPLICIT_DEF + ; X86MIR: [[FREEZE:%[0-9]+]]:gr64 = FREEZE killed [[DEF]] + ; X86MIR: [[ADD64rr:%[0-9]+]]:gr64 = ADD64rr [[FREEZE]], [[FREEZE]], implicit-def dead $eflags + ; X86MIR: $rax = COPY [[ADD64rr]] + ; X86MIR: RET 0, $rax + %y1 = freeze [2 x i64] undef + %v1 = extractvalue [2 x i64] %y1, 0 + %v2 = extractvalue [2 x i64] %y1, 1 + %t1 = add i64 %v1, %v2 + ret i64 %t1 +} diff --git a/llvm/test/CodeGen/X86/freeze-phielim.ll b/llvm/test/CodeGen/X86/freeze-phielim.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/freeze-phielim.ll @@ -0,0 +1,59 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s 2>&1 | FileCheck %s --check-prefix=X86ASM +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -optimize-regalloc=false < %s 2>&1 | FileCheck %s --check-prefix=X86ASM_NORAOPT + +@x = global i32 0 +@y = global i32 0 + +define void @f(i1 
%cond) { +; X86ASM-LABEL: f: +; X86ASM: # %bb.0: +; X86ASM-NEXT: # kill: def $eax killed $eax def $rax +; X86ASM-NEXT: testb $1, %dil +; X86ASM-NEXT: je .LBB0_2 +; X86ASM-NEXT: # %bb.1: # %BB1 +; X86ASM-NEXT: leal -1(%rax), %ecx +; X86ASM-NEXT: jmp .LBB0_3 +; X86ASM-NEXT: .LBB0_2: # %BB2 +; X86ASM-NEXT: xorl %ecx, %ecx +; X86ASM-NEXT: .LBB0_3: # %END +; X86ASM-NEXT: movl %eax, {{.*}}(%rip) +; X86ASM-NEXT: movl %ecx, {{.*}}(%rip) +; X86ASM-NEXT: retq +; +; X86ASM_NORAOPT-LABEL: f: +; X86ASM_NORAOPT: # %bb.0: +; X86ASM_NORAOPT-NEXT: # kill: def $dil killed $dil killed $edi +; X86ASM_NORAOPT-NEXT: # implicit-def: $eax +; X86ASM_NORAOPT-NEXT: testb $1, %dil +; X86ASM_NORAOPT-NEXT: je .LBB0_2 +; X86ASM_NORAOPT-NEXT: # %bb.1: +; X86ASM_NORAOPT-NEXT: movl %eax, %ecx +; X86ASM_NORAOPT-NEXT: leal -1(%rcx), %edx +; X86ASM_NORAOPT-NEXT: movl %eax, -4(%rsp) +; X86ASM_NORAOPT-NEXT: movl %edx, -8(%rsp) +; X86ASM_NORAOPT-NEXT: jmp .LBB0_3 +; X86ASM_NORAOPT-NEXT:.LBB0_2: +; X86ASM_NORAOPT-NEXT: xorl %ecx, %ecx +; X86ASM_NORAOPT-NEXT: movl %eax, -4(%rsp) +; X86ASM_NORAOPT-NEXT: movl %ecx, -8(%rsp) +; X86ASM_NORAOPT-NEXT:.LBB0_3: +; X86ASM_NORAOPT-NEXT: movl -8(%rsp), %eax +; X86ASM_NORAOPT-NEXT: movl -4(%rsp), %ecx +; X86ASM_NORAOPT-NEXT: movl %ecx, x(%rip) +; X86ASM_NORAOPT-NEXT: movl %eax, y(%rip) +; X86ASM_NORAOPT-NEXT: retq + br i1 %cond, label %BB1, label %BB2 +BB1: + %y1 = freeze i32 undef + %k1 = sub i32 %y1, 1 + br label %END +BB2: + %y2 = freeze i32 undef + br label %END +END: + %p = phi i32 [%y1, %BB1], [%y2, %BB2] + %p2 = phi i32 [%k1, %BB1], [0, %BB2] + store i32 %p, i32* @x + store i32 %p2, i32* @y + ret void +} diff --git a/llvm/test/CodeGen/X86/freeze.ll b/llvm/test/CodeGen/X86/freeze.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/freeze.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s 2>&1 | FileCheck %s --check-prefix=X86ASM + +%struct.T = type { 
i32, i32 } + +define i32 @freeze_int() { +; X86ASM-LABEL: freeze_int: +; X86ASM: # %bb.0: +; X86ASM-NEXT: # kill: def $ecx killed $eax +; X86ASM-NEXT: movl %ecx, %eax +; X86ASM-NEXT: imull %ecx, %eax +; X86ASM-NEXT: xorl %ecx, %eax +; X86ASM-NEXT: retq + %y1 = freeze i32 undef + %t1 = mul i32 %y1, %y1 + %t2 = xor i32 %t1, %y1 + ret i32 %t2 +} + +define i5 @freeze_int2() { +; X86ASM-LABEL: freeze_int2: +; X86ASM: # %bb.0: +; X86ASM-NEXT: # kill: def $cl killed $al +; X86ASM-NEXT: movl %ecx, %eax +; X86ASM-NEXT: mulb %cl +; X86ASM-NEXT: xorb %cl, %al +; X86ASM-NEXT: retq + %y1 = freeze i5 undef + %t1 = mul i5 %y1, %y1 + %t2 = xor i5 %t1, %y1 + ret i5 %t2 +} + +define float @freeze_float() { +; X86ASM-LABEL: freeze_float: +; X86ASM: # %bb.0: +; X86ASM-NEXT: # kill: def $xmm0 killed $xmm0 +; X86ASM-NEXT: addss %xmm0, %xmm0 +; X86ASM-NEXT: retq + %y1 = freeze float undef + %t1 = fadd float %y1, %y1 + ret float %t1 +} + +define <2 x i32> @freeze_ivec() { +; X86ASM-LABEL: freeze_ivec: +; X86ASM: # %bb.0: +; X86ASM-NEXT: # kill: def $xmm0 killed $xmm0 +; X86ASM-NEXT: paddd %xmm0, %xmm0 +; X86ASM-NEXT: retq + %y1 = freeze <2 x i32> undef + %t1 = add <2 x i32> %y1, %y1 + ret <2 x i32> %t1 +} + +define i8* @freeze_ptr() { +; X86ASM-LABEL: freeze_ptr: +; X86ASM: # %bb.0: +; X86ASM-NEXT: # kill: def $rax killed $rax +; X86ASM-NEXT: addq $4, %rax +; X86ASM-NEXT: retq + %y1 = freeze i8* undef + %t1 = getelementptr i8, i8* %y1, i64 4 + ret i8* %t1 +} + +define i32 @freeze_struct() { +; X86ASM-LABEL: freeze_struct: +; X86ASM: # %bb.0: +; X86ASM-NEXT: # kill: def $eax killed $eax +; X86ASM-NEXT: addl %eax, %eax +; X86ASM-NEXT: retq + %y1 = freeze %struct.T undef + %v1 = extractvalue %struct.T %y1, 0 + %v2 = extractvalue %struct.T %y1, 1 + %t1 = add i32 %v1, %v2 + ret i32 %t1 +} + +define i32 @freeze_anonstruct() { +; X86ASM-LABEL: freeze_anonstruct: +; X86ASM: # %bb.0: +; X86ASM-NEXT: # kill: def $eax killed $eax +; X86ASM-NEXT: addl %eax, %eax +; X86ASM-NEXT: retq + %y1 = freeze 
{i32, i32} undef + %v1 = extractvalue {i32, i32} %y1, 0 + %v2 = extractvalue {i32, i32} %y1, 1 + %t1 = add i32 %v1, %v2 + ret i32 %t1 +} + +define i64 @freeze_array() { +; X86ASM-LABEL: freeze_array: +; X86ASM: # %bb.0: +; X86ASM-NEXT: # kill: def $rax killed $rax +; X86ASM-NEXT: addq %rax, %rax +; X86ASM-NEXT: retq + %y1 = freeze [2 x i64] undef + %v1 = extractvalue [2 x i64] %y1, 0 + %v2 = extractvalue [2 x i64] %y1, 1 + %t1 = add i64 %v1, %v2 + ret i64 %t1 +}