Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -62,6 +62,11 @@
       // Otherwise, the default basic cost is used.
       return TTI::TCC_Basic;
 
+    case Instruction::Freeze:
+      // Freeze operation is free because it should be lowered into a register
+      // use without any register copy in assembly code.
+      return TTI::TCC_Free;
+
     case Instruction::FDiv:
     case Instruction::FRem:
     case Instruction::SDiv:
Index: include/llvm/CodeGen/FastISel.h
===================================================================
--- include/llvm/CodeGen/FastISel.h
+++ include/llvm/CodeGen/FastISel.h
@@ -534,6 +534,7 @@
   bool selectCall(const User *I);
   bool selectIntrinsicCall(const IntrinsicInst *II);
   bool selectBitCast(const User *I);
+  bool selectFreeze(const User *I);
   bool selectCast(const User *I, unsigned Opcode);
   bool selectExtractValue(const User *U);
   bool selectInsertValue(const User *I);
Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -176,6 +176,11 @@
     /// UNDEF - An undefined node.
     UNDEF,
 
+    /// FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or
+    /// is evaluated to UNDEF), or returns VAL otherwise. Note that each
+    /// read of UNDEF can yield a different value, but FREEZE(UNDEF) cannot.
+    FREEZE,
+
     /// EXTRACT_ELEMENT - This is used to get the lower or upper (determined by
     /// a Constant, which is required to be operand #1) half of the integer or
     /// float value specified as operand #0.  This is only for use before
Index: include/llvm/CodeGen/SelectionDAGISel.h
===================================================================
--- include/llvm/CodeGen/SelectionDAGISel.h
+++ include/llvm/CodeGen/SelectionDAGISel.h
@@ -314,6 +314,8 @@
   void Select_UNDEF(SDNode *N);
   void CannotYetSelect(SDNode *N);
 
+  void Select_FREEZE(SDNode *N);
+
 private:
   void DoInstructionSelection();
   SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
Index: lib/CodeGen/SelectionDAG/FastISel.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/FastISel.cpp
+++ lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1568,6 +1568,28 @@
   return true;
 }
 
+bool FastISel::selectFreeze(const User *I) {
+  // Freeze is lowered to a plain register-to-register COPY of its operand.
+  const Value *Op = I->getOperand(0);
+  unsigned OpReg = getRegForValue(Op);
+  if (!OpReg)
+    return false; // Unhandled operand.
+
+  EVT OpVT = TLI.getValueType(DL, Op->getType());
+  if (OpVT == MVT::Other || !TLI.isTypeLegal(OpVT))
+    return false; // Unhandled type, bail out.
+
+  // Copy the operand into a fresh register of the class for its type.
+  const TargetRegisterClass *RC = TLI.getRegClassFor(OpVT.getSimpleVT());
+  unsigned ResultReg = createResultReg(RC);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+          TII.get(TargetOpcode::COPY), ResultReg)
+      .addReg(OpReg);
+
+  updateValueMap(I, ResultReg);
+  return true;
+}
+
 // Remove local value instructions starting from the instruction after
 // SavedLastLocalValue to the current function insert point.
 void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
@@ -1909,6 +1931,9 @@
   case Instruction::ExtractValue:
     return selectExtractValue(I);
 
+  case Instruction::Freeze:
+    return selectFreeze(I);
+
   case Instruction::PHI:
     llvm_unreachable("FastISel shouldn't visit PHI nodes!");
 
Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -193,6 +193,10 @@
   case ISD::VECREDUCE_UMIN:
     Res = PromoteIntRes_VECREDUCE(N);
     break;
+
+  case ISD::FREEZE:
+    Res = PromoteIntRes_FREEZE(N);
+    break;
   }
 
   // If the result is null then the sub-method took care of registering it.
@@ -378,6 +382,12 @@
   return ShiftVT;
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) {
+  // Freeze the promoted operand; the result keeps the promoted type.
+  SDValue Promoted = GetPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::FREEZE, SDLoc(N), Promoted.getValueType(), Promoted);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
   SDValue Op = GetPromotedInteger(N->getOperand(0));
   EVT OVT = N->getValueType(0);
@@ -1677,6 +1687,7 @@
   case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
   case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
   case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
+  case ISD::FREEZE:       SplitRes_FREEZE(N, Lo, Hi); break;
 
   case ISD::BITCAST:      ExpandRes_BITCAST(N, Lo, Hi); break;
   case ISD::BUILD_PAIR:   ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
@@ -4052,7 +4063,6 @@
   return Swap.getValue(1);
 }
 
-
 SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
 
   EVT OutVT = N->getValueType(0);
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -312,6 +312,7 @@
   SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
   SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
+  SDValue PromoteIntRes_FREEZE(SDNode *N);
   SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
   SDValue PromoteIntRes_LOAD(LoadSDNode *N);
   SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
@@ -913,6 +914,7 @@
   void SplitRes_SELECT      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitRes_SELECT_CC   (SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitRes_UNDEF       (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_FREEZE      (SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void SplitVSETCC(const SDNode *N);
 
Index: lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -559,3 +559,12 @@
   Lo = DAG.getUNDEF(LoVT);
   Hi = DAG.getUNDEF(HiVT);
 }
+
+void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  // Split the operand, then freeze each half in its own (split) type.
+  SDValue OpLo, OpHi;
+  GetSplitOp(N->getOperand(0), OpLo, OpHi);
+  SDLoc DL(N);
+  Lo = DAG.getNode(ISD::FREEZE, DL, OpLo.getValueType(), OpLo);
+  Hi = DAG.getNode(ISD::FREEZE, DL, OpHi.getValueType(), OpHi);
+}
Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -88,6 +88,7 @@
   case ISD::FLOG2:
   case ISD::FNEARBYINT:
   case ISD::FNEG:
+  case ISD::FREEZE:
   case ISD::FP_EXTEND:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
@@ -900,6 +901,7 @@
   case ISD::FLOG2:
   case ISD::FNEARBYINT:
   case ISD::FNEG:
+  case ISD::FREEZE:
   case ISD::FP_EXTEND:
   case ISD::STRICT_FP_EXTEND:
   case ISD::FP_ROUND:
@@ -2891,6 +2893,7 @@
   case ISD::CTTZ:
   case ISD::CTTZ_ZERO_UNDEF:
   case ISD::FNEG:
+  case ISD::FREEZE:
   case ISD::FCANONICALIZE:
     Res = WidenVecRes_Unary(N);
     break;
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -668,7 +668,7 @@
   void visitUnary(const User &I, unsigned Opcode);
   void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); }
 
-  void visitFreeze(const User &I);
+  void visitFreeze(const User &I) { visitUnary(I, ISD::FREEZE); }
 
   void visitBinary(const User &I, unsigned Opcode);
   void visitShift(const User &I, unsigned Opcode);
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3102,10 +3102,25 @@
   SDNodeFlags Flags;
 
   SDValue Op = getValue(I.getOperand(0));
+  if (I.getOperand(0)->getType()->isAggregateType()) {
+    assert(Opcode == ISD::FREEZE && "only freeze takes aggregate operands");
+    // Freeze every member with its own value type, then merge the results.
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
+                    I.getType(), ValueVTs);
+    SmallVector<SDValue, 4> Values;
+    for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
+      Values.push_back(DAG.getNode(Opcode, getCurSDLoc(), ValueVTs[i],
+                                   SDValue(Op.getNode(), Op.getResNo() + i),
+                                   Flags));
+    if (!Values.empty()) // An empty aggregate produces no value at all.
+      setValue(&I, DAG.getMergeValues(Values, getCurSDLoc()));
+  } else {
   SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
                                     Op, Flags);
   setValue(&I, UnNodeValue);
 }
+}
 
 void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
   SDNodeFlags Flags;
@@ -10557,8 +10572,3 @@
     lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
   }
 }
-
-void SelectionDAGBuilder::visitFreeze(const User &I) {
-  SDValue N = getValue(I.getOperand(0));
-  setValue(&I, N);
-}
Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -378,6 +378,7 @@
   case ISD::GC_TRANSITION_START:        return "gc_transition.start";
   case ISD::GC_TRANSITION_END:          return "gc_transition.end";
   case ISD::GET_DYNAMIC_AREA_OFFSET:    return "get.dynamic.area.offset";
+  case ISD::FREEZE:                     return "freeze";
 
   // Bit manipulation
   case ISD::ABS:                        return "abs";
Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2260,6 +2260,57 @@
   CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
 }
 
+void SelectionDAGISel::Select_FREEZE(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  EVT Ty = N->getValueType(0);
+  SDLoc dl(N);
+
+  // Select FREEZE to CopyToReg + CopyFromReg.
+  // This blocks propagation of UNDEF while translating SelDag into
+  // MachineInstr.
+  // LLVM translates an UNDEF node into multiple IMPLICIT_DEF
+  // instructions (in MachineInstr) if the UNDEF has multiple uses.
+  // For example,
+  //
+  //   %y1 = UNDEF
+  //   %t1 = mul i32 %y1, %y1
+  //
+  // It is translated into MachineInstr code
+  //
+  //   %vreg2 = IMPLICIT_DEF
+  //   %vreg3 = IMPLICIT_DEF
+  //   %vreg1 = IMUL32rr %vreg2, %vreg3
+  //
+  // However, with freeze,
+  //
+  //   %y1 = freeze i32 UNDEF
+  //   %t1 = mul i32 %y1, %y1
+  //
+  // each read of %y1 must yield the same value, so it must be translated into:
+  //
+  //   %vreg2 = IMPLICIT_DEF
+  //   %vreg1 = IMUL32rr %vreg2, %vreg2
+  //
+  // Selecting FREEZE into CopyToReg + CopyFromReg helps this.
+  //
+  // We don't have FREEZE pseudo-instruction in MachineInstr-level now.
+  // If FREEZE instruction is added later, the code below must be
+  // changed as well.
+
+  const TargetRegisterClass *RC = TLI->getRegClassFor(Ty.getSimpleVT());
+  // Create a new virtual register.
+  unsigned NewVirtReg = RegInfo->createVirtualRegister(RC);
+  // Create CopyToReg node ('copy val into NewVirtReg')
+  SDValue CTRVal = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+                                        NewVirtReg, Op);
+  // Create CopyFromReg node ('get value from NewVirtReg')
+  SDValue CFRVal = CurDAG->getCopyFromReg(CTRVal, dl, NewVirtReg, Ty);
+  // Mark selected.
+  CTRVal->setNodeId(-1);
+  ReplaceUses(SDValue(N, 0), CFRVal);
+  CurDAG->RemoveDeadNode(N);
+}
+
 /// GetVBR - decode a vbr encoding whose top bit is set.
 LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t
 GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
@@ -2796,6 +2847,9 @@
   case ISD::UNDEF:
     Select_UNDEF(NodeToMatch);
     return;
+  case ISD::FREEZE:
+    Select_FREEZE(NodeToMatch);
+    return;
   }
 
   assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -1641,7 +1641,7 @@
   case ExtractValue:   return ISD::MERGE_VALUES;
   case InsertValue:    return ISD::MERGE_VALUES;
   case LandingPad:     return 0;
-  case Freeze:         return 0;
+  case Freeze:         return ISD::FREEZE;
   }
 
   llvm_unreachable("Unknown instruction type encountered!");
Index: test/CodeGen/X86/fast-isel-freeze.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/fast-isel-freeze.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=SDAG
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=FAST
+
+define i32 @freeze(i32 %t) {
+; SDAG: movl $10, %eax
+; SDAG-NEXT: xorl %edi, %eax
+; SDAG-NEXT: retq
+; FAST: movl $10, %eax
+; FAST-NEXT: xorl %edi, %eax
+; FAST-NEXT: retq
+  %1 = freeze i32 %t
+  %2 = freeze i32 10
+  %3 = xor i32 %1, %2
+  ret i32 %3
+}
Index: test/CodeGen/X86/fast-isel.ll
===================================================================
--- test/CodeGen/X86/fast-isel.ll
+++ test/CodeGen/X86/fast-isel.ll
@@ -99,6 +99,11 @@
   ret void
 }
 
+define void @freeze_i32(i32 %x) {
+  %t = freeze i32 %x
+  ret void
+}
+
 @crash_test1x = external global <2 x i32>, align 8
 
 define void @crash_test1() nounwind ssp {
Index: test/CodeGen/X86/freeze-legalize.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/freeze-legalize.ll
@@ -0,0 +1,68 @@
+; Make sure that seldag legalization works correctly for freeze instruction.
+; RUN: llc -march=x86 < %s 2>&1 | FileCheck %s
+
+; CHECK-LABEL: expand:
+; CHECK: movl $303174162, %eax
+; CHECK: movl $875836468, %ecx
+; CHECK: movl $1448498774, %edx
+; CHECK: xorl %eax, %edx
+; CHECK: movl $2021161080, %eax
+; CHECK: xorl %ecx, %eax
+; CHECK: retl
+
+define i64 @expand(i32 %x) {
+  %y1 = freeze i64 1302123111658042420 ; 0x1212121234343434
+  %y2 = freeze i64 6221254864647256184 ; 0x5656565678787878
+  %t2 = xor i64 %y1, %y2
+  ret i64 %t2
+}
+
+; CHECK-LABEL: expand_vec:
+; CHECK: movl $16843009, %ecx
+; CHECK: movl $589505315, %edx
+; CHECK: movl $303174162, %esi
+; CHECK: movl $875836468, %edi
+; CHECK: movl $1162167621, %ebx
+; CHECK: xorl %ecx, %ebx
+; CHECK: movl $1734829927, %ecx
+; CHECK: xorl %edx, %ecx
+; CHECK: movl $1448498774, %edx
+; CHECK: xorl %esi, %edx
+; CHECK: movl $2021161080, %esi
+; CHECK: xorl %edi, %esi
+
+define <2 x i64> @expand_vec(i32 %x) {
+  ; <0x1212121234343434, 0x0101010123232323>
+  %y1 = freeze <2 x i64> <i64 1302123111658042420, i64 72340173410738979>
+  ; <0x5656565678787878, 0x4545454567676767>
+  %y2 = freeze <2 x i64> <i64 6221254864647256184, i64 4991471926399952743>
+  %t2 = xor <2 x i64> %y1, %y2
+  ret <2 x i64> %t2
+}
+
+; CHECK-LABEL: promote:
+; CHECK: movw $682, %cx
+; CHECK: movw $992, %ax
+; CHECK: addl %ecx, %eax
+; CHECK: retl
+define i10 @promote() {
+  %a = freeze i10 682
+  %b = freeze i10 992
+  %res = add i10 %a, %b
+  ret i10 %res
+}
+
+; CHECK-LABEL: promote_vec:
+; CHECK: movw $125, %ax
+; CHECK: movw $682, %cx
+; CHECK: movw $393, %dx
+; CHECK: addl %eax, %edx
+; CHECK: movw $992, %ax
+; CHECK: addl %ecx, %eax
+; CHECK: retl
+define <2 x i10> @promote_vec() {
+  %a = freeze <2 x i10> <i10 682, i10 125>
+  %b = freeze <2 x i10> <i10 992, i10 393>
+  %res = add <2 x i10> %a, %b
+  ret <2 x i10> %res
+}
Index: test/CodeGen/X86/freeze.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/freeze.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s 2>&1 | FileCheck %s --check-prefix=X86ASM
+
+%struct.T = type { i32, i32 }
+
+define i32 @freeze_int() {
+; X86ASM-LABEL: freeze_int:
+; X86ASM:       # %bb.0:
+; X86ASM-NEXT:    imull %eax, %eax
+; X86ASM-NEXT:    xorl %eax, %eax
+; X86ASM-NEXT:    retq
+  %y1 = freeze i32 undef
+  %t1 = mul i32 %y1, %y1
+  %t2 = xor i32 %t1, %y1
+  ret i32 %t2
+}
+
+define i5 @freeze_int2() {
+; X86ASM-LABEL: freeze_int2:
+; X86ASM:       # %bb.0:
+; X86ASM-NEXT:    mulb %al
+; X86ASM-NEXT:    xorb %al, %al
+; X86ASM-NEXT:    retq
+  %y1 = freeze i5 undef
+  %t1 = mul i5 %y1, %y1
+  %t2 = xor i5 %t1, %y1
+  ret i5 %t2
+}
+
+define float @freeze_float() {
+; X86ASM-LABEL: freeze_float:
+; X86ASM:       # %bb.0:
+; X86ASM-NEXT:    addss %xmm0, %xmm0
+; X86ASM-NEXT:    retq
+  %y1 = freeze float undef
+  %t1 = fadd float %y1, %y1
+  ret float %t1
+}
+
+define <2 x i32> @freeze_ivec() {
+; X86ASM-LABEL: freeze_ivec:
+; X86ASM:       # %bb.0:
+; X86ASM-NEXT:    paddd %xmm0, %xmm0
+; X86ASM-NEXT:    retq
+  %y1 = freeze <2 x i32> undef
+  %t1 = add <2 x i32> %y1, %y1
+  ret <2 x i32> %t1
+}
+
+define i8* @freeze_ptr() {
+; X86ASM-LABEL: freeze_ptr:
+; X86ASM:       # %bb.0:
+; X86ASM-NEXT:    addq $4, %rax
+; X86ASM-NEXT:    retq
+  %y1 = freeze i8* undef
+  %t1 = getelementptr i8, i8* %y1, i64 4
+  ret i8* %t1
+}
+
+define i32 @freeze_struct() {
+; X86ASM-LABEL: freeze_struct:
+; X86ASM:       # %bb.0:
+; X86ASM-NEXT:    addl %eax, %eax
+; X86ASM-NEXT:    retq
+  %y1 = freeze %struct.T undef
+  %v1 = extractvalue %struct.T %y1, 0
+  %v2 = extractvalue %struct.T %y1, 1
+  %t1 = add i32 %v1, %v2
+  ret i32 %t1
+}
+
+define i32 @freeze_anonstruct() {
+; X86ASM-LABEL: freeze_anonstruct:
+; X86ASM:       # %bb.0:
+; X86ASM-NEXT:    addl %eax, %eax
+; X86ASM-NEXT:    retq
+  %y1 = freeze {i32, i32} undef
+  %v1 = extractvalue {i32, i32} %y1, 0
+  %v2 = extractvalue {i32, i32} %y1, 1
+  %t1 = add i32 %v1, %v2
+  ret i32 %t1
+}
+
+define i64 @freeze_array() {
+; X86ASM-LABEL: freeze_array:
+; X86ASM:       # %bb.0:
+; X86ASM-NEXT:    addq %rax, %rax
+; X86ASM-NEXT:    retq
+  %frozen = freeze [2 x i64] undef
+  %elt0 = extractvalue [2 x i64] %frozen, 0
+  %elt1 = extractvalue [2 x i64] %frozen, 1
+  %sum = add i64 %elt0, %elt1
+  ret i64 %sum
+}