diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -253,7 +253,9 @@ LLOnly, // Expand the (load) instruction into just a load-linked, which has // greater atomic guarantees than a normal load. CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. - MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop. + MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop. + BitTestIntrinsic, // Use a target-specific intrinsic for special bit + // operations; used by X86. }; /// Enum that specifies when a multiplication should be expanded. @@ -1951,6 +1953,14 @@ llvm_unreachable("Masked atomicrmw expansion unimplemented on this target"); } + /// Perform a bit test atomicrmw using a target-specific intrinsic. This + /// represents the combined bit test intrinsic which will be lowered at a late + /// stage by the backend. + virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const { + llvm_unreachable( + "Bit test atomicrmw expansion unimplemented on this target"); + } + /// Perform a masked cmpxchg using a target-specific intrinsic. This /// represents the core LL/SC loop which will be lowered at a late stage by /// the backend. diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -62,6 +62,16 @@ Intrinsic<[llvm_i32_ty], [], []>; } +// Lock bit test. +let TargetPrefix = "x86" in { + def int_x86_atomic_bts : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty], + [ImmArg<ArgIndex<1>>]>; + def int_x86_atomic_btc : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty], + [ImmArg<ArgIndex<1>>]>; + def int_x86_atomic_btr : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty], + [ImmArg<ArgIndex<1>>]>; +} + //===----------------------------------------------------------------------===// // CET SS let TargetPrefix = "x86" in { diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -621,6 +621,10 @@ expandAtomicRMWToMaskedIntrinsic(AI); return true; } + case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: { + TLI->emitBitTestAtomicRMWIntrinsic(AI); + return true; + } default: llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -787,6 +787,9 @@ LOR, LXOR, LAND, + LBTS, + LBTC, + LBTR, // Load, scalar_to_vector, and zero extend.
VZEXT_LOAD, @@ -1640,6 +1643,9 @@ bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const; + void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5442,6 +5442,18 @@ Info.align = Align(1); Info.flags |= MachineMemOperand::MOLoad; return true; + case Intrinsic::x86_atomic_bts: + case Intrinsic::x86_atomic_btc: + case Intrinsic::x86_atomic_btr: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.ptrVal = I.getArgOperand(0); + unsigned Size = I.getType()->getScalarSizeInBits(); + Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size); + Info.align = Align(Size); + Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore | + MachineMemOperand::MOVolatile; + return true; + } } return false; } @@ -27510,6 +27522,30 @@ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC, Operation.getValue(1)); } + case Intrinsic::x86_atomic_bts: + case Intrinsic::x86_atomic_btc: + case Intrinsic::x86_atomic_btr: { + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + SDValue Chain = Op.getOperand(0); + SDValue Op1 = Op.getOperand(2); + SDValue Op2 = Op.getOperand(3); + unsigned Opc = IntNo == Intrinsic::x86_atomic_bts ? X86ISD::LBTS + : IntNo == Intrinsic::x86_atomic_btc ? X86ISD::LBTC + : X86ISD::LBTR; + SDValue Size = DAG.getConstant(VT.getScalarSizeInBits(), DL, MVT::i32); + MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand(); + SDValue Res = + DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::Other), + {Chain, Op1, Op2, Size}, VT, MMO); + Chain = Res.getValue(1); + Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT); + unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue(); + if (Imm) + Res = DAG.getNode(ISD::SHL, DL, VT, Res, + DAG.getShiftAmountConstant(Imm, VT, DL)); + return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain); + } } return SDValue(); } @@ -30427,6 +30463,65 @@ : AtomicExpansionKind::None; } +TargetLowering::AtomicExpansionKind +X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const { + // If the atomicrmw's result isn't actually used, we can just add a "lock" + // prefix to a normal instruction for these operations. + if (AI->use_empty()) + return AtomicExpansionKind::None; + + // If the atomicrmw's result is used by a single-bit AND, we may use the + // bts/btr/btc instructions for these operations. + auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand()); + Instruction *I = AI->user_back(); + if (!C1 || !AI->hasOneUse() || I->getOpcode() != Instruction::And || + AI->getParent() != I->getParent()) + return AtomicExpansionKind::CmpXChg; + // The following instruction must be an AND with a single-bit constant. + auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1)); + unsigned Bits = AI->getType()->getPrimitiveSizeInBits(); + if (!C2 || Bits == 8 || !isPowerOf2_64(C2->getZExtValue())) + return AtomicExpansionKind::CmpXChg; + + if (AI->getOperation() == AtomicRMWInst::And) + return ~C1->getValue() == C2->getValue() + ? AtomicExpansionKind::BitTestIntrinsic + : AtomicExpansionKind::CmpXChg; + + return C1 == C2 ?
AtomicExpansionKind::BitTestIntrinsic + : AtomicExpansionKind::CmpXChg; +} + +void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const { + IRBuilder<> Builder(AI); + Intrinsic::ID IID = Intrinsic::not_intrinsic; + switch (AI->getOperation()) { + default: + llvm_unreachable("Unknown atomic operation"); + case AtomicRMWInst::Or: + IID = Intrinsic::x86_atomic_bts; + break; + case AtomicRMWInst::Xor: + IID = Intrinsic::x86_atomic_btc; + break; + case AtomicRMWInst::And: + IID = Intrinsic::x86_atomic_btr; + break; + } + Instruction *I = AI->user_back(); + LLVMContext &Ctx = AI->getContext(); + unsigned Imm = + countTrailingZeros(cast<ConstantInt>(I->getOperand(1))->getZExtValue()); + Function *BitTest = + Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType()); + Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(), + Type::getInt8PtrTy(Ctx)); + Value *Result = Builder.CreateCall(BitTest, {Addr, Builder.getInt8(Imm)}); + I->replaceAllUsesWith(Result); + I->eraseFromParent(); + AI->eraseFromParent(); +} + TargetLowering::AtomicExpansionKind X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; @@ -30451,10 +30546,7 @@ case AtomicRMWInst::Or: case AtomicRMWInst::And: case AtomicRMWInst::Xor: - // If the atomicrmw's result isn't actually used, we can just add a "lock" - // prefix to a normal instruction for these operations. - return !AI->use_empty() ? AtomicExpansionKind::CmpXChg - : AtomicExpansionKind::None; + return shouldExpandLogicAtomicRMWInIR(AI); case AtomicRMWInst::Nand: case AtomicRMWInst::Max: case AtomicRMWInst::Min: @@ -32935,6 +33027,9 @@ NODE_NAME_CASE(LOR) NODE_NAME_CASE(LXOR) NODE_NAME_CASE(LAND) + NODE_NAME_CASE(LBTS) + NODE_NAME_CASE(LBTC) + NODE_NAME_CASE(LBTR) NODE_NAME_CASE(VZEXT_MOVL) NODE_NAME_CASE(VZEXT_LOAD) NODE_NAME_CASE(VEXTRACT_STORE) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -839,6 +839,38 @@ def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>; } +// Atomic bit test. +def X86LBTest : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, + SDTCisVT<2, i8>, SDTCisVT<3, i32>]>; +def x86bts : SDNode<"X86ISD::LBTS", X86LBTest, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; +def x86btc : SDNode<"X86ISD::LBTC", X86LBTest, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; +def x86btr : SDNode<"X86ISD::LBTR", X86LBTest, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; + +multiclass ATOMIC_LOGIC_OP<Format Form, string s> { + let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, + SchedRW = [WriteBitTestSetRegRMW] in { + def 16m : Ii8<0xBA, Form, (outs), (ins i16mem:$src1, i8imm:$src2), + !strconcat(s, "{w}\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 16)))]>, + OpSize16, TB, LOCK; + def 32m : Ii8<0xBA, Form, (outs), (ins i32mem:$src1, i8imm:$src2), + !strconcat(s, "{l}\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 32)))]>, + OpSize32, TB, LOCK; + def 64m : RIi8<0xBA, Form, (outs), (ins i64mem:$src1, i8imm:$src2), + !strconcat(s, "{q}\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 64)))]>, + TB, LOCK; + } +} + +defm LOCK_BTS : ATOMIC_LOGIC_OP<MRM5m, "bts">; +defm LOCK_BTC : ATOMIC_LOGIC_OP<MRM7m, "btc">; +defm LOCK_BTR : ATOMIC_LOGIC_OP<MRM6m, "btr">; + // Atomic compare and swap.
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form, string mnemonic, SDPatternOperator frag> { diff --git a/llvm/test/CodeGen/X86/atomic-bit-test.ll b/llvm/test/CodeGen/X86/atomic-bit-test.ll --- a/llvm/test/CodeGen/X86/atomic-bit-test.ll +++ b/llvm/test/CodeGen/X86/atomic-bit-test.ll @@ -9,35 +9,17 @@ define i16 @bts1() nounwind { ; X86-LABEL: bts1: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB0_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $1, %ecx -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB0_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $1, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsw $0, v16 +; X86-NEXT: setb %al ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: bts1: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB0_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl $1, %ecx -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB0_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $1, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsw $0, v16(%rip) +; X64-NEXT: setb %al ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -49,35 +31,19 @@ define i16 @bts2() nounwind { ; X86-LABEL: bts2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB1_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $2, %ecx -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB1_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $2, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsw $1, v16 +; X86-NEXT: setb %al +; X86-NEXT: addl %eax, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: bts2: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB1_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl $2, %ecx -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB1_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $2, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsw $1, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: addl %eax, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -89,35 +55,19 @@ define i16 @bts15() nounwind { ; X86-LABEL: bts15: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB2_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $32768, %ecx # imm = 0x8000 -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB2_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl
$32768, %eax # imm = 0x8000 +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsw $15, v16 +; X86-NEXT: setb %al +; X86-NEXT: shll $15, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: bts15: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB2_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl $32768, %ecx # imm = 0x8000 -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB2_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $32768, %eax # imm = 0x8000 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsw $15, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $15, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -129,30 +79,18 @@ define i32 @bts31() nounwind { ; X86-LABEL: bts31: ; X86: # %bb.0: # %entry -; X86-NEXT: movl v32, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB3_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $-2147483648, %ecx # imm = 0x80000000 -; X86-NEXT: lock cmpxchgl %ecx, v32 -; X86-NEXT: jne .LBB3_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsl $31, v32 +; X86-NEXT: setb %al +; X86-NEXT: shll $31, %eax ; X86-NEXT: retl ; ; X64-LABEL: bts31: ; X64: # %bb.0: # %entry -; X64-NEXT: movl v32(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB3_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl $-2147483648, %ecx # imm = 0x80000000 -; X64-NEXT: lock cmpxchgl %ecx, v32(%rip) -; X64-NEXT: jne .LBB3_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsl $31, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $31, %eax ; X64-NEXT: retq entry: %0 = atomicrmw or i32* @v32, i32 2147483648 monotonic, align 4 @@ -185,17 +123,10 @@ ; ; X64-LABEL: bts63: ; X64: # %bb.0: # %entry -; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; X64-NEXT: movq v64(%rip), %rax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB4_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movq %rax, %rdx -; X64-NEXT: orq %rcx, %rdx -; X64-NEXT: lock cmpxchgq %rdx, v64(%rip) -; X64-NEXT: jne .LBB4_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andq %rcx, %rax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsq $63, v64(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shlq $63, %rax ; X64-NEXT: retq entry: %0 = atomicrmw or i64* @v64, i64 -9223372036854775808 monotonic, align 8 @@ -206,35 +137,17 @@ define i16 @btc1() nounwind { ; X86-LABEL: btc1: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB5_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: xorl $1, %ecx -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB5_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $1, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcw $0, v16 +; X86-NEXT: setb %al ; X86-NEXT: # kill: def $ax killed $ax 
killed $eax ; X86-NEXT: retl ; ; X64-LABEL: btc1: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB5_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: xorl $1, %ecx -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB5_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $1, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcw $0, v16(%rip) +; X64-NEXT: setb %al ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -246,35 +159,19 @@ define i16 @btc2() nounwind { ; X86-LABEL: btc2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB6_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: xorl $2, %ecx -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB6_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $2, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcw $1, v16 +; X86-NEXT: setb %al +; X86-NEXT: addl %eax, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: btc2: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB6_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: xorl $2, %ecx -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB6_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $2, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcw $1, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: addl %eax, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -286,35 +183,19 @@ define i16 @btc15() nounwind { ; X86-LABEL: btc15: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB7_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: xorl $32768, %ecx # imm = 0x8000 -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB7_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $32768, %eax # imm = 0x8000 +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcw $15, v16 +; X86-NEXT: setb %al +; X86-NEXT: shll $15, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: btc15: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB7_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: xorl $32768, %ecx # imm = 0x8000 -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB7_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $32768, %eax # imm = 0x8000 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcw $15, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $15, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ 
-326,30 +207,18 @@ define i32 @btc31() nounwind { ; X86-LABEL: btc31: ; X86: # %bb.0: # %entry -; X86-NEXT: movl v32, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB8_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: xorl $-2147483648, %ecx # imm = 0x80000000 -; X86-NEXT: lock cmpxchgl %ecx, v32 -; X86-NEXT: jne .LBB8_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcl $31, v32 +; X86-NEXT: setb %al +; X86-NEXT: shll $31, %eax ; X86-NEXT: retl ; ; X64-LABEL: btc31: ; X64: # %bb.0: # %entry -; X64-NEXT: movl v32(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB8_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: xorl $-2147483648, %ecx # imm = 0x80000000 -; X64-NEXT: lock cmpxchgl %ecx, v32(%rip) -; X64-NEXT: jne .LBB8_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcl $31, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $31, %eax ; X64-NEXT: retq entry: %0 = atomicrmw xor i32* @v32, i32 2147483648 monotonic, align 4 @@ -382,17 +251,10 @@ ; ; X64-LABEL: btc63: ; X64: # %bb.0: # %entry -; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; X64-NEXT: movq v64(%rip), %rax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB9_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movq %rax, %rdx -; X64-NEXT: xorq %rcx, %rdx -; X64-NEXT: lock cmpxchgq %rdx, v64(%rip) -; X64-NEXT: jne .LBB9_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andq %rcx, %rax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcq $63, v64(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shlq $63, %rax ; X64-NEXT: retq entry: %0 = atomicrmw xor i64* @v64, i64 -9223372036854775808 monotonic, align 8 @@ -403,35 +265,17 @@ define i16 @btr1() nounwind { ; X86-LABEL: btr1: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB10_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $65534, %ecx # imm = 0xFFFE -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB10_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $1, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrw $0, v16 +; X86-NEXT: setb %al ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: btr1: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB10_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andl $65534, %ecx # imm = 0xFFFE -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB10_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $1, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrw $0, v16(%rip) +; X64-NEXT: setb %al ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -443,35 +287,19 @@ define i16 @btr2() nounwind { ; X86-LABEL: btr2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB11_1: # %atomicrmw.start -; 
X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $65533, %ecx # imm = 0xFFFD -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB11_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $2, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrw $1, v16 +; X86-NEXT: setb %al +; X86-NEXT: addl %eax, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: btr2: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB11_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andl $65533, %ecx # imm = 0xFFFD -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB11_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $2, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrw $1, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: addl %eax, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -483,35 +311,19 @@ define i16 @btr15() nounwind { ; X86-LABEL: btr15: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB12_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB12_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $32768, %eax # imm = 0x8000 +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrw $15, v16 +; X86-NEXT: setb %al +; X86-NEXT: shll $15, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: btr15: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB12_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andl $32767, %ecx # imm = 0x7FFF -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB12_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $32768, %eax # imm = 0x8000 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrw $15, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $15, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -523,30 +335,18 @@ define i32 @btr31() nounwind { ; X86-LABEL: btr31: ; X86: # %bb.0: # %entry -; X86-NEXT: movl v32, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB13_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; X86-NEXT: lock cmpxchgl %ecx, v32 -; X86-NEXT: jne .LBB13_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrl $31, v32 +; X86-NEXT: setb %al +; X86-NEXT: shll $31, %eax ; X86-NEXT: retl ; ; X64-LABEL: btr31: ; X64: # %bb.0: # %entry -; X64-NEXT: movl v32(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB13_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, 
%ecx -; X64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; X64-NEXT: lock cmpxchgl %ecx, v32(%rip) -; X64-NEXT: jne .LBB13_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrl $31, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $31, %eax ; X64-NEXT: retq entry: %0 = atomicrmw and i32* @v32, i32 2147483647 monotonic, align 4 @@ -585,18 +385,10 @@ ; ; X64-LABEL: btr63: ; X64: # %bb.0: # %entry -; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: movq v64(%rip), %rax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB14_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movq %rax, %rdx -; X64-NEXT: andq %rcx, %rdx -; X64-NEXT: lock cmpxchgq %rdx, v64(%rip) -; X64-NEXT: jne .LBB14_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: incq %rcx -; X64-NEXT: andq %rcx, %rax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrq $63, v64(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shlq $63, %rax ; X64-NEXT: retq entry: %0 = atomicrmw and i64* @v64, i64 9223372036854775807 monotonic, align 8 @@ -655,36 +447,18 @@ define i16 @multi_use2() nounwind { ; X86-LABEL: multi_use2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB16_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $1, %ecx -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB16_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $1, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsw $0, v16 +; X86-NEXT: setb %al ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: multi_use2: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB16_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl $1, %ecx -; X64-NEXT: # kill: def $ax killed $ax killed $rax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $rax -; X64-NEXT: jne .LBB16_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $1, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsw $0, v16(%rip) +; X64-NEXT: setb %al ; X64-NEXT: leal (%rax,%rax,2), %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq @@ -764,39 +538,23 @@ define void @no_and_cmp0_fold() nounwind { ; X86-LABEL: no_and_cmp0_fold: ; X86: # %bb.0: # %entry -; X86-NEXT: movl v32, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB18_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $8, %ecx -; X86-NEXT: lock cmpxchgl %ecx, v32 -; X86-NEXT: jne .LBB18_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: lock btsl $3, v32 ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: testb %al, %al -; X86-NEXT: je .LBB18_3 -; X86-NEXT: # %bb.4: # %if.end +; X86-NEXT: je .LBB18_1 +; X86-NEXT: # %bb.2: # %if.end ; X86-NEXT: retl -; X86-NEXT: .LBB18_3: # %if.then +; X86-NEXT: .LBB18_1: # %if.then ; ; X64-LABEL: no_and_cmp0_fold: ; X64: # %bb.0: # %entry -; X64-NEXT: movl v32(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB18_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl 
$8, %ecx -; X64-NEXT: lock cmpxchgl %ecx, v32(%rip) -; X64-NEXT: jne .LBB18_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: lock btsl $3, v32(%rip) ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: testb %al, %al -; X64-NEXT: je .LBB18_3 -; X64-NEXT: # %bb.4: # %if.end +; X64-NEXT: je .LBB18_1 +; X64-NEXT: # %bb.2: # %if.end ; X64-NEXT: retq -; X64-NEXT: .LBB18_3: # %if.then +; X64-NEXT: .LBB18_1: # %if.then entry: %0 = atomicrmw or i32* @v32, i32 8 monotonic, align 4 %and = and i32 %0, 8 @@ -815,32 +573,20 @@ ; X86-LABEL: split_hoist_and: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl v32, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB19_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %edx -; X86-NEXT: orl $8, %edx -; X86-NEXT: lock cmpxchgl %edx, v32 -; X86-NEXT: jne .LBB19_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsl $3, v32 +; X86-NEXT: setb %al +; X86-NEXT: shll $3, %eax ; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: andl $8, %eax ; X86-NEXT: retl ; ; X64-LABEL: split_hoist_and: ; X64: # %bb.0: -; X64-NEXT: movl v32(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB19_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl $8, %ecx -; X64-NEXT: lock cmpxchgl %ecx, v32(%rip) -; X64-NEXT: jne .LBB19_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsl $3, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $3, %eax ; X64-NEXT: testl %edi, %edi -; X64-NEXT: andl $8, %eax ; X64-NEXT: retq %2 = atomicrmw or i32* @v32, i32 8 monotonic, align 4 %3 = tail call i32 @llvm.ctlz.i32(i32 %0, i1 false)
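Note (illustrative sketch, not part of the patch): the IR-level rewrite the new hook performs can be seen on the bts1 test case above. The exact IR below is an assumed example reconstructed from the test file and from emitBitTestAtomicRMWIntrinsic; the intrinsic name follows the llvm_anyint_ty overload mangling in IntrinsicsX86.td.
; Pattern matched by shouldExpandLogicAtomicRMWInIR: an atomicrmw or/xor/and of a
; power-of-two (or inverted power-of-two) constant whose single use is an AND
; with that same bit, e.g.
;   %0 = atomicrmw or i16* @v16, i16 1 monotonic, align 2
;   %and = and i16 %0, 1
; After AtomicExpandPass invokes emitBitTestAtomicRMWIntrinsic this becomes,
; roughly:
;   %addr = bitcast i16* @v16 to i8*
;   %res = call i16 @llvm.x86.atomic.bts.i16(i8* %addr, i8 0)
; LowerINTRINSIC_W_CHAIN then selects the LBTS node as "lock btsw $0, v16",
; materializes the carry flag with setb, and shifts it back to the tested bit
; position when the bit index is nonzero.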