Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -801,6 +801,9 @@ // be done with two 4-byte compares instead of 4+2+1-byte compares. This // requires all loads in LoadSizes to be doable in an unaligned way. bool AllowOverlappingLoads = false; + + // If true, expand the comparison using cmp+or instead of xor+or. + bool PreferCmpToExpand = false; }; MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; Index: llvm/lib/CodeGen/ExpandMemCmp.cpp =================================================================== --- llvm/lib/CodeGen/ExpandMemCmp.cpp +++ llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -80,6 +80,7 @@ BasicBlock *EndBlock; PHINode *PhiRes; const bool IsUsedForZeroCmp; + const bool IsPreferCmpToExpand; const DataLayout &DL; DomTreeUpdater *DTU; IRBuilder<> Builder; @@ -223,8 +224,9 @@ const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout, DomTreeUpdater *DTU) : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock), - IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU), - Builder(CI) { + IsUsedForZeroCmp(IsUsedForZeroCmp), + IsPreferCmpToExpand(Options.PreferCmpToExpand), DL(TheDataLayout), + DTU(DTU), Builder(CI) { assert(Size > 0 && "zero blocks"); // Scale the max size down if the target can load more bytes than we need. llvm::ArrayRef LoadSizes(Options.LoadSizes); @@ -371,7 +373,7 @@ unsigned &LoadIndex) { assert(LoadIndex < getNumLoads() && "getCompareLoadPairs() called with no remaining loads"); - std::vector XorList, OrList; + std::vector CompareList, OrList; Value *Diff = nullptr; const unsigned NumLoads = @@ -398,10 +400,14 @@ if (NumLoads != 1) { // If we have multiple loads per block, we need to generate a composite - // comparison using xor+or. 
- Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs); - Diff = Builder.CreateZExt(Diff, MaxLoadType); - XorList.push_back(Diff); + // comparison. + if (IsPreferCmpToExpand) { + Diff = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs); + } else { + Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs); + Diff = Builder.CreateZExt(Diff, MaxLoadType); + } + CompareList.push_back(Diff); } else { // If there's only one load per block, we just compare the loaded values. Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs); @@ -421,7 +427,7 @@ if (!Cmp) { // Pairwise OR the XOR results. - OrList = pairWiseOr(XorList); + OrList = pairWiseOr(CompareList); // Pairwise OR the OR results until one result left. while (OrList.size() != 1) { @@ -429,7 +435,11 @@ } assert(Diff && "Failed to find comparison diff"); - Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); + if (IsPreferCmpToExpand) + Cmp = OrList[0]; + else + Cmp = + Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); } return Cmp; Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2298,6 +2298,7 @@ return Options; } Options.AllowOverlappingLoads = true; + Options.PreferCmpToExpand = true; Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); Options.NumLoadsPerBlock = Options.MaxNumLoads; // TODO: Though vector loads usually perform well on AArch64, in some targets Index: llvm/test/CodeGen/AArch64/bcmp-inline-small.ll =================================================================== --- llvm/test/CodeGen/AArch64/bcmp-inline-small.ll +++ llvm/test/CodeGen/AArch64/bcmp-inline-small.ll @@ -70,18 +70,17 @@ ; CHECKN-NEXT: ldp x8, x9, [x0] ; CHECKN-NEXT: ldp x10, x11, [x1] ; CHECKN-NEXT: ldr x12, [x0, #16] -; CHECKN-NEXT: ldr x13, [x1, #16] -; CHECKN-NEXT: ldur x14, [x0, #23] -; CHECKN-NEXT: eor 
x8, x8, x10 -; CHECKN-NEXT: ldur x15, [x1, #23] -; CHECKN-NEXT: eor x9, x9, x11 -; CHECKN-NEXT: eor x10, x12, x13 -; CHECKN-NEXT: orr x8, x8, x9 -; CHECKN-NEXT: eor x11, x14, x15 -; CHECKN-NEXT: orr x9, x10, x11 -; CHECKN-NEXT: orr x8, x8, x9 -; CHECKN-NEXT: cmp x8, #0 -; CHECKN-NEXT: cset w0, eq +; CHECKN-NEXT: cmp x8, x10 +; CHECKN-NEXT: ldr x8, [x1, #16] +; CHECKN-NEXT: ccmp x9, x11, #0, eq +; CHECKN-NEXT: ldur x9, [x0, #23] +; CHECKN-NEXT: ldur x10, [x1, #23] +; CHECKN-NEXT: cset w11, ne +; CHECKN-NEXT: cmp x12, x8 +; CHECKN-NEXT: ccmp x9, x10, #0, eq +; CHECKN-NEXT: cset w8, ne +; CHECKN-NEXT: orr w8, w11, w8 +; CHECKN-NEXT: eor w0, w8, #0x1 ; CHECKN-NEXT: ret ; ; CHECKS-LABEL: test_bs: Index: llvm/test/CodeGen/AArch64/bcmp.ll =================================================================== --- llvm/test/CodeGen/AArch64/bcmp.ll +++ llvm/test/CodeGen/AArch64/bcmp.ll @@ -46,10 +46,8 @@ ; CHECK-NEXT: ldrh w9, [x1] ; CHECK-NEXT: ldrb w10, [x0, #2] ; CHECK-NEXT: ldrb w11, [x1, #2] -; CHECK-NEXT: eor w8, w8, w9 -; CHECK-NEXT: eor w9, w10, w11 -; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: ccmp w10, w11, #0, eq ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 3) @@ -77,10 +75,8 @@ ; CHECK-NEXT: ldr w9, [x1] ; CHECK-NEXT: ldrb w10, [x0, #4] ; CHECK-NEXT: ldrb w11, [x1, #4] -; CHECK-NEXT: eor w8, w8, w9 -; CHECK-NEXT: eor w9, w10, w11 -; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: ccmp w10, w11, #0, eq ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 5) @@ -95,10 +91,8 @@ ; CHECK-NEXT: ldr w9, [x1] ; CHECK-NEXT: ldrh w10, [x0, #4] ; CHECK-NEXT: ldrh w11, [x1, #4] -; CHECK-NEXT: eor w8, w8, w9 -; CHECK-NEXT: eor w9, w10, w11 -; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: ccmp w10, w11, #0, eq ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call 
i32 @bcmp(ptr %a, ptr %b, i64 6) @@ -138,15 +132,12 @@ define i1 @bcmp9(ptr %a, ptr %b) { ; CHECK-LABEL: bcmp9: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w9, [x0, #8] -; CHECK-NEXT: ldrb w10, [x1, #8] ; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x11, [x1] -; CHECK-NEXT: eor w9, w9, w10 -; CHECK-NEXT: and x9, x9, #0xff -; CHECK-NEXT: eor x8, x8, x11 -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: ldrb w10, [x0, #8] +; CHECK-NEXT: ldrb w11, [x1, #8] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: ccmp x10, x11, #0, eq ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 9) @@ -157,15 +148,12 @@ define i1 @bcmp10(ptr %a, ptr %b) { ; CHECK-LABEL: bcmp10: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x0, #8] -; CHECK-NEXT: ldrh w10, [x1, #8] ; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x11, [x1] -; CHECK-NEXT: eor w9, w9, w10 -; CHECK-NEXT: and x9, x9, #0xffff -; CHECK-NEXT: eor x8, x8, x11 -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: ldrh w10, [x0, #8] +; CHECK-NEXT: ldrh w11, [x1, #8] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: ccmp x10, x11, #0, eq ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 10) @@ -196,10 +184,8 @@ ; CHECK-NEXT: ldr x9, [x1] ; CHECK-NEXT: ldr w10, [x0, #8] ; CHECK-NEXT: ldr w11, [x1, #8] -; CHECK-NEXT: eor x8, x8, x9 -; CHECK-NEXT: eor w9, w10, w11 -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: ccmp x10, x11, #0, eq ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 12) @@ -274,14 +260,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldp x8, x9, [x0] ; CHECK-NEXT: ldp x10, x11, [x1] -; CHECK-NEXT: ldr w12, [x0, #16] -; CHECK-NEXT: ldr w13, [x1, #16] -; CHECK-NEXT: eor x8, x8, x10 -; CHECK-NEXT: eor x9, x9, x11 -; CHECK-NEXT: eor w10, w12, w13 -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: orr x8, x8, x10 -; 
CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ldr w8, [x0, #16] +; CHECK-NEXT: ldr w10, [x1, #16] +; CHECK-NEXT: ccmp x9, x11, #0, eq +; CHECK-NEXT: ccmp x8, x10, #0, eq ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 20) @@ -294,14 +277,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldp x8, x9, [x0] ; CHECK-NEXT: ldp x10, x11, [x1] -; CHECK-NEXT: ldr x12, [x0, #16] -; CHECK-NEXT: ldr x13, [x1, #16] -; CHECK-NEXT: eor x8, x8, x10 -; CHECK-NEXT: eor x9, x9, x11 -; CHECK-NEXT: eor x10, x12, x13 -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: orr x8, x8, x10 -; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ldr x8, [x0, #16] +; CHECK-NEXT: ldr x10, [x1, #16] +; CHECK-NEXT: ccmp x9, x11, #0, eq +; CHECK-NEXT: ccmp x8, x10, #0, eq ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 24) @@ -315,18 +295,17 @@ ; CHECK-NEXT: ldp x8, x9, [x0] ; CHECK-NEXT: ldp x10, x11, [x1] ; CHECK-NEXT: ldr x12, [x0, #16] -; CHECK-NEXT: ldr x13, [x1, #16] -; CHECK-NEXT: ldr w14, [x0, #24] -; CHECK-NEXT: eor x8, x8, x10 -; CHECK-NEXT: ldr w15, [x1, #24] -; CHECK-NEXT: eor x9, x9, x11 -; CHECK-NEXT: eor x10, x12, x13 -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: eor w11, w14, w15 -; CHECK-NEXT: orr x9, x10, x11 -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ldr x8, [x1, #16] +; CHECK-NEXT: ccmp x9, x11, #0, eq +; CHECK-NEXT: ldr w9, [x0, #24] +; CHECK-NEXT: ldr w10, [x1, #24] +; CHECK-NEXT: cset w11, ne +; CHECK-NEXT: cmp x12, x8 +; CHECK-NEXT: ccmp x9, x10, #0, eq +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: orr w8, w11, w8 +; CHECK-NEXT: eor w0, w8, #0x1 ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 28) %r = icmp eq i32 %cr, 0 @@ -336,24 +315,22 @@ define i1 @bcmp33(ptr %a, ptr %b) { ; CHECK-LABEL: bcmp33: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x9, [x0] -; CHECK-NEXT: ldp x10, x11, [x1] -; CHECK-NEXT: ldp 
x12, x13, [x0, #16] -; CHECK-NEXT: ldp x14, x15, [x1, #16] -; CHECK-NEXT: eor x8, x8, x10 -; CHECK-NEXT: eor x9, x9, x11 -; CHECK-NEXT: ldrb w16, [x0, #32] -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: ldrb w17, [x1, #32] -; CHECK-NEXT: eor x10, x12, x14 -; CHECK-NEXT: eor x11, x13, x15 -; CHECK-NEXT: eor w12, w16, w17 -; CHECK-NEXT: orr x9, x10, x11 -; CHECK-NEXT: and x10, x12, #0xff -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: orr x8, x8, x10 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldrb w8, [x0, #32] +; CHECK-NEXT: ldrb w9, [x1, #32] +; CHECK-NEXT: ldp x10, x11, [x0] +; CHECK-NEXT: ldp x12, x13, [x1] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: cmp x10, x12 +; CHECK-NEXT: ldp x9, x10, [x0, #16] +; CHECK-NEXT: ccmp x11, x13, #0, eq +; CHECK-NEXT: ldp x11, x12, [x1, #16] +; CHECK-NEXT: cset w13, ne +; CHECK-NEXT: cmp x9, x11 +; CHECK-NEXT: ccmp x10, x12, #0, eq +; CHECK-NEXT: cset w9, ne +; CHECK-NEXT: orr w9, w13, w9 +; CHECK-NEXT: bic w0, w8, w9 ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 33) %r = icmp eq i32 %cr, 0 @@ -363,23 +340,22 @@ define i1 @bcmp38(ptr %a, ptr %b) { ; CHECK-LABEL: bcmp38: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x9, [x0] -; CHECK-NEXT: ldp x10, x11, [x1] -; CHECK-NEXT: ldp x12, x13, [x0, #16] -; CHECK-NEXT: ldp x14, x15, [x1, #16] -; CHECK-NEXT: eor x8, x8, x10 -; CHECK-NEXT: eor x9, x9, x11 -; CHECK-NEXT: ldur x10, [x0, #30] -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: ldur x11, [x1, #30] -; CHECK-NEXT: eor x12, x12, x14 -; CHECK-NEXT: eor x13, x13, x15 -; CHECK-NEXT: orr x9, x12, x13 -; CHECK-NEXT: eor x10, x10, x11 -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: orr x8, x8, x10 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldur x8, [x0, #30] +; CHECK-NEXT: ldur x9, [x1, #30] +; CHECK-NEXT: ldp x10, x11, [x0] +; CHECK-NEXT: ldp x12, x13, [x1] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: cmp x10, x12 +; CHECK-NEXT: ldp x9, x10, [x0, 
#16] +; CHECK-NEXT: ccmp x11, x13, #0, eq +; CHECK-NEXT: ldp x11, x12, [x1, #16] +; CHECK-NEXT: cset w13, ne +; CHECK-NEXT: cmp x9, x11 +; CHECK-NEXT: ccmp x10, x12, #0, eq +; CHECK-NEXT: cset w9, ne +; CHECK-NEXT: orr w9, w13, w9 +; CHECK-NEXT: bic w0, w8, w9 ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 38) %r = icmp eq i32 %cr, 0 @@ -391,25 +367,24 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldp x8, x9, [x0] ; CHECK-NEXT: ldp x10, x11, [x1] -; CHECK-NEXT: ldp x12, x13, [x0, #16] -; CHECK-NEXT: ldp x14, x15, [x1, #16] -; CHECK-NEXT: eor x8, x8, x10 -; CHECK-NEXT: eor x9, x9, x11 -; CHECK-NEXT: ldr x16, [x0, #32] -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: ldr x17, [x1, #32] -; CHECK-NEXT: ldur x18, [x0, #37] -; CHECK-NEXT: eor x10, x12, x14 -; CHECK-NEXT: ldur x0, [x1, #37] -; CHECK-NEXT: eor x11, x13, x15 -; CHECK-NEXT: eor x12, x16, x17 -; CHECK-NEXT: orr x9, x10, x11 -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: eor x13, x18, x0 -; CHECK-NEXT: orr x10, x12, x13 -; CHECK-NEXT: orr x8, x8, x10 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ldr x12, [x0, #32] +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ldp x8, x10, [x0, #16] +; CHECK-NEXT: ccmp x9, x11, #0, eq +; CHECK-NEXT: ldp x9, x11, [x1, #16] +; CHECK-NEXT: cset w13, ne +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: ldr x8, [x1, #32] +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: ldur x9, [x0, #37] +; CHECK-NEXT: ldur x10, [x1, #37] +; CHECK-NEXT: cset w11, ne +; CHECK-NEXT: cmp x12, x8 +; CHECK-NEXT: ccmp x9, x10, #0, eq +; CHECK-NEXT: orr w9, w13, w11 +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: eor w0, w8, #0x1 ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 45) %r = icmp eq i32 %cr, 0 @@ -421,29 +396,28 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldp x8, x9, [x0] ; CHECK-NEXT: ldp x10, x11, [x1] -; CHECK-NEXT: ldp x12, x13, [x0, #16] -; CHECK-NEXT: ldp x14, x15, [x1, #16] -; CHECK-NEXT: eor x8, x8, x10 -; CHECK-NEXT: eor x9, x9, x11 -; CHECK-NEXT: ldp 
x16, x17, [x0, #32] -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: ldp x18, x2, [x1, #32] -; CHECK-NEXT: eor x12, x12, x14 -; CHECK-NEXT: eor x13, x13, x15 -; CHECK-NEXT: ldp x3, x0, [x0, #48] -; CHECK-NEXT: orr x9, x12, x13 -; CHECK-NEXT: ldp x10, x11, [x1, #48] -; CHECK-NEXT: eor x14, x16, x18 -; CHECK-NEXT: eor x15, x17, x2 -; CHECK-NEXT: orr x12, x14, x15 -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: eor x10, x3, x10 -; CHECK-NEXT: eor x11, x0, x11 -; CHECK-NEXT: orr x10, x10, x11 -; CHECK-NEXT: orr x9, x12, x10 -; CHECK-NEXT: orr x8, x8, x9 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ldp x8, x10, [x0, #16] +; CHECK-NEXT: ccmp x9, x11, #0, eq +; CHECK-NEXT: ldp x9, x11, [x1, #16] +; CHECK-NEXT: cset w12, ne +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: ldp x8, x9, [x0, #32] +; CHECK-NEXT: ccmp x10, x11, #0, eq +; CHECK-NEXT: ldp x10, x11, [x1, #32] +; CHECK-NEXT: cset w13, ne +; CHECK-NEXT: cmp x8, x10 +; CHECK-NEXT: ldp x14, x8, [x0, #48] +; CHECK-NEXT: ccmp x9, x11, #0, eq +; CHECK-NEXT: ldp x9, x10, [x1, #48] +; CHECK-NEXT: cset w11, ne +; CHECK-NEXT: cmp x14, x9 +; CHECK-NEXT: orr w9, w12, w13 +; CHECK-NEXT: ccmp x8, x10, #0, eq +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: orr w8, w11, w8 +; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: eor w0, w8, #0x1 ; CHECK-NEXT: ret %cr = call i32 @bcmp(ptr %a, ptr %b, i64 64) %r = icmp eq i32 %cr, 0