Index: llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h =================================================================== --- llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h +++ llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h @@ -85,7 +85,7 @@ return SDValue(); } - /// Emit target-specific code that performs a memcmp, in cases where that is + /// Emit target-specific code performing a memcmp/bcmp, in cases where that is /// faster than a libcall. The first returned SDValue is the result of the /// memcmp and the second is the chain. Both SDValues can be null if a normal /// libcall should be used. Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -740,7 +740,7 @@ void visitFence(const FenceInst &I); void visitPHI(const PHINode &I); void visitCall(const CallInst &I); - bool visitMemCmpCall(const CallInst &I); + bool visitMemCmpBCmpCall(const CallInst &I); bool visitMemPCpyCall(const CallInst &I); bool visitMemChrCall(const CallInst &I); bool visitStrCpyCall(const CallInst &I, bool isStpcpy); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7271,12 +7271,12 @@ setValue(&I, Value); } -/// See if we can lower a memcmp call into an optimized form. If so, return +/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return /// true and lower it. Otherwise return false, and it will be lowered like a /// normal call. /// The caller already checked that \p I calls the appropriate LibFunc with a /// correct prototype. 
-bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { +bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) { const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast(Size); @@ -7587,6 +7587,10 @@ LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { default: break; + case LibFunc_bcmp: + if (visitMemCmpBCmpCall(I)) + return; + break; case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl: @@ -7688,7 +7692,7 @@ return; break; case LibFunc_memcmp: - if (visitMemCmpCall(I)) + if (visitMemCmpBCmpCall(I)) return; break; case LibFunc_mempcpy: Index: llvm/test/CodeGen/SystemZ/bcmp.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/bcmp.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare signext i32 @bcmp(i8* nocapture, i8* nocapture, i64) + +define zeroext i1 @test_bcmp_eq_0(i8* nocapture readonly %A, i8* nocapture readonly %B) { +; CHECK-LABEL: test_bcmp_eq_0: +; CHECK: # %bb.0: +; CHECK-NEXT: clc 0(2,%r3), 0(%r2) +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: afi %r0, -268435456 +; CHECK-NEXT: risbg %r2, %r0, 63, 191, 33 +; CHECK-NEXT: br %r14 + %c = tail call signext i32 @bcmp(i8* %A, i8* %B, i64 2) + %res = icmp eq i32 %c, 0 + ret i1 %res +} + +define signext i32 @test_bcmp(i8* nocapture readonly %A, i8* nocapture readonly %B) { +; CHECK-LABEL: test_bcmp: +; CHECK: # %bb.0: +; CHECK-NEXT: clc 0(2,%r3), 0(%r2) +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: sllg %r0, %r0, 34 +; CHECK-NEXT: srag %r2, %r0, 62 +; CHECK-NEXT: br %r14 + %res = tail call signext i32 @bcmp(i8* %A, i8* %B, i64 2) + ret i32 %res +}