diff --git a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
--- a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
@@ -85,7 +85,7 @@
     return SDValue();
   }
 
-  /// Emit target-specific code that performs a memcmp, in cases where that is
+  /// Emit target-specific code that performs a memcmp/bcmp, in cases where that is
   /// faster than a libcall. The first returned SDValue is the result of the
   /// memcmp and the second is the chain. Both SDValues can be null if a normal
   /// libcall should be used.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -740,7 +740,7 @@
   void visitFence(const FenceInst &I);
   void visitPHI(const PHINode &I);
   void visitCall(const CallInst &I);
-  bool visitMemCmpCall(const CallInst &I);
+  bool visitMemCmpBCmpCall(const CallInst &I);
   bool visitMemPCpyCall(const CallInst &I);
   bool visitMemChrCall(const CallInst &I);
   bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7245,12 +7245,12 @@
   setValue(&I, Value);
 }
 
-/// See if we can lower a memcmp call into an optimized form. If so, return
+/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return
 /// true and lower it. Otherwise return false, and it will be lowered like a
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
+bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
   const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
   const Value *Size = I.getArgOperand(2);
   const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
@@ -7561,6 +7561,10 @@
       LibInfo->hasOptimizedCodeGen(Func)) {
     switch (Func) {
     default: break;
+    case LibFunc_bcmp:
+      if (visitMemCmpBCmpCall(I))
+        return;
+      break;
     case LibFunc_copysign:
     case LibFunc_copysignf:
     case LibFunc_copysignl:
@@ -7662,7 +7666,7 @@
         return;
       break;
     case LibFunc_memcmp:
-      if (visitMemCmpCall(I))
+      if (visitMemCmpBCmpCall(I))
         return;
       break;
     case LibFunc_mempcpy:
diff --git a/llvm/test/CodeGen/SystemZ/bcmp.ll b/llvm/test/CodeGen/SystemZ/bcmp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/bcmp.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare signext i32 @bcmp(i8* nocapture, i8* nocapture, i64)
+
+define zeroext i1 @test_bcmp_eq_0(i8* nocapture readonly %A, i8* nocapture readonly %B) {
+; CHECK-LABEL: test_bcmp_eq_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    clc 0(2,%r3), 0(%r2)
+; CHECK-NEXT:    ipm %r0
+; CHECK-NEXT:    afi %r0, -268435456
+; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 33
+; CHECK-NEXT:    br %r14
+  %c = tail call signext i32 @bcmp(i8* %A, i8* %B, i64 2)
+  %res = icmp eq i32 %c, 0
+  ret i1 %res
+}
+
+define signext i32 @test_bcmp(i8* nocapture readonly %A, i8* nocapture readonly %B) {
+; CHECK-LABEL: test_bcmp:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    clc 0(2,%r3), 0(%r2)
+; CHECK-NEXT:    ipm %r0
+; CHECK-NEXT:    sllg %r0, %r0, 34
+; CHECK-NEXT:    srag %r2, %r0, 62
+; CHECK-NEXT:    br %r14
+  %res = tail call signext i32 @bcmp(i8* %A, i8* %B, i64 2)
+  ret i32 %res
+}