# Summary (free-form header placed before the first "Index:" line; nothing
# inside any hunk was added or removed, so the @@ line counts are unchanged).
#
# SimplifyLibCalls.h
#   Declares two new LibCallSimplifier members: optimizeBCmp and
#   optimizeMemCmpBCmpCommon.
#
# SimplifyLibCalls.cpp
#   * The leading part of the old optimizeMemCmp is renamed to
#     optimizeMemCmpBCmpCommon. Only its first and last lines are visible in
#     the hunk context; it ends by returning nullptr when the constant-size
#     handling produced no result.
#   * optimizeMemCmp now calls optimizeMemCmpBCmpCommon first and returns its
#     result if non-null. Otherwise it keeps the memcmp -> bcmp rewrite, which
#     fires when the call is only used in a zero-equality comparison and
#     TLI->has(LibFunc_bcmp). The operands are re-read from CI inside that
#     branch because the old locals now live in the common helper.
#   * optimizeBCmp simply forwards to optimizeMemCmpBCmpCommon.
#   * The libcall dispatch switch gains a LibFunc_bcmp case that calls
#     optimizeBCmp.
#
# test/Transforms/InstCombine/bcmp-1.ll (new file, 144 lines)
#   test_simplify1..test_simplify9 check the folds that the CHECK lines spell
#   out (same pointer, zero size, size 1, constant operands, and 8/4/2-byte
#   equality compares). test_simplify10 expects a bcmp call with a
#   non-constant size to be left in place.
#
Index: llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -138,6 +138,8 @@
   Value *optimizeStrStr(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemChr(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B);
Index: llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -916,7 +916,9 @@
   return nullptr;
 }
 
-Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
+// Most simplifications for memcmp also apply to bcmp.
+Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
+                                                   IRBuilder<> &B) {
   Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
   Value *Size = CI->getArgOperand(2);
 
@@ -929,16 +931,30 @@
                                                 LenC->getZExtValue(), B, DL))
       return Res;
 
+  return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
+  if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
+    return V;
+
   // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
   // `bcmp` can be more efficient than memcmp because it only has to know that
   // there is a difference, not where it is.
   if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) {
+    Value *LHS = CI->getArgOperand(0);
+    Value *RHS = CI->getArgOperand(1);
+    Value *Size = CI->getArgOperand(2);
     return emitBCmp(LHS, RHS, Size, B, DL, TLI);
   }
 
   return nullptr;
 }
 
+Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) {
+  return optimizeMemCmpBCmpCommon(CI, B);
+}
+
 Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
   // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
   B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
@@ -2512,6 +2528,8 @@
       return optimizeStrStr(CI, Builder);
     case LibFunc_memchr:
       return optimizeMemChr(CI, Builder);
+    case LibFunc_bcmp:
+      return optimizeBCmp(CI, Builder);
     case LibFunc_memcmp:
       return optimizeMemCmp(CI, Builder);
     case LibFunc_memcpy:
Index: llvm/trunk/test/Transforms/InstCombine/bcmp-1.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/bcmp-1.ll
+++ llvm/trunk/test/Transforms/InstCombine/bcmp-1.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Test that the bcmp library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32:64"
+
+@foo = constant [4 x i8] c"foo\00"
+@hel = constant [4 x i8] c"hel\00"
+@hello_u = constant [8 x i8] c"hello_u\00"
+
+declare i32 @bcmp(i8*, i8*, i32)
+
+; Check bcmp(mem, mem, size) -> 0.
+
+define i32 @test_simplify1(i8* %mem, i32 %size) {
+; CHECK-LABEL: @test_simplify1(
+; CHECK-NEXT: ret i32 0
+;
+  %ret = call i32 @bcmp(i8* %mem, i8* %mem, i32 %size)
+  ret i32 %ret
+}
+
+; Check bcmp(mem1, mem2, 0) -> 0.
+
+define i32 @test_simplify2(i8* %mem1, i8* %mem2) {
+; CHECK-LABEL: @test_simplify2(
+; CHECK-NEXT: ret i32 0
+;
+  %ret = call i32 @bcmp(i8* %mem1, i8* %mem2, i32 0)
+  ret i32 %ret
+}
+
+;; Check bcmp(mem1, mem2, 1) -> *(unsigned char*)mem1 - *(unsigned char*)mem2.
+
+define i32 @test_simplify3(i8* %mem1, i8* %mem2) {
+; CHECK-LABEL: @test_simplify3(
+; CHECK-NEXT: [[LHSC:%.*]] = load i8, i8* [[MEM1:%.*]], align 1
+; CHECK-NEXT: [[LHSV:%.*]] = zext i8 [[LHSC]] to i32
+; CHECK-NEXT: [[RHSC:%.*]] = load i8, i8* [[MEM2:%.*]], align 1
+; CHECK-NEXT: [[RHSV:%.*]] = zext i8 [[RHSC]] to i32
+; CHECK-NEXT: [[CHARDIFF:%.*]] = sub nsw i32 [[LHSV]], [[RHSV]]
+; CHECK-NEXT: ret i32 [[CHARDIFF]]
+;
+  %ret = call i32 @bcmp(i8* %mem1, i8* %mem2, i32 1)
+  ret i32 %ret
+}
+
+; Check bcmp(mem1, mem2, size) -> cnst, where all arguments are constants.
+
+define i32 @test_simplify4() {
+; CHECK-LABEL: @test_simplify4(
+; CHECK-NEXT: ret i32 0
+;
+  %mem1 = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0
+  %mem2 = getelementptr [8 x i8], [8 x i8]* @hello_u, i32 0, i32 0
+  %ret = call i32 @bcmp(i8* %mem1, i8* %mem2, i32 3)
+  ret i32 %ret
+}
+
+define i32 @test_simplify5() {
+; CHECK-LABEL: @test_simplify5(
+; CHECK-NEXT: ret i32 1
+;
+  %mem1 = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0
+  %mem2 = getelementptr [4 x i8], [4 x i8]* @foo, i32 0, i32 0
+  %ret = call i32 @bcmp(i8* %mem1, i8* %mem2, i32 3)
+  ret i32 %ret
+}
+
+define i32 @test_simplify6() {
+; CHECK-LABEL: @test_simplify6(
+; CHECK-NEXT: ret i32 -1
+;
+  %mem1 = getelementptr [4 x i8], [4 x i8]* @foo, i32 0, i32 0
+  %mem2 = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0
+  %ret = call i32 @bcmp(i8* %mem1, i8* %mem2, i32 3)
+  ret i32 %ret
+}
+
+; Check bcmp(mem1, mem2, 8)==0 -> *(int64_t*)mem1 == *(int64_t*)mem2
+
+define i1 @test_simplify7(i64 %x, i64 %y) {
+; CHECK-LABEL: @test_simplify7(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+  %x.addr = alloca i64, align 8
+  %y.addr = alloca i64, align 8
+  store i64 %x, i64* %x.addr, align 8
+  store i64 %y, i64* %y.addr, align 8
+  %xptr = bitcast i64* %x.addr to i8*
+  %yptr = bitcast i64* %y.addr to i8*
+  %call = call i32 @bcmp(i8* %xptr, i8* %yptr, i32 8)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+
+; Check bcmp(mem1, mem2, 4)==0 -> *(int32_t*)mem1 == *(int32_t*)mem2
+
+define i1 @test_simplify8(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_simplify8(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %xptr = bitcast i32* %x.addr to i8*
+  %yptr = bitcast i32* %y.addr to i8*
+  %call = call i32 @bcmp(i8* %xptr, i8* %yptr, i32 4)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+
+; Check bcmp(mem1, mem2, 2)==0 -> *(int16_t*)mem1 == *(int16_t*)mem2
+
+define i1 @test_simplify9(i16 %x, i16 %y) {
+; CHECK-LABEL: @test_simplify9(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+  %x.addr = alloca i16, align 2
+  %y.addr = alloca i16, align 2
+  store i16 %x, i16* %x.addr, align 2
+  store i16 %y, i16* %y.addr, align 2
+  %xptr = bitcast i16* %x.addr to i8*
+  %yptr = bitcast i16* %y.addr to i8*
+  %call = call i32 @bcmp(i8* %xptr, i8* %yptr, i32 2)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @test_simplify10(i8* %mem1, i8* %mem2, i32 %size) {
+; CHECK-LABEL: @test_simplify10(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @bcmp(i8* [[MEM1:%.*]], i8* [[MEM2:%.*]], i32 [[SIZE:%.*]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+  %call = call i32 @bcmp(i8* %mem1, i8* %mem2, i32 %size)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}