Index: llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -56,6 +56,38 @@
          Func == LibFunc::llabs || Func == LibFunc::strlen;
 }
 
+static bool isCallingConvCCompatible(CallInst *CI) {
+  switch(CI->getCallingConv()) { // True only for C or qualifying ARM calls.
+  default: // Any convention without a case below is rejected.
+    return false;
+  case llvm::CallingConv::C:
+    return true;
+  case llvm::CallingConv::ARM_APCS:
+  case llvm::CallingConv::ARM_AAPCS:
+  case llvm::CallingConv::ARM_AAPCS_VFP: {
+    // The ARM conventions are accepted only if every check below passes.
+    // The iOS ABI diverges from the standard in some cases, so for now don't
+    // try to simplify those calls.
+    if (Triple(CI->getModule()->getTargetTriple()).isiOS())
+      return false;
+    // Function type of the call, used for the signature checks that follow.
+    auto *FuncTy = CI->getFunctionType();
+    // Reject any return type other than pointer, integer, or void.
+    if (!FuncTy->getReturnType()->isPointerTy() &&
+        !FuncTy->getReturnType()->isIntegerTy() &&
+        !FuncTy->getReturnType()->isVoidTy())
+      return false;
+    // Reject any parameter that is neither a pointer nor an integer.
+    for (auto Param : FuncTy->params()) {
+      if (!Param->isPointerTy() && !Param->isIntegerTy())
+        return false;
+    }
+    return true;
+  }
+  }
+  return false; // Not reached: every switch path above returns.
+}
+
 /// Return true if it only matters that the value is equal or not-equal to zero.
 static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
   for (User *U : V->users()) {
@@ -1898,7 +1930,7 @@
   if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
     // Make sure we never change the calling convention.
     assert((ignoreCallingConv(Func) ||
-            CI->getCallingConv() == llvm::CallingConv::C) &&
+            isCallingConvCCompatible(CI)) &&
       "Optimizing string/memory libcall would change the calling convention");
     switch (Func) {
     case LibFunc::strcat:
@@ -1965,7 +1997,7 @@
   SmallVector OpBundles;
   CI->getOperandBundlesAsDefs(OpBundles);
   IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
-  bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
+  bool isCallingConvC = isCallingConvCCompatible(CI);
 
   // Command-line parameter overrides instruction attribute.
   if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
@@ -2321,7 +2353,7 @@
   SmallVector OpBundles;
   CI->getOperandBundlesAsDefs(OpBundles);
   IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
-  bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
+  bool isCallingConvC = isCallingConvCCompatible(CI);
 
   // First, check that this is a known library functions and that the prototype
   // is correct.
Index: llvm/trunk/test/Transforms/InstCombine/ARM/strcmp.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/ARM/strcmp.ll
+++ llvm/trunk/test/Transforms/InstCombine/ARM/strcmp.ll
@@ -0,0 +1,153 @@
+; Test that the strcmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+@bell = constant [5 x i8] c"bell\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i32 @strcmp(i8*, i8*)
+
+; strcmp("", x) -> -*x
+define arm_aapcscc i32 @test1(i8* %str2) {
+; CHECK-LABEL: @test1(
+; CHECK: %strcmpload = load i8, i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: %2 = sub nsw i32 0, %1
+; CHECK: ret i32 %2
+
+  %str1 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+  %temp1 = call arm_apcscc i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp1
+
+}
+
+; strcmp(x, "") -> *x
+define arm_aapcscc i32 @test2(i8* %str1) {
+; CHECK-LABEL: @test2(
+; CHECK: %strcmpload = load i8, i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: ret i32 %1
+
+  %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+  %temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp1
+}
+
+; strcmp(x, y) -> cnst
+define arm_aapcscc i32 @test3() {
+; CHECK-LABEL: @test3(
+; CHECK: ret i32 -1
+
+  %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+  %str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+  %temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp1
+}
+
+define arm_aapcscc i32 @test4() {
+; CHECK-LABEL: @test4(
+; CHECK: ret i32 1
+
+  %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+  %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+  %temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp1
+}
+
+; strcmp(x, y) -> memcmp(x, y, <known length>)
+; (This transform is rather difficult to trigger in a useful manner)
+define arm_aapcscc i32 @test5(i1 %b) {
+; CHECK-LABEL: @test5(
+; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
+; CHECK: ret i32 %memcmp
+
+  %str1 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+  %temp1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+  %temp2 = getelementptr inbounds [5 x i8], [5 x i8]* @bell, i32 0, i32 0
+  %str2 = select i1 %b, i8* %temp1, i8* %temp2
+  %temp3 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp3
+}
+
+; strcmp(x,x) -> 0
+define arm_aapcscc i32 @test6(i8* %str) {
+; CHECK-LABEL: @test6(
+; CHECK: ret i32 0
+
+  %temp1 = call arm_aapcscc i32 @strcmp(i8* %str, i8* %str)
+  ret i32 %temp1
+}
+
+; strcmp("", x) -> -*x
+define arm_aapcs_vfpcc i32 @test1_vfp(i8* %str2) {
+; CHECK-LABEL: @test1_vfp(
+; CHECK: %strcmpload = load i8, i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: %2 = sub nsw i32 0, %1
+; CHECK: ret i32 %2
+
+  %str1 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp1
+
+}
+
+; strcmp(x, "") -> *x
+define arm_aapcs_vfpcc i32 @test2_vfp(i8* %str1) {
+; CHECK-LABEL: @test2_vfp(
+; CHECK: %strcmpload = load i8, i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: ret i32 %1
+
+  %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp1
+}
+
+; strcmp(x, y) -> cnst
+define arm_aapcs_vfpcc i32 @test3_vfp() {
+; CHECK-LABEL: @test3_vfp(
+; CHECK: ret i32 -1
+
+  %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+  %str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp1
+}
+
+define arm_aapcs_vfpcc i32 @test4_vfp() {
+; CHECK-LABEL: @test4_vfp(
+; CHECK: ret i32 1
+
+  %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+  %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
+  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp1
+}
+
+; strcmp(x, y) -> memcmp(x, y, <known length>)
+; (This transform is rather difficult to trigger in a useful manner)
+define arm_aapcs_vfpcc i32 @test5_vfp(i1 %b) {
+; CHECK-LABEL: @test5_vfp(
+; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
+; CHECK: ret i32 %memcmp
+
+  %str1 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+  %temp1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
+  %temp2 = getelementptr inbounds [5 x i8], [5 x i8]* @bell, i32 0, i32 0
+  %str2 = select i1 %b, i8* %temp1, i8* %temp2
+  %temp3 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp3
+}
+
+; strcmp(x,x) -> 0
+define arm_aapcs_vfpcc i32 @test6_vfp(i8* %str) {
+; CHECK-LABEL: @test6_vfp(
+; CHECK: ret i32 0
+
+  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str, i8* %str)
+  ret i32 %temp1
+}
Index: llvm/trunk/test/Transforms/InstCombine/ARM/strcpy.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/ARM/strcpy.ll
+++ llvm/trunk/test/Transforms/InstCombine/ARM/strcpy.ll
@@ -0,0 +1,76 @@
+; Test that the strcpy library call simplifier works correctly for ARM procedure calls
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+@b = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strcpy(i8*, i8*)
+
+define arm_aapcscc void @test_simplify1() {
+; CHECK-LABEL: @test_simplify1(
+
+  %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+
+  call arm_aapcscc i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+  ret void
+}
+
+define arm_aapcscc i8* @test_simplify2() {
+; CHECK-LABEL: @test_simplify2(
+
+  %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+
+  %ret = call arm_aapcscc i8* @strcpy(i8* %dst, i8* %dst)
+; CHECK: ret i8* getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0)
+  ret i8* %ret
+}
+
+define arm_aapcscc i8* @test_no_simplify1() {
+; CHECK-LABEL: @test_no_simplify1(
+
+  %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [32 x i8], [32 x i8]* @b, i32 0, i32 0
+
+  %ret = call arm_aapcscc i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: call arm_aapcscc i8* @strcpy
+  ret i8* %ret
+}
+
+define arm_aapcs_vfpcc void @test_simplify1_vfp() {
+; CHECK-LABEL: @test_simplify1_vfp(
+
+  %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
+
+  call arm_aapcs_vfpcc i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+  ret void
+}
+
+define arm_aapcs_vfpcc i8* @test_simplify2_vfp() {
+; CHECK-LABEL: @test_simplify2_vfp(
+
+  %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+
+  %ret = call arm_aapcs_vfpcc i8* @strcpy(i8* %dst, i8* %dst)
+; CHECK: ret i8* getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0)
+  ret i8* %ret
+}
+
+define arm_aapcs_vfpcc i8* @test_no_simplify1_vfp() {
+; CHECK-LABEL: @test_no_simplify1_vfp(
+
+  %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [32 x i8], [32 x i8]* @b, i32 0, i32 0
+
+  %ret = call arm_aapcs_vfpcc i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: call arm_aapcs_vfpcc i8* @strcpy
+  ret i8* %ret
+}