# Patch summary (free-form text before the first "diff --git" header).
#
# * PPCLowerMASSVEntries.cpp: the cast applied to Exp->getSplatValue() for the
#   pow exponent changes from dyn_cast to dyn_cast_or_null.
#   NOTE(review): presumably getSplatValue() yields null when the lanes of the
#   constant vector differ, which dyn_cast does not accept -- confirm against
#   the LLVM casting and Constant API documentation.
# * pow_massv_075_025exp.ll: the test functions are renamed from @my_vpow_* to
#   @vpow_* and their CHECK-LABEL lines from @vspow_* to @vpow_*, so that each
#   label names the function it precedes.
# * Both test files gain three tests each; all of them expect the MASSV call
#   (bl __powd2_P9/_P8, bl __powf4_P9/_P8) to remain in the output.
#
# NOTE(review): in this copy the template arguments of dyn_cast /
# dyn_cast_or_null and the constant exponent operand of every newly added
# @__powd2_massv / @__powf4_massv call are missing (text between angle brackets
# appears to have been dropped). Restore them from the original change before
# applying this patch.
diff --git a/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
--- a/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp
@@ -105,7 +105,7 @@
     return false;
 
   if (Constant *Exp = dyn_cast(CI->getArgOperand(1)))
-    if (ConstantFP *CFP = dyn_cast(Exp->getSplatValue())) {
+    if (ConstantFP *CFP = dyn_cast_or_null(Exp->getSplatValue())) {
       // If the argument is 0.75 or 0.25 it is cheaper to turn it into pow
       // intrinsic so that it could be optimzed as sequence of sqrt's.
       if (!CI->hasNoInfs() || !CI->hasApproxFunc())
diff --git a/llvm/test/CodeGen/PowerPC/pow_massv_075_025exp.ll b/llvm/test/CodeGen/PowerPC/pow_massv_075_025exp.ll
--- a/llvm/test/CodeGen/PowerPC/pow_massv_075_025exp.ll
+++ b/llvm/test/CodeGen/PowerPC/pow_massv_075_025exp.ll
@@ -2,8 +2,8 @@
 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-PWR8 %s
 
 ; Exponent is a variable
-define void @my_vpow_var(double* nocapture %z, double* nocapture readonly %y, double* nocapture readonly %x) {
-; CHECK-LABEL: @vspow_var
+define void @vpow_var(double* nocapture %z, double* nocapture readonly %y, double* nocapture readonly %x) {
+; CHECK-LABEL: @vpow_var
 ; CHECK-PWR9: bl __powd2_P9
 ; CHECK-PWR8: bl __powd2_P8
 ; CHECK: blr
@@ -31,8 +31,8 @@
 }
 
 ; Exponent is a constant != 0.75 and !=0.25
-define void @my_vpow_const(double* nocapture %y, double* nocapture readonly %x) {
-; CHECK-LABEL: @vspow_const
+define void @vpow_const(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL: @vpow_const
 ; CHECK-PWR9: bl __powd2_P9
 ; CHECK-PWR8: bl __powd2_P8
 ; CHECK: blr
@@ -56,9 +56,87 @@
   ret void
 }
 
+; Exponent is a constant vector whose lanes are != 0.75 and != 0.25 and differ from each other
+define void @vpow_noeq_const(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL: @vpow_noeq_const
+; CHECK-PWR9: bl __powd2_P9
+; CHECK-PWR8: bl __powd2_P8
+; CHECK: blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr double, double* %y, i64 %index
+  %next.gep19 = getelementptr double, double* %x, i64 %index
+  %0 = bitcast double* %next.gep19 to <2 x double>*
+  %wide.load = load <2 x double>, <2 x double>* %0, align 8
+  %1 = call ninf afn nsz <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> )
+  %2 = bitcast double* %next.gep to <2 x double>*
+  store <2 x double> %1, <2 x double>* %2, align 8
+  %index.next = add i64 %index, 2
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
+; Exponent is a constant != 0.75 and != 0.25 with differing lanes (NOTE(review): the name hints at a 0.75 lane -- confirm)
+define void @vpow_noeq075_const(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL: @vpow_noeq075_const
+; CHECK-PWR9: bl __powd2_P9
+; CHECK-PWR8: bl __powd2_P8
+; CHECK: blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr double, double* %y, i64 %index
+  %next.gep19 = getelementptr double, double* %x, i64 %index
+  %0 = bitcast double* %next.gep19 to <2 x double>*
+  %wide.load = load <2 x double>, <2 x double>* %0, align 8
+  %1 = call ninf afn nsz <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> )
+  %2 = bitcast double* %next.gep to <2 x double>*
+  store <2 x double> %1, <2 x double>* %2, align 8
+  %index.next = add i64 %index, 2
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
+; Exponent is a constant != 0.75 and != 0.25 with differing lanes (NOTE(review): the name hints at a 0.25 lane -- confirm)
+define void @vpow_noeq025_const(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL: @vpow_noeq025_const
+; CHECK-PWR9: bl __powd2_P9
+; CHECK-PWR8: bl __powd2_P8
+; CHECK: blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr double, double* %y, i64 %index
+  %next.gep19 = getelementptr double, double* %x, i64 %index
+  %0 = bitcast double* %next.gep19 to <2 x double>*
+  %wide.load = load <2 x double>, <2 x double>* %0, align 8
+  %1 = call ninf afn nsz <2 x double> @__powd2_massv(<2 x double> %wide.load, <2 x double> )
+  %2 = bitcast double* %next.gep to <2 x double>*
+  store <2 x double> %1, <2 x double>* %2, align 8
+  %index.next = add i64 %index, 2
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
 ; Exponent is 0.75
-define void @my_vpow_075(double* nocapture %y, double* nocapture readonly %x) {
-; CHECK-LABEL: @vspow_075
+define void @vpow_075(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL: @vpow_075
 ; CHECK-NOT: bl __powd2_P{{[8,9]}}
 ; CHECK: xvrsqrtesp
 ; CHECK: blr
@@ -83,8 +161,8 @@
 }
 
 ; Exponent is 0.25
-define void @my_vpow_025(double* nocapture %y, double* nocapture readonly %x) {
-; CHECK-LABEL: @vspow_025
+define void @vpow_025(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL: @vpow_025
 ; CHECK-NOT: bl __powd2_P{{[8,9]}}
 ; CHECK: xvrsqrtesp
 ; CHECK: blr
@@ -109,8 +187,8 @@
 }
 
 ; Exponent is 0.75 but no proper fast-math flags
-define void @my_vpow_075_nofast(double* nocapture %y, double* nocapture readonly %x) {
-; CHECK-LABEL: @vspow_075_nofast
+define void @vpow_075_nofast(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL: @vpow_075_nofast
 ; CHECK-PWR9: bl __powd2_P9
 ; CHECK-PWR8: bl __powd2_P8
 ; CHECK-NOT: xvrsqrtesp
@@ -136,8 +214,8 @@
 }
 
 ; Exponent is 0.25 but no proper fast-math flags
-define void @my_vpow_025_nofast(double* nocapture %y, double* nocapture readonly %x) {
-; CHECK-LABEL: @vspow_025_nofast
+define void @vpow_025_nofast(double* nocapture %y, double* nocapture readonly %x) {
+; CHECK-LABEL: @vpow_025_nofast
 ; CHECK-PWR9: bl __powd2_P9
 ; CHECK-PWR8: bl __powd2_P8
 ; CHECK-NOT: xvrsqrtesp
diff --git a/llvm/test/CodeGen/PowerPC/powf_massv_075_025exp.ll b/llvm/test/CodeGen/PowerPC/powf_massv_075_025exp.ll
--- a/llvm/test/CodeGen/PowerPC/powf_massv_075_025exp.ll
+++ b/llvm/test/CodeGen/PowerPC/powf_massv_075_025exp.ll
@@ -56,6 +56,84 @@
   ret void
 }
 
+; Exponent is a constant vector whose lanes are != 0.75 and != 0.25 and differ from each other
+define void @vspow_neq_const(float* nocapture %y, float* nocapture readonly %x) {
+; CHECK-LABEL: @vspow_neq_const
+; CHECK-PWR9: bl __powf4_P9
+; CHECK-PWR8: bl __powf4_P8
+; CHECK: blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr float, float* %y, i64 %index
+  %next.gep19 = getelementptr float, float* %x, i64 %index
+  %0 = bitcast float* %next.gep19 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %0, align 4
+  %1 = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> )
+  %2 = bitcast float* %next.gep to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
+; Exponent is a constant != 0.75 and != 0.25 (NOTE(review): the name hints at a non-splat vector with a 0.75 lane -- confirm)
+define void @vspow_neq075_const(float* nocapture %y, float* nocapture readonly %x) {
+; CHECK-LABEL: @vspow_neq075_const
+; CHECK-PWR9: bl __powf4_P9
+; CHECK-PWR8: bl __powf4_P8
+; CHECK: blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr float, float* %y, i64 %index
+  %next.gep19 = getelementptr float, float* %x, i64 %index
+  %0 = bitcast float* %next.gep19 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %0, align 4
+  %1 = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> )
+  %2 = bitcast float* %next.gep to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
+; Exponent is a constant != 0.75 and != 0.25 (NOTE(review): the name hints at a non-splat vector with a 0.25 lane -- confirm)
+define void @vspow_neq025_const(float* nocapture %y, float* nocapture readonly %x) {
+; CHECK-LABEL: @vspow_neq025_const
+; CHECK-PWR9: bl __powf4_P9
+; CHECK-PWR8: bl __powf4_P8
+; CHECK: blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr float, float* %y, i64 %index
+  %next.gep19 = getelementptr float, float* %x, i64 %index
+  %0 = bitcast float* %next.gep19 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %0, align 4
+  %1 = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> )
+  %2 = bitcast float* %next.gep to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
 ; Exponent is 0.75
 define void @vspow_075(float* nocapture %y, float* nocapture readonly %x) {
 ; CHECK-LABEL: @vspow_075