Index: llvm/lib/CodeGen/ReplaceWithVeclib.cpp =================================================================== --- llvm/lib/CodeGen/ReplaceWithVeclib.cpp +++ llvm/lib/CodeGen/ReplaceWithVeclib.cpp @@ -105,13 +105,13 @@ // Convert vector arguments to scalar type and check that // all vector operands have identical vector width. ElementCount VF = ElementCount::getFixed(0); - SmallVector ScalarTypes; + Type *ScalarType = nullptr; for (auto Arg : enumerate(CI.args())) { auto *ArgType = Arg.value()->getType(); // Vector calls to intrinsics can still have // scalar operands for specific arguments. if (hasVectorInstrinsicScalarOpd(IntrinsicID, Arg.index())) { - ScalarTypes.push_back(ArgType); + ScalarType = ArgType; } else { // The argument in this place should be a vector if // this is a call to a vector intrinsic. @@ -133,7 +133,7 @@ } else { VF = NumElements; } - ScalarTypes.push_back(VectorArgTy->getElementType()); + ScalarType = VectorArgTy->getElementType(); } } @@ -141,8 +141,8 @@ // intrinsic using the intrinsic ID and the argument types // converted to scalar above. std::string ScalarName; - if (Intrinsic::isOverloaded(IntrinsicID)) { - ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes, CI.getModule()); + if (Intrinsic::isOverloaded(IntrinsicID) && ScalarType) { + ScalarName = Intrinsic::getName(IntrinsicID, {ScalarType}, CI.getModule()); } else { ScalarName = Intrinsic::getName(IntrinsicID).str(); } Index: llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll =================================================================== --- llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll +++ llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll @@ -61,6 +61,60 @@ declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0 +define <4 x double> @pow_v4(<4 x double> %F, <4 x double> %power) { +; SVML-LABEL: define {{[^@]+}}@pow_v4 +; SVML-SAME: (<4 x double> [[F:%.*]], <4 x double> [[POWER:%.*]]) { +; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[F]], <4 x double> [[POWER]]) +; SVML-NEXT: ret <4 x double> [[TMP1]] +; +; LIBMVEC-X86-LABEL: define {{[^@]+}}@pow_v4 +; LIBMVEC-X86-SAME: (<4 x double> [[F:%.*]], <4 x double> [[POWER:%.*]]) { +; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x double> @_ZGVdN4vv_pow(<4 x double> [[F]], <4 x double> [[POWER]]) +; LIBMVEC-X86-NEXT: ret <4 x double> [[TMP1]] +; +; MASSV-LABEL: define {{[^@]+}}@pow_v4 +; MASSV-SAME: (<4 x double> [[F:%.*]], <4 x double> [[POWER:%.*]]) { +; MASSV-NEXT: [[CALL:%.*]] = call <4 x double> @llvm.pow.v4f64(<4 x double> [[F]], <4 x double> [[POWER]]) +; MASSV-NEXT: ret <4 x double> [[CALL]] +; +; ACCELERATE-LABEL: define {{[^@]+}}@pow_v4 +; ACCELERATE-SAME: (<4 x double> [[F:%.*]], <4 x double> [[POWER:%.*]]) { +; ACCELERATE-NEXT: [[CALL:%.*]] = call <4 x double> @llvm.pow.v4f64(<4 x double> [[F]], <4 x double> [[POWER]]) +; ACCELERATE-NEXT: ret <4 x double> [[CALL]] +; + %call = call <4 x double> @llvm.pow.v4f64(<4 x double> %F, <4 x double> %power) + ret <4 x double> %call +} + +declare <4 x double> @llvm.pow.v4f64(<4 x double>, <4 x double>) #0 + +define <4 x float> @pow_f32(<4 x float> %F, <4 x float> %power) { +; SVML-LABEL: define {{[^@]+}}@pow_f32 +; SVML-SAME: (<4 x float> [[F:%.*]], <4 x float> [[POWER:%.*]]) { +; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_powf4(<4 x float> [[F]], <4 x float> [[POWER]]) +; SVML-NEXT: ret <4 x float> [[TMP1]] +; +; LIBMVEC-X86-LABEL: define {{[^@]+}}@pow_f32 +; LIBMVEC-X86-SAME: (<4 x float> [[F:%.*]], <4 x float> [[POWER:%.*]]) { +; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x float> @_ZGVbN4vv_powf(<4 x float> [[F]], <4 x float> [[POWER]]) +; LIBMVEC-X86-NEXT: ret <4 x float> [[TMP1]] +; +; MASSV-LABEL: define {{[^@]+}}@pow_f32 +; MASSV-SAME: (<4 x float> [[F:%.*]], <4 x float> [[POWER:%.*]]) { +; MASSV-NEXT: [[TMP1:%.*]] = call <4 x float> @__powf4(<4 x float> [[F]], <4 x float> [[POWER]]) +; MASSV-NEXT: ret <4 x float> [[TMP1]] +; +; ACCELERATE-LABEL: define {{[^@]+}}@pow_f32 +; ACCELERATE-SAME: (<4 x float> [[F:%.*]], <4 x float> [[POWER:%.*]]) { +; ACCELERATE-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.pow.v4f32(<4 x float> [[F]], <4 x float> [[POWER]]) +; ACCELERATE-NEXT: ret <4 x float> [[TMP1]] +; + %call = call <4 x float> @llvm.pow.v4f32(<4 x float> %F, <4 x float> %power) + ret <4 x float> %call +} + +declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) #0 + ; No replacement should take place for non-vector intrinsic. define double @exp_f64(double %in) { ; COMMON-LABEL: define {{[^@]+}}@exp_f64