diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -811,6 +811,10 @@
     if (Subtarget.isISA3_1()) {
       setOperationAction(ISD::MUL, MVT::v2i64, Legal);
+      setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
+      setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
+      setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
+      setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
       setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
       setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
       setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -976,13 +976,17 @@
                         "vmulld $vD, $vA, $vB", IIC_VecGeneral,
                         [(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>;
   def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulhsw $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulhsw $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v4i32:$vD, (mulhs v4i32:$vA, v4i32:$vB))]>;
   def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulhuw $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulhuw $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v4i32:$vD, (mulhu v4i32:$vA, v4i32:$vB))]>;
   def VMULHSD : VXForm_1<969, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulhsd $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulhsd $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v2i64:$vD, (mulhs v2i64:$vA, v2i64:$vB))]>;
   def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulhud $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulhud $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v2i64:$vD, (mulhu v2i64:$vA, v2i64:$vB))]>;
   def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                         "vmodsw $vD, $vA, $vB", IIC_VecGeneral,
                         [(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>;
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
--- a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
@@ -7,6 +7,9 @@
 ; RUN:   FileCheck %s
 
 ; This test case aims to test the vector multiply instructions on Power10.
+; This includes the low order and high order versions of vector multiply.
+; The low order version operates on doublewords, whereas the high order version
+; operates on signed and unsigned words and doublewords.
 
 define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vmulld:
@@ -17,3 +20,59 @@
   %mul = mul <2 x i64> %b, %a
   ret <2 x i64> %mul
 }
+
+define <2 x i64> @test_vmulhsd(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmulhsd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhsd v2, v3, v2
+; CHECK-NEXT:    blr
+entry:
+  %0 = sext <2 x i64> %a to <2 x i128>
+  %1 = sext <2 x i64> %b to <2 x i128>
+  %mul = mul <2 x i128> %1, %0
+  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
+  %tr = trunc <2 x i128> %shr to <2 x i64>
+  ret <2 x i64> %tr
+}
+
+define <2 x i64> @test_vmulhud(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmulhud:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhud v2, v3, v2
+; CHECK-NEXT:    blr
+entry:
+  %0 = zext <2 x i64> %a to <2 x i128>
+  %1 = zext <2 x i64> %b to <2 x i128>
+  %mul = mul <2 x i128> %1, %0
+  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
+  %tr = trunc <2 x i128> %shr to <2 x i64>
+  ret <2 x i64> %tr
+}
+
+define <4 x i32> @test_vmulhsw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmulhsw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhsw v2, v3, v2
+; CHECK-NEXT:    blr
+entry:
+  %0 = sext <4 x i32> %a to <4 x i64>
+  %1 = sext <4 x i32> %b to <4 x i64>
+  %mul = mul <4 x i64> %1, %0
+  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
+  %tr = trunc <4 x i64> %shr to <4 x i32>
+  ret <4 x i32> %tr
+}
+
+define <4 x i32> @test_vmulhuw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmulhuw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhuw v2, v3, v2
+; CHECK-NEXT:    blr
+entry:
+  %0 = zext <4 x i32> %a to <4 x i64>
+  %1 = zext <4 x i32> %b to <4 x i64>
+  %mul = mul <4 x i64> %1, %0
+  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
+  %tr = trunc <4 x i64> %shr to <4 x i32>
+  ret <4 x i32> %tr
+}
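
Note on the tested idiom: each new test widens the operands (sext for the signed
forms, zext for the unsigned forms), multiplies in the wider type, shifts the
product right by the element width, and truncates back. The DAG combiner
recognizes this sequence as ISD::MULHS/ISD::MULHU, which the legality changes
and TableGen patterns above now select as a single instruction. As a scalar
analogue (a minimal sketch, not part of the patch; the helper name mulhu64 is
hypothetical, and __int128 is a Clang/GCC extension on 64-bit targets), the
unsigned doubleword case computes:

    #include <stdint.h>

    /* Returns the upper 64 bits of the full 128-bit product of a and b.
       This is the per-lane computation vmulhud performs across a v2i64
       vector, and the same widen/multiply/shift/truncate shape the IR
       tests above exercise. */
    uint64_t mulhu64(uint64_t a, uint64_t b) {
      return (uint64_t)(((unsigned __int128)a * (unsigned __int128)b) >> 64);
    }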