Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -808,6 +808,10 @@ if (Subtarget.isISA3_1()) { setOperationAction(ISD::MUL, MVT::v2i64, Legal); + setOperationAction(ISD::MULHS, MVT::v2i64, Legal); + setOperationAction(ISD::MULHU, MVT::v2i64, Legal); + setOperationAction(ISD::MULHS, MVT::v4i32, Legal); + setOperationAction(ISD::MULHU, MVT::v4i32, Legal); setOperationAction(ISD::UDIV, MVT::v2i64, Legal); setOperationAction(ISD::SDIV, MVT::v2i64, Legal); setOperationAction(ISD::UDIV, MVT::v4i32, Legal); Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -571,6 +571,18 @@ def VMULLD : VXForm_1<457, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmulld $vD, $vA, $vB", IIC_VecGeneral, [(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>; + def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulhsw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (mulhs v4i32:$vA, v4i32:$vB))]>; + def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulhuw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (mulhu v4i32:$vA, v4i32:$vB))]>; + def VMULHSD : VXForm_1<969, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulhsd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (mulhs v2i64:$vA, v2i64:$vB))]>; + def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulhud $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (mulhu v2i64:$vA, v2i64:$vB))]>; def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmodsw $vD, $vA, $vB", IIC_VecGeneral, [(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>; Index: llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll 
=================================================================== --- llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll +++ llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll @@ -4,6 +4,9 @@ ; RUN: FileCheck %s ; This test case aims to test the vector multiply instructions on Power10. +; This includes the low order and high order versions of vector multiply. +; The low order version operates on doublewords, whereas the high order version +; operates on signed and unsigned words and doublewords. define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vmulld: @@ -14,3 +17,59 @@ %mul = mul <2 x i64> %b, %a ret <2 x i64> %mul } + +define <2 x i64> @test_vmulhsd(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vmulhsd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulhsd v2, v3, v2 +; CHECK-NEXT: blr +entry: + %0 = sext <2 x i64> %a to <2 x i128> + %1 = sext <2 x i64> %b to <2 x i128> + %mul = mul <2 x i128> %1, %0 + %shr = lshr <2 x i128> %mul, <i128 64, i128 64> + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + +define <2 x i64> @test_vmulhud(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vmulhud: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulhud v2, v3, v2 +; CHECK-NEXT: blr +entry: + %0 = zext <2 x i64> %a to <2 x i128> + %1 = zext <2 x i64> %b to <2 x i128> + %mul = mul <2 x i128> %1, %0 + %shr = lshr <2 x i128> %mul, <i128 64, i128 64> + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + +define <4 x i32> @test_vmulhsw(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vmulhsw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulhsw v2, v3, v2 +; CHECK-NEXT: blr +entry: + %0 = sext <4 x i32> %a to <4 x i64> + %1 = sext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %0 + %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32> + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} + +define <4 x i32> @test_vmulhuw(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vmulhuw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulhuw v2, v3, v2 +; CHECK-NEXT: blr +entry: 
+ %0 = zext <4 x i32> %a to <4 x i64> + %1 = zext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %0 + %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32> + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt =================================================================== --- llvm/test/MC/Disassembler/PowerPC/p10insts.txt +++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt @@ -69,3 +69,15 @@ # CHECK: vdivud 21, 11, 10 0x12 0xab 0x50 0xcb + +# CHECK: vmulhsw 1, 2, 3 +0x10 0x22 0x1b 0x89 + +# CHECK: vmulhuw 1, 2, 3 +0x10 0x22 0x1a 0x89 + +# CHECK: vmulhsd 1, 2, 3 +0x10 0x22 0x1b 0xc9 + +# CHECK: vmulhud 1, 2, 3 +0x10 0x22 0x1a 0xc9 Index: llvm/test/MC/PowerPC/p10.s =================================================================== --- llvm/test/MC/PowerPC/p10.s +++ llvm/test/MC/PowerPC/p10.s @@ -72,3 +72,15 @@ # CHECK-BE: vdivud 21, 11, 10 # encoding: [0x12,0xab,0x50,0xcb] # CHECK-LE: vdivud 21, 11, 10 # encoding: [0xcb,0x50,0xab,0x12] vdivud 21, 11, 10 +# CHECK-BE: vmulhsw 1, 2, 3 # encoding: [0x10,0x22,0x1b,0x89] +# CHECK-LE: vmulhsw 1, 2, 3 # encoding: [0x89,0x1b,0x22,0x10] + vmulhsw 1, 2, 3 +# CHECK-BE: vmulhuw 1, 2, 3 # encoding: [0x10,0x22,0x1a,0x89] +# CHECK-LE: vmulhuw 1, 2, 3 # encoding: [0x89,0x1a,0x22,0x10] + vmulhuw 1, 2, 3 +# CHECK-BE: vmulhsd 1, 2, 3 # encoding: [0x10,0x22,0x1b,0xc9] +# CHECK-LE: vmulhsd 1, 2, 3 # encoding: [0xc9,0x1b,0x22,0x10] + vmulhsd 1, 2, 3 +# CHECK-BE: vmulhud 1, 2, 3 # encoding: [0x10,0x22,0x1a,0xc9] +# CHECK-LE: vmulhud 1, 2, 3 # encoding: [0xc9,0x1a,0x22,0x10] + vmulhud 1, 2, 3