[PowerPC] Legalize vector integer multiply, divide and modulo for ISA 3.1

When Subtarget.isISA3_1() is true, mark MUL on v2i64 and UDIV/SDIV/UREM/SREM
on v2i64 and v4i32 as Legal, and attach selection patterns to VMULLD,
VDIV{S,U}{W,D} and VMOD{S,U}{W,D}. New llc tests check the selected
instructions with -mcpu=pwr10 on both powerpc64le and powerpc64 triples.

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -809,6 +809,20 @@
     else
       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
 
+    // ISA 3.1 has vector instructions for these operations: vmulld,
+    // vdiv{s,u}{w,d} and vmod{s,u}{w,d} (patterns are in PPCInstrPrefix.td).
+    if (Subtarget.isISA3_1()) {
+      setOperationAction(ISD::MUL, MVT::v2i64, Legal);
+      setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
+      setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
+      setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
+      setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
+      setOperationAction(ISD::UREM, MVT::v2i64, Legal);
+      setOperationAction(ISD::SREM, MVT::v2i64, Legal);
+      setOperationAction(ISD::UREM, MVT::v4i32, Legal);
+      setOperationAction(ISD::SREM, MVT::v4i32, Legal);
+    }
+
     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -973,7 +973,8 @@
                         [(set v16i8:$vD,
                               (int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>;
   def VMULLD : VXForm_1<457, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vmulld $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vmulld $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>;
   def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                          "vmulhsw $vD, $vA, $vB", IIC_VecGeneral, []>;
   def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
@@ -983,21 +984,29 @@
   def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                          "vmulhud $vD, $vA, $vB", IIC_VecGeneral, []>;
   def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vmodsw $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vmodsw $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>;
   def VMODUW : VXForm_1<1675, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vmoduw $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vmoduw $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v4i32:$vD, (urem v4i32:$vA, v4i32:$vB))]>;
   def VMODSD : VXForm_1<1995, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vmodsd $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vmodsd $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v2i64:$vD, (srem v2i64:$vA, v2i64:$vB))]>;
   def VMODUD : VXForm_1<1739, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vmodud $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vmodud $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v2i64:$vD, (urem v2i64:$vA, v2i64:$vB))]>;
   def VDIVSW : VXForm_1<395, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vdivsw $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vdivsw $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v4i32:$vD, (sdiv v4i32:$vA, v4i32:$vB))]>;
   def VDIVUW : VXForm_1<139, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vdivuw $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vdivuw $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v4i32:$vD, (udiv v4i32:$vA, v4i32:$vB))]>;
   def VDIVSD : VXForm_1<459, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vdivsd $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vdivsd $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v2i64:$vD, (sdiv v2i64:$vA, v2i64:$vB))]>;
   def VDIVUD : VXForm_1<203, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vdivud $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vdivud $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>;
   def VDIVESW : VXForm_1<907, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                          "vdivesw $vD, $vA, $vB", IIC_VecGeneral, []>;
   def VDIVEUW : VXForm_1<651, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll b/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test the vector divide instructions on Power10.
+; It covers the low order versions of vector divide, which operate on
+; signed and unsigned words and doublewords.
+
+define <2 x i64> @test_vdivud(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vdivud:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vdivud v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %div = udiv <2 x i64> %a, %b
+  ret <2 x i64> %div
+}
+
+define <2 x i64> @test_vdivsd(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vdivsd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vdivsd v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %div = sdiv <2 x i64> %a, %b
+  ret <2 x i64> %div
+}
+
+define <4 x i32> @test_vdivuw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vdivuw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vdivuw v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %div = udiv <4 x i32> %a, %b
+  ret <4 x i32> %div
+}
+
+define <4 x i32> @test_vdivsw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vdivsw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vdivsw v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %div = sdiv <4 x i32> %a, %b
+  ret <4 x i32> %div
+}
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll b/llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test the vector modulo instructions on Power10.
+; The vector modulo instructions operate on signed and unsigned words
+; and doublewords.
+
+define <2 x i64> @test_vmodud(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmodud:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmodud v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %rem = urem <2 x i64> %a, %b
+  ret <2 x i64> %rem
+}
+
+define <2 x i64> @test_vmodsd(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmodsd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmodsd v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %rem = srem <2 x i64> %a, %b
+  ret <2 x i64> %rem
+}
+
+define <4 x i32> @test_vmoduw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmoduw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmoduw v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %rem = urem <4 x i32> %a, %b
+  ret <4 x i32> %rem
+}
+
+define <4 x i32> @test_vmodsw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmodsw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmodsw v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %rem = srem <4 x i32> %a, %b
+  ret <4 x i32> %rem
+}
+
+define <2 x i64> @test_vmodud_with_div(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmodud_with_div:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmodud v4, v2, v3
+; CHECK-NEXT:    vdivud v2, v2, v3
+; CHECK-NEXT:    vaddudm v2, v4, v2
+; CHECK-NEXT:    blr
+entry:
+  %rem = urem <2 x i64> %a, %b
+  %div = udiv <2 x i64> %a, %b
+  %add = add <2 x i64> %rem, %div
+  ret <2 x i64> %add
+}
+
+define <2 x i64> @test_vmodsd_with_div(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmodsd_with_div:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmodsd v4, v2, v3
+; CHECK-NEXT:    vdivsd v2, v2, v3
+; CHECK-NEXT:    vaddudm v2, v4, v2
+; CHECK-NEXT:    blr
+entry:
+  %rem = srem <2 x i64> %a, %b
+  %div = sdiv <2 x i64> %a, %b
+  %add = add <2 x i64> %rem, %div
+  ret <2 x i64> %add
+}
+
+define <4 x i32> @test_vmoduw_with_div(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmoduw_with_div:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmoduw v4, v2, v3
+; CHECK-NEXT:    vdivuw v2, v2, v3
+; CHECK-NEXT:    vadduwm v2, v4, v2
+; CHECK-NEXT:    blr
+entry:
+  %rem = urem <4 x i32> %a, %b
+  %div = udiv <4 x i32> %a, %b
+  %add = add <4 x i32> %rem, %div
+  ret <4 x i32> %add
+}
+
+define <4 x i32> @test_vmodsw_with_div(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmodsw_with_div:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmodsw v4, v2, v3
+; CHECK-NEXT:    vdivsw v2, v2, v3
+; CHECK-NEXT:    vadduwm v2, v4, v2
+; CHECK-NEXT:    blr
+entry:
+  %rem = srem <4 x i32> %a, %b
+  %div = sdiv <4 x i32> %a, %b
+  %add = add <4 x i32> %rem, %div
+  ret <4 x i32> %add
+}
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test the vector multiply instructions on Power10.
+
+define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmulld:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulld v2, v3, v2
+; CHECK-NEXT:    blr
+entry:
+  %mul = mul <2 x i64> %b, %a
+  ret <2 x i64> %mul
+}