diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23466,10 +23466,14 @@
   int Index0, Index1;
   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
+  // Extracting an element from a splat_vector should be free.
+  // TODO: use DAG.isSplatValue instead?
+  bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
+                           N1.getOpcode() == ISD::SPLAT_VECTOR;
   if (!Src0 || !Src1 || Index0 != Index1 ||
       Src0.getValueType().getVectorElementType() != EltVT ||
       Src1.getValueType().getVectorElementType() != EltVT ||
-      !TLI.isExtractVecEltCheap(VT, Index0) ||
+      !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
     return SDValue();
 
@@ -23491,6 +23495,8 @@
   }
 
   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
+  if (VT.isScalableVector())
+    return DAG.getSplatVector(VT, DL, ScalarBO);
   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
   return DAG.getBuildVector(VT, DL, Ops);
 }
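
Editor's note (illustration, not part of the patch): the new path scalarizes a binary op of two ISD::SPLAT_VECTOR operands and emits a single SPLAT_VECTOR of the scalar result, instead of bailing out when extract_vector_elt is not cheap. A minimal IR sketch of the shape this targets, mirroring the vadd_xx_nxv8i64 test updated below; with this patch RV64 emits `add a0, a0, a1` followed by one `vmv.v.x`, where it previously emitted `vmv.v.x` plus `vadd.vx`:

define <vscale x 8 x i64> @vadd_xx_nxv8i64(i64 %a, i64 %b) {
  %head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
  %splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %head2 = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
  %splat2 = shufflevector <vscale x 8 x i64> %head2, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  ; The combine rewrites this vector add of two splats into a scalar add
  ; whose result is splatted once.
  %v = add <vscale x 8 x i64> %splat1, %splat2
  ret <vscale x 8 x i64> %v
}
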
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -111,16 +111,16 @@
 define <vscale x 4 x i1> @lane_mask_nxv4i1_i8(i8 %index, i8 %TC) {
 ; CHECK-LABEL: lane_mask_nxv4i1_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0xff
 ; CHECK-NEXT:    index z0.s, #0, #1
-; CHECK-NEXT:    mov z1.s, w0
 ; CHECK-NEXT:    and z0.s, z0.s, #0xff
-; CHECK-NEXT:    and z1.s, z1.s, #0xff
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    and w8, w1, #0xff
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
-; CHECK-NEXT:    mov z1.s, w1
 ; CHECK-NEXT:    umin z0.s, z0.s, #255
-; CHECK-NEXT:    and z1.s, z1.s, #0xff
 ; CHECK-NEXT:    and z0.s, z0.s, #0xff
-; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z1.s, w8
 ; CHECK-NEXT:    cmphi p0.s, p0/z, z1.s, z0.s
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i8(i8 %index, i8 %TC)
@@ -131,18 +131,18 @@
 ; CHECK-LABEL: lane_mask_nxv2i1_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    and x8, x0, #0xff
 ; CHECK-NEXT:    index z0.d, #0, #1
-; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    and x9, x1, #0xff
 ; CHECK-NEXT:    and z0.d, z0.d, #0xff
-; CHECK-NEXT:    and z1.d, z1.d, #0xff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z1.d, x8
 ; CHECK-NEXT:    add z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    mov z2.d, x1
+; CHECK-NEXT:    mov z1.d, x9
 ; CHECK-NEXT:    umin z0.d, z0.d, #255
-; CHECK-NEXT:    and z2.d, z2.d, #0xff
 ; CHECK-NEXT:    and z0.d, z0.d, #0xff
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    cmphi p0.d, p0/z, z2.d, z0.d
+; CHECK-NEXT:    cmphi p0.d, p0/z, z1.d, z0.d
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i8(i8 %index, i8 %TC)
   ret <vscale x 2 x i1> %active.lane.mask
diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
--- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -136,11 +136,9 @@
 define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
 ; CHECK-LABEL: splat_fdiv_nxv4f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT:    fmov z2.s, #1.00000000
-; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmov s2, #1.00000000
+; CHECK-NEXT:    fdiv s0, s2, s0
 ; CHECK-NEXT:    mov z0.s, s0
-; CHECK-NEXT:    fdivr z0.s, p0/m, z0.s, z2.s
 ; CHECK-NEXT:    fmul z0.s, z1.s, z0.s
 ; CHECK-NEXT:    ret
 entry:
@@ -153,11 +151,9 @@
 define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
 ; CHECK-LABEL: splat_three_fdiv_nxv4f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT:    fmov z4.s, #1.00000000
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z0.s, s0
-; CHECK-NEXT:    fdiv z4.s, p0/m, z4.s, z0.s
+; CHECK-NEXT:    fmov s4, #1.00000000
+; CHECK-NEXT:    fdiv s0, s4, s0
+; CHECK-NEXT:    mov z4.s, s0
 ; CHECK-NEXT:    fmul z0.s, z1.s, z4.s
 ; CHECK-NEXT:    fmul z1.s, z2.s, z4.s
 ; CHECK-NEXT:    fmul z2.s, z3.s, z4.s
@@ -190,11 +186,9 @@
 define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
 ; CHECK-LABEL: splat_two_fdiv_nxv2f64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    fmov z3.d, #1.00000000
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z0.d, d0
-; CHECK-NEXT:    fdiv z3.d, p0/m, z3.d, z0.d
+; CHECK-NEXT:    fmov d3, #1.00000000
+; CHECK-NEXT:    fdiv d0, d3, d0
+; CHECK-NEXT:    mov z3.d, d0
 ; CHECK-NEXT:    fmul z0.d, z1.d, z3.d
 ; CHECK-NEXT:    fmul z1.d, z2.d, z3.d
 ; CHECK-NEXT:    b foo_2_nxv2f64
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
@@ -169,7 +169,6 @@
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
@@ -185,15 +184,14 @@
 ; VBITS_GE_256-LABEL: select_v16f32:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    mov x8, #8
-; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
+; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p1.s
 ; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    mov z4.s, w9
-; VBITS_GE_256-NEXT:    and z4.s, z4.s, #0x1
 ; VBITS_GE_256-NEXT:    cmpne p1.s, p1/z, z4.s, #0
 ; VBITS_GE_256-NEXT:    sel z1.s, p1, z1.s, z3.s
 ; VBITS_GE_256-NEXT:    sel z0.s, p1, z0.s, z2.s
@@ -209,7 +207,6 @@
 ; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; VBITS_GE_512-NEXT:    ptrue p1.s
 ; VBITS_GE_512-NEXT:    mov z2.s, w8
-; VBITS_GE_512-NEXT:    and z2.s, z2.s, #0x1
 ; VBITS_GE_512-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; VBITS_GE_512-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
@@ -230,7 +227,6 @@
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
@@ -251,7 +247,6 @@
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
@@ -298,7 +293,6 @@
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -314,15 +308,14 @@
 ; VBITS_GE_256-LABEL: select_v8f64:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    mov x8, #4
-; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
+; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p1.d
 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    mov z4.d, x9
-; VBITS_GE_256-NEXT:    and z4.d, z4.d, #0x1
 ; VBITS_GE_256-NEXT:    cmpne p1.d, p1/z, z4.d, #0
 ; VBITS_GE_256-NEXT:    sel z1.d, p1, z1.d, z3.d
 ; VBITS_GE_256-NEXT:    sel z0.d, p1, z0.d, z2.d
@@ -338,7 +331,6 @@
 ; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_512-NEXT:    ptrue p1.d
 ; VBITS_GE_512-NEXT:    mov z2.d, x8
-; VBITS_GE_512-NEXT:    and z2.d, z2.d, #0x1
 ; VBITS_GE_512-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; VBITS_GE_512-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
@@ -359,7 +351,6 @@
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -380,7 +371,6 @@
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
@@ -298,7 +298,6 @@
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
@@ -314,15 +313,14 @@
 ; VBITS_GE_256-LABEL: select_v16i32:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    mov x8, #8
-; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
+; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p1.s
 ; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    mov z4.s, w9
-; VBITS_GE_256-NEXT:    and z4.s, z4.s, #0x1
 ; VBITS_GE_256-NEXT:    cmpne p1.s, p1/z, z4.s, #0
 ; VBITS_GE_256-NEXT:    sel z1.s, p1, z1.s, z3.s
 ; VBITS_GE_256-NEXT:    sel z0.s, p1, z0.s, z2.s
@@ -338,7 +336,6 @@
 ; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; VBITS_GE_512-NEXT:    ptrue p1.s
 ; VBITS_GE_512-NEXT:    mov z2.s, w8
-; VBITS_GE_512-NEXT:    and z2.s, z2.s, #0x1
 ; VBITS_GE_512-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; VBITS_GE_512-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
@@ -359,7 +356,6 @@
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
@@ -380,7 +376,6 @@
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
@@ -427,7 +422,6 @@
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -443,15 +437,14 @@
 ; VBITS_GE_256-LABEL: select_v8i64:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    mov x8, #4
-; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
+; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p1.d
 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    mov z4.d, x9
-; VBITS_GE_256-NEXT:    and z4.d, z4.d, #0x1
 ; VBITS_GE_256-NEXT:    cmpne p1.d, p1/z, z4.d, #0
 ; VBITS_GE_256-NEXT:    sel z1.d, p1, z1.d, z3.d
 ; VBITS_GE_256-NEXT:    sel z0.d, p1, z0.d, z2.d
@@ -467,7 +460,6 @@
 ; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_512-NEXT:    ptrue p1.d
 ; VBITS_GE_512-NEXT:    mov z2.d, x8
-; VBITS_GE_512-NEXT:    and z2.d, z2.d, #0x1
 ; VBITS_GE_512-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; VBITS_GE_512-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
@@ -488,7 +480,6 @@
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -509,7 +500,6 @@
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll
--- a/llvm/test/CodeGen/AArch64/sve-gep.ll
+++ b/llvm/test/CodeGen/AArch64/sve-gep.ll
@@ -56,8 +56,8 @@
 define <vscale x 2 x i8*> @scalable_of_fixed_1(i8* %base) {
 ; CHECK-LABEL: scalable_of_fixed_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, x0
-; CHECK-NEXT:    add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT:    add x8, x0, #1
+; CHECK-NEXT:    mov z0.d, x8
 ; CHECK-NEXT:    ret
   %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
   %d = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
@@ -202,8 +202,8 @@
 define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x i64>* %base) {
 ; CHECK-LABEL: scalable_of_scalable_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, x0
-; CHECK-NEXT:    incd z0.d, all, mul #8
+; CHECK-NEXT:    addvl x8, x0, #1
+; CHECK-NEXT:    mov z0.d, x8
 ; CHECK-NEXT:    ret
   %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
   %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <vscale x 2 x i64> %idx
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-multiple.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-multiple.ll
--- a/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-multiple.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-multiple.ll
@@ -2,7 +2,7 @@
 ; This code generates a concat_vectors with more than 2 inputs. Make sure
 ; that this compiles successfully.
-; CHECK: vlsr
+; CHECK: lsr
 
 target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
 target triple = "hexagon"
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
@@ -879,9 +879,9 @@
 ;
 ; RV64-LABEL: vadd_xx_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT:    vmv.v.x v8, a0
-; RV64-NEXT:    vadd.vx v8, v8, a1
 ; RV64-NEXT:    ret
   %head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
   %splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
@@ -1370,9 +1370,9 @@
 ;
 ; RV64-LABEL: vand_xx_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT:    vmv.v.x v8, a0
-; RV64-NEXT:    vand.vx v8, v8, a1
 ; RV64-NEXT:    ret
   %head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
   %splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M
 
 define <vscale x 1 x i8> @vmul_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vmul_vv_nxv1i8:
@@ -939,12 +939,19 @@
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vmul_xx_nxv8i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
-; RV64-NEXT:    vmv.v.x v8, a0
-; RV64-NEXT:    vmul.vx v8, v8, a1
-; RV64-NEXT:    ret
+; RV64NOM-LABEL: vmul_xx_nxv8i64:
+; RV64NOM:       # %bb.0:
+; RV64NOM-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64NOM-NEXT:    vmv.v.x v8, a0
+; RV64NOM-NEXT:    vmul.vx v8, v8, a1
+; RV64NOM-NEXT:    ret
+;
+; RV64M-LABEL: vmul_xx_nxv8i64:
+; RV64M:       # %bb.0:
+; RV64M-NEXT:    mul a0, a0, a1
+; RV64M-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64M-NEXT:    vmv.v.x v8, a0
+; RV64M-NEXT:    ret
   %head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
   %splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
   %head2 = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
@@ -1163,9 +1163,9 @@
 ;
 ; RV64-LABEL: vor_xx_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT:    vmv.v.x v8, a0
-; RV64-NEXT:    vor.vx v8, v8, a1
 ; RV64-NEXT:    ret
   %head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
   %splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
@@ -857,9 +857,9 @@
 ;
 ; RV64-LABEL: vsub_xx_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    sub a0, a0, a1
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT:    vmv.v.x v8, a0
-; RV64-NEXT:    vsub.vx v8, v8, a1
 ; RV64-NEXT:    ret
   %head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
   %splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
@@ -1370,9 +1370,9 @@
 ;
 ; RV64-LABEL: vxor_xx_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    xor a0, a0, a1
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT:    vmv.v.x v8, a0
-; RV64-NEXT:    vxor.vx v8, v8, a1
 ; RV64-NEXT:    ret
   %head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
   %splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
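
Editor's note (illustration, not part of the patch): the RV64NOM/RV64M split in vmul-sdnode.ll above shows the `!TLI.isOperationLegalOrCustom(Opcode, EltVT)` guard at work. A sketch of the test's shape, assuming scalar i64 mul is only legal when the M extension is enabled:

define <vscale x 8 x i64> @vmul_xx_nxv8i64(i64 %a, i64 %b) {
  ; With +m (RV64M): the combine fires and emits mul a0, a0, a1 + vmv.v.x v8, a0.
  ; Without +m (RV64NOM): scalar i64 mul is not legal, the combine is skipped,
  ; and the vector sequence vmv.v.x + vmul.vx is kept.
  %head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
  %splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %head2 = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
  %splat2 = shufflevector <vscale x 8 x i64> %head2, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %v = mul <vscale x 8 x i64> %splat1, %splat2
  ret <vscale x 8 x i64> %v
}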