diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8348,36 +8348,36 @@
     return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
   case MVT::i32:
     Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
-    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
+                       DAG.getConstant(0xFF00, dl, VT));
+    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
     Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
-    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
-    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
-                       DAG.getConstant(0xFF0000, dl, VT));
     Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
+    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
     Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
     Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
     return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
   case MVT::i64:
     Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
-    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
-    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
-    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
+                       DAG.getConstant(255ULL<<8, dl, VT));
+    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
+    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
+                       DAG.getConstant(255ULL<<16, dl, VT));
+    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
+    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
+                       DAG.getConstant(255ULL<<24, dl, VT));
+    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
     Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
-    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
-    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
-    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
-    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
-                       DAG.getConstant(255ULL<<48, dl, VT));
-    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
-                       DAG.getConstant(255ULL<<40, dl, VT));
-    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
-                       DAG.getConstant(255ULL<<32, dl, VT));
     Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                        DAG.getConstant(255ULL<<24, dl, VT));
+    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
     Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                        DAG.getConstant(255ULL<<16, dl, VT));
+    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
     Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
-                       DAG.getConstant(255ULL<<8 , dl, VT));
+                       DAG.getConstant(255ULL<<8, dl, VT));
+    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
     Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
     Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
     Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
@@ -201,29 +201,27 @@
 ; CHECK-LABEL: bswap_v2i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI14_0
-; CHECK-NEXT: adrp x9, .LCPI14_1
 ; CHECK-NEXT: adrp x10, .LCPI14_2
+; CHECK-NEXT: adrp x9, .LCPI14_1
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0]
 ; CHECK-NEXT: adrp x8, .LCPI14_3
+; CHECK-NEXT: ldr d3, [x10, :lo12:.LCPI14_2]
+; CHECK-NEXT: movprfx z4, z0
+; CHECK-NEXT: lsr z4.s, p0/m, z4.s, z1.s
 ; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI14_1]
 ; CHECK-NEXT: movprfx z5, z0
-; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z1.s
-; CHECK-NEXT: ldr d3, [x10, :lo12:.LCPI14_2]
-; CHECK-NEXT: movprfx z6, z0
-; CHECK-NEXT: lsr z6.s, p0/m, z6.s, z2.s
-; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI14_3]
-; CHECK-NEXT: adrp x8, .LCPI14_4
+; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z2.s
 ; CHECK-NEXT: lslr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: and z0.d, z0.d, z3.d
+; CHECK-NEXT: and z3.d, z5.d, z3.d
 ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: and z2.d, z6.d, z3.d
-; CHECK-NEXT: and z0.d, z0.d, z4.d
-; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI14_4]
-; CHECK-NEXT: orr z2.d, z2.d, z5.d
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI14_3]
+; CHECK-NEXT: orr z3.d, z3.d, z4.d
 ; CHECK-NEXT: orr z0.d, z1.d, z0.d
-; CHECK-NEXT: orr z0.d, z0.d, z2.d
-; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z3.s
+; CHECK-NEXT: orr z0.d, z0.d, z3.d
+; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z2.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
   %res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %op)
@@ -290,25 +288,24 @@
 define <2 x i32> @bswap_v2i32(<2 x i32> %op) #0 {
 ; CHECK-LABEL: bswap_v2i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI18_1
-; CHECK-NEXT: adrp x9, .LCPI18_2
-; CHECK-NEXT: adrp x10, .LCPI18_0
+; CHECK-NEXT: adrp x8, .LCPI18_0
+; CHECK-NEXT: adrp x10, .LCPI18_2
+; CHECK-NEXT: adrp x9, .LCPI18_1
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI18_1]
-; CHECK-NEXT: adrp x8, .LCPI18_3
-; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI18_2]
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI18_0]
+; CHECK-NEXT: movprfx z4, z0
+; CHECK-NEXT: lsr z4.s, p0/m, z4.s, z1.s
+; CHECK-NEXT: ldr d3, [x10, :lo12:.LCPI18_2]
+; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI18_1]
 ; CHECK-NEXT: movprfx z5, z0
-; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z1.s
-; CHECK-NEXT: ldr d3, [x10, :lo12:.LCPI18_0]
-; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI18_3]
-; CHECK-NEXT: and z2.d, z5.d, z2.d
-; CHECK-NEXT: movprfx z5, z0
-; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z3.s
-; CHECK-NEXT: lslr z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z3.s
-; CHECK-NEXT: and z1.d, z1.d, z4.d
-; CHECK-NEXT: orr z2.d, z2.d, z5.d
+; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z2.s
+; CHECK-NEXT: and z5.d, z5.d, z3.d
+; CHECK-NEXT: and z3.d, z0.d, z3.d
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: movprfx z1, z3
+; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: orr z2.d, z5.d, z4.d
 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
 ; CHECK-NEXT: orr z0.d, z0.d, z2.d
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -320,25 +317,24 @@
 define <4 x i32> @bswap_v4i32(<4 x i32> %op) #0 {
 ; CHECK-LABEL: bswap_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI19_1
-; CHECK-NEXT: adrp x9, .LCPI19_2
-; CHECK-NEXT: adrp x10, .LCPI19_0
+; CHECK-NEXT: adrp x8, .LCPI19_0
+; CHECK-NEXT: adrp x10, .LCPI19_2
+; CHECK-NEXT: adrp x9, .LCPI19_1
 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_1]
-; CHECK-NEXT: adrp x8, .LCPI19_3
-; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI19_2]
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0]
+; CHECK-NEXT: movprfx z4, z0
+; CHECK-NEXT: lsr z4.s, p0/m, z4.s, z1.s
+; CHECK-NEXT: ldr q3, [x10, :lo12:.LCPI19_2]
+; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI19_1]
 ; CHECK-NEXT: movprfx z5, z0
-; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z1.s
-; CHECK-NEXT: ldr q3, [x10, :lo12:.LCPI19_0]
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI19_3]
-; CHECK-NEXT: and z2.d, z5.d, z2.d
-; CHECK-NEXT: movprfx z5, z0
-; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z3.s
-; CHECK-NEXT: lslr z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z3.s
-; CHECK-NEXT: and z1.d, z1.d, z4.d
-; CHECK-NEXT: orr z2.d, z2.d, z5.d
+; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z2.s
+; CHECK-NEXT: and z5.d, z5.d, z3.d
+; CHECK-NEXT: and z3.d, z0.d, z3.d
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: movprfx z1, z3
+; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: orr z2.d, z5.d, z4.d
 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
 ; CHECK-NEXT: orr z0.d, z0.d, z2.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -352,35 +348,33 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI20_0
 ; CHECK-NEXT: adrp x9, .LCPI20_1
-; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: ldp q4, q1, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI20_0]
 ; CHECK-NEXT: adrp x8, .LCPI20_2
-; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI20_1]
-; CHECK-NEXT: movprfx z5, z2
+; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI20_1]
+; CHECK-NEXT: movprfx z5, z1
 ; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z0.s
-; CHECK-NEXT: movprfx z6, z2
-; CHECK-NEXT: lsr z6.s, p0/m, z6.s, z1.s
-; CHECK-NEXT: movprfx z7, z2
+; CHECK-NEXT: movprfx z6, z1
+; CHECK-NEXT: lsr z6.s, p0/m, z6.s, z2.s
+; CHECK-NEXT: movprfx z7, z1
 ; CHECK-NEXT: lsl z7.s, p0/m, z7.s, z0.s
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI20_2]
-; CHECK-NEXT: adrp x8, .LCPI20_3
-; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: and z6.d, z6.d, z4.d
-; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI20_3]
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI20_2]
+; CHECK-NEXT: movprfx z16, z4
+; CHECK-NEXT: lsr z16.s, p0/m, z16.s, z2.s
+; CHECK-NEXT: and z1.d, z1.d, z3.d
+; CHECK-NEXT: and z6.d, z6.d, z3.d
+; CHECK-NEXT: and z16.d, z16.d, z3.d
+; CHECK-NEXT: and z3.d, z4.d, z3.d
 ; CHECK-NEXT: orr z5.d, z6.d, z5.d
-; CHECK-NEXT: movprfx z6, z3
-; CHECK-NEXT: lsr z6.s, p0/m, z6.s, z1.s
-; CHECK-NEXT: and z4.d, z6.d, z4.d
-; CHECK-NEXT: movprfx z6, z3
+; CHECK-NEXT: movprfx z6, z4
 ; CHECK-NEXT: lsr z6.s, p0/m, z6.s, z0.s
-; CHECK-NEXT: lslr z0.s, p0/m, z0.s, z3.s
-; CHECK-NEXT: lslr z1.s, p0/m, z1.s, z3.s
-; CHECK-NEXT: and z2.d, z2.d, z16.d
-; CHECK-NEXT: and z1.d, z1.d, z16.d
-; CHECK-NEXT: orr z3.d, z4.d, z6.d
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: orr z1.d, z7.d, z2.d
+; CHECK-NEXT: lslr z0.s, p0/m, z0.s, z4.s
+; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: lslr z2.s, p0/m, z2.s, z3.s
+; CHECK-NEXT: orr z3.d, z16.d, z6.d
+; CHECK-NEXT: orr z0.d, z0.d, z2.d
+; CHECK-NEXT: orr z1.d, z7.d, z1.d
 ; CHECK-NEXT: orr z0.d, z0.d, z3.d
 ; CHECK-NEXT: orr z1.d, z1.d, z5.d
 ; CHECK-NEXT: stp q0, q1, [x0]
@@ -397,48 +391,43 @@
 ; CHECK-NEXT: mov w8, #56
 ; CHECK-NEXT: mov w9, #40
 ; CHECK-NEXT: mov w10, #65280
+; CHECK-NEXT: mov w11, #24
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl1
 ; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: mov w8, #24
+; CHECK-NEXT: mov w8, #16711680
 ; CHECK-NEXT: fmov d2, x9
-; CHECK-NEXT: mov w9, #16711680
+; CHECK-NEXT: mov w9, #8
 ; CHECK-NEXT: fmov d3, x10
-; CHECK-NEXT: mov w10, #8
-; CHECK-NEXT: fmov d4, x8
+; CHECK-NEXT: movprfx z7, z0
+; CHECK-NEXT: lsr z7.d, p0/m, z7.d, z1.d
+; CHECK-NEXT: fmov d5, x8
 ; CHECK-NEXT: mov w8, #-16777216
-; CHECK-NEXT: fmov d5, x9
-; CHECK-NEXT: mov x9, #1095216660480
 ; CHECK-NEXT: movprfx z16, z0
 ; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z2.d
-; CHECK-NEXT: and z3.d, z16.d, z3.d
-; CHECK-NEXT: fmov d7, x8
-; CHECK-NEXT: mov x8, #280375465082880
+; CHECK-NEXT: fmov d4, x11
+; CHECK-NEXT: fmov d6, x9
+; CHECK-NEXT: and z16.d, z16.d, z3.d
+; CHECK-NEXT: fmov d17, x8
+; CHECK-NEXT: orr z7.d, z16.d, z7.d
 ; CHECK-NEXT: movprfx z16, z0
 ; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z4.d
-; CHECK-NEXT: fmov d6, x10
-; CHECK-NEXT: and z5.d, z16.d, z5.d
-; CHECK-NEXT: movprfx z16, z0
-; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z6.d
-; CHECK-NEXT: fmov d18, x8
-; CHECK-NEXT: mov x8, #71776119061217280
-; CHECK-NEXT: and z7.d, z16.d, z7.d
-; CHECK-NEXT: fmov d17, x9
-; CHECK-NEXT: orr z5.d, z7.d, z5.d
-; CHECK-NEXT: movprfx z16, z0
-; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z1.d
-; CHECK-NEXT: fmov d7, x8
-; CHECK-NEXT: lslr z6.d, p0/m, z6.d, z0.d
-; CHECK-NEXT: lslr z4.d, p0/m, z4.d, z0.d
-; CHECK-NEXT: lslr z2.d, p0/m, z2.d, z0.d
-; CHECK-NEXT: and z6.d, z6.d, z17.d
-; CHECK-NEXT: and z4.d, z4.d, z18.d
+; CHECK-NEXT: movprfx z18, z0
+; CHECK-NEXT: lsr z18.d, p0/m, z18.d, z6.d
+; CHECK-NEXT: and z16.d, z16.d, z5.d
+; CHECK-NEXT: and z5.d, z0.d, z5.d
+; CHECK-NEXT: and z18.d, z18.d, z17.d
+; CHECK-NEXT: and z17.d, z0.d, z17.d
+; CHECK-NEXT: lslr z6.d, p0/m, z6.d, z17.d
+; CHECK-NEXT: lslr z4.d, p0/m, z4.d, z5.d
+; CHECK-NEXT: and z3.d, z0.d, z3.d
 ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: and z1.d, z2.d, z7.d
-; CHECK-NEXT: orr z3.d, z3.d, z16.d
+; CHECK-NEXT: orr z16.d, z18.d, z16.d
+; CHECK-NEXT: movprfx z1, z3
+; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z2.d
 ; CHECK-NEXT: orr z2.d, z4.d, z6.d
 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: orr z1.d, z5.d, z3.d
+; CHECK-NEXT: orr z1.d, z16.d, z7.d
 ; CHECK-NEXT: orr z0.d, z0.d, z2.d
 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -464,37 +453,32 @@
 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI22_3]
 ; CHECK-NEXT: adrp x8, .LCPI22_6
 ; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI22_4]
-; CHECK-NEXT: adrp x9, .LCPI22_7
+; CHECK-NEXT: movprfx z7, z0
+; CHECK-NEXT: lsr z7.d, p0/m, z7.d, z1.d
 ; CHECK-NEXT: movprfx z16, z0
 ; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z2.d
-; CHECK-NEXT: and z3.d, z16.d, z3.d
-; CHECK-NEXT: ldr q7, [x8, :lo12:.LCPI22_6]
-; CHECK-NEXT: adrp x8, .LCPI22_8
-; CHECK-NEXT: movprfx z16, z0
-; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z4.d
 ; CHECK-NEXT: ldr q6, [x10, :lo12:.LCPI22_5]
-; CHECK-NEXT: and z5.d, z16.d, z5.d
-; CHECK-NEXT: movprfx z16, z0
-; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z6.d
-; CHECK-NEXT: ldr q18, [x8, :lo12:.LCPI22_8]
-; CHECK-NEXT: adrp x8, .LCPI22_9
-; CHECK-NEXT: and z7.d, z16.d, z7.d
-; CHECK-NEXT: ldr q17, [x9, :lo12:.LCPI22_7]
-; CHECK-NEXT: orr z5.d, z7.d, z5.d
+; CHECK-NEXT: ldr q17, [x8, :lo12:.LCPI22_6]
+; CHECK-NEXT: and z16.d, z16.d, z3.d
+; CHECK-NEXT: orr z7.d, z16.d, z7.d
 ; CHECK-NEXT: movprfx z16, z0
-; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z1.d
-; CHECK-NEXT: ldr q7, [x8, :lo12:.LCPI22_9]
-; CHECK-NEXT: lslr z6.d, p0/m, z6.d, z0.d
-; CHECK-NEXT: lslr z4.d, p0/m, z4.d, z0.d
-; CHECK-NEXT: lslr z2.d, p0/m, z2.d, z0.d
-; CHECK-NEXT: and z6.d, z6.d, z17.d
-; CHECK-NEXT: and z4.d, z4.d, z18.d
+; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z4.d
+; CHECK-NEXT: movprfx z18, z0
+; CHECK-NEXT: lsr z18.d, p0/m, z18.d, z6.d
+; CHECK-NEXT: and z16.d, z16.d, z5.d
+; CHECK-NEXT: and z18.d, z18.d, z17.d
+; CHECK-NEXT: and z17.d, z0.d, z17.d
+; CHECK-NEXT: and z5.d, z0.d, z5.d
+; CHECK-NEXT: lslr z6.d, p0/m, z6.d, z17.d
+; CHECK-NEXT: lslr z4.d, p0/m, z4.d, z5.d
+; CHECK-NEXT: and z3.d, z0.d, z3.d
 ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: and z1.d, z2.d, z7.d
-; CHECK-NEXT: orr z3.d, z3.d, z16.d
+; CHECK-NEXT: orr z16.d, z18.d, z16.d
+; CHECK-NEXT: movprfx z1, z3
+; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z2.d
 ; CHECK-NEXT: orr z2.d, z4.d, z6.d
 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: orr z1.d, z5.d, z3.d
+; CHECK-NEXT: orr z1.d, z16.d, z7.d
 ; CHECK-NEXT: orr z0.d, z0.d, z2.d
 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -508,79 +492,72 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI23_0
 ; CHECK-NEXT: adrp x9, .LCPI23_1
-; CHECK-NEXT: adrp x10, .LCPI23_2
+; CHECK-NEXT: adrp x10, .LCPI23_3
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI23_0]
-; CHECK-NEXT: adrp x8, .LCPI23_4
-; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI23_1]
-; CHECK-NEXT: adrp x9, .LCPI23_3
-; CHECK-NEXT: ldr q4, [x10, :lo12:.LCPI23_2]
-; CHECK-NEXT: adrp x10, .LCPI23_5
-; CHECK-NEXT: ldr q7, [x8, :lo12:.LCPI23_4]
-; CHECK-NEXT: adrp x8, .LCPI23_6
-; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI23_3]
-; CHECK-NEXT: adrp x9, .LCPI23_7
-; CHECK-NEXT: movprfx z6, z1
-; CHECK-NEXT: lsr z6.d, p0/m, z6.d, z2.d
-; CHECK-NEXT: movprfx z17, z1
-; CHECK-NEXT: lsr z17.d, p0/m, z17.d, z3.d
-; CHECK-NEXT: ldr q18, [x8, :lo12:.LCPI23_6]
-; CHECK-NEXT: adrp x8, .LCPI23_8
-; CHECK-NEXT: and z6.d, z6.d, z4.d
-; CHECK-NEXT: ldr q16, [x10, :lo12:.LCPI23_5]
-; CHECK-NEXT: orr z6.d, z6.d, z17.d
-; CHECK-NEXT: ldr q17, [x9, :lo12:.LCPI23_7]
-; CHECK-NEXT: ldr q21, [x8, :lo12:.LCPI23_8]
-; CHECK-NEXT: adrp x8, .LCPI23_9
-; CHECK-NEXT: movprfx z19, z1
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI23_0]
+; CHECK-NEXT: adrp x8, .LCPI23_2
+; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI23_1]
+; CHECK-NEXT: adrp x9, .LCPI23_4
+; CHECK-NEXT: ldr q5, [x10, :lo12:.LCPI23_3]
+; CHECK-NEXT: adrp x10, .LCPI23_6
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI23_2]
+; CHECK-NEXT: adrp x8, .LCPI23_5
+; CHECK-NEXT: ldr q6, [x9, :lo12:.LCPI23_4]
+; CHECK-NEXT: movprfx z16, z2
+; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z3.d
+; CHECK-NEXT: ldr q17, [x10, :lo12:.LCPI23_6]
+; CHECK-NEXT: movprfx z18, z2
+; CHECK-NEXT: lsr z18.d, p0/m, z18.d, z0.d
+; CHECK-NEXT: ldr q7, [x8, :lo12:.LCPI23_5]
+; CHECK-NEXT: movprfx z19, z2
 ; CHECK-NEXT: lsr z19.d, p0/m, z19.d, z5.d
-; CHECK-NEXT: movprfx z20, z1
-; CHECK-NEXT: lsr z20.d, p0/m, z20.d, z16.d
-; CHECK-NEXT: and z19.d, z19.d, z7.d
-; CHECK-NEXT: and z20.d, z20.d, z18.d
-; CHECK-NEXT: orr z19.d, z20.d, z19.d
-; CHECK-NEXT: movprfx z20, z1
-; CHECK-NEXT: lsl z20.d, p0/m, z20.d, z16.d
-; CHECK-NEXT: movprfx z22, z1
-; CHECK-NEXT: lsl z22.d, p0/m, z22.d, z5.d
-; CHECK-NEXT: ldr q23, [x8, :lo12:.LCPI23_9]
+; CHECK-NEXT: movprfx z20, z2
+; CHECK-NEXT: lsr z20.d, p0/m, z20.d, z7.d
+; CHECK-NEXT: and z16.d, z16.d, z4.d
+; CHECK-NEXT: and z19.d, z19.d, z6.d
 ; CHECK-NEXT: and z20.d, z20.d, z17.d
-; CHECK-NEXT: and z22.d, z22.d, z21.d
-; CHECK-NEXT: orr z6.d, z19.d, z6.d
-; CHECK-NEXT: orr z19.d, z22.d, z20.d
+; CHECK-NEXT: orr z16.d, z16.d, z18.d
+; CHECK-NEXT: orr z18.d, z20.d, z19.d
+; CHECK-NEXT: and z19.d, z2.d, z17.d
+; CHECK-NEXT: and z20.d, z2.d, z6.d
+; CHECK-NEXT: lsl z19.d, p0/m, z19.d, z7.d
+; CHECK-NEXT: lsl z20.d, p0/m, z20.d, z5.d
+; CHECK-NEXT: orr z16.d, z18.d, z16.d
+; CHECK-NEXT: orr z18.d, z20.d, z19.d
+; CHECK-NEXT: movprfx z19, z2
+; CHECK-NEXT: lsl z19.d, p0/m, z19.d, z0.d
+; CHECK-NEXT: and z2.d, z2.d, z4.d
+; CHECK-NEXT: movprfx z20, z1
+; CHECK-NEXT: lsr z20.d, p0/m, z20.d, z3.d
+; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z3.d
+; CHECK-NEXT: movprfx z21, z1
+; CHECK-NEXT: lsr z21.d, p0/m, z21.d, z0.d
+; CHECK-NEXT: and z20.d, z20.d, z4.d
+; CHECK-NEXT: orr z2.d, z19.d, z2.d
+; CHECK-NEXT: orr z19.d, z20.d, z21.d
 ; CHECK-NEXT: movprfx z20, z1
-; CHECK-NEXT: lsl z20.d, p0/m, z20.d, z3.d
-; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: movprfx z22, z0
-; CHECK-NEXT: lsr z22.d, p0/m, z22.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z23.d
-; CHECK-NEXT: and z4.d, z22.d, z4.d
-; CHECK-NEXT: movprfx z22, z0
-; CHECK-NEXT: lsr z22.d, p0/m, z22.d, z3.d
-; CHECK-NEXT: orr z1.d, z20.d, z1.d
-; CHECK-NEXT: orr z4.d, z4.d, z22.d
-; CHECK-NEXT: movprfx z20, z0
 ; CHECK-NEXT: lsr z20.d, p0/m, z20.d, z5.d
-; CHECK-NEXT: movprfx z22, z0
-; CHECK-NEXT: lsr z22.d, p0/m, z22.d, z16.d
-; CHECK-NEXT: lslr z16.d, p0/m, z16.d, z0.d
-; CHECK-NEXT: lslr z5.d, p0/m, z5.d, z0.d
-; CHECK-NEXT: lslr z2.d, p0/m, z2.d, z0.d
-; CHECK-NEXT: and z7.d, z20.d, z7.d
-; CHECK-NEXT: and z18.d, z22.d, z18.d
-; CHECK-NEXT: and z16.d, z16.d, z17.d
-; CHECK-NEXT: and z5.d, z5.d, z21.d
-; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z3.d
-; CHECK-NEXT: and z2.d, z2.d, z23.d
-; CHECK-NEXT: orr z7.d, z18.d, z7.d
-; CHECK-NEXT: orr z3.d, z5.d, z16.d
-; CHECK-NEXT: orr z0.d, z0.d, z2.d
-; CHECK-NEXT: orr z2.d, z7.d, z4.d
+; CHECK-NEXT: movprfx z21, z1
+; CHECK-NEXT: lsr z21.d, p0/m, z21.d, z7.d
+; CHECK-NEXT: and z20.d, z20.d, z6.d
+; CHECK-NEXT: and z21.d, z21.d, z17.d
+; CHECK-NEXT: and z17.d, z1.d, z17.d
+; CHECK-NEXT: and z6.d, z1.d, z6.d
+; CHECK-NEXT: lslr z7.d, p0/m, z7.d, z17.d
+; CHECK-NEXT: lslr z5.d, p0/m, z5.d, z6.d
+; CHECK-NEXT: lslr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: orr z20.d, z21.d, z20.d
+; CHECK-NEXT: and z4.d, z1.d, z4.d
+; CHECK-NEXT: movprfx z1, z4
+; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z3.d
+; CHECK-NEXT: orr z3.d, z5.d, z7.d
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: orr z1.d, z20.d, z19.d
 ; CHECK-NEXT: orr z0.d, z0.d, z3.d
-; CHECK-NEXT: orr z1.d, z1.d, z19.d
-; CHECK-NEXT: orr z0.d, z0.d, z2.d
-; CHECK-NEXT: orr z1.d, z1.d, z6.d
+; CHECK-NEXT: orr z2.d, z2.d, z18.d
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: orr z1.d, z2.d, z16.d
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
   %op = load <4 x i64>, <4 x i64>* %a
diff --git a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
--- a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
+++ b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll
@@ -56,11 +56,11 @@
 ; CHECK: @ %bb.0:
 ; CHECK-NEXT: ldr r0, [r0]
 ; CHECK-NEXT: mov r1, #65280
-; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r2, r0, #65280
 ; CHECK-NEXT: and r1, r1, r0, lsr #8
-; CHECK-NEXT: and r2, r2, r0, lsl #8
 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
-; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r2, lsl #8
 ; CHECK-NEXT: orr r0, r0, r1
 ; CHECK-NEXT: mov pc, lr
 ;
@@ -230,22 +230,21 @@
 define i64 @load_i64_by_i8_bswap(i64* %arg) {
 ; CHECK-LABEL: load_i64_by_i8_bswap:
 ; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r11, lr}
 ; CHECK-NEXT: ldr r1, [r0]
 ; CHECK-NEXT: mov r12, #65280
 ; CHECK-NEXT: ldr r0, [r0, #4]
-; CHECK-NEXT: mov lr, #16711680
+; CHECK-NEXT: and r2, r0, #65280
 ; CHECK-NEXT: and r3, r12, r0, lsr #8
-; CHECK-NEXT: and r2, lr, r0, lsl #8
 ; CHECK-NEXT: orr r3, r3, r0, lsr #24
-; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r2, lsl #8
 ; CHECK-NEXT: and r2, r12, r1, lsr #8
 ; CHECK-NEXT: orr r0, r0, r3
-; CHECK-NEXT: and r3, lr, r1, lsl #8
+; CHECK-NEXT: and r3, r1, #65280
 ; CHECK-NEXT: orr r2, r2, r1, lsr #24
-; CHECK-NEXT: orr r1, r3, r1, lsl #24
+; CHECK-NEXT: lsl r1, r1, #24
+; CHECK-NEXT: orr r1, r1, r3, lsl #8
 ; CHECK-NEXT: orr r1, r1, r2
-; CHECK-NEXT: pop {r11, lr}
 ; CHECK-NEXT: mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -389,11 +388,11 @@
 ; CHECK: @ %bb.0:
 ; CHECK-NEXT: ldr r0, [r0, #1]
 ; CHECK-NEXT: mov r1, #65280
-; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r2, r0, #65280
 ; CHECK-NEXT: and r1, r1, r0, lsr #8
-; CHECK-NEXT: and r2, r2, r0, lsl #8
 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
-; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r2, lsl #8
 ; CHECK-NEXT: orr r0, r0, r1
 ; CHECK-NEXT: mov pc, lr
 ;
@@ -447,11 +446,11 @@
 ; CHECK: @ %bb.0:
 ; CHECK-NEXT: ldr r0, [r0, #-4]
 ; CHECK-NEXT: mov r1, #65280
-; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r2, r0, #65280
 ; CHECK-NEXT: and r1, r1, r0, lsr #8
-; CHECK-NEXT: and r2, r2, r0, lsl #8
 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
-; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r2, lsl #8
 ; CHECK-NEXT: orr r0, r0, r1
 ; CHECK-NEXT: mov pc, lr
 ;
@@ -603,11 +602,11 @@
 ; CHECK: @ %bb.0:
 ; CHECK-NEXT: ldr r0, [r0]
 ; CHECK-NEXT: mov r1, #65280
-; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r2, r0, #65280
 ; CHECK-NEXT: and r1, r1, r0, lsr #8
-; CHECK-NEXT: and r2, r2, r0, lsl #8
 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
-; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r2, lsl #8
 ; CHECK-NEXT: orr r0, r0, r1
 ; CHECK-NEXT: mov pc, lr
 ;
@@ -684,12 +683,12 @@
 ; CHECK: @ %bb.0:
 ; CHECK-NEXT: add r0, r0, r1
 ; CHECK-NEXT: mov r1, #65280
-; CHECK-NEXT: mov r2, #16711680
 ; CHECK-NEXT: ldr r0, [r0, #12]
+; CHECK-NEXT: and r2, r0, #65280
 ; CHECK-NEXT: and r1, r1, r0, lsr #8
-; CHECK-NEXT: and r2, r2, r0, lsl #8
 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
-; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r2, lsl #8
 ; CHECK-NEXT: orr r0, r0, r1
 ; CHECK-NEXT: mov pc, lr
 ;
@@ -750,12 +749,12 @@
 ; CHECK: @ %bb.0:
 ; CHECK-NEXT: add r0, r1, r0
 ; CHECK-NEXT: mov r1, #65280
-; CHECK-NEXT: mov r2, #16711680
 ; CHECK-NEXT: ldr r0, [r0, #13]
+; CHECK-NEXT: and r2, r0, #65280
 ; CHECK-NEXT: and r1, r1, r0, lsr #8
-; CHECK-NEXT: and r2, r2, r0, lsl #8
 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
-; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r2, lsl #8
 ; CHECK-NEXT: orr r0, r0, r1
 ; CHECK-NEXT: mov pc, lr
 ;
diff --git a/llvm/test/CodeGen/ARM/load-combine.ll b/llvm/test/CodeGen/ARM/load-combine.ll
--- a/llvm/test/CodeGen/ARM/load-combine.ll
+++ b/llvm/test/CodeGen/ARM/load-combine.ll
@@ -123,11 +123,11 @@
 ; CHECK: @ %bb.0:
 ; CHECK-NEXT: ldr r0, [r0]
 ; CHECK-NEXT: mov r1, #65280
-; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r2, r0, #65280
 ; CHECK-NEXT: and r1, r1, r0, lsr #8
-; CHECK-NEXT: and r2, r2, r0, lsl #8
 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
-; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r2, lsl #8
 ; CHECK-NEXT: orr r0, r0, r1
 ; CHECK-NEXT: mov pc, lr
 ;
@@ -243,22 +243,21 @@
 define i64 @load_i64_by_i8_bswap(i64* %arg) {
 ; CHECK-LABEL: load_i64_by_i8_bswap:
 ; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r11, lr}
 ; CHECK-NEXT: ldr r1, [r0]
 ; CHECK-NEXT: mov r12, #65280
 ; CHECK-NEXT: ldr r0, [r0, #4]
-; CHECK-NEXT: mov lr, #16711680
+; CHECK-NEXT: and r2, r0, #65280
 ; CHECK-NEXT: and r3, r12, r0, lsr #8
-; CHECK-NEXT: and r2, lr, r0, lsl #8
 ; CHECK-NEXT: orr r3, r3, r0, lsr #24
-; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r2, lsl #8
 ; CHECK-NEXT: and r2, r12, r1, lsr #8
 ; CHECK-NEXT: orr r0, r0, r3
-; CHECK-NEXT: and r3, lr, r1, lsl #8
+; CHECK-NEXT: and r3, r1, #65280
 ; CHECK-NEXT: orr r2, r2, r1, lsr #24
-; CHECK-NEXT: orr r1, r3, r1, lsl #24
+; CHECK-NEXT: lsl r1, r1, #24
+; CHECK-NEXT: orr r1, r1, r3, lsl #8
 ; CHECK-NEXT: orr r1, r1, r2
-; CHECK-NEXT: pop {r11, lr}
 ; CHECK-NEXT: mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
@@ -425,11 +424,11 @@
 ; CHECK: @ %bb.0:
 ; CHECK-NEXT: ldr r0, [r0, #1]
 ; CHECK-NEXT: mov r1, #65280
-; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r2, r0, #65280
 ; CHECK-NEXT: and r1, r1, r0, lsr #8
-; CHECK-NEXT: and r2, r2, r0, lsl #8
 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
-; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r2, lsl #8
 ; CHECK-NEXT: orr r0, r0, r1
 ; CHECK-NEXT: mov pc, lr
 ;
@@ -482,11 +481,11 @@
 ; CHECK: @ %bb.0:
 ; CHECK-NEXT: ldr r0, [r0, #-4]
 ; CHECK-NEXT: mov r1, #65280
-; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r2, r0, #65280
 ; CHECK-NEXT: and r1, r1, r0, lsr #8
-; CHECK-NEXT: and r2, r2, r0, lsl #8
 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
-; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r2, lsl #8
 ; CHECK-NEXT: orr r0, r0, r1
 ; CHECK-NEXT: mov pc, lr
 ;
@@ -541,11 +540,11 @@
 ; CHECK: @ %bb.0:
 ; CHECK-NEXT: ldr r0, [r0]
 ; CHECK-NEXT: mov r1, #65280
-; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r2, r0, #65280
 ; CHECK-NEXT: and r1, r1, r0, lsr #8
-; CHECK-NEXT: and r2, r2, r0, lsl #8
 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
-; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: lsl r0, r0, #24
+; CHECK-NEXT: orr r0, r0, r2, lsl #8
 ; CHECK-NEXT: orr r0, r0, r1
 ; CHECK-NEXT: mov pc, lr
 ;
diff --git a/llvm/test/CodeGen/Mips/bswap.ll b/llvm/test/CodeGen/Mips/bswap.ll
--- a/llvm/test/CodeGen/Mips/bswap.ll
+++ b/llvm/test/CodeGen/Mips/bswap.ll
@@ -23,16 +23,15 @@
 ; MIPS16-LABEL: bswap32:
 ; MIPS16-DAG: srl $[[R0:[0-9]+]], $4, 8
+; MIPS16-DAG: li $[[R4:[0-9]+]], 65280
+; MIPS16-DAG: and $[[R0]], $[[R4]]
 ; MIPS16-DAG: srl $[[R1:[0-9]+]], $4, 24
-; MIPS16-DAG: sll $[[R2:[0-9]+]], $4, 8
+; MIPS16-DAG: or $[[R1]], $[[R0]]
+; MIPS16-DAG: and $[[R4]], $4
+; MIPS16-DAG: sll $[[R2:[0-9]+]], $[[R4]], 8
 ; MIPS16-DAG: sll $[[R3:[0-9]+]], $4, 24
-; MIPS16-DAG: li $[[R4:[0-9]+]], 65280
-; MIPS16-DAG: and $[[R4]], $[[R0]]
-; MIPS16-DAG: or $[[R1]], $[[R4]]
-; MIPS16-DAG: lw $[[R7:[0-9]+]], $CPI
-; MIPS16-DAG: and $[[R7]], $[[R2]]
-; MIPS16-DAG: or $[[R3]], $[[R7]]
-; MIPS16-DAG: or $[[R3]], $[[R1]]
+; MIPS16-DAG: or $[[R3]], $[[R2]]
+; MIPS16-DAG: or $[[R3]], $[[R1]]
   %or.3 = call i32 @llvm.bswap.i32(i32 %x)
   ret i32 %or.3
@@ -58,23 +57,22 @@
 ; MIPS16-LABEL: bswap64:
 ; MIPS16-DAG: srl $[[R0:[0-9]+]], $5, 8
-; MIPS16-DAG: srl $[[R1:[0-9]+]], $5, 24
-; MIPS16-DAG: sll $[[R2:[0-9]+]], $5, 8
-; MIPS16-DAG: sll $[[R3:[0-9]+]], $5, 24
 ; MIPS16-DAG: li $[[R4:[0-9]+]], 65280
 ; MIPS16-DAG: and $[[R0]], $[[R4]]
+; MIPS16-DAG: srl $[[R1:[0-9]+]], $5, 24
 ; MIPS16-DAG: or $[[R1]], $[[R0]]
-; MIPS16-DAG: lw $[[R7:[0-9]+]], 1f
-; MIPS16-DAG: and $[[R2]], $[[R7]]
-; MIPS16-DAG: or $[[R3]], $[[R2]]
-; MIPS16-DAG: or $[[R3]], $[[R1]]
+; MIPS16-DAG: sll $[[R3:[0-9]+]], $5, 24
+; MIPS16-DAG: and $5, $[[R4]]
+; MIPS16-DAG: sll $[[R2:[0-9]+]], $5, 8
+; MIPS16-DAG: or $[[R0]], $[[R3]]
+; MIPS16-DAG: or $[[R0]], $[[R1]]
 ; MIPS16-DAG: srl $[[R0:[0-9]+]], $4, 8
-; MIPS16-DAG: srl $[[R1:[0-9]+]], $4, 24
-; MIPS16-DAG: sll $[[R2:[0-9]+]], $4, 8
-; MIPS16-DAG: sll $[[R3:[0-9]+]], $4, 24
 ; MIPS16-DAG: and $[[R0]], $[[R4]]
+; MIPS16-DAG: srl $[[R1:[0-9]+]], $4, 24
 ; MIPS16-DAG: or $[[R1]], $[[R0]]
-; MIPS16-DAG: and $[[R2]], $[[R7]]
+; MIPS16-DAG: and $[[R4]], $4
+; MIPS16-DAG: sll $[[R2:[0-9]+]], $[[R4]], 8
+; MIPS16-DAG: sll $[[R3:[0-9]+]], $4, 24
 ; MIPS16-DAG: or $[[R3]], $[[R2]]
 ; MIPS16-DAG: or $[[R3]], $[[R1]]
diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
--- a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
@@ -59,11 +59,10 @@
 ; RV32I-NEXT: lui a2, 16
 ; RV32I-NEXT: addi a2, a2, -256
 ; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: slli a2, a0, 8
-; RV32I-NEXT: lui a3, 4080
-; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: slli a2, a2, 8
 ; RV32I-NEXT: slli a0, a0, 24
 ; RV32I-NEXT: or a0, a0, a2
 ; RV32I-NEXT: or a0, a0, a1
@@ -75,11 +74,10 @@
 ; RV64I-NEXT: lui a2, 16
 ; RV64I-NEXT: addiw a2, a2, -256
 ; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: srliw a2, a0, 24
-; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: slli a2, a0, 8
-; RV64I-NEXT: lui a3, 4080
-; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: or a1, a1, a3
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 8
 ; RV64I-NEXT: slliw a0, a0, 24
 ; RV64I-NEXT: or a0, a0, a2
 ; RV64I-NEXT: or a0, a0, a1
@@ -108,18 +106,17 @@
 ; RV32I-NEXT: and a2, a2, a3
 ; RV32I-NEXT: srli a4, a1, 24
 ; RV32I-NEXT: or a2, a2, a4
-; RV32I-NEXT: slli a4, a1, 8
-; RV32I-NEXT: lui a5, 4080
-; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: and a4, a1, a3
+; RV32I-NEXT: slli a4, a4, 8
 ; RV32I-NEXT: slli a1, a1, 24
 ; RV32I-NEXT: or a1, a1, a4
 ; RV32I-NEXT: or a2, a1, a2
 ; RV32I-NEXT: srli a1, a0, 8
 ; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: srli a3, a0, 24
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: slli a3, a0, 8
-; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: srli a4, a0, 24
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: slli a3, a3, 8
 ; RV32I-NEXT: slli a0, a0, 24
 ; RV32I-NEXT: or a0, a0, a3
 ; RV32I-NEXT: or a1, a0, a1
@@ -128,34 +125,31 @@
 ;
 ; RV64I-LABEL: test_bswap_i64:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a0, 24
-; RV64I-NEXT: li a2, 255
-; RV64I-NEXT: slli a3, a2, 40
-; RV64I-NEXT: and a1, a1, a3
-; RV64I-NEXT: srliw a3, a0, 24
-; RV64I-NEXT: slli a3, a3, 32
-; RV64I-NEXT: or a1, a1, a3
-; RV64I-NEXT: slli a3, a0, 40
-; RV64I-NEXT: slli a2, a2, 48
-; RV64I-NEXT: and a2, a3, a2
-; RV64I-NEXT: slli a3, a0, 56
-; RV64I-NEXT: or a2, a3, a2
-; RV64I-NEXT: or a1, a2, a1
-; RV64I-NEXT: srli a2, a0, 40
-; RV64I-NEXT: lui a3, 16
-; RV64I-NEXT: addiw a3, a3, -256
-; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: srli a1, a0, 40
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: addiw a2, a2, -256
+; RV64I-NEXT: and a1, a1, a2
 ; RV64I-NEXT: srli a3, a0, 56
-; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: or a1, a1, a3
 ; RV64I-NEXT: srli a3, a0, 24
 ; RV64I-NEXT: lui a4, 4080
 ; RV64I-NEXT: and a3, a3, a4
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: srli a5, a0, 8
+; RV64I-NEXT: srliw a5, a5, 24
+; RV64I-NEXT: slli a5, a5, 24
+; RV64I-NEXT: or a3, a5, a3
+; RV64I-NEXT: or a1, a3, a1
+; RV64I-NEXT: and a3, a0, a4
+; RV64I-NEXT: slli a3, a3, 24
+; RV64I-NEXT: srliw a4, a0, 24
+; RV64I-NEXT: slli a4, a4, 32
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 40
+; RV64I-NEXT: slli a0, a0, 56
 ; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a0, a0, a1
 ; RV64I-NEXT: ret
 ;
 ; RV32ZB-LABEL: test_bswap_i64:
@@ -402,11 +396,10 @@
 ; RV32I-NEXT: lui a2, 16
 ; RV32I-NEXT: addi a2, a2, -256
 ; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: slli a2, a0, 8
-; RV32I-NEXT: lui a3, 4080
-; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: slli a2, a2, 8
 ; RV32I-NEXT: slli a0, a0, 24
 ; RV32I-NEXT: or a0, a0, a2
 ; RV32I-NEXT: or a0, a0, a1
@@ -439,11 +432,10 @@
 ; RV64I-NEXT: lui a2, 16
 ; RV64I-NEXT: addiw a2, a2, -256
 ; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: srliw a2, a0, 24
-; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: slli a2, a0, 8
-; RV64I-NEXT: lui a3, 4080
-; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: or a1, a1, a3
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 8
 ; RV64I-NEXT: slliw a0, a0, 24
 ; RV64I-NEXT: or a0, a0, a2
 ; RV64I-NEXT: or a0, a0, a1
@@ -550,9 +542,8 @@
 ; RV32I-NEXT: and a2, a2, a3
 ; RV32I-NEXT: srli a4, a1, 24
 ; RV32I-NEXT: or a2, a2, a4
-; RV32I-NEXT: slli a4, a1, 8
-; RV32I-NEXT: lui a5, 4080
-; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: and a4, a1, a3
+; RV32I-NEXT: slli a4, a4, 8
 ; RV32I-NEXT: slli a1, a1, 24
 ; RV32I-NEXT: or a1, a1, a4
 ; RV32I-NEXT: or a1, a1, a2
@@ -564,25 +555,25 @@
 ; RV32I-NEXT: slli a1, a1, 4
 ; RV32I-NEXT: or a1, a2, a1
 ; RV32I-NEXT: srli a2, a1, 2
-; RV32I-NEXT: lui a6, 209715
-; RV32I-NEXT: addi a6, a6, 819
-; RV32I-NEXT: and a2, a2, a6
-; RV32I-NEXT: and a1, a1, a6
+; RV32I-NEXT: lui a5, 209715
+; RV32I-NEXT: addi a5, a5, 819
+; RV32I-NEXT: and a2, a2, a5
+; RV32I-NEXT: and a1, a1, a5
 ; RV32I-NEXT: slli a1, a1, 2
 ; RV32I-NEXT: or a1, a2, a1
 ; RV32I-NEXT: srli a2, a1, 1
-; RV32I-NEXT: lui a7, 349525
-; RV32I-NEXT: addi a7, a7, 1365
-; RV32I-NEXT: and a2, a2, a7
-; RV32I-NEXT: and a1, a1, a7
+; RV32I-NEXT: lui a6, 349525
+; RV32I-NEXT: addi a6, a6, 1365
+; RV32I-NEXT: and a2, a2, a6
+; RV32I-NEXT: and a1, a1, a6
 ; RV32I-NEXT: slli a1, a1, 1
 ; RV32I-NEXT: or a2, a2, a1
 ; RV32I-NEXT: srli a1, a0, 8
 ; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: srli a3, a0, 24
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: slli a3, a0, 8
-; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: srli a7, a0, 24
+; RV32I-NEXT: or a1, a1, a7
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: slli a3, a3, 8
 ; RV32I-NEXT: slli a0, a0, 24
 ; RV32I-NEXT: or a0, a0, a3
 ; RV32I-NEXT: or a0, a0, a1
@@ -592,13 +583,13 @@
 ; RV32I-NEXT: slli a0, a0, 4
 ; RV32I-NEXT: or a0, a1, a0
 ; RV32I-NEXT: srli a1, a0, 2
-; RV32I-NEXT: and a1, a1, a6
-; RV32I-NEXT: and a0, a0, a6
+; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: and a0, a0, a5
 ; RV32I-NEXT: slli a0, a0, 2
 ; RV32I-NEXT: or a0, a1, a0
 ; RV32I-NEXT: srli a1, a0, 1
-; RV32I-NEXT: and a1, a1, a7
-; RV32I-NEXT: and a0, a0, a7
+; RV32I-NEXT: and a1, a1, a6
+; RV32I-NEXT: and a0, a0, a6
 ; RV32I-NEXT: slli a0, a0, 1
 ; RV32I-NEXT: or a1, a1, a0
 ; RV32I-NEXT: mv a0, a2
@@ -606,39 +597,36 @@
 ;
 ; RV64I-LABEL: test_bitreverse_i64:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a0, 24
-; RV64I-NEXT: li a2, 255
-; RV64I-NEXT: slli a3, a2, 40
-; RV64I-NEXT: and a1, a1, a3
-; RV64I-NEXT: srliw a3, a0, 24
-; RV64I-NEXT: slli a3, a3, 32
-; RV64I-NEXT: or a1, a1, a3
-; RV64I-NEXT: slli a3, a0, 40
-; RV64I-NEXT: slli a2, a2, 48
-; RV64I-NEXT: and a2, a3, a2
-; RV64I-NEXT: slli a3, a0, 56
-; RV64I-NEXT: or a2, a3, a2
-; RV64I-NEXT: or a1, a2, a1
-; RV64I-NEXT: srli a2, a0, 40
-; RV64I-NEXT: lui a3, 16
-; RV64I-NEXT: addiw a3, a3, -256
-; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: srli a1, a0, 40
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: addiw a2, a2, -256
+; RV64I-NEXT: and a1, a1, a2
 ; RV64I-NEXT: srli a3, a0, 56
-; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: or a1, a1, a3
 ; RV64I-NEXT: srli a3, a0, 24
 ; RV64I-NEXT: lui a4, 4080
 ; RV64I-NEXT: and a3, a3, a4
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, a3
-; RV64I-NEXT: lui a3, %hi(.LCPI6_0)
-; RV64I-NEXT: ld a3, %lo(.LCPI6_0)(a3)
+; RV64I-NEXT: srli a5, a0, 8
+; RV64I-NEXT: srliw a5, a5, 24
+; RV64I-NEXT: slli a5, a5, 24
+; RV64I-NEXT: or a3, a5, a3
+; RV64I-NEXT: or a1, a3, a1
+; RV64I-NEXT: and a3, a0, a4
+; RV64I-NEXT: slli a3, a3, 24
+; RV64I-NEXT: srliw a4, a0, 24
+; RV64I-NEXT: slli a4, a4, 32
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 40
+; RV64I-NEXT: slli a0, a0, 56
 ; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: lui a2, %hi(.LCPI6_0)
+; RV64I-NEXT: ld a2, %lo(.LCPI6_0)(a2)
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a0, a0, a1
 ; RV64I-NEXT: srli a1, a0, 4
-; RV64I-NEXT: and a1, a1, a3
-; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: and a0, a0, a2
 ; RV64I-NEXT: lui a2, %hi(.LCPI6_1)
 ; RV64I-NEXT: ld a2, %lo(.LCPI6_1)(a2)
 ; RV64I-NEXT: slli a0, a0, 4
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -769,11 +769,10 @@
 ; RV32I-NEXT: lui a2, 16
 ; RV32I-NEXT: addi a2, a2, -256
 ; RV32I-NEXT: and a1, a1, a2
-; RV32I-NEXT: srli a2, a0, 24
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: slli a2, a0, 8
-; RV32I-NEXT: lui a3, 4080
-; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: slli a2, a2, 8
 ; RV32I-NEXT: slli a0, a0, 24
 ; RV32I-NEXT: or a0, a0, a2
 ; RV32I-NEXT: or a0, a0, a1
@@ -798,18 +797,17 @@
 ; RV32I-NEXT: and a2, a2, a3
 ; RV32I-NEXT: srli a4, a1, 24
 ; RV32I-NEXT: or a2, a2, a4
-; RV32I-NEXT: slli a4, a1, 8
-; RV32I-NEXT: lui a5, 4080
-; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: and a4, a1, a3
+; RV32I-NEXT: slli a4, a4, 8
 ; RV32I-NEXT: slli a1, a1, 24
 ; RV32I-NEXT: or a1, a1, a4
 ; RV32I-NEXT: or a2, a1, a2
 ; RV32I-NEXT: srli a1, a0, 8
 ; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: srli a3, a0, 24
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: slli a3, a0, 8
-; RV32I-NEXT: and a3, a3, a5
+; RV32I-NEXT: srli a4, a0, 24
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: and a3, a0, a3
+; RV32I-NEXT: slli a3, a3, 8
 ; RV32I-NEXT: slli a0, a0, 24
 ; RV32I-NEXT: or a0, a0, a3
 ; RV32I-NEXT: or a1, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -952,11 +952,10 @@
 ; RV64I-NEXT: lui a2, 16
 ; RV64I-NEXT: addiw a2, a2, -256
 ; RV64I-NEXT: and a1, a1, a2
-; RV64I-NEXT: srliw a2, a0, 24
-; RV64I-NEXT: or a1, a1, a2
-; RV64I-NEXT: slli a2, a0, 8
-; RV64I-NEXT: lui a3, 4080
-; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: srliw a3, a0, 24
+; RV64I-NEXT: or a1, a1, a3
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 8
 ; RV64I-NEXT: slliw a0, a0, 24
 ; RV64I-NEXT: or a0, a0, a2
 ; RV64I-NEXT: or a0, a0, a1
@@ -979,11 +978,10 @@
 ; RV64I-NEXT: lui a3, 16
 ; RV64I-NEXT: addiw a3, a3, -256
 ; RV64I-NEXT: and a2, a2, a3
-; RV64I-NEXT: srliw a3, a0, 24
-; RV64I-NEXT: or a2, a2, a3
-; RV64I-NEXT: slli a3, a0, 8
-; RV64I-NEXT: lui a4, 4080
-; RV64I-NEXT: and a3, a3, a4
+; RV64I-NEXT: srliw a4, a0, 24
+; RV64I-NEXT: or a2, a2, a4
+; RV64I-NEXT: and a3, a0, a3
+; RV64I-NEXT: slli a3, a3, 8
 ; RV64I-NEXT: slli a0, a0, 24
 ; RV64I-NEXT: or a0, a0, a3
 ; RV64I-NEXT: or a0, a0, a2
@@ -1006,34 +1004,31 @@
 define i64 @bswap_i64(i64 %a) {
 ; RV64I-LABEL: bswap_i64:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a0, 24
-; RV64I-NEXT: li a2, 255
-; RV64I-NEXT: slli a3, a2, 40
-; RV64I-NEXT: and a1, a1, a3
-; RV64I-NEXT: srliw a3, a0, 24
-; RV64I-NEXT: slli a3, a3, 32
-; RV64I-NEXT: or a1, a1, a3
-; RV64I-NEXT: slli a3, a0, 40
-; RV64I-NEXT: slli a2, a2, 48
-; RV64I-NEXT: and a2, a3, a2
-; RV64I-NEXT: slli a3, a0, 56
-; RV64I-NEXT: or a2, a3, a2
-; RV64I-NEXT: or a1, a2, a1
-; RV64I-NEXT: srli a2, a0, 40
-; RV64I-NEXT: lui a3, 16
-; RV64I-NEXT: addiw a3, a3, -256
-; RV64I-NEXT: and a2, a2, a3
+; RV64I-NEXT: srli a1, a0, 40
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: addiw a2, a2, -256
+; RV64I-NEXT: and a1, a1, a2
 ; RV64I-NEXT: srli a3, a0, 56
-; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: or a1, a1, a3
 ; RV64I-NEXT: srli a3, a0, 24
 ; RV64I-NEXT: lui a4, 4080
 ; RV64I-NEXT: and a3, a3, a4
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: srliw a0, a0, 24
-; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: srli a5, a0, 8
+; RV64I-NEXT: srliw a5, a5, 24
+; RV64I-NEXT: slli a5, a5, 24
+; RV64I-NEXT: or a3, a5, a3
+; RV64I-NEXT: or a1, a3, a1
+; RV64I-NEXT: and a3, a0, a4
+; RV64I-NEXT: slli a3, a3, 24
+; RV64I-NEXT: srliw a4, a0, 24
+; RV64I-NEXT: slli a4, a4, 32
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: and a2, a0, a2
+; RV64I-NEXT: slli a2, a2, 40
+; RV64I-NEXT: slli a0, a0, 56
 ; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a0, a0, a1
 ; RV64I-NEXT: ret
 ;
 ; RV64ZBB-LABEL: bswap_i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
@@ -579,9 +579,8 @@
 ; RV32-NEXT: vand.vx v9, v9, a0
 ; RV32-NEXT: vsrl.vi v10, v8, 24
 ; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vsll.vi v10, v8, 8
-; RV32-NEXT: lui a0, 4080
-; RV32-NEXT: vand.vx v10, v10, a0
+; RV32-NEXT: vand.vx v10, v8, a0
+; RV32-NEXT: vsll.vi v10, v10, 8
 ; RV32-NEXT: vsll.vi v8, v8, 24
 ; RV32-NEXT: vor.vv v8, v8, v10
 ; RV32-NEXT: vor.vv v8, v8, v9
@@ -617,9 +616,8 @@
 ; RV64-NEXT: vand.vx v9, v9, a0
 ; RV64-NEXT: vsrl.vi v10, v8, 24
 ; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vsll.vi v10, v8, 8
-; RV64-NEXT: lui a0, 4080
-; RV64-NEXT: vand.vx v10, v10, a0
+; RV64-NEXT: vand.vx v10, v8, a0
+; RV64-NEXT: vsll.vi v10, v10, 8
 ; RV64-NEXT: vsll.vi v8, v8, 24
 ; RV64-NEXT: vor.vv v8, v8, v10
 ; RV64-NEXT: vor.vv v8, v8, v9
@@ -660,9 +658,8 @@
 ; RV32-NEXT: vand.vx v9, v9, a0
 ; RV32-NEXT: vsrl.vi v10, v8, 24
 ; RV32-NEXT: vor.vv v9, v9, v10
-; RV32-NEXT: vsll.vi v10, v8, 8
-; RV32-NEXT: lui a0, 4080
-; RV32-NEXT: vand.vx v10, v10, a0
+; RV32-NEXT: vand.vx v10, v8, a0
+; RV32-NEXT: vsll.vi v10, v10, 8
 ; RV32-NEXT: vsll.vi v8, v8, 24
 ; RV32-NEXT: vor.vv v8, v8, v10
 ; RV32-NEXT: vor.vv v8, v8, v9
@@ -698,9 +695,8 @@
 ; RV64-NEXT: vand.vx v9, v9, a0
 ; RV64-NEXT: vsrl.vi v10, v8, 24
 ; RV64-NEXT: vor.vv v9, v9, v10
-; RV64-NEXT: vsll.vi v10, v8, 8
-; RV64-NEXT: lui a0, 4080
-; RV64-NEXT: vand.vx v10, v10, a0
+; RV64-NEXT: vand.vx v10, v8, a0
+; RV64-NEXT: vsll.vi v10, v10, 8
 ; RV64-NEXT: vsll.vi v8, v8, 24
 ; RV64-NEXT: vor.vv v8, v8, v10
 ; RV64-NEXT: vor.vv v8, v8, v9
@@ -741,9 +737,8 @@
 ; RV32-NEXT: vand.vx v10, v10, a0
 ; RV32-NEXT: vsrl.vi v12, v8, 24
 ; RV32-NEXT: vor.vv v10, v10, v12
-; RV32-NEXT: vsll.vi v12, v8, 8
-; RV32-NEXT: lui a0, 4080
-; RV32-NEXT: vand.vx v12, v12, a0
+; RV32-NEXT: vand.vx v12, v8, a0
+; RV32-NEXT: vsll.vi v12, v12, 8
 ; RV32-NEXT: vsll.vi v8, v8, 24
 ; RV32-NEXT: vor.vv v8, v8, v12
 ; RV32-NEXT: vor.vv v8, v8, v10
@@ -779,9 +774,8 @@
 ; RV64-NEXT: vand.vx v10, v10, a0
 ; RV64-NEXT: vsrl.vi v12, v8, 24
 ; RV64-NEXT: vor.vv v10, v10, v12
-; RV64-NEXT: vsll.vi v12, v8, 8
-; RV64-NEXT: lui a0, 4080
-; RV64-NEXT: vand.vx v12, v12, a0
+; RV64-NEXT: vand.vx v12, v8, a0
+; RV64-NEXT: vsll.vi v12, v12, 8
 ; RV64-NEXT: vsll.vi v8, v8, 24
 ; RV64-NEXT: vor.vv v8, v8, v12
 ; RV64-NEXT: vor.vv v8, v8, v10
@@ -822,9 +816,8 @@
 ; RV32-NEXT: vand.vx v12, v12, a0
 ; RV32-NEXT: vsrl.vi v16, v8, 24
 ; RV32-NEXT: vor.vv v12, v12, v16
-; RV32-NEXT: vsll.vi v16, v8, 8
-; RV32-NEXT: lui a0, 4080
-; RV32-NEXT: vand.vx v16, v16, a0
+; RV32-NEXT: vand.vx v16, v8, a0
+; RV32-NEXT: vsll.vi v16, v16, 8
 ; RV32-NEXT: vsll.vi v8, v8, 24
 ; RV32-NEXT: vor.vv v8, v8, v16
 ; RV32-NEXT: vor.vv v8, v8, v12
@@ -860,9 +853,8 @@
 ; RV64-NEXT: vand.vx v12, v12, a0
 ; RV64-NEXT: vsrl.vi v16, v8, 24
 ; RV64-NEXT: vor.vv v12, v12, v16
-; RV64-NEXT: vsll.vi v16, v8, 8
-; RV64-NEXT: lui a0, 4080
-; RV64-NEXT: vand.vx v16, v16, a0
+; RV64-NEXT: vand.vx v16, v8, a0
+; RV64-NEXT: vsll.vi v16, v16, 8
 ; RV64-NEXT: vsll.vi v8, v8, 24
 ; RV64-NEXT: vor.vv v8, v8, v16
 ; RV64-NEXT: vor.vv v8, v8, v12
@@ -903,9 +895,8 @@
 ; RV32-NEXT: vand.vx v16, v16, a0
 ; RV32-NEXT: vsrl.vi v24, v8, 24
 ; RV32-NEXT: vor.vv v16, v16, v24
-; RV32-NEXT: vsll.vi v24, v8, 8
-; RV32-NEXT: lui a0, 4080
-; RV32-NEXT: vand.vx v24, v24, a0
+; RV32-NEXT: vand.vx v24, v8, a0
+; RV32-NEXT: vsll.vi v24, v24, 8
 ; RV32-NEXT: vsll.vi v8, v8, 24
 ; RV32-NEXT: vor.vv v8, v8, v24
 ; RV32-NEXT: vor.vv v8, v8, v16
@@ -941,9 +932,8 @@
 ; RV64-NEXT: vand.vx v16, v16, a0
 ; RV64-NEXT: vsrl.vi v24, v8, 24
 ; RV64-NEXT: vor.vv v16, v16, v24
-; RV64-NEXT: vsll.vi v24, v8, 8
-; RV64-NEXT: lui a0, 4080
-; RV64-NEXT: vand.vx v24, v24, a0
+; RV64-NEXT: vand.vx v24, v8, a0
+; RV64-NEXT: vsll.vi v24, v24, 8
 ; RV64-NEXT: vsll.vi v8, v8, 24
 ; RV64-NEXT: vor.vv v8, v8, v24
 ; RV64-NEXT: vor.vv v8, v8, v16
@@ -982,66 +972,58 @@
 ; RV32-NEXT: sw zero, 12(sp)
 ; RV32-NEXT: lui a0, 1044480
 ; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 4080
+; RV32-NEXT: lui a0, 61681
+; RV32-NEXT: addi a0, a0, -241
 ; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw zero, 8(sp)
-; RV32-NEXT: li a1, 255
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: lui a1, 16
-; RV32-NEXT: addi a1, a1, -256
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: lui a2, 61681
-; RV32-NEXT: addi a2, a2, -241
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a2, 8(sp)
-; RV32-NEXT: lui a2, 209715
-; RV32-NEXT: addi a2, a2, 819
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a2, 8(sp)
-; RV32-NEXT: lui a2, 349525
-; RV32-NEXT: addi a2, a2, 1365
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a2, 8(sp)
-; RV32-NEXT: li a2, 56
-; RV32-NEXT: vsetvli a3, zero, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v9, v8, a2
-; RV32-NEXT: li a3, 40
-; RV32-NEXT: vsrl.vx v10, v8, a3
-; RV32-NEXT: vand.vx v10, v10, a1
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lui a0, 209715
+; RV32-NEXT: addi a0, a0, 819
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lui a0, 349525
+; RV32-NEXT: addi a0, a0, 1365
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: li a0, 56
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT: vsrl.vx v9, v8, a0
+; RV32-NEXT: li a1, 40
+; RV32-NEXT: vsrl.vx v10, v8, a1
+; RV32-NEXT: lui a2, 16
+; RV32-NEXT: addi a2, a2, -256
+; RV32-NEXT: vand.vx v10, v10, a2
 ; RV32-NEXT: vor.vv v9, v10, v9
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vlse64.v v10, (a1), zero
-; RV32-NEXT: vsrl.vi v11, v8, 24
-; RV32-NEXT: vand.vx v11, v11, a0
+; RV32-NEXT: vsrl.vi v10, v8, 24
+; RV32-NEXT: addi a3, sp, 8
+; RV32-NEXT: vlse64.v v11, (a3), zero
+; RV32-NEXT: lui a4, 4080
+; RV32-NEXT: vand.vx v10, v10, a4
 ; RV32-NEXT: vsrl.vi v12, v8, 8
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v10, v10, v11
-; RV32-NEXT: vlse64.v v11, (a1), zero
+; RV32-NEXT: vand.vv v12, v12, v11
+; RV32-NEXT: vor.vv v10, v12, v10
 ; RV32-NEXT: vor.vv v9, v10, v9
-; RV32-NEXT: vsll.vx v10, v8, a2
-; RV32-NEXT: vsll.vx v12, v8, a3
-; RV32-NEXT: vand.vv v11, v12, v11
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vor.vv v10, v10, v11
-; RV32-NEXT: vlse64.v v11, (a1), zero
-; RV32-NEXT: vsll.vi v13, v8, 8
-; RV32-NEXT: vand.vv v12, v13, v12
-; RV32-NEXT: vsll.vi v8, v8, 24
+; RV32-NEXT: vsll.vx v10, v8, a0
+; RV32-NEXT: vand.vx v12, v8, a2
+; RV32-NEXT: vsll.vx v12, v12, a1
+; RV32-NEXT: vor.vv v10, v10, v12
+; RV32-NEXT: vand.vx v12, v8, a4
+; RV32-NEXT: vsll.vi v12, v12, 24
 ; RV32-NEXT: vand.vv v8, v8, v11
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vlse64.v v11, (a1), zero
+; RV32-NEXT: vsll.vi v8, v8, 8
+; RV32-NEXT: vor.vv v8, v12, v8
+; RV32-NEXT: vlse64.v v11, (a3), zero
 ; RV32-NEXT: vor.vv v8, v10, v8
 ; RV32-NEXT: vor.vv v8, v8, v9
 ; RV32-NEXT: vsrl.vi v9, v8, 4
 ; RV32-NEXT: vand.vv v9, v9, v11
 ; RV32-NEXT: vand.vv v8, v8, v11
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: vlse64.v v10, (a3), zero
 ; RV32-NEXT: vsll.vi v8, v8, 4
 ; RV32-NEXT: vor.vv v8, v9, v8
 ; RV32-NEXT: vsrl.vi v9, v8, 2
 ; RV32-NEXT: vand.vv v9, v9, v10
 ; RV32-NEXT: vand.vv v8, v8, v10
-; RV32-NEXT: vlse64.v v10, (a1), zero
+; RV32-NEXT: vlse64.v v10, (a3), zero
 ; RV32-NEXT: vsll.vi v8, v8, 2
 ; RV32-NEXT: vor.vv v8, v9, v8
 ; RV32-NEXT: vsrl.vi v9, v8, 1
@@ -1064,25 +1046,22 @@
 ; RV64-NEXT: vand.vx v10, v10, a2
 ; RV64-NEXT: vor.vv v9, v10, v9
 ; RV64-NEXT: vsrl.vi v10, v8, 24
-; RV64-NEXT: lui a2, 4080
-; RV64-NEXT: vand.vx v10, v10, a2
+; RV64-NEXT: lui a3, 4080
+; RV64-NEXT: vand.vx v10, v10, a3
 ; RV64-NEXT: vsrl.vi v11, v8, 8
-; RV64-NEXT: li a2, 255
-; RV64-NEXT: slli a3, a2, 24
-; RV64-NEXT: vand.vx v11, v11, a3
+; RV64-NEXT: li a4, 255
+; RV64-NEXT: slli a4, a4, 24
+; RV64-NEXT: vand.vx v11, v11, a4
 ; RV64-NEXT: vor.vv v10, v11, v10
 ; RV64-NEXT: vor.vv v9, v10, v9
-; RV64-NEXT: vsll.vi v10, v8, 8
-; RV64-NEXT: slli a3, a2, 32
-; RV64-NEXT: vand.vx v10, v10, a3
-; RV64-NEXT: vsll.vi v11, v8, 24
-; RV64-NEXT: slli a3, a2, 40
-; RV64-NEXT: vand.vx v11, v11, a3
-; RV64-NEXT: vor.vv v10, v11, v10
+; RV64-NEXT: vand.vx v10, v8, a3
+; RV64-NEXT: vsll.vi v10, v10, 24
+; RV64-NEXT: vand.vx v11, v8, a4
+; RV64-NEXT: vsll.vi v11, v11, 8
+; RV64-NEXT: vor.vv v10, v10, v11
 ; RV64-NEXT: vsll.vx v11, v8, a0
+; RV64-NEXT: vand.vx v8, v8, a2
 ; RV64-NEXT: vsll.vx v8, v8, a1
-; RV64-NEXT: slli a0, a2, 48
-; RV64-NEXT: vand.vx v8, v8, a0
 ; RV64-NEXT: vor.vv v8, v11, v8
 ; RV64-NEXT: lui a0, %hi(.LCPI18_0)
 ; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0)
@@ -1121,66 +1100,58 @@
 ; RV32-NEXT: sw zero, 12(sp)
 ; RV32-NEXT: lui a0, 1044480
 ; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 4080
+; RV32-NEXT: lui a0, 61681
+; RV32-NEXT: addi a0, a0, -241
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lui a0, 209715
+; RV32-NEXT: addi a0, a0, 819
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lui a0, 349525
+; RV32-NEXT: addi a0, a0, 1365
 ; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw zero, 8(sp)
-; RV32-NEXT: li a1, 255
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: lui a1, 16
-; RV32-NEXT: addi a1, a1, -256
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: lui a2, 61681
-; RV32-NEXT: addi a2, a2, -241
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a2, 8(sp)
-; RV32-NEXT: lui a2, 209715
-; RV32-NEXT: addi a2, a2, 819
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a2, 8(sp)
-; RV32-NEXT: lui a2, 349525
-; RV32-NEXT: addi a2, a2, 1365
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a2, 8(sp)
-; RV32-NEXT: li a2, 56
-; RV32-NEXT: vsetvli a3, zero, e64, m2, ta, ma
-; RV32-NEXT: vsrl.vx v10, v8, a2
-; RV32-NEXT: li a3, 40
-; RV32-NEXT: vsrl.vx v12, v8, a3
-; RV32-NEXT: vand.vx v12, v12, a1
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: li a0, 56
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vsrl.vx v10, v8, a0
+; RV32-NEXT: li a1, 40
+; RV32-NEXT: vsrl.vx v12, v8, a1
+; RV32-NEXT: lui a2, 16
+; RV32-NEXT: addi a2, a2, -256
+; RV32-NEXT: vand.vx v12, v12, a2
 ; RV32-NEXT: vor.vv v10, v12, v10
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vlse64.v v12, (a1), zero
-; RV32-NEXT: vsrl.vi v14, v8, 24
-; RV32-NEXT: vand.vx v14, v14, a0
+; RV32-NEXT: vsrl.vi v12, v8, 24
+; RV32-NEXT: addi a3, sp, 8
+; RV32-NEXT: vlse64.v v14, (a3), zero
+; RV32-NEXT: lui a4, 4080
+; RV32-NEXT: vand.vx v12, v12, a4
 ; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vand.vv v12, v16, v12
-; RV32-NEXT: vor.vv v12, v12, v14
-; RV32-NEXT: vlse64.v v14, (a1), zero
+; RV32-NEXT: vand.vv v16, v16, v14
+; RV32-NEXT: vor.vv v12, v16, v12
 ; RV32-NEXT: vor.vv v10, v12, v10
-; RV32-NEXT: vsll.vx v12, v8, a2
-; RV32-NEXT: vsll.vx v16, v8, a3
-; RV32-NEXT: vand.vv v14, v16, v14
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vor.vv v12, v12, v14
-; RV32-NEXT: vlse64.v v14, (a1), zero
-; RV32-NEXT: vsll.vi v18, v8, 8
-; RV32-NEXT: vand.vv v16, v18, v16
-; RV32-NEXT: vsll.vi v8, v8, 24
+; RV32-NEXT: vsll.vx v12, v8, a0
+; RV32-NEXT: vand.vx v16, v8, a2
+; RV32-NEXT: vsll.vx v16, v16, a1
+; RV32-NEXT: vor.vv v12, v12, v16
+; RV32-NEXT: vand.vx v16, v8, a4
+; RV32-NEXT: vsll.vi v16, v16, 24
 ; RV32-NEXT: vand.vv v8, v8, v14
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vlse64.v v14, (a1), zero
+; RV32-NEXT: vsll.vi v8, v8, 8
+; RV32-NEXT: vor.vv v8, v16, v8
+; RV32-NEXT: vlse64.v v14, (a3), zero
 ; RV32-NEXT: vor.vv v8, v12, v8
 ; RV32-NEXT: vor.vv v8, v8, v10
 ; RV32-NEXT: vsrl.vi v10, v8, 4
 ; RV32-NEXT: vand.vv v10, v10, v14
 ; RV32-NEXT: vand.vv v8, v8, v14
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: vlse64.v v12, (a3), zero
 ; RV32-NEXT: vsll.vi v8, v8, 4
 ; RV32-NEXT: vor.vv v8, v10, v8
 ; RV32-NEXT: vsrl.vi v10, v8, 2
 ; RV32-NEXT: vand.vv v10, v10, v12
 ; RV32-NEXT: vand.vv v8, v8, v12
-; RV32-NEXT: vlse64.v v12, (a1), zero
+; RV32-NEXT: vlse64.v v12, (a3), zero
 ; RV32-NEXT: vsll.vi v8, v8, 2
 ; RV32-NEXT: vor.vv v8, v10, v8
 ; RV32-NEXT: vsrl.vi v10, v8, 1
@@ -1203,25 +1174,22 @@
 ; RV64-NEXT: vand.vx v12, v12, a2
 ; RV64-NEXT: vor.vv v10, v12, v10
 ; RV64-NEXT: vsrl.vi v12, v8, 24
-; RV64-NEXT: lui a2, 4080
-; RV64-NEXT: vand.vx v12, v12, a2
+; RV64-NEXT: lui a3, 4080
+; RV64-NEXT: vand.vx v12, v12, a3
 ; RV64-NEXT: vsrl.vi v14, v8, 8
-; RV64-NEXT: li a2, 255
-; RV64-NEXT: slli a3, a2, 24
-; RV64-NEXT: vand.vx v14, v14, a3
+; RV64-NEXT: li a4, 255
+; RV64-NEXT: slli a4, a4, 24
+; RV64-NEXT: vand.vx v14, v14, a4
 ; RV64-NEXT: vor.vv v12, v14, v12
 ; RV64-NEXT: vor.vv v10, v12, v10
-; RV64-NEXT: vsll.vi v12, v8, 8
-; RV64-NEXT: slli a3, a2, 32
-; RV64-NEXT: vand.vx v12, v12, a3
-; RV64-NEXT: vsll.vi v14, v8, 24
-; RV64-NEXT: slli a3, a2, 40
-; RV64-NEXT: vand.vx v14, v14, a3
-; RV64-NEXT: vor.vv v12, v14, v12
+; RV64-NEXT: vand.vx v12, v8, a3
+; RV64-NEXT: vsll.vi v12, v12, 24
+; RV64-NEXT: vand.vx v14, v8, a4
+; RV64-NEXT: vsll.vi v14, v14, 8
+; RV64-NEXT: vor.vv v12, v12, v14
 ; RV64-NEXT: vsll.vx v14, v8, a0
+; RV64-NEXT: vand.vx v8, v8, a2
 ; RV64-NEXT: vsll.vx v8, v8, a1
-; RV64-NEXT: slli a0, a2, 48
-; RV64-NEXT: vand.vx v8, v8, a0
 ; RV64-NEXT: vor.vv v8, v14, v8
 ; RV64-NEXT: lui a0, %hi(.LCPI19_0)
 ; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0)
@@ -1260,66 +1228,58 @@
 ; RV32-NEXT: sw zero, 12(sp)
 ; RV32-NEXT: lui a0, 1044480
 ; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 4080
+; RV32-NEXT: lui a0, 61681
+; RV32-NEXT: addi a0, a0, -241
 ; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw zero, 8(sp)
-; RV32-NEXT: li a1, 255
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: lui a1, 16
-; RV32-NEXT: addi a1, a1, -256
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: lui a2, 61681
-; RV32-NEXT: addi a2, a2, -241
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a2, 8(sp)
-; RV32-NEXT: lui a2, 209715
-; RV32-NEXT: addi a2, a2, 819
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a2, 8(sp)
-; RV32-NEXT: lui a2, 349525
-; RV32-NEXT: addi a2, a2, 1365
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a2, 8(sp)
-; RV32-NEXT: li a2, 56
-; RV32-NEXT: vsetvli a3, zero, e64, m4, ta, ma
-; RV32-NEXT: vsrl.vx v12, v8, a2
-; RV32-NEXT: li a3, 40
-; RV32-NEXT: vsrl.vx v16, v8, a3
-; RV32-NEXT: vand.vx v16, v16, a1
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lui a0, 209715
+; RV32-NEXT: addi a0, a0, 819
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lui a0, 349525
+; RV32-NEXT: addi a0, a0, 1365
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: li a0, 56
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT: vsrl.vx v12, v8, a0
+; RV32-NEXT: li a1, 40
+; RV32-NEXT: vsrl.vx v16, v8, a1
+; RV32-NEXT: lui a2, 16
+; RV32-NEXT: addi a2, a2, -256
+; RV32-NEXT: vand.vx v16, v16, a2
 ; RV32-NEXT: vor.vv v12, v16, v12
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vlse64.v v16, (a1), zero
-; RV32-NEXT: vsrl.vi v20, v8, 24
-; RV32-NEXT: vand.vx v20, v20, a0
+; RV32-NEXT: vsrl.vi v16, v8, 24
+; RV32-NEXT: addi a3, sp, 8
+; RV32-NEXT: vlse64.v v20, (a3), zero
+; RV32-NEXT: lui a4, 4080
+; RV32-NEXT: vand.vx v16, v16, a4
 ; RV32-NEXT: vsrl.vi v24, v8, 8
-; RV32-NEXT: vand.vv v16, v24, v16
-; RV32-NEXT: vor.vv v16, v16, v20
-; RV32-NEXT: vlse64.v v20, (a1), zero
+; RV32-NEXT: vand.vv v24, v24, v20
+; RV32-NEXT: vor.vv v16, v24, v16
 ; RV32-NEXT: vor.vv v12, v16, v12
-; RV32-NEXT: vsll.vx v16, v8, a2
-; RV32-NEXT: vsll.vx v24, v8, a3
-; RV32-NEXT: vand.vv v20, v24, v20
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vor.vv v16, v16, v20
-; RV32-NEXT: vlse64.v v20, (a1), zero
-; RV32-NEXT: vsll.vi v28, v8, 8
-; RV32-NEXT: vand.vv v24, v28, v24
-; RV32-NEXT: vsll.vi v8, v8, 24
+; RV32-NEXT: vsll.vx v16, v8, a0
+; RV32-NEXT: vand.vx v24, v8, a2
+; RV32-NEXT: vsll.vx v24, v24, a1
+; RV32-NEXT: vor.vv v16, v16, v24
+; RV32-NEXT: vand.vx v24, v8, a4
+; RV32-NEXT: vsll.vi v24, v24, 24
 ; RV32-NEXT: vand.vv v8, v8, v20
-; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: vlse64.v v20, (a1), zero
+; RV32-NEXT: vsll.vi v8, v8, 8
+; RV32-NEXT: vor.vv v8, v24, v8
+; RV32-NEXT: vlse64.v v20, (a3), zero
 ; RV32-NEXT: vor.vv v8, v16, v8
 ; RV32-NEXT: vor.vv v8, v8, v12
 ; RV32-NEXT: vsrl.vi v12, v8, 4
 ; RV32-NEXT: vand.vv v12, v12, v20
 ; RV32-NEXT: vand.vv v8, v8, v20
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vlse64.v v16, (a3), zero
 ; RV32-NEXT: vsll.vi v8, v8, 4
 ; RV32-NEXT: vor.vv v8, v12, v8
 ; RV32-NEXT: vsrl.vi v12, v8, 2
 ; RV32-NEXT: vand.vv v12, v12, v16
 ; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vlse64.v v16, (a1), zero
+; RV32-NEXT: vlse64.v v16, (a3), zero
 ; RV32-NEXT: vsll.vi v8, v8, 2
 ; RV32-NEXT: vor.vv v8, v12, v8
 ; RV32-NEXT: vsrl.vi v12, v8, 1
@@ -1342,25 +1302,22 @@
 ; RV64-NEXT: vand.vx v16, v16, a2
 ; RV64-NEXT: vor.vv v12, v16, v12
 ; RV64-NEXT: vsrl.vi v16, v8, 24
-; RV64-NEXT: lui a2, 4080
-; RV64-NEXT: vand.vx v16, v16, a2
+; RV64-NEXT: lui a3, 4080
+; RV64-NEXT: vand.vx v16, v16, a3
 ; RV64-NEXT: vsrl.vi v20, v8, 8
-; RV64-NEXT: li a2, 255
-; RV64-NEXT: slli a3, a2, 24
-; RV64-NEXT: vand.vx v20, v20, a3
+; RV64-NEXT: li a4, 255
+; RV64-NEXT: slli a4, a4, 24
+; RV64-NEXT: vand.vx v20, v20, a4
 ; RV64-NEXT: vor.vv v16, v20, v16
 ; RV64-NEXT: vor.vv v12, v16, v12
-; RV64-NEXT: vsll.vi v16, v8, 8
-; RV64-NEXT: slli a3, a2, 32
-; RV64-NEXT: vand.vx v16, v16, a3
-; RV64-NEXT: vsll.vi v20, v8, 24
-; RV64-NEXT: slli a3, a2, 40
-; RV64-NEXT: vand.vx v20, v20, a3
-; RV64-NEXT: vor.vv v16, v20, v16
+; RV64-NEXT: vand.vx v16, v8, a3
+; RV64-NEXT: vsll.vi v16, v16, 24
+; RV64-NEXT: vand.vx v20, v8, a4
+; RV64-NEXT: vsll.vi v20, v20, 8
+; RV64-NEXT: vor.vv v16, v16, v20
 ; RV64-NEXT: vsll.vx v20, v8, a0
+; RV64-NEXT: vand.vx v8, v8, a2
 ; RV64-NEXT: vsll.vx v8, v8, a1
-; RV64-NEXT: slli a0, a2, 48
-; RV64-NEXT: vand.vx v8, v8, a0
 ; RV64-NEXT: vor.vv v8, v20, v8
 ; RV64-NEXT: lui a0, %hi(.LCPI20_0)
 ; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0)
@@ -1397,95 +1354,71 @@
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: slli a0, a0, 3
 ; RV32-NEXT: sub sp, sp, a0
 ; RV32-NEXT: sw zero, 12(sp)
 ; RV32-NEXT: lui a0, 1044480
 ; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lui a0, 4080
+; RV32-NEXT: lui a0, 61681
+; RV32-NEXT: addi a0, a0, -241
 ; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw zero, 8(sp)
-; RV32-NEXT: li a1, 255
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: lui a1, 16
-; RV32-NEXT: addi a1, a1, -256
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: lui a2, 61681
-; RV32-NEXT: addi a2, a2, -241
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a2, 8(sp)
-; RV32-NEXT: lui a2, 209715
-; RV32-NEXT: addi a2, a2, 819
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a2, 8(sp)
-; RV32-NEXT: lui a2, 349525
-; RV32-NEXT: addi a2, a2, 1365
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a2, 8(sp)
-; RV32-NEXT: li a2, 56
-; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
-; RV32-NEXT: li a3, 40
-; RV32-NEXT: vsrl.vx v16, v8, a3
-; RV32-NEXT: vand.vx v16, v16, a1
-; RV32-NEXT: addi a1, sp, 8
-; RV32-NEXT: vlse64.v v24, (a1), zero
-; RV32-NEXT: vsrl.vx v0, v8, a2
-; RV32-NEXT: vor.vv v16, v16, v0
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 3
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 16
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v0, v8, 8
-; RV32-NEXT: vand.vv v24, v0, v24
RV32-NEXT: vand.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: vand.vx v0, v0, a0 -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v24, v8, a3 -; RV32-NEXT: vand.vv v16, v24, v16 -; RV32-NEXT: vsll.vx v24, v8, a2 -; RV32-NEXT: vlse64.v v0, (a1), zero +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lui a0, 209715 +; RV32-NEXT: addi a0, a0, 819 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lui a0, 349525 +; RV32-NEXT: addi a0, a0, 1365 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v8, a0 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v24, a2 ; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsll.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v0 +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v24, (a3), zero +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v0, v0, a4 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vl8re8.v v0, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v0, v8, a2 +; RV32-NEXT: vsll.vx v0, v0, a1 +; RV32-NEXT: vsll.vx v16, v8, a0 +; RV32-NEXT: vor.vv v0, v16, v0 +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vor.vv v8, v0, v8 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 4 ; RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v24, v8, 2 ; RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v24, v8, 1 @@ -1494,7 +1427,7 @@ ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi 
sp, sp, 16 ; RV32-NEXT: ret @@ -1511,25 +1444,22 @@ ; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vor.vv v16, v24, v16 ; RV64-NEXT: vsrl.vi v24, v8, 24 -; RV64-NEXT: lui a2, 4080 -; RV64-NEXT: vand.vx v24, v24, a2 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v24, v24, a3 ; RV64-NEXT: vsrl.vi v0, v8, 8 -; RV64-NEXT: li a2, 255 -; RV64-NEXT: slli a3, a2, 24 -; RV64-NEXT: vand.vx v0, v0, a3 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v0, v0, a4 ; RV64-NEXT: vor.vv v24, v0, v24 ; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vsll.vi v24, v8, 8 -; RV64-NEXT: slli a3, a2, 32 -; RV64-NEXT: vand.vx v24, v24, a3 -; RV64-NEXT: vsll.vi v0, v8, 24 -; RV64-NEXT: slli a3, a2, 40 -; RV64-NEXT: vand.vx v0, v0, a3 -; RV64-NEXT: vor.vv v24, v0, v24 +; RV64-NEXT: vand.vx v24, v8, a3 +; RV64-NEXT: vsll.vi v24, v24, 24 +; RV64-NEXT: vand.vx v0, v8, a4 +; RV64-NEXT: vsll.vi v0, v0, 8 +; RV64-NEXT: vor.vv v24, v24, v0 ; RV64-NEXT: vsll.vx v0, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 -; RV64-NEXT: slli a0, a2, 48 -; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v0, v8 ; RV64-NEXT: lui a0, %hi(.LCPI21_0) ; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll @@ -90,9 +90,8 @@ ; RV32-NEXT: vand.vx v9, v9, a0 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v10, v8, 8 -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v10, v10, a0 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vsll.vi v10, v10, 8 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v9 @@ -107,9 +106,8 @@ ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vsll.vi v10, v8, 8 -; RV64-NEXT: lui a0, 4080 -; RV64-NEXT: vand.vx v10, v10, a0 +; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vsll.vi v10, v10, 8 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 @@ -129,9 +127,8 @@ ; RV32-NEXT: vand.vx v9, v9, a0 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v10, v8, 8 -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v10, v10, a0 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vsll.vi v10, v10, 8 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v9 @@ -146,9 +143,8 @@ ; RV64-NEXT: vand.vx v9, v9, a0 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vsll.vi v10, v8, 8 -; RV64-NEXT: lui a0, 4080 -; RV64-NEXT: vand.vx v10, v10, a0 +; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vsll.vi v10, v10, 8 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 @@ -168,9 +164,8 @@ ; RV32-NEXT: vand.vx v10, v10, a0 ; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsll.vi v12, v8, 8 -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v12, v12, a0 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vsll.vi v12, v12, 8 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vor.vv v8, v8, v10 @@ -185,9 +180,8 @@ ; RV64-NEXT: vand.vx v10, v10, a0 ; RV64-NEXT: vsrl.vi v12, v8, 24 ; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vsll.vi v12, v8, 8 -; RV64-NEXT: lui a0, 4080 -; RV64-NEXT: vand.vx v12, v12, a0 +; RV64-NEXT: vand.vx v12, v8, a0 +; 
RV64-NEXT: vsll.vi v12, v12, 8 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vor.vv v8, v8, v10 @@ -207,9 +201,8 @@ ; RV32-NEXT: vand.vx v12, v12, a0 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsll.vi v16, v8, 8 -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v16, v16, a0 +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vsll.vi v16, v16, 8 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v8, v12 @@ -224,9 +217,8 @@ ; RV64-NEXT: vand.vx v12, v12, a0 ; RV64-NEXT: vsrl.vi v16, v8, 24 ; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vsll.vi v16, v8, 8 -; RV64-NEXT: lui a0, 4080 -; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vsll.vi v16, v16, 8 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vor.vv v8, v8, v12 @@ -246,9 +238,8 @@ ; RV32-NEXT: vand.vx v16, v16, a0 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsll.vi v24, v8, 8 -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v24, v24, a0 +; RV32-NEXT: vand.vx v24, v8, a0 +; RV32-NEXT: vsll.vi v24, v24, 8 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -263,9 +254,8 @@ ; RV64-NEXT: vand.vx v16, v16, a0 ; RV64-NEXT: vsrl.vi v24, v8, 24 ; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsll.vi v24, v8, 8 -; RV64-NEXT: lui a0, 4080 -; RV64-NEXT: vand.vx v24, v24, a0 +; RV64-NEXT: vand.vx v24, v8, a0 +; RV64-NEXT: vsll.vi v24, v24, 8 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v8, v16 @@ -283,41 +273,33 @@ ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: li a1, 255 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsetvli a3, zero, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsrl.vx v10, v8, a3 -; RV32-NEXT: vand.vx v10, v10, a1 +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v10, v10, a2 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vsrl.vi v11, v8, 24 -; RV32-NEXT: vand.vx v11, v11, a0 +; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v11, (a3), zero +; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vand.vx v10, v10, a3 ; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vand.vv v10, v12, v10 -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: vlse64.v v11, (a1), zero +; RV32-NEXT: vand.vv v12, v12, v11 +; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsll.vx v10, v8, a2 -; RV32-NEXT: vsll.vx v12, v8, a3 -; RV32-NEXT: vand.vv v11, v12, v11 -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vor.vv v10, v10, v11 -; RV32-NEXT: vlse64.v v11, (a1), zero -; RV32-NEXT: vsll.vi v13, v8, 8 -; RV32-NEXT: vand.vv v12, v13, v12 -; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vsll.vx v10, v8, a0 +; RV32-NEXT: vand.vx v12, v8, a2 +; RV32-NEXT: vsll.vx v12, v12, a1 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vand.vx v12, v8, a3 +; RV32-NEXT: vsll.vi v12, v12, 24 ; RV32-NEXT: vand.vv 
v8, v8, v11 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 @@ -335,25 +317,22 @@ ; RV64-NEXT: vand.vx v10, v10, a2 ; RV64-NEXT: vor.vv v9, v10, v9 ; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: lui a2, 4080 -; RV64-NEXT: vand.vx v10, v10, a2 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v10, v10, a3 ; RV64-NEXT: vsrl.vi v11, v8, 8 -; RV64-NEXT: li a2, 255 -; RV64-NEXT: slli a3, a2, 24 -; RV64-NEXT: vand.vx v11, v11, a3 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v11, v11, a4 ; RV64-NEXT: vor.vv v10, v11, v10 ; RV64-NEXT: vor.vv v9, v10, v9 -; RV64-NEXT: vsll.vi v10, v8, 8 -; RV64-NEXT: slli a3, a2, 32 -; RV64-NEXT: vand.vx v10, v10, a3 -; RV64-NEXT: vsll.vi v11, v8, 24 -; RV64-NEXT: slli a3, a2, 40 -; RV64-NEXT: vand.vx v11, v11, a3 -; RV64-NEXT: vor.vv v10, v11, v10 +; RV64-NEXT: vand.vx v10, v8, a3 +; RV64-NEXT: vsll.vi v10, v10, 24 +; RV64-NEXT: vand.vx v11, v8, a4 +; RV64-NEXT: vsll.vi v11, v11, 8 +; RV64-NEXT: vor.vv v10, v10, v11 ; RV64-NEXT: vsll.vx v11, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 -; RV64-NEXT: slli a0, a2, 48 -; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v11, v8 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 @@ -371,41 +350,33 @@ ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: li a1, 255 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsetvli a3, zero, e64, m2, ta, ma -; RV32-NEXT: vsrl.vx v10, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsrl.vx v12, v8, a3 -; RV32-NEXT: vand.vx v12, v12, a1 +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vsrl.vx v10, v8, a0 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v12, v12, a2 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vsrl.vi v14, v8, 24 -; RV32-NEXT: vand.vx v14, v14, a0 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v14, (a3), zero +; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vand.vx v12, v12, a3 ; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vand.vv v12, v16, v12 -; RV32-NEXT: vor.vv v12, v12, v14 -; RV32-NEXT: vlse64.v v14, (a1), zero +; RV32-NEXT: vand.vv v16, v16, v14 +; RV32-NEXT: vor.vv v12, v16, v12 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsll.vx v12, v8, a2 -; RV32-NEXT: vsll.vx v16, v8, a3 -; RV32-NEXT: vand.vv v14, v16, v14 -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vor.vv v12, v12, v14 -; RV32-NEXT: vlse64.v v14, (a1), zero -; RV32-NEXT: vsll.vi v18, v8, 8 -; RV32-NEXT: vand.vv v16, v18, v16 -; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vsll.vx v12, v8, a0 +; RV32-NEXT: vand.vx v16, v8, a2 +; RV32-NEXT: vsll.vx v16, v16, a1 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v16, v8, a3 +; RV32-NEXT: vsll.vi v16, v16, 24 ; RV32-NEXT: vand.vv v8, v8, v14 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: addi sp, sp, 16 @@ -423,25 +394,22 @@ ; RV64-NEXT: vand.vx v12, v12, 
a2 ; RV64-NEXT: vor.vv v10, v12, v10 ; RV64-NEXT: vsrl.vi v12, v8, 24 -; RV64-NEXT: lui a2, 4080 -; RV64-NEXT: vand.vx v12, v12, a2 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v12, v12, a3 ; RV64-NEXT: vsrl.vi v14, v8, 8 -; RV64-NEXT: li a2, 255 -; RV64-NEXT: slli a3, a2, 24 -; RV64-NEXT: vand.vx v14, v14, a3 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v14, v14, a4 ; RV64-NEXT: vor.vv v12, v14, v12 ; RV64-NEXT: vor.vv v10, v12, v10 -; RV64-NEXT: vsll.vi v12, v8, 8 -; RV64-NEXT: slli a3, a2, 32 -; RV64-NEXT: vand.vx v12, v12, a3 -; RV64-NEXT: vsll.vi v14, v8, 24 -; RV64-NEXT: slli a3, a2, 40 -; RV64-NEXT: vand.vx v14, v14, a3 -; RV64-NEXT: vor.vv v12, v14, v12 +; RV64-NEXT: vand.vx v12, v8, a3 +; RV64-NEXT: vsll.vi v12, v12, 24 +; RV64-NEXT: vand.vx v14, v8, a4 +; RV64-NEXT: vsll.vi v14, v14, 8 +; RV64-NEXT: vor.vv v12, v12, v14 ; RV64-NEXT: vsll.vx v14, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 -; RV64-NEXT: slli a0, a2, 48 -; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v14, v8 ; RV64-NEXT: vor.vv v8, v8, v12 ; RV64-NEXT: vor.vv v8, v8, v10 @@ -459,41 +427,33 @@ ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: li a1, 255 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsetvli a3, zero, e64, m4, ta, ma -; RV32-NEXT: vsrl.vx v12, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsrl.vx v16, v8, a3 -; RV32-NEXT: vand.vx v16, v16, a1 +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vsrl.vx v12, v8, a0 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v16, v16, a2 ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vsrl.vi v20, v8, 24 -; RV32-NEXT: vand.vx v20, v20, a0 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v20, (a3), zero +; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vand.vx v16, v16, a3 ; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v16, v24, v16 -; RV32-NEXT: vor.vv v16, v16, v20 -; RV32-NEXT: vlse64.v v20, (a1), zero +; RV32-NEXT: vand.vv v24, v24, v20 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsll.vx v16, v8, a2 -; RV32-NEXT: vsll.vx v24, v8, a3 -; RV32-NEXT: vand.vv v20, v24, v20 -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vor.vv v16, v16, v20 -; RV32-NEXT: vlse64.v v20, (a1), zero -; RV32-NEXT: vsll.vi v28, v8, 8 -; RV32-NEXT: vand.vv v24, v28, v24 -; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vsll.vx v16, v8, a0 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: vsll.vx v24, v24, a1 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vand.vx v24, v8, a3 +; RV32-NEXT: vsll.vi v24, v24, 24 ; RV32-NEXT: vand.vv v8, v8, v20 -; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: addi sp, sp, 16 @@ -511,25 +471,22 @@ ; RV64-NEXT: vand.vx v16, v16, a2 ; RV64-NEXT: vor.vv v12, v16, v12 ; RV64-NEXT: vsrl.vi v16, v8, 24 -; RV64-NEXT: lui a2, 4080 -; RV64-NEXT: vand.vx v16, v16, a2 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v16, v16, a3 ; RV64-NEXT: vsrl.vi v20, v8, 8 -; RV64-NEXT: li a2, 255 
-; RV64-NEXT: slli a3, a2, 24 -; RV64-NEXT: vand.vx v20, v20, a3 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v20, v20, a4 ; RV64-NEXT: vor.vv v16, v20, v16 ; RV64-NEXT: vor.vv v12, v16, v12 -; RV64-NEXT: vsll.vi v16, v8, 8 -; RV64-NEXT: slli a3, a2, 32 -; RV64-NEXT: vand.vx v16, v16, a3 -; RV64-NEXT: vsll.vi v20, v8, 24 -; RV64-NEXT: slli a3, a2, 40 -; RV64-NEXT: vand.vx v20, v20, a3 -; RV64-NEXT: vor.vv v16, v20, v16 +; RV64-NEXT: vand.vx v16, v8, a3 +; RV64-NEXT: vsll.vi v16, v16, 24 +; RV64-NEXT: vand.vx v20, v8, a4 +; RV64-NEXT: vsll.vi v20, v20, 8 +; RV64-NEXT: vor.vv v16, v16, v20 ; RV64-NEXT: vsll.vx v20, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 -; RV64-NEXT: slli a0, a2, 48 -; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v20, v8 ; RV64-NEXT: vor.vv v8, v8, v16 ; RV64-NEXT: vor.vv v8, v8, v12 @@ -545,74 +502,50 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: sub sp, sp, a0 ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: li a1, 255 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsrl.vx v16, v8, a3 -; RV32-NEXT: vand.vx v16, v16, a1 -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vsrl.vx v0, v8, a2 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v24, v0, v24 +; RV32-NEXT: li a0, 56 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v8, a0 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: vand.vx v0, v0, a0 -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v0, v8, a3 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsll.vx v0, v8, a2 -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v0, (a1), zero -; RV32-NEXT: vsll.vi v16, v8, 8 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v24, (a3), zero +; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vand.vx v0, v0, a3 +; RV32-NEXT: vsrl.vi v16, v8, 8 ; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vl8re8.v v0, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: addi a4, 
sp, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v0, v8, a2 +; RV32-NEXT: vsll.vx v0, v0, a1 +; RV32-NEXT: vsll.vx v16, v8, a0 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vand.vx v8, v8, a3 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -629,25 +562,22 @@ ; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vor.vv v16, v24, v16 ; RV64-NEXT: vsrl.vi v24, v8, 24 -; RV64-NEXT: lui a2, 4080 -; RV64-NEXT: vand.vx v24, v24, a2 +; RV64-NEXT: lui a3, 4080 +; RV64-NEXT: vand.vx v24, v24, a3 ; RV64-NEXT: vsrl.vi v0, v8, 8 -; RV64-NEXT: li a2, 255 -; RV64-NEXT: slli a3, a2, 24 -; RV64-NEXT: vand.vx v0, v0, a3 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v0, v0, a4 ; RV64-NEXT: vor.vv v24, v0, v24 ; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vsll.vi v24, v8, 8 -; RV64-NEXT: slli a3, a2, 32 -; RV64-NEXT: vand.vx v24, v24, a3 -; RV64-NEXT: vsll.vi v0, v8, 24 -; RV64-NEXT: slli a3, a2, 40 -; RV64-NEXT: vand.vx v0, v0, a3 -; RV64-NEXT: vor.vv v24, v0, v24 +; RV64-NEXT: vand.vx v24, v8, a3 +; RV64-NEXT: vsll.vi v24, v24, 24 +; RV64-NEXT: vand.vx v0, v8, a4 +; RV64-NEXT: vsll.vi v0, v0, 8 +; RV64-NEXT: vor.vv v24, v24, v0 ; RV64-NEXT: vsll.vx v0, v8, a0 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vsll.vx v8, v8, a1 -; RV64-NEXT: slli a0, a2, 48 -; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vor.vv v8, v0, v8 ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v8, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -85,9 +85,8 @@ ; RV32-NEXT: vand.vx v9, v9, a1 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v10, v8, 8 -; RV32-NEXT: lui a1, 4080 -; RV32-NEXT: vand.vx v10, v10, a1 +; RV32-NEXT: vand.vx v10, v8, a1 +; RV32-NEXT: vsll.vi v10, v10, 8 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v9 @@ -125,9 +124,8 @@ ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vsll.vi v10, v8, 8 -; RV64-NEXT: lui a1, 4080 -; RV64-NEXT: vand.vx v10, v10, a1 +; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: vsll.vi v10, v10, 8 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 @@ -186,32 +184,19 @@ ; RV32-NEXT: vmerge.vxm v11, v11, a5, v0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vand.vv v11, v12, v11 -; RV32-NEXT: vor.vv v10, v11, v10 +; RV32-NEXT: vand.vv v12, v12, v11 +; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: li a5, 255 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v10, a5 -; 
RV32-NEXT: vmerge.vim v10, v10, 0, v0 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsll.vi v11, v8, 8 -; RV32-NEXT: vand.vv v10, v11, v10 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v11, a3 -; RV32-NEXT: vmerge.vim v11, v11, 0, v0 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsll.vi v12, v8, 24 -; RV32-NEXT: vand.vv v11, v12, v11 -; RV32-NEXT: vor.vv v10, v11, v10 -; RV32-NEXT: vsll.vx v11, v8, a2 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v12, a4 -; RV32-NEXT: vmerge.vim v12, v12, 0, v0 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vand.vv v11, v11, v12 -; RV32-NEXT: vsll.vx v8, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v11 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: vand.vx v12, v8, a3 +; RV32-NEXT: vsll.vx v12, v12, a2 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vsll.vi v12, v12, 24 +; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -259,25 +244,22 @@ ; RV64-NEXT: vand.vx v10, v10, a3 ; RV64-NEXT: vor.vv v9, v10, v9 ; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: lui a3, 4080 -; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: lui a4, 4080 +; RV64-NEXT: vand.vx v10, v10, a4 ; RV64-NEXT: vsrl.vi v11, v8, 8 -; RV64-NEXT: li a3, 255 -; RV64-NEXT: slli a4, a3, 24 -; RV64-NEXT: vand.vx v11, v11, a4 +; RV64-NEXT: li a5, 255 +; RV64-NEXT: slli a5, a5, 24 +; RV64-NEXT: vand.vx v11, v11, a5 ; RV64-NEXT: vor.vv v10, v11, v10 ; RV64-NEXT: vor.vv v9, v10, v9 -; RV64-NEXT: vsll.vi v10, v8, 8 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: vand.vx v10, v10, a4 -; RV64-NEXT: vsll.vi v11, v8, 24 -; RV64-NEXT: slli a4, a3, 40 -; RV64-NEXT: vand.vx v11, v11, a4 +; RV64-NEXT: vand.vx v10, v8, a5 +; RV64-NEXT: vsll.vi v10, v10, 8 +; RV64-NEXT: vand.vx v11, v8, a4 +; RV64-NEXT: vsll.vi v11, v11, 24 ; RV64-NEXT: vor.vv v10, v11, v10 ; RV64-NEXT: vsll.vx v11, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a3 ; RV64-NEXT: vsll.vx v8, v8, a2 -; RV64-NEXT: slli a1, a3, 48 -; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vor.vv v8, v11, v8 ; RV64-NEXT: lui a1, %hi(.LCPI2_0) ; RV64-NEXT: ld a1, %lo(.LCPI2_0)(a1) @@ -497,9 +479,8 @@ ; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV32-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vsll.vi v12, v8, 8 -; LMULMAX2-RV32-NEXT: lui a1, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a1 +; LMULMAX2-RV32-NEXT: vand.vx v12, v8, a1 +; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 ; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 @@ -537,9 +518,8 @@ ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: vsll.vi v12, v8, 8 -; LMULMAX2-RV64-NEXT: lui a1, 4080 -; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a1 +; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a1 +; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8 ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 24 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 @@ -579,55 +559,54 @@ ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV32-NEXT: lui a3, 4080 -; 
LMULMAX1-RV32-NEXT: vand.vx v11, v11, a3 +; LMULMAX1-RV32-NEXT: vand.vx v11, v8, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: lui a4, 61681 -; LMULMAX1-RV32-NEXT: addi a4, a4, -241 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-RV32-NEXT: lui a3, 61681 +; LMULMAX1-RV32-NEXT: addi a3, a3, -241 +; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 +; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV32-NEXT: lui a5, 209715 -; LMULMAX1-RV32-NEXT: addi a5, a5, 819 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 +; LMULMAX1-RV32-NEXT: lui a4, 209715 +; LMULMAX1-RV32-NEXT: addi a4, a4, 819 +; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a4 +; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a4 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: lui a6, 349525 -; LMULMAX1-RV32-NEXT: addi a6, a6, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a6 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a6 +; LMULMAX1-RV32-NEXT: lui a5, 349525 +; LMULMAX1-RV32-NEXT: addi a5, a5, 1365 +; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a5 +; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 ; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8 ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v9, 8 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a3 +; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 ; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 +; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 +; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3 ; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 4 ; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 +; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a4 +; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a4 ; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 2 ; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a6 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a6 +; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a5 +; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v9 ; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) @@ -646,55 +625,54 @@ ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV64-NEXT: lui a3, 4080 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a3 +; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: lui a4, 61681 -; 
LMULMAX1-RV64-NEXT: addiw a4, a4, -241 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-RV64-NEXT: lui a3, 61681 +; LMULMAX1-RV64-NEXT: addiw a3, a3, -241 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: lui a5, 209715 -; LMULMAX1-RV64-NEXT: addiw a5, a5, 819 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 +; LMULMAX1-RV64-NEXT: lui a4, 209715 +; LMULMAX1-RV64-NEXT: addiw a4, a4, 819 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a6, 349525 -; LMULMAX1-RV64-NEXT: addiw a6, a6, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a6 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a6 +; LMULMAX1-RV64-NEXT: lui a5, 349525 +; LMULMAX1-RV64-NEXT: addiw a5, a5, 1365 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a5 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 8 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a3 +; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 24 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 4 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a5 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 2 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a6 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a6 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a5 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v9 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) @@ -732,32 +710,19 @@ ; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a5, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v14, v16, v14 -; LMULMAX2-RV32-NEXT: vor.vv v12, v14, v12 +; LMULMAX2-RV32-NEXT: vand.vv v16, v16, v14 +; LMULMAX2-RV32-NEXT: vor.vv v12, v16, v12 ; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: li a5, 255 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v12, a5 -; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsll.vi v14, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v12, v14, v12 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v14, a3 -; LMULMAX2-RV32-NEXT: vmerge.vim v14, v14, 0, v0 -; LMULMAX2-RV32-NEXT: 
vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsll.vi v16, v8, 24 -; LMULMAX2-RV32-NEXT: vand.vv v14, v16, v14 -; LMULMAX2-RV32-NEXT: vor.vv v12, v14, v12 -; LMULMAX2-RV32-NEXT: vsll.vx v14, v8, a2 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v16, a4 -; LMULMAX2-RV32-NEXT: vmerge.vim v16, v16, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vand.vv v14, v14, v16 -; LMULMAX2-RV32-NEXT: vsll.vx v8, v8, a1 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v14 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 +; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1 +; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3 +; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2 +; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 +; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a4 +; LMULMAX2-RV32-NEXT: vsll.vi v16, v16, 24 +; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14 +; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 +; LMULMAX2-RV32-NEXT: vor.vv v8, v16, v8 +; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV32-NEXT: lui a1, 61681 @@ -805,25 +770,22 @@ ; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a3 ; LMULMAX2-RV64-NEXT: vor.vv v10, v12, v10 ; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX2-RV64-NEXT: lui a3, 4080 -; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a3 +; LMULMAX2-RV64-NEXT: lui a4, 4080 +; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a4 ; LMULMAX2-RV64-NEXT: vsrl.vi v14, v8, 8 -; LMULMAX2-RV64-NEXT: li a3, 255 -; LMULMAX2-RV64-NEXT: slli a4, a3, 24 -; LMULMAX2-RV64-NEXT: vand.vx v14, v14, a4 +; LMULMAX2-RV64-NEXT: li a5, 255 +; LMULMAX2-RV64-NEXT: slli a5, a5, 24 +; LMULMAX2-RV64-NEXT: vand.vx v14, v14, a5 ; LMULMAX2-RV64-NEXT: vor.vv v12, v14, v12 ; LMULMAX2-RV64-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV64-NEXT: vsll.vi v12, v8, 8 -; LMULMAX2-RV64-NEXT: slli a4, a3, 32 -; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a4 -; LMULMAX2-RV64-NEXT: vsll.vi v14, v8, 24 -; LMULMAX2-RV64-NEXT: slli a4, a3, 40 -; LMULMAX2-RV64-NEXT: vand.vx v14, v14, a4 +; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a5 +; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8 +; LMULMAX2-RV64-NEXT: vand.vx v14, v8, a4 +; LMULMAX2-RV64-NEXT: vsll.vi v14, v14, 24 ; LMULMAX2-RV64-NEXT: vor.vv v12, v14, v12 ; LMULMAX2-RV64-NEXT: vsll.vx v14, v8, a1 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a3 ; LMULMAX2-RV64-NEXT: vsll.vx v8, v8, a2 -; LMULMAX2-RV64-NEXT: slli a1, a3, 48 -; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 ; LMULMAX2-RV64-NEXT: vor.vv v8, v14, v8 ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI5_0) ; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI5_0)(a1) @@ -855,19 +817,19 @@ ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v12, (a1) +; LMULMAX1-RV32-NEXT: vle64.v v10, (a1) ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: li a2, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v9, v12, a2 +; LMULMAX1-RV32-NEXT: vsrl.vx v9, v10, a2 ; LMULMAX1-RV32-NEXT: li a3, 40 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v12, a3 +; LMULMAX1-RV32-NEXT: vsrl.vx v11, v10, a3 ; LMULMAX1-RV32-NEXT: lui a4, 16 ; LMULMAX1-RV32-NEXT: addi a4, a4, -256 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v12, 24 +; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4 +; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v9 +; LMULMAX1-RV32-NEXT: vsrl.vi v9, v10, 24 ; LMULMAX1-RV32-NEXT: lui a5, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a5 +; LMULMAX1-RV32-NEXT: vand.vx v12, 
v9, a5 ; LMULMAX1-RV32-NEXT: li a6, 5 ; LMULMAX1-RV32-NEXT: vmv.s.x v0, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -875,102 +837,89 @@ ; LMULMAX1-RV32-NEXT: lui a6, 1044480 ; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v9, a6, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsrl.vi v13, v12, 8 +; LMULMAX1-RV32-NEXT: vsrl.vi v13, v10, 8 ; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v9 -; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11 -; LMULMAX1-RV32-NEXT: vor.vv v13, v11, v10 -; LMULMAX1-RV32-NEXT: li a6, 255 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a6 -; LMULMAX1-RV32-NEXT: vmerge.vim v10, v10, 0, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsll.vi v11, v12, 8 -; LMULMAX1-RV32-NEXT: vand.vv v14, v11, v10 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v11, a4 -; LMULMAX1-RV32-NEXT: vmerge.vim v11, v11, 0, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsll.vi v15, v12, 24 -; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v11 -; LMULMAX1-RV32-NEXT: vor.vv v14, v15, v14 -; LMULMAX1-RV32-NEXT: vsll.vx v15, v12, a3 -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v16, a5 -; LMULMAX1-RV32-NEXT: vmerge.vim v16, v16, 0, v0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v16 -; LMULMAX1-RV32-NEXT: vsll.vx v12, v12, a2 -; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v15 -; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v14 +; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 +; LMULMAX1-RV32-NEXT: vor.vv v11, v12, v11 +; LMULMAX1-RV32-NEXT: vsll.vx v12, v10, a2 +; LMULMAX1-RV32-NEXT: vand.vx v13, v10, a4 +; LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3 ; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 -; LMULMAX1-RV32-NEXT: vsrl.vi v13, v12, 4 +; LMULMAX1-RV32-NEXT: vand.vx v13, v10, a5 +; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24 +; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v9 +; LMULMAX1-RV32-NEXT: vsll.vi v10, v10, 8 +; LMULMAX1-RV32-NEXT: vor.vv v10, v13, v10 +; LMULMAX1-RV32-NEXT: vor.vv v10, v12, v10 +; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v11, v10, 4 ; LMULMAX1-RV32-NEXT: lui a6, 61681 ; LMULMAX1-RV32-NEXT: addi a6, a6, -241 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v14, a6 +; LMULMAX1-RV32-NEXT: vmv.v.x v12, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v14 -; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v14 -; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 4 -; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vsrl.vi v13, v12, 2 +; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v12 +; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v12 +; LMULMAX1-RV32-NEXT: vsll.vi v10, v10, 4 +; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 +; LMULMAX1-RV32-NEXT: vsrl.vi v11, v10, 2 ; LMULMAX1-RV32-NEXT: lui a6, 209715 ; LMULMAX1-RV32-NEXT: addi a6, a6, 819 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v15, a6 +; LMULMAX1-RV32-NEXT: vmv.v.x v13, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v15 -; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v15 -; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 2 -; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vsrl.vi v13, v12, 1 +; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v13 +; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v13 +; 
LMULMAX1-RV32-NEXT: vsll.vi v10, v10, 2 +; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 +; LMULMAX1-RV32-NEXT: vsrl.vi v11, v10, 1 ; LMULMAX1-RV32-NEXT: lui a6, 349525 ; LMULMAX1-RV32-NEXT: addi a6, a6, 1365 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v17, a6 +; LMULMAX1-RV32-NEXT: vmv.v.x v14, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v17 -; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v17 -; LMULMAX1-RV32-NEXT: vadd.vv v12, v12, v12 -; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vsrl.vx v13, v8, a2 -; LMULMAX1-RV32-NEXT: vsrl.vx v18, v8, a3 -; LMULMAX1-RV32-NEXT: vand.vx v18, v18, a4 -; LMULMAX1-RV32-NEXT: vor.vv v13, v18, v13 -; LMULMAX1-RV32-NEXT: vsrl.vi v18, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v18, v18, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v19, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v9, v19, v9 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v18 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v13 -; LMULMAX1-RV32-NEXT: vsll.vi v13, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v10, v13, v10 -; LMULMAX1-RV32-NEXT: vsll.vi v13, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vv v11, v13, v11 +; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v14 +; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v14 +; LMULMAX1-RV32-NEXT: vadd.vv v10, v10, v10 ; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsll.vx v11, v8, a3 -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v16 -; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a2 +; LMULMAX1-RV32-NEXT: vsrl.vx v11, v8, a2 +; LMULMAX1-RV32-NEXT: vsrl.vx v15, v8, a3 +; LMULMAX1-RV32-NEXT: vand.vx v15, v15, a4 +; LMULMAX1-RV32-NEXT: vor.vv v11, v15, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v15, v8, 24 +; LMULMAX1-RV32-NEXT: vand.vx v15, v15, a5 +; LMULMAX1-RV32-NEXT: vsrl.vi v16, v8, 8 +; LMULMAX1-RV32-NEXT: vand.vv v16, v16, v9 +; LMULMAX1-RV32-NEXT: vor.vv v15, v16, v15 +; LMULMAX1-RV32-NEXT: vor.vv v11, v15, v11 +; LMULMAX1-RV32-NEXT: vsll.vx v15, v8, a2 +; LMULMAX1-RV32-NEXT: vand.vx v16, v8, a4 +; LMULMAX1-RV32-NEXT: vsll.vx v16, v16, a3 +; LMULMAX1-RV32-NEXT: vor.vv v15, v15, v16 +; LMULMAX1-RV32-NEXT: vand.vx v16, v8, a5 +; LMULMAX1-RV32-NEXT: vsll.vi v16, v16, 24 +; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9 +; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 +; LMULMAX1-RV32-NEXT: vor.vv v8, v16, v8 +; LMULMAX1-RV32-NEXT: vor.vv v8, v15, v8 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 ; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v14 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v14 +; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v12 +; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v12 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV32-NEXT: vor.vv v8, v9, v8 ; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v15 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v15 +; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v13 +; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v13 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV32-NEXT: vor.vv v8, v9, v8 ; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v17 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v17 +; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v14 +; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v14 ; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1-RV32-NEXT: vor.vv v8, v9, v8 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v12, (a1) +; LMULMAX1-RV32-NEXT: vse64.v v10, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: bitreverse_v4i64: @@ -992,43 
+941,40 @@ ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v9, 8 ; LMULMAX1-RV64-NEXT: li a6, 255 -; LMULMAX1-RV64-NEXT: slli a7, a6, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a7 +; LMULMAX1-RV64-NEXT: slli a6, a6, 24 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a6 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 8 -; LMULMAX1-RV64-NEXT: slli t0, a6, 32 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t0 -; LMULMAX1-RV64-NEXT: vsll.vi v12, v9, 24 -; LMULMAX1-RV64-NEXT: slli t1, a6, 40 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t1 +; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a6 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV64-NEXT: vand.vx v12, v9, a5 +; LMULMAX1-RV64-NEXT: vsll.vi v12, v12, 24 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, a2 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 ; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, a3 -; LMULMAX1-RV64-NEXT: slli a6, a6, 48 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vor.vv v9, v12, v9 -; LMULMAX1-RV64-NEXT: lui t2, %hi(.LCPI5_0) -; LMULMAX1-RV64-NEXT: ld t2, %lo(.LCPI5_0)(t2) +; LMULMAX1-RV64-NEXT: lui a7, %hi(.LCPI5_0) +; LMULMAX1-RV64-NEXT: ld a7, %lo(.LCPI5_0)(a7) ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t2 -; LMULMAX1-RV64-NEXT: lui t3, %hi(.LCPI5_1) -; LMULMAX1-RV64-NEXT: ld t3, %lo(.LCPI5_1)(t3) +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a7 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a7 +; LMULMAX1-RV64-NEXT: lui t0, %hi(.LCPI5_1) +; LMULMAX1-RV64-NEXT: ld t0, %lo(.LCPI5_1)(t0) ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 4 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t3 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t3 -; LMULMAX1-RV64-NEXT: lui t4, %hi(.LCPI5_2) -; LMULMAX1-RV64-NEXT: ld t4, %lo(.LCPI5_2)(t4) +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t0 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t0 +; LMULMAX1-RV64-NEXT: lui t1, %hi(.LCPI5_2) +; LMULMAX1-RV64-NEXT: ld t1, %lo(.LCPI5_2)(t1) ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 2 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t4 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t4 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t1 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t1 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v9 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2 @@ -1038,33 +984,33 @@ ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v8, 8 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a7 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a6 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t0 -; LMULMAX1-RV64-NEXT: vsll.vi v12, v8, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t1 +; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a6 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 +; LMULMAX1-RV64-NEXT: vand.vx v12, v8, a5 +; LMULMAX1-RV64-NEXT: vsll.vi v12, v12, 24 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, a2 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 ; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, a3 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a6 ; 
LMULMAX1-RV64-NEXT: vor.vv v8, v12, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, t2 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a7 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a7 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t3 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, t3 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t0 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, t0 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t4 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, t4 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t1 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, t1 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -33,9 +33,8 @@ ; RV32-NEXT: vand.vx v9, v9, a1 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v10, v8, 8 -; RV32-NEXT: lui a1, 4080 -; RV32-NEXT: vand.vx v10, v10, a1 +; RV32-NEXT: vand.vx v10, v8, a1 +; RV32-NEXT: vsll.vi v10, v10, 8 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v8, v9 @@ -52,9 +51,8 @@ ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vsll.vi v10, v8, 8 -; RV64-NEXT: lui a1, 4080 -; RV64-NEXT: vand.vx v10, v10, a1 +; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: vsll.vi v10, v10, 8 ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 @@ -92,32 +90,19 @@ ; RV32-NEXT: vmerge.vxm v11, v11, a5, v0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vand.vv v11, v12, v11 -; RV32-NEXT: vor.vv v10, v11, v10 +; RV32-NEXT: vand.vv v12, v12, v11 +; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: li a5, 255 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v10, a5 -; RV32-NEXT: vmerge.vim v10, v10, 0, v0 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsll.vi v11, v8, 8 -; RV32-NEXT: vand.vv v10, v11, v10 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v11, a3 -; RV32-NEXT: vmerge.vim v11, v11, 0, v0 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsll.vi v12, v8, 24 -; RV32-NEXT: vand.vv v11, v12, v11 -; RV32-NEXT: vor.vv v10, v11, v10 -; RV32-NEXT: vsll.vx v11, v8, a2 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v12, a4 -; RV32-NEXT: vmerge.vim v12, v12, 0, v0 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vand.vv v11, v11, v12 -; RV32-NEXT: vsll.vx v8, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v11 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: vand.vx v12, v8, a3 +; RV32-NEXT: vsll.vx v12, v12, a2 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vsll.vi v12, v12, 24 +; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: 
vor.vv v8, v8, v9 ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret @@ -135,25 +120,22 @@ ; RV64-NEXT: vand.vx v10, v10, a3 ; RV64-NEXT: vor.vv v9, v10, v9 ; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: lui a3, 4080 -; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: lui a4, 4080 +; RV64-NEXT: vand.vx v10, v10, a4 ; RV64-NEXT: vsrl.vi v11, v8, 8 -; RV64-NEXT: li a3, 255 -; RV64-NEXT: slli a4, a3, 24 -; RV64-NEXT: vand.vx v11, v11, a4 +; RV64-NEXT: li a5, 255 +; RV64-NEXT: slli a5, a5, 24 +; RV64-NEXT: vand.vx v11, v11, a5 ; RV64-NEXT: vor.vv v10, v11, v10 ; RV64-NEXT: vor.vv v9, v10, v9 -; RV64-NEXT: vsll.vi v10, v8, 8 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: vand.vx v10, v10, a4 -; RV64-NEXT: vsll.vi v11, v8, 24 -; RV64-NEXT: slli a4, a3, 40 -; RV64-NEXT: vand.vx v11, v11, a4 +; RV64-NEXT: vand.vx v10, v8, a5 +; RV64-NEXT: vsll.vi v10, v10, 8 +; RV64-NEXT: vand.vx v11, v8, a4 +; RV64-NEXT: vsll.vi v11, v11, 24 ; RV64-NEXT: vor.vv v10, v11, v10 ; RV64-NEXT: vsll.vx v11, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a3 ; RV64-NEXT: vsll.vx v8, v8, a2 -; RV64-NEXT: slli a1, a3, 48 -; RV64-NEXT: vand.vx v8, v8, a1 ; RV64-NEXT: vor.vv v8, v11, v8 ; RV64-NEXT: vor.vv v8, v8, v10 ; RV64-NEXT: vor.vv v8, v8, v9 @@ -238,9 +220,8 @@ ; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV32-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vsll.vi v12, v8, 8 -; LMULMAX2-RV32-NEXT: lui a1, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a1 +; LMULMAX2-RV32-NEXT: vand.vx v12, v8, a1 +; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 ; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 @@ -257,9 +238,8 @@ ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: vsll.vi v12, v8, 8 -; LMULMAX2-RV64-NEXT: lui a1, 4080 -; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a1 +; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a1 +; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8 ; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 24 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 @@ -278,9 +258,8 @@ ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV32-NEXT: lui a3, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a3 +; LMULMAX1-RV32-NEXT: vand.vx v11, v8, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 @@ -288,8 +267,8 @@ ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v9, 8 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a3 +; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 ; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 @@ -309,9 +288,8 @@ ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV64-NEXT: lui a3, 4080 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a3 +; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: 
vor.vv v8, v8, v10
@@ -319,8 +297,8 @@
; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2
; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24
; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11
-; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 8
-; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a3
+; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a2
+; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8
; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 24
; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11
; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
@@ -359,32 +337,19 @@
; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a5, v0
; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 8
-; LMULMAX2-RV32-NEXT: vand.vv v14, v16, v14
-; LMULMAX2-RV32-NEXT: vor.vv v12, v14, v12
+; LMULMAX2-RV32-NEXT: vand.vv v16, v16, v14
+; LMULMAX2-RV32-NEXT: vor.vv v12, v16, v12
; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10
-; LMULMAX2-RV32-NEXT: li a5, 255
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.x v12, a5
-; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 0, v0
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vsll.vi v14, v8, 8
-; LMULMAX2-RV32-NEXT: vand.vv v12, v14, v12
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.x v14, a3
-; LMULMAX2-RV32-NEXT: vmerge.vim v14, v14, 0, v0
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vsll.vi v16, v8, 24
-; LMULMAX2-RV32-NEXT: vand.vv v14, v16, v14
-; LMULMAX2-RV32-NEXT: vor.vv v12, v14, v12
-; LMULMAX2-RV32-NEXT: vsll.vx v14, v8, a2
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.x v16, a4
-; LMULMAX2-RV32-NEXT: vmerge.vim v16, v16, 0, v0
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vand.vv v14, v14, v16
-; LMULMAX2-RV32-NEXT: vsll.vx v8, v8, a1
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v14
-; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12
+; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1
+; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3
+; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2
+; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16
+; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a4
+; LMULMAX2-RV32-NEXT: vsll.vi v16, v16, 24
+; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14
+; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8
+; LMULMAX2-RV32-NEXT: vor.vv v8, v16, v8
+; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8
; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10
; LMULMAX2-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT: ret
@@ -402,25 +367,22 @@
; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a3
; LMULMAX2-RV64-NEXT: vor.vv v10, v12, v10
; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24
-; LMULMAX2-RV64-NEXT: lui a3, 4080
-; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a3
+; LMULMAX2-RV64-NEXT: lui a4, 4080
+; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a4
; LMULMAX2-RV64-NEXT: vsrl.vi v14, v8, 8
-; LMULMAX2-RV64-NEXT: li a3, 255
-; LMULMAX2-RV64-NEXT: slli a4, a3, 24
-; LMULMAX2-RV64-NEXT: vand.vx v14, v14, a4
+; LMULMAX2-RV64-NEXT: li a5, 255
+; LMULMAX2-RV64-NEXT: slli a5, a5, 24
+; LMULMAX2-RV64-NEXT: vand.vx v14, v14, a5
; LMULMAX2-RV64-NEXT: vor.vv v12, v14, v12
; LMULMAX2-RV64-NEXT: vor.vv v10, v12, v10
-; LMULMAX2-RV64-NEXT: vsll.vi v12, v8, 8
-; LMULMAX2-RV64-NEXT: slli a4, a3, 32
-; LMULMAX2-RV64-NEXT: vand.vx v12, v12, a4
-; LMULMAX2-RV64-NEXT: vsll.vi v14, v8, 24
-; LMULMAX2-RV64-NEXT: slli a4, a3, 40
-; LMULMAX2-RV64-NEXT: vand.vx v14, v14, a4
+; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a5
+; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8
+; LMULMAX2-RV64-NEXT: vand.vx v14, v8, a4
+; LMULMAX2-RV64-NEXT: vsll.vi v14, v14, 24
; LMULMAX2-RV64-NEXT: vor.vv v12, v14, v12
; LMULMAX2-RV64-NEXT: vsll.vx v14, v8, a1
+; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a3
; LMULMAX2-RV64-NEXT: vsll.vx v8, v8, a2
-; LMULMAX2-RV64-NEXT: slli a1, a3, 48
-; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1
; LMULMAX2-RV64-NEXT: vor.vv v8, v14, v8
; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12
; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10
@@ -431,17 +393,17 @@
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT: addi a1, a0, 16
-; LMULMAX1-RV32-NEXT: vle64.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
+; LMULMAX1-RV32-NEXT: vle64.v v8, (a1)
+; LMULMAX1-RV32-NEXT: vle64.v v9, (a0)
; LMULMAX1-RV32-NEXT: li a2, 56
-; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a2
+; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a2
; LMULMAX1-RV32-NEXT: li a3, 40
-; LMULMAX1-RV32-NEXT: vsrl.vx v11, v9, a3
+; LMULMAX1-RV32-NEXT: vsrl.vx v11, v8, a3
; LMULMAX1-RV32-NEXT: lui a4, 16
; LMULMAX1-RV32-NEXT: addi a4, a4, -256
; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4
; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24
+; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24
; LMULMAX1-RV32-NEXT: lui a5, 4080
; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5
; LMULMAX1-RV32-NEXT: li a6, 5
@@ -451,57 +413,44 @@
; LMULMAX1-RV32-NEXT: lui a6, 1044480
; LMULMAX1-RV32-NEXT: vmerge.vxm v12, v12, a6, v0
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; LMULMAX1-RV32-NEXT: vsrl.vi v13, v8, 8
+; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v12
+; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11
+; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10
+; LMULMAX1-RV32-NEXT: vsll.vx v11, v8, a2
+; LMULMAX1-RV32-NEXT: vand.vx v13, v8, a4
+; LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3
+; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v13
+; LMULMAX1-RV32-NEXT: vand.vx v13, v8, a5
+; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24
+; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v12
+; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8
+; LMULMAX1-RV32-NEXT: vor.vv v8, v13, v8
+; LMULMAX1-RV32-NEXT: vor.vv v8, v11, v8
+; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
+; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a2
+; LMULMAX1-RV32-NEXT: vsrl.vx v11, v9, a3
+; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4
+; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10
+; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24
+; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5
; LMULMAX1-RV32-NEXT: vsrl.vi v13, v9, 8
; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v12
; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11
; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10
-; LMULMAX1-RV32-NEXT: li a6, 255
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.x v11, a6
-; LMULMAX1-RV32-NEXT: vmerge.vim v11, v11, 0, v0
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vsll.vi v13, v9, 8
-; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v11
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.x v14, a4
-; LMULMAX1-RV32-NEXT: vmerge.vim v14, v14, 0, v0
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vsll.vi v15, v9, 24
-; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v14
-; LMULMAX1-RV32-NEXT: vor.vv v13, v15, v13
-; LMULMAX1-RV32-NEXT: vsll.vx v15, v9, a3
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.x v16, a5
-; LMULMAX1-RV32-NEXT: vmerge.vim v16, v16, 0, v0
-; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v16
-; LMULMAX1-RV32-NEXT: vsll.vx v9, v9, a2
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v15
-; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v13
+; LMULMAX1-RV32-NEXT: vsll.vx v11, v9, a2
+; LMULMAX1-RV32-NEXT: vand.vx v13, v9, a4
+; LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3
+; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v13
+; LMULMAX1-RV32-NEXT: vand.vx v13, v9, a5
+; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24
+; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v12
+; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8
+; LMULMAX1-RV32-NEXT: vor.vv v9, v13, v9
+; LMULMAX1-RV32-NEXT: vor.vv v9, v11, v9
; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a2
-; LMULMAX1-RV32-NEXT: vsrl.vx v13, v8, a3
-; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a4
-; LMULMAX1-RV32-NEXT: vor.vv v10, v13, v10
-; LMULMAX1-RV32-NEXT: vsrl.vi v13, v8, 24
-; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a5
-; LMULMAX1-RV32-NEXT: vsrl.vi v15, v8, 8
-; LMULMAX1-RV32-NEXT: vand.vv v12, v15, v12
-; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13
-; LMULMAX1-RV32-NEXT: vor.vv v10, v12, v10
-; LMULMAX1-RV32-NEXT: vsll.vi v12, v8, 8
-; LMULMAX1-RV32-NEXT: vand.vv v11, v12, v11
-; LMULMAX1-RV32-NEXT: vsll.vi v12, v8, 24
-; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v14
-; LMULMAX1-RV32-NEXT: vor.vv v11, v12, v11
-; LMULMAX1-RV32-NEXT: vsll.vx v12, v8, a3
-; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v16
-; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a2
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v12
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11
-; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10
-; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vse64.v v9, (a1)
+; LMULMAX1-RV32-NEXT: vse64.v v9, (a0)
+; LMULMAX1-RV32-NEXT: vse64.v v8, (a1)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: bswap_v4i64:
@@ -523,21 +472,18 @@
; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5
; LMULMAX1-RV64-NEXT: vsrl.vi v12, v8, 8
; LMULMAX1-RV64-NEXT: li a6, 255
-; LMULMAX1-RV64-NEXT: slli a7, a6, 24
-; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a7
+; LMULMAX1-RV64-NEXT: slli a6, a6, 24
+; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a6
; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11
; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10
-; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 8
-; LMULMAX1-RV64-NEXT: slli t0, a6, 32
-; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t0
-; LMULMAX1-RV64-NEXT: vsll.vi v12, v8, 24
-; LMULMAX1-RV64-NEXT: slli t1, a6, 40
-; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t1
+; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a6
+; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8
+; LMULMAX1-RV64-NEXT: vand.vx v12, v8, a5
+; LMULMAX1-RV64-NEXT: vsll.vi v12, v12, 24
; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11
; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, a2
+; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4
; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, a3
-; LMULMAX1-RV64-NEXT: slli a6, a6, 48
-; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a6
; LMULMAX1-RV64-NEXT: vor.vv v8, v12, v8
; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10
@@ -548,17 +494,17 @@
; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24
; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5
; LMULMAX1-RV64-NEXT: vsrl.vi v12, v9, 8
-; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a7
+; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a6
; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11
; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10
-; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 8
-; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t0
-; LMULMAX1-RV64-NEXT: vsll.vi v12, v9, 24
-; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t1
+; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a6
+; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8
+; LMULMAX1-RV64-NEXT: vand.vx v12, v9, a5
+; LMULMAX1-RV64-NEXT: vsll.vi v12, v12, 24
; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11
; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, a2
+; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4
; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, a3
-; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a6
; LMULMAX1-RV64-NEXT: vor.vv v9, v12, v9
; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11
; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10
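
All of the RISC-V deltas above fall out of the same rewrite of the generic BSWAP expansion: each byte is now masked with a narrow constant before being shifted left, instead of shifted left and then masked with a wide constant. The two forms agree because ((x & m) << s) == ((x << s) & (m << s)); the payoff is that the narrow masks (255<<8, 255<<16, 255<<24) are exactly the ones the shift-right half already needs, so the wide 255<<32, 255<<40, and 255<<48 constants (the dropped slli and vmv.v.x/vmerge.vim sequences above) no longer have to be materialized. A minimal scalar sketch of the mask-before-shift form for i64, with a hypothetical function name and not code from the patch:

    #include <cassert>
    #include <cstdint>

    // Hypothetical scalar model of the rewritten i64 expansion: the three
    // narrow masks below each serve both the shift-left and the
    // shift-right half, so no mask wider than 255<<24 is ever needed.
    static uint64_t bswap64_mask_then_shift(uint64_t x) {
      uint64_t hi = (x << 56) |
                    ((x & 0xFF00ULL) << 40) |      // mask reused by (x >> 40)
                    ((x & 0xFF0000ULL) << 24) |    // mask reused by (x >> 24)
                    ((x & 0xFF000000ULL) << 8);    // mask reused by (x >> 8)
      uint64_t lo = ((x >> 8) & 0xFF000000ULL) |
                    ((x >> 24) & 0xFF0000ULL) |
                    ((x >> 40) & 0xFF00ULL) |
                    (x >> 56);
      return hi | lo;
    }

    int main() {
      // Sanity-check the sketch against the compiler builtin.
      for (uint64_t x : {0x0102030405060708ULL, 0xDEADBEEFCAFEF00DULL, ~0ULL})
        assert(bswap64_mask_then_shift(x) == __builtin_bswap64(x));
    }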