diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -847,6 +847,13 @@
 def : PatGprGpr;
 def : PatGprGpr;
 def : PatGprImm;
+
+foreach Idx = 1...3 in {
+  defvar ShamtA = !mul(8, Idx);
+  defvar ShamtB = !mul(8, !sub(4, Idx));
+  def : Pat<(or (shl GPR:$rk, (i32 ShamtA)), (srl GPR:$rj, (i32 ShamtB))),
+            (BYTEPICK_W GPR:$rj, GPR:$rk, Idx)>;
+}
 } // Predicates = [IsLA32]
 
 let Predicates = [IsLA64] in {
@@ -891,6 +898,24 @@
 def : Pat<(sext_inreg (add GPR:$rj, simm32_hi16_lo12:$imm), i32),
           (ADDI_W (ADDU16I_D GPR:$rj, (HI16ForAddu16idAddiPair $imm)),
                   (LO12 $imm))>;
+
+foreach Idx = 1...7 in {
+  defvar ShamtA = !mul(8, Idx);
+  defvar ShamtB = !mul(8, !sub(8, Idx));
+  def : Pat<(or (shl GPR:$rk, (i64 ShamtA)), (srl GPR:$rj, (i64 ShamtB))),
+            (BYTEPICK_D GPR:$rj, GPR:$rk, Idx)>;
+}
+
+foreach Idx = 1...3 in {
+  defvar ShamtA = !mul(8, Idx);
+  defvar ShamtB = !mul(8, !sub(4, Idx));
+  // NOTE: the srl node would already be transformed into a loongarch_bstrpick
+  // by the time this pattern gets to execute, hence the weird construction.
+  def : Pat<(sext_inreg (or (shl GPR:$rk, (i64 ShamtA)),
+                            (loongarch_bstrpick GPR:$rj, (i64 31),
+                                                (i64 ShamtB))), i32),
+            (BYTEPICK_W GPR:$rj, GPR:$rk, Idx)>;
+}
 } // Predicates = [IsLA64]
 
 def : PatGprGpr;
diff --git a/llvm/test/CodeGen/LoongArch/bitreverse.ll b/llvm/test/CodeGen/LoongArch/bitreverse.ll
--- a/llvm/test/CodeGen/LoongArch/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/bitreverse.ll
@@ -111,11 +111,9 @@
 define i48 @test_bitreverse_i48(i48 %a) nounwind {
 ; LA32-LABEL: test_bitreverse_i48:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bitrev.w $a1, $a1
-; LA32-NEXT:    srli.w $a1, $a1, 16
 ; LA32-NEXT:    bitrev.w $a2, $a0
-; LA32-NEXT:    slli.w $a0, $a2, 16
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bitrev.w $a0, $a1
+; LA32-NEXT:    bytepick.w $a0, $a0, $a2, 2
 ; LA32-NEXT:    srli.w $a1, $a2, 16
 ; LA32-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/LoongArch/bswap.ll b/llvm/test/CodeGen/LoongArch/bswap.ll
--- a/llvm/test/CodeGen/LoongArch/bswap.ll
+++ b/llvm/test/CodeGen/LoongArch/bswap.ll
@@ -63,13 +63,11 @@
 define i48 @test_bswap_i48(i48 %a) nounwind {
 ; LA32-LABEL: test_bswap_i48:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    revb.2h $a1, $a1
-; LA32-NEXT:    rotri.w $a1, $a1, 16
-; LA32-NEXT:    srli.w $a1, $a1, 16
 ; LA32-NEXT:    revb.2h $a0, $a0
 ; LA32-NEXT:    rotri.w $a2, $a0, 16
-; LA32-NEXT:    slli.w $a0, $a2, 16
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    revb.2h $a0, $a1
+; LA32-NEXT:    rotri.w $a0, $a0, 16
+; LA32-NEXT:    bytepick.w $a0, $a0, $a2, 2
 ; LA32-NEXT:    srli.w $a1, $a2, 16
 ; LA32-NEXT:    ret
 ;
@@ -91,28 +89,22 @@
 ; LA32-NEXT:    ld.w $a3, $a1, 4
 ; LA32-NEXT:    revb.2h $a3, $a3
 ; LA32-NEXT:    rotri.w $a3, $a3, 16
-; LA32-NEXT:    srli.w $a4, $a3, 16
-; LA32-NEXT:    slli.w $a5, $a2, 16
-; LA32-NEXT:    or $a4, $a5, $a4
-; LA32-NEXT:    srli.w $a2, $a2, 16
-; LA32-NEXT:    st.h $a2, $a0, 8
+; LA32-NEXT:    bytepick.w $a4, $a3, $a2, 2
 ; LA32-NEXT:    st.w $a4, $a0, 4
-; LA32-NEXT:    slli.w $a2, $a3, 16
 ; LA32-NEXT:    ld.w $a1, $a1, 8
 ; LA32-NEXT:    revb.2h $a1, $a1
 ; LA32-NEXT:    rotri.w $a1, $a1, 16
-; LA32-NEXT:    srli.w $a1, $a1, 16
-; LA32-NEXT:    or $a1, $a1, $a2
+; LA32-NEXT:    bytepick.w $a1, $a1, $a3, 2
 ; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    srli.w $a1, $a2, 16
+; LA32-NEXT:    st.h $a1, $a0, 8
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_bswap_i80:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    revb.d $a1, $a1
-; LA64-NEXT:    srli.d $a1, $a1, 48
 ; LA64-NEXT:    revb.d $a2, $a0
-; LA64-NEXT:    slli.d $a0, $a2, 16
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    revb.d $a0, $a1
+; LA64-NEXT:    bytepick.d $a0, $a0, $a2, 2
 ; LA64-NEXT:    srli.d $a1, $a2, 48
 ; LA64-NEXT:    ret
   %tmp = call i80 @llvm.bswap.i80(i80 %a)
diff --git a/llvm/test/CodeGen/LoongArch/bytepick.ll b/llvm/test/CodeGen/LoongArch/bytepick.ll
--- a/llvm/test/CodeGen/LoongArch/bytepick.ll
+++ b/llvm/test/CodeGen/LoongArch/bytepick.ll
@@ -9,9 +9,7 @@
 define i32 @pick_i32_1(i32 %a, i32 %b) {
 ; LA32-LABEL: pick_i32_1:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a0, $a0, 8
-; LA32-NEXT:    srli.w $a1, $a1, 24
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bytepick.w $a0, $a1, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i32_1:
@@ -31,17 +29,12 @@
 define signext i32 @pick_i32_1_sext(i32 %a, i32 %b) {
 ; LA32-LABEL: pick_i32_1_sext:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a0, $a0, 8
-; LA32-NEXT:    srli.w $a1, $a1, 24
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bytepick.w $a0, $a1, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i32_1_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 8
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 24
-; LA64-NEXT:    or $a0, $a1, $a0
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    bytepick.w $a0, $a1, $a0, 1
 ; LA64-NEXT:    ret
   %1 = lshr i32 %b, 24
   %2 = shl i32 %a, 8
@@ -54,9 +47,7 @@
 define i32 @pick_i32_2(i32 %a, i32 %b) {
 ; LA32-LABEL: pick_i32_2:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a0, $a0, 16
-; LA32-NEXT:    srli.w $a1, $a1, 16
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bytepick.w $a0, $a1, $a0, 2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i32_2:
@@ -76,17 +67,12 @@
 define signext i32 @pick_i32_2_sext(i32 %a, i32 %b) {
 ; LA32-LABEL: pick_i32_2_sext:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a0, $a0, 16
-; LA32-NEXT:    srli.w $a1, $a1, 16
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bytepick.w $a0, $a1, $a0, 2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i32_2_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 16
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 16
-; LA64-NEXT:    or $a0, $a1, $a0
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    bytepick.w $a0, $a1, $a0, 2
 ; LA64-NEXT:    ret
   %1 = lshr i32 %b, 16
   %2 = shl i32 %a, 16
@@ -99,9 +85,7 @@
 define i32 @pick_i32_3(i32 %a, i32 %b) {
 ; LA32-LABEL: pick_i32_3:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a0, $a0, 24
-; LA32-NEXT:    srli.w $a1, $a1, 8
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bytepick.w $a0, $a1, $a0, 3
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i32_3:
@@ -121,17 +105,12 @@
 define signext i32 @pick_i32_3_sext(i32 %a, i32 %b) {
 ; LA32-LABEL: pick_i32_3_sext:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    slli.w $a0, $a0, 24
-; LA32-NEXT:    srli.w $a1, $a1, 8
-; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    bytepick.w $a0, $a1, $a0, 3
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i32_3_sext:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 24
-; LA64-NEXT:    bstrpick.d $a1, $a1, 31, 8
-; LA64-NEXT:    or $a0, $a1, $a0
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    bytepick.w $a0, $a1, $a0, 3
 ; LA64-NEXT:    ret
   %1 = lshr i32 %b, 8
   %2 = shl i32 %a, 24
@@ -144,20 +123,14 @@
 define i64 @pick_i64_1(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_1:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a2, $a3, 24
-; LA32-NEXT:    slli.w $a3, $a0, 8
-; LA32-NEXT:    or $a2, $a2, $a3
-; LA32-NEXT:    srli.w $a0, $a0, 24
-; LA32-NEXT:    slli.w $a1, $a1, 8
-; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    bytepick.w $a2, $a3, $a0, 1
+; LA32-NEXT:    bytepick.w $a1, $a0, $a1, 1
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_1:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 8
-; LA64-NEXT:    srli.d $a1, $a1, 56
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 1
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 56
   %2 = shl i64 %a, 8
@@ -170,20 +143,14 @@
 define i64 @pick_i64_2(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_2:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a2, $a3, 16
-; LA32-NEXT:    slli.w $a3, $a0, 16
-; LA32-NEXT:    or $a2, $a2, $a3
-; LA32-NEXT:    srli.w $a0, $a0, 16
-; LA32-NEXT:    slli.w $a1, $a1, 16
-; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    bytepick.w $a2, $a3, $a0, 2
+; LA32-NEXT:    bytepick.w $a1, $a0, $a1, 2
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_2:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 16
-; LA64-NEXT:    srli.d $a1, $a1, 48
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 2
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 48
   %2 = shl i64 %a, 16
@@ -196,20 +163,14 @@
 define i64 @pick_i64_3(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_3:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a2, $a3, 8
-; LA32-NEXT:    slli.w $a3, $a0, 24
-; LA32-NEXT:    or $a2, $a2, $a3
-; LA32-NEXT:    srli.w $a0, $a0, 8
-; LA32-NEXT:    slli.w $a1, $a1, 24
-; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    bytepick.w $a2, $a3, $a0, 3
+; LA32-NEXT:    bytepick.w $a1, $a0, $a1, 3
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_3:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 24
-; LA64-NEXT:    srli.d $a1, $a1, 40
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 3
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 40
   %2 = shl i64 %a, 24
@@ -228,9 +189,7 @@
 ;
 ; LA64-LABEL: pick_i64_4:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 32
-; LA64-NEXT:    srli.d $a1, $a1, 32
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 4
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 32
   %2 = shl i64 %a, 32
@@ -243,20 +202,14 @@
 define i64 @pick_i64_5(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_5:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a1, $a2, 24
-; LA32-NEXT:    slli.w $a2, $a3, 8
-; LA32-NEXT:    or $a2, $a1, $a2
-; LA32-NEXT:    slli.w $a0, $a0, 8
-; LA32-NEXT:    srli.w $a1, $a3, 24
-; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    bytepick.w $a2, $a2, $a3, 1
+; LA32-NEXT:    bytepick.w $a1, $a3, $a0, 1
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_5:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 40
-; LA64-NEXT:    srli.d $a1, $a1, 24
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 5
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 24
   %2 = shl i64 %a,40
@@ -269,20 +222,14 @@
 define i64 @pick_i64_6(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_6:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a1, $a2, 16
-; LA32-NEXT:    slli.w $a2, $a3, 16
-; LA32-NEXT:    or $a2, $a1, $a2
-; LA32-NEXT:    slli.w $a0, $a0, 16
-; LA32-NEXT:    srli.w $a1, $a3, 16
-; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    bytepick.w $a2, $a2, $a3, 2
+; LA32-NEXT:    bytepick.w $a1, $a3, $a0, 2
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_6:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 48
-; LA64-NEXT:    srli.d $a1, $a1, 16
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 6
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 16
   %2 = shl i64 %a, 48
@@ -295,20 +242,14 @@
 define i64 @pick_i64_7(i64 %a, i64 %b) {
 ; LA32-LABEL: pick_i64_7:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a1, $a2, 8
-; LA32-NEXT:    slli.w $a2, $a3, 24
-; LA32-NEXT:    or $a2, $a1, $a2
-; LA32-NEXT:    slli.w $a0, $a0, 24
-; LA32-NEXT:    srli.w $a1, $a3, 8
-; LA32-NEXT:    or $a1, $a1, $a0
+; LA32-NEXT:    bytepick.w $a2, $a2, $a3, 3
+; LA32-NEXT:    bytepick.w $a1, $a3, $a0, 3
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: pick_i64_7:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    slli.d $a0, $a0, 56
-; LA64-NEXT:    srli.d $a1, $a1, 8
-; LA64-NEXT:    or $a0, $a1, $a0
+; LA64-NEXT:    bytepick.d $a0, $a1, $a0, 7
 ; LA64-NEXT:    ret
   %1 = lshr i64 %b, 8
   %2 = shl i64 %a, 56
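
For reference, the DAG shape the new patterns match can be modelled in plain C. This is an illustrative sketch derived from the Pat definitions above, not part of the patch; the helper names are made up for this note.

#include <stdint.h>

/* Models (or (shl rk, 8*idx), (srl rj, 8*(4-idx))), the shape selected to
 * BYTEPICK_W; idx must be in 1..3.  Hypothetical helper, illustration only. */
static uint32_t bytepick_w_model(uint32_t rj, uint32_t rk, unsigned idx) {
  return (rk << (8 * idx)) | (rj >> (8 * (4 - idx)));
}

/* Same shape on 64-bit GPRs, selected to BYTEPICK_D; idx must be in 1..7. */
static uint64_t bytepick_d_model(uint64_t rj, uint64_t rk, unsigned idx) {
  return (rk << (8 * idx)) | (rj >> (8 * (8 - idx)));
}

For example, pick_i32_2 above computes (%a << 16) | (%b >> 16), i.e. bytepick_w_model(b, a, 2): the low half of %a becomes the high half of the result and the high half of %b becomes the low half, which is exactly one bytepick.w instruction.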