diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -519,6 +519,40 @@ N->getValueType(0)); }]>; +// Check if (and r, imm) can be optimized to (BSTRINS r, R0, msb, lsb), +// in which imm = ~((2^^(msb-lsb+1) - 1) << lsb). +def BstrinsImm : PatLeaf<(imm), [{ + if (!N->hasOneUse()) + return false; + uint64_t Imm = N->getZExtValue(); + // andi can be used instead if Imm <= 0xfff. + if (Imm <= 0xfff) + return false; + unsigned MaskIdx, MaskLen; + return N->getValueType(0).getSizeInBits() == 32 + ? llvm::isShiftedMask_32(~Imm, MaskIdx, MaskLen) + : llvm::isShiftedMask_64(~Imm, MaskIdx, MaskLen); +}]>; + +def BstrinsMsb: SDNodeXForm<imm, [{ + uint64_t Imm = N->getZExtValue(); + unsigned MaskIdx, MaskLen; + N->getValueType(0).getSizeInBits() == 32 + ? llvm::isShiftedMask_32(~Imm, MaskIdx, MaskLen) + : llvm::isShiftedMask_64(~Imm, MaskIdx, MaskLen); + return CurDAG->getTargetConstant(MaskIdx + MaskLen - 1, SDLoc(N), + N->getValueType(0)); +}]>; + +def BstrinsLsb: SDNodeXForm<imm, [{ + uint64_t Imm = N->getZExtValue(); + unsigned MaskIdx, MaskLen; + N->getValueType(0).getSizeInBits() == 32 + ? llvm::isShiftedMask_32(~Imm, MaskIdx, MaskLen) + : llvm::isShiftedMask_64(~Imm, MaskIdx, MaskLen); + return CurDAG->getTargetConstant(MaskIdx, SDLoc(N), N->getValueType(0)); +}]>; + //===----------------------------------------------------------------------===// // Instruction Formats //===----------------------------------------------------------------------===// @@ -1142,6 +1176,18 @@ def : Pat<(or GPR:$rj, (not GPR:$rk)), (ORN GPR:$rj, GPR:$rk)>; def : Pat<(and GPR:$rj, (not GPR:$rk)), (ANDN GPR:$rj, GPR:$rk)>; +let Predicates = [IsLA32] in { +def : Pat<(and GPR:$rj, BstrinsImm:$imm), + (BSTRINS_W GPR:$rj, R0, (BstrinsMsb BstrinsImm:$imm), + (BstrinsLsb BstrinsImm:$imm))>; +} // Predicates = [IsLA32] + +let Predicates = [IsLA64] in { +def : Pat<(and GPR:$rj, BstrinsImm:$imm), + (BSTRINS_D GPR:$rj, R0, (BstrinsMsb BstrinsImm:$imm), + (BstrinsLsb BstrinsImm:$imm))>; +} // Predicates = [IsLA64] + /// Traps // We lower `trap` to `amswap.w rd:$r0, rk:$r1, rj:$r0`, as this is guaranteed diff --git a/llvm/test/CodeGen/LoongArch/alloca.ll b/llvm/test/CodeGen/LoongArch/alloca.ll --- a/llvm/test/CodeGen/LoongArch/alloca.ll +++ b/llvm/test/CodeGen/LoongArch/alloca.ll @@ -17,8 +17,7 @@ ; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill ; LA32-NEXT: addi.w $fp, $sp, 16 ; LA32-NEXT: addi.w $a0, $a0, 15 -; LA32-NEXT: addi.w $a1, $zero, -16 -; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: bstrins.w $a0, $zero, 3, 0 ; LA32-NEXT: sub.w $a0, $sp, $a0 ; LA32-NEXT: move $sp, $a0 ; LA32-NEXT: bl %plt(notdead) @@ -62,10 +61,9 @@ ; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill ; LA32-NEXT: st.w $s0, $sp, 4 # 4-byte Folded Spill ; LA32-NEXT: addi.w $fp, $sp, 16 -; LA32-NEXT: addi.w $a0, $a0, 15 -; LA32-NEXT: addi.w $a1, $zero, -16 -; LA32-NEXT: and $a0, $a0, $a1 ; LA32-NEXT: move $s0, $sp +; LA32-NEXT: addi.w $a0, $a0, 15 +; LA32-NEXT: bstrins.w $a0, $zero, 3, 0 ; LA32-NEXT: sub.w $a0, $sp, $a0 ; LA32-NEXT: move $sp, $a0 ; LA32-NEXT: bl %plt(notdead) @@ -118,8 +116,7 @@ ; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill ; LA32-NEXT: addi.w $fp, $sp, 16 ; LA32-NEXT: addi.w $a0, $a0, 15 -; LA32-NEXT: addi.w $a1, $zero, -16 -; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: bstrins.w $a0, $zero, 3, 0 ; LA32-NEXT: sub.w $a0, $sp, $a0 ; LA32-NEXT: move $sp, $a0 ; LA32-NEXT: addi.w $sp, $sp, -16 diff --git 
a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll --- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll @@ -4,39 +4,38 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; LA64-LABEL: atomicrmw_uinc_wrap_i8: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: ld.w $a3, $a2, 0 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ld.w $a2, $a0, 0 ; LA64-NEXT: ori $a4, $zero, 255 -; LA64-NEXT: sll.w $a4, $a4, $a0 -; LA64-NEXT: andi $a0, $a0, 24 +; LA64-NEXT: sll.w $a4, $a4, $a3 +; LA64-NEXT: andi $a3, $a3, 24 ; LA64-NEXT: nor $a4, $a4, $zero ; LA64-NEXT: andi $a1, $a1, 255 ; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB0_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB0_3 Depth 2 -; LA64-NEXT: srl.w $a5, $a3, $a0 +; LA64-NEXT: srl.w $a5, $a2, $a3 ; LA64-NEXT: andi $a6, $a5, 255 ; LA64-NEXT: sltu $a6, $a6, $a1 ; LA64-NEXT: addi.d $a5, $a5, 1 ; LA64-NEXT: xori $a6, $a6, 1 ; LA64-NEXT: masknez $a5, $a5, $a6 ; LA64-NEXT: andi $a5, $a5, 255 -; LA64-NEXT: sll.w $a5, $a5, $a0 -; LA64-NEXT: and $a6, $a3, $a4 +; LA64-NEXT: sll.w $a5, $a5, $a3 +; LA64-NEXT: and $a6, $a2, $a4 ; LA64-NEXT: or $a6, $a6, $a5 ; LA64-NEXT: .LBB0_3: # %atomicrmw.start ; LA64-NEXT: # Parent Loop BB0_1 Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -; LA64-NEXT: ll.w $a5, $a2, 0 -; LA64-NEXT: bne $a5, $a3, .LBB0_5 +; LA64-NEXT: ll.w $a5, $a0, 0 +; LA64-NEXT: bne $a5, $a2, .LBB0_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2 ; LA64-NEXT: dbar 0 ; LA64-NEXT: move $a7, $a6 -; LA64-NEXT: sc.w $a7, $a2, 0 +; LA64-NEXT: sc.w $a7, $a0, 0 ; LA64-NEXT: beqz $a7, .LBB0_3 ; LA64-NEXT: b .LBB0_6 ; LA64-NEXT: .LBB0_5: # %atomicrmw.start @@ -44,11 +43,11 @@ ; LA64-NEXT: dbar 1792 ; LA64-NEXT: .LBB0_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 -; LA64-NEXT: addi.w $a6, $a3, 0 -; LA64-NEXT: move $a3, $a5 +; LA64-NEXT: addi.w $a6, $a2, 0 +; LA64-NEXT: move $a2, $a5 ; LA64-NEXT: bne $a5, $a6, .LBB0_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end -; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: srl.w $a0, $a5, $a3 ; LA64-NEXT: ret %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst ret i8 %result @@ -57,40 +56,39 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; LA64-LABEL: atomicrmw_uinc_wrap_i16: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: ld.w $a3, $a2, 0 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ld.w $a2, $a0, 0 ; LA64-NEXT: lu12i.w $a4, 15 ; LA64-NEXT: ori $a4, $a4, 4095 -; LA64-NEXT: sll.w $a4, $a4, $a0 -; LA64-NEXT: andi $a0, $a0, 24 +; LA64-NEXT: sll.w $a4, $a4, $a3 +; LA64-NEXT: andi $a3, $a3, 24 ; LA64-NEXT: nor $a4, $a4, $zero ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB1_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB1_3 Depth 2 -; LA64-NEXT: srl.w $a5, $a3, $a0 +; LA64-NEXT: srl.w $a5, $a2, $a3 ; LA64-NEXT: bstrpick.d $a6, $a5, 15, 0 ; LA64-NEXT: sltu $a6, $a6, $a1 ; LA64-NEXT: addi.d $a5, $a5, 1 ; LA64-NEXT: xori $a6, $a6, 1 ; LA64-NEXT: masknez $a5, $a5, $a6 ; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0 -; LA64-NEXT: sll.w $a5, $a5, $a0 -; 
LA64-NEXT: and $a6, $a3, $a4 +; LA64-NEXT: sll.w $a5, $a5, $a3 +; LA64-NEXT: and $a6, $a2, $a4 ; LA64-NEXT: or $a6, $a6, $a5 ; LA64-NEXT: .LBB1_3: # %atomicrmw.start ; LA64-NEXT: # Parent Loop BB1_1 Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -; LA64-NEXT: ll.w $a5, $a2, 0 -; LA64-NEXT: bne $a5, $a3, .LBB1_5 +; LA64-NEXT: ll.w $a5, $a0, 0 +; LA64-NEXT: bne $a5, $a2, .LBB1_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2 ; LA64-NEXT: dbar 0 ; LA64-NEXT: move $a7, $a6 -; LA64-NEXT: sc.w $a7, $a2, 0 +; LA64-NEXT: sc.w $a7, $a0, 0 ; LA64-NEXT: beqz $a7, .LBB1_3 ; LA64-NEXT: b .LBB1_6 ; LA64-NEXT: .LBB1_5: # %atomicrmw.start @@ -98,11 +96,11 @@ ; LA64-NEXT: dbar 1792 ; LA64-NEXT: .LBB1_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 -; LA64-NEXT: addi.w $a6, $a3, 0 -; LA64-NEXT: move $a3, $a5 +; LA64-NEXT: addi.w $a6, $a2, 0 +; LA64-NEXT: move $a2, $a5 ; LA64-NEXT: bne $a5, $a6, .LBB1_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end -; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: srl.w $a0, $a5, $a3 ; LA64-NEXT: ret %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst ret i16 %result @@ -189,20 +187,19 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; LA64-LABEL: atomicrmw_udec_wrap_i8: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: ld.w $a3, $a2, 0 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ld.w $a2, $a0, 0 ; LA64-NEXT: ori $a4, $zero, 255 -; LA64-NEXT: sll.w $a4, $a4, $a0 -; LA64-NEXT: andi $a0, $a0, 24 +; LA64-NEXT: sll.w $a4, $a4, $a3 +; LA64-NEXT: andi $a3, $a3, 24 ; LA64-NEXT: nor $a4, $a4, $zero ; LA64-NEXT: andi $a5, $a1, 255 ; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB4_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB4_3 Depth 2 -; LA64-NEXT: srl.w $a6, $a3, $a0 +; LA64-NEXT: srl.w $a6, $a2, $a3 ; LA64-NEXT: andi $a7, $a6, 255 ; LA64-NEXT: sltu $t0, $a5, $a7 ; LA64-NEXT: addi.d $a6, $a6, -1 @@ -214,19 +211,19 @@ ; LA64-NEXT: maskeqz $a7, $a1, $a7 ; LA64-NEXT: or $a6, $a7, $a6 ; LA64-NEXT: andi $a6, $a6, 255 -; LA64-NEXT: sll.w $a6, $a6, $a0 -; LA64-NEXT: and $a7, $a3, $a4 +; LA64-NEXT: sll.w $a6, $a6, $a3 +; LA64-NEXT: and $a7, $a2, $a4 ; LA64-NEXT: or $a7, $a7, $a6 ; LA64-NEXT: .LBB4_3: # %atomicrmw.start ; LA64-NEXT: # Parent Loop BB4_1 Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -; LA64-NEXT: ll.w $a6, $a2, 0 -; LA64-NEXT: bne $a6, $a3, .LBB4_5 +; LA64-NEXT: ll.w $a6, $a0, 0 +; LA64-NEXT: bne $a6, $a2, .LBB4_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 ; LA64-NEXT: dbar 0 ; LA64-NEXT: move $t0, $a7 -; LA64-NEXT: sc.w $t0, $a2, 0 +; LA64-NEXT: sc.w $t0, $a0, 0 ; LA64-NEXT: beqz $t0, .LBB4_3 ; LA64-NEXT: b .LBB4_6 ; LA64-NEXT: .LBB4_5: # %atomicrmw.start @@ -234,11 +231,11 @@ ; LA64-NEXT: dbar 1792 ; LA64-NEXT: .LBB4_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 -; LA64-NEXT: addi.w $a7, $a3, 0 -; LA64-NEXT: move $a3, $a6 +; LA64-NEXT: addi.w $a7, $a2, 0 +; LA64-NEXT: move $a2, $a6 ; LA64-NEXT: bne $a6, $a7, .LBB4_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end -; LA64-NEXT: srl.w $a0, $a6, $a0 +; LA64-NEXT: srl.w $a0, $a6, $a3 ; LA64-NEXT: ret %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst ret i8 %result @@ -247,21 +244,20 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; LA64-LABEL: atomicrmw_udec_wrap_i16: ; LA64: # %bb.0: -; LA64-NEXT: 
addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: ld.w $a3, $a2, 0 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ld.w $a2, $a0, 0 ; LA64-NEXT: lu12i.w $a4, 15 ; LA64-NEXT: ori $a4, $a4, 4095 -; LA64-NEXT: sll.w $a4, $a4, $a0 -; LA64-NEXT: andi $a0, $a0, 24 +; LA64-NEXT: sll.w $a4, $a4, $a3 +; LA64-NEXT: andi $a3, $a3, 24 ; LA64-NEXT: nor $a4, $a4, $zero ; LA64-NEXT: bstrpick.d $a5, $a1, 15, 0 ; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB5_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB5_3 Depth 2 -; LA64-NEXT: srl.w $a6, $a3, $a0 +; LA64-NEXT: srl.w $a6, $a2, $a3 ; LA64-NEXT: bstrpick.d $a7, $a6, 15, 0 ; LA64-NEXT: sltu $t0, $a5, $a7 ; LA64-NEXT: addi.d $a6, $a6, -1 @@ -273,19 +269,19 @@ ; LA64-NEXT: maskeqz $a7, $a1, $a7 ; LA64-NEXT: or $a6, $a7, $a6 ; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0 -; LA64-NEXT: sll.w $a6, $a6, $a0 -; LA64-NEXT: and $a7, $a3, $a4 +; LA64-NEXT: sll.w $a6, $a6, $a3 +; LA64-NEXT: and $a7, $a2, $a4 ; LA64-NEXT: or $a7, $a7, $a6 ; LA64-NEXT: .LBB5_3: # %atomicrmw.start ; LA64-NEXT: # Parent Loop BB5_1 Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -; LA64-NEXT: ll.w $a6, $a2, 0 -; LA64-NEXT: bne $a6, $a3, .LBB5_5 +; LA64-NEXT: ll.w $a6, $a0, 0 +; LA64-NEXT: bne $a6, $a2, .LBB5_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 ; LA64-NEXT: dbar 0 ; LA64-NEXT: move $t0, $a7 -; LA64-NEXT: sc.w $t0, $a2, 0 +; LA64-NEXT: sc.w $t0, $a0, 0 ; LA64-NEXT: beqz $t0, .LBB5_3 ; LA64-NEXT: b .LBB5_6 ; LA64-NEXT: .LBB5_5: # %atomicrmw.start @@ -293,11 +289,11 @@ ; LA64-NEXT: dbar 1792 ; LA64-NEXT: .LBB5_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 -; LA64-NEXT: addi.w $a7, $a3, 0 -; LA64-NEXT: move $a3, $a6 +; LA64-NEXT: addi.w $a7, $a2, 0 +; LA64-NEXT: move $a2, $a6 ; LA64-NEXT: bne $a6, $a7, .LBB5_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end -; LA64-NEXT: srl.w $a0, $a6, $a0 +; LA64-NEXT: srl.w $a0, $a6, $a3 ; LA64-NEXT: ret %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst ret i16 %result diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll --- a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll @@ -369,20 +369,15 @@ ret i64 %b } -;; This case is not optimized to `bstrpick + slli`, -;; since the immediate -2048 can be composed via -;; a single `addi.w $rx, $zero, -2048`. define i64 @and_i64_minus_2048(i64 %a) { ; LA32-LABEL: and_i64_minus_2048: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -2048 -; LA32-NEXT: and $a0, $a0, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 10, 0 ; LA32-NEXT: ret ; ; LA64-LABEL: and_i64_minus_2048: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a1, $zero, -2048 -; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: bstrins.d $a0, $zero, 10, 0 ; LA64-NEXT: ret %b = and i64 %a, -2048 ret i64 %b @@ -425,20 +420,15 @@ ret i64 %i } -;; TODO: this can be codegened to bstrins.[wd] $a0, $zero, 23, 16. 
define i64 @and_i64_0xffffffffff00ffff(i64 %a) { ; LA32-LABEL: and_i64_0xffffffffff00ffff: ; LA32: # %bb.0: -; LA32-NEXT: lu12i.w $a2, -4081 -; LA32-NEXT: ori $a2, $a2, 4095 -; LA32-NEXT: and $a0, $a0, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 23, 16 ; LA32-NEXT: ret ; ; LA64-LABEL: and_i64_0xffffffffff00ffff: ; LA64: # %bb.0: -; LA64-NEXT: lu12i.w $a1, -4081 -; LA64-NEXT: ori $a1, $a1, 4095 -; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: bstrins.d $a0, $zero, 23, 16 ; LA64-NEXT: ret %b = and i64 %a, 18446744073692839935 ret i64 %b diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll @@ -4,27 +4,26 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; LA64-LABEL: cmpxchg_i8_acquire_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a3, $zero, -4 -; LA64-NEXT: and $a3, $a0, $a3 -; LA64-NEXT: slli.d $a0, $a0, 3 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: andi $a2, $a2, 255 -; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: sll.w $a2, $a2, $a3 ; LA64-NEXT: ori $a4, $zero, 255 -; LA64-NEXT: sll.w $a0, $a4, $a0 -; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: sll.w $a3, $a4, $a3 +; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a4, $a3, 0 -; LA64-NEXT: and $a5, $a4, $a0 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: and $a5, $a4, $a3 ; LA64-NEXT: bne $a5, $a1, .LBB0_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: andn $a5, $a4, $a0 +; LA64-NEXT: andn $a5, $a4, $a3 ; LA64-NEXT: or $a5, $a5, $a2 -; LA64-NEXT: sc.w $a5, $a3, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB0_1 ; LA64-NEXT: b .LBB0_4 ; LA64-NEXT: .LBB0_3: @@ -38,28 +37,27 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind { ; LA64-LABEL: cmpxchg_i16_acquire_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a3, $zero, -4 -; LA64-NEXT: and $a3, $a0, $a3 -; LA64-NEXT: slli.d $a0, $a0, 3 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 -; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: sll.w $a2, $a2, $a3 ; LA64-NEXT: lu12i.w $a4, 15 ; LA64-NEXT: ori $a4, $a4, 4095 -; LA64-NEXT: sll.w $a0, $a4, $a0 -; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: sll.w $a3, $a4, $a3 +; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a4, $a3, 0 -; LA64-NEXT: and $a5, $a4, $a0 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: and $a5, $a4, $a3 ; LA64-NEXT: bne $a5, $a1, .LBB1_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: andn $a5, $a4, $a0 +; LA64-NEXT: andn $a5, $a4, $a3 ; LA64-NEXT: or $a5, $a5, $a2 -; LA64-NEXT: sc.w $a5, $a3, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB1_1 ; LA64-NEXT: b .LBB1_4 ; LA64-NEXT: .LBB1_3: @@ -113,33 +111,32 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; LA64-LABEL: 
cmpxchg_i8_acquire_acquire_reti8: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a3, $zero, -4 -; LA64-NEXT: and $a3, $a0, $a3 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a3, $a0, 3 ; LA64-NEXT: ori $a4, $zero, 255 -; LA64-NEXT: sll.w $a4, $a4, $a0 -; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: sll.w $a4, $a4, $a3 ; LA64-NEXT: andi $a2, $a2, 255 -; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: sll.w $a2, $a2, $a3 ; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a5, $a3, 0 +; LA64-NEXT: ll.w $a5, $a0, 0 ; LA64-NEXT: and $a6, $a5, $a4 ; LA64-NEXT: bne $a6, $a1, .LBB4_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 ; LA64-NEXT: dbar 0 ; LA64-NEXT: andn $a6, $a5, $a4 ; LA64-NEXT: or $a6, $a6, $a2 -; LA64-NEXT: sc.w $a6, $a3, 0 +; LA64-NEXT: sc.w $a6, $a0, 0 ; LA64-NEXT: beqz $a6, .LBB4_1 ; LA64-NEXT: b .LBB4_4 ; LA64-NEXT: .LBB4_3: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: .LBB4_4: -; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: srl.w $a0, $a5, $a3 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire %res = extractvalue { i8, i1 } %tmp, 0 @@ -149,34 +146,33 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind { ; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti16: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a3, $zero, -4 -; LA64-NEXT: and $a3, $a0, $a3 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: lu12i.w $a4, 15 -; LA64-NEXT: ori $a4, $a4, 4095 -; LA64-NEXT: sll.w $a4, $a4, $a0 -; LA64-NEXT: addi.w $a4, $a4, 0 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: slli.d $a4, $a0, 3 +; LA64-NEXT: sll.w $a3, $a3, $a4 ; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 -; LA64-NEXT: sll.w $a2, $a2, $a0 +; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: sll.w $a2, $a2, $a4 ; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a4 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a5, $a3, 0 -; LA64-NEXT: and $a6, $a5, $a4 +; LA64-NEXT: ll.w $a5, $a0, 0 +; LA64-NEXT: and $a6, $a5, $a3 ; LA64-NEXT: bne $a6, $a1, .LBB5_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: andn $a6, $a5, $a4 +; LA64-NEXT: andn $a6, $a5, $a3 ; LA64-NEXT: or $a6, $a6, $a2 -; LA64-NEXT: sc.w $a6, $a3, 0 +; LA64-NEXT: sc.w $a6, $a0, 0 ; LA64-NEXT: beqz $a6, .LBB5_1 ; LA64-NEXT: b .LBB5_4 ; LA64-NEXT: .LBB5_3: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: .LBB5_4: -; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: srl.w $a0, $a5, $a4 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire %res = extractvalue { i16, i1 } %tmp, 0 @@ -230,27 +226,26 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind { ; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti1: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a3, $zero, -4 -; LA64-NEXT: and $a3, $a0, $a3 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: ori $a4, $zero, 255 -; LA64-NEXT: sll.w $a4, $a4, $a0 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: ori $a4, $zero, 255 +; LA64-NEXT: sll.w $a4, $a4, $a3 +; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: andi 
$a2, $a2, 255 -; LA64-NEXT: sll.w $a0, $a2, $a0 -; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: addi.w $a2, $a4, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: addi.w $a3, $a4, 0 ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a5, $a3, 0 -; LA64-NEXT: and $a6, $a5, $a2 +; LA64-NEXT: ll.w $a5, $a0, 0 +; LA64-NEXT: and $a6, $a5, $a3 ; LA64-NEXT: bne $a6, $a1, .LBB8_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: andn $a6, $a5, $a2 -; LA64-NEXT: or $a6, $a6, $a0 -; LA64-NEXT: sc.w $a6, $a3, 0 +; LA64-NEXT: andn $a6, $a5, $a3 +; LA64-NEXT: or $a6, $a6, $a2 +; LA64-NEXT: sc.w $a6, $a0, 0 ; LA64-NEXT: beqz $a6, .LBB8_1 ; LA64-NEXT: b .LBB8_4 ; LA64-NEXT: .LBB8_3: @@ -269,34 +264,33 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind { ; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti1: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a3, $zero, -4 -; LA64-NEXT: and $a3, $a0, $a3 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: lu12i.w $a4, 15 -; LA64-NEXT: ori $a4, $a4, 4095 -; LA64-NEXT: sll.w $a4, $a4, $a0 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: slli.d $a4, $a0, 3 +; LA64-NEXT: sll.w $a3, $a3, $a4 +; LA64-NEXT: sll.w $a1, $a1, $a4 ; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0 -; LA64-NEXT: sll.w $a0, $a2, $a0 -; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: sll.w $a2, $a2, $a4 +; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: addi.w $a2, $a4, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: addi.w $a4, $a3, 0 ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a5, $a3, 0 -; LA64-NEXT: and $a6, $a5, $a2 +; LA64-NEXT: ll.w $a5, $a0, 0 +; LA64-NEXT: and $a6, $a5, $a4 ; LA64-NEXT: bne $a6, $a1, .LBB9_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: andn $a6, $a5, $a2 -; LA64-NEXT: or $a6, $a6, $a0 -; LA64-NEXT: sc.w $a6, $a3, 0 +; LA64-NEXT: andn $a6, $a5, $a4 +; LA64-NEXT: or $a6, $a6, $a2 +; LA64-NEXT: sc.w $a6, $a0, 0 ; LA64-NEXT: beqz $a6, .LBB9_1 ; LA64-NEXT: b .LBB9_4 ; LA64-NEXT: .LBB9_3: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: .LBB9_4: -; LA64-NEXT: and $a0, $a5, $a4 +; LA64-NEXT: and $a0, $a5, $a3 ; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: xor $a0, $a1, $a0 ; LA64-NEXT: sltui $a0, $a0, 1 diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll @@ -7,18 +7,17 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-LABEL: atomicrmw_umax_i8_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a2, $a0, 3 ; LA64-NEXT: ori $a3, $zero, 255 -; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: sll.w $a3, $a3, $a2 ; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: and $a6, 
$a4, $a3 ; LA64-NEXT: move $a5, $a4 ; LA64-NEXT: bgeu $a6, $a1, .LBB0_3 @@ -27,12 +26,12 @@ ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: .LBB0_3: # in Loop: Header=BB0_1 Depth=1 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB0_1 ; LA64-NEXT: # %bb.4: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: # %bb.5: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a2 ; LA64-NEXT: ret %1 = atomicrmw umax ptr %a, i8 %b acquire ret i8 %1 @@ -41,33 +40,32 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-LABEL: atomicrmw_umax_i16_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: lu12i.w $a3, 15 -; LA64-NEXT: ori $a3, $a3, 4095 -; LA64-NEXT: sll.w $a3, $a3, $a0 -; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a4, $a2, 0 -; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: and $a6, $a4, $a2 ; LA64-NEXT: move $a5, $a4 ; LA64-NEXT: bgeu $a6, $a1, .LBB1_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 ; LA64-NEXT: xor $a5, $a4, $a1 -; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: and $a5, $a5, $a2 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: .LBB1_3: # in Loop: Header=BB1_1 Depth=1 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB1_1 ; LA64-NEXT: # %bb.4: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: # %bb.5: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a3 ; LA64-NEXT: ret %1 = atomicrmw umax ptr %a, i16 %b acquire ret i16 %1 @@ -96,18 +94,17 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-LABEL: atomicrmw_umin_i8_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a2, $a0, 3 ; LA64-NEXT: ori $a3, $zero, 255 -; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: sll.w $a3, $a3, $a2 ; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: and $a6, $a4, $a3 ; LA64-NEXT: move $a5, $a4 ; LA64-NEXT: bgeu $a1, $a6, .LBB4_3 @@ -116,12 +113,12 @@ ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: .LBB4_3: # in Loop: Header=BB4_1 Depth=1 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB4_1 ; LA64-NEXT: # %bb.4: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: # %bb.5: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a2 ; LA64-NEXT: ret %1 = atomicrmw umin ptr %a, i8 %b acquire ret i8 %1 @@ -130,33 +127,32 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-LABEL: atomicrmw_umin_i16_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: lu12i.w $a3, 15 
-; LA64-NEXT: ori $a3, $a3, 4095 -; LA64-NEXT: sll.w $a3, $a3, $a0 -; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a4, $a2, 0 -; LA64-NEXT: and $a6, $a4, $a3 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: and $a6, $a4, $a2 ; LA64-NEXT: move $a5, $a4 ; LA64-NEXT: bgeu $a1, $a6, .LBB5_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 ; LA64-NEXT: xor $a5, $a4, $a1 -; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: and $a5, $a5, $a2 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: .LBB5_3: # in Loop: Header=BB5_1 Depth=1 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB5_1 ; LA64-NEXT: # %bb.4: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: # %bb.5: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a3 ; LA64-NEXT: ret %1 = atomicrmw umin ptr %a, i16 %b acquire ret i16 %1 @@ -185,20 +181,19 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-LABEL: atomicrmw_max_i8_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a2, $a0, 3 ; LA64-NEXT: ori $a3, $zero, 255 -; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: sll.w $a3, $a3, $a2 ; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: ext.w.b $a1, $a1 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: andi $a4, $a0, 24 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: andi $a4, $a2, 24 ; LA64-NEXT: xori $a4, $a4, 56 ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: ll.w $a5, $a0, 0 ; LA64-NEXT: and $a7, $a5, $a3 ; LA64-NEXT: move $a6, $a5 ; LA64-NEXT: sll.w $a7, $a7, $a4 @@ -209,12 +204,12 @@ ; LA64-NEXT: and $a6, $a6, $a3 ; LA64-NEXT: xor $a6, $a5, $a6 ; LA64-NEXT: .LBB8_3: # in Loop: Header=BB8_1 Depth=1 -; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: sc.w $a6, $a0, 0 ; LA64-NEXT: beqz $a6, .LBB8_1 ; LA64-NEXT: # %bb.4: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: # %bb.5: -; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: srl.w $a0, $a5, $a2 ; LA64-NEXT: ret %1 = atomicrmw max ptr %a, i8 %b acquire ret i8 %1 @@ -223,22 +218,21 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-LABEL: atomicrmw_max_i16_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: andi $a3, $a0, 24 +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: andi $a3, $a2, 24 ; LA64-NEXT: ori $a4, $zero, 48 ; LA64-NEXT: sub.d $a3, $a4, $a3 ; LA64-NEXT: lu12i.w $a4, 15 ; LA64-NEXT: ori $a4, $a4, 4095 -; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: sll.w $a4, $a4, $a2 ; LA64-NEXT: addi.w $a4, $a4, 0 ; LA64-NEXT: ext.w.h $a1, $a1 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: ll.w $a5, $a0, 0 ; LA64-NEXT: and $a7, $a5, $a4 ; LA64-NEXT: move $a6, $a5 ; LA64-NEXT: sll.w $a7, $a7, $a3 
@@ -249,12 +243,12 @@ ; LA64-NEXT: and $a6, $a6, $a4 ; LA64-NEXT: xor $a6, $a5, $a6 ; LA64-NEXT: .LBB9_3: # in Loop: Header=BB9_1 Depth=1 -; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: sc.w $a6, $a0, 0 ; LA64-NEXT: beqz $a6, .LBB9_1 ; LA64-NEXT: # %bb.4: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: # %bb.5: -; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: srl.w $a0, $a5, $a2 ; LA64-NEXT: ret %1 = atomicrmw max ptr %a, i16 %b acquire ret i16 %1 @@ -283,20 +277,19 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-LABEL: atomicrmw_min_i8_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a2, $a0, 3 ; LA64-NEXT: ori $a3, $zero, 255 -; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: sll.w $a3, $a3, $a2 ; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: ext.w.b $a1, $a1 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 -; LA64-NEXT: andi $a4, $a0, 24 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: andi $a4, $a2, 24 ; LA64-NEXT: xori $a4, $a4, 56 ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: ll.w $a5, $a0, 0 ; LA64-NEXT: and $a7, $a5, $a3 ; LA64-NEXT: move $a6, $a5 ; LA64-NEXT: sll.w $a7, $a7, $a4 @@ -307,12 +300,12 @@ ; LA64-NEXT: and $a6, $a6, $a3 ; LA64-NEXT: xor $a6, $a5, $a6 ; LA64-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 -; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: sc.w $a6, $a0, 0 ; LA64-NEXT: beqz $a6, .LBB12_1 ; LA64-NEXT: # %bb.4: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: # %bb.5: -; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: srl.w $a0, $a5, $a2 ; LA64-NEXT: ret %1 = atomicrmw min ptr %a, i8 %b acquire ret i8 %1 @@ -321,22 +314,21 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-LABEL: atomicrmw_min_i16_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: andi $a3, $a0, 24 +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: andi $a3, $a2, 24 ; LA64-NEXT: ori $a4, $zero, 48 ; LA64-NEXT: sub.d $a3, $a4, $a3 ; LA64-NEXT: lu12i.w $a4, 15 ; LA64-NEXT: ori $a4, $a4, 4095 -; LA64-NEXT: sll.w $a4, $a4, $a0 +; LA64-NEXT: sll.w $a4, $a4, $a2 ; LA64-NEXT: addi.w $a4, $a4, 0 ; LA64-NEXT: ext.w.h $a1, $a1 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a5, $a2, 0 +; LA64-NEXT: ll.w $a5, $a0, 0 ; LA64-NEXT: and $a7, $a5, $a4 ; LA64-NEXT: move $a6, $a5 ; LA64-NEXT: sll.w $a7, $a7, $a3 @@ -347,12 +339,12 @@ ; LA64-NEXT: and $a6, $a6, $a4 ; LA64-NEXT: xor $a6, $a5, $a6 ; LA64-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1 -; LA64-NEXT: sc.w $a6, $a2, 0 +; LA64-NEXT: sc.w $a6, $a0, 0 ; LA64-NEXT: beqz $a6, .LBB13_1 ; LA64-NEXT: # %bb.4: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: # %bb.5: -; LA64-NEXT: srl.w $a0, $a5, $a0 +; LA64-NEXT: srl.w $a0, $a5, $a2 ; LA64-NEXT: ret %1 = atomicrmw min ptr %a, i16 %b acquire ret i16 %1 diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll @@ -5,48 +5,46 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { ; LA32-LABEL: 
atomicrmw_xchg_i8_acquire: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: slli.w $a2, $a0, 3 ; LA32-NEXT: ori $a3, $zero, 255 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: sll.w $a3, $a3, $a2 ; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: addi.w $a5, $a1, 0 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB0_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_xchg_i8_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a2, $a0, 3 ; LA64-NEXT: ori $a3, $zero, 255 -; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: sll.w $a3, $a3, $a2 ; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: addi.w $a5, $a1, 0 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB0_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a2 ; LA64-NEXT: ret %1 = atomicrmw xchg ptr %a, i8 %b acquire ret i8 %1 @@ -55,50 +53,48 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ; LA32-LABEL: atomicrmw_xchg_i16_acquire: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: lu12i.w $a3, 15 -; LA32-NEXT: ori $a3, $a3, 4095 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: addi.w $a5, $a1, 0 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: and $a5, $a5, $a2 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB1_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a3 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_xchg_i16_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: lu12i.w $a3, 15 -; LA64-NEXT: ori $a3, $a3, 4095 -; LA64-NEXT: sll.w $a3, $a3, $a0 -; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; 
LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: addi.w $a5, $a1, 0 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: and $a5, $a5, $a2 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB1_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a3 ; LA64-NEXT: ret %1 = atomicrmw xchg ptr %a, i16 %b acquire ret i16 %1 @@ -149,48 +145,46 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { ; LA32-LABEL: atomicrmw_add_i8_acquire: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: slli.w $a2, $a0, 3 ; LA32-NEXT: ori $a3, $zero, 255 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: sll.w $a3, $a3, $a2 ; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: add.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB4_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_add_i8_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a2, $a0, 3 ; LA64-NEXT: ori $a3, $zero, 255 -; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: sll.w $a3, $a3, $a2 ; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: add.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB4_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a2 ; LA64-NEXT: ret %1 = atomicrmw add ptr %a, i8 %b acquire ret i8 %1 @@ -199,50 +193,48 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ; LA32-LABEL: atomicrmw_add_i16_acquire: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: lu12i.w $a3, 15 -; LA32-NEXT: ori $a3, $a3, 4095 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: add.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: and $a5, $a5, $a2 ; 
LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB5_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a3 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_add_i16_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: lu12i.w $a3, 15 -; LA64-NEXT: ori $a3, $a3, 4095 -; LA64-NEXT: sll.w $a3, $a3, $a0 -; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: add.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: and $a5, $a5, $a2 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB5_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a3 ; LA64-NEXT: ret %1 = atomicrmw add ptr %a, i16 %b acquire ret i16 %1 @@ -293,48 +285,46 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { ; LA32-LABEL: atomicrmw_sub_i8_acquire: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: slli.w $a2, $a0, 3 ; LA32-NEXT: ori $a3, $zero, 255 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: sll.w $a3, $a3, $a2 ; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: sub.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB8_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_sub_i8_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a2, $a0, 3 ; LA64-NEXT: ori $a3, $zero, 255 -; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: sll.w $a3, $a3, $a2 ; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: sub.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB8_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a2 ; LA64-NEXT: ret %1 = atomicrmw sub ptr %a, i8 %b acquire ret i8 %1 @@ -343,50 +333,48 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ; LA32-LABEL: atomicrmw_sub_i16_acquire: ; LA32: # 
%bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: lu12i.w $a3, 15 -; LA32-NEXT: ori $a3, $a3, 4095 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: sub.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: and $a5, $a5, $a2 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB9_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a3 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_sub_i16_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: lu12i.w $a3, 15 -; LA64-NEXT: ori $a3, $a3, 4095 -; LA64-NEXT: sll.w $a3, $a3, $a0 -; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: sub.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: and $a5, $a5, $a2 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB9_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a3 ; LA64-NEXT: ret %1 = atomicrmw sub ptr %a, i16 %b acquire ret i16 %1 @@ -439,50 +427,48 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { ; LA32-LABEL: atomicrmw_nand_i8_acquire: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: slli.w $a2, $a0, 3 ; LA32-NEXT: ori $a3, $zero, 255 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: sll.w $a3, $a3, $a2 ; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: and $a5, $a4, $a1 ; LA32-NEXT: nor $a5, $a5, $zero ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB12_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_nand_i8_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a2, $a0, 3 ; LA64-NEXT: ori $a3, $zero, 255 -; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: sll.w $a3, $a3, $a2 ; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: andi $a1, $a1, 255 -; 
LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: and $a5, $a4, $a1 ; LA64-NEXT: nor $a5, $a5, $zero ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB12_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a2 ; LA64-NEXT: ret %1 = atomicrmw nand ptr %a, i8 %b acquire ret i8 %1 @@ -491,52 +477,50 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ; LA32-LABEL: atomicrmw_nand_i16_acquire: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: lu12i.w $a3, 15 -; LA32-NEXT: ori $a3, $a3, 4095 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: and $a5, $a4, $a1 ; LA32-NEXT: nor $a5, $a5, $zero ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: and $a5, $a5, $a2 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB13_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a3 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_nand_i16_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: lu12i.w $a3, 15 -; LA64-NEXT: ori $a3, $a3, 4095 -; LA64-NEXT: sll.w $a3, $a3, $a0 -; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: dbar 0 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: and $a5, $a4, $a1 ; LA64-NEXT: nor $a5, $a5, $zero ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: and $a5, $a5, $a2 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB13_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a3 ; LA64-NEXT: ret %1 = atomicrmw nand ptr %a, i16 %b acquire ret i16 %1 @@ -608,8 +592,7 @@ ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a2 ; LA32-NEXT: orn $a1, $a1, $a3 -; LA32-NEXT: addi.w $a3, $zero, -4 -; LA32-NEXT: and $a0, $a0, $a3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a3, $a0, 0 @@ -628,8 +611,7 @@ ; LA64-NEXT: andi $a1, $a1, 255 ; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: orn $a1, $a1, $a3 -; LA64-NEXT: addi.w $a3, $zero, -4 
-; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: amand_db.w $a3, $a1, $a0 ; LA64-NEXT: srl.w $a0, $a3, $a2 ; LA64-NEXT: ret @@ -647,8 +629,7 @@ ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a3 ; LA32-NEXT: orn $a1, $a1, $a2 -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a0, $a0, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 ; LA32-NEXT: ll.w $a2, $a0, 0 @@ -668,8 +649,7 @@ ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: orn $a1, $a1, $a2 -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a0, $a0, $a2 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: amand_db.w $a2, $a1, $a0 ; LA64-NEXT: srl.w $a0, $a2, $a3 ; LA64-NEXT: ret @@ -722,30 +702,28 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { ; LA32-LABEL: atomicrmw_or_i8_acquire: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 ; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: ll.w $a3, $a0, 0 ; LA32-NEXT: or $a4, $a3, $a1 -; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: sc.w $a4, $a0, 0 ; LA32-NEXT: beqz $a4, .LBB20_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: srl.w $a0, $a3, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_or_i8_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 -; LA64-NEXT: amor_db.w $a3, $a1, $a2 -; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: amor_db.w $a3, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a2 ; LA64-NEXT: ret %1 = atomicrmw or ptr %a, i8 %b acquire ret i8 %1 @@ -754,30 +732,28 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { ; LA32-LABEL: atomicrmw_or_i16_acquire: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: ll.w $a3, $a0, 0 ; LA32-NEXT: or $a4, $a3, $a1 -; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: sc.w $a4, $a0, 0 ; LA32-NEXT: beqz $a4, .LBB21_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: srl.w $a0, $a3, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_or_i16_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 -; LA64-NEXT: amor_db.w $a3, $a1, $a2 -; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: amor_db.w $a3, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a2 ; LA64-NEXT: ret %1 = atomicrmw or ptr %a, i16 %b acquire ret i16 %1 @@ -828,30 +804,28 @@ define i8 
@atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { ; LA32-LABEL: atomicrmw_xor_i8_acquire: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 ; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: ll.w $a3, $a0, 0 ; LA32-NEXT: xor $a4, $a3, $a1 -; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: sc.w $a4, $a0, 0 ; LA32-NEXT: beqz $a4, .LBB24_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: srl.w $a0, $a3, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_xor_i8_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 -; LA64-NEXT: amxor_db.w $a3, $a1, $a2 -; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a2 ; LA64-NEXT: ret %1 = atomicrmw xor ptr %a, i8 %b acquire ret i8 %1 @@ -860,30 +834,28 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { ; LA32-LABEL: atomicrmw_xor_i16_acquire: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: dbar 0 -; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: ll.w $a3, $a0, 0 ; LA32-NEXT: xor $a4, $a3, $a1 -; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: sc.w $a4, $a0, 0 ; LA32-NEXT: beqz $a4, .LBB25_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: srl.w $a0, $a3, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_xor_i16_acquire: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 -; LA64-NEXT: amxor_db.w $a3, $a1, $a2 -; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a2 ; LA64-NEXT: ret %1 = atomicrmw xor ptr %a, i16 %b acquire ret i16 %1 @@ -934,46 +906,44 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-LABEL: atomicrmw_xchg_i8_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: slli.w $a2, $a0, 3 ; LA32-NEXT: ori $a3, $zero, 255 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: sll.w $a3, $a3, $a2 ; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: addi.w $a5, $a1, 0 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB28_1 ; LA32-NEXT: # %bb.2: -; 
LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_xchg_i8_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a2, $a0, 3 ; LA64-NEXT: ori $a3, $zero, 255 -; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: sll.w $a3, $a3, $a2 ; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: addi.w $a5, $a1, 0 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB28_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a2 ; LA64-NEXT: ret %1 = atomicrmw xchg ptr %a, i8 %b monotonic ret i8 %1 @@ -982,48 +952,46 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-LABEL: atomicrmw_xchg_i16_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: lu12i.w $a3, 15 -; LA32-NEXT: ori $a3, $a3, 4095 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: addi.w $a5, $a1, 0 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: and $a5, $a5, $a2 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB29_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a3 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_xchg_i16_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: lu12i.w $a3, 15 -; LA64-NEXT: ori $a3, $a3, 4095 -; LA64-NEXT: sll.w $a3, $a3, $a0 -; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: addi.w $a5, $a1, 0 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: and $a5, $a5, $a2 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB29_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a3 ; LA64-NEXT: ret %1 = atomicrmw xchg ptr %a, i16 %b monotonic ret i16 %1 @@ -1073,46 +1041,44 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-LABEL: atomicrmw_add_i8_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; 
LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: slli.w $a2, $a0, 3 ; LA32-NEXT: ori $a3, $zero, 255 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: sll.w $a3, $a3, $a2 ; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: add.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB32_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_add_i8_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a2, $a0, 3 ; LA64-NEXT: ori $a3, $zero, 255 -; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: sll.w $a3, $a3, $a2 ; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: add.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB32_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a2 ; LA64-NEXT: ret %1 = atomicrmw add ptr %a, i8 %b monotonic ret i8 %1 @@ -1121,48 +1087,46 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-LABEL: atomicrmw_add_i16_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: lu12i.w $a3, 15 -; LA32-NEXT: ori $a3, $a3, 4095 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: add.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: and $a5, $a5, $a2 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB33_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a3 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_add_i16_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: lu12i.w $a3, 15 -; LA64-NEXT: ori $a3, $a3, 4095 -; LA64-NEXT: sll.w $a3, $a3, $a0 -; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: 
.LBB33_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: add.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: and $a5, $a5, $a2 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB33_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a3 ; LA64-NEXT: ret %1 = atomicrmw add ptr %a, i16 %b monotonic ret i16 %1 @@ -1212,46 +1176,44 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-LABEL: atomicrmw_sub_i8_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: slli.w $a2, $a0, 3 ; LA32-NEXT: ori $a3, $zero, 255 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: sll.w $a3, $a3, $a2 ; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: sub.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB36_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_sub_i8_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a2, $a0, 3 ; LA64-NEXT: ori $a3, $zero, 255 -; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: sll.w $a3, $a3, $a2 ; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: sub.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB36_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a2 ; LA64-NEXT: ret %1 = atomicrmw sub ptr %a, i8 %b monotonic ret i8 %1 @@ -1260,48 +1222,46 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-LABEL: atomicrmw_sub_i16_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: lu12i.w $a3, 15 -; LA32-NEXT: ori $a3, $a3, 4095 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: sub.w $a5, $a4, $a1 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: and $a5, $a5, $a2 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB37_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 
+; LA32-NEXT: srl.w $a0, $a4, $a3 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_sub_i16_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: lu12i.w $a3, 15 -; LA64-NEXT: ori $a3, $a3, 4095 -; LA64-NEXT: sll.w $a3, $a3, $a0 -; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: sub.w $a5, $a4, $a1 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: and $a5, $a5, $a2 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB37_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a3 ; LA64-NEXT: ret %1 = atomicrmw sub ptr %a, i16 %b monotonic ret i16 %1 @@ -1353,48 +1313,46 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-LABEL: atomicrmw_nand_i8_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 +; LA32-NEXT: slli.w $a2, $a0, 3 ; LA32-NEXT: ori $a3, $zero, 255 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: sll.w $a3, $a3, $a2 ; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: and $a5, $a4, $a1 ; LA32-NEXT: nor $a5, $a5, $zero ; LA32-NEXT: xor $a5, $a4, $a5 ; LA32-NEXT: and $a5, $a5, $a3 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB40_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_nand_i8_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 +; LA64-NEXT: slli.d $a2, $a0, 3 ; LA64-NEXT: ori $a3, $zero, 255 -; LA64-NEXT: sll.w $a3, $a3, $a0 +; LA64-NEXT: sll.w $a3, $a3, $a2 ; LA64-NEXT: addi.w $a3, $a3, 0 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: and $a5, $a4, $a1 ; LA64-NEXT: nor $a5, $a5, $zero ; LA64-NEXT: xor $a5, $a4, $a5 ; LA64-NEXT: and $a5, $a5, $a3 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB40_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a2 ; LA64-NEXT: ret %1 = atomicrmw nand ptr %a, i8 %b monotonic ret i8 %1 @@ -1403,50 +1361,48 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-LABEL: atomicrmw_nand_i16_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: lu12i.w $a3, 15 -; 
LA32-NEXT: ori $a3, $a3, 4095 -; LA32-NEXT: sll.w $a3, $a3, $a0 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: slli.w $a3, $a0, 3 +; LA32-NEXT: sll.w $a2, $a2, $a3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: sll.w $a1, $a1, $a3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: ll.w $a4, $a2, 0 +; LA32-NEXT: ll.w $a4, $a0, 0 ; LA32-NEXT: and $a5, $a4, $a1 ; LA32-NEXT: nor $a5, $a5, $zero ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: and $a5, $a5, $a2 ; LA32-NEXT: xor $a5, $a4, $a5 -; LA32-NEXT: sc.w $a5, $a2, 0 +; LA32-NEXT: sc.w $a5, $a0, 0 ; LA32-NEXT: beqz $a5, .LBB41_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a4, $a0 +; LA32-NEXT: srl.w $a0, $a4, $a3 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_nand_i16_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: lu12i.w $a3, 15 -; LA64-NEXT: ori $a3, $a3, 4095 -; LA64-NEXT: sll.w $a3, $a3, $a0 -; LA64-NEXT: addi.w $a3, $a3, 0 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: slli.d $a3, $a0, 3 +; LA64-NEXT: sll.w $a2, $a2, $a3 +; LA64-NEXT: addi.w $a2, $a2, 0 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 +; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: addi.w $a1, $a1, 0 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a4, $a2, 0 +; LA64-NEXT: ll.w $a4, $a0, 0 ; LA64-NEXT: and $a5, $a4, $a1 ; LA64-NEXT: nor $a5, $a5, $zero ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: and $a5, $a5, $a2 ; LA64-NEXT: xor $a5, $a4, $a5 -; LA64-NEXT: sc.w $a5, $a2, 0 +; LA64-NEXT: sc.w $a5, $a0, 0 ; LA64-NEXT: beqz $a5, .LBB41_1 ; LA64-NEXT: # %bb.2: -; LA64-NEXT: srl.w $a0, $a4, $a0 +; LA64-NEXT: srl.w $a0, $a4, $a3 ; LA64-NEXT: ret %1 = atomicrmw nand ptr %a, i16 %b monotonic ret i16 %1 @@ -1515,8 +1471,7 @@ ; LA32-NEXT: andi $a1, $a1, 255 ; LA32-NEXT: sll.w $a1, $a1, $a2 ; LA32-NEXT: orn $a1, $a1, $a3 -; LA32-NEXT: addi.w $a3, $zero, -4 -; LA32-NEXT: and $a0, $a0, $a3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a3, $a0, 0 ; LA32-NEXT: and $a4, $a3, $a1 @@ -1534,8 +1489,7 @@ ; LA64-NEXT: andi $a1, $a1, 255 ; LA64-NEXT: sll.w $a1, $a1, $a2 ; LA64-NEXT: orn $a1, $a1, $a3 -; LA64-NEXT: addi.w $a3, $zero, -4 -; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: amand_db.w $a3, $a1, $a0 ; LA64-NEXT: srl.w $a0, $a3, $a2 ; LA64-NEXT: ret @@ -1553,8 +1507,7 @@ ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 ; LA32-NEXT: sll.w $a1, $a1, $a3 ; LA32-NEXT: orn $a1, $a1, $a2 -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a0, $a0, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: ll.w $a2, $a0, 0 ; LA32-NEXT: and $a4, $a2, $a1 @@ -1573,8 +1526,7 @@ ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ; LA64-NEXT: sll.w $a1, $a1, $a3 ; LA64-NEXT: orn $a1, $a1, $a2 -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a0, $a0, $a2 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 ; LA64-NEXT: amand_db.w $a2, $a1, $a0 ; LA64-NEXT: srl.w $a0, $a2, $a3 ; LA64-NEXT: ret @@ -1626,29 +1578,27 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-LABEL: atomicrmw_or_i8_monotonic: ; LA32: # %bb.0: -; 
LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 ; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: ll.w $a3, $a0, 0 ; LA32-NEXT: or $a4, $a3, $a1 -; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: sc.w $a4, $a0, 0 ; LA32-NEXT: beqz $a4, .LBB48_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: srl.w $a0, $a3, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_or_i8_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 -; LA64-NEXT: amor_db.w $a3, $a1, $a2 -; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: amor_db.w $a3, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a2 ; LA64-NEXT: ret %1 = atomicrmw or ptr %a, i8 %b monotonic ret i8 %1 @@ -1657,29 +1607,27 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-LABEL: atomicrmw_or_i16_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: ll.w $a3, $a0, 0 ; LA32-NEXT: or $a4, $a3, $a1 -; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: sc.w $a4, $a0, 0 ; LA32-NEXT: beqz $a4, .LBB49_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: srl.w $a0, $a3, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_or_i16_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 -; LA64-NEXT: amor_db.w $a3, $a1, $a2 -; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: amor_db.w $a3, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a2 ; LA64-NEXT: ret %1 = atomicrmw or ptr %a, i16 %b monotonic ret i16 %1 @@ -1729,29 +1677,27 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ; LA32-LABEL: atomicrmw_xor_i8_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 ; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: ll.w $a3, $a0, 0 ; LA32-NEXT: xor $a4, $a3, $a1 -; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: sc.w $a4, $a0, 0 ; LA32-NEXT: beqz $a4, .LBB52_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: srl.w $a0, $a3, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_xor_i8_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 ; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: sll.w $a1, $a1, $a0 -; LA64-NEXT: amxor_db.w $a3, $a1, 
$a2 -; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a2 ; LA64-NEXT: ret %1 = atomicrmw xor ptr %a, i8 %b monotonic ret i8 %1 @@ -1760,29 +1706,27 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ; LA32-LABEL: atomicrmw_xor_i16_monotonic: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $zero, -4 -; LA32-NEXT: and $a2, $a0, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 ; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: sll.w $a1, $a1, $a0 +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 ; LA32-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 -; LA32-NEXT: ll.w $a3, $a2, 0 +; LA32-NEXT: ll.w $a3, $a0, 0 ; LA32-NEXT: xor $a4, $a3, $a1 -; LA32-NEXT: sc.w $a4, $a2, 0 +; LA32-NEXT: sc.w $a4, $a0, 0 ; LA32-NEXT: beqz $a4, .LBB53_1 ; LA32-NEXT: # %bb.2: -; LA32-NEXT: srl.w $a0, $a3, $a0 +; LA32-NEXT: srl.w $a0, $a3, $a2 ; LA32-NEXT: ret ; ; LA64-LABEL: atomicrmw_xor_i16_monotonic: ; LA64: # %bb.0: -; LA64-NEXT: addi.w $a2, $zero, -4 -; LA64-NEXT: and $a2, $a0, $a2 -; LA64-NEXT: slli.d $a0, $a0, 3 ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: sll.w $a1, $a1, $a0 -; LA64-NEXT: amxor_db.w $a3, $a1, $a2 -; LA64-NEXT: srl.w $a0, $a3, $a0 +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: amxor_db.w $a3, $a1, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a2 ; LA64-NEXT: ret %1 = atomicrmw xor ptr %a, i16 %b monotonic ret i16 %1 diff --git a/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll --- a/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll +++ b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll @@ -22,8 +22,7 @@ ; LA32-NEXT: bstrins.w $sp, $zero, 5, 0 ; LA32-NEXT: move $s8, $sp ; LA32-NEXT: addi.w $a0, $a0, 15 -; LA32-NEXT: addi.w $a1, $zero, -16 -; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: bstrins.w $a0, $zero, 3, 0 ; LA32-NEXT: sub.w $a0, $sp, $a0 ; LA32-NEXT: move $sp, $a0 ; LA32-NEXT: addi.w $a1, $s8, 0 diff --git a/llvm/test/CodeGen/LoongArch/vararg.ll b/llvm/test/CodeGen/LoongArch/vararg.ll --- a/llvm/test/CodeGen/LoongArch/vararg.ll +++ b/llvm/test/CodeGen/LoongArch/vararg.ll @@ -126,10 +126,9 @@ ; LA64-FPELIM-NEXT: st.d $a2, $fp, 16 ; LA64-FPELIM-NEXT: addi.d $a0, $fp, 16 ; LA64-FPELIM-NEXT: st.d $a0, $fp, -32 +; LA64-FPELIM-NEXT: st.d $a1, $fp, 8 ; LA64-FPELIM-NEXT: addi.d $a0, $a1, 15 -; LA64-FPELIM-NEXT: addi.w $a1, $zero, -16 -; LA64-FPELIM-NEXT: and $a0, $a0, $a1 -; LA64-FPELIM-NEXT: st.d $s0, $fp, 8 +; LA64-FPELIM-NEXT: bstrins.d $a0, $zero, 3, 0 ; LA64-FPELIM-NEXT: sub.d $a0, $sp, $a0 ; LA64-FPELIM-NEXT: move $sp, $a0 ; LA64-FPELIM-NEXT: bl %plt(notdead) @@ -157,10 +156,9 @@ ; LA64-WITHFP-NEXT: st.d $a2, $fp, 16 ; LA64-WITHFP-NEXT: addi.d $a0, $fp, 16 ; LA64-WITHFP-NEXT: st.d $a0, $fp, -32 +; LA64-WITHFP-NEXT: st.d $a1, $fp, 8 ; LA64-WITHFP-NEXT: addi.d $a0, $a1, 15 -; LA64-WITHFP-NEXT: addi.w $a1, $zero, -16 -; LA64-WITHFP-NEXT: and $a0, $a0, $a1 -; LA64-WITHFP-NEXT: st.d $s0, $fp, 8 +; LA64-WITHFP-NEXT: bstrins.d $a0, $zero, 3, 0 ; LA64-WITHFP-NEXT: sub.d $a0, $sp, $a0 ; LA64-WITHFP-NEXT: move $sp, $a0 ; LA64-WITHFP-NEXT: bl %plt(notdead)