Index: include/llvm/IR/IntrinsicsRISCV.td =================================================================== --- include/llvm/IR/IntrinsicsRISCV.td +++ include/llvm/IR/IntrinsicsRISCV.td @@ -24,10 +24,7 @@ def int_riscv_masked_atomicrmw_xchg_i32 : MaskedAtomicRMW32Intrinsic; def int_riscv_masked_atomicrmw_add_i32 : MaskedAtomicRMW32Intrinsic; def int_riscv_masked_atomicrmw_sub_i32 : MaskedAtomicRMW32Intrinsic; -def int_riscv_masked_atomicrmw_and_i32 : MaskedAtomicRMW32Intrinsic; def int_riscv_masked_atomicrmw_nand_i32 : MaskedAtomicRMW32Intrinsic; -def int_riscv_masked_atomicrmw_or_i32 : MaskedAtomicRMW32Intrinsic; -def int_riscv_masked_atomicrmw_xor_i32 : MaskedAtomicRMW32Intrinsic; def int_riscv_masked_atomicrmw_max_i32 : MaskedAtomicRMW32Intrinsic; def int_riscv_masked_atomicrmw_min_i32 : MaskedAtomicRMW32Intrinsic; def int_riscv_masked_atomicrmw_umax_i32 : MaskedAtomicRMW32Intrinsic; Index: lib/Target/RISCV/RISCVExpandPseudoInsts.cpp =================================================================== --- lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -91,15 +91,9 @@ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI); case RISCV::PseudoMaskedAtomicLoadSub32: return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI); - case RISCV::PseudoMaskedAtomicLoadAnd32: - return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, true, 32, NextMBBI); case RISCV::PseudoMaskedAtomicLoadNand32: return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32, NextMBBI); - case RISCV::PseudoMaskedAtomicLoadOr32: - return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, true, 32, NextMBBI); - case RISCV::PseudoMaskedAtomicLoadXor32: - return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, true, 32, NextMBBI); case RISCV::PseudoMaskedAtomicLoadMax32: return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32, NextMBBI); @@ -255,11 +249,6 @@ .addReg(DestReg) .addReg(IncrReg); break; 
- case AtomicRMWInst::And: - BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg) - .addReg(DestReg) - .addReg(IncrReg); - break; case AtomicRMWInst::Nand: BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg) .addReg(DestReg) @@ -268,16 +257,6 @@ .addReg(ScratchReg) .addImm(-1); break; - case AtomicRMWInst::Or: - BuildMI(LoopMBB, DL, TII->get(RISCV::OR), ScratchReg) - .addReg(DestReg) - .addReg(IncrReg); - break; - case AtomicRMWInst::Xor: - BuildMI(LoopMBB, DL, TII->get(RISCV::XOR), ScratchReg) - .addReg(DestReg) - .addReg(IncrReg); - break; } insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg, Index: lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- lib/Target/RISCV/RISCVISelLowering.cpp +++ lib/Target/RISCV/RISCVISelLowering.cpp @@ -170,10 +170,7 @@ case Intrinsic::riscv_masked_atomicrmw_xchg_i32: case Intrinsic::riscv_masked_atomicrmw_add_i32: case Intrinsic::riscv_masked_atomicrmw_sub_i32: - case Intrinsic::riscv_masked_atomicrmw_and_i32: case Intrinsic::riscv_masked_atomicrmw_nand_i32: - case Intrinsic::riscv_masked_atomicrmw_or_i32: - case Intrinsic::riscv_masked_atomicrmw_xor_i32: case Intrinsic::riscv_masked_atomicrmw_max_i32: case Intrinsic::riscv_masked_atomicrmw_min_i32: case Intrinsic::riscv_masked_atomicrmw_umax_i32: @@ -1623,14 +1620,8 @@ return Intrinsic::riscv_masked_atomicrmw_add_i32; case AtomicRMWInst::Sub: return Intrinsic::riscv_masked_atomicrmw_sub_i32; - case AtomicRMWInst::And: - return Intrinsic::riscv_masked_atomicrmw_and_i32; case AtomicRMWInst::Nand: return Intrinsic::riscv_masked_atomicrmw_nand_i32; - case AtomicRMWInst::Or: - return Intrinsic::riscv_masked_atomicrmw_or_i32; - case AtomicRMWInst::Xor: - return Intrinsic::riscv_masked_atomicrmw_xor_i32; case AtomicRMWInst::Max: return Intrinsic::riscv_masked_atomicrmw_max_i32; case AtomicRMWInst::Min: @@ -1661,26 +1652,55 @@ // alignedaddr = addr & -4 Value *Addr = AI->getPointerOperand(); - Value 
*AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx)); - Value *AlignedAddr = Builder.CreateIntToPtr(Builder.CreateAnd(AddrInt, -4), - Addr->getType(), "AlignedAddr"); + Value *AddrInt = Builder.CreatePtrToInt(Addr, Builder.getInt32Ty()); + Value *AlignedAddr = Builder.CreateIntToPtr( + Builder.CreateAnd(AddrInt, -4), Type::getInt32PtrTy(Ctx), "AlignedAddr"); // offset = (addr & 3) << 3 Value *Offset = Builder.CreateShl(Builder.CreateAnd(AddrInt, 3), 3, "Offset"); - - // mask = maskval << offset - Value *Mask = Builder.CreateShl(Builder.getInt32(MaskVal), Offset, "Mask"); // incr = ai.val << offset Value *Incr = Builder.CreateShl( Builder.CreateZExt(AI->getValOperand(), Builder.getInt32Ty()), Offset, "Incr"); - // oldval = riscv.masked.atomicrmw.binop(alignedaddr, mask, incr, ordering) - Value *Ordering = Builder.getInt32(static_cast(AI->getOrdering())); - Type *Tys[] = {Addr->getType()}; - Function *LrwAddScwLoop = Intrinsic::getDeclaration( - M, getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys); - Value *OldVal = - Builder.CreateCall(LrwAddScwLoop, {AlignedAddr, Incr, Mask, Ordering}); + AtomicRMWInst::BinOp BinOp = AI->getOperation(); + Value *OldVal; + + // In the case of bitwise part-word atomics that have a native 32-bit AMO, + // we can expand to that with an appropriate operand. Otherwise, expand to + // masked rmw intrinsics. 
+ switch (BinOp) { + default: { + // mask = maskval << offset + Value *Mask = Builder.CreateShl(Builder.getInt32(MaskVal), Offset, "Mask"); + // oldval = riscv.masked.atomicrmw.binop(alignedaddr, incr, mask, ordering) + Value *Ordering = + Builder.getInt32(static_cast(AI->getOrdering())); + Type *Tys[] = {Type::getInt32PtrTy(Ctx)}; + Function *LrwAddScwLoop = Intrinsic::getDeclaration( + M, getIntrinsicForMaskedAtomicRMWBinOp32(BinOp), Tys); + OldVal = + Builder.CreateCall(LrwAddScwLoop, {AlignedAddr, Incr, Mask, Ordering}); + break; + } + case AtomicRMWInst::And: { + // mask = maskval << offset + Value *Mask = Builder.CreateShl(Builder.getInt32(MaskVal), Offset, "Mask"); + // andoperand = (not mask) | incr + Value *AndOperand = + Builder.CreateOr(Builder.CreateNot(Mask), Incr, "AndOperand"); + // oldval = atomicrmw and i32 alignedaddr, andoperand, ordering + OldVal = Builder.CreateAtomicRMW(BinOp, AlignedAddr, AndOperand, + AI->getOrdering()); + break; + } + case AtomicRMWInst::Or: + case AtomicRMWInst::Xor: + // oldval = atomicrmw [x]or i32 alignedaddr, incr, ordering + OldVal = + Builder.CreateAtomicRMW(BinOp, AlignedAddr, Incr, AI->getOrdering()); + break; + } + // finaloldval = trunc(oldval >> offset) Value *FinalOldVal = Builder.CreateTrunc(Builder.CreateLShr(OldVal, Offset), AI->getType()); Index: lib/Target/RISCV/RISCVInstrInfoA.td =================================================================== --- lib/Target/RISCV/RISCVInstrInfoA.td +++ lib/Target/RISCV/RISCVInstrInfoA.td @@ -187,18 +187,9 @@ def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO; def : PseudoMaskedAMOPat; -def PseudoMaskedAtomicLoadAnd32 : PseudoMaskedAMO; -def : PseudoMaskedAMOPat; def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO; def : PseudoMaskedAMOPat; -def PseudoMaskedAtomicLoadOr32 : PseudoMaskedAMO; -def : PseudoMaskedAMOPat; -def PseudoMaskedAtomicLoadXor32 : PseudoMaskedAMO; -def : PseudoMaskedAMOPat; def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax; def : 
PseudoMaskedAMOPat; Index: test/CodeGen/RISCV/atomic-rmw.ll =================================================================== --- test/CodeGen/RISCV/atomic-rmw.ll +++ test/CodeGen/RISCV/atomic-rmw.ll @@ -542,23 +542,17 @@ ; ; RV32IA-LABEL: atomicrmw_and_i8_monotonic: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: or a1, a3, a1 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB15_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoand.w a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw and i8* %a, i8 %b monotonic ret i8 %1 @@ -577,23 +571,17 @@ ; ; RV32IA-LABEL: atomicrmw_and_i8_acquire: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: or a1, a3, a1 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB16_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoand.w.aq a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw and i8* %a, 
i8 %b acquire ret i8 %1 @@ -612,23 +600,17 @@ ; ; RV32IA-LABEL: atomicrmw_and_i8_release: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: or a1, a3, a1 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB17_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoand.w.rl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw and i8* %a, i8 %b release ret i8 %1 @@ -647,23 +629,17 @@ ; ; RV32IA-LABEL: atomicrmw_and_i8_acq_rel: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: or a1, a3, a1 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB18_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw and i8* %a, i8 %b acq_rel ret i8 %1 @@ -682,23 +658,17 @@ ; ; RV32IA-LABEL: atomicrmw_and_i8_seq_cst: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 
; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: addi a3, zero, 255 ; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 -; RV32IA-NEXT: sll a1, a1, a2 +; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: or a1, a3, a1 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB19_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw and i8* %a, i8 %b seq_cst ret i8 %1 @@ -897,23 +867,13 @@ ; ; RV32IA-LABEL: atomicrmw_or_i8_monotonic: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: or a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB25_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoor.w a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw or i8* %a, i8 %b monotonic ret i8 %1 @@ -932,23 +892,13 @@ ; ; RV32IA-LABEL: atomicrmw_or_i8_acquire: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 -; 
RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: or a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB26_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoor.w.aq a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw or i8* %a, i8 %b acquire ret i8 %1 @@ -967,23 +917,13 @@ ; ; RV32IA-LABEL: atomicrmw_or_i8_release: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: or a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB27_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoor.w.rl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw or i8* %a, i8 %b release ret i8 %1 @@ -1002,23 +942,13 @@ ; ; RV32IA-LABEL: atomicrmw_or_i8_acq_rel: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: or a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB28_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; 
RV32IA-NEXT: ret %1 = atomicrmw or i8* %a, i8 %b acq_rel ret i8 %1 @@ -1037,23 +967,13 @@ ; ; RV32IA-LABEL: atomicrmw_or_i8_seq_cst: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: or a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB29_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw or i8* %a, i8 %b seq_cst ret i8 %1 @@ -1072,23 +992,13 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i8_monotonic: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB30_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw xor i8* %a, i8 %b monotonic ret i8 %1 @@ -1107,23 +1017,13 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i8_acquire: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 
; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB31_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoxor.w.aq a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw xor i8* %a, i8 %b acquire ret i8 %1 @@ -1142,23 +1042,13 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i8_release: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB32_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoxor.w.rl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw xor i8* %a, i8 %b release ret i8 %1 @@ -1177,23 +1067,13 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i8_acq_rel: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, 
.LBB33_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw xor i8* %a, i8 %b acq_rel ret i8 %1 @@ -1212,23 +1092,13 @@ ; ; RV32IA-LABEL: atomicrmw_xor_i8_seq_cst: ; RV32IA: # %bb.0: +; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: slli a2, a0, 3 ; RV32IA-NEXT: andi a2, a2, 24 -; RV32IA-NEXT: addi a3, zero, 255 -; RV32IA-NEXT: sll a3, a3, a2 -; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a3 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB34_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a2 +; RV32IA-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw xor i8* %a, i8 %b seq_cst ret i8 %1 @@ -3272,19 +3142,13 @@ ; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: not a2, a2 +; RV32IA-NEXT: or a1, a2, a1 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB70_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB70_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoand.w a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a3 ; RV32IA-NEXT: ret %1 = atomicrmw and i16* %a, i16 %b monotonic ret i16 %1 @@ -3308,19 +3172,13 @@ ; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: sll a1, a1, a3 
+; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: not a2, a2 +; RV32IA-NEXT: or a1, a2, a1 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB71_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB71_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoand.w.aq a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a3 ; RV32IA-NEXT: ret %1 = atomicrmw and i16* %a, i16 %b acquire ret i16 %1 @@ -3344,19 +3202,13 @@ ; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: not a2, a2 +; RV32IA-NEXT: or a1, a2, a1 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB72_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoand.w.rl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a3 ; RV32IA-NEXT: ret %1 = atomicrmw and i16* %a, i16 %b release ret i16 %1 @@ -3380,19 +3232,13 @@ ; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: not a2, a2 +; RV32IA-NEXT: or a1, a2, a1 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB73_1 -; RV32IA-NEXT: # %bb.2: 
-; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a3 ; RV32IA-NEXT: ret %1 = atomicrmw and i16* %a, i16 %b acq_rel ret i16 %1 @@ -3416,19 +3262,13 @@ ; RV32IA-NEXT: and a1, a1, a2 ; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 ; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: sll a2, a2, a3 +; RV32IA-NEXT: not a2, a2 +; RV32IA-NEXT: or a1, a2, a1 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB74_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: and a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB74_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoand.w.aqrl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a3 ; RV32IA-NEXT: ret %1 = atomicrmw and i16* %a, i16 %b seq_cst ret i16 %1 @@ -3635,21 +3475,12 @@ ; RV32IA-NEXT: lui a2, 16 ; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: or a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB80_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoor.w a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw or i16* %a, i16 %b monotonic ret i16 %1 @@ -3671,21 +3502,12 @@ ; RV32IA-NEXT: lui a2, 16 ; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; 
RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: or a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB81_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoor.w.aq a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw or i16* %a, i16 %b acquire ret i16 %1 @@ -3707,21 +3529,12 @@ ; RV32IA-NEXT: lui a2, 16 ; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: or a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB82_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoor.w.rl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw or i16* %a, i16 %b release ret i16 %1 @@ -3743,21 +3556,12 @@ ; RV32IA-NEXT: lui a2, 16 ; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: or a5, a4, 
a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB83_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw or i16* %a, i16 %b acq_rel ret i16 %1 @@ -3779,21 +3583,12 @@ ; RV32IA-NEXT: lui a2, 16 ; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: or a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB84_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoor.w.aqrl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw or i16* %a, i16 %b seq_cst ret i16 %1 @@ -3815,21 +3610,12 @@ ; RV32IA-NEXT: lui a2, 16 ; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB85_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoxor.w a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; 
RV32IA-NEXT: ret %1 = atomicrmw xor i16* %a, i16 %b monotonic ret i16 %1 @@ -3851,21 +3637,12 @@ ; RV32IA-NEXT: lui a2, 16 ; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB86_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoxor.w.aq a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw xor i16* %a, i16 %b acquire ret i16 %1 @@ -3887,21 +3664,12 @@ ; RV32IA-NEXT: lui a2, 16 ; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a0) -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB87_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoxor.w.rl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw xor i16* %a, i16 %b release ret i16 %1 @@ -3923,21 +3691,12 @@ ; RV32IA-NEXT: lui a2, 16 ; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, 
a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a0) -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.rl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB88_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw xor i16* %a, i16 %b acq_rel ret i16 %1 @@ -3959,21 +3718,12 @@ ; RV32IA-NEXT: lui a2, 16 ; RV32IA-NEXT: addi a2, a2, -1 ; RV32IA-NEXT: and a1, a1, a2 -; RV32IA-NEXT: slli a3, a0, 3 -; RV32IA-NEXT: andi a3, a3, 24 -; RV32IA-NEXT: sll a2, a2, a3 -; RV32IA-NEXT: sll a1, a1, a3 +; RV32IA-NEXT: slli a2, a0, 3 +; RV32IA-NEXT: andi a2, a2, 24 +; RV32IA-NEXT: sll a1, a1, a2 ; RV32IA-NEXT: andi a0, a0, -4 -; RV32IA-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a0) -; RV32IA-NEXT: xor a5, a4, a1 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: and a5, a5, a2 -; RV32IA-NEXT: xor a5, a4, a5 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a0) -; RV32IA-NEXT: bnez a5, .LBB89_1 -; RV32IA-NEXT: # %bb.2: -; RV32IA-NEXT: srl a0, a4, a3 +; RV32IA-NEXT: amoxor.w.aqrl a0, a1, (a0) +; RV32IA-NEXT: srl a0, a0, a2 ; RV32IA-NEXT: ret %1 = atomicrmw xor i16* %a, i16 %b seq_cst ret i16 %1