Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -6818,7 +6818,7 @@ The argument to the '``fneg``' instruction must be a :ref:`floating-point ` or :ref:`vector ` of -floating-point values. +floating-point values. Semantics: """""""""" @@ -8430,15 +8430,18 @@ - min - umax - umin +- fadd +- fsub For most of these operations, the type of '' must be an integer type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. For xchg, this may also be a floating point type with the same size constraints as -integers. The type of the '````' operand must be a pointer to -that type. If the ``atomicrmw`` is marked as ``volatile``, then the -optimizer is not allowed to modify the number or order of execution of -this ``atomicrmw`` with other :ref:`volatile operations `. +integers. For fadd/fsub, this must be a floating point type. The +type of the '````' operand must be a pointer to that type. If +the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not +allowed to modify the number or order of execution of this +``atomicrmw`` with other :ref:`volatile operations `. A ``atomicrmw`` instruction can also take an optional ":ref:`syncscope `" argument. @@ -8464,6 +8467,8 @@ comparison) - umin: ``*ptr = *ptr < val ? *ptr : val`` (using an unsigned comparison) +- fadd: ``*ptr = *ptr + val`` (using floating point rules) +- fsub: ``*ptr = *ptr - val`` (using floating point rules) Example: """""""" @@ -14765,13 +14770,13 @@ Overview: """"""""" -The '``llvm.experimental.constrained.maxnum``' intrinsic returns the maximum +The '``llvm.experimental.constrained.maxnum``' intrinsic returns the maximum of the two arguments. Arguments: """""""""" -The first two arguments and the return value are floating-point numbers +The first two arguments and the return value are floating-point numbers of the same type. 
The third and forth arguments specify the rounding mode and exception @@ -14839,7 +14844,7 @@ Overview: """"""""" -The '``llvm.experimental.constrained.ceil``' intrinsic returns the ceiling of the +The '``llvm.experimental.constrained.ceil``' intrinsic returns the ceiling of the first operand. Arguments: @@ -14875,7 +14880,7 @@ Overview: """"""""" -The '``llvm.experimental.constrained.floor``' intrinsic returns the floor of the +The '``llvm.experimental.constrained.floor``' intrinsic returns the floor of the first operand. Arguments: @@ -14892,7 +14897,7 @@ """""""""" This function returns the same values as the libm ``floor`` functions -would and handles error conditions in the same way. +would and handles error conditions in the same way. '``llvm.experimental.constrained.round``' Intrinsic @@ -14911,7 +14916,7 @@ Overview: """"""""" -The '``llvm.experimental.constrained.round``' intrinsic returns the first +The '``llvm.experimental.constrained.round``' intrinsic returns the first operand rounded to the nearest integer. Arguments: @@ -14947,8 +14952,8 @@ Overview: """"""""" -The '``llvm.experimental.constrained.trunc``' intrinsic returns the first -operand rounded to the nearest integer not larger in magnitude than the +The '``llvm.experimental.constrained.trunc``' intrinsic returns the first +operand rounded to the nearest integer not larger in magnitude than the operand. 
Arguments: Index: include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- include/llvm/Bitcode/LLVMBitCodes.h +++ include/llvm/Bitcode/LLVMBitCodes.h @@ -407,7 +407,9 @@ RMW_MAX = 7, RMW_MIN = 8, RMW_UMAX = 9, - RMW_UMIN = 10 + RMW_UMIN = 10, + RMW_FADD = 11, + RMW_FSUB = 12 }; /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing Index: include/llvm/CodeGen/TargetLowering.h =================================================================== --- include/llvm/CodeGen/TargetLowering.h +++ include/llvm/CodeGen/TargetLowering.h @@ -1684,8 +1684,9 @@ /// Returns how the IR-level AtomicExpand pass should expand the given /// AtomicRMW, if at all. Default is to never expand. - virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const { - return AtomicExpansionKind::None; + virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { + return RMW->isFloatingPointOperation() ? + AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; } /// On some platforms, an AtomicRMW that never actually modifies the value Index: include/llvm/IR/Instructions.h =================================================================== --- include/llvm/IR/Instructions.h +++ include/llvm/IR/Instructions.h @@ -714,8 +714,14 @@ /// *p = old getType()->getPointerAddressSpace(); } + bool isFloatingPointOperation() const { + return isFPOperation(getOperation()); + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::AtomicRMW; Index: lib/AsmParser/LLParser.cpp =================================================================== --- lib/AsmParser/LLParser.cpp +++ lib/AsmParser/LLParser.cpp @@ -6816,6 +6816,7 @@ AtomicOrdering Ordering = AtomicOrdering::NotAtomic; SyncScope::ID SSID = SyncScope::System; bool isVolatile = false; + bool IsFP = false; AtomicRMWInst::BinOp Operation; if 
(EatIfPresent(lltok::kw_volatile)) @@ -6834,6 +6835,14 @@ case lltok::kw_min: Operation = AtomicRMWInst::Min; break; case lltok::kw_umax: Operation = AtomicRMWInst::UMax; break; case lltok::kw_umin: Operation = AtomicRMWInst::UMin; break; + case lltok::kw_fadd: + Operation = AtomicRMWInst::FAdd; + IsFP = true; + break; + case lltok::kw_fsub: + Operation = AtomicRMWInst::FSub; + IsFP = true; + break; } Lex.Lex(); // Eat the operation. @@ -6850,18 +6859,25 @@ if (cast(Ptr->getType())->getElementType() != Val->getType()) return Error(ValLoc, "atomicrmw value and pointer type do not match"); - if (Operation != AtomicRMWInst::Xchg && !Val->getType()->isIntegerTy()) { - return Error(ValLoc, "atomicrmw " + - AtomicRMWInst::getOperationName(Operation) + - " operand must be an integer"); - } - - if (Operation == AtomicRMWInst::Xchg && - !Val->getType()->isIntegerTy() && - !Val->getType()->isFloatingPointTy()) { - return Error(ValLoc, "atomicrmw " + - AtomicRMWInst::getOperationName(Operation) + - " operand must be an integer or floating point type"); + if (Operation == AtomicRMWInst::Xchg) { + if (!Val->getType()->isIntegerTy() && + !Val->getType()->isFloatingPointTy()) { + return Error(ValLoc, "atomicrmw " + + AtomicRMWInst::getOperationName(Operation) + + " operand must be an integer or floating point type"); + } + } else if (IsFP) { + if (!Val->getType()->isFloatingPointTy()) { + return Error(ValLoc, "atomicrmw " + + AtomicRMWInst::getOperationName(Operation) + + " operand must be a floating point type"); + } + } else { + if (!Val->getType()->isIntegerTy()) { + return Error(ValLoc, "atomicrmw " + + AtomicRMWInst::getOperationName(Operation) + + " operand must be an integer"); + } } unsigned Size = Val->getType()->getPrimitiveSizeInBits(); Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -1035,6 +1035,8 @@ case bitc::RMW_MIN: 
return AtomicRMWInst::Min; case bitc::RMW_UMAX: return AtomicRMWInst::UMax; case bitc::RMW_UMIN: return AtomicRMWInst::UMin; + case bitc::RMW_FADD: return AtomicRMWInst::FAdd; + case bitc::RMW_FSUB: return AtomicRMWInst::FSub; } } Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -560,6 +560,8 @@ case AtomicRMWInst::Min: return bitc::RMW_MIN; case AtomicRMWInst::UMax: return bitc::RMW_UMAX; case AtomicRMWInst::UMin: return bitc::RMW_UMIN; + case AtomicRMWInst::FAdd: return bitc::RMW_FADD; + case AtomicRMWInst::FSub: return bitc::RMW_FSUB; } } Index: lib/CodeGen/AtomicExpandPass.cpp =================================================================== --- lib/CodeGen/AtomicExpandPass.cpp +++ lib/CodeGen/AtomicExpandPass.cpp @@ -550,6 +550,10 @@ case AtomicRMWInst::UMin: NewVal = Builder.CreateICmpULE(Loaded, Inc); return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::FAdd: + return Builder.CreateFAdd(Loaded, Inc, "new"); + case AtomicRMWInst::FSub: + return Builder.CreateFSub(Loaded, Inc, "new"); default: llvm_unreachable("Unknown atomic op"); } @@ -1548,6 +1552,8 @@ case AtomicRMWInst::Min: case AtomicRMWInst::UMax: case AtomicRMWInst::UMin: + case AtomicRMWInst::FAdd: + case AtomicRMWInst::FSub: // No atomic libcalls are available for max/min/umax/umin. 
return {}; } Index: lib/IR/Instructions.cpp =================================================================== --- lib/IR/Instructions.cpp +++ lib/IR/Instructions.cpp @@ -1423,6 +1423,10 @@ return "umax"; case AtomicRMWInst::UMin: return "umin"; + case AtomicRMWInst::FAdd: + return "fadd"; + case AtomicRMWInst::FSub: + return "fsub"; case AtomicRMWInst::BAD_BINOP: return ""; } Index: lib/IR/Verifier.cpp =================================================================== --- lib/IR/Verifier.cpp +++ lib/IR/Verifier.cpp @@ -3413,6 +3413,11 @@ AtomicRMWInst::getOperationName(Op) + " operand must have integer or floating point type!", &RMWI, ElTy); + } else if (AtomicRMWInst::isFPOperation(Op)) { + Assert(ElTy->isFloatingPointTy(), "atomicrmw " + + AtomicRMWInst::getOperationName(Op) + + " operand must have floating point type!", + &RMWI, ElTy); } else { Assert(ElTy->isIntegerTy(), "atomicrmw " + AtomicRMWInst::getOperationName(Op) + Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11531,6 +11531,9 @@ // For the real atomic operations, we have ldxr/stxr up to 128 bits, TargetLowering::AtomicExpansionKind AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size > 128) return AtomicExpansionKind::None; // Nand not supported in LSE. 
Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -14526,6 +14526,9 @@ // and up to 64 bits on the non-M profiles TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) Index: lib/Target/Hexagon/HexagonISelLowering.cpp =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.cpp +++ lib/Target/Hexagon/HexagonISelLowering.cpp @@ -3107,13 +3107,21 @@ AtomicOrdering Ord) const { BasicBlock *BB = Builder.GetInsertBlock(); Module *M = BB->getParent()->getParent(); - Type *Ty = cast(Addr->getType())->getElementType(); + auto PT = cast(Addr->getType()); + Type *Ty = PT->getElementType(); unsigned SZ = Ty->getPrimitiveSizeInBits(); assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported"); Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked : Intrinsic::hexagon_L4_loadd_locked; + + PointerType *NewPtrTy + = Builder.getIntNTy(SZ)->getPointerTo(PT->getAddressSpace()); + Addr = Builder.CreateBitCast(Addr, NewPtrTy); + Value *Fn = Intrinsic::getDeclaration(M, IntID); - return Builder.CreateCall(Fn, Addr, "larx"); + Value *Call = Builder.CreateCall(Fn, Addr, "larx"); + + return Builder.CreateBitCast(Call, Ty); } /// Perform a store-conditional operation to Addr. 
Return the status of the @@ -3124,10 +3132,17 @@ Module *M = BB->getParent()->getParent(); Type *Ty = Val->getType(); unsigned SZ = Ty->getPrimitiveSizeInBits(); + + Type *CastTy = Builder.getIntNTy(SZ); assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported"); Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked : Intrinsic::hexagon_S4_stored_locked; Value *Fn = Intrinsic::getDeclaration(M, IntID); + + unsigned AS = Addr->getType()->getPointerAddressSpace(); + Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS)); + Val = Builder.CreateBitCast(Val, CastTy); + Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx"); Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), ""); Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext())); Index: lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- lib/Target/RISCV/RISCVISelLowering.cpp +++ lib/Target/RISCV/RISCVISelLowering.cpp @@ -1717,6 +1717,9 @@ TargetLowering::AtomicExpansionKind RISCVTargetLowering::shouldExpandAtomicRMWInIR( AtomicRMWInst *AI) const { + if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size == 8 || Size == 16) return AtomicExpansionKind::MaskedIntrinsic; Index: test/Assembler/atomic.ll =================================================================== --- test/Assembler/atomic.ll +++ test/Assembler/atomic.ll @@ -39,3 +39,13 @@ fence syncscope("device") seq_cst ret void } + +define void @fp_atomics(float* %x) { + ; CHECK: atomicrmw fadd float* %x, float 1.000000e+00 seq_cst + atomicrmw fadd float* %x, float 1.0 seq_cst + + ; CHECK: atomicrmw volatile fadd float* %x, float 1.000000e+00 seq_cst + atomicrmw volatile fadd float* %x, float 1.0 seq_cst + + ret void +} Index: test/Assembler/invalid-atomicrmw-fadd-must-be-fp-type.ll
=================================================================== --- /dev/null +++ test/Assembler/invalid-atomicrmw-fadd-must-be-fp-type.ll @@ -0,0 +1,7 @@ +; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s + +; CHECK: error: atomicrmw fadd operand must be a floating point type +define void @f(i32* %ptr) { + atomicrmw fadd i32* %ptr, i32 2 seq_cst + ret void +} Index: test/Assembler/invalid-atomicrmw-fsub-must-be-fp-type.ll =================================================================== --- /dev/null +++ test/Assembler/invalid-atomicrmw-fsub-must-be-fp-type.ll @@ -0,0 +1,7 @@ +; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s + +; CHECK: error: atomicrmw fsub operand must be a floating point type +define void @f(i32* %ptr) { + atomicrmw fsub i32* %ptr, i32 2 seq_cst + ret void +} Index: test/Bitcode/compatibility.ll =================================================================== --- test/Bitcode/compatibility.ll +++ test/Bitcode/compatibility.ll @@ -764,6 +764,13 @@ define void @fp_atomics(float* %word) { ; CHECK: %atomicrmw.xchg = atomicrmw xchg float* %word, float 1.000000e+00 monotonic %atomicrmw.xchg = atomicrmw xchg float* %word, float 1.0 monotonic + +; CHECK: %atomicrmw.fadd = atomicrmw fadd float* %word, float 1.000000e+00 monotonic + %atomicrmw.fadd = atomicrmw fadd float* %word, float 1.0 monotonic + +; CHECK: %atomicrmw.fsub = atomicrmw fsub float* %word, float 1.000000e+00 monotonic + %atomicrmw.fsub = atomicrmw fsub float* %word, float 1.0 monotonic + ret void } Index: test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll =================================================================== --- /dev/null +++ test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=aarch64-linux-gnu -atomic-expand %s | FileCheck %s + +define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) { +; CHECK-LABEL: 
@test_atomicrmw_fadd_f32( +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fadd float* %ptr, float %value seq_cst + ret float %res +} + +define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fsub_f32( +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 
[[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fsub float* %ptr, float %value seq_cst + ret float %res +} + Index: test/Transforms/AtomicExpand/ARM/atomicrmw-fp.ll =================================================================== --- /dev/null +++ test/Transforms/AtomicExpand/ARM/atomicrmw-fp.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s + +define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fadd_f32( +; CHECK-NEXT: call void @llvm.arm.dmb(i32 11) +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: call void @llvm.arm.dmb(i32 11) +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fadd float* %ptr, float %value seq_cst + ret float %res +} + +define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fsub_f32( +; CHECK-NEXT: call void @llvm.arm.dmb(i32 11) +; CHECK-NEXT: [[TMP1:%.*]] = 
load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: call void @llvm.arm.dmb(i32 11) +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fsub float* %ptr, float %value seq_cst + ret float %res +} + Index: test/Transforms/AtomicExpand/Hexagon/atomicrmw-fp.ll =================================================================== --- /dev/null +++ test/Transforms/AtomicExpand/Hexagon/atomicrmw-fp.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=hexagon-- -atomic-expand %s | FileCheck %s + +define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fadd_f32( +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTR:%.*]] to i32* +; CHECK-NEXT: [[LARX:%.*]] = call i32 @llvm.hexagon.L2.loadw.locked(i32* [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[LARX]] to float +; CHECK-NEXT: [[NEW:%.*]] = fadd float [[TMP2]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[NEW]] to i32 +; 
CHECK-NEXT: [[STCX:%.*]] = call i32 @llvm.hexagon.S2.storew.locked(i32* [[TMP3]], i32 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[STCX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP2]] +; + %res = atomicrmw fadd float* %ptr, float %value seq_cst + ret float %res +} + +define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fsub_f32( +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTR:%.*]] to i32* +; CHECK-NEXT: [[LARX:%.*]] = call i32 @llvm.hexagon.L2.loadw.locked(i32* [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[LARX]] to float +; CHECK-NEXT: [[NEW:%.*]] = fsub float [[TMP2]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[STCX:%.*]] = call i32 @llvm.hexagon.S2.storew.locked(i32* [[TMP3]], i32 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[STCX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP2]] +; + %res = atomicrmw fsub float* %ptr, float %value seq_cst + ret float %res +} + Index: test/Transforms/AtomicExpand/Hexagon/lit.local.cfg =================================================================== --- /dev/null +++ test/Transforms/AtomicExpand/Hexagon/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'Hexagon' in config.root.targets: + config.unsupported = True Index: test/Transforms/AtomicExpand/Mips/atomicrmw-fp.ll =================================================================== --- /dev/null +++ 
test/Transforms/AtomicExpand/Mips/atomicrmw-fp.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=mips64-mti-linux-gnu -atomic-expand %s | FileCheck %s + +define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fadd_f32( +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fadd float* %ptr, float %value seq_cst + ret float %res +} + +define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fsub_f32( +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to 
i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] monotonic monotonic +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fsub float* %ptr, float %value seq_cst + ret float %res +} + Index: test/Transforms/AtomicExpand/Mips/lit.local.cfg =================================================================== --- /dev/null +++ test/Transforms/AtomicExpand/Mips/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'Mips' in config.root.targets: + config.unsupported = True Index: test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll =================================================================== --- /dev/null +++ test/Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=riscv32-- -atomic-expand %s | FileCheck %s + +define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fadd_f32( +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst +; CHECK-NEXT: [[SUCCESS:%.*]] = 
extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fadd float* %ptr, float %value seq_cst + ret float %res +} + +define float @test_atomicrmw_fsub_f32(float* %ptr, float %value) { +; CHECK-LABEL: @test_atomicrmw_fsub_f32( +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret float [[TMP6]] +; + %res = atomicrmw fsub float* %ptr, float %value seq_cst + ret float %res +} + Index: test/Transforms/AtomicExpand/RISCV/lit.local.cfg =================================================================== --- /dev/null +++ test/Transforms/AtomicExpand/RISCV/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'RISCV' in config.root.targets: + config.unsupported = True