Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -231,6 +231,8 @@
   Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
   LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
   L->setAlignment(SrcAlign);
+  if (MI->isElementAtomic())
+    L->setOrdering(AtomicOrdering::Unordered);
   if (CopyMD)
     L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
   MDNode *LoopMemParallelMD =
@@ -240,6 +242,8 @@
 
   StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
   S->setAlignment(DstAlign);
+  if (MI->isElementAtomic())
+    S->setOrdering(AtomicOrdering::Unordered);
   if (CopyMD)
     S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
   if (LoopMemParallelMD)
@@ -284,6 +288,8 @@
     StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
                                         MI->isVolatile());
     S->setAlignment(Alignment);
+    if (MI->isElementAtomic())
+      S->setOrdering(AtomicOrdering::Unordered);
 
     // Set the size of the copy to 0, it will be deleted on the next iteration.
     MI->setLength(Constant::getNullValue(LenC->getType()));
@@ -1892,6 +1898,8 @@
           // Replace the instruction with just byte operations. We would
           // transform other cases to loads/stores, but we don't know if
          // alignment is sufficient.
+          // Note: If MI->isElementAtomic(), then the materialized load/store
+          // must be unordered-atomic.
         }
     }
 
@@ -1903,16 +1911,20 @@
     // then the source and dest pointers can't alias, so we can change this
     // into a call to memcpy.
     if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
-      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
-        if (GVSrc->isConstant()) {
-          Module *M = CI.getModule();
-          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
-          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
-                           CI.getArgOperand(1)->getType(),
-                           CI.getArgOperand(2)->getType() };
-          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
-          Changed = true;
-        }
+      // FIXME: Materialize an element-atomic memcpy when element-atomic memmove
+      // is implemented.
+      if (!MMI->isElementAtomic()) {
+        if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
+          if (GVSrc->isConstant()) {
+            Module *M = CI.getModule();
+            Intrinsic::ID MemCpyID = Intrinsic::memcpy;
+            Type *Tys[3] = {CI.getArgOperand(0)->getType(),
+                            CI.getArgOperand(1)->getType(),
+                            CI.getArgOperand(2)->getType()};
+            CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
+            Changed = true;
+          }
+      }
     }
 
     if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
Index: test/Transforms/InstCombine/element-atomic-memintrins.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/element-atomic-memintrins.ll
@@ -0,0 +1,84 @@
+;; Placeholder tests that will fail once the @llvm.mem[move|set] intrinsics have
+;; been added to the MemIntrinsic class hierarchy. These will act as a reminder to
+;; verify that InstCombine handles these intrinsics properly once they have been
+;; added to that class hierarchy.
+
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+;; ---- memset -----
+define void @test_memset_1(i8* %dest) {
+  ; CHECK-LABEL: test_memset_1
+  ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 0, i32 1)
+  ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 1, i32 1)
+  ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 2, i32 1)
+  ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 4, i32 1)
+  ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 8, i32 1)
+  ; CHECK-NEXT: ret void
+  ; not: memset.element.unordered.atomic(d,v,0,*) -> no-op
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 0, i32 1)
+  ; not: memset.element.unordered.atomic(d,v,[1,2,4,8],*) -> store
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 1, i32 1)
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 2, i32 1)
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 4, i32 1)
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 8, i32 1)
+  ret void
+}
+
+declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture writeonly, i8, i32, i32) nounwind argmemonly
+
+;; ----- memmove ------
+@gconst = constant [8 x i8] c"0123456\00"
+
+define void @test_memmove_1(i8* %dest) {
+  ; not: memmove from a global constant source becomes memcpy
+  ; CHECK-LABEL: test_memmove_1
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 getelementptr inbounds ([8 x i8], [8 x i8]* @gconst, i64 0, i64 0), i32 8, i32 1)
+  ; CHECK-NEXT: ret void
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 getelementptr inbounds ([8 x i8], [8 x i8]* @gconst, i64 0, i64 0), i32 8, i32 1)
+  ret void
+}
+
+define void @test_memmove_2(i8* %dest, i8* %src, i32 %sz) {
+  ; CHECK-LABEL: test_memmove_2
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 0, i32 1)
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 0, i32 2)
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 0, i32 4)
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 0, i32 8)
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 0, i32 16)
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %dest, i32 %sz, i32 1)
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %dest, i32 %sz, i32 2)
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %dest, i32 %sz, i32 4)
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %dest, i32 %sz, i32 8)
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %dest, i32 %sz, i32 16)
+  ; CHECK-NEXT: ret void
+  ; memmove(d,s,0) -> no-op
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 0, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 0, i32 2)
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 0, i32 4)
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 0, i32 8)
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 0, i32 16)
+  ; memmove(p,p,*) -> no-op
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %dest, i32 %sz, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %dest, i32 %sz, i32 2)
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %dest, i32 %sz, i32 4)
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %dest, i32 %sz, i32 8)
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %dest, i32 %sz, i32 16)
+  ret void
+}
+
+define void @test_memmove_3(i8* %dest, i8* %src) {
+  ; CHECK-LABEL: test_memmove_3
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 1, i32 1)
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 2, i32 1)
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 4, i32 1)
+  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 8, i32 1)
+  ; CHECK-NEXT: ret void
+  ; memmove(d,s,[1,2,4,8]) -> load/store
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 1, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 2, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 4, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 8, i32 1)
+  ret void
+}
+
+declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32) nounwind argmemonly
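
Note on the intended end state (a sketch for reference, not part of this patch): the FIXME in InstCombineCalls.cpp and the "not:" comments in the test describe the folds expected once the element-atomic variants are folded into the MemIntrinsic hierarchy. Assuming that handling mirrors the existing memset fold plus the setOrdering() calls added above, the length-1, element-size-1 case in @test_memset_1 would be expected to become an unordered-atomic store, roughly:

  ; before (as in @test_memset_1 above):
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 1, i32 1)
  ; hypothetical InstCombine output once the element-atomic fold is implemented:
  store atomic i8 1, i8* %dest unordered, align 1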