Index: llvm/trunk/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/trunk/include/llvm/IR/Intrinsics.td +++ llvm/trunk/include/llvm/IR/Intrinsics.td @@ -575,7 +575,7 @@ def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_token_ty], [IntrReadMem]>; -def int_experimental_gc_relocate : Intrinsic<[llvm_anyptr_ty], +def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty], [llvm_token_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; Index: llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -505,27 +505,27 @@ #ifndef NDEBUG // Check that each of the gc pointer and bases we've gotten out of the - // safepoint is something the strategy thinks might be a pointer into the GC - // heap. This is basically just here to help catch errors during statepoint - // insertion. TODO: This should actually be in the Verifier, but we can't get - // to the GCStrategy from there (yet). + // safepoint is something the strategy thinks might be a pointer (or vector + // of pointers) into the GC heap. This is basically just here to help catch + // errors during statepoint insertion. TODO: This should actually be in the + // Verifier, but we can't get to the GCStrategy from there (yet). 
GCStrategy &S = Builder.GFI->getStrategy(); for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V->getType()); + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed base pointer found in statepoint"); } } for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V->getType()); + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed derived pointer found in statepoint"); } } for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V->getType()); + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed pointer relocated"); } Index: llvm/trunk/lib/IR/Verifier.cpp =================================================================== --- llvm/trunk/lib/IR/Verifier.cpp +++ llvm/trunk/lib/IR/Verifier.cpp @@ -3652,6 +3652,9 @@ case Intrinsic::experimental_gc_relocate: { Assert(CS.getNumArgOperands() == 3, "wrong number of arguments", CS); + Assert(isa<PointerType>(CS.getType()->getScalarType()), + "gc.relocate must return a pointer or a vector of pointers", CS); + // Check that this relocate is correctly tied to the statepoint // This is case for relocate on the unwinding path of an invoke statepoint @@ -3734,17 +3737,20 @@ "'gc parameters' section of the statepoint call", CS); - // Relocated value must be a pointer type, but gc_relocate does not need to return the - // same pointer type as the relocated pointer. It can be casted to the correct type later - // if it's desired. However, they must have the same address space. + // Relocated value must be either a pointer type or vector-of-pointer type, + // but gc_relocate does not need to return the same pointer type as the + // relocated pointer. It can be casted to the correct type later if it's + // desired. 
However, they must have the same address space and 'vectorness' GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction()); - Assert(Relocate.getDerivedPtr()->getType()->isPointerTy(), + Assert(Relocate.getDerivedPtr()->getType()->getScalarType()->isPointerTy(), "gc.relocate: relocated value must be a gc pointer", CS); - // gc_relocate return type must be a pointer type, and is verified earlier in - // VerifyIntrinsicType(). - Assert(cast<PointerType>(CS.getType())->getAddressSpace() == - cast<PointerType>(Relocate.getDerivedPtr()->getType())->getAddressSpace(), + auto ResultType = CS.getType(); + auto DerivedType = Relocate.getDerivedPtr()->getType(); + Assert(ResultType->isVectorTy() == DerivedType->isVectorTy(), + "gc.relocate: vector relocates to vector and pointer to pointer", CS); + Assert(ResultType->getPointerAddressSpace() == + DerivedType->getPointerAddressSpace(), "gc.relocate: relocating a pointer shouldn't change its address space", CS); break; } Index: llvm/trunk/test/CodeGen/X86/statepoint-vector.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/statepoint-vector.ll +++ llvm/trunk/test/CodeGen/X86/statepoint-vector.ll @@ -0,0 +1,130 @@ +; RUN: llc -debug-only=stackmaps < %s | FileCheck %s + +; Can we lower a single vector? +define <2 x i8 addrspace(1)*> @test(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" { +entry: +; CHECK-LABEL: @test +; CHECK: subq $24, %rsp +; CHECK: movaps %xmm0, (%rsp) +; CHECK: callq do_safepoint +; CHECK: movaps (%rsp), %xmm0 +; CHECK: addq $24, %rsp + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj) + %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj) + ret <2 x i8 addrspace(1)*> %obj.relocated +} + +; Can we lower the base, derived pairs if both are vectors? +define <2 x i8 addrspace(1)*> @test2(<2 x i8 addrspace(1)*> %obj, i64 %offset) gc "statepoint-example" { +entry: +; CHECK-LABEL: @test2 +; CHECK: subq $40, %rsp +; CHECK: movd %rdi, %xmm1 +; CHECK: pshufd $68, %xmm1, %xmm1 # xmm1 = xmm1[0,1,0,1] +; CHECK: paddq %xmm0, %xmm1 +; CHECK: movdqa %xmm0, 16(%rsp) +; CHECK: movdqa %xmm1, (%rsp) +; CHECK: callq do_safepoint +; CHECK: movaps (%rsp), %xmm0 +; CHECK: addq $40, %rsp + %derived = getelementptr i8, <2 x i8 addrspace(1)*> %obj, i64 %offset + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj, <2 x i8 addrspace(1)*> %derived) + %derived.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 8) ; (%obj, %derived) + ret <2 x i8 addrspace(1)*> %derived.relocated +} + +; Originally, this was just a variant of @test2 above, but it ends up +; covering a bunch of interesting missed optimizations. Specifically: +; - We waste a stack slot for a value that a backend transform pass +; CSEd to another spilled one. +; - We don't remove the testb even though it serves no purpose +; - We could in principle reuse the argument memory (%rsi) and do away +; with stack slots entirely. 
+define <2 x i64 addrspace(1)*> @test3(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" { +entry: +; CHECK-LABEL: @test3 +; CHECK: subq $40, %rsp +; CHECK: testb $1, %dil +; CHECK: movaps (%rsi), %xmm0 +; CHECK: movaps %xmm0, 16(%rsp) +; CHECK: movaps %xmm0, (%rsp) +; CHECK: callq do_safepoint +; CHECK: movaps (%rsp), %xmm0 +; CHECK: addq $40, %rsp + br i1 %cnd, label %taken, label %untaken + +taken: ; preds = %entry + %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr + br label %merge + +untaken: ; preds = %entry + %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr + br label %merge + +merge: ; preds = %untaken, %taken + %obj.base = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ] + %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ] + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i64 addrspace(1)*> %obj, <2 x i64 addrspace(1)*> %obj.base) + %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 7) ; (%obj.base, %obj) + %obj.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.relocated to <2 x i64 addrspace(1)*> + %obj.base.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 8) ; (%obj.base, %obj.base) + %obj.base.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.base.relocated to <2 x i64 addrspace(1)*> + ret <2 x i64 addrspace(1)*> %obj.relocated.casted +} + +; CHECK: __LLVM_StackMaps: + +; CHECK: .Ltmp1-test +; Check for the two spill slots +; Stack Maps: Loc 3: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0] +; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0] +; CHECK: .byte 3 +; CHECK: .byte 16 +; CHECK: .short 7 +; CHECK: .long 0 +; CHECK: .byte 3 +; 
CHECK: .byte 16 +; CHECK: .short 7 +; CHECK: .long 0 + +; CHECK: .Ltmp3-test2 +; Check for the two spill slots +; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16] +; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0] +; CHECK: .byte 3 +; CHECK: .byte 16 +; CHECK: .short 7 +; CHECK: .long 16 +; CHECK: .byte 3 +; CHECK: .byte 16 +; CHECK: .short 7 +; CHECK: .long 0 + +; CHECK: .Ltmp5-test3 +; Check for the four spill slots +; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16] +; Stack Maps: Loc 4: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16] +; Stack Maps: Loc 5: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16] +; Stack Maps: Loc 6: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0] +; CHECK: .byte 3 +; CHECK: .byte 16 +; CHECK: .short 7 +; CHECK: .long 16 +; CHECK: .byte 3 +; CHECK: .byte 16 +; CHECK: .short 7 +; CHECK: .long 16 +; CHECK: .byte 3 +; CHECK: .byte 16 +; CHECK: .short 7 +; CHECK: .long 16 +; CHECK: .byte 3 +; CHECK: .byte 16 +; CHECK: .short 7 +; CHECK: .long 0 + +declare void @do_safepoint() + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) +declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) +declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32) Index: llvm/trunk/test/Verifier/gc_relocate_return.ll =================================================================== --- llvm/trunk/test/Verifier/gc_relocate_return.ll +++ llvm/trunk/test/Verifier/gc_relocate_return.ll @@ -1,8 +1,7 @@ ; RUN: not llvm-as -disable-output <%s 2>&1 | FileCheck %s -; This is to verify that gc_relocate must return a pointer type, which is defined -; in intrinsics.td. +; This is to verify that gc_relocate must return a pointer type -; CHECK: Intrinsic has incorrect return type! 
+; CHECK: gc.relocate must return a pointer or a vector of pointers declare void @foo()