diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -61,13 +61,19 @@
 * The ``inalloca`` attribute now has a mandatory type field, similar to
   ``byval`` and ``sret``.
 
-
 Changes to building LLVM
 ------------------------
 
 Changes to TableGen
 -------------------
 
+Changes to Backend Code Generation
+----------------------------------
+
+* When lowering calls, only ABI attributes on the call itself are checked, not
+  the caller. Frontends need to make sure to properly set ABI attributes on
+  calls (and always should have).
+
 Changes to the ARM Backend
 --------------------------
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -102,28 +102,31 @@
   return true;
 }
 
-/// Set CallLoweringInfo attribute flags based on a call instruction
-/// and called function attributes.
+/// Set CallLoweringInfo attribute flags based on the call instruction's
+/// argument attributes.
 void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                      unsigned ArgIdx) {
-  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
-  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
-  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
-  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
-  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
-  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
-  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
-  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
-  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
-  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
-  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
-  Alignment = Call->getParamStackAlign(ArgIdx);
+  auto Attrs = Call->getAttributes();
+
+  IsSExt = Attrs.hasParamAttribute(ArgIdx, Attribute::SExt);
+  IsZExt = Attrs.hasParamAttribute(ArgIdx, Attribute::ZExt);
+  IsInReg = Attrs.hasParamAttribute(ArgIdx, Attribute::InReg);
+  IsSRet = Attrs.hasParamAttribute(ArgIdx, Attribute::StructRet);
+  IsNest = Attrs.hasParamAttribute(ArgIdx, Attribute::Nest);
+  IsReturned = Attrs.hasParamAttribute(ArgIdx, Attribute::Returned);
+  IsSwiftSelf = Attrs.hasParamAttribute(ArgIdx, Attribute::SwiftSelf);
+  IsSwiftError = Attrs.hasParamAttribute(ArgIdx, Attribute::SwiftError);
+  Alignment = Attrs.getParamStackAlignment(ArgIdx);
+
+  IsByVal = Attrs.hasParamAttribute(ArgIdx, Attribute::ByVal);
   ByValType = nullptr;
   if (IsByVal) {
     ByValType = Call->getParamByValType(ArgIdx);
     if (!Alignment)
       Alignment = Call->getParamAlign(ArgIdx);
   }
+  IsInAlloca = Attrs.hasParamAttribute(ArgIdx, Attribute::InAlloca);
+  IsPreallocated = Attrs.hasParamAttribute(ArgIdx, Attribute::Preallocated);
   PreallocatedType = nullptr;
   if (IsPreallocated)
     PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
diff --git a/llvm/test/CodeGen/AArch64/arm64-this-return.ll b/llvm/test/CodeGen/AArch64/arm64-this-return.ll
--- a/llvm/test/CodeGen/AArch64/arm64-this-return.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-this-return.ll
@@ -38,9 +38,9 @@
 ; CHECK-NOT: mov x0, {{x[0-9]+}}
 ; CHECK: b {{_?B_ctor_base}}
   %0 = bitcast %struct.C* %this to %struct.A*
-  %call = tail call %struct.A* @A_ctor_base(%struct.A* %0)
+  %call = tail call %struct.A* @A_ctor_base(%struct.A* returned %0)
   %1 = getelementptr inbounds %struct.C, %struct.C* %this, i32 0, i32 0
-  %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x)
+  %call2 = tail call %struct.B* @B_ctor_base(%struct.B* returned %1, i32 %x)
   ret %struct.C* %this
 }
 
@@ -88,7 +88,7 @@
 entry:
 ; CHECK-LABEL: C_ctor_complete:
 ; CHECK: b {{_?C_ctor_base}}
-  %call = tail call %struct.C* @C_ctor_base(%struct.C* %this, i32 %x)
+  %call = tail call %struct.C* @C_ctor_base(%struct.C* returned %this, i32 %x)
   ret %struct.C* %this
 }
 
@@ -135,8 +135,8 @@
 ; CHECK-NOT: mov x0, {{x[0-9]+}}
 ; CHECK: b {{_?B_ctor_complete}}
   %b = getelementptr inbounds %struct.D, %struct.D* %this, i32 0, i32 0
-  %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
-  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+  %call = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b, i32 %x)
+  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b, i32 %x)
   ret %struct.D* %this
 }
 
@@ -166,8 +166,8 @@
 ; CHECK-LABEL: E_ctor_base:
 ; CHECK-NOT: b {{_?B_ctor_complete}}
   %b = getelementptr inbounds %struct.E, %struct.E* %this, i32 0, i32 0
-  %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+  %call = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b, i32 %x)
   %b2 = getelementptr inbounds %struct.E, %struct.E* %this, i32 0, i32 1
-  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b2, i32 %x)
+  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b2, i32 %x)
   ret %struct.E* %this
 }
diff --git a/llvm/test/CodeGen/AArch64/bitfield-extract.ll b/llvm/test/CodeGen/AArch64/bitfield-extract.ll
--- a/llvm/test/CodeGen/AArch64/bitfield-extract.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-extract.ll
@@ -91,7 +91,7 @@
 define void @test11(i64 %a) {
   %tmp = lshr i64 %a, 23
   %res = trunc i64 %tmp to i16
-  call void @use(i16 %res, i64 %tmp)
+  call void @use(i16 signext %res, i64 %tmp)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/llvm/test/CodeGen/AArch64/tailcall-explicit-sret.ll
--- a/llvm/test/CodeGen/AArch64/tailcall-explicit-sret.ll
+++ b/llvm/test/CodeGen/AArch64/tailcall-explicit-sret.ll
@@ -11,7 +11,7 @@
 ; CHECK-LABEL: _test_tailcall_explicit_sret:
 ; CHECK-NEXT: b _test_explicit_sret
 define void @test_tailcall_explicit_sret(i1024* sret(i1024) %arg) #0 {
-  tail call void @test_explicit_sret(i1024* %arg)
+  tail call void @test_explicit_sret(i1024* sret(i1024) %arg)
   ret void
 }
 
@@ -20,7 +20,7 @@
 ; CHECK: bl _test_explicit_sret
 ; CHECK: ret
 define void @test_call_explicit_sret(i1024* sret(i1024) %arg) #0 {
-  call void @test_explicit_sret(i1024* %arg)
+  call void @test_explicit_sret(i1024* sret(i1024) %arg)
   ret void
 }
 
@@ -30,7 +30,7 @@
 ; CHECK: ret
 define void @test_tailcall_explicit_sret_alloca_unused() #0 {
   %l = alloca i1024, align 8
-  tail call void @test_explicit_sret(i1024* %l)
+  tail call void @test_explicit_sret(i1024* sret(i1024) %l)
   ret void
 }
 
@@ -44,7 +44,7 @@
   %l = alloca i1024, align 8
   %r = load i1024, i1024* %ptr, align 8
   store i1024 %r, i1024* %l, align 8
-  tail call void @test_explicit_sret(i1024* %l)
+  tail call void @test_explicit_sret(i1024* sret(i1024) %l)
   ret void
 }
 
@@ -56,7 +56,7 @@
 ; CHECK: ret
 define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 {
   %ptr2 = getelementptr i1024, i1024* %ptr, i32 1
-  tail call void @test_explicit_sret(i1024* %ptr2)
+  tail call void @test_explicit_sret(i1024* sret(i1024) %ptr2)
   ret void
 }
 
@@ -69,7 +69,7 @@
 ; CHECK: ret
 define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
   %l = alloca i1024, align 8
-  tail call void @test_explicit_sret(i1024* %l)
+  tail call void @test_explicit_sret(i1024* sret(i1024) %l)
   %r = load i1024, i1024* %l, align 8
   ret i1024 %r
 }
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -92,7 +92,7 @@
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
   %var = load volatile i1, i1 addrspace(1)* undef
-  call void @external_void_func_i1_signext(i1 %var)
+  call void @external_void_func_i1_signext(i1 signext %var)
   ret void
 }
 
@@ -113,7 +113,7 @@
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
   %var = load volatile i1, i1 addrspace(1)* undef
-  call void @external_void_func_i1_zeroext(i1 %var)
+  call void @external_void_func_i1_zeroext(i1 zeroext %var)
   ret void
 }
 
@@ -148,7 +148,7 @@
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
   %var = load volatile i8, i8 addrspace(1)* undef
-  call void @external_void_func_i8_signext(i8 %var)
+  call void @external_void_func_i8_signext(i8 signext %var)
   ret void
 }
 
@@ -166,7 +166,7 @@
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
   %var = load volatile i8, i8 addrspace(1)* undef
-  call void @external_void_func_i8_zeroext(i8 %var)
+  call void @external_void_func_i8_zeroext(i8 zeroext %var)
   ret void
 }
 
@@ -195,7 +195,7 @@
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
   %var = load volatile i16, i16 addrspace(1)* undef
-  call void @external_void_func_i16_signext(i16 %var)
+  call void @external_void_func_i16_signext(i16 signext %var)
   ret void
 }
 
@@ -212,7 +212,7 @@
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
   %var = load volatile i16, i16 addrspace(1)* undef
-  call void @external_void_func_i16_zeroext(i16 %var)
+  call void @external_void_func_i16_zeroext(i16 zeroext %var)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
@@ -517,7 +517,7 @@
     i32 210, i32 220, i32 230, i32 240,
     i32 250, i32 260, i32 270, i32 280,
     i32 290, i32 300, i32 310, i32 320,
-    i32 addrspace(5)* %alloca)
+    i32 addrspace(5)* byval(i32) %alloca)
   ret void
 }
 
@@ -541,7 +541,7 @@
     i32 210, i32 220, i32 230, i32 240,
     i32 250, i32 260, i32 270, i32 280,
     i32 290, i32 300, i32 310, i32 320,
-    i32 addrspace(5)* %alloca)
+    i32 addrspace(5)* byval(i32) %alloca)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -649,7 +649,7 @@
     i32 210, i32 220, i32 230, i32 240,
     i32 250, i32 260, i32 270, i32 280,
     i32 290, i32 300, i32 310, i32 320,
-    i32 addrspace(5)* %alloca)
+    i32 addrspace(5)* byval(i32) %alloca)
   ret void
 }
 
@@ -686,7 +686,7 @@
     i32 210, i32 220, i32 230, i32 240,
     i32 250, i32 260, i32 270, i32 280,
     i32 290, i32 300, i32 310, i32 320,
-    i32 addrspace(5)* %alloca)
+    i32 addrspace(5)* byval(i32) %alloca)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -214,7 +214,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %var = load volatile i1, i1 addrspace(1)* undef
-  call amdgpu_gfx void @external_void_func_i1_signext(i1 %var)
+  call amdgpu_gfx void @external_void_func_i1_signext(i1 signext %var)
   ret void
 }
 
@@ -280,7 +280,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %var = load volatile i1, i1 addrspace(1)* undef
-  call amdgpu_gfx void @external_void_func_i1_zeroext(i1 %var)
+  call amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext %var)
   ret void
 }
 
@@ -401,7 +401,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %var = load volatile i8, i8 addrspace(1)* undef
-  call amdgpu_gfx void @external_void_func_i8_signext(i8 %var)
+  call amdgpu_gfx void @external_void_func_i8_signext(i8 signext %var)
   ret void
 }
 
@@ -463,7 +463,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %var = load volatile i8, i8 addrspace(1)* undef
-  call amdgpu_gfx void @external_void_func_i8_zeroext(i8 %var)
+  call amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext %var)
   ret void
 }
 
@@ -584,7 +584,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %var = load volatile i16, i16 addrspace(1)* undef
-  call amdgpu_gfx void @external_void_func_i16_signext(i16 %var)
+  call amdgpu_gfx void @external_void_func_i16_signext(i16 signext %var)
   ret void
 }
 
@@ -646,7 +646,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %var = load volatile i16, i16 addrspace(1)* undef
-  call amdgpu_gfx void @external_void_func_i16_zeroext(i16 %var)
+  call amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext %var)
   ret void
 }
 
@@ -3081,7 +3081,7 @@
   %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1
   store i8 3, i8 addrspace(5)* %gep0
   store i32 8, i32 addrspace(5)* %gep1
-  call amdgpu_gfx void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %val)
+  call amdgpu_gfx void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %val)
   ret void
 }
 
@@ -3173,7 +3173,7 @@
   %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1
   store i8 3, i8 addrspace(5)* %in.gep0
   store i32 8, i32 addrspace(5)* %in.gep1
-  call amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %out.val, { i8, i32 } addrspace(5)* %in.val)
+  call amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %out.val, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %in.val)
   %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0
   %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 1
   %out.val0 = load i8, i8 addrspace(5)* %out.gep0
@@ -3383,7 +3383,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_i1_inreg(i1 true)
+  call amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg true)
   ret void
 }
 
@@ -3442,7 +3442,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_i8_inreg(i8 123)
+  call amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg 123)
   ret void
 }
 
@@ -3501,7 +3501,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_i16_inreg(i16 123)
+  call amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg 123)
   ret void
 }
 
@@ -3560,7 +3560,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_i32_inreg(i32 42)
+  call amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg 42)
   ret void
 }
 
@@ -3621,7 +3621,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_i64_inreg(i64 123)
+  call amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg 123)
   ret void
 }
 
@@ -3683,7 +3683,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %val = load <2 x i64>, <2 x i64> addrspace(4)* null
-  call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> %val)
+  call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg %val)
   ret void
 }
 
@@ -3748,7 +3748,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> )
+  call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg )
   ret void
 }
 
@@ -3816,7 +3816,7 @@
   %load = load <2 x i64>, <2 x i64> addrspace(4)* null
   %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32> 
-  call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> %val)
+  call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val)
   ret void
 }
 
@@ -3887,7 +3887,7 @@
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %load = load <2 x i64>, <2 x i64> addrspace(4)* null
   %val = shufflevector <2 x i64> %load, <2 x i64> , <4 x i32> 
-  call amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> %val)
+  call amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg %val)
   ret void
 }
 
@@ -3946,7 +3946,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_f16_inreg(half 4.0)
+  call amdgpu_gfx void @external_void_func_f16_inreg(half inreg 4.0)
   ret void
 }
 
@@ -4005,7 +4005,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_f32_inreg(float 4.0)
+  call amdgpu_gfx void @external_void_func_f32_inreg(float inreg 4.0)
   ret void
 }
 
@@ -4066,7 +4066,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> )
+  call amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg )
   ret void
 }
 
@@ -4129,7 +4129,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> )
+  call amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg )
   ret void
 }
 
@@ -4196,7 +4196,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> )
+  call amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg )
   ret void
 }
 
@@ -4257,7 +4257,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_f64_inreg(double 4.0)
+  call amdgpu_gfx void @external_void_func_f64_inreg(double inreg 4.0)
   ret void
 }
 
@@ -4322,7 +4322,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> )
+  call amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg )
   ret void
 }
 
@@ -4391,7 +4391,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> )
+  call amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg )
   ret void
 }
 
@@ -4451,7 +4451,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %val = load <2 x i16>, <2 x i16> addrspace(4)* undef
-  call amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> %val)
+  call amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg %val)
   ret void
 }
 
@@ -4511,7 +4511,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %val = load <3 x i16>, <3 x i16> addrspace(4)* undef
-  call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> %val)
+  call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg %val)
   ret void
 }
 
@@ -4571,7 +4571,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %val = load <3 x half>, <3 x half> addrspace(4)* undef
-  call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> %val)
+  call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg %val)
   ret void
 }
 
@@ -4632,7 +4632,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> )
+  call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg )
   ret void
 }
 
@@ -4693,7 +4693,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> )
+  call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg )
   ret void
 }
 
@@ -4753,7 +4753,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %val = load <4 x i16>, <4 x i16> addrspace(4)* undef
-  call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> %val)
+  call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg %val)
   ret void
 }
 
@@ -4814,7 +4814,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> )
+  call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg )
   ret void
 }
 
@@ -4874,7 +4874,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %val = load <2 x half>, <2 x half> addrspace(4)* undef
-  call amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> %val)
+  call amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg %val)
   ret void
 }
 
@@ -4934,7 +4934,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %val = load <2 x i32>, <2 x i32> addrspace(4)* undef
-  call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> %val)
+  call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg %val)
   ret void
 }
 
@@ -4995,7 +4995,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> )
+  call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg )
   ret void
 }
 
@@ -5058,7 +5058,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> )
+  call amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg )
   ret void
 }
 
@@ -5123,7 +5123,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> , i32 6)
+  call amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg , i32 inreg 6)
   ret void
 }
 
@@ -5183,7 +5183,7 @@
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %val = load <4 x i32>, <4 x i32> addrspace(4)* undef
-  call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> %val)
+  call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg %val)
   ret void
 }
 
@@ -5248,7 +5248,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> )
+  call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg )
   ret void
 }
 
@@ -5315,7 +5315,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> )
+  call amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg )
   ret void
 }
 
@@ -5380,7 +5380,7 @@
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %ptr = load <8 x i32> addrspace(4)*, <8 x i32> addrspace(4)* addrspace(4)* undef
   %val = load <8 x i32>, <8 x i32> addrspace(4)* %ptr
-  call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> %val)
+  call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg %val)
   ret void
 }
 
@@ -5453,7 +5453,7 @@
 ; GFX10-NEXT: s_mov_b32 exec_lo, s6
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
-  call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> )
+  call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg )
   ret void
 }
 
@@ -5518,7 +5518,7 @@
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %ptr = load <16 x i32> addrspace(4)*, <16 x i32> addrspace(4)* addrspace(4)* undef
   %val = load <16 x i32>, <16 x i32> addrspace(4)* %ptr
-  call amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> %val)
+  call amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg %val)
   ret void
 }
 
@@ -5696,7 +5696,7 @@
 ; GFX10-NEXT: s_setpc_b64 s[4:5]
   %ptr = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef
   %val = load <32 x i32>, <32 x i32> addrspace(4)* %ptr
-  call amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> %val)
+  call amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg %val)
   ret void
 }
 
@@ -5882,7 +5882,7 @@
   %ptr0 = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef
   %val0 = load <32 x i32>, <32 x i32> addrspace(4)* %ptr0
   %val1 = load i32, i32 addrspace(4)* undef
-  call amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> %val0, i32 %val1)
+  call amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg %val0, i32 inreg %val1)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll
--- a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll
+++ b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll
@@ -23,6 +23,6 @@
 ; GCN-NEXT: s_addc_u32 s7, s7, callee@rel32@hi+12
 ; GCN-NEXT: s_setpc_b64 s[6:7]
   %add = fadd float %arg0, 1.0
-  %call = tail call amdgpu_gfx float @callee(float %add, float 2.0)
+  %call = tail call amdgpu_gfx float @callee(float %add, float inreg 2.0)
   ret float %call
 }
diff --git a/llvm/test/CodeGen/ARM/ipra-r0-returned.ll b/llvm/test/CodeGen/ARM/ipra-r0-returned.ll
--- a/llvm/test/CodeGen/ARM/ipra-r0-returned.ll
+++ b/llvm/test/CodeGen/ARM/ipra-r0-returned.ll
@@ -13,6 +13,6 @@
 ; CHECK-NOT: r0
 ; CHECK: bl returns_r0
 ; CHECK-NOT: r0
-  %b = call i32 @returns_r0(i32 %a)
+  %b = call i32 @returns_r0(i32 returned %a)
   ret i32 %a
 }
diff --git a/llvm/test/CodeGen/ARM/returned-ext.ll b/llvm/test/CodeGen/ARM/returned-ext.ll
--- a/llvm/test/CodeGen/ARM/returned-ext.ll
+++ b/llvm/test/CodeGen/ARM/returned-ext.ll
@@ -22,9 +22,9 @@
 ; CHECKT2D: uxth r0, r0
 ; CHECKT2D: bl _identity32
 ; CHECKT2D: mov r0, [[SAVEX]]
-  %call = tail call i16 @identity16(i16 %x)
+  %call = tail call i16 @identity16(i16 returned %x)
   %b = zext i16 %call to i32
-  %call2 = tail call i32 @identity32(i32 %b)
+  %call2 = tail call i32 @identity32(i32 returned %b)
   ret i16 %x
 }
 
@@ -56,9 +56,9 @@
 ; This shouldn't be required
 ; CHECKT2D: mov r0, [[SAVEX]]
 
-  %call = tail call i16 @retzext16(i16 %x)
+  %call = tail call i16 @retzext16(i16 returned %x)
   %b = zext i16 %call to i32
-  %call2 = tail call i32 @identity32(i32 %b)
+  %call2 = tail call i32 @identity32(i32 returned %b)
   ret i16 %x
 }
 
@@ -76,9 +76,9 @@
 ; CHECKT2D: sxth r0, {{r[0-9]+}}
 ; CHECKT2D: bl _identity32
 ; CHECKT2D: mov r0, [[SAVEX]]
-  %call = tail call i16 @retzext16(i16 %x)
+  %call = tail call i16 @retzext16(i16 returned %x)
   %b = sext i16 %call to i32
-  %call2 = tail call i32 @identity32(i32 %b)
+  %call2 = tail call i32 @identity32(i32 returned %b)
   ret i16 %x
 }
 
@@ -96,10 +96,10 @@
 ; CHECKT2D: uxth r0, r0
 ; CHECKT2D: bl _identity32
 ; CHECKT2D: b.w _paramzext16
-  %call = tail call i16 @paramzext16(i16 %x)
+  %call = tail call i16 @paramzext16(i16 zeroext returned %x)
   %b = zext i16 %call to i32
-  %call2 = tail call i32 @identity32(i32 %b)
-  %call3 = tail call i16 @paramzext16(i16 %call)
+  %call2 = tail call i32 @identity32(i32 returned %b)
+  %call3 = tail call i16 @paramzext16(i16 zeroext returned %call)
   ret i16 %call3
 }
 
@@ -121,13 +121,13 @@
 ; CHECKT2D: bl _paramzext16
 ; CHECKT2D: bl _identity32
 ; CHECKT2D: b.w _paramzext16
-  %call = tail call i16 @paramzext16(i16 %x)
+  %call = tail call i16 @paramzext16(i16 zeroext returned %x)
 
 ; Should make no difference if %x is used below rather than %call, but it does
 
   %b = zext i16 %x to i32
   %call2 = tail call i32 @identity32(i32 %b)
 
-  %call3 = tail call i16 @paramzext16(i16 %call)
+  %call3 = tail call i16 @paramzext16(i16 zeroext returned %call)
   ret i16 %call3
 }
 
@@ -149,9 +149,9 @@
 ; FIXME: Tail call should be OK here
 ; CHECKT2D: bl _identity32
 
-  %call = tail call i16 @bothzext16(i16 %x)
+  %call = tail call i16 @bothzext16(i16 zeroext returned %x)
   %b = zext i16 %x to i32
-  %call2 = tail call i32 @identity32(i32 %b)
+  %call2 = tail call i32 @identity32(i32 returned %b)
   ret i16 %call
 }
 
@@ -171,8 +171,8 @@
 ; CHECKT2D: sxth r0, [[SAVEX]]
 ; CHECKT2D: bl _identity32
 ; CHECKT2D: mov r0, [[SAVEX]]
-  %call = tail call i16 @bothzext16(i16 %x)
+  %call = tail call i16 @bothzext16(i16 zeroext returned %x)
   %b = sext i16 %x to i32
-  %call2 = tail call i32 @identity32(i32 %b)
+  %call2 = tail call i32 @identity32(i32 returned %b)
   ret i16 %x
 }
diff --git a/llvm/test/CodeGen/ARM/this-return.ll b/llvm/test/CodeGen/ARM/this-return.ll
--- a/llvm/test/CodeGen/ARM/this-return.ll
+++ b/llvm/test/CodeGen/ARM/this-return.ll
@@ -28,9 +28,9 @@
 ; CHECKT2D-NOT: mov r0, {{r[0-9]+}}
 ; CHECKT2D: b.w _B_ctor_base
   %0 = bitcast %struct.C* %this to %struct.A*
-  %call = tail call %struct.A* @A_ctor_base(%struct.A* %0)
+  %call = tail call %struct.A* @A_ctor_base(%struct.A* returned %0)
   %1 = getelementptr inbounds %struct.C, %struct.C* %this, i32 0, i32 0
-  %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x)
+  %call2 = tail call %struct.B* @B_ctor_base(%struct.B* returned %1, i32 %x)
   ret %struct.C* %this
 }
 
@@ -59,7 +59,7 @@
 ; CHECKELF: b C_ctor_base
 ; CHECKT2D-LABEL: C_ctor_complete:
 ; CHECKT2D: b.w _C_ctor_base
-  %call = tail call %struct.C* @C_ctor_base(%struct.C* %this, i32 %x)
+  %call = tail call %struct.C* @C_ctor_base(%struct.C* returned %this, i32 %x)
   ret %struct.C* %this
 }
 
@@ -86,8 +86,8 @@
 ; CHECKT2D-NOT: mov r0, {{r[0-9]+}}
 ; CHECKT2D: b.w _B_ctor_complete
   %b = getelementptr inbounds %struct.D, %struct.D* %this, i32 0, i32 0
-  %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
-  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+  %call = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b, i32 %x)
+  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b, i32 %x)
   ret %struct.D* %this
 }
 
@@ -98,8 +98,8 @@
 ; CHECKT2D-LABEL: E_ctor_base:
 ; CHECKT2D-NOT: b.w _B_ctor_complete
   %b = getelementptr inbounds %struct.E, %struct.E* %this, i32 0, i32 0
-  %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+  %call = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b, i32 %x)
   %b2 = getelementptr inbounds %struct.E, %struct.E* %this, i32 0, i32 1
-  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b2, i32 %x)
+  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* returned %b2, i32 %x)
   ret %struct.E* %this
 }
diff --git a/llvm/test/CodeGen/SPARC/64abi.ll b/llvm/test/CodeGen/SPARC/64abi.ll
--- a/llvm/test/CodeGen/SPARC/64abi.ll
+++ b/llvm/test/CodeGen/SPARC/64abi.ll
@@ -50,7 +50,7 @@
 ; CHECK-NOT: add %sp
 ; CHECK: restore
 define void @call_intarg(i32 %i0, i8* %i1) {
-  call void @intarg(i8 0, i8 1, i16 2, i32 3, i8* undef, i32 5, i32 %i0, i8* %i1)
+  call void @intarg(i8 0, i8 1, i16 2, i32 3, i8* undef, i32 5, i32 signext %i0, i8* %i1)
   ret void
 }
 
@@ -222,7 +222,7 @@
 ; SOFT: or %i1, %i0, %o0
 ; CHECK: call inreg_fi
 define void @call_inreg_fi(i32* %p, i32 %i1, float %f5) {
-  %x = call i32 @inreg_fi(i32 %i1, float %f5)
+  %x = call i32 @inreg_fi(i32 inreg %i1, float inreg %f5)
   ret void
 }
 
@@ -245,7 +245,7 @@
 ; SOFT: or %i1, %i0, %o0
 ; CHECK: call inreg_ff
 define void @call_inreg_ff(i32* %p, float %f3, float %f5) {
-  %x = call float @inreg_ff(float %f3, float %f5)
+  %x = call float @inreg_ff(float inreg %f3, float inreg %f5)
   ret void
 }
 
@@ -269,7 +269,7 @@
 ; SOFT: or %i1, %i0, %o0
 ; CHECK: call inreg_if
 define void @call_inreg_if(i32* %p, float %f3, i32 %i2) {
-  %x = call i32 @inreg_if(float %f3, i32 %i2)
+  %x = call i32 @inreg_if(float inreg %f3, i32 inreg %i2)
   ret void
 }
 
@@ -289,7 +289,7 @@
 ; CHECK: or [[R1]], [[R2]], %o0
 ; CHECK: call inreg_ii
 define void @call_inreg_ii(i32* %p, i32 %i1, i32 %i2) {
-  %x = call i32 @inreg_ii(i32 %i1, i32 %i2)
+  %x = call i32 @inreg_ii(i32 inreg %i1, i32 inreg %i2)
   ret void
 }
diff --git a/llvm/test/CodeGen/SystemZ/args-02.ll b/llvm/test/CodeGen/SystemZ/args-02.ll
--- a/llvm/test/CodeGen/SystemZ/args-02.ll
+++ b/llvm/test/CodeGen/SystemZ/args-02.ll
@@ -66,9 +66,9 @@
 ; CHECK-STACK: mvghi 160(%r15), -5
 ; CHECK-STACK: brasl %r14, bar@PLT
 
-  call void @bar (i8 -1, i16 -2, i32 -3, i64 -4, float 0.0, double 0.0,
+  call void @bar (i8 signext -1, i16 signext -2, i32 signext -3, i64 -4, float 0.0, double 0.0,
                   fp128 0xL00000000000000000000000000000000, i64 -5,
-                  float -0.0, double -0.0, i8 -6, i16 -7, i32 -8, i64 -9,
+                  float -0.0, double -0.0, i8 signext -6, i16 signext -7, i32 signext -8, i64 -9,
                   float 0.0, double 0.0,
                   fp128 0xL00000000000000000000000000000000)
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/args-03.ll b/llvm/test/CodeGen/SystemZ/args-03.ll
--- a/llvm/test/CodeGen/SystemZ/args-03.ll
+++ b/llvm/test/CodeGen/SystemZ/args-03.ll
@@ -68,9 +68,9 @@
 ; CHECK-STACK: mvghi 160(%r15), -5
 ; CHECK-STACK: brasl %r14, bar@PLT
 
-  call void @bar (i8 -1, i16 -2, i32 -3, i64 -4, float 0.0, double 0.0,
+  call void @bar (i8 zeroext -1, i16 zeroext -2, i32 zeroext -3, i64 -4, float 0.0, double 0.0,
                   fp128 0xL00000000000000000000000000000000, i64 -5,
-                  float -0.0, double -0.0, i8 -6, i16 -7, i32 -8, i64 -9,
+                  float -0.0, double -0.0, i8 zeroext -6, i16 zeroext -7, i32 zeroext -8, i64 -9,
                   float 0.0, double 0.0,
                   fp128 0xL00000000000000000000000000000000)
   ret void
diff --git a/llvm/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/llvm/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
--- a/llvm/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/llvm/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -7,7 +7,7 @@
 
 define x86_fastcallcc void @caller(i32, i64) {
   %X = alloca i32 ; <i32*> [#uses=1]
-  call x86_fastcallcc void @func( i32* %X, i64 0 )
+  call x86_fastcallcc void @func( i32* %X, i64 inreg 0 )
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/fast-cc-pass-in-regs.ll b/llvm/test/CodeGen/X86/fast-cc-pass-in-regs.ll
--- a/llvm/test/CodeGen/X86/fast-cc-pass-in-regs.ll
+++ b/llvm/test/CodeGen/X86/fast-cc-pass-in-regs.ll
@@ -4,7 +4,7 @@
 declare x86_fastcallcc i64 @callee(i64 inreg)
 
 define i64 @caller() {
-  %X = call x86_fastcallcc i64 @callee( i64 4294967299 ) ; <i64> [#uses=1]
+  %X = call x86_fastcallcc i64 @callee( i64 inreg 4294967299 ) ; <i64> [#uses=1]
 ; CHECK: mov{{.*}}edx, 1
   ret i64 %X
 }
diff --git a/llvm/test/CodeGen/X86/mismatched-byval.ll b/llvm/test/CodeGen/X86/mismatched-byval.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/mismatched-byval.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
+
+; This tests that we only look at the call site for ABI attributes, so f and f2 should codegen differently
+
+define void @b(i8* byval(i8) %p) {
+; CHECK-LABEL: b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    retq
+  ret void
+}
+
+define void @f(i8 %p) {
+; CHECK-LABEL: f:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movb {{[0-9]+}}(%rsp), %al
+; CHECK-NEXT:    movb %al, (%rsp)
+; CHECK-NEXT:    callq b@PLT
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %a = alloca i8
+  ;store i8 %p, i8* %a
+  call void @b(i8* byval(i8) %a)
+  ret void
+}
+
+define void @f2(i8 %p) {
+; CHECK-LABEL: f2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT:    callq b@PLT
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %a = alloca i8
+  ;store i8 %p, i8* %a
+  call void @b(i8* %a)
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/X86/movtopush.ll b/llvm/test/CodeGen/X86/movtopush.ll
--- a/llvm/test/CodeGen/X86/movtopush.ll
+++ b/llvm/test/CodeGen/X86/movtopush.ll
@@ -107,7 +107,7 @@
 ; NORMAL-NEXT: addl $12, %esp
 define void @test4() optsize {
 entry:
-  call void @inreg(i32 1, i32 2, i32 3, i32 4)
+  call void @inreg(i32 1, i32 inreg 2, i32 3, i32 4)
   ret void
 }
 
@@ -307,9 +307,9 @@
 define void @test12() optsize {
 entry:
   %s = alloca %struct.s, align 4
-  call void @struct(%struct.s* %s, i32 2, i32 3, i32 4)
+  call void @struct(%struct.s* byval(%struct.s) %s, i32 2, i32 3, i32 4)
   call void @good(i32 5, i32 6, i32 7, i32 8)
-  call void @struct(%struct.s* %s, i32 10, i32 11, i32 12)
+  call void @struct(%struct.s* byval(%struct.s) %s, i32 10, i32 11, i32 12)
   ret void
 }
 
@@ -340,7 +340,7 @@
 entry:
   %s = alloca %struct.s, align 4
   call void @good(i32 1, i32 2, i32 3, i32 4)
-  call void @struct(%struct.s* %s, i32 6, i32 7, i32 8)
+  call void @struct(%struct.s* byval(%struct.s) %s, i32 6, i32 7, i32 8)
   call void @good(i32 9, i32 10, i32 11, i32 12)
   ret void
 }
 
@@ -413,7 +413,7 @@
   %0 = bitcast %struct.A* %a to i64*
   %1 = load i64, i64* %0, align 4
   store i64 %1, i64* %agg.tmp, align 4
-  %call = call x86_thiscallcc %struct.B* @B_ctor(%struct.B* %ref.tmp, %struct.A* byval(%struct.A) %tmpcast)
+  %call = call x86_thiscallcc %struct.B* @B_ctor(%struct.B* returned %ref.tmp, %struct.A* byval(%struct.A) %tmpcast)
   %2 = getelementptr inbounds %struct.B, %struct.B* %tmp, i32 0, i32 0
   call void @B_func(%struct.B* sret(%struct.B) %tmp, %struct.B* %ref.tmp, i32 1)
   ret void
diff --git a/llvm/test/CodeGen/X86/pop-stack-cleanup.ll b/llvm/test/CodeGen/X86/pop-stack-cleanup.ll
--- a/llvm/test/CodeGen/X86/pop-stack-cleanup.ll
+++ b/llvm/test/CodeGen/X86/pop-stack-cleanup.ll
@@ -60,7 +60,7 @@
 ; CHECK-DAG: movl {{.*}}, %edx
 ; CHECK: calll _spill
   %i = call i32 @param2_ret(i32 1, i32 2)
-  call void @spill(i32 %a, i32 %b, i32 %c)
+  call void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c)
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/preallocated.ll b/llvm/test/CodeGen/X86/preallocated.ll
--- a/llvm/test/CodeGen/X86/preallocated.ll
+++ b/llvm/test/CodeGen/X86/preallocated.ll
@@ -129,11 +129,11 @@
 ; CHECK: pushl [[REGISTER2]]
 ; CHECK: calll _init
-  call void @foo_ret_p(%Foo* %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t2)]
+  call void @foo_ret_p(%Foo* sret(%Foo) %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t2)]
 ; CHECK-NOT: subl {{\$[0-9]+}}, %esp
 ; CHECK-NOT: pushl
 ; CHECK: calll _foo_ret_p
-  call void @foo_ret_p(%Foo* %tmp, %Foo* preallocated(%Foo) %b1) ["preallocated"(token %t1)]
+  call void @foo_ret_p(%Foo* sret(%Foo) %tmp, %Foo* preallocated(%Foo) %b1) ["preallocated"(token %t1)]
 ; CHECK-NOT: subl {{\$[0-9]+}}, %esp
 ; CHECK-NOT: pushl
 ; CHECK: calll _foo_ret_p
 
@@ -150,7 +150,7 @@
 ; CHECK: subl $8, %esp
 ; CHECK: movl $9, %eax
 ; CHECK: calll _foo_inreg_p
-  call void @foo_inreg_p(i32 9, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  call void @foo_inreg_p(i32 inreg 9, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/tailcall-msvc-conventions.ll b/llvm/test/CodeGen/X86/tailcall-msvc-conventions.ll
--- a/llvm/test/CodeGen/X86/tailcall-msvc-conventions.ll
+++ b/llvm/test/CodeGen/X86/tailcall-msvc-conventions.ll
@@ -181,7 +181,7 @@
 declare x86_fastcallcc void @fastcall2(i32 inreg %a, i32 inreg %b)
 define void @cdecl_fastcall_tail(i32 %a, i32 %b) {
-  tail call x86_fastcallcc void @fastcall2(i32 %a, i32 %b)
+  tail call x86_fastcallcc void @fastcall2(i32 inreg %a, i32 inreg %b)
   ret void
 }
 ; fastcall2 won't pop anything.
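
The release note above has a practical consequence for IR producers: parameter ABI attributes (signext, zeroext, inreg, sret, byval, preallocated, inalloca, returned) must now be present on each call site, since lowering no longer falls back to the callee declaration. A minimal sketch of conforming IR follows; the @callee/@caller names are hypothetical and not part of the patch:

declare void @callee(i8 signext, i32* byval(i32))

define void @caller(i8 %c, i32* %p) {
  ; The signext and byval attributes are repeated on the call itself; after
  ; this change, attributes on @callee's declaration are not consulted when
  ; this call is lowered.
  call void @callee(i8 signext %c, i32* byval(i32) %p)
  ret void
}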