Index: llvm/lib/Transforms/Utils/Local.cpp =================================================================== --- llvm/lib/Transforms/Utils/Local.cpp +++ llvm/lib/Transforms/Utils/Local.cpp @@ -414,6 +414,59 @@ return wouldInstructionBeTriviallyDead(I, TLI); } +/// Return true if this is a call that optionally returns a value, and whose +/// only other side effect is writing to a trivially unread memory location. +static bool isTriviallyDSEableCall(CallBase &CB) { + if (!CB.doesNotThrow() || !CB.onlyAccessesArgMemory() || CB.isTerminator()) + return false; + + if (CB.hasOperandBundles()) + return false; + + // TODO: Could extend this to handle writes to multiple dead allocas, but + // that's probably complicated enough to defer to DSE. We could similarly + // be fancier about capture, but again, probably best left for DSE. + Value *WrittenParam = nullptr; + for (unsigned i = 0; i < CB.arg_size(); i++) { + if (!CB.getArgOperand(i)->getType()->isPointerTy()) + continue; + if (!CB.doesNotCapture(i)) + // Reasoning about reads from potentially captured locations is tricky. + return false; + if (CB.paramHasAttr(i, Attribute::WriteOnly)) { + if (WrittenParam) + return false; + WrittenParam = CB.getArgOperand(i); + } + } + if (!WrittenParam) + return false; + + // TODO: We could allow passing an offset pointer to the call by handling + // geps below. + auto *AI = dyn_cast<AllocaInst>(WrittenParam->stripPointerCasts()); + if (!AI) + return false; + + // The alloca should only be used in lifetime intrinsics and the call + // itself, i.e. the write-only argument has no semantic uses. + SmallVector<User *> AllocaUsers(AI->users()); + while (!AllocaUsers.empty()) { + auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val()); + if (isa<BitCastInst>(UserI)) { + AllocaUsers.append(UserI->user_begin(), UserI->user_end()); + continue; + } + if (UserI == &CB) + // Note: This is only correct because we conservatively checked for + // capture above. 
+ continue; + auto *II = dyn_cast<IntrinsicInst>(UserI); + if (!II || !II->isLifetimeStartOrEnd()) + return false; + } + return true; +} bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI) { if (I->isTerminator()) @@ -499,9 +552,12 @@ if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) return C->isNullValue() || isa<UndefValue>(C); - if (auto *Call = dyn_cast<CallBase>(I)) + if (auto *Call = dyn_cast<CallBase>(I)) { if (isMathLibCallNoop(Call, TLI)) return true; + if (isTriviallyDSEableCall(*Call)) + return true; + } // To express possible interaction with floating point environment constrained // intrinsics are described as if they access memory. So they look like having Index: llvm/test/CodeGen/AMDGPU/flat-scratch.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -500,9 +500,9 @@ ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s32 -; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, 15 -; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 +; GFX9-NEXT: v_and_b32_e32 v0, v0, v3 ; GFX9-NEXT: scratch_store_dword v2, v3, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1 @@ -514,14 +514,14 @@ ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mov_b32_e32 v1, s32 -; GFX10-NEXT: v_and_b32_e32 v2, 15, v0 -; GFX10-NEXT: v_mov_b32_e32 v3, 15 -; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v1 -; GFX10-NEXT: v_lshl_add_u32 v1, v2, 2, v1 -; GFX10-NEXT: scratch_store_dword v0, v3, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 
-; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc +; GFX10-NEXT: scratch_load_dword v0, v2, off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -529,9 +529,9 @@ ; GFX9-PAL: ; %bb.0: ; %bb ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s32 -; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15 -; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 +; GFX9-PAL-NEXT: v_and_b32_e32 v0, v0, v3 ; GFX9-PAL-NEXT: scratch_store_dword v2, v3, off ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1 @@ -543,14 +543,14 @@ ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-PAL-NEXT: v_mov_b32_e32 v1, s32 -; GFX10-PAL-NEXT: v_and_b32_e32 v2, 15, v0 -; GFX10-PAL-NEXT: v_mov_b32_e32 v3, 15 -; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1 -; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v2, 2, v1 -; GFX10-PAL-NEXT: scratch_store_dword v0, v3, off +; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-PAL-NEXT: v_mov_b32_e32 v2, s32 +; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1 +; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2 +; GFX10-PAL-NEXT: v_lshl_add_u32 v2, v3, 2, v2 +; GFX10-PAL-NEXT: scratch_store_dword v0, v1, off ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc +; GFX10-PAL-NEXT: scratch_load_dword v0, v2, off glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] bb: @@ -1247,9 +1247,9 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100 ; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi -; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, 15 -; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 +; GFX9-NEXT: v_and_b32_e32 v0, v0, v3 ; GFX9-NEXT: scratch_store_dword v2, v3, off ; GFX9-NEXT: 
s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1 @@ -1261,17 +1261,17 @@ ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100 -; GFX10-NEXT: v_and_b32_e32 v2, 15, v0 -; GFX10-NEXT: v_mov_b32_e32 v1, vcc_lo -; GFX10-NEXT: v_mov_b32_e32 v3, 15 -; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v1 -; GFX10-NEXT: v_lshl_add_u32 v1, v2, 2, v1 -; GFX10-NEXT: scratch_load_dword v2, off, s32 glc dlc +; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo +; GFX10-NEXT: v_and_b32_e32 v3, v0, v1 +; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2 +; GFX10-NEXT: v_lshl_add_u32 v2, v3, 2, v2 +; GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: scratch_store_dword v0, v3, off +; GFX10-NEXT: scratch_store_dword v0, v1, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc +; GFX10-NEXT: scratch_load_dword v0, v2, off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -1282,9 +1282,9 @@ ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x100 ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi -; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15 -; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 +; GFX9-PAL-NEXT: v_and_b32_e32 v0, v0, v3 ; GFX9-PAL-NEXT: scratch_store_dword v2, v3, off ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1 @@ -1296,17 +1296,17 @@ ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100 -; GFX10-PAL-NEXT: v_and_b32_e32 v2, 15, v0 -; GFX10-PAL-NEXT: v_mov_b32_e32 v1, vcc_lo -; GFX10-PAL-NEXT: v_mov_b32_e32 v3, 15 -; GFX10-PAL-NEXT: 
v_lshl_add_u32 v0, v0, 2, v1 -; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v2, 2, v1 -; GFX10-PAL-NEXT: scratch_load_dword v2, off, s32 glc dlc +; GFX10-PAL-NEXT: v_mov_b32_e32 v2, vcc_lo +; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1 +; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2 +; GFX10-PAL-NEXT: v_lshl_add_u32 v2, v3, 2, v2 +; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: scratch_store_dword v0, v3, off +; GFX10-PAL-NEXT: scratch_store_dword v0, v1, off ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc +; GFX10-PAL-NEXT: scratch_load_dword v0, v2, off glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] bb: @@ -1485,7 +1485,7 @@ ; GFX9-LABEL: zero_init_large_offset_foo: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: scratch_load_dword v0, off, s32 offset:16 glc +; GFX9-NEXT: scratch_load_dword v0, off, s32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_mov_b32 s1, s0 @@ -1495,13 +1495,13 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4010 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4010 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16 -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4010 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32 -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4010 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -1510,10 +1510,10 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: 
s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: scratch_load_dword v0, off, s32 offset:16 glc dlc +; GFX10-NEXT: scratch_load_dword v0, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4010 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX10-NEXT: s_mov_b32 s1, s0 ; GFX10-NEXT: s_mov_b32 s2, s0 ; GFX10-NEXT: s_mov_b32 s3, s0 @@ -1522,11 +1522,11 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4010 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16 -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4010 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4010 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1534,7 +1534,7 @@ ; GFX9-PAL-LABEL: zero_init_large_offset_foo: ; GFX9-PAL: ; %bb.0: ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-PAL-NEXT: scratch_load_dword v0, off, s32 offset:16 glc +; GFX9-PAL-NEXT: scratch_load_dword v0, off, s32 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_mov_b32 s0, 0 ; GFX9-PAL-NEXT: s_mov_b32 s1, s0 @@ -1544,13 +1544,13 @@ ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4010 +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi -; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4010 +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16 -; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4010 +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 
0x4000 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32 -; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4010 +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] @@ -1559,10 +1559,10 @@ ; GFX1010-PAL: ; %bb.0: ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s32 offset:16 glc dlc +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s32 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010 +; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1010-PAL-NEXT: s_mov_b32 s1, s0 ; GFX1010-PAL-NEXT: s_mov_b32 s2, s0 ; GFX1010-PAL-NEXT: s_mov_b32 s3, s0 @@ -1572,13 +1572,13 @@ ; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo ; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 -; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010 +; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16 ; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 -; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010 +; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 ; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 -; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010 +; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: s_setpc_b64 s[30:31] @@ -1587,10 +1587,10 @@ ; GFX1030-PAL: ; %bb.0: ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s32 offset:16 glc dlc +; 
GFX1030-PAL-NEXT: scratch_load_dword v0, off, s32 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1030-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010 +; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1030-PAL-NEXT: s_mov_b32 s1, s0 ; GFX1030-PAL-NEXT: s_mov_b32 s2, s0 ; GFX1030-PAL-NEXT: s_mov_b32 s3, s0 @@ -1599,11 +1599,11 @@ ; GFX1030-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v3, s3 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo -; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010 +; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16 -; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010 +; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 -; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010 +; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: s_setpc_b64 s[30:31] @@ -2015,13 +2015,13 @@ ; GFX9-LABEL: store_load_vindex_large_offset_foo: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: scratch_load_dword v1, off, s32 offset:4 glc +; GFX9-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi -; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, 15 -; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1 +; GFX9-NEXT: v_and_b32_e32 v0, v0, v3 ; GFX9-NEXT: scratch_store_dword v2, v3, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1 @@ -2033,30 +2033,30 @@ ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 
-; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX10-NEXT: v_and_b32_e32 v2, 15, v0 -; GFX10-NEXT: v_mov_b32_e32 v1, vcc_lo -; GFX10-NEXT: v_mov_b32_e32 v3, 15 -; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v1 -; GFX10-NEXT: v_lshl_add_u32 v1, v2, 2, v1 -; GFX10-NEXT: scratch_load_dword v2, off, s32 offset:4 glc dlc +; GFX10-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000 +; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo +; GFX10-NEXT: v_and_b32_e32 v3, v0, v1 +; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2 +; GFX10-NEXT: v_lshl_add_u32 v2, v3, 2, v2 +; GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: scratch_store_dword v0, v3, off +; GFX10-NEXT: scratch_store_dword v0, v1, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc +; GFX10-NEXT: scratch_load_dword v0, v2, off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-PAL-LABEL: store_load_vindex_large_offset_foo: ; GFX9-PAL: ; %bb.0: ; %bb ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 offset:4 glc +; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4004 +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000 ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi -; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15 -; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1 +; GFX9-PAL-NEXT: v_and_b32_e32 v0, v0, v3 ; GFX9-PAL-NEXT: scratch_store_dword v2, v3, off ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1 @@ -2068,17 +2068,17 @@ ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX10-PAL-NEXT: v_and_b32_e32 v2, 15, v0 -; 
GFX10-PAL-NEXT: v_mov_b32_e32 v1, vcc_lo -; GFX10-PAL-NEXT: v_mov_b32_e32 v3, 15 -; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1 -; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v2, 2, v1 -; GFX10-PAL-NEXT: scratch_load_dword v2, off, s32 offset:4 glc dlc +; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000 +; GFX10-PAL-NEXT: v_mov_b32_e32 v2, vcc_lo +; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1 +; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2 +; GFX10-PAL-NEXT: v_lshl_add_u32 v2, v3, 2, v2 +; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: scratch_store_dword v0, v3, off +; GFX10-PAL-NEXT: scratch_store_dword v0, v1, off ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc +; GFX10-PAL-NEXT: scratch_load_dword v0, v2, off glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] bb: @@ -2218,10 +2218,9 @@ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: s_movk_i32 s0, 0x3000 -; GFX9-NEXT: s_add_i32 vcc_hi, s32, 4 -; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4 +; GFX9-NEXT: scratch_store_dword off, v0, s32 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 s0, s0, vcc_hi +; GFX9-NEXT: s_add_i32 s0, s0, s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -2236,9 +2235,8 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3800 -; GFX10-NEXT: s_add_i32 vcc_lo, s32, 4 -; GFX10-NEXT: s_add_i32 s0, s0, vcc_lo -; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4 +; GFX10-NEXT: s_add_i32 s0, s0, s32 +; GFX10-NEXT: scratch_store_dword off, v0, s32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -2251,10 +2249,9 @@ ; 
GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000 -; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 4 -; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4 +; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 s0, s0, vcc_hi +; GFX9-PAL-NEXT: s_add_i32 s0, s0, s32 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) @@ -2269,9 +2266,8 @@ ; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800 -; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 4 -; GFX10-PAL-NEXT: s_add_i32 s0, s0, vcc_lo -; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4 +; GFX10-PAL-NEXT: s_add_i32 s0, s0, s32 +; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32 ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 Index: llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll =================================================================== --- llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll +++ llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll @@ -56,8 +56,6 @@ ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller ; IS__CGSCC_NPM-SAME: () #[[ATTR1:[0-9]+]] { -; IS__CGSCC_NPM-NEXT: [[A:%.*]] = alloca i32, align 4 -; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) #[[ATTR2:[0-9]+]] ; IS__CGSCC_NPM-NEXT: ret i32 undef ; %A = alloca i32 @@ -101,5 +99,4 @@ ;. 
; IS__CGSCC_NPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } ; IS__CGSCC_NPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; IS__CGSCC_NPM: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } ;. Index: llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll =================================================================== --- llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll +++ llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll @@ -74,19 +74,10 @@ ; IS__TUNIT____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2]] ; IS__TUNIT____-NEXT: ret i32 0 ; -; IS__CGSCC_OPM: Function Attrs: nofree nosync nounwind readnone willreturn -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@callercaller -; IS__CGSCC_OPM-SAME: () #[[ATTR1:[0-9]+]] { -; IS__CGSCC_OPM-NEXT: [[B:%.*]] = alloca i32, align 4 -; IS__CGSCC_OPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR3:[0-9]+]] -; IS__CGSCC_OPM-NEXT: ret i32 0 -; -; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callercaller -; IS__CGSCC_NPM-SAME: () #[[ATTR1:[0-9]+]] { -; IS__CGSCC_NPM-NEXT: [[B:%.*]] = alloca i32, align 4 -; IS__CGSCC_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR3:[0-9]+]] -; IS__CGSCC_NPM-NEXT: ret i32 0 +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@callercaller +; IS__CGSCC____-SAME: () #[[ATTR1:[0-9]+]] { +; IS__CGSCC____-NEXT: ret i32 0 ; %B = alloca i32 store i32 2, i32* %B @@ -100,12 +91,10 @@ ; IS__TUNIT____: attributes 
#[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } ;. ; IS__CGSCC_OPM: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind willreturn writeonly } -; IS__CGSCC_OPM: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; IS__CGSCC_OPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } ; IS__CGSCC_OPM: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; IS__CGSCC_OPM: attributes #[[ATTR3]] = { nounwind willreturn writeonly } ;. ; IS__CGSCC_NPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } ; IS__CGSCC_NPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } ; IS__CGSCC_NPM: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; IS__CGSCC_NPM: attributes #[[ATTR3]] = { nounwind willreturn writeonly } ;. Index: llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll =================================================================== --- llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll +++ llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll @@ -9,28 +9,6 @@ define internal void @add({i32, i32}* %this, i32* sret(i32) %r) { ; -; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@add -; IS__TUNIT_OPM-SAME: ({ i32, i32 }* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* nocapture nofree noundef nonnull writeonly sret(i32) align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR0:[0-9]+]] { -; IS__TUNIT_OPM-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 0 -; IS__TUNIT_OPM-NEXT: [[BP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 1 -; IS__TUNIT_OPM-NEXT: [[A:%.*]] = load i32, i32* [[AP]], align 8 -; IS__TUNIT_OPM-NEXT: [[B:%.*]] = load i32, i32* [[BP]], align 4 -; IS__TUNIT_OPM-NEXT: [[AB:%.*]] = add i32 [[A]], [[B]] -; 
IS__TUNIT_OPM-NEXT: store i32 [[AB]], i32* [[R]], align 4 -; IS__TUNIT_OPM-NEXT: ret void -; -; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@add -; IS__TUNIT_NPM-SAME: ({ i32, i32 }* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* noalias nocapture nofree noundef nonnull writeonly sret(i32) align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR0:[0-9]+]] { -; IS__TUNIT_NPM-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 0 -; IS__TUNIT_NPM-NEXT: [[BP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[A:%.*]] = load i32, i32* [[AP]], align 8 -; IS__TUNIT_NPM-NEXT: [[B:%.*]] = load i32, i32* [[BP]], align 4 -; IS__TUNIT_NPM-NEXT: [[AB:%.*]] = add i32 [[A]], [[B]] -; IS__TUNIT_NPM-NEXT: store i32 [[AB]], i32* [[R]], align 4 -; IS__TUNIT_NPM-NEXT: ret void -; ; IS__CGSCC_OPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@add ; IS__CGSCC_OPM-SAME: ({ i32, i32 }* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* nocapture nofree noundef nonnull writeonly sret(i32) align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR0:[0-9]+]] { @@ -63,37 +41,15 @@ } define void @f() { -; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f -; IS__TUNIT_OPM-SAME: () #[[ATTR1:[0-9]+]] { -; IS__TUNIT_OPM-NEXT: [[R:%.*]] = alloca i32, align 4 -; IS__TUNIT_OPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }, align 8 -; IS__TUNIT_OPM-NEXT: call void @add({ i32, i32 }* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* nocapture nofree noundef nonnull writeonly sret(i32) align 4 dereferenceable(4) [[R]]) #[[ATTR2:[0-9]+]] -; IS__TUNIT_OPM-NEXT: ret void -; -; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone 
willreturn -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f -; IS__TUNIT_NPM-SAME: () #[[ATTR1:[0-9]+]] { -; IS__TUNIT_NPM-NEXT: [[R:%.*]] = alloca i32, align 4 -; IS__TUNIT_NPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }, align 8 -; IS__TUNIT_NPM-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree noundef nonnull writeonly sret(i32) align 4 dereferenceable(4) [[R]]) #[[ATTR2:[0-9]+]] -; IS__TUNIT_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@f +; IS__TUNIT____-SAME: () #[[ATTR0:[0-9]+]] { +; IS__TUNIT____-NEXT: ret void ; -; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f -; IS__CGSCC_OPM-SAME: () #[[ATTR1:[0-9]+]] { -; IS__CGSCC_OPM-NEXT: [[R:%.*]] = alloca i32, align 4 -; IS__CGSCC_OPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }, align 8 -; IS__CGSCC_OPM-NEXT: call void @add({ i32, i32 }* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* nocapture nofree noundef nonnull writeonly sret(i32) align 4 dereferenceable(4) [[R]]) #[[ATTR2:[0-9]+]] -; IS__CGSCC_OPM-NEXT: ret void -; -; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f -; IS__CGSCC_NPM-SAME: () #[[ATTR1:[0-9]+]] { -; IS__CGSCC_NPM-NEXT: [[R:%.*]] = alloca i32, align 4 -; IS__CGSCC_NPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }, align 8 -; IS__CGSCC_NPM-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree noundef nonnull writeonly sret(i32) align 4 dereferenceable(4) [[R]]) #[[ATTR2:[0-9]+]] -; IS__CGSCC_NPM-NEXT: ret void +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define 
{{[^@]+}}@f +; IS__CGSCC____-SAME: () #[[ATTR1:[0-9]+]] { +; IS__CGSCC____-NEXT: ret void ; %r = alloca i32 %pair = alloca {i32, i32} @@ -102,11 +58,8 @@ ret void } ;. -; IS__TUNIT____: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } -; IS__TUNIT____: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind readnone willreturn } -; IS__TUNIT____: attributes #[[ATTR2:[0-9]+]] = { nofree nosync nounwind willreturn } +; IS__TUNIT____: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } ;. ; IS__CGSCC____: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; IS__CGSCC____: attributes #[[ATTR1:[0-9]+]] = { nofree norecurse nosync nounwind readnone willreturn } -; IS__CGSCC____: attributes #[[ATTR2:[0-9]+]] = { nounwind willreturn } +; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } ;. Index: llvm/test/Transforms/InstCombine/trivial-dse-calls.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/trivial-dse-calls.ll @@ -0,0 +1,151 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -instcombine -S < %s | FileCheck %s + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +declare void @unknown() +declare void @f(i8*) +declare void @f2(i8*, i8*) + +; Basic case for DSEing a trivially dead writing call: the alloca is only used (via a bitcast) as the writeonly, nocapture argument +define void @test_dead() { +; CHECK-LABEL: @test_dead( +; CHECK-NEXT: ret void +; + %a = alloca i32, align 4 + %bitcast = bitcast i32* %a to i8* + call void @f(i8* writeonly nocapture %bitcast) argmemonly nounwind willreturn + ret void +} + +; Add in canonical lifetime intrinsics; they do not prevent removal of the call +define void @test_lifetime() { +; CHECK-LABEL: @test_lifetime( +; CHECK-NEXT: ret void +; + %a = alloca i32, align 4 + %bitcast = bitcast i32* %a to i8* + call void 
@llvm.lifetime.start.p0i8(i64 4, i8* %bitcast) + call void @f(i8* writeonly nocapture %bitcast) argmemonly nounwind willreturn + call void @llvm.lifetime.end.p0i8(i64 4, i8* %bitcast) + ret void +} + +; Add some unknown calls just to point out that this is use-based, not +; instruction-order sensitive +define void @test_lifetime2() { +; CHECK-LABEL: @test_lifetime2( +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: ret void +; + %a = alloca i32, align 4 + %bitcast = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %bitcast) + call void @unknown() + call void @f(i8* writeonly nocapture %bitcast) argmemonly nounwind willreturn + call void @unknown() + call void @llvm.lifetime.end.p0i8(i64 4, i8* %bitcast) + ret void +} +; Negative test: %a is loaded after the call, so the written memory is read +define i32 @test_neg_read_after() { +; CHECK-LABEL: @test_neg_read_after( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @f(i8* nocapture nonnull writeonly [[BITCAST]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: [[RES:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: ret i32 [[RES]] +; + %a = alloca i32, align 4 + %bitcast = bitcast i32* %a to i8* + call void @f(i8* writeonly nocapture %bitcast) argmemonly nounwind willreturn + %res = load i32, i32* %a + ret i32 %res +} + +; Negative test: the call is not marked willreturn +define void @test_neg_infinite_loop() { +; CHECK-LABEL: @test_neg_infinite_loop( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @f(i8* nocapture nonnull writeonly [[BITCAST]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: ret void +; + %a = alloca i32, align 4 + %bitcast = bitcast i32* %a to i8* + call void @f(i8* writeonly nocapture %bitcast) argmemonly nounwind + ret void +} +; Negative test: the call is not marked nounwind +define void @test_neg_throw() { +; CHECK-LABEL: @test_neg_throw( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void 
@f(i8* nocapture nonnull writeonly [[BITCAST]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: ret void +; + %a = alloca i32, align 4 + %bitcast = bitcast i32* %a to i8* + call void @f(i8* writeonly nocapture %bitcast) argmemonly willreturn + ret void +} +; Negative test: the call is not marked argmemonly, so it may access memory other than %a +define void @test_neg_extra_write() { +; CHECK-LABEL: @test_neg_extra_write( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @f(i8* nocapture nonnull writeonly [[BITCAST]]) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: ret void +; + %a = alloca i32, align 4 + %bitcast = bitcast i32* %a to i8* + call void @f(i8* writeonly nocapture %bitcast) nounwind willreturn + ret void +} +; Negative test: the second pointer argument of @f2 is not marked nocapture +define i32 @test_neg_captured_by_call() { +; CHECK-LABEL: @test_neg_captured_by_call( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A2:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast i8** [[A2]] to i8* +; CHECK-NEXT: call void @f2(i8* nocapture nonnull writeonly [[BITCAST]], i8* nonnull [[BITCAST2]]) #[[ATTR1]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[A2]] to i32** +; CHECK-NEXT: [[A_COPY_CAST1:%.*]] = load i32*, i32** [[TMP1]], align 8 +; CHECK-NEXT: [[RES:%.*]] = load i32, i32* [[A_COPY_CAST1]], align 4 +; CHECK-NEXT: ret i32 [[RES]] +; + %a = alloca i32, align 4 + %a2 = alloca i8*, align 4 + %bitcast = bitcast i32* %a to i8* + %bitcast2 = bitcast i8** %a2 to i8* + call void @f2(i8* writeonly nocapture %bitcast, i8* %bitcast2) argmemonly nounwind willreturn + %a_copy_cast = load i8*, i8** %a2 + %a_copy = bitcast i8* %a_copy_cast to i32* + %res = load i32, i32* %a_copy + ret i32 %res +} +; Negative test: the address of %a is stored to %a2 before the call +define i32 @test_neg_captured_before() { +; CHECK-LABEL: @test_neg_captured_before( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @f(i8* nocapture nonnull writeonly [[BITCAST]]) #[[ATTR1]] +; CHECK-NEXT: 
[[RES:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: ret i32 [[RES]] +; + %a = alloca i32, align 4 + %a2 = alloca i8*, align 4 + %bitcast = bitcast i32* %a to i8* + %bitcast2 = bitcast i8** %a2 to i8* + store i8* %bitcast, i8** %a2 + call void @f(i8* writeonly nocapture %bitcast) argmemonly nounwind willreturn + %a_copy_cast = load i8*, i8** %a2 + %a_copy = bitcast i8* %a_copy_cast to i32* + %res = load i32, i32* %a_copy + ret i32 %res +} + +