Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -62,6 +62,7 @@
 namespace llvm {
 
+class AssumptionCache;
 class CCState;
 class CCValAssign;
 class Constant;
@@ -424,8 +425,10 @@
     return MachineMemOperand::MONone;
   }
 
-  MachineMemOperand::Flags getLoadMemOperandFlags(const LoadInst &LI,
-                                                  const DataLayout &DL) const;
+  MachineMemOperand::Flags
+  getLoadMemOperandFlags(const LoadInst &LI, const DataLayout &DL,
+                         AssumptionCache *AC = nullptr,
+                         const TargetLibraryInfo *LibInfo = nullptr) const;
   MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI,
                                                    const DataLayout &DL) const;
   MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI,
Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1306,7 +1306,8 @@
   }
 
   auto &TLI = *MF->getSubtarget().getTargetLowering();
-  MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL);
+  MachineMemOperand::Flags Flags =
+      TLI.getLoadMemOperandFlags(LI, *DL, AC, LibInfo);
   if (AA && !(Flags & MachineMemOperand::MOInvariant)) {
     if (AA->pointsToConstantMemory(
             MemoryLocation(Ptr, LocationSize::precise(StoreSize), AAInfo))) {
@@ -1314,12 +1315,6 @@
     }
   }
 
-  if (!(Flags & MachineMemOperand::MODereferenceable)) {
-    if (isDereferenceableAndAlignedPointer(Ptr, LI.getType(), LI.getAlign(),
-                                           *DL, &LI, AC, nullptr, LibInfo))
-      Flags |= MachineMemOperand::MODereferenceable;
-  }
-
   const MDNode *Ranges =
       Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
   for (unsigned i = 0; i < Regs.size(); ++i) {
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4134,7 +4134,7 @@
   const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
   bool isVolatile = I.isVolatile();
   MachineMemOperand::Flags MMOFlags =
-      TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+      TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
 
   SDValue Root;
   bool ConstantMemory = false;
@@ -4157,10 +4157,6 @@
     Root = DAG.getRoot();
   }
 
-  if (isDereferenceableAndAlignedPointer(SV, Ty, Alignment, DAG.getDataLayout(),
-                                         &I, AC, nullptr, LibInfo))
-    MMOFlags |= MachineMemOperand::MODereferenceable;
-
   SDLoc dl = getCurSDLoc();
 
   if (isVolatile)
@@ -4731,7 +4727,7 @@
       I.getAlign().value() < MemVT.getSizeInBits() / 8)
     report_fatal_error("Cannot generate unaligned atomic load");
 
-  auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+  auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
 
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
Index: llvm/lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -2246,9 +2246,9 @@
   MF.getRegInfo().freezeReservedRegs(MF);
 }
 
-MachineMemOperand::Flags
-TargetLoweringBase::getLoadMemOperandFlags(const LoadInst &LI,
-                                           const DataLayout &DL) const {
+MachineMemOperand::Flags TargetLoweringBase::getLoadMemOperandFlags(
+    const LoadInst &LI, const DataLayout &DL, AssumptionCache *AC,
+    const TargetLibraryInfo *LibInfo) const {
   MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad;
   if (LI.isVolatile())
     Flags |= MachineMemOperand::MOVolatile;
@@ -2259,7 +2259,9 @@
   if (LI.hasMetadata(LLVMContext::MD_invariant_load))
     Flags |= MachineMemOperand::MOInvariant;
 
-  if (isDereferenceablePointer(LI.getPointerOperand(), LI.getType(), DL))
+  if (isDereferenceableAndAlignedPointer(LI.getPointerOperand(), LI.getType(),
+                                         LI.getAlign(), DL, &LI, AC,
+                                         /*DT=*/nullptr, LibInfo))
     Flags |= MachineMemOperand::MODereferenceable;
 
   Flags |= getTargetMMOFlags(LI);
Index: llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll
@@ -33,7 +33,7 @@
   ; CHECK-NEXT: liveins: $x0
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable load (s32) from %ir.ptr)
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.ptr)
   ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
   ; CHECK-NEXT: RET_ReallyLR implicit $w0
   %load = load i32, ptr %ptr, align 4
@@ -46,7 +46,7 @@
   ; CHECK-NEXT: liveins: $x0
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable invariant load (s32) from %ir.ptr)
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (invariant load (s32) from %ir.ptr)
   ; CHECK-NEXT: $w0 = COPY [[LOAD]](s32)
   ; CHECK-NEXT: RET_ReallyLR implicit $w0
   %load = load i32, ptr %ptr, align 4, !invariant.load !0
Index: llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll
@@ -6,23 +6,31 @@
 ; CHECK-LABEL: name: stack_passed_i64
 ; CHECK: fixedStack:
-; CHECK:  - { id: 0, type: default, offset: 8, size: 8, alignment: 8, stack-id: default,
+; CHECK:  - { id: 0, type: default, offset: 16, size: 8, alignment: 16, stack-id: default,
 ; CHECK-NEXT: isImmutable: false, isAliased: false,
-; CHECK:  - { id: 1, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
+; CHECK:  - { id: 1, type: default, offset: 8, size: 8, alignment: 8, stack-id: default,
+; CHECK-NEXT: isImmutable: false, isAliased: false,
+; CHECK:  - { id: 2, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
 ; CHECK-NEXT: isImmutable: true, isAliased: false,
 define void @stack_passed_i64(i64 %arg, i64 %arg1, i64 %arg2, i64 %arg3,
                               i64 %arg4, i64 %arg5, i64 %arg6,
-                              i64 %arg7, i64 %arg8, ptr byval(i64) %arg9) {
+                              i64 %arg7, i64 %arg8, ptr byval(i64) %arg9, ptr byval(i64) align(8) %arg10) {
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16)
-  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
+  ; CHECK:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.2, align 16)
+  ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
   ; CHECK:   [[COPY8:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX1]](p0)
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY8]](p0) :: (dereferenceable load (s64) from %ir.arg9)
-  ; CHECK:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD1]], [[LOAD]]
-  ; CHECK:   G_STORE [[ADD]](s64), [[COPY8]](p0) :: (volatile store (s64) into %ir.arg9)
-  ; CHECK:   RET_ReallyLR
-  %load = load i64, ptr %arg9
-  %add = add i64 %load, %arg8
-  store volatile i64 %add, ptr %arg9
+  ; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; CHECK:   [[COPY9:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX2]](p0)
+  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY8]](p0) :: (load (s64) from %ir.arg9)
+  ; CHECK:   [[ADD0:%[0-9]+]]:_(s64) = G_ADD [[LOAD1]], [[LOAD]]
+  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[COPY9]](p0) :: (dereferenceable load (s64) from %ir.arg10)
+  ; CHECK:   [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ADD0]], [[LOAD2]]
+  ; CHECK:   G_STORE [[ADD1]](s64), [[COPY8]](p0) :: (volatile store (s64) into %ir.arg9)
+  ; CHECK:   RET_ReallyLR
+  %load0 = load i64, ptr %arg9
+  %add0 = add i64 %load0, %arg8
+  %load1 = load i64, ptr %arg10
+  %add1 = add i64 %add0, %load1
+  store volatile i64 %add1, ptr %arg9
   ret void
 }
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
@@ -1659,10 +1659,10 @@
   ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
   ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5)
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (load (s8) from %ir.arg0, align 4, addrspace 5)
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-  ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5)
+  ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from %ir.arg0 + 4, addrspace 5)
   ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1)
   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
@@ -1685,13 +1685,13 @@
   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5)
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile load (s8) from %ir.arg0, align 4, addrspace 5)
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
   ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-  ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5)
-  ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5)
+  ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile load (s32) from %ir.arg0 + 4, addrspace 5)
+  ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile load (s8) from %ir.arg1, align 4, addrspace 5)
   ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32)
-  ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load (s32) from %ir.arg1 + 4, addrspace 5)
+  ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile load (s32) from %ir.arg1 + 4, addrspace 5)
   ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1)
   ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
@@ -1717,8 +1717,8 @@
   ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
   ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5)
-  ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5)
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32) from %ir.arg0, addrspace 5)
+  ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (load (s64) from %ir.arg1, addrspace 5)
   ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT: SI_RETURN
Index: llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
+++ llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
 
-define amdgpu_cs void @mmo_offsets0(ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615) %arg0, i32 %arg1) {
+define amdgpu_cs void @mmo_offsets0(ptr addrspace(6) inreg noalias align(16) dereferenceable(18446744073709551615) %arg0, i32 %arg1) {
 ; GCN-LABEL: name: mmo_offsets0
 ; GCN: bb.0.bb.0:
 ; GCN-NEXT: liveins: $sgpr0, $vgpr0
Index: llvm/test/CodeGen/AMDGPU/indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -182,9 +182,9 @@
 ; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-NEXT:    s_add_u32 s8, s8, 8
 ; GISEL-NEXT:    s_addc_u32 s9, s9, 0
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
 ; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
 ; GISEL-NEXT:    s_mov_b32 s14, s16
@@ -373,12 +373,12 @@
 ; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
 ; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GISEL-NEXT:    s_add_u32 s8, s8, 8
 ; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT:    s_add_u32 s8, s8, 8
 ; GISEL-NEXT:    s_addc_u32 s9, s9, 0
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
-; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
+; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GISEL-NEXT:    s_mov_b32 s14, s16
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
Index: llvm/test/CodeGen/AMDGPU/kernel-args.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -6129,7 +6129,7 @@
   ret void
 }
 
-define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) %in.byref, i32 %after.offset) {
+define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) {
 ; SI-LABEL: byref_natural_align_constant_v16i32_arg:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x19
Index: llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll
+++ llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll
@@ -4,7 +4,14 @@
 ; and dereferenceable flags.
 
 ; GCN: BUFFER_LOAD_USHORT{{.*}} :: (dereferenceable invariant load (s16) from %ir.ptr, addrspace 4)
-define half @legalize_f16_load(ptr addrspace(4) dereferenceable(4) %ptr) {
+define half @legalize_f16_load_align2(ptr addrspace(4) dereferenceable(4) align(2) %ptr) {
+  %load = load half, ptr addrspace(4) %ptr, !invariant.load !0
+  %add = fadd half %load, 1.0
+  ret half %add
+}
+
+; GCN: BUFFER_LOAD_USHORT{{.*}} :: (invariant load (s16) from %ir.ptr, addrspace 4)
+define half @legalize_f16_load_align1(ptr addrspace(4) dereferenceable(4) align(1) %ptr) {
   %load = load half, ptr addrspace(4) %ptr, !invariant.load !0
   %add = fadd half %load, 1.0
   ret half %add
Index: llvm/test/CodeGen/WebAssembly/reg-stackify.ll
===================================================================
--- llvm/test/CodeGen/WebAssembly/reg-stackify.ll
+++ llvm/test/CodeGen/WebAssembly/reg-stackify.ll
@@ -26,7 +26,7 @@
 ; CHECK: return $1{{$}}
 ; NOREGS-LABEL: no1:
 ; NOREGS: return{{$}}
-define i32 @no1(ptr %p, ptr dereferenceable(4) %q) {
+define i32 @no1(ptr %p, ptr dereferenceable(4) align(4) %q) {
   %t = load volatile i32, ptr %q, !invariant.load !0
   store volatile i32 0, ptr %p
   ret i32 %t
@@ -38,7 +38,7 @@
 ; CHECK: return $pop{{[0-9]+}}{{$}}
 ; NOREGS-LABEL: yes0:
 ; NOREGS: return{{$}}
-define i32 @yes0(ptr %p, ptr dereferenceable(4) %q) {
+define i32 @yes0(ptr %p, ptr dereferenceable(4) align(4) %q) {
  %t = load i32, ptr %q, !invariant.load !0
  store i32 0, ptr %p
  ret i32 %t
@@ -559,7 +559,7 @@
 ; NOREGS: call callee
 ; NOREGS: i32.load 0
 ; NOREGS: return
-define i32 @store_past_invar_load(i32 %a, ptr %p1, ptr dereferenceable(4) %p2) {
+define i32 @store_past_invar_load(i32 %a, ptr %p1, ptr dereferenceable(4) align(4) %p2) {
   store i32 %a, ptr %p1
   %b = load i32, ptr %p2, !invariant.load !0
   call i32 @callee(i32 %a)
Index: llvm/test/CodeGen/X86/fold-sext-trunc.ll
===================================================================
--- llvm/test/CodeGen/X86/fold-sext-trunc.ll
+++ llvm/test/CodeGen/X86/fold-sext-trunc.ll
@@ -5,7 +5,7 @@
 
 %0 = type { i64 }
 %struct.S1 = type { i16, i32 }
 
-@g_10 = external dso_local global %struct.S1
+@g_10 = external dso_local global %struct.S1, align 8
 
 declare void @func_28(i64, i64)
Index: llvm/test/CodeGen/X86/hoist-invariant-load.ll
===================================================================
--- llvm/test/CodeGen/X86/hoist-invariant-load.ll
+++ llvm/test/CodeGen/X86/hoist-invariant-load.ll
@@ -212,7 +212,7 @@
 
 declare ptr @objc_msgSend(ptr, ptr, ...) nonlazybind
 
-define void @test_multi_def(ptr dereferenceable(8) %x1,
+define void @test_multi_def(ptr dereferenceable(8) align(8) %x1,
 ; CHECK-LABEL: test_multi_def:
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    movq %rdx, %rax
@@ -233,7 +233,7 @@
 ; CHECK-NEXT:    jl LBB4_2
 ; CHECK-NEXT:  ## %bb.3: ## %exit
 ; CHECK-NEXT:    retq
-                            ptr dereferenceable(8) %x2,
+                            ptr dereferenceable(8) align(8) %x2,
                             ptr %y, i64 %count) nounwind nofree nosync {
 entry:
   br label %for.body
@@ -260,7 +260,7 @@
   ret void
 }
 
-define void @test_div_def(ptr dereferenceable(8) %x1,
+define void @test_div_def(ptr dereferenceable(8) align(8) %x1,
 ; CHECK-LABEL: test_div_def:
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    movq %rdx, %r8
@@ -281,7 +281,7 @@
 ; CHECK-NEXT:    jl LBB5_2
 ; CHECK-NEXT:  ## %bb.3: ## %exit
 ; CHECK-NEXT:    retq
-                           ptr dereferenceable(8) %x2,
+                           ptr dereferenceable(8) align(8) %x2,
                            ptr %y, i32 %count) nounwind nofree nosync {
 entry:
   br label %for.body