diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -273,6 +273,8 @@
   // Get the memory source at offset `OffsetBytes`.
   Value *LhsSource = CI->getArgOperand(0);
   Value *RhsSource = CI->getArgOperand(1);
+  Align LhsAlign = LhsSource->getPointerAlignment(DL).valueOrOne();
+  Align RhsAlign = RhsSource->getPointerAlignment(DL).valueOrOne();
   if (OffsetBytes > 0) {
     auto *ByteType = Type::getInt8Ty(CI->getContext());
     LhsSource = Builder.CreateConstGEP1_64(
@@ -281,6 +283,8 @@
     RhsSource = Builder.CreateConstGEP1_64(
         ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
        OffsetBytes);
+    LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
+    RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
   }
   LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
   RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());
@@ -290,13 +294,13 @@
   if (auto *C = dyn_cast<Constant>(LhsSource))
     Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
   if (!Lhs)
-    Lhs = Builder.CreateLoad(LoadSizeType, LhsSource);
+    Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);

   Value *Rhs = nullptr;
   if (auto *C = dyn_cast<Constant>(RhsSource))
     Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
   if (!Rhs)
-    Rhs = Builder.CreateLoad(LoadSizeType, RhsSource);
+    Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);

   // Swap bytes if required.
   if (NeedsBSwap) {
diff --git a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
--- a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
+++ b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
@@ -4,13 +4,28 @@
 declare i32 @bcmp(i8*, i8*, i64) nounwind readonly
 declare i32 @memcmp(i8*, i8*, i64) nounwind readonly

-define i1 @bcmp_b2(i8* %s1, i8* %s2) {
+define i1 @test_b2(i8* %s1, i8* %s2) {
 entry:
   %bcmp = call i32 @bcmp(i8* %s1, i8* %s2, i64 15)
   %ret = icmp eq i32 %bcmp, 0
   ret i1 %ret

-; CHECK-LABEL: bcmp_b2:
+; CHECK-LABEL: test_b2:
+; CHECK-NOT: bl bcmp
+; CHECKN: ldr x
+; CHECKN-NEXT: ldr x
+; CHECKN-NEXT: ldur x
+; CHECKN-NEXT: ldur x
+; CHECKS-COUNT-30: ldrb w
+}
+
+define i1 @test_b2_align8(i8* align 8 %s1, i8* align 8 %s2) {
+entry:
+  %bcmp = call i32 @bcmp(i8* %s1, i8* %s2, i64 15)
+  %ret = icmp eq i32 %bcmp, 0
+  ret i1 %ret
+
+; CHECK-LABEL: test_b2_align8:
 ; CHECK-NOT: bl bcmp
 ; CHECKN: ldr x
 ; CHECKN-NEXT: ldr x
@@ -20,19 +35,19 @@
 ; CHECKS-NEXT: ldr x
 ; CHECKS-NEXT: ldr w
 ; CHECKS-NEXT: ldr w
-; CHECKS-NEXT: ldrh w
-; CHECKS-NEXT: ldrh w
-; CHECKS-NEXT: ldrb w
-; CHECKS-NEXT: ldrb w
+; CHECKS-NEXT: ldrh w
+; CHECKS-NEXT: ldrh w
+; CHECKS-NEXT: ldrb w
+; CHECKS-NEXT: ldrb w
 }

-define i1 @bcmp_bs(i8* %s1, i8* %s2) optsize {
+define i1 @test_bs(i8* %s1, i8* %s2) optsize {
 entry:
   %memcmp = call i32 @memcmp(i8* %s1, i8* %s2, i64 31)
   %ret = icmp eq i32 %memcmp, 0
   ret i1 %ret

-; CHECK-LABEL: bcmp_bs:
+; CHECK-LABEL: test_bs:
 ; CHECKN-NOT: bl memcmp
 ; CHECKN: ldp x
 ; CHECKN-NEXT: ldp x
diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
--- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -35,8 +35,8 @@
 define signext i32 @zeroEqualityTest01(i8* %x, i8* %y) {
 ; CHECK-LABEL: zeroEqualityTest01:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: ld 5, 0(3)
-; CHECK-NEXT: ld 6, 0(4)
+; CHECK-NEXT: ldx 5, 0, 3
+; CHECK-NEXT: ldx 6, 0, 4
 ; CHECK-NEXT: cmpld 5, 6
 ; CHECK-NEXT: bne 0, .LBB1_2
 ; CHECK-NEXT: # %bb.1: # %loadbb1
@@ -125,7 +125,7 @@
 define signext i32 @equalityFoldOneConstant(i8* %X) {
 ; CHECK-LABEL: equalityFoldOneConstant:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: ld 4, 0(3)
+; CHECK-NEXT: ldx 4, 0, 3
 ; CHECK-NEXT: li 5, 1
 ; CHECK-NEXT: sldi 5, 5, 32
 ; CHECK-NEXT: cmpld 4, 5
diff --git a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
--- a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
+++ b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
@@ -8,8 +8,8 @@
 define zeroext i1 @opeq1(
 ; PPC64LE-LABEL: opeq1:
 ; PPC64LE: # %bb.0: # %"entry+land.rhs.i"
-; PPC64LE-NEXT: ld 3, 0(3)
-; PPC64LE-NEXT: ld 4, 0(4)
+; PPC64LE-NEXT: ldx 3, 0, 3
+; PPC64LE-NEXT: ldx 4, 0, 4
 ; PPC64LE-NEXT: xor 3, 3, 4
 ; PPC64LE-NEXT: cntlzd 3, 3
 ; PPC64LE-NEXT: rldicl 3, 3, 58, 63
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
@@ -9,8 +9,8 @@
 ; ALL-LABEL: @cmp2(
 ; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
 ; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
 ; ALL-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
 ; ALL-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
@@ -26,8 +26,8 @@
 ; ALL-LABEL: @cmp2_align2(
 ; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 2
+; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2
 ; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
 ; ALL-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
 ; ALL-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
@@ -49,8 +49,8 @@
 ; ALL: loadbb:
 ; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]]
-; ALL-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP4]]
+; ALL-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 1
+; ALL-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP4]], align 1
 ; ALL-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
 ; ALL-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
 ; ALL-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
@@ -58,8 +58,8 @@
 ; ALL: loadbb1:
 ; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 2
 ; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
+; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]], align 1
+; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]], align 1
 ; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
 ; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
 ; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
@@ -76,8 +76,8 @@
 ; ALL-LABEL: @cmp4(
 ; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
 ; ALL-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
 ; ALL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]]
@@ -101,8 +101,8 @@
 ; ALL: loadbb:
 ; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
-; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
+; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 1
+; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 1
 ; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
 ; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
 ; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
@@ -110,8 +110,8 @@
 ; ALL: loadbb1:
 ; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
+; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]], align 1
+; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]], align 1
 ; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
 ; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
 ; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
@@ -136,8 +136,8 @@
 ; ALL: loadbb:
 ; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
-; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
+; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 1
+; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 1
 ; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
 ; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
 ; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
@@ -147,8 +147,8 @@
 ; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; ALL-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i16*
 ; ALL-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i16*
-; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
-; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
+; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]], align 1
+; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]], align 1
 ; ALL-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
 ; ALL-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
 ; ALL-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
@@ -184,8 +184,8 @@
 ; X32: loadbb:
 ; X32-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X32-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
-; X32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
+; X32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 1
+; X32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 1
 ; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
 ; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
 ; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
@@ -195,8 +195,8 @@
 ; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32*
 ; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i32*
-; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
-; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
+; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]], align 1
+; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 1
 ; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
 ; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
 ; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]
@@ -208,8 +208,8 @@
 ; X64-LABEL: @cmp8(
 ; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
 ; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
 ; X64-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]]
@@ -237,8 +237,8 @@
 ; X64: loadbb:
 ; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
+; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
+; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
 ; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
 ; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
 ; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@@ -246,8 +246,8 @@
 ; X64: loadbb1:
 ; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
+; X64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]], align 1
+; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]], align 1
 ; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
 ; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
 ; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
@@ -276,8 +276,8 @@
 ; X64: loadbb:
 ; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
+; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
+; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
 ; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
 ; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
 ; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@@ -287,8 +287,8 @@
 ; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i16*
 ; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i16*
-; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
-; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
+; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]], align 1
+; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]], align 1
 ; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
 ; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
 ; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64
@@ -328,8 +328,8 @@
 ; X64: loadbb:
 ; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
+; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
+; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
 ; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
 ; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
 ; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@@ -339,8 +339,8 @@
 ; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32*
 ; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i32*
-; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
-; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
+; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]], align 1
+; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 1
 ; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
 ; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
 ; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64
@@ -398,8 +398,8 @@
 ; X64: loadbb:
 ; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
+; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
+; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
 ; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
 ; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
 ; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@@ -409,8 +409,8 @@
 ; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i64*
 ; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i64*
-; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]]
-; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]]
+; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]], align 1
+; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]], align 1
 ; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
 ; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
 ; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
@@ -427,8 +427,8 @@
 ; ALL-LABEL: @cmp_eq2(
 ; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
 ; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
 ; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
 ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
@@ -445,13 +445,13 @@
 ; X32-LABEL: @cmp_eq3(
 ; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
 ; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
 ; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
 ; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
 ; X32-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
@@ -469,15 +469,15 @@
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
 ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
 ; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
 ; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
@@ -489,13 +489,13 @@
 ; X64_2LD-LABEL: @cmp_eq3(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
 ; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
 ; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
 ; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
 ; X64_2LD-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
@@ -516,8 +516,8 @@
 ; ALL-LABEL: @cmp_eq4(
 ; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
 ; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
 ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
@@ -534,13 +534,13 @@
 ; X32-LABEL: @cmp_eq5(
 ; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
 ; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
 ; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
@@ -558,15 +558,15 @@
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
 ; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
@@ -578,13 +578,13 @@
 ; X64_2LD-LABEL: @cmp_eq5(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
 ; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
 ; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
@@ -605,15 +605,15 @@
 ; X32-LABEL: @cmp_eq6(
 ; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
 ; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
 ; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
 ; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@@ -631,8 +631,8 @@
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
@@ -640,8 +640,8 @@
 ; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
@@ -653,15 +653,15 @@
 ; X64_2LD-LABEL: @cmp_eq6(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
 ; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
 ; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
 ; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@@ -682,15 +682,15 @@
 ; X32-LABEL: @cmp_eq6_align4(
 ; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
 ; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 4
+; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 4
 ; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
 ; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
 ; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@@ -708,8 +708,8 @@
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
 ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
@@ -717,8 +717,8 @@
 ; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 4
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 4
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
@@ -730,15 +730,15 @@
 ; X64_2LD-LABEL: @cmp_eq6_align4(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 4
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 4
 ; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
 ; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
 ; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@@ -759,15 +759,15 @@
 ; X32-LABEL: @cmp_eq7(
 ; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
 ; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
 ; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
 ; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
 ; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
 ; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
@@ -783,8 +783,8 @@
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
@@ -792,8 +792,8 @@
 ; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
 ; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
 ; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
@@ -805,15 +805,15 @@
 ; X64_2LD-LABEL: @cmp_eq7(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
 ; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
 ; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
 ; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
 ; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
@@ -832,15 +832,15 @@
 ; X32-LABEL: @cmp_eq8(
 ; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
 ; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
 ; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
 ; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
 ; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
 ; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
@@ -852,8 +852,8 @@
 ; X64-LABEL: @cmp_eq8(
 ; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
 ; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
@@ -880,15 +880,15 @@
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
 ; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
@@ -900,13 +900,13 @@
 ; X64_2LD-LABEL: @cmp_eq9(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
+; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
 ; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i64
 ; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i64
 ; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
@@ -937,8 +937,8 @@
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
@@ -946,8 +946,8 @@
 ; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
@@ -959,15 +959,15 @@
 ; X64_2LD-LABEL: @cmp_eq10(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
 ; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
 ; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
 ; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
 ; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
@@ -998,8 +998,8 @@
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
@@ -1007,8 +1007,8 @@
 ; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
 ; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
@@ -1020,15 +1020,15 @@
 ; X64_2LD-LABEL: @cmp_eq11(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
 ; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
 ; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@@ -1057,8 +1057,8 @@
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
@@ -1066,8 +1066,8 @@
 ; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
 ; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
@@ -1079,15 +1079,15 @@
 ; X64_2LD-LABEL: @cmp_eq12(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
 ; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
 ; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
 ; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
 ; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
@@ -1118,8 +1118,8 @@
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
@@ -1127,8 +1127,8 @@
 ; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 5
 ; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
@@ -1140,15 +1140,15 @@
 ; X64_2LD-LABEL: @cmp_eq13(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 5
 ; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 5
 ; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@@ -1177,8 +1177,8 @@
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
@@ -1186,8 +1186,8 @@
 ; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 6
 ; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
@@ -1199,15 +1199,15 @@
 ; X64_2LD-LABEL: @cmp_eq14(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 6
 ; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 6
 ; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@@ -1236,8 +1236,8 @@
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
@@ -1245,8 +1245,8 @@
 ; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 7
 ; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
@@ -1258,15 +1258,15 @@
 ; X64_2LD-LABEL: @cmp_eq15(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
 ; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 7
 ; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 7
 ; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
 ; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
 ; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@@ -1291,8 +1291,8 @@
 ; X64-LABEL: @cmp_eq16(
 ; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i128*
 ; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i128*
-; X64-NEXT: [[TMP3:%.*]] = load i128, i128* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = load i128, i128* [[TMP2]]
+; X64-NEXT: [[TMP3:%.*]] = load i128, i128* [[TMP1]], align 1
+; X64-NEXT: [[TMP4:%.*]] = load i128, i128* [[TMP2]], align 1
 ; X64-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP3]], [[TMP4]]
 ; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
 ; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
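
Note: the change hinges on `commonAlignment()` from llvm/Support/Alignment.h, which returns the largest power of two dividing both the base alignment and the byte offset, i.e. the alignment that is still guaranteed after advancing an aligned pointer by `OffsetBytes`. A minimal standalone sketch of that behavior (illustration only, assuming the LLVM 10-era Alignment.h API):

  #include "llvm/Support/Alignment.h"
  #include <cassert>

  int main() {
    using llvm::Align;
    using llvm::commonAlignment;
    // An align-8 buffer read at offset 4 only keeps 4-byte alignment.
    assert(commonAlignment(Align(8), 4) == Align(4));
    // Offsets that are multiples of the alignment preserve it.
    assert(commonAlignment(Align(8), 8) == Align(8));
    // An unannotated (align-1) pointer stays byte-aligned at any offset.
    assert(commonAlignment(Align(1), 7) == Align(1));
  }

This is what the AArch64 test exercises: `test_b2` carries no alignment, so the strict-alignment (CHECKS) run must expand the 15-byte bcmp into 30 `ldrb` byte loads, while `test_b2_align8` can load 8/4/2/1 bytes per buffer at offsets 0/8/12/14, each of which the `commonAlignment` computation proves sufficiently aligned.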