diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1062,7 +1062,7 @@
 def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
                    "lwa $rD, $src", IIC_LdStLWA,
                    [(set i64:$rD,
-                         (aligned4sextloadi32 iaddrX4:$src))]>, isPPC64,
+                         (DSFormSextLoadi32 iaddrX4:$src))]>, isPPC64,
                    PPC970_DGroup_Cracked;
 let Interpretation64Bit = 1, isCodeGenOnly = 1 in
 def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
@@ -1173,7 +1173,7 @@
 let PPC970_Unit = 2 in {
 def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
                   "ld $rD, $src", IIC_LdStLD,
-                  [(set i64:$rD, (aligned4load iaddrX4:$src))]>, isPPC64;
+                  [(set i64:$rD, (DSFormLoad iaddrX4:$src))]>, isPPC64;
 // The following four definitions are selected for small code model only.
 // Otherwise, we need to create two instructions to form a 32-bit offset,
 // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
@@ -1380,7 +1380,7 @@
 // Normal 8-byte stores.
 def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
                    "std $rS, $dst", IIC_LdStSTD,
-                   [(aligned4store i64:$rS, iaddrX4:$dst)]>, isPPC64;
+                   [(DSFormStore i64:$rS, iaddrX4:$dst)]>, isPPC64;
 def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
                          "stdx $rS, $dst", IIC_LdStSTD,
                          [(store i64:$rS, xaddrX4:$dst)]>, isPPC64,
@@ -1447,7 +1447,7 @@
           (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>;
 def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
           (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>;
-def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+def : Pat<(DSFormPreStore i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
           (STDU $rS, iaddroff:$ptroff, $ptrreg)>;

 def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
@@ -1591,11 +1591,11 @@
 // Patterns to match r+r indexed loads and stores for
 // addresses without at least 4-byte alignment.
-def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+def : Pat<(i64 (NonDSFormSextLoadi32 xoaddr:$src)),
           (LWAX xoaddr:$src)>;
-def : Pat<(i64 (unaligned4load xoaddr:$src)),
+def : Pat<(i64 (NonDSFormLoad xoaddr:$src)),
           (LDX xoaddr:$src)>;
-def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
+def : Pat<(NonDSFormStore i64:$rS, xoaddr:$dst),
           (STDX $rS, xoaddr:$dst)>;

 // 64-bits atomic loads and stores
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -495,37 +495,41 @@
   return isUInt<32>(Imm);
 }]>;

-// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
+// This is a somewhat weaker condition than actually checking for 4-byte
+// alignment. It is simply checking that the displacement can be represented
+// as an immediate that is a multiple of 4 (i.e. the requirements for DS-Form
+// instructions).
+// But some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
 // restricted memrix (4-aligned) constants are alignment sensitive. If these
 // offsets are hidden behind TOC entries then the values of the lower-order
 // bits cannot be checked directly. As a result, we need to also incorporate
 // an alignment check into the relevant patterns.
-def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAlignment() >= 4;
+def DSFormLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlignment() >= 4;
 }]>;
-def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+def DSFormStore : PatFrag<(ops node:$val, node:$ptr),
                            (store node:$val, node:$ptr), [{
-  return cast<StoreSDNode>(N)->getAlignment() >= 4;
+  return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlignment() >= 4;
 }]>;
-def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAlignment() >= 4;
+def DSFormSextLoadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+  return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlignment() >= 4;
 }]>;
-def aligned4pre_store : PatFrag<
+def DSFormPreStore : PatFrag<
   (ops node:$val, node:$base, node:$offset),
   (pre_store node:$val, node:$base, node:$offset), [{
-  return cast<StoreSDNode>(N)->getAlignment() >= 4;
+  return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlignment() >= 4;
 }]>;

-def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAlignment() < 4;
+def NonDSFormLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4);
 }]>;
-def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+def NonDSFormStore : PatFrag<(ops node:$val, node:$ptr),
                               (store node:$val, node:$ptr), [{
-  return cast<StoreSDNode>(N)->getAlignment() < 4;
+  return cast<StoreSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4);
 }]>;
-def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAlignment() < 4;
+def NonDSFormSextLoadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() < 4 && !isOffsetMultipleOf(N, 4);
 }]>;

 // This is a somewhat weaker condition than actually checking for 16-byte
diff --git a/llvm/test/CodeGen/PowerPC/ldst-align.ll b/llvm/test/CodeGen/PowerPC/ldst-align.ll
--- a/llvm/test/CodeGen/PowerPC/ldst-align.ll
+++ b/llvm/test/CodeGen/PowerPC/ldst-align.ll
@@ -6,9 +6,8 @@
   ; CHECK: bb.0.entry:
   ; CHECK:   liveins: $x3
   ; CHECK:   [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
-  ; CHECK:   [[ADDI8_:%[0-9]+]]:g8rc = nuw ADDI8 [[COPY]], 24
-  ; CHECK:   [[LDX:%[0-9]+]]:g8rc = LDX $zero8, killed [[ADDI8_]] :: (load 8 from %ir.arrayidx, align 2)
-  ; CHECK:   $x3 = COPY [[LDX]]
+  ; CHECK:   [[LD:%[0-9]+]]:g8rc = LD 24, [[COPY]] :: (load 8 from %ir.arrayidx, align 2)
+  ; CHECK:   $x3 = COPY [[LD]]
   ; CHECK:   BLR8 implicit $lr8, implicit $rm, implicit $x3
 entry:
   %arrayidx = getelementptr inbounds i64, i64* %p, i64 3
@@ -21,9 +20,8 @@
   ; CHECK: bb.0.entry:
   ; CHECK:   liveins: $x3
   ; CHECK:   [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
-  ; CHECK:   [[ADDI8_:%[0-9]+]]:g8rc = nuw ADDI8 [[COPY]], 16
   ; CHECK:   [[LI8_:%[0-9]+]]:g8rc = LI8 9
-  ; CHECK:   STDX killed [[LI8_]], $zero8, killed [[ADDI8_]] :: (store 8 into %ir.arrayidx, align 1)
+  ; CHECK:   STD killed [[LI8_]], 16, [[COPY]] :: (store 8 into %ir.arrayidx, align 1)
   ; CHECK:   BLR8 implicit $lr8, implicit $rm
 entry:
   %arrayidx = getelementptr inbounds i64, i64* %p, i64 2
diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
--- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -35,8 +35,8 @@
 define signext i32 @zeroEqualityTest01(i8* %x, i8* %y) {
 ; CHECK-LABEL: zeroEqualityTest01:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ldx 5, 0, 3
-; CHECK-NEXT:    ldx 6, 0, 4
+; CHECK-NEXT:    ld 5, 0(3)
+; CHECK-NEXT:    ld 6, 0(4)
 ; CHECK-NEXT:    cmpld 5, 6
 ; CHECK-NEXT:    bne 0, .LBB1_2
 ; CHECK-NEXT:  # %bb.1: # %loadbb1
@@ -125,7 +125,7 @@
 define signext i32 @equalityFoldOneConstant(i8* %X) {
 ; CHECK-LABEL: equalityFoldOneConstant:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    ldx 4, 0, 3
+; CHECK-NEXT:    ld 4, 0(3)
 ; CHECK-NEXT:    li 5, 1
 ; CHECK-NEXT:    sldi 5, 5, 32
 ; CHECK-NEXT:    cmpld 4, 5
diff --git a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
--- a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
+++ b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
@@ -8,8 +8,8 @@
 define zeroext i1 @opeq1(
 ; PPC64LE-LABEL: opeq1:
 ; PPC64LE:       # %bb.0: # %"entry+land.rhs.i"
-; PPC64LE-NEXT:    ldx 3, 0, 3
-; PPC64LE-NEXT:    ldx 4, 0, 4
+; PPC64LE-NEXT:    ld 3, 0(3)
+; PPC64LE-NEXT:    ld 4, 0(4)
 ; PPC64LE-NEXT:    xor 3, 3, 4
 ; PPC64LE-NEXT:    cntlzd 3, 3
 ; PPC64LE-NEXT:    rldicl 3, 3, 58, 63
diff --git a/llvm/test/CodeGen/PowerPC/pr45186.ll b/llvm/test/CodeGen/PowerPC/pr45186.ll
--- a/llvm/test/CodeGen/PowerPC/pr45186.ll
+++ b/llvm/test/CodeGen/PowerPC/pr45186.ll
@@ -9,7 +9,7 @@
 define i64 @e(i8* nocapture readonly %f) local_unnamed_addr #0 {
 ; CHECK-LABEL: e:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ldx r3, 0, r3
+; CHECK-NEXT:    ld r3, 0(r3)
 ; CHECK-NEXT:    blr
 entry:
   %0 = load i8, i8* %f, align 1
diff --git a/llvm/test/CodeGen/PowerPC/store-combine.ll b/llvm/test/CodeGen/PowerPC/store-combine.ll
--- a/llvm/test/CodeGen/PowerPC/store-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/store-combine.ll
@@ -80,7 +80,7 @@
 define void @store_i64_by_i8(i64 %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i64_by_i8:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    stdx 3, 0, 4
+; CHECK-PPC64LE-NEXT:    std 3, 0(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i64_by_i8:
@@ -138,7 +138,7 @@
 ;
 ; CHECK-PPC64-LABEL: store_i64_by_i8_bswap:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    stdx 3, 0, 4
+; CHECK-PPC64-NEXT:    std 3, 0(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %conv = trunc i64 %m to i8
@@ -198,7 +198,7 @@
 ; CHECK-PPC64-NEXT:    slwi 5, 3, 3
 ; CHECK-PPC64-NEXT:    sub 3, 5, 3
 ; CHECK-PPC64-NEXT:    extsw 3, 3
-; CHECK-PPC64-NEXT:    stdx 3, 0, 4
+; CHECK-PPC64-NEXT:    std 3, 0(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %mul = mul nsw i32 %t, 7
diff --git a/llvm/test/CodeGen/PowerPC/unal4-std.ll b/llvm/test/CodeGen/PowerPC/unal4-std.ll
--- a/llvm/test/CodeGen/PowerPC/unal4-std.ll
+++ b/llvm/test/CodeGen/PowerPC/unal4-std.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT:    ld 4, -8(1)
 ; CHECK-NEXT:    std 4, 8(3)
 ; CHECK-NEXT:    ld 4, -16(1)
-; CHECK-NEXT:    stdx 4, 0, 3
+; CHECK-NEXT:    std 4, 0(3)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-VSX-LABEL: copy_to_conceal:
diff --git a/llvm/test/CodeGen/PowerPC/unaligned.ll b/llvm/test/CodeGen/PowerPC/unaligned.ll
--- a/llvm/test/CodeGen/PowerPC/unaligned.ll
+++ b/llvm/test/CodeGen/PowerPC/unaligned.ll
@@ -46,14 +46,14 @@
 define void @foo3(i64* %p, i64* %r) nounwind {
 ; CHECK-LABEL: foo3:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ldx 3, 0, 3
-; CHECK-NEXT:    stdx 3, 0, 4
+; CHECK-NEXT:    ld 3, 0(3)
+; CHECK-NEXT:    std 3, 0(4)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-VSX-LABEL: foo3:
 ; CHECK-VSX:       # %bb.0: # %entry
-; CHECK-VSX-NEXT:    ldx 3, 0, 3
-; CHECK-VSX-NEXT:    stdx 3, 0, 4
+; CHECK-VSX-NEXT:    ld 3, 0(3)
+; CHECK-VSX-NEXT:    std 3, 0(4)
 ; CHECK-VSX-NEXT:    blr
 entry:
   %v = load i64, i64* %p, align 1
@@ -118,7 +118,7 @@
 ; CHECK-NEXT:    ld 3, -8(1)
 ; CHECK-NEXT:    std 3, 8(4)
 ; CHECK-NEXT:    ld 3, -16(1)
-; CHECK-NEXT:    stdx 3, 0, 4
+; CHECK-NEXT:    std 3, 0(4)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-VSX-LABEL: foo6:
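
Note for readers: the renamed predicate pair encodes "select the DS-Form if the folded
displacement is a multiple of 4, or the access is known to be at least 4-byte aligned;
otherwise fall back to the indexed X-Form". The sketch below is a minimal, self-contained
C++ model of that decision, not the in-tree code: isDSFormCandidate and needsXForm are
hypothetical names, and the Offset/Align parameters stand in for what isOffsetMultipleOf(N, 4)
and the MemSDNode alignment query compute on the actual SelectionDAG node in the PatFrag
bodies above.

    // Standalone model of the DS-Form vs. X-Form predicate logic
    // (hypothetical helper names; the real checks are the PatFrag
    // bodies in PPCInstrInfo.td).
    #include <cstdint>
    #include <optional>

    // DS-Form instructions (ld/std/lwa/stdu) encode a 16-bit displacement
    // whose low two bits must be zero, so the immediate must be a
    // multiple of 4.
    bool isDSFormCandidate(std::optional<int64_t> Offset, uint64_t Align) {
      // Either the displacement itself is provably a multiple of 4 ...
      if (Offset && *Offset % 4 == 0)
        return true;
      // ... or the access is at least 4-byte aligned, which also covers
      // offsets hidden behind TOC entries whose low-order bits cannot be
      // inspected directly.
      return Align >= 4;
    }

    // Exact complement: use the indexed (r+r) X-Form ldx/stdx instead.
    bool needsXForm(std::optional<int64_t> Offset, uint64_t Align) {
      return !isDSFormCandidate(Offset, Align);
    }

For example, the store in ldst-align.ll has alignment 1 but a folded displacement of 16,
so the DS-Form condition holds and STD is now selected, while an access whose displacement
is unknown and whose alignment is below 4 still takes the STDX path.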