Index: llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -338,6 +338,10 @@
   // to X.
   bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const;

+  // Return true if N (a load or a store) fulfills the alignment
+  // requirements for a PC-relative access.
+  bool storeLoadIsAligned(SDNode *N) const;
+
   // Try to expand a boolean SELECT_CCMASK using an IPM sequence.
   SDValue expandSelectBoolean(SDNode *Node);

@@ -1460,6 +1464,40 @@
          canUseBlockOperation(StoreA, LoadB);
 }

+bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {
+
+  auto *MemAccess = cast<LSBaseSDNode>(N);
+  SDValue BasePtr = MemAccess->getBasePtr();
+
+  if (!SystemZISD::isPCREL(BasePtr.getOpcode()) ||
+      !MemAccess->getOffset().isUndef()) // potential index register
+    return false;
+
+  // An access to GOT or constant pool is always aligned.
+  if (const PseudoSourceValue *PSV =
+          MemAccess->getMemOperand()->getPseudoValue())
+    if (PSV->isGOT() || PSV->isConstantPool())
+      return true;
+
+  // The alignment value of the memory access must be acceptable.
+  TypeSize StoreSize = MemAccess->getMemoryVT().getStoreSize();
+  if (MemAccess->getAlignment() < StoreSize)
+    return false;
+
+  // The immediate offset must be a multiple of the store size.
+  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(BasePtr.getOperand(0));
+  if (GA->getOffset() % StoreSize != 0)
+    return false;
+
+  // The alignment of the symbol itself must be at least the store size.
+  const GlobalValue *GV = GA->getGlobal();
+  const DataLayout &DL = GV->getParent()->getDataLayout();
+  if (GV->getPointerAlignment(DL).value() < StoreSize)
+    return false;
+
+  return true;
+}
+
 void SystemZDAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we already have selected!
   if (Node->isMachineOpcode()) {
Index: llvm/lib/Target/SystemZ/SystemZOperators.td
===================================================================
--- llvm/lib/Target/SystemZ/SystemZOperators.td
+++ llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -572,10 +572,8 @@

 // Aligned loads.
 class AlignedLoad<SDPatternOperator load>
-  : PatFrag<(ops node:$addr), (load node:$addr), [{
-  auto *Load = cast<LoadSDNode>(N);
-  return Load->getAlignment() >= Load->getMemoryVT().getStoreSize();
-}]>;
+  : PatFrag<(ops node:$addr), (load node:$addr),
+            [{ return storeLoadIsAligned(N); }]>;
 def aligned_load         : AlignedLoad<load>;
 def aligned_asextloadi16 : AlignedLoad<asextloadi16>;
 def aligned_asextloadi32 : AlignedLoad<asextloadi32>;
@@ -584,10 +582,8 @@

 // Aligned stores.
 class AlignedStore<SDPatternOperator store>
-  : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
-  auto *Store = cast<StoreSDNode>(N);
-  return Store->getAlignment() >= Store->getMemoryVT().getStoreSize();
-}]>;
+  : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr),
+            [{ return storeLoadIsAligned(N); }]>;
 def aligned_store         : AlignedStore<store>;
 def aligned_truncstorei16 : AlignedStore<truncstorei16>;
 def aligned_truncstorei32 : AlignedStore<truncstorei32>;
Index: llvm/test/CodeGen/SystemZ/int-move-10.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/int-move-10.ll
@@ -0,0 +1,209 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+;
+; Test PC-relative memory accesses of globals with packed struct types.
+; PC-relative memory accesses cannot be used when the address is not
+; aligned. This can happen with programs like the following (which are not
+; strictly correct):
+;
+; #pragma pack(1)
+; struct {
+;   short a;
+;   int b;
+; } c;
+;
+; void main() {
+;   int *e = &c.b;
+;   *e = 0;
+; }
+;
+
+%packed.i16i32 = type <{ i16, i32 }>
+%packed.i16i32i16i32 = type <{ i16, i32, i16, i32 }>
+%packed.i16i64 = type <{ i16, i64 }>
+%packed.i8i16 = type <{ i8, i16 }>
+
+@A_align2 = global %packed.i16i32 zeroinitializer, align 2
+@B_align2 = global %packed.i16i32i16i32 zeroinitializer, align 2
+@C_align2 = global %packed.i16i64 zeroinitializer, align 2
+@D_align4 = global %packed.i16i32 zeroinitializer, align 4
+@E_align4 = global %packed.i16i32i16i32 zeroinitializer, align 4
+@F_align2 = global %packed.i8i16 zeroinitializer, align 2
+
+;;; Stores
+
+; unaligned packed struct + 2 -> unaligned address
+define void @f1() {
+; CHECK-LABEL: f1:
+; CHECK: larl %r1, A_align2
+; CHECK: mvhi 2(%r1), 0
+; CHECK: br %r14
+  store i32 0, i32* getelementptr inbounds (%packed.i16i32, %packed.i16i32* @A_align2, i64 0, i32 1), align 4
+  ret void
+}
+
+; unaligned packed struct + 8 -> unaligned address
+define void @f2() {
+; CHECK-LABEL: f2:
+; CHECK: larl %r1, B_align2
+; CHECK: mvhi 8(%r1), 0
+; CHECK: br %r14
+  store i32 0, i32* getelementptr inbounds (%packed.i16i32i16i32, %packed.i16i32i16i32* @B_align2, i64 0, i32 3), align 4
+  ret void
+}
+
+; aligned packed struct + 2 -> unaligned address
+define void @f3() {
+; CHECK-LABEL: f3:
+; CHECK: larl %r1, D_align4
+; CHECK: mvhi 2(%r1), 0
+; CHECK: br %r14
+  store i32 0, i32* getelementptr inbounds (%packed.i16i32, %packed.i16i32* @D_align4, i64 0, i32 1), align 4
+  ret void
+}
+
+; aligned packed struct + 8 -> aligned address
+define void @f4() {
+; CHECK-LABEL: f4:
+; CHECK: lhi %r0, 0
+; CHECK: strl %r0, E_align4+8
+; CHECK: br %r14
+  store i32 0, i32* getelementptr inbounds (%packed.i16i32i16i32, %packed.i16i32i16i32* @E_align4, i64 0, i32 3), align 4
+  ret void
+}
+
+define void @f5() {
+; CHECK-LABEL: f5:
+; CHECK: larl %r1, C_align2
+; CHECK: mvghi 2(%r1), 0
+; CHECK: br %r14
+  store i64 0, i64* getelementptr inbounds (%packed.i16i64, %packed.i16i64* @C_align2, i64 0, i32 1), align 8
+  ret void
+}
+
+define void @f6() {
+; CHECK-LABEL: f6:
+; CHECK-NOT: sthrl
+  store i16 0, i16* getelementptr inbounds (%packed.i8i16, %packed.i8i16* @F_align2, i64 0, i32 1), align 2
+  ret void
+}
+
+define void @f7(i64* %Src) {
+; CHECK-LABEL: f7:
+; CHECK: lg %r0, 0(%r2)
+; CHECK: larl %r1, D_align4
+; CHECK: st %r0, 2(%r1)
+; CHECK: br %r14
+  %L = load i64, i64* %Src
+  %T = trunc i64 %L to i32
+  store i32 %T, i32* getelementptr inbounds (%packed.i16i32, %packed.i16i32* @D_align4, i64 0, i32 1), align 4
+  ret void
+}
+
+define void @f8(i64* %Src) {
+; CHECK-LABEL: f8:
+; CHECK-NOT: sthrl
+  %L = load i64, i64* %Src
+  %T = trunc i64 %L to i16
+  store i16 %T, i16* getelementptr inbounds (%packed.i8i16, %packed.i8i16* @F_align2, i64 0, i32 1), align 2
+  ret void
+}
+
+;;; Loads
+
+; unaligned packed struct + 2 -> unaligned address
+define i32 @f9() {
+; CHECK-LABEL: f9:
+; CHECK: larl %r1, A_align2
+; CHECK: l %r2, 2(%r1)
+; CHECK: br %r14
+  %L = load i32, i32* getelementptr inbounds (%packed.i16i32, %packed.i16i32* @A_align2, i64 0, i32 1), align 4
+  ret i32 %L
+}
+
+; unaligned packed struct + 8 -> unaligned address
+define i32 @f10() {
+; CHECK-LABEL: f10:
+; CHECK: larl %r1, B_align2
+; CHECK: l %r2, 8(%r1)
+; CHECK: br %r14
+  %L = load i32, i32* getelementptr inbounds (%packed.i16i32i16i32, %packed.i16i32i16i32* @B_align2, i64 0, i32 3), align 4
+  ret i32 %L
+}
+
+; aligned packed struct + 2 -> unaligned address
+define i32 @f11() {
+; CHECK-LABEL: f11:
+; CHECK: larl %r1, D_align4
+; CHECK: l %r2, 2(%r1)
+; CHECK: br %r14
+  %L = load i32, i32* getelementptr inbounds (%packed.i16i32, %packed.i16i32* @D_align4, i64 0, i32 1), align 4
+  ret i32 %L
+}
+
+; aligned packed struct + 8 -> aligned address
+define i32 @f12() {
+; CHECK-LABEL: f12:
+; CHECK: lrl %r2, E_align4+8
+; CHECK: br %r14
+  %L = load i32, i32* getelementptr inbounds (%packed.i16i32i16i32, %packed.i16i32i16i32* @E_align4, i64 0, i32 3), align 4
+  ret i32 %L
+}
+
+define i64 @f13() {
+; CHECK-LABEL: f13:
+; CHECK: larl %r1, C_align2
+; CHECK: lg %r2, 2(%r1)
+; CHECK: br %r14
+  %L = load i64, i64* getelementptr inbounds (%packed.i16i64, %packed.i16i64* @C_align2, i64 0, i32 1), align 8
+  ret i64 %L
+}
+
+define i32 @f14() {
+; CHECK-LABEL: f14:
+; CHECK-NOT: lhrl
+  %L = load i16, i16* getelementptr inbounds (%packed.i8i16, %packed.i8i16* @F_align2, i64 0, i32 1), align 2
+  %ext = sext i16 %L to i32
+  ret i32 %ext
+}
+
+define i64 @f15() {
+; CHECK-LABEL: f15:
+; CHECK-NOT: llghrl
+  %L = load i16, i16* getelementptr inbounds (%packed.i8i16, %packed.i8i16* @F_align2, i64 0, i32 1), align 2
+  %ext = zext i16 %L to i64
+  ret i64 %ext
+}
+
+;;; Loads folded into compare instructions
+
+define i32 @f16(i32 %src1) {
+; CHECK-LABEL: f16:
+; CHECK: larl %r1, A_align2
+; CHECK: c %r2, 2(%r1)
+entry:
+  %src2 = load i32, i32* getelementptr inbounds (%packed.i16i32, %packed.i16i32* @A_align2, i64 0, i32 1), align 4
+  %cond = icmp slt i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+define i64 @f17(i64 %src1) {
+; CHECK-LABEL: f17:
+; CHECK: larl %r1, C_align2
+; CHECK: clg %r2, 2(%r1)
+entry:
+  %src2 = load i64, i64* getelementptr inbounds (%packed.i16i64, %packed.i16i64* @C_align2, i64 0, i32 1), align 8
+  %cond = icmp ult i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
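
Note (not part of the patch): storeLoadIsAligned() accepts a PC-relative access only when the referenced symbol is at least as aligned as the access and the constant offset preserves that alignment. The standalone C++ sketch below restates that rule with plain integers; pcRelAccessIsAligned and its parameters are hypothetical stand-ins for the checks made on the GlobalValue and GlobalAddressSDNode above, not LLVM API.

// Illustrative sketch only; not LLVM code.
#include <cassert>
#include <cstdint>

// Hypothetical restatement of the symbol-alignment and offset checks in
// storeLoadIsAligned().
static bool pcRelAccessIsAligned(uint64_t SymbolAlign, int64_t Offset,
                                 uint64_t AccessSize) {
  // The symbol must be at least as aligned as the access ...
  if (SymbolAlign < AccessSize)
    return false;
  // ... and the constant offset must preserve that alignment.
  return Offset % static_cast<int64_t>(AccessSize) == 0;
}

int main() {
  assert(!pcRelAccessIsAligned(2, 2, 4)); // f1: @A_align2+2, i32 -> LARL + MVHI
  assert(!pcRelAccessIsAligned(4, 2, 4)); // f3: @D_align4+2, i32 -> LARL + MVHI
  assert(pcRelAccessIsAligned(4, 8, 4));  // f4: @E_align4+8, i32 -> STRL
  return 0;
}

The assertions mirror f1, f3 and f4 in the test above: only the access to @E_align4+8 satisfies the rule, which is why f4 is the only one of the three expected to select STRL.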