Index: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp =================================================================== --- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -151,6 +151,8 @@ case AArch64::STURSi: case AArch64::STURDi: case AArch64::STURQi: + case AArch64::STURBBi: + case AArch64::STURHHi: case AArch64::STURWi: case AArch64::STURXi: case AArch64::LDURSi: @@ -188,6 +190,18 @@ } } +static bool isNarrowSt(unsigned Opc) { + switch (Opc) { + default: + return false; + case AArch64::STRBBui: + case AArch64::STURBBi: + case AArch64::STRHHui: + case AArch64::STURHHi: + return true; + } +} + static bool isNarrowLd(unsigned Opc) { switch (Opc) { default: @@ -216,12 +230,14 @@ case AArch64::LDRSBWui: case AArch64::LDURSBWi: case AArch64::STRBBui: + case AArch64::STURBBi: return 1; case AArch64::LDRHHui: case AArch64::LDURHHi: case AArch64::LDRSHWui: case AArch64::LDURSHWi: case AArch64::STRHHui: + case AArch64::STURHHi: return 2; case AArch64::LDRSui: case AArch64::LDURSi: @@ -275,6 +291,10 @@ case AArch64::STURDi: case AArch64::STRQui: case AArch64::STURQi: + case AArch64::STRBBui: + case AArch64::STURBBi: + case AArch64::STRHHui: + case AArch64::STURHHi: case AArch64::STRWui: case AArch64::STURWi: case AArch64::STRXui: @@ -324,6 +344,14 @@ case AArch64::STRQui: case AArch64::STURQi: return AArch64::STPQi; + case AArch64::STRBBui: + return AArch64::STRHHui; + case AArch64::STRHHui: + return AArch64::STRWui; + case AArch64::STURBBi: + return AArch64::STURHHi; + case AArch64::STURHHi: + return AArch64::STURWi; case AArch64::STRWui: case AArch64::STURWi: return AArch64::STPWi; @@ -671,17 +699,31 @@ return NextI; } - // Handle Unscaled - if (IsUnscaled) - OffsetImm /= OffsetStride; - // Construct the new instruction. 
- MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint, - I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(getLdStRegOp(RtMI)) - .addOperand(getLdStRegOp(Rt2MI)) - .addOperand(BaseRegOp) - .addImm(OffsetImm); + MachineInstrBuilder MIB; + if (isNarrowSt(Opc)) { + // Change the scaled offset from narrow to wide type. + if (!IsUnscaled) + OffsetImm /= 2; + MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(NewOpc)) + .addOperand(getLdStRegOp(I)) + .addOperand(BaseRegOp) + .addImm(OffsetImm); + // Copy MachineMemOperands from the original stores. + concatenateMemOperands(MIB, I, Paired); + } else { + // Handle Unscaled + if (IsUnscaled) + OffsetImm /= OffsetStride; + MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(NewOpc)) + .addOperand(getLdStRegOp(RtMI)) + .addOperand(getLdStRegOp(Rt2MI)) + .addOperand(BaseRegOp) + .addImm(OffsetImm); + } + (void)MIB; // FIXME: Do we need/want to copy the mem operands from the source @@ -820,6 +862,11 @@ unsigned Reg = getLdStRegOp(FirstMI).getReg(); unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); int Offset = getLdStOffsetOp(FirstMI).getImm(); + bool IsNarrowSt = isNarrowSt(Opc); + + // For narrow stores, find only the case where the stored value is WZR. + if (IsNarrowSt && Reg != AArch64::WZR) + return E; // Early exit if the first instruction modifies the base register. // e.g., ldr x0, [x0] @@ -830,7 +877,8 @@ // range, plus allow an extra one in case we find a later insn that matches // with Offset-1) int OffsetStride = IsUnscaled ? 
getMemScale(FirstMI) : 1; - if (!isNarrowLd(Opc) && !inBoundsForPair(IsUnscaled, Offset, OffsetStride)) + if (!(isNarrowLd(Opc) || IsNarrowSt) && + !inBoundsForPair(IsUnscaled, Offset, OffsetStride)) return E; // Track which registers have been modified and used between the first insn @@ -897,9 +945,9 @@ continue; } - if (IsNarrowLd) { - // If the alignment requirements of the larger type scaled load - // instruction can't express the scaled offset of the smaller type + if (IsNarrowLd || IsNarrowSt) { + // If the alignment requirements of the scaled wide load/store + // instruction can't express the offset of the scaled narrow // input, bail and keep looking. if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); @@ -919,7 +967,9 @@ // If the destination register of the loads is the same register, bail // and keep looking. A load-pair instruction with both destination // registers the same is UNPREDICTABLE and will result in an exception. - if (MayLoad && Reg == getLdStRegOp(MI).getReg()) { + // For narrow stores, allow only when the stored value is WZR. + if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) || + (IsNarrowSt && Reg != getLdStRegOp(MI).getReg())) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(MI); continue; @@ -1274,7 +1324,12 @@ case AArch64::LDRBBui: case AArch64::LDRSBWui: case AArch64::LDRSHWui: + case AArch64::STRBBui: + case AArch64::STRHHui: + // Unscaled instructions. 
+ case AArch64::STURBBi: + case AArch64::STURHHi: case AArch64::LDURHHi: case AArch64::LDURBBi: case AArch64::LDURSBWi: Index: test/CodeGen/AArch64/arm64-ldr-merge.ll =================================================================== --- test/CodeGen/AArch64/arm64-ldr-merge.ll +++ /dev/null @@ -1,227 +0,0 @@ -; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s - -; CHECK-LABEL: Ldrh_merge -; CHECK-NOT: ldrh -; CHECK: ldr [[NEW_DEST:w[0-9]+]] -; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff -; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]] -define i16 @Ldrh_merge(i16* nocapture readonly %p) { - %1 = load i16, i16* %p, align 2 - %arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1 - %2 = load i16, i16* %arrayidx2, align 2 - %add = add nuw nsw i16 %1, %2 - ret i16 %add -} - -; CHECK-LABEL: Ldurh_merge -; CHECK-NOT: ldurh -; CHECK: ldur [[NEW_DEST:w[0-9]+]] -; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff -; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]] -define i16 @Ldurh_merge(i16* nocapture readonly %p) { -entry: - %arrayidx = getelementptr inbounds i16, i16* %p, i64 -2 - %0 = load i16, i16* %arrayidx - %arrayidx3 = getelementptr inbounds i16, i16* %p, i64 -1 - %1 = load i16, i16* %arrayidx3 - %add = add nuw nsw i16 %0, %1 - ret i16 %add -} - -; CHECK-LABEL: Ldrh_4_merge -; CHECK-NOT: ldrh -; CHECK: ldp [[NEW_DEST:w[0-9]+]] -define i16 @Ldrh_4_merge(i16* nocapture readonly %P) { - %arrayidx = getelementptr inbounds i16, i16* %P, i64 0 - %l0 = load i16, i16* %arrayidx - %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 1 - %l1 = load i16, i16* %arrayidx2 - %arrayidx7 = getelementptr inbounds i16, i16* %P, i64 2 - %l2 = load i16, i16* %arrayidx7 - %arrayidx12 = getelementptr inbounds i16, i16* %P, i64 3 - %l3 = load i16, i16* %arrayidx12 - %add4 = add nuw nsw i16 %l1, %l0 - %add9 = add nuw nsw i16 %add4, %l2 - %add14 = add nuw nsw i16 %add9, %l3 - ret i16 %add14 -} - -; CHECK-LABEL: Ldrsh_merge -; CHECK-NOT: ldrsh -; CHECK: ldr [[NEW_DEST:w[0-9]+]] 
-; CHECK: asr w{{[0-9]+}}, [[NEW_DEST]], #16 -; CHECK: sxth w{{[0-9]+}}, [[NEW_DEST]] -define i32 @Ldrsh_merge(i16* %p) nounwind { - %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 5 - %tmp = load i16, i16* %add.ptr0 - %add.ptr = getelementptr inbounds i16, i16* %p, i64 4 - %tmp1 = load i16, i16* %add.ptr - %sexttmp = sext i16 %tmp to i32 - %sexttmp1 = sext i16 %tmp1 to i32 - %add = add nsw i32 %sexttmp1, %sexttmp - ret i32 %add -} - -; CHECK-LABEL: Ldrsh_zsext_merge -; CHECK: ldr [[NEW_DEST:w[0-9]+]] -; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]], #16 -; CHECK: sxth w{{[0-9]+}}, [[NEW_DEST]] -define i32 @Ldrsh_zsext_merge(i16* %p) nounwind { - %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 5 - %tmp = load i16, i16* %add.ptr0 - %add.ptr = getelementptr inbounds i16, i16* %p, i64 4 - %tmp1 = load i16, i16* %add.ptr - %sexttmp = zext i16 %tmp to i32 - %sexttmp1 = sext i16 %tmp1 to i32 - %add = add nsw i32 %sexttmp1, %sexttmp - ret i32 %add -} - -; CHECK-LABEL: Ldrsh_szext_merge -; CHECK: ldr [[NEW_DEST:w[0-9]+]] -; CHECK: asr w{{[0-9]+}}, [[NEW_DEST]], #16 -; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff -define i32 @Ldrsh_szext_merge(i16* %p) nounwind { - %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 5 - %tmp = load i16, i16* %add.ptr0 - %add.ptr = getelementptr inbounds i16, i16* %p, i64 4 - %tmp1 = load i16, i16* %add.ptr - %sexttmp = sext i16 %tmp to i32 - %sexttmp1 = zext i16 %tmp1 to i32 - %add = add nsw i32 %sexttmp1, %sexttmp - ret i32 %add -} - -; CHECK-LABEL: Ldrsb_merge -; CHECK: ldrh [[NEW_DEST:w[0-9]+]] -; CHECK: sxtb w{{[0-9]+}}, [[NEW_DEST]] -; CHECK: sbfx w{{[0-9]+}}, [[NEW_DEST]], #8, #8 -define i32 @Ldrsb_merge(i8* %p) nounwind { - %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 - %tmp = load i8, i8* %add.ptr0 - %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 - %tmp1 = load i8, i8* %add.ptr - %sexttmp = sext i8 %tmp to i32 - %sexttmp1 = sext i8 %tmp1 to i32 - %add = add nsw i32 %sexttmp1, %sexttmp - ret i32 %add -} - -; 
CHECK-LABEL: Ldrsb_zsext_merge -; CHECK: ldrh [[NEW_DEST:w[0-9]+]] -; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xff -; CHECK: sbfx w{{[0-9]+}}, [[NEW_DEST]], #8, #8 -define i32 @Ldrsb_zsext_merge(i8* %p) nounwind { - %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 - %tmp = load i8, i8* %add.ptr0 - %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 - %tmp1 = load i8, i8* %add.ptr - %sexttmp = zext i8 %tmp to i32 - %sexttmp1 = sext i8 %tmp1 to i32 - %add = add nsw i32 %sexttmp1, %sexttmp - ret i32 %add -} - -; CHECK-LABEL: Ldrsb_szext_merge -; CHECK: ldrh [[NEW_DEST:w[0-9]+]] -; CHECK: sxtb w{{[0-9]+}}, [[NEW_DEST]] -; CHECK: ubfx w{{[0-9]+}}, [[NEW_DEST]], #8, #8 -define i32 @Ldrsb_szext_merge(i8* %p) nounwind { - %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 - %tmp = load i8, i8* %add.ptr0 - %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 - %tmp1 = load i8, i8* %add.ptr - %sexttmp = sext i8 %tmp to i32 - %sexttmp1 = zext i8 %tmp1 to i32 - %add = add nsw i32 %sexttmp1, %sexttmp - ret i32 %add -} -; CHECK-LABEL: Ldursh_merge -; CHECK: ldur [[NEW_DEST:w[0-9]+]] -; CHECK: asr w{{[0-9]+}}, [[NEW_DEST]], #16 -; CHECK: sxth w{{[0-9]+}}, [[NEW_DEST]] -define i32 @Ldursh_merge(i16* %p) nounwind { - %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1 - %tmp = load i16, i16* %add.ptr0 - %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2 - %tmp1 = load i16, i16* %add.ptr - %sexttmp = sext i16 %tmp to i32 - %sexttmp1 = sext i16 %tmp1 to i32 - %add = add nsw i32 %sexttmp1, %sexttmp - ret i32 %add -} - -; CHECK-LABEL: Ldursh_zsext_merge -; CHECK: ldur [[NEW_DEST:w[0-9]+]] -; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]], #16 -; CHECK: sxth w{{[0-9]+}}, [[NEW_DEST]] -define i32 @Ldursh_zsext_merge(i16* %p) nounwind { - %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1 - %tmp = load i16, i16* %add.ptr0 - %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2 - %tmp1 = load i16, i16* %add.ptr - %sexttmp = zext i16 %tmp to i32 - %sexttmp1 = sext i16 %tmp1 to i32 - 
%add = add nsw i32 %sexttmp1, %sexttmp - ret i32 %add -} - -; CHECK-LABEL: Ldursh_szext_merge -; CHECK: ldur [[NEW_DEST:w[0-9]+]] -; CHECK: asr w{{[0-9]+}}, [[NEW_DEST]], #16 -; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff -define i32 @Ldursh_szext_merge(i16* %p) nounwind { - %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1 - %tmp = load i16, i16* %add.ptr0 - %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2 - %tmp1 = load i16, i16* %add.ptr - %sexttmp = sext i16 %tmp to i32 - %sexttmp1 = zext i16 %tmp1 to i32 - %add = add nsw i32 %sexttmp1, %sexttmp - ret i32 %add -} -; CHECK-LABEL: Ldursb_merge -; CHECK: ldurh [[NEW_DEST:w[0-9]+]] -; CHECK: sbfx w{{[0-9]+}}, [[NEW_DEST]], #8, #8 -; CHECK: sxtb w{{[0-9]+}}, [[NEW_DEST]] -define i32 @Ldursb_merge(i8* %p) nounwind { - %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 - %tmp = load i8, i8* %add.ptr0 - %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2 - %tmp1 = load i8, i8* %add.ptr - %sexttmp = sext i8 %tmp to i32 - %sexttmp1 = sext i8 %tmp1 to i32 - %add = add nsw i32 %sexttmp1, %sexttmp - ret i32 %add -} - -; CHECK-LABEL: Ldursb_zsext_merge -; CHECK: ldurh [[NEW_DEST:w[0-9]+]] -; CHECK: ubfx w{{[0-9]+}}, [[NEW_DEST]], #8, #8 -; CHECK: sxtb w{{[0-9]+}}, [[NEW_DEST]] -define i32 @Ldursb_zsext_merge(i8* %p) nounwind { - %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 - %tmp = load i8, i8* %add.ptr0 - %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2 - %tmp1 = load i8, i8* %add.ptr - %sexttmp = zext i8 %tmp to i32 - %sexttmp1 = sext i8 %tmp1 to i32 - %add = add nsw i32 %sexttmp1, %sexttmp - ret i32 %add -} - -; CHECK-LABEL: Ldursb_szext_merge -; CHECK: ldurh [[NEW_DEST:w[0-9]+]] -; CHECK: sbfx w{{[0-9]+}}, [[NEW_DEST]], #8, #8 -; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xff -define i32 @Ldursb_szext_merge(i8* %p) nounwind { - %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 - %tmp = load i8, i8* %add.ptr0 - %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2 - %tmp1 = load i8, i8* 
%add.ptr - %sexttmp = sext i8 %tmp to i32 - %sexttmp1 = zext i8 %tmp1 to i32 - %add = add nsw i32 %sexttmp1, %sexttmp - ret i32 %add -} - Index: test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll @@ -0,0 +1,330 @@ +; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s + +; CHECK-LABEL: Ldrh_merge +; CHECK-NOT: ldrh +; CHECK: ldr [[NEW_DEST:w[0-9]+]] +; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff +; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]] +define i16 @Ldrh_merge(i16* nocapture readonly %p) { + %1 = load i16, i16* %p, align 2 + %arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1 + %2 = load i16, i16* %arrayidx2, align 2 + %add = add nuw nsw i16 %1, %2 + ret i16 %add +} + +; CHECK-LABEL: Ldurh_merge +; CHECK-NOT: ldurh +; CHECK: ldur [[NEW_DEST:w[0-9]+]] +; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff +; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]] +define i16 @Ldurh_merge(i16* nocapture readonly %p) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %p, i64 -2 + %0 = load i16, i16* %arrayidx + %arrayidx3 = getelementptr inbounds i16, i16* %p, i64 -1 + %1 = load i16, i16* %arrayidx3 + %add = add nuw nsw i16 %0, %1 + ret i16 %add +} + +; CHECK-LABEL: Ldrh_4_merge +; CHECK-NOT: ldrh +; CHECK: ldp [[NEW_DEST:w[0-9]+]] +define i16 @Ldrh_4_merge(i16* nocapture readonly %P) { + %arrayidx = getelementptr inbounds i16, i16* %P, i64 0 + %l0 = load i16, i16* %arrayidx + %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 1 + %l1 = load i16, i16* %arrayidx2 + %arrayidx7 = getelementptr inbounds i16, i16* %P, i64 2 + %l2 = load i16, i16* %arrayidx7 + %arrayidx12 = getelementptr inbounds i16, i16* %P, i64 3 + %l3 = load i16, i16* %arrayidx12 + %add4 = add nuw nsw i16 %l1, %l0 + %add9 = add nuw nsw i16 %add4, %l2 + %add14 = add nuw nsw i16 %add9, %l3 + ret i16 %add14 +} + +; CHECK-LABEL: Ldrsh_merge +; CHECK-NOT: ldrsh 
+; CHECK: ldr [[NEW_DEST:w[0-9]+]] +; CHECK: asr w{{[0-9]+}}, [[NEW_DEST]], #16 +; CHECK: sxth w{{[0-9]+}}, [[NEW_DEST]] +define i32 @Ldrsh_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 5 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 4 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = sext i16 %tmp to i32 + %sexttmp1 = sext i16 %tmp1 to i32 + %add = add nsw i32 %sexttmp1, %sexttmp + ret i32 %add +} + +; CHECK-LABEL: Ldrsh_zsext_merge +; CHECK: ldr [[NEW_DEST:w[0-9]+]] +; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]], #16 +; CHECK: sxth w{{[0-9]+}}, [[NEW_DEST]] +define i32 @Ldrsh_zsext_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 5 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 4 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = zext i16 %tmp to i32 + %sexttmp1 = sext i16 %tmp1 to i32 + %add = add nsw i32 %sexttmp1, %sexttmp + ret i32 %add +} + +; CHECK-LABEL: Ldrsh_szext_merge +; CHECK: ldr [[NEW_DEST:w[0-9]+]] +; CHECK: asr w{{[0-9]+}}, [[NEW_DEST]], #16 +; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff +define i32 @Ldrsh_szext_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 5 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 4 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = sext i16 %tmp to i32 + %sexttmp1 = zext i16 %tmp1 to i32 + %add = add nsw i32 %sexttmp1, %sexttmp + ret i32 %add +} + +; CHECK-LABEL: Ldrsb_merge +; CHECK: ldrh [[NEW_DEST:w[0-9]+]] +; CHECK: sxtb w{{[0-9]+}}, [[NEW_DEST]] +; CHECK: sbfx w{{[0-9]+}}, [[NEW_DEST]], #8, #8 +define i32 @Ldrsb_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = sext i8 %tmp to i32 + %sexttmp1 = sext i8 %tmp1 to i32 + %add = add nsw i32 %sexttmp1, 
%sexttmp + ret i32 %add +} + +; CHECK-LABEL: Ldrsb_zsext_merge +; CHECK: ldrh [[NEW_DEST:w[0-9]+]] +; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xff +; CHECK: sbfx w{{[0-9]+}}, [[NEW_DEST]], #8, #8 +define i32 @Ldrsb_zsext_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = zext i8 %tmp to i32 + %sexttmp1 = sext i8 %tmp1 to i32 + %add = add nsw i32 %sexttmp1, %sexttmp + ret i32 %add +} + +; CHECK-LABEL: Ldrsb_szext_merge +; CHECK: ldrh [[NEW_DEST:w[0-9]+]] +; CHECK: sxtb w{{[0-9]+}}, [[NEW_DEST]] +; CHECK: ubfx w{{[0-9]+}}, [[NEW_DEST]], #8, #8 +define i32 @Ldrsb_szext_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = sext i8 %tmp to i32 + %sexttmp1 = zext i8 %tmp1 to i32 + %add = add nsw i32 %sexttmp1, %sexttmp + ret i32 %add +} +; CHECK-LABEL: Ldursh_merge +; CHECK: ldur [[NEW_DEST:w[0-9]+]] +; CHECK: asr w{{[0-9]+}}, [[NEW_DEST]], #16 +; CHECK: sxth w{{[0-9]+}}, [[NEW_DEST]] +define i32 @Ldursh_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = sext i16 %tmp to i32 + %sexttmp1 = sext i16 %tmp1 to i32 + %add = add nsw i32 %sexttmp1, %sexttmp + ret i32 %add +} + +; CHECK-LABEL: Ldursh_zsext_merge +; CHECK: ldur [[NEW_DEST:w[0-9]+]] +; CHECK: lsr w{{[0-9]+}}, [[NEW_DEST]], #16 +; CHECK: sxth w{{[0-9]+}}, [[NEW_DEST]] +define i32 @Ldursh_zsext_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = zext i16 %tmp to i32 + 
%sexttmp1 = sext i16 %tmp1 to i32 + %add = add nsw i32 %sexttmp1, %sexttmp + ret i32 %add +} + +; CHECK-LABEL: Ldursh_szext_merge +; CHECK: ldur [[NEW_DEST:w[0-9]+]] +; CHECK: asr w{{[0-9]+}}, [[NEW_DEST]], #16 +; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xffff +define i32 @Ldursh_szext_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = sext i16 %tmp to i32 + %sexttmp1 = zext i16 %tmp1 to i32 + %add = add nsw i32 %sexttmp1, %sexttmp + ret i32 %add +} + +; CHECK-LABEL: Ldursb_merge +; CHECK: ldurh [[NEW_DEST:w[0-9]+]] +; CHECK: sbfx w{{[0-9]+}}, [[NEW_DEST]], #8, #8 +; CHECK: sxtb w{{[0-9]+}}, [[NEW_DEST]] +define i32 @Ldursb_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = sext i8 %tmp to i32 + %sexttmp1 = sext i8 %tmp1 to i32 + %add = add nsw i32 %sexttmp1, %sexttmp + ret i32 %add +} + +; CHECK-LABEL: Ldursb_zsext_merge +; CHECK: ldurh [[NEW_DEST:w[0-9]+]] +; CHECK: ubfx w{{[0-9]+}}, [[NEW_DEST]], #8, #8 +; CHECK: sxtb w{{[0-9]+}}, [[NEW_DEST]] +define i32 @Ldursb_zsext_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = zext i8 %tmp to i32 + %sexttmp1 = sext i8 %tmp1 to i32 + %add = add nsw i32 %sexttmp1, %sexttmp + ret i32 %add +} + +; CHECK-LABEL: Ldursb_szext_merge +; CHECK: ldurh [[NEW_DEST:w[0-9]+]] +; CHECK: sbfx w{{[0-9]+}}, [[NEW_DEST]], #8, #8 +; CHECK: and w{{[0-9]+}}, [[NEW_DEST]], #0xff +define i32 @Ldursb_szext_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* 
%p, i64 -2 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = sext i8 %tmp to i32 + %sexttmp1 = zext i8 %tmp1 to i32 + %add = add nsw i32 %sexttmp1, %sexttmp + ret i32 %add +} + +; CHECK-LABEL: Strb_zero +; CHECK: strh wzr +define void @Strb_zero(i8* nocapture %P, i32 %n) #0 { +entry: + %idxprom = sext i32 %n to i64 + %arrayidx = getelementptr inbounds i8, i8* %P, i64 %idxprom + store i8 0, i8* %arrayidx + %add = add nsw i32 %n, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i8, i8* %P, i64 %idxprom1 + store i8 0, i8* %arrayidx2 + ret void +} + +; CHECK-LABEL: Strh_zero +; CHECK: str wzr +define void @Strh_zero(i16* nocapture %P, i32 %n) { +entry: + %idxprom = sext i32 %n to i64 + %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom + store i16 0, i16* %arrayidx + %add = add nsw i32 %n, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1 + store i16 0, i16* %arrayidx2 + ret void +} + +; CHECK-LABEL: Strh_zero_4 +; CHECK: stp wzr, wzr +define void @Strh_zero_4(i16* nocapture %P, i32 %n) { +entry: + %idxprom = sext i32 %n to i64 + %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom + store i16 0, i16* %arrayidx + %add = add nsw i32 %n, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1 + store i16 0, i16* %arrayidx2 + %add3 = add nsw i32 %n, 2 + %idxprom4 = sext i32 %add3 to i64 + %arrayidx5 = getelementptr inbounds i16, i16* %P, i64 %idxprom4 + store i16 0, i16* %arrayidx5 + %add6 = add nsw i32 %n, 3 + %idxprom7 = sext i32 %add6 to i64 + %arrayidx8 = getelementptr inbounds i16, i16* %P, i64 %idxprom7 + store i16 0, i16* %arrayidx8 + ret void +} + +; CHECK-LABEL: Sturb_zero +; CHECK: sturh wzr +define void @Sturb_zero(i8* nocapture %P, i32 %n) #0 { +entry: + %sub = add nsw i32 %n, -2 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds i8, i8* %P, i64 %idxprom + store i8 0, i8* %arrayidx + %sub2= add nsw i32 
%n, -1 + %idxprom1 = sext i32 %sub2 to i64 + %arrayidx2 = getelementptr inbounds i8, i8* %P, i64 %idxprom1 + store i8 0, i8* %arrayidx2 + ret void +} + +; CHECK-LABEL: Sturh_zero +; CHECK: stur wzr +define void @Sturh_zero(i16* nocapture %P, i32 %n) { +entry: + %sub = add nsw i32 %n, -2 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom + store i16 0, i16* %arrayidx + %sub1 = add nsw i32 %n, -3 + %idxprom2 = sext i32 %sub1 to i64 + %arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2 + store i16 0, i16* %arrayidx3 + ret void +} + +; CHECK-LABEL: Sturh_zero_4 +; CHECK: stp wzr, wzr +define void @Sturh_zero_4(i16* nocapture %P, i32 %n) { +entry: + %sub = add nsw i32 %n, -3 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom + store i16 0, i16* %arrayidx + %sub1 = add nsw i32 %n, -4 + %idxprom2 = sext i32 %sub1 to i64 + %arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2 + store i16 0, i16* %arrayidx3 + %sub4 = add nsw i32 %n, -2 + %idxprom5 = sext i32 %sub4 to i64 + %arrayidx6 = getelementptr inbounds i16, i16* %P, i64 %idxprom5 + store i16 0, i16* %arrayidx6 + %sub7 = add nsw i32 %n, -1 + %idxprom8 = sext i32 %sub7 to i64 + %arrayidx9 = getelementptr inbounds i16, i16* %P, i64 %idxprom8 + store i16 0, i16* %arrayidx9 + ret void +}