Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12376,16 +12376,98 @@ // If this load is directly stored, replace the load value with the stored // value. - // TODO: Handle store large -> read small portion. - // TODO: Handle TRUNCSTORE/LOADEXT - if (OptLevel != CodeGenOpt::None && - ISD::isNormalLoad(N) && !LD->isVolatile()) { - if (ISD::isNON_TRUNCStore(Chain.getNode())) { - StoreSDNode *PrevST = cast<StoreSDNode>(Chain); - if (PrevST->getBasePtr() == Ptr && - PrevST->getValue().getValueType() == N->getValueType(0)) - return CombineTo(N, PrevST->getOperand(1), Chain); + StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode()); + if (OptLevel != CodeGenOpt::None && !LD->isVolatile() && ST && + !ST->isVolatile()) { + auto LDType = LD->getValueType(0); + auto LDMemType = LD->getMemoryVT(); + auto STMemType = ST->getMemoryVT(); + auto STType = ST->getValue().getValueType(); + // Scalars have size 0 to distinguish from singleton vectors. + auto numElems = [](EVT T) { + return T.isVector() ? T.getVectorNumElements() : 0; + }; + BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG); + BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG); + int64_t Offset; + bool STCoversLD = + BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset) && (Offset >= 0) && + (Offset * 8 <= LDMemType.getSizeInBits()) && + (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits()); + if (STCoversLD && Offset == 0 && LDType == STType && + STMemType == LDMemType) { + // Simple case: Direct non-truncating forwarding + if (LDType.getSizeInBits() == LDMemType.getSizeInBits()) + return CombineTo(N, ST->getValue(), Chain); + // Can we model the truncate and extension with an and mask? 
+ if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() && + !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) { + // Mask to size of LDMemType + auto Mask = + DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(), + STMemType.getSizeInBits()), + SDLoc(ST), STType); + auto Val = + DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask); + return CombineTo(N, Val, Chain); + } } + SDValue Val = ST->getValue(); + do { + // TODO: Deal with non-zero offsets + if (LD->getBasePtr().isUndef() || Offset != 0 || !STCoversLD) + break; + // Convert StVal to STMemType. + if (STType == STMemType) { // Do nothing. + } else if (!isTypeLegal(STMemType)) + break; // fail. + else if (STType.isFloatingPoint() && STMemType.isFloatingPoint() && + TLI.isOperationLegal(ISD::FTRUNC, STMemType)) + Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val); + else if (numElems(STType) == numElems(STMemType) && STType.isInteger() && + STMemType.isInteger()) + Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val); + else if (STType.getSizeInBits() == STMemType.getSizeInBits()) + Val = DAG.getBitcast(STMemType, Val); + else + break; // fail + // Convert STMemType to LDMemType. + if (STMemType == LDMemType) { + // Do nothing. + } else if (!isTypeLegal(LDMemType)) + break; // fail. + else if (numElems(STMemType) == numElems(LDMemType) && + STMemType.isInteger() && LDMemType.isInteger()) + Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val); + else + break; // fail + + // Convert LDMemType to LDType. + if (LDMemType == LDType) { + // Do nothing. 
+ } else if (LDMemType.isInteger() && LDType.isInteger()) + switch (LD->getExtensionType()) { + case ISD::NON_EXTLOAD: + Val = DAG.getBitcast(LDType, Val); + break; + case ISD::EXTLOAD: + Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val); + break; + case ISD::SEXTLOAD: + Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val); + break; + case ISD::ZEXTLOAD: + Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val); + break; + } + else + break; + // We have a new value. Finalize. + return CombineTo(N, Val, Chain); + } while (false); + // Cleanup dead nodes we may have created. + if (Val->use_empty()) + deleteAndRecombine(Val.getNode()); } // Try to infer better alignment information than the load already has. Index: llvm/test/CodeGen/AArch64/arm64-ld-from-st.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-ld-from-st.ll +++ llvm/test/CodeGen/AArch64/arm64-ld-from-st.ll @@ -13,7 +13,7 @@ } ; CHECK-LABEL: Str64Ldr32_0 -; CHECK: and x0, x1, #0xffffffff +; CHECK: mov w0, w1 define i32 @Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) { entry: %0 = bitcast i64* %P to i32* @@ -37,7 +37,7 @@ } ; CHECK-LABEL: Str64Ldr16_0 -; CHECK: and x0, x1, #0xffff +; CHECK: mov w0, w1 define i16 @Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) { entry: %0 = bitcast i64* %P to i16* @@ -85,7 +85,7 @@ } ; CHECK-LABEL: Str64Ldr8_0 -; CHECK: and x0, x1, #0xff +; CHECK: mov w0, w1 define i8 @Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) { entry: %0 = bitcast i64* %P to i8* @@ -193,7 +193,7 @@ } ; CHECK-LABEL: Str32Ldr16_0 -; CHECK: and w0, w1, #0xffff +; CHECK: mov w0, w1 define i16 @Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) { entry: %0 = bitcast i32* %P to i16* @@ -217,7 +217,7 @@ } ; CHECK-LABEL: Str32Ldr8_0 -; CHECK: and w0, w1, #0xff +; CHECK: mov w0, w1 define i8 @Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) { entry: %0 = bitcast i32* %P to i8* @@ -265,7 +265,7 @@ } ; CHECK-LABEL: Str16Ldr16 -; CHECK: and 
w0, w1, #0xffff +; CHECK: mov w0, w1 define i16 @Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) { entry: %0 = bitcast i16* %P to i16* @@ -277,7 +277,7 @@ } ; CHECK-LABEL: Str16Ldr8_0 -; CHECK: and w0, w1, #0xff +; CHECK: mov w0, w1 define i8 @Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) { entry: %0 = bitcast i16* %P to i8* @@ -314,7 +314,7 @@ } ; CHECK-LABEL: Unscaled_Str64Ldr32_0 -; CHECK: and x0, x1, #0xffffffff +; CHECK: mov w0, w1 define i32 @Unscaled_Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) { entry: %0 = bitcast i64* %P to i32* @@ -338,7 +338,7 @@ } ; CHECK-LABEL: Unscaled_Str64Ldr16_0 -; CHECK: and x0, x1, #0xffff +; CHECK: mov w0, w1 define i16 @Unscaled_Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) { entry: %0 = bitcast i64* %P to i16* @@ -386,7 +386,7 @@ } ; CHECK-LABEL: Unscaled_Str64Ldr8_0 -; CHECK: and x0, x1, #0xff +; CHECK: mov w0, w1 define i8 @Unscaled_Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) { entry: %0 = bitcast i64* %P to i8* @@ -494,7 +494,7 @@ } ; CHECK-LABEL: Unscaled_Str32Ldr16_0 -; CHECK: and w0, w1, #0xffff +; CHECK: mov w0, w1 define i16 @Unscaled_Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) { entry: %0 = bitcast i32* %P to i16* @@ -518,7 +518,7 @@ } ; CHECK-LABEL: Unscaled_Str32Ldr8_0 -; CHECK: and w0, w1, #0xff +; CHECK: mov w0, w1 define i8 @Unscaled_Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) { entry: %0 = bitcast i32* %P to i8* @@ -566,7 +566,7 @@ } ; CHECK-LABEL: Unscaled_Str16Ldr16 -; CHECK: and w0, w1, #0xffff +; CHECK: mov w0, w1 define i16 @Unscaled_Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) { entry: %0 = bitcast i16* %P to i16* @@ -578,7 +578,7 @@ } ; CHECK-LABEL: Unscaled_Str16Ldr8_0 -; CHECK: and w0, w1, #0xff +; CHECK: mov w0, w1 define i8 @Unscaled_Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) { entry: %0 = bitcast i16* %P to i8* Index: llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll =================================================================== --- 
llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll +++ llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll @@ -26,9 +26,9 @@ store i8 %inc.4, i8* %locvar ; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]] -; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #1 ; CHECK: sturb w[[STRVAL:[0-9]+]], [x29, [[LOCADDR]]] -; CHECK: and w0, w[[STRVAL]], #0xff +; CHECK: and x0, x[[STRVAL]], #0xff %ret.1 = load i8, i8* %locvar %ret.2 = zext i8 %ret.1 to i64 Index: llvm/test/CodeGen/Hexagon/clr_set_toggle.ll =================================================================== --- llvm/test/CodeGen/Hexagon/clr_set_toggle.ll +++ llvm/test/CodeGen/Hexagon/clr_set_toggle.ll @@ -70,7 +70,7 @@ define zeroext i16 @my_setbit(i16 zeroext %crc) nounwind { entry: ; CHECK-LABEL: my_setbit -; CHECK: memh(r{{[0-9]+}}+#{{[0-9]+}}) = setbit(#15) +; CHECK: r{{[0-9]+}} = setbit(r{{[0-9]+}},#15) %crc.addr = alloca i16, align 2 store i16 %crc, i16* %crc.addr, align 2 %0 = load i16, i16* %crc.addr, align 2 Index: llvm/test/CodeGen/Hexagon/swp-epilog-phis.ll =================================================================== --- llvm/test/CodeGen/Hexagon/swp-epilog-phis.ll +++ /dev/null @@ -1,54 +0,0 @@ -; RUN: llc -march=hexagon -enable-pipeliner -pipeliner-max-stages=2 \ -; RUN: -pipeliner-ignore-recmii -disable-hexagon-nv-schedule -stats -o /dev/null\ -; RUN: -enable-aa-sched-mi < %s 2>&1 | FileCheck %s --check-prefix=STATS -; REQUIRES: asserts -; -; Test that we generate the correct phis in the last epilog block when -; allowing multiple stages. 
-; -; STATS: 1 pipeliner - Number of loops software pipelined - -; Function Attrs: nounwind -define void @f0() #0 { -b0: - br i1 undef, label %b6, label %b1 - -b1: ; preds = %b0 - br i1 undef, label %b6, label %b2 - -b2: ; preds = %b1 - br label %b4 - -b3: ; preds = %b4, %b3 - %v0 = add nsw i32 0, 57344 - %v1 = trunc i32 %v0 to i16 - store i16 %v1, i16* null, align 2, !tbaa !0 - %v2 = getelementptr inbounds i8, i8* null, i32 undef - %v3 = load i8, i8* %v2, align 1, !tbaa !4 - %v4 = zext i8 %v3 to i32 - %v5 = shl nuw nsw i32 %v4, 6 - %v6 = add nsw i32 %v5, 57344 - %v7 = trunc i32 %v6 to i16 - store i16 %v7, i16* undef, align 2, !tbaa !0 - br i1 undef, label %b5, label %b3 - -b4: ; preds = %b5, %b2 - %v8 = phi i32 [ 0, %b2 ], [ %v9, %b5 ] - br label %b3 - -b5: ; preds = %b3 - %v9 = add i32 %v8, 1 - %v10 = icmp eq i32 %v9, undef - br i1 %v10, label %b6, label %b4 - -b6: ; preds = %b5, %b1, %b0 - ret void -} - -attributes #0 = { nounwind "target-cpu"="hexagonv55" } - -!0 = !{!1, !1, i64 0} -!1 = !{!"short", !2} -!2 = !{!"omnipotent char", !3} -!3 = !{!"Simple C/C++ TBAA"} -!4 = !{!2, !2, i64 0} Index: llvm/test/CodeGen/Hexagon/swp-memrefs-epilog1.ll =================================================================== --- llvm/test/CodeGen/Hexagon/swp-memrefs-epilog1.ll +++ /dev/null @@ -1,90 +0,0 @@ -; RUN: llc -march=hexagon -enable-pipeliner < %s | FileCheck %s - -; Test that a store and load, that alias, are not put in the same packet. The -; pipeliner altered the size of the memrefs for these instructions, which -; resulted in no order dependence between the instructions in the DAG. No order -; dependence was added since the size was set to UINT_MAX, but there is a -; computation using the size that overflowed. 
- -; CHECK: endloop0 -; CHECK: memh([[REG:r([0-9]+)]]+#0) = -; CHECK: = memh([[REG]]++#2) - -; Function Attrs: nounwind -define signext i16 @f0(i16* nocapture readonly %a0, i16* nocapture readonly %a1) local_unnamed_addr #0 { -b0: - %v0 = alloca [40 x i16], align 8 - %v1 = bitcast [40 x i16]* %v0 to i8* - call void @llvm.lifetime.start.p0i8(i64 80, i8* nonnull %v1) #2 - %v2 = getelementptr inbounds [40 x i16], [40 x i16]* %v0, i32 0, i32 0 - br label %b1 - -b1: ; preds = %b1, %b0 - %v3 = phi i16* [ %a1, %b0 ], [ %v24, %b1 ] - %v4 = phi i16* [ %v2, %b0 ], [ %v25, %b1 ] - %v5 = phi i32 [ 0, %b0 ], [ %v14, %b1 ] - %v6 = phi i32 [ 1, %b0 ], [ %v22, %b1 ] - %v7 = phi i32 [ 0, %b0 ], [ %v23, %b1 ] - %v8 = load i16, i16* %v3, align 2 - %v9 = sext i16 %v8 to i32 - %v10 = tail call i32 @llvm.hexagon.A2.aslh(i32 %v9) - %v11 = tail call i32 @llvm.hexagon.S2.asr.r.r.sat(i32 %v10, i32 1) - %v12 = tail call i32 @llvm.hexagon.A2.asrh(i32 %v11) - %v13 = trunc i32 %v12 to i16 - store i16 %v13, i16* %v4, align 2 - %v14 = add nuw nsw i32 %v5, 1 - %v15 = icmp eq i32 %v14, 40 - %v16 = getelementptr inbounds i16, i16* %a0, i32 %v7 - %v17 = load i16, i16* %v16, align 2 - %v18 = sext i16 %v17 to i32 - %v19 = getelementptr inbounds [40 x i16], [40 x i16]* %v0, i32 0, i32 %v7 - %v20 = load i16, i16* %v19, align 2 - %v21 = sext i16 %v20 to i32 - %v22 = tail call i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s1(i32 %v6, i32 %v18, i32 %v21) - %v23 = add nuw nsw i32 %v7, 1 - %v24 = getelementptr i16, i16* %v3, i32 1 - %v25 = getelementptr i16, i16* %v4, i32 1 - br i1 %v15, label %b2, label %b1 - -b2: ; preds = %b1 - %v26 = tail call signext i16 @f1(i32 %v22) #0 - %v27 = sext i16 %v26 to i32 - %v28 = tail call i32 @llvm.hexagon.S2.asl.r.r.sat(i32 %v22, i32 %v27) - %v29 = tail call i32 @llvm.hexagon.A2.asrh(i32 %v28) - %v30 = shl i32 %v29, 16 - %v31 = ashr exact i32 %v30, 16 - %v32 = icmp slt i32 %v30, 65536 - br label %b3 - -b3: ; preds = %b2 - call void @llvm.lifetime.end.p0i8(i64 80, i8* nonnull %v1) #2 
- ret i16 0 -} - -; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 - -; Function Attrs: nounwind readnone -declare i32 @llvm.hexagon.S2.asr.r.r.sat(i32, i32) #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.hexagon.A2.aslh(i32) #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.hexagon.A2.asrh(i32) #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s1(i32, i32, i32) #2 - -; Function Attrs: nounwind -declare signext i16 @f1(i32) local_unnamed_addr #0 - -; Function Attrs: nounwind readnone -declare i32 @llvm.hexagon.S2.asl.r.r.sat(i32, i32) #2 - -; Function Attrs: argmemonly nounwind -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 - -attributes #0 = { nounwind } -attributes #1 = { argmemonly nounwind } -attributes #2 = { nounwind readnone } Index: llvm/test/CodeGen/Mips/cconv/vector.ll =================================================================== --- llvm/test/CodeGen/Mips/cconv/vector.ll +++ llvm/test/CodeGen/Mips/cconv/vector.ll @@ -2269,12 +2269,10 @@ ; MIPS64R5-NEXT: sd $4, 24($sp) ; MIPS64R5-NEXT: ldi.b $w0, 0 ; MIPS64R5-NEXT: lw $1, 20($sp) -; MIPS64R5-NEXT: lw $2, 16($sp) ; MIPS64R5-NEXT: move.v $w1, $w0 -; MIPS64R5-NEXT: insert.d $w1[0], $2 +; MIPS64R5-NEXT: insert.d $w1[0], $5 ; MIPS64R5-NEXT: insert.d $w1[1], $1 -; MIPS64R5-NEXT: lw $1, 24($sp) -; MIPS64R5-NEXT: insert.d $w0[0], $1 +; MIPS64R5-NEXT: insert.d $w0[0], $4 ; MIPS64R5-NEXT: lw $1, 28($sp) ; MIPS64R5-NEXT: insert.d $w0[1], $1 ; MIPS64R5-NEXT: addv.d $w0, $w0, $w1 @@ -3749,12 +3747,8 @@ ; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 32 ; MIPS32R5EB-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill ; MIPS32R5EB-NEXT: .cfi_offset 31, -4 -; MIPS32R5EB-NEXT: addiu $1, $zero, 1543 -; MIPS32R5EB-NEXT: sh $1, 20($sp) -; MIPS32R5EB-NEXT: addiu $1, $zero, 3080 -; MIPS32R5EB-NEXT: sh $1, 24($sp) -; MIPS32R5EB-NEXT: lhu $4, 20($sp) -; MIPS32R5EB-NEXT: lhu $5, 24($sp) +; MIPS32R5EB-NEXT: addiu 
$4, $zero, 1543 +; MIPS32R5EB-NEXT: addiu $5, $zero, 3080 ; MIPS32R5EB-NEXT: jal i8_2 ; MIPS32R5EB-NEXT: nop ; MIPS32R5EB-NEXT: sw $2, 16($sp) @@ -3873,12 +3867,8 @@ ; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 32 ; MIPS32R5EL-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill ; MIPS32R5EL-NEXT: .cfi_offset 31, -4 -; MIPS32R5EL-NEXT: addiu $1, $zero, 1798 -; MIPS32R5EL-NEXT: sh $1, 20($sp) -; MIPS32R5EL-NEXT: addiu $1, $zero, 2060 -; MIPS32R5EL-NEXT: sh $1, 24($sp) -; MIPS32R5EL-NEXT: lhu $4, 20($sp) -; MIPS32R5EL-NEXT: lhu $5, 24($sp) +; MIPS32R5EL-NEXT: addiu $4, $zero, 1798 +; MIPS32R5EL-NEXT: addiu $5, $zero, 2060 ; MIPS32R5EL-NEXT: jal i8_2 ; MIPS32R5EL-NEXT: nop ; MIPS32R5EL-NEXT: sw $2, 16($sp) Index: llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll =================================================================== --- llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll +++ llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll @@ -155,11 +155,10 @@ ; MIPS64R2: # %bb.0: # %entry ; MIPS64R2-NEXT: daddiu $sp, $sp, -16 ; MIPS64R2-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R2-NEXT: sw $4, 4($sp) -; MIPS64R2-NEXT: lwu $2, 4($sp) +; MIPS64R2-NEXT: dext $2, $4, 0, 32 ; MIPS64R2-NEXT: sltiu $1, $2, 7 ; MIPS64R2-NEXT: beqz $1, .LBB0_3 -; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: sw $4, 4($sp) ; MIPS64R2-NEXT: .LBB0_1: # %entry ; MIPS64R2-NEXT: dsll $1, $2, 3 ; MIPS64R2-NEXT: lui $2, %highest(.LJTI0_0) @@ -251,10 +250,10 @@ ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: sw $4, 4($sp) -; MIPS64R6-NEXT: lwu $2, 4($sp) +; MIPS64R6-NEXT: dext $2, $4, 0, 32 ; MIPS64R6-NEXT: sltiu $1, $2, 7 -; MIPS64R6-NEXT: beqzc $1, .LBB0_3 +; MIPS64R6-NEXT: beqz $1, .LBB0_3 +; MIPS64R6-NEXT: sw $4, 4($sp) ; MIPS64R6-NEXT: .LBB0_1: # %entry ; MIPS64R6-NEXT: dsll $1, $2, 3 ; MIPS64R6-NEXT: lui $2, %highest(.LJTI0_0) @@ -473,11 +472,10 @@ ; PIC-MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(_Z3fooi))) ; PIC-MIPS64R2-NEXT: 
daddu $1, $1, $25 ; PIC-MIPS64R2-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(_Z3fooi))) -; PIC-MIPS64R2-NEXT: sw $4, 4($sp) -; PIC-MIPS64R2-NEXT: lwu $3, 4($sp) +; PIC-MIPS64R2-NEXT: dext $3, $4, 0, 32 ; PIC-MIPS64R2-NEXT: sltiu $1, $3, 7 ; PIC-MIPS64R2-NEXT: beqz $1, .LBB0_3 -; PIC-MIPS64R2-NEXT: nop +; PIC-MIPS64R2-NEXT: sw $4, 4($sp) ; PIC-MIPS64R2-NEXT: .LBB0_1: # %entry ; PIC-MIPS64R2-NEXT: dsll $1, $3, 3 ; PIC-MIPS64R2-NEXT: ld $3, %got_page(.LJTI0_0)($2) @@ -537,10 +535,10 @@ ; PIC-MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(_Z3fooi))) ; PIC-MIPS64R6-NEXT: daddu $1, $1, $25 ; PIC-MIPS64R6-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(_Z3fooi))) -; PIC-MIPS64R6-NEXT: sw $4, 4($sp) -; PIC-MIPS64R6-NEXT: lwu $3, 4($sp) +; PIC-MIPS64R6-NEXT: dext $3, $4, 0, 32 ; PIC-MIPS64R6-NEXT: sltiu $1, $3, 7 -; PIC-MIPS64R6-NEXT: beqzc $1, .LBB0_3 +; PIC-MIPS64R6-NEXT: beqz $1, .LBB0_3 +; PIC-MIPS64R6-NEXT: sw $4, 4($sp) ; PIC-MIPS64R6-NEXT: .LBB0_1: # %entry ; PIC-MIPS64R6-NEXT: dsll $1, $3, 3 ; PIC-MIPS64R6-NEXT: ld $3, %got_page(.LJTI0_0)($2) Index: llvm/test/CodeGen/Mips/o32_cc_byval.ll =================================================================== --- llvm/test/CodeGen/Mips/o32_cc_byval.ll +++ llvm/test/CodeGen/Mips/o32_cc_byval.ll @@ -109,7 +109,8 @@ ; CHECK-NEXT: lw $1, 64($sp) ; CHECK-NEXT: lw $2, 68($sp) ; CHECK-NEXT: lh $3, 58($sp) -; CHECK-NEXT: lb $5, 56($sp) +; CHECK-NEXT: sll $5, $6, 24 +; CHECK-NEXT: sra $5, $5, 24 ; CHECK-NEXT: swc1 $f12, 36($sp) ; CHECK-NEXT: sw $5, 32($sp) ; CHECK-NEXT: sw $3, 28($sp) @@ -191,11 +192,12 @@ ; CHECK-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill ; CHECK-NEXT: addu $gp, $2, $25 ; CHECK-NEXT: move $4, $7 -; CHECK-NEXT: sw $5, 52($sp) ; CHECK-NEXT: sw $6, 56($sp) +; CHECK-NEXT: sw $5, 52($sp) ; CHECK-NEXT: sw $7, 60($sp) ; CHECK-NEXT: lw $1, 80($sp) -; CHECK-NEXT: lb $2, 52($sp) +; CHECK-NEXT: sll $2, $5, 24 +; CHECK-NEXT: sra $2, $2, 24 ; CHECK-NEXT: addiu $3, $zero, 4 ; CHECK-NEXT: lui $5, 16576 ; CHECK-NEXT: sw $5, 36($sp) Index: 
llvm/test/CodeGen/Mips/o32_cc_vararg.ll =================================================================== --- llvm/test/CodeGen/Mips/o32_cc_vararg.ll +++ llvm/test/CodeGen/Mips/o32_cc_vararg.ll @@ -29,10 +29,10 @@ ; CHECK-LABEL: va1: ; CHECK: addiu $sp, $sp, -16 -; CHECK: sw $5, 20($sp) ; CHECK: sw $7, 28($sp) ; CHECK: sw $6, 24($sp) -; CHECK: lw $2, 20($sp) +; CHECK: sw $5, 20($sp) +; CHECK: move $2, $5 } ; check whether the variable double argument will be accessed from the 8-byte @@ -83,9 +83,9 @@ ; CHECK-LABEL: va3: ; CHECK: addiu $sp, $sp, -16 -; CHECK: sw $6, 24($sp) ; CHECK: sw $7, 28($sp) -; CHECK: lw $2, 24($sp) +; CHECK: sw $6, 24($sp) +; CHECK: move $2, $6 } ; double @@ -135,7 +135,7 @@ ; CHECK-LABEL: va5: ; CHECK: addiu $sp, $sp, -24 ; CHECK: sw $7, 36($sp) -; CHECK: lw $2, 36($sp) +; CHECK: move $2, $7 } ; double Index: llvm/test/CodeGen/PowerPC/addi-offset-fold.ll =================================================================== --- llvm/test/CodeGen/PowerPC/addi-offset-fold.ll +++ llvm/test/CodeGen/PowerPC/addi-offset-fold.ll @@ -24,12 +24,11 @@ ret i32 %bf.cast ; CHECK-LABEL: @foo -; FIXME: We don't need to do these stores/loads at all. +; FIXME: We don't need to do these stores at all. 
; CHECK-DAG: std 3, -24(1) ; CHECK-DAG: stb 4, -16(1) -; CHECK-DAG: lbz [[REG1:[0-9]+]], -16(1) +; CHECK-DAG: sldi [[REG3:[0-9]+]], 4, 32 ; CHECK-DAG: lwz [[REG2:[0-9]+]], -20(1) -; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG1]], 32 ; CHECK-DAG: or [[REG4:[0-9]+]], [[REG2]], [[REG3]] ; CHECK: rldicl 3, [[REG4]], 33, 57 ; CHECK: blr Index: llvm/test/CodeGen/PowerPC/pr13891.ll =================================================================== --- llvm/test/CodeGen/PowerPC/pr13891.ll +++ llvm/test/CodeGen/PowerPC/pr13891.ll @@ -6,8 +6,9 @@ define void @_Z5check3foos(%struct.foo* nocapture byval %f, i16 signext %i) noinline { ; CHECK-LABEL: _Z5check3foos: -; CHECK: sth 3, {{[0-9]+}}(1) -; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1) +; CHECK-DAG: extsh 5, 3 +; CHECK-DAG: sth 3, {{[0-9]+}}(1) + entry: %0 = bitcast %struct.foo* %f to i16* %1 = load i16, i16* %0, align 2 Index: llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll =================================================================== --- llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll +++ llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll @@ -61,8 +61,7 @@ ; CHECK-NEXT: rosbg %r0, %r1, 62, 62, 1 ; CHECK-NEXT: vlgvb %r1, %v24, 15 ; CHECK-NEXT: rosbg %r0, %r1, 63, 63, 0 -; CHECK-NEXT: sth %r0, 160(%r15) -; CHECK-NEXT: lh %r2, 160(%r15) +; CHECK-NEXT: llhr %r2, %r0 ; CHECK-NEXT: aghi %r15, 168 ; CHECK-NEXT: br %r14 { Index: llvm/test/CodeGen/X86/i386-shrink-wrapping.ll =================================================================== --- llvm/test/CodeGen/X86/i386-shrink-wrapping.ll +++ llvm/test/CodeGen/X86/i386-shrink-wrapping.ll @@ -56,7 +56,7 @@ ; ; CHECK-NEXT: L_e$non_lazy_ptr, [[E:%[a-z]+]] ; CHECK-NEXT: movb %dl, ([[E]]) -; CHECK-NEXT: movsbl ([[E]]), [[CONV:%[a-z]+]] +; CHECK-NEXT: movzbl %dl, [[CONV:%[a-z]+]] ; CHECK-NEXT: movl $6, [[CONV:%[a-z]+]] ; The eflags is used in the next instruction. 
; If that instruction disappear, we are not exercising the bug Index: llvm/test/CodeGen/X86/pr32108.ll =================================================================== --- llvm/test/CodeGen/X86/pr32108.ll +++ llvm/test/CodeGen/X86/pr32108.ll @@ -4,7 +4,6 @@ define void @pr32108() { ; CHECK-LABEL: pr32108: ; CHECK: # %bb.0: # %BB -; CHECK-NEXT: movb $0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %CF244 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/X86/win64_vararg.ll =================================================================== --- llvm/test/CodeGen/X86/win64_vararg.ll +++ llvm/test/CodeGen/X86/win64_vararg.ll @@ -123,8 +123,7 @@ ; CHECK: pushq ; CHECK-DAG: movq %r9, 40(%rsp) ; CHECK-DAG: movq %r8, 32(%rsp) -; CHECK: movl 32(%rsp), %[[tmp:[^ ]*]] -; CHECK: movl %[[tmp]], (%[[sret:[^ ]*]]) +; CHECK: movl %r8d, (%[[sret:[^ ]*]]) ; CHECK: movq %[[sret]], %rax ; CHECK: popq ; CHECK: retq