Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13087,14 +13087,28 @@
     }
   }

-  // If this is a store followed by a store with the same value to the same
-  // location, then the store is dead/noop.
   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
-    if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
-        ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
-        ST1->isUnindexed() && !ST1->isVolatile()) {
-      // The store is dead, remove it.
-      return Chain;
+    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
+        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
+        ST->getMemoryVT() == ST1->getMemoryVT()) {
+      // If this is a store followed by a store with the same value to the same
+      // location, then the store is dead/noop.
+      if (ST1->getValue() == Value) {
+        // The store is dead, remove it.
+        return Chain;
+      }
+
+      // If this store is preceded by a store to the same location and no
+      // other node is chained to that preceding store, the preceding store
+      // is dead and can effectively be dropped. Do not remove stores to
+      // undef as they may be used as data sinks.
+      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+          !ST1->getBasePtr().isUndef()) {
+        // ST1 is fully overwritten and can be elided. Combine with its chain
+        // value.
+        CombineTo(ST1, ST1->getChain());
+        return SDValue();
+      }
     }
   }

Index: llvm/trunk/test/CodeGen/AArch64/ldst-zero.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/ldst-zero.ll
+++ llvm/trunk/test/CodeGen/AArch64/ldst-zero.ll
@@ -9,9 +9,9 @@
 ; Original test case which exhibited the bug
 define void @test1(%struct.tree_common* %t, i32 %code, i8* %type) {
 ; CHECK-LABEL: test1:
-; CHECK: stp xzr, xzr, [x0, #8]
-; CHECK: stp xzr, x2, [x0]
-; CHECK: str w1, [x0, #16]
+; CHECK-DAG: stp x2, xzr, [x0, #8]
+; CHECK-DAG: str w1, [x0, #16]
+; CHECK-DAG: str xzr, [x0]
 entry:
   %0 = bitcast %struct.tree_common* %t to i8*
   tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false)
@@ -25,10 +25,8 @@
 ; Store to each struct element instead of using memset
 define void @test2(%struct.tree_common* %t, i32 %code, i8* %type) {
 ; CHECK-LABEL: test2:
-; CHECK: stp xzr, xzr, [x0]
-; CHECK: str wzr, [x0, #16]
-; CHECK: str w1, [x0, #16]
-; CHECK: str x2, [x0, #8]
+; CHECK-DAG: str w1, [x0, #16]
+; CHECK-DAG: stp xzr, x2, [x0]
 entry:
   %0 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 0
   %1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
@@ -44,9 +42,9 @@
 ; Vector store instead of memset
 define void @test3(%struct.tree_common* %t, i32 %code, i8* %type) {
 ; CHECK-LABEL: test3:
-; CHECK: stp xzr, xzr, [x0, #8]
-; CHECK: stp xzr, x2, [x0]
-; CHECK: str w1, [x0, #16]
+; CHECK-DAG: stp x2, xzr, [x0, #8]
+; CHECK-DAG: str w1, [x0, #16]
+; CHECK-DAG: str xzr, [x0]
 entry:
   %0 = bitcast %struct.tree_common* %t to <3 x i64>*
   store <3 x i64> zeroinitializer, <3 x i64>* %0, align 8
@@ -60,9 +58,8 @@
 ; Vector store, then store to vector elements
 define void @test4(<3 x i64>* %p, i64 %x, i64 %y) {
 ; CHECK-LABEL: test4:
-; CHECK: stp xzr, xzr, [x0, #8]
-; CHECK: stp xzr, x2, [x0]
-; CHECK: str x1, [x0, #16]
+; CHECK-DAG: stp x2, x1, [x0, #8]
+; CHECK-DAG: str xzr, [x0]
 entry:
   store <3 x i64> zeroinitializer, <3 x i64>* %p, align 8
   %0 = bitcast <3 x i64>* %p to i64*
Index: llvm/trunk/test/CodeGen/AArch64/misched-stp.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/misched-stp.ll
+++ llvm/trunk/test/CodeGen/AArch64/misched-stp.ll
@@ -1,20 +1,18 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -o - | FileCheck %s

 ; Tests to check that the scheduler dependencies derived from alias analysis are
 ; correct when we have loads that have been split up so that they can later be
 ; merged into STP.

-; CHECK: ********** MI Scheduling **********
-; CHECK: test_splat:BB#0 entry
-; CHECK: SU({{[0-9]+}}): STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%3+8]
-; CHECK: Successors:
-; CHECK-NEXT: ord [[SU1:SU\([0-9]+\)]]
-; CHECK: SU({{[0-9]+}}): STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%3+4]
-; CHECK: Successors:
-; CHECK-NEXT: ord [[SU2:SU\([0-9]+\)]]
-; CHECK: [[SU1]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%2]
-; CHECK: [[SU2]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%1]
+; Now that overwritten stores are elided in SelectionDAG, these dependencies
+; are resolved and removed before MISCHED. Check that we get the equivalent
+; pair of stp instructions as a baseline.
+
+; CHECK-LABEL: test_splat
+; CHECK: ldr [[REG:w[0-9]+]], [x2]
+; CHECK-DAG: stp w0, [[REG]], [x2, #12]
+; CHECK-DAG: stp [[REG]], w1, [x2, #4]
 define void @test_splat(i32 %x, i32 %y, i32* %p) {
 entry:
   %val = load i32, i32* %p, align 4
@@ -35,16 +33,11 @@
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
 %struct.tree_common = type { i8*, i8*, i32 }

-; CHECK: ********** MI Scheduling **********
-; CHECK: test_zero:BB#0 entry
-; CHECK: SU({{[0-9]+}}): STRXui %XZR, %vreg{{[0-9]+}}, 2; mem:ST8[%0+16]
-; CHECK: Successors:
-; CHECK-NEXT: ord [[SU3:SU\([0-9]+\)]]
-; CHECK: SU({{[0-9]+}}): STRXui %XZR, %vreg{{[0-9]+}}, 1; mem:ST8[%0+8]
-; CHECK: Successors:
-; CHECK-NEXT: ord [[SU4:SU\([0-9]+\)]]
-; CHECK: [[SU3]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 4; mem:ST4[%code1]
-; CHECK: [[SU4]]: STRXui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 1; mem:ST8[%type2]
+; CHECK-LABEL: test_zero
+; CHECK-DAG: stp x2, xzr, [x0, #8]
+; CHECK-DAG: str w1, [x0, #16]
+; CHECK-DAG: str xzr, [x0]
+
 define void @test_zero(%struct.tree_common* %t, i32 %code, i8* %type) {
 entry:
   %0 = bitcast %struct.tree_common* %t to i8*
Index: llvm/trunk/test/CodeGen/AMDGPU/global-constant.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-constant.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/global-constant.ll
@@ -29,10 +29,10 @@
 define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) {
   %ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @private1, i32 0, i32 %index
   %val = load float, float addrspace(2)* %ptr
-  store float %val, float addrspace(1)* %out
+  store volatile float %val, float addrspace(1)* %out
   %ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @private2, i32 0, i32 %index
   %val2 = load float, float addrspace(2)* %ptr2
-  store float %val2, float addrspace(1)* %out
+  store volatile float %val2, float addrspace(1)* %out
   ret void
 }

Index: llvm/trunk/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
+++ llvm/trunk/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
@@ -20,7 +20,7 @@

 bb8:                                            ; preds = %bb3
   %1 = getelementptr inbounds i8, i8* %0, i32 0
-  store i8 0, i8* %1, align 1
+  store volatile i8 0, i8* %1, align 1
   %2 = call i32 @ptou() nounwind
 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
 ; CHECK-NOT: [[REGISTER]],
@@ -35,7 +35,7 @@
   %7 = or i8 %6, 48
   %8 = add i8 %6, 87
   %iftmp.5.0.1 = select i1 %5, i8 %7, i8 %8
-  store i8 %iftmp.5.0.1, i8* %p8, align 1
+  store volatile i8 %iftmp.5.0.1, i8* %p8, align 1
 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
 ; CHECK-NOT: [[REGISTER]],
 ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -49,7 +49,7 @@
   %13 = or i8 %12, 48
   %14 = add i8 %12, 87
   %iftmp.5.0.2 = select i1 %11, i8 %13, i8 %14
-  store i8 %iftmp.5.0.2, i8* %p8, align 1
+  store volatile i8 %iftmp.5.0.2, i8* %p8, align 1
 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
 ; CHECK-NOT: [[REGISTER]],
 ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -63,7 +63,7 @@
   %19 = or i8 %18, 48
   %20 = add i8 %18, 87
   %iftmp.5.0.4 = select i1 %17, i8 %19, i8 %20
-  store i8 %iftmp.5.0.4, i8* null, align 1
+  store volatile i8 %iftmp.5.0.4, i8* null, align 1
 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
 ; CHECK-NOT: [[REGISTER]],
 ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -74,7 +74,7 @@
   %22 = urem i32 %21, 10
   %23 = icmp ult i32 %22, 10
   %iftmp.5.0.5 = select i1 %23, i8 0, i8 %val8
-  store i8 %iftmp.5.0.5, i8* %p8, align 1
+  store volatile i8 %iftmp.5.0.5, i8* %p8, align 1
 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
 ; CHECK-NOT: [[REGISTER]],
 ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -88,7 +88,7 @@
   %28 = or i8 %27, 48
   %29 = add i8 %27, 87
   %iftmp.5.0.6 = select i1 %26, i8 %28, i8 %29
-  store i8 %iftmp.5.0.6, i8* %p8, align 1
+  store volatile i8 %iftmp.5.0.6, i8* %p8, align 1
 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
 ; CHECK-NOT: [[REGISTER]],
 ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -102,7 +102,7 @@
   %34 = or i8 %33, 48
   %35 = add i8 %33, 87
   %iftmp.5.0.7 = select i1 %32, i8 %34, i8 %35
-  store i8 %iftmp.5.0.7, i8* %p8, align 1
+  store volatile i8 %iftmp.5.0.7, i8* %p8, align 1
 ; CHECK: umull [[REGISTER:lr|r[0-9]+]],
 ; CHECK-NOT: [[REGISTER]],
 ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
@@ -116,7 +116,7 @@
   %40 = or i8 %39, 48
   %41 = add i8 %39, 87
   %iftmp.5.0.8 = select i1 %38, i8 %40, i8 %41
-  store i8 %iftmp.5.0.8, i8* null, align 1
+  store volatile i8 %iftmp.5.0.8, i8* null, align 1
   br label %bb46

 bb46:                                           ; preds = %bb3
Index: llvm/trunk/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
+++ llvm/trunk/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
@@ -13,7 +13,7 @@
 ; CHECK: sub sp, sp, #12
 ; CHECK: sub sp, sp, #4
 ; CHECK: add r0, sp, #4
-; CHECK: stm sp, {r0, r1, r2, r3}
+; CHECK: stmib sp, {r1, r2, r3}
   %g = alloca i8*
   %g1 = bitcast i8** %g to i8*
   call void @llvm.va_start(i8* %g1)
Index: llvm/trunk/test/CodeGen/ARM/dag-combine-ldst.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/dag-combine-ldst.ll
+++ llvm/trunk/test/CodeGen/ARM/dag-combine-ldst.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: {{^}}main
 ; CHECK: mov [[TMP:r[0-9]+]], #0
 ; CHECK-NEXT: str [[TMP]], [sp, #4]
-; CHECK-NEXT: str [[TMP]], [sp]
+; CHECK_O0: str [[TMP]], [sp]
 ; CHECK_O0: ldr [[TMP:r[0-9]+]], [sp]
 ; CHECK_O0-NEXT: add [[TMP]], [[TMP]], #2
 ; CHECK_O1-NOT: ldr [[TMP:r[0-9]+]], [sp]
Index: llvm/trunk/test/CodeGen/MSP430/vararg.ll
===================================================================
--- llvm/trunk/test/CodeGen/MSP430/vararg.ll
+++ llvm/trunk/test/CodeGen/MSP430/vararg.ll
@@ -25,7 +25,6 @@
 entry:
 ; CHECK-LABEL: va_arg:
   %vl.addr = alloca i8*, align 2
-; CHECK: mov.w r12, 0(r1)
   store i8* %vl, i8** %vl.addr, align 2
 ; CHECK: mov.w r12, [[REG:r[0-9]+]]
 ; CHECK-NEXT: add.w #2, [[REG]]
Index: llvm/trunk/test/CodeGen/Mips/msa/bmzi_bmnzi.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/msa/bmzi_bmnzi.ll
+++ llvm/trunk/test/CodeGen/Mips/msa/bmzi_bmnzi.ll
@@ -9,9 +9,9 @@
   %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG1
   %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 240)
-  store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
+  store volatile <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
   %3 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 15)
-  store <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
+  store volatile <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
   %4 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 170)
   store <16 x i8> %4, <16 x i8>* @llvm_mips_bmnzi_b_RES
   ret void
@@ -32,9 +32,9 @@
   %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG1
   %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 240)
-  store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
+  store volatile <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
   %3 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 15)
-  store <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
+  store volatile <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
   %4 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 170)
   store <16 x i8> %4, <16 x i8>* @llvm_mips_bmnzi_b_RES
   ret void
Index: llvm/trunk/test/CodeGen/PowerPC/ppcf128sf.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppcf128sf.ll
+++ llvm/trunk/test/CodeGen/PowerPC/ppcf128sf.ll
@@ -14,19 +14,19 @@
   %0 = load ppc_fp128, ppc_fp128* @ld, align 16
   %1 = load ppc_fp128, ppc_fp128* @ld2, align 16
   %add = fadd ppc_fp128 %0, %1
-  store ppc_fp128 %add, ppc_fp128* %c, align 16
+  store volatile ppc_fp128 %add, ppc_fp128* %c, align 16
   %2 = load ppc_fp128, ppc_fp128* @ld, align 16
   %3 = load ppc_fp128, ppc_fp128* @ld2, align 16
   %sub = fsub ppc_fp128 %2, %3
-  store ppc_fp128 %sub, ppc_fp128* %c, align 16
+  store volatile ppc_fp128 %sub, ppc_fp128* %c, align 16
   %4 = load ppc_fp128, ppc_fp128* @ld, align 16
   %5 = load ppc_fp128, ppc_fp128* @ld2, align 16
   %mul = fmul ppc_fp128 %4, %5
-  store ppc_fp128 %mul, ppc_fp128* %c, align 16
+  store volatile ppc_fp128 %mul, ppc_fp128* %c, align 16
   %6 = load ppc_fp128, ppc_fp128* @ld, align 16
   %7 = load ppc_fp128, ppc_fp128* @ld2, align 16
   %div = fdiv ppc_fp128 %6, %7
-  store ppc_fp128 %div, ppc_fp128* %c, align 16
+  store volatile ppc_fp128 %div, ppc_fp128* %c, align 16
   ret void

 ; CHECK-LABEL: __gcc_qadd
Index: llvm/trunk/test/CodeGen/SPARC/32abi.ll
===================================================================
--- llvm/trunk/test/CodeGen/SPARC/32abi.ll
+++ llvm/trunk/test/CodeGen/SPARC/32abi.ll
@@ -25,17 +25,17 @@
                     i32 %a5,          ; %i5
                     i32 signext %a6,  ; [%fp+92]
                     i8* %a7) {        ; [%fp+96]
-  store i8 %a0, i8* %a4
-  store i8 %a1, i8* %a4
+  store volatile i8 %a0, i8* %a4
+  store volatile i8 %a1, i8* %a4
   %p16 = bitcast i8* %a4 to i16*
-  store i16 %a2, i16* %p16
+  store volatile i16 %a2, i16* %p16
   %p32 = bitcast i8* %a4 to i32*
-  store i32 %a3, i32* %p32
+  store volatile i32 %a3, i32* %p32
   %pp = bitcast i8* %a4 to i8**
-  store i8* %a4, i8** %pp
-  store i32 %a5, i32* %p32
-  store i32 %a6, i32* %p32
-  store i8* %a7, i8** %pp
+  store volatile i8* %a4, i8** %pp
+  store volatile i32 %a5, i32* %p32
+  store volatile i32 %a6, i32* %p32
+  store volatile i8* %a7, i8** %pp
   ret void
 }

Index: llvm/trunk/test/CodeGen/SPARC/64abi.ll
===================================================================
--- llvm/trunk/test/CodeGen/SPARC/64abi.ll
+++ llvm/trunk/test/CodeGen/SPARC/64abi.ll
@@ -24,17 +24,17 @@
                     i32 %a5,          ; %i5
                     i32 signext %a6,  ; [%fp+BIAS+176]
                     i8* %a7) {        ; [%fp+BIAS+184]
-  store i8 %a0, i8* %a4
-  store i8 %a1, i8* %a4
+  store volatile i8 %a0, i8* %a4
+  store volatile i8 %a1, i8* %a4
   %p16 = bitcast i8* %a4 to i16*
-  store i16 %a2, i16* %p16
+  store volatile i16 %a2, i16* %p16
   %p32 = bitcast i8* %a4 to i32*
-  store i32 %a3, i32* %p32
+  store volatile i32 %a3, i32* %p32
   %pp = bitcast i8* %a4 to i8**
-  store i8* %a4, i8** %pp
-  store i32 %a5, i32* %p32
-  store i32 %a6, i32* %p32
-  store i8* %a7, i8** %pp
+  store volatile i8* %a4, i8** %pp
+  store volatile i32 %a5, i32* %p32
+  store volatile i32 %a6, i32* %p32
+  store volatile i8* %a7, i8** %pp
   ret void
 }
@@ -316,7 +316,7 @@
   %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef,
                                         i64* undef, i64* undef)
   %e0 = extractvalue { i64, i64 } %rv, 0
-  store i64 %e0, i64* %i0
+  store volatile i64 %e0, i64* %i0
   %e1 = extractvalue { i64, i64 } %rv, 1
   store i64 %e1, i64* %i0
   ret void
Index: llvm/trunk/test/CodeGen/SystemZ/swift-return.ll
===================================================================
--- llvm/trunk/test/CodeGen/SystemZ/swift-return.ll
+++ llvm/trunk/test/CodeGen/SystemZ/swift-return.ll
@@ -189,11 +189,11 @@
   %v6 = extractvalue { i1, i1, i1, i1 } %call, 2
   %v7 = extractvalue { i1, i1, i1, i1 } %call, 3
   %val = zext i1 %v3 to i32
-  store i32 %val, i32* @var
+  store volatile i32 %val, i32* @var
   %val2 = zext i1 %v5 to i32
-  store i32 %val2, i32* @var
+  store volatile i32 %val2, i32* @var
   %val3 = zext i1 %v6 to i32
-  store i32 %val3, i32* @var
+  store volatile i32 %val3, i32* @var
   %val4 = zext i1 %v7 to i32
   store i32 %val4, i32* @var
   ret void
Index: llvm/trunk/test/CodeGen/Thumb/stack-access.ll
===================================================================
--- llvm/trunk/test/CodeGen/Thumb/stack-access.ll
+++ llvm/trunk/test/CodeGen/Thumb/stack-access.ll
@@ -7,13 +7,13 @@
   %z = alloca i8, align 1
 ; CHECK: add r1, sp, #8
 ; CHECK: str r1, [r0]
-  store i8* %x, i8** %p, align 4
+  store volatile i8* %x, i8** %p, align 4
 ; CHECK: add r1, sp, #4
 ; CHECK: str r1, [r0]
-  store i8* %y, i8** %p, align 4
+  store volatile i8* %y, i8** %p, align 4
 ; CHECK: mov r1, sp
 ; CHECK: str r1, [r0]
-  store i8* %z, i8** %p, align 4
+  store volatile i8* %z, i8** %p, align 4
   ret void
 }

@@ -24,10 +24,10 @@
 ; CHECK: add r1, sp, #1020
 ; CHECK: adds r1, #4
 ; CHECK: str r1, [r0]
-  store [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
+  store volatile [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
 ; CHECK: mov r1, sp
 ; CHECK: str r1, [r0]
-  store [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
+  store volatile [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
   ret void
 }

Index: llvm/trunk/test/CodeGen/Thumb2/ldr-str-imm12.ll
===================================================================
--- llvm/trunk/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ llvm/trunk/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -50,9 +50,9 @@
 ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
 ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
 ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
-  store %union.rec* null, %union.rec** @zz_hold, align 4
+  store volatile %union.rec* null, %union.rec** @zz_hold, align 4
   store %union.rec* null, %union.rec** @zz_res, align 4
-  store %union.rec* %x, %union.rec** @zz_hold, align 4
+  store volatile %union.rec* %x, %union.rec** @zz_hold, align 4
   %0 = call %union.rec* @Manifest(%union.rec* undef, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind ; <%union.rec*> [#uses=0]
   unreachable

Index: llvm/trunk/test/CodeGen/X86/arg-copy-elide.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/arg-copy-elide.ll
+++ llvm/trunk/test/CodeGen/X86/arg-copy-elide.ll
@@ -253,9 +253,7 @@
 ; CHECK: calll _addrof_i32
 ; CHECK: retl

-
 ; Don't elide the copy when the alloca is escaped with a store.
-
 define void @escape_with_store(i32 %x) {
   %x1 = alloca i32
   %x2 = alloca i32*
@@ -268,9 +266,8 @@
 }

 ; CHECK-LABEL: _escape_with_store:
-; CHECK-DAG: movl {{.*}}(%esp), %[[reg:[^ ]*]]
-; CHECK-DAG: movl $0, [[offs:[0-9]*]](%esp)
-; CHECK: movl %[[reg]], [[offs]](%esp)
+; CHECK: movl {{.*}}(%esp), %[[reg:[^ ]*]]
+; CHECK: movl %[[reg]], [[offs:[0-9]*]](%esp)
 ; CHECK: calll _addrof_i32

Index: llvm/trunk/test/CodeGen/X86/nontemporal.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal.ll
+++ llvm/trunk/test/CodeGen/X86/nontemporal.ll
@@ -9,33 +9,29 @@
 ; X32-SSE: # BB#0:
 ; X32-SSE-NEXT: pushl %ebp
 ; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: pushl %esi
 ; X32-SSE-NEXT: andl $-16, %esp
 ; X32-SSE-NEXT: subl $16, %esp
 ; X32-SSE-NEXT: movl 72(%ebp), %eax
 ; X32-SSE-NEXT: movl 76(%ebp), %ecx
-; X32-SSE-NEXT: movl 12(%ebp), %edx
 ; X32-SSE-NEXT: movdqa 56(%ebp), %xmm3
 ; X32-SSE-NEXT: movdqa 40(%ebp), %xmm4
 ; X32-SSE-NEXT: movdqa 24(%ebp), %xmm5
-; X32-SSE-NEXT: movl 8(%ebp), %esi
-; X32-SSE-NEXT: addps .LCPI0_0, %xmm0
-; X32-SSE-NEXT: movntps %xmm0, (%esi)
-; X32-SSE-NEXT: paddq .LCPI0_1, %xmm2
-; X32-SSE-NEXT: movntdq %xmm2, (%esi)
-; X32-SSE-NEXT: addpd .LCPI0_2, %xmm1
-; X32-SSE-NEXT: movntpd %xmm1, (%esi)
-; X32-SSE-NEXT: paddd .LCPI0_3, %xmm5
-; X32-SSE-NEXT: movntdq %xmm5, (%esi)
-; X32-SSE-NEXT: paddw .LCPI0_4, %xmm4
-; X32-SSE-NEXT: movntdq %xmm4, (%esi)
-; X32-SSE-NEXT: paddb .LCPI0_5, %xmm3
-; X32-SSE-NEXT: movntdq %xmm3, (%esi)
-; X32-SSE-NEXT: movntil %edx, (%esi)
-; X32-SSE-NEXT: movntil %ecx, 4(%esi)
-; X32-SSE-NEXT: movntil %eax, (%esi)
-; X32-SSE-NEXT: leal -4(%ebp), %esp
-; X32-SSE-NEXT: popl %esi
+; X32-SSE-NEXT: movl 8(%ebp), %edx
+; X32-SSE-NEXT: addps {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT: movntps %xmm0, (%edx)
+; X32-SSE-NEXT: paddq {{\.LCPI.*}}, %xmm2
+; X32-SSE-NEXT: movntdq %xmm2, (%edx)
+; X32-SSE-NEXT: addpd {{\.LCPI.*}}, %xmm1
+; X32-SSE-NEXT: movntpd %xmm1, (%edx)
+; X32-SSE-NEXT: paddd {{\.LCPI.*}}, %xmm5
+; X32-SSE-NEXT: movntdq %xmm5, (%edx)
+; X32-SSE-NEXT: paddw {{\.LCPI.*}}, %xmm4
+; X32-SSE-NEXT: movntdq %xmm4, (%edx)
+; X32-SSE-NEXT: paddb {{\.LCPI.*}}, %xmm3
+; X32-SSE-NEXT: movntdq %xmm3, (%edx)
+; X32-SSE-NEXT: movntil %ecx, 4(%edx)
+; X32-SSE-NEXT: movntil %eax, (%edx)
+; X32-SSE-NEXT: movl %ebp, %esp
 ; X32-SSE-NEXT: popl %ebp
 ; X32-SSE-NEXT: retl
 ;
@@ -43,33 +39,29 @@
 ; X32-AVX: # BB#0:
 ; X32-AVX-NEXT: pushl %ebp
 ; X32-AVX-NEXT: movl %esp, %ebp
-; X32-AVX-NEXT: pushl %esi
 ; X32-AVX-NEXT: andl $-16, %esp
 ; X32-AVX-NEXT: subl $16, %esp
 ; X32-AVX-NEXT: movl 72(%ebp), %eax
 ; X32-AVX-NEXT: movl 76(%ebp), %ecx
-; X32-AVX-NEXT: movl 12(%ebp), %edx
 ; X32-AVX-NEXT: vmovdqa 56(%ebp), %xmm3
 ; X32-AVX-NEXT: vmovdqa 40(%ebp), %xmm4
 ; X32-AVX-NEXT: vmovdqa 24(%ebp), %xmm5
-; X32-AVX-NEXT: movl 8(%ebp), %esi
-; X32-AVX-NEXT: vaddps .LCPI0_0, %xmm0, %xmm0
-; X32-AVX-NEXT: vmovntps %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddq .LCPI0_1, %xmm2, %xmm0
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: vaddpd .LCPI0_2, %xmm1, %xmm0
-; X32-AVX-NEXT: vmovntpd %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddd .LCPI0_3, %xmm5, %xmm0
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddw .LCPI0_4, %xmm4, %xmm0
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddb .LCPI0_5, %xmm3, %xmm0
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: movntil %edx, (%esi)
-; X32-AVX-NEXT: movntil %ecx, 4(%esi)
-; X32-AVX-NEXT: movntil %eax, (%esi)
-; X32-AVX-NEXT: leal -4(%ebp), %esp
-; X32-AVX-NEXT: popl %esi
+; X32-AVX-NEXT: movl 8(%ebp), %edx
+; X32-AVX-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-AVX-NEXT: vmovntps %xmm0, (%edx)
+; X32-AVX-NEXT: vpaddq {{\.LCPI.*}}, %xmm2, %xmm0
+; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
+; X32-AVX-NEXT: vaddpd {{\.LCPI.*}}, %xmm1, %xmm0
+; X32-AVX-NEXT: vmovntpd %xmm0, (%edx)
+; X32-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm5, %xmm0
+; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
+; X32-AVX-NEXT: vpaddw {{\.LCPI.*}}, %xmm4, %xmm0
+; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
+; X32-AVX-NEXT: vpaddb {{\.LCPI.*}}, %xmm3, %xmm0
+; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
+; X32-AVX-NEXT: movntil %ecx, 4(%edx)
+; X32-AVX-NEXT: movntil %eax, (%edx)
+; X32-AVX-NEXT: movl %ebp, %esp
 ; X32-AVX-NEXT: popl %ebp
 ; X32-AVX-NEXT: retl
 ;
Index: llvm/trunk/test/CodeGen/X86/store-narrow.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/store-narrow.ll
+++ llvm/trunk/test/CodeGen/X86/store-narrow.ll
@@ -134,10 +134,7 @@
 @g_16 = internal global i32 -1

 ; X64-LABEL: test8:
-; X64-NEXT: movl _g_16(%rip), %eax
-; X64-NEXT: movl $0, _g_16(%rip)
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: movl %eax, _g_16(%rip)
+; X64-NEXT: orb $1, _g_16(%rip)
 ; X64-NEXT: ret
 define void @test8() nounwind {
   %tmp = load i32, i32* @g_16
Index: llvm/trunk/test/CodeGen/X86/swift-return.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/swift-return.ll
+++ llvm/trunk/test/CodeGen/X86/swift-return.ll
@@ -184,11 +184,11 @@
   %v6 = extractvalue { i1, i1, i1, i1 } %call, 2
   %v7 = extractvalue { i1, i1, i1, i1 } %call, 3
   %val = zext i1 %v3 to i32
-  store i32 %val, i32* @var
+  store volatile i32 %val, i32* @var
   %val2 = zext i1 %v5 to i32
-  store i32 %val2, i32* @var
+  store volatile i32 %val2, i32* @var
   %val3 = zext i1 %v6 to i32
-  store i32 %val3, i32* @var
+  store volatile i32 %val3, i32* @var
   %val4 = zext i1 %v7 to i32
   store i32 %val4, i32* @var
   ret void
Index: llvm/trunk/test/CodeGen/X86/win32-spill-xmm.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/win32-spill-xmm.ll
+++ llvm/trunk/test/CodeGen/X86/win32-spill-xmm.ll
@@ -20,7 +20,7 @@

 ; Check that proper alignment of spilled vector does not affect vargs
 ; CHECK-LABEL: vargs_not_affected
-; CHECK: leal 28(%ebp), %eax
+; CHECK: movl 28(%ebp), %eax
 define i32 @vargs_not_affected(<4 x float> %v, i8* %f, ...) {
 entry:
   %ap = alloca i8*, align 4
Index: llvm/trunk/test/CodeGen/X86/win64_sibcall.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/win64_sibcall.ll
+++ llvm/trunk/test/CodeGen/X86/win64_sibcall.ll
@@ -12,8 +12,8 @@
 ; LINUX: movq $0, -8(%rsp)

   %this = alloca %Object addrspace(1)*
-  store %Object addrspace(1)* null, %Object addrspace(1)** %this
-  store %Object addrspace(1)* %param0, %Object addrspace(1)** %this
+  store volatile %Object addrspace(1)* null, %Object addrspace(1)** %this
+  store volatile %Object addrspace(1)* %param0, %Object addrspace(1)** %this
   br label %0

 ;