Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11351,12 +11351,8 @@ SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Chain, ReplLoad.getValue(1)); - // Make sure the new and old chains are cleaned up. - AddToWorklist(Token.getNode()); - - // Replace uses with load result and token factor. Don't add users - // to work list. - return CombineTo(N, ReplLoad.getValue(0), Token, false); + // Replace uses with load result and token factor + return CombineTo(N, ReplLoad.getValue(0), Token); } } @@ -16608,6 +16604,18 @@ if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0)); + // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be + // able to calculate their relative offset if at least one arises + // from an alloca. However, these allocas cannot overlap and we + // can infer there is no alias. + if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase())) + if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) { + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + if (!MFI.isFixedObjectIndex(A->getIndex()) || + !MFI.isFixedObjectIndex(B->getIndex())) + return false; + } + // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis // modified to use BaseIndexOffset. Index: test/CodeGen/AArch64/arm64-abi-varargs.ll =================================================================== --- test/CodeGen/AArch64/arm64-abi-varargs.ll +++ test/CodeGen/AArch64/arm64-abi-varargs.ll @@ -11,9 +11,8 @@ ; CHECK: add {{x[0-9]+}}, [[ARGS]], #8 ; First vararg ; CHECK: ldr {{w[0-9]+}}, [sp, #72] -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8 ; Second vararg -; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8 ; Third vararg ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] Index: test/CodeGen/AArch64/arm64-abi_align.ll =================================================================== --- test/CodeGen/AArch64/arm64-abi_align.ll +++ test/CodeGen/AArch64/arm64-abi_align.ll @@ -280,10 +280,10 @@ define i32 @caller42() #3 { entry: ; CHECK-LABEL: caller42 -; CHECK: str {{x[0-9]+}}, [sp, #48] -; CHECK: str {{q[0-9]+}}, [sp, #32] -; CHECK: str {{x[0-9]+}}, [sp, #16] -; CHECK: str {{q[0-9]+}}, [sp] +; CHECK-DAG: str {{x[0-9]+}}, [sp, #48] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: str {{x[0-9]+}}, [sp, #16] +; CHECK-DAG: str {{q[0-9]+}}, [sp] ; CHECK: add x1, sp, #32 ; CHECK: mov x2, sp ; Space for s1 is allocated at sp+32 @@ -318,10 +318,10 @@ ; CHECK-LABEL: caller42_stack ; CHECK: sub sp, sp, #112 ; CHECK: add x29, sp, #96 -; CHECK: stur {{x[0-9]+}}, [x29, #-16] -; CHECK: stur {{q[0-9]+}}, [x29, #-32] -; CHECK: str {{x[0-9]+}}, [sp, #48] -; CHECK: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: stur {{x[0-9]+}}, [x29, #-16] +; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32] +; CHECK-DAG: str {{x[0-9]+}}, [sp, #48] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #32] ; Space for s1 is allocated at x29-32 = sp+64 ; Space for s2 is allocated at sp+32 ; CHECK: add x[[B:[0-9]+]], sp, #32 @@ -388,10 +388,10 @@ define i32 @caller43() #3 { entry: ; CHECK-LABEL: caller43 -; CHECK: str {{q[0-9]+}}, [sp, #48] -; CHECK: str {{q[0-9]+}}, [sp, #32] -; CHECK: str {{q[0-9]+}}, [sp, #16] -; CHECK: str {{q[0-9]+}}, [sp] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #48] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: str {{q[0-9]+}}, [sp, 
#16] +; CHECK-DAG: str {{q[0-9]+}}, [sp] ; CHECK: add x1, sp, #32 ; CHECK: mov x2, sp ; Space for s1 is allocated at sp+32 @@ -430,10 +430,10 @@ ; CHECK-LABEL: caller43_stack ; CHECK: sub sp, sp, #112 ; CHECK: add x29, sp, #96 -; CHECK: stur {{q[0-9]+}}, [x29, #-16] -; CHECK: stur {{q[0-9]+}}, [x29, #-32] -; CHECK: str {{q[0-9]+}}, [sp, #48] -; CHECK: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-16] +; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #48] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #32] ; Space for s1 is allocated at x29-32 = sp+64 ; Space for s2 is allocated at sp+32 ; CHECK: add x[[B:[0-9]+]], sp, #32 Index: test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll =================================================================== --- test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll +++ test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll @@ -1,10 +1,8 @@ ; RUN: llc -mtriple=arm64-eabi -mcpu=cyclone < %s | FileCheck %s ; CHECK: foo -; CHECK: str w[[REG0:[0-9]+]], [x19, #264] -; CHECK: mov w[[REG1:[0-9]+]], w[[REG0]] -; CHECK: str w[[REG1]], [x19, #132] - +; CHECK-DAG: str w[[REG0:[0-9]+]], [x19, #132] +; CHECK-DAG: str w[[REG0]], [x19, #264] define i32 @foo(i32 %a) nounwind { %retval = alloca i32, align 4 %a.addr = alloca i32, align 4 Index: test/CodeGen/AArch64/arm64-vext.ll =================================================================== --- test/CodeGen/AArch64/arm64-vext.ll +++ test/CodeGen/AArch64/arm64-vext.ll @@ -116,7 +116,7 @@ define void @test_vext_s32() nounwind ssp { ; CHECK-LABEL: test_vext_s32: - ; CHECK: {{ext.8.*#4}} + ; CHECK: {{rev64.2s.*}} %xS32x2 = alloca <2 x i32>, align 8 %__a = alloca <2 x i32>, align 8 %__b = alloca <2 x i32>, align 8 @@ -137,7 +137,7 @@ define void @test_vext_u32() nounwind ssp { ; CHECK-LABEL: test_vext_u32: - ; CHECK: {{ext.8.*#4}} + ; CHECK: {{rev64.2s.*}} %xU32x2 = alloca <2 x i32>, align 8 %__a = alloca <2 x i32>, align 8 %__b = alloca <2 x i32>, align 8 @@ -158,7 +158,7 @@ define void @test_vext_f32() nounwind ssp { ; CHECK-LABEL: test_vext_f32: - ; CHECK: {{ext.8.*#4}} + ; CHECK: {{rev64.2s.*}} %xF32x2 = alloca <2 x float>, align 8 %__a = alloca <2 x float>, align 8 %__b = alloca <2 x float>, align 8 @@ -179,7 +179,7 @@ define void @test_vext_s64() nounwind ssp { ; CHECK-LABEL: test_vext_s64: - ; CHECK_FIXME: {{ext.8.*#1}} + ; CHECK_FIXME: {{rev64.2s.*}} ; this just turns into a load of the second element %xS64x1 = alloca <1 x i64>, align 8 %__a = alloca <1 x i64>, align 8 Index: test/CodeGen/AArch64/dag-combine-invaraints.ll =================================================================== --- test/CodeGen/AArch64/dag-combine-invaraints.ll +++ test/CodeGen/AArch64/dag-combine-invaraints.ll @@ -9,7 +9,7 @@ %i32T = alloca i32, align 4 %i32F = alloca i32, align 4 %i32X = alloca i32, align 4 - store i32 0, i32* %tmp + store i32 %argc, i32* %tmp store i32 15, i32* %i32T, align 4 store i32 5, i32* %i32F, align 4 %tmp6 = load i32, i32* %tmp, align 4 Index: test/CodeGen/AArch64/swifterror.ll =================================================================== --- test/CodeGen/AArch64/swifterror.ll +++ test/CodeGen/AArch64/swifterror.ll @@ -309,19 +309,19 @@ ; CHECK-APPLE-LABEL: foo_vararg: ; CHECK-APPLE: orr w0, wzr, #0x10 ; CHECK-APPLE: malloc -; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1 -; CHECK-APPLE: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16 -; CHECK-APPLE: strb [[ID]], [x0, #8] +; CHECK-APPLE-DAG: orr [[ID:w[0-9]+]], wzr, #0x1 +; CHECK-APPLE-DAG: add 
[[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16 +; CHECK-APPLE-DAG: strb [[ID]], [x0, #8] ; First vararg ; CHECK-APPLE-DAG: orr {{x[0-9]+}}, [[ARGS]], #0x8 ; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #16] ; CHECK-APPLE: add {{x[0-9]+}}, {{x[0-9]+}}, #8 ; Second vararg -; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}] -; CHECK-APPLE: add {{x[0-9]+}}, {{x[0-9]+}}, #8 +; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #24] +; CHECK-APPLE: add {{x[0-9]+}}, {{x[0-9]+}}, #16 ; Third vararg -; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}] +; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #8] ; CHECK-APPLE: mov x21, x0 ; CHECK-APPLE-NOT: x21 Index: test/CodeGen/ARM/atomic-op.ll =================================================================== --- test/CodeGen/ARM/atomic-op.ll +++ test/CodeGen/ARM/atomic-op.ll @@ -26,6 +26,7 @@ store i32 3855, i32* %xort store i32 4, i32* %temp %tmp = load i32, i32* %temp + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: ldrex ; CHECK: add ; CHECK: strex @@ -35,6 +36,7 @@ ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw add i32* %val1, i32 %tmp monotonic store i32 %0, i32* %old + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: ldrex ; CHECK: sub ; CHECK: strex @@ -44,6 +46,7 @@ ; CHECK-BAREMETAL-NOT: __sync %1 = atomicrmw sub i32* %val2, i32 30 monotonic store i32 %1, i32* %old + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: ldrex ; CHECK: add ; CHECK: strex @@ -53,6 +56,7 @@ ; CHECK-BAREMETAL-NOT: __sync %2 = atomicrmw add i32* %val2, i32 1 monotonic store i32 %2, i32* %old + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: ldrex ; CHECK: sub ; CHECK: strex @@ -62,6 +66,7 @@ ; CHECK-BAREMETAL-NOT: __sync %3 = atomicrmw sub i32* %val2, i32 1 monotonic store i32 %3, i32* %old + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: ldrex ; CHECK: and ; CHECK: strex @@ -71,6 +76,7 @@ ; CHECK-BAREMETAL-NOT: __sync %4 = atomicrmw and i32* %andt, i32 4080 monotonic store i32 %4, i32* %old + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: ldrex ; CHECK: or ; CHECK: strex @@ -80,6 +86,7 @@ ; CHECK-BAREMETAL-NOT: __sync %5 = atomicrmw or i32* %ort, i32 4080 monotonic store i32 %5, i32* %old + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: ldrex ; CHECK: eor ; CHECK: strex @@ -89,6 +96,7 @@ ; CHECK-BAREMETAL-NOT: __sync %6 = atomicrmw xor i32* %xort, i32 4080 monotonic store i32 %6, i32* %old + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex @@ -98,6 +106,7 @@ ; CHECK-BAREMETAL-NOT: __sync %7 = atomicrmw min i32* %val2, i32 16 monotonic store i32 %7, i32* %old + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() %neg = sub i32 0, 1 ; CHECK: ldrex ; CHECK: cmp @@ -108,6 +117,7 @@ ; CHECK-BAREMETAL-NOT: __sync %8 = atomicrmw min i32* %val2, i32 %neg monotonic store i32 %8, i32* %old + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex @@ -117,6 +127,7 @@ ; CHECK-BAREMETAL-NOT: __sync %9 = atomicrmw max i32* %val2, i32 1 monotonic store i32 %9, i32* %old + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex @@ -126,6 +137,7 @@ ; CHECK-BAREMETAL-NOT: __sync %10 = atomicrmw max i32* %val2, i32 0 monotonic store i32 %10, i32* %old + call void asm sideeffect "", 
"~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex @@ -135,6 +147,7 @@ ; CHECK-BAREMETAL-NOT: __sync %11 = atomicrmw umin i32* %val2, i32 16 monotonic store i32 %11, i32* %old + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() %uneg = sub i32 0, 1 ; CHECK: ldrex ; CHECK: cmp @@ -145,6 +158,7 @@ ; CHECK-BAREMETAL-NOT: __sync %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic store i32 %12, i32* %old + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex @@ -154,6 +168,7 @@ ; CHECK-BAREMETAL-NOT: __sync %13 = atomicrmw umax i32* %val2, i32 1 monotonic store i32 %13, i32* %old + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex Index: test/CodeGen/Hexagon/convertdptoint.ll =================================================================== --- test/CodeGen/Hexagon/convertdptoint.ll +++ test/CodeGen/Hexagon/convertdptoint.ll @@ -12,10 +12,10 @@ %b = alloca double, align 8 %c = alloca double, align 8 store i32 0, i32* %retval - store double 1.540000e+01, double* %a, align 8 - store double 9.100000e+00, double* %b, align 8 - %0 = load double, double* %a, align 8 - %1 = load double, double* %b, align 8 + store volatile double 1.540000e+01, double* %a, align 8 + store volatile double 9.100000e+00, double* %b, align 8 + %0 = load volatile double, double* %a, align 8 + %1 = load volatile double, double* %b, align 8 %add = fadd double %0, %1 store double %add, double* %c, align 8 %2 = load double, double* %c, align 8 Index: test/CodeGen/Hexagon/convertdptoll.ll =================================================================== --- test/CodeGen/Hexagon/convertdptoll.ll +++ test/CodeGen/Hexagon/convertdptoll.ll @@ -17,8 +17,8 @@ %0 = load double, double* %a, align 8 %1 = load double, double* %b, align 8 %add = fadd double %0, %1 - store double %add, double* %c, align 8 - %2 = load double, double* %c, align 8 + store volatile double %add, double* %c, align 8 + %2 = load volatile double, double* %c, align 8 %conv = fptosi double %2 to i64 store i64 %conv, i64* %i, align 8 %3 = load i64, i64* %i, align 8 Index: test/CodeGen/Hexagon/convertsptoint.ll =================================================================== --- test/CodeGen/Hexagon/convertsptoint.ll +++ test/CodeGen/Hexagon/convertsptoint.ll @@ -17,8 +17,8 @@ %0 = load float, float* %a, align 4 %1 = load float, float* %b, align 4 %add = fadd float %0, %1 - store float %add, float* %c, align 4 - %2 = load float, float* %c, align 4 + store volatile float %add, float* %c, align 4 + %2 = load volatile float, float* %c, align 4 %conv = fptosi float %2 to i32 store i32 %conv, i32* %i, align 4 %3 = load i32, i32* %i, align 4 Index: test/CodeGen/Hexagon/convertsptoll.ll =================================================================== --- test/CodeGen/Hexagon/convertsptoll.ll +++ test/CodeGen/Hexagon/convertsptoll.ll @@ -17,8 +17,8 @@ %0 = load float, float* %a, align 4 %1 = load float, float* %b, align 4 %add = fadd float %0, %1 - store float %add, float* %c, align 4 - %2 = load float, float* %c, align 4 + store volatile float %add, float* %c, align 4 + %2 = load volatile float, float* %c, align 4 %conv = fptosi float %2 to i64 store i64 %conv, i64* %i, align 8 %3 = load i64, i64* %i, align 8 Index: test/CodeGen/Hexagon/dadd.ll =================================================================== --- test/CodeGen/Hexagon/dadd.ll +++ test/CodeGen/Hexagon/dadd.ll @@ -9,10 
+9,10 @@ %a = alloca double, align 8 %b = alloca double, align 8 %c = alloca double, align 8 - store double 1.540000e+01, double* %a, align 8 - store double 9.100000e+00, double* %b, align 8 - %0 = load double, double* %a, align 8 - %1 = load double, double* %b, align 8 + store volatile double 1.540000e+01, double* %a, align 8 + store volatile double 9.100000e+00, double* %b, align 8 + %0 = load volatile double, double* %a, align 8 + %1 = load volatile double, double* %b, align 8 %add = fadd double %0, %1 store double %add, double* %c, align 8 ret i32 0 Index: test/CodeGen/Hexagon/dmul.ll =================================================================== --- test/CodeGen/Hexagon/dmul.ll +++ test/CodeGen/Hexagon/dmul.ll @@ -8,10 +8,10 @@ %a = alloca double, align 8 %b = alloca double, align 8 %c = alloca double, align 8 - store double 1.540000e+01, double* %a, align 8 - store double 9.100000e+00, double* %b, align 8 - %0 = load double, double* %b, align 8 - %1 = load double, double* %a, align 8 + store volatile double 1.540000e+01, double* %a, align 8 + store volatile double 9.100000e+00, double* %b, align 8 + %0 = load volatile double, double* %b, align 8 + %1 = load volatile double, double* %a, align 8 %mul = fmul double %0, %1 store double %mul, double* %c, align 8 ret i32 0 Index: test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll =================================================================== --- test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll +++ test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll @@ -12,10 +12,10 @@ %b = alloca double, align 8 %c = alloca double, align 8 store i32 0, i32* %retval - store double 1.540000e+01, double* %a, align 8 - store double 9.100000e+00, double* %b, align 8 - %0 = load double, double* %a, align 8 - %1 = load double, double* %b, align 8 + store volatile double 1.540000e+01, double* %a, align 8 + store volatile double 9.100000e+00, double* %b, align 8 + %0 = load volatile double, double* %a, align 8 + %1 = load volatile double, double* %b, align 8 %add = fadd double %0, %1 store double %add, double* %c, align 8 %2 = load double, double* %c, align 8 Index: test/CodeGen/Hexagon/dsub.ll =================================================================== --- test/CodeGen/Hexagon/dsub.ll +++ test/CodeGen/Hexagon/dsub.ll @@ -8,10 +8,10 @@ %a = alloca double, align 8 %b = alloca double, align 8 %c = alloca double, align 8 - store double 1.540000e+01, double* %a, align 8 - store double 9.100000e+00, double* %b, align 8 - %0 = load double, double* %b, align 8 - %1 = load double, double* %a, align 8 + store volatile double 1.540000e+01, double* %a, align 8 + store volatile double 9.100000e+00, double* %b, align 8 + %0 = load volatile double, double* %b, align 8 + %1 = load volatile double, double* %a, align 8 %sub = fsub double %0, %1 store double %sub, double* %c, align 8 ret i32 0 Index: test/CodeGen/Hexagon/fadd.ll =================================================================== --- test/CodeGen/Hexagon/fadd.ll +++ test/CodeGen/Hexagon/fadd.ll @@ -8,10 +8,10 @@ %a = alloca float, align 4 %b = alloca float, align 4 %c = alloca float, align 4 - store float 0x402ECCCCC0000000, float* %a, align 4 - store float 0x4022333340000000, float* %b, align 4 - %0 = load float, float* %a, align 4 - %1 = load float, float* %b, align 4 + store volatile float 0x402ECCCCC0000000, float* %a, align 4 + store volatile float 0x4022333340000000, float* %b, align 4 + %0 = load volatile float, float* %a, align 4 + %1 = load volatile float, float* %b, align 4 %add = fadd 
float %0, %1 store float %add, float* %c, align 4 ret i32 0 Index: test/CodeGen/Hexagon/fmul.ll =================================================================== --- test/CodeGen/Hexagon/fmul.ll +++ test/CodeGen/Hexagon/fmul.ll @@ -9,10 +9,10 @@ %a = alloca float, align 4 %b = alloca float, align 4 %c = alloca float, align 4 - store float 0x402ECCCCC0000000, float* %a, align 4 - store float 0x4022333340000000, float* %b, align 4 - %0 = load float, float* %b, align 4 - %1 = load float, float* %a, align 4 + store volatile float 0x402ECCCCC0000000, float* %a, align 4 + store volatile float 0x4022333340000000, float* %b, align 4 + %0 = load volatile float, float* %b, align 4 + %1 = load volatile float, float* %a, align 4 %mul = fmul float %0, %1 store float %mul, float* %c, align 4 ret i32 0 Index: test/CodeGen/Hexagon/fsub.ll =================================================================== --- test/CodeGen/Hexagon/fsub.ll +++ test/CodeGen/Hexagon/fsub.ll @@ -8,10 +8,10 @@ %a = alloca float, align 4 %b = alloca float, align 4 %c = alloca float, align 4 - store float 0x402ECCCCC0000000, float* %a, align 4 - store float 0x4022333340000000, float* %b, align 4 - %0 = load float, float* %b, align 4 - %1 = load float, float* %a, align 4 + store volatile float 0x402ECCCCC0000000, float* %a, align 4 + store volatile float 0x4022333340000000, float* %b, align 4 + %0 = load volatile float, float* %b, align 4 + %1 = load volatile float, float* %a, align 4 %sub = fsub float %0, %1 store float %sub, float* %c, align 4 ret i32 0 Index: test/CodeGen/MSP430/Inst16mm.ll =================================================================== --- test/CodeGen/MSP430/Inst16mm.ll +++ test/CodeGen/MSP430/Inst16mm.ll @@ -64,6 +64,6 @@ %0 = load i16, i16* %retval ; [#uses=1] ret i16 %0 ; CHECK-LABEL: mov2: -; CHECK: mov.w 0(r1), 4(r1) -; CHECK: mov.w 2(r1), 6(r1) +; CHECK-DAG: mov.w 0(r1), 4(r1) +; CHECK-DAG: mov.w 2(r1), 6(r1) } Index: test/CodeGen/MSP430/vararg.ll =================================================================== --- test/CodeGen/MSP430/vararg.ll +++ test/CodeGen/MSP430/vararg.ll @@ -39,11 +39,11 @@ ; CHECK-LABEL: va_copy: %vl.addr = alloca i8*, align 2 %vl2 = alloca i8*, align 2 -; CHECK: mov.w r12, 2(r1) +; CHECK-DAG: mov.w r12, 2(r1) store i8* %vl, i8** %vl.addr, align 2 %0 = bitcast i8** %vl2 to i8* %1 = bitcast i8** %vl.addr to i8* -; CHECK-NEXT: mov.w r12, 0(r1) +; CHECK-DAG: mov.w r12, 0(r1) call void @llvm.va_copy(i8* %0, i8* %1) ret void } Index: test/CodeGen/Mips/dins.ll =================================================================== --- test/CodeGen/Mips/dins.ll +++ test/CodeGen/Mips/dins.ll @@ -59,9 +59,9 @@ ; CHECK-LABEL: f123: ; MIPS64R2: daddiu $[[R0:[0-9]+]], $zero, 123 ; MIPS64R2: dins $[[R0:[0-9]+]], $[[R1:[0-9]+]], 27, 37 -; MIPS64R2: daddiu $[[R0:[0-9]+]], $zero, 5 ; MIPS64R2: daddiu $[[R0:[0-9]+]], $zero, 4 ; MIPS64R2: dins $[[R0:[0-9]+]], $[[R1:[0-9]+]], 28, 6 +; MIPS64R2: daddiu $[[R0:[0-9]+]], $zero, 5 ; MIPS64R2: dins $[[R0:[0-9]+]], $[[R1:[0-9]+]], 50, 14 ; MIPS64R2: dsrl $[[R0:[0-9]+]], $[[R1:[0-9]+]], 50 ; MIPS64R2: dins $[[R0:[0-9]+]], $[[R1:[0-9]+]], 34, 16 @@ -94,4 +94,4 @@ ; MIPS32R2: ori $[[R0:[0-9]+]], $[[R0:[0-9]+]], 8 ; MIPS32R2-NOT: ins {{[[:space:]].*}} ; MIPS64R2N32: ori $[[R0:[0-9]+]], $[[R0:[0-9]+]], 8 -; MIPS64R2N32-NOT: ins {{[[:space:]].*}} \ No newline at end of file +; MIPS64R2N32-NOT: ins {{[[:space:]].*}} Index: test/CodeGen/X86/2011-10-19-widen_vselect.ll =================================================================== --- 
test/CodeGen/X86/2011-10-19-widen_vselect.ll +++ test/CodeGen/X86/2011-10-19-widen_vselect.ll @@ -83,10 +83,11 @@ ; X32-NEXT: cmpeqps %xmm2, %xmm1 ; X32-NEXT: movaps %xmm1, %xmm0 ; X32-NEXT: blendvps %xmm0, %xmm2, %xmm4 -; X32-NEXT: extractps $1, %xmm4, {{[0-9]+}}(%esp) ; X32-NEXT: movss %xmm4, {{[0-9]+}}(%esp) -; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; X32-NEXT: movshdup {{.*#+}} xmm0 = xmm4[1,1,3,3] +; X32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; X32-NEXT: movss %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; X32-NEXT: addl $60, %esp ; X32-NEXT: retl ; Index: test/CodeGen/X86/alias-static-alloca.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/alias-static-alloca.ll @@ -0,0 +1,37 @@ +; RUN: llc -o - -mtriple=x86_64-linux-gnu %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; We should be able to bypass the load values to their corresponding +; stores here. + +; CHECK-LABEL: foo +; CHECK-DAG: movl %esi, -8(%rsp) +; CHECK-DAG: movl %ecx, -16(%rsp) +; CHECK-DAG: movl %edi, -4(%rsp) +; CHECK-DAG: movl %edx, -12(%rsp) +; CHECK: leal +; CHECK: addl +; CHECK: addl +; CHECK: retq + +define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) { +entry: + %a0 = alloca i32 + %a1 = alloca i32 + %a2 = alloca i32 + %a3 = alloca i32 + store i32 %b, i32* %a1 + store i32 %d, i32* %a3 + store i32 %a, i32* %a0 + store i32 %c, i32* %a2 + %l0 = load i32, i32* %a0 + %l1 = load i32, i32* %a1 + %l2 = load i32, i32* %a2 + %l3 = load i32, i32* %a3 + %add0 = add nsw i32 %l0, %l1 + %add1 = add nsw i32 %add0, %l2 + %add2 = add nsw i32 %add1, %l3 + ret i32 %add2 +} Index: test/CodeGen/X86/clobber-fi0.ll =================================================================== --- test/CodeGen/X86/clobber-fi0.ll +++ test/CodeGen/X86/clobber-fi0.ll @@ -15,22 +15,22 @@ %tmp = alloca i32, align 4 ; [#uses=3 type=i32*] %tmp2 = alloca i32, align 4 ; [#uses=3 type=i32*] %tmp3 = alloca i32 ; [#uses=1 type=i32*] - store i32 1, i32* %tmp, align 4 - store i32 1, i32* %tmp2, align 4 + store volatile i32 1, i32* %tmp, align 4 + store volatile i32 1, i32* %tmp2, align 4 br label %bb4 bb4: ; preds = %bb4, %bb - %tmp6 = load i32, i32* %tmp2, align 4 ; [#uses=1 type=i32] + %tmp6 = load volatile i32, i32* %tmp2, align 4 ; [#uses=1 type=i32] %tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32] - store i32 %tmp7, i32* %tmp2, align 4 + store volatile i32 %tmp7, i32* %tmp2, align 4 %tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1] - %tmp9 = load i32, i32* %tmp ; [#uses=1 type=i32] + %tmp9 = load volatile i32, i32* %tmp ; [#uses=1 type=i32] %tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32] - store i32 %tmp10, i32* %tmp3 + store volatile i32 %tmp10, i32* %tmp3 br i1 %tmp8, label %bb11, label %bb4 bb11: ; preds = %bb4 - %tmp12 = load i32, i32* %tmp, align 4 ; [#uses=1 type=i32] + %tmp12 = load volatile i32, i32* %tmp, align 4 ; [#uses=1 type=i32] ret i32 %tmp12 } Index: test/CodeGen/X86/hipe-cc.ll =================================================================== --- test/CodeGen/X86/hipe-cc.ll +++ test/CodeGen/X86/hipe-cc.ll @@ -48,11 +48,7 @@ store i32 %arg0, i32* %arg0_var store i32 %arg1, i32* %arg1_var store i32 %arg2, i32* %arg2_var - - ; CHECK: movl 16(%esp), %esi - ; CHECK-NEXT: movl 12(%esp), %ebp - ; CHECK-NEXT: movl 8(%esp), %eax - ; CHECK-NEXT: movl 4(%esp), %edx + ; These loads are loading the values from their previous stores and are optimized away. 
%0 = load i32, i32* %hp_var %1 = load i32, i32* %p_var %2 = load i32, i32* %arg0_var Index: test/CodeGen/X86/hipe-cc64.ll =================================================================== --- test/CodeGen/X86/hipe-cc64.ll +++ test/CodeGen/X86/hipe-cc64.ll @@ -57,11 +57,7 @@ store i64 %arg2, i64* %arg2_var store i64 %arg3, i64* %arg3_var - ; CHECK: movq 40(%rsp), %r15 - ; CHECK-NEXT: movq 32(%rsp), %rbp - ; CHECK-NEXT: movq 24(%rsp), %rsi - ; CHECK-NEXT: movq 16(%rsp), %rdx - ; CHECK-NEXT: movq 8(%rsp), %rcx + ; Loads are reading the values just written from the corresponding registers and are therefore no-ops. %0 = load i64, i64* %hp_var %1 = load i64, i64* %p_var %2 = load i64, i64* %arg0_var Index: test/CodeGen/X86/legalize-shift-64.ll =================================================================== --- test/CodeGen/X86/legalize-shift-64.ll +++ test/CodeGen/X86/legalize-shift-64.ll @@ -148,8 +148,7 @@ ; CHECK-NEXT: andl $-8, %esp ; CHECK-NEXT: subl $16, %esp ; CHECK-NEXT: movl $1, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl $1, (%esp) +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: shldl $32, %eax, %ecx @@ -175,9 +174,8 @@ ; CHECK-NEXT: retl %x = alloca i32, align 4 %t = alloca i64, align 8 - store i32 1, i32* %x, align 4 - store i64 1, i64* %t, align 8 ;; DEAD - %load = load i32, i32* %x, align 4 + store volatile i32 1, i32* %x, align 4 + %load = load volatile i32, i32* %x, align 4 %shl = shl i32 %load, 8 %add = add i32 %shl, -224 %sh_prom = zext i32 %add to i64 Index: test/CodeGen/X86/machine-outliner-debuginfo.ll =================================================================== --- test/CodeGen/X86/machine-outliner-debuginfo.ll +++ test/CodeGen/X86/machine-outliner-debuginfo.ll @@ -17,6 +17,7 @@ call void @llvm.dbg.value(metadata i32 10, i64 0, metadata !15, metadata !16), !dbg !17 store i32 4, i32* %5, align 4 store i32 0, i32* @x, align 4, !dbg !24 + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; This is the same sequence of instructions without a debug value. It should be outlined ; in the same way. ; CHECK: callq l_OUTLINED_FUNCTION_0 Index: test/CodeGen/X86/machine-outliner.ll =================================================================== --- test/CodeGen/X86/machine-outliner.ll +++ test/CodeGen/X86/machine-outliner.ll @@ -85,6 +85,7 @@ store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 store i32 1, i32* @x, align 4 + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() ; CHECK: callq [[OFUNC2]] store i32 1, i32* %2, align 4 store i32 2, i32* %3, align 4 Index: test/CodeGen/X86/statepoint-invoke.ll =================================================================== --- test/CodeGen/X86/statepoint-invoke.ll +++ test/CodeGen/X86/statepoint-invoke.ll @@ -95,8 +95,8 @@ right: ; CHECK-LABEL: %right - ; CHECK: movq ; CHECK: movq %rdx, (%rsp) + ; CHECK: movq ; CHECK: callq some_call %sp2 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) to label %right.relocs unwind label %exceptional_return.right Index: test/CodeGen/X86/statepoint-stack-usage.ll =================================================================== --- test/CodeGen/X86/statepoint-stack-usage.ll +++ test/CodeGen/X86/statepoint-stack-usage.ll @@ -11,9 +11,9 @@ define i32 @back_to_back_calls(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" { ; CHECK-LABEL: back_to_back_calls ; The exact stores don't matter, but there need to be three stack slots created -; CHECK: movq %rdi, 16(%rsp) -; CHECK: movq %rdx, 8(%rsp) -; CHECK: movq %rsi, (%rsp) +; CHECK-DAG: movq %rdi, 16(%rsp) +; CHECK-DAG: movq %rdx, 8(%rsp) +; CHECK-DAG: movq %rsi, (%rsp) ; There should be no more than three moves ; CHECK-NOT: movq %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) @@ -36,9 +36,9 @@ define i32 @reserve_first(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" { ; CHECK-LABEL: reserve_first ; The exact stores don't matter, but there need to be three stack slots created -; CHECK: movq %rdi, 16(%rsp) -; CHECK: movq %rdx, 8(%rsp) -; CHECK: movq %rsi, (%rsp) +; CHECK-DAG: movq %rdi, 16(%rsp) +; CHECK-DAG: movq %rdx, 8(%rsp) +; CHECK-DAG: movq %rsi, (%rsp) %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 12) %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 13) @@ -61,21 +61,21 @@ gc "statepoint-example" { ; CHECK-LABEL: back_to_back_deopt ; The exact stores don't matter, but there need to be three stack slots created -; CHECK: movl %ebx, 12(%rsp) -; CHECK: movl %ebp, 8(%rsp) -; CHECK: movl %r14d, 4(%rsp) +; CHECK-DAG: movl %ebx, 12(%rsp) +; CHECK-DAG: movl %ebp, 8(%rsp) +; CHECK-DAG: movl %r14d, 4(%rsp) ; CHECK: callq -; CHECK: movl %ebx, 12(%rsp) -; CHECK: movl %ebp, 8(%rsp) -; CHECK: movl %r14d, 4(%rsp) +; CHECK-DAG: movl %ebx, 12(%rsp) +; CHECK-DAG: movl %ebp, 8(%rsp) +; CHECK-DAG: movl %r14d, 4(%rsp) ; CHECK: callq -; CHECK: movl %ebx, 12(%rsp) -; CHECK: movl %ebp, 8(%rsp) -; CHECK: movl %r14d, 4(%rsp) +; CHECK-DAG: movl %ebx, 12(%rsp) +; CHECK-DAG: movl %ebp, 8(%rsp) +; CHECK-DAG: movl %r14d, 4(%rsp) ; CHECK: callq -; CHECK: movl %ebx, 12(%rsp) -; CHECK: movl %ebp, 8(%rsp) -; CHECK: movl %r14d, 4(%rsp) +; CHECK-DAG: movl %ebx, 12(%rsp) +; CHECK-DAG: movl %ebp, 8(%rsp) +; CHECK-DAG: movl %r14d, 4(%rsp) ; CHECK: callq call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 3, i32 %a, i32 %b, i32 %c) call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 3, i32 %a, i32 %b, i32 %c) @@ -89,9 +89,9 @@ ; CHECK-LABEL: back_to_back_invokes entry: ; The exact stores don't matter, but there need to be three stack slots created - ; CHECK: movq %rdi, 16(%rsp) - ; CHECK: movq %rdx, 8(%rsp) - ; CHECK: movq %rsi, (%rsp) + ; CHECK-DAG: movq %rdi, 16(%rsp) + ; CHECK-DAG: movq %rdx, 8(%rsp) + ; CHECK-DAG: movq %rsi, (%rsp) ; CHECK: callq %safepoint_token = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) to label %normal_return unwind label %exceptional_return Index: test/CodeGen/X86/statepoint-vector.ll =================================================================== --- test/CodeGen/X86/statepoint-vector.ll +++ test/CodeGen/X86/statepoint-vector.ll @@ -49,8 +49,8 @@ ; CHECK: subq $40, %rsp ; CHECK: testb $1, %dil ; CHECK: movaps (%rsi), %xmm0 -; CHECK: movaps %xmm0, 16(%rsp) -; CHECK: movaps %xmm0, (%rsp) +; CHECK-DAG: movaps %xmm0, (%rsp) +; CHECK-DAG: movaps %xmm0, 16(%rsp) ; CHECK: callq do_safepoint ; CHECK: movaps (%rsp), %xmm0 ; CHECK: addq $40, %rsp Index: test/CodeGen/X86/widen_arith-2.ll =================================================================== --- test/CodeGen/X86/widen_arith-2.ll +++ test/CodeGen/X86/widen_arith-2.ll @@ -16,20 +16,17 @@ ; CHECK-NEXT: .LBB0_2: # %forbody ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl (%esp), %eax -; CHECK-NEXT: shll $3, %eax -; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl (%esp), %eax -; CHECK-NEXT: shll $3, %eax -; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl (%esp), %ecx +; CHECK-NEXT: leal (,%eax,8), %ecx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: addl %ecx, %edx +; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp) +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; CHECK-NEXT: pmovzxbw {{.*#+}} xmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; CHECK-NEXT: psubw %xmm0, %xmm3 ; CHECK-NEXT: pand %xmm1, %xmm3 ; CHECK-NEXT: pshufb %xmm2, %xmm3 -; CHECK-NEXT: movq %xmm3, (%edx,%ecx,8) +; CHECK-NEXT: movq %xmm3, (%edx,%eax,8) ; CHECK-NEXT: incl (%esp) ; CHECK-NEXT: .LBB0_1: # %forcond ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 Index: test/CodeGen/X86/widen_cast-4.ll =================================================================== --- test/CodeGen/X86/widen_cast-4.ll +++ test/CodeGen/X86/widen_cast-4.ll @@ -16,22 +16,19 @@ ; NARROW-NEXT: .LBB0_2: # %forbody ; NARROW-NEXT: # in Loop: Header=BB0_1 Depth=1 ; NARROW-NEXT: movl (%esp), %eax -; NARROW-NEXT: shll $3, %eax -; NARROW-NEXT: addl {{[0-9]+}}(%esp), %eax -; NARROW-NEXT: movl %eax, {{[0-9]+}}(%esp) -; NARROW-NEXT: movl (%esp), %eax -; NARROW-NEXT: shll $3, %eax -; NARROW-NEXT: addl {{[0-9]+}}(%esp), %eax -; NARROW-NEXT: movl %eax, {{[0-9]+}}(%esp) -; NARROW-NEXT: movl (%esp), %ecx +; NARROW-NEXT: leal (,%eax,8), %ecx ; NARROW-NEXT: movl {{[0-9]+}}(%esp), %edx +; NARROW-NEXT: addl %ecx, %edx +; NARROW-NEXT: movl %edx, {{[0-9]+}}(%esp) +; NARROW-NEXT: addl {{[0-9]+}}(%esp), %ecx +; NARROW-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; NARROW-NEXT: pmovzxbw {{.*#+}} xmm2 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; NARROW-NEXT: psubw %xmm0, %xmm2 ; NARROW-NEXT: psllw $8, %xmm2 ; NARROW-NEXT: psraw $8, %xmm2 ; NARROW-NEXT: psraw $2, %xmm2 ; NARROW-NEXT: pshufb %xmm1, %xmm2 -; NARROW-NEXT: movq %xmm2, (%edx,%ecx,8) +; NARROW-NEXT: movq %xmm2, (%edx,%eax,8) ; NARROW-NEXT: incl (%esp) ; NARROW-NEXT: .LBB0_1: # %forcond ; NARROW-NEXT: # =>This Inner Loop Header: Depth=1 @@ -54,24 +51,21 @@ ; WIDE-NEXT: .LBB0_2: # %forbody ; WIDE-NEXT: # in Loop: Header=BB0_1 Depth=1 ; WIDE-NEXT: movl (%esp), %eax -; WIDE-NEXT: shll $3, %eax -; WIDE-NEXT: addl {{[0-9]+}}(%esp), %eax -; WIDE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; WIDE-NEXT: movl (%esp), %eax -; WIDE-NEXT: shll $3, %eax -; WIDE-NEXT: addl {{[0-9]+}}(%esp), %eax -; WIDE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; WIDE-NEXT: movl (%esp), %ecx +; WIDE-NEXT: leal (,%eax,8), %ecx ; WIDE-NEXT: movl {{[0-9]+}}(%esp), %edx +; WIDE-NEXT: addl %ecx, %edx +; WIDE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIDE-NEXT: addl {{[0-9]+}}(%esp), %ecx +; WIDE-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; WIDE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; WIDE-NEXT: pinsrd $1, 4(%eax,%ecx,8), %xmm3 +; WIDE-NEXT: pinsrd $1, 4(%ecx,%eax,8), %xmm3 ; WIDE-NEXT: psubb %xmm0, %xmm3 ; WIDE-NEXT: psrlw $2, %xmm3 ; WIDE-NEXT: pand %xmm1, %xmm3 ; WIDE-NEXT: pxor %xmm2, %xmm3 ; WIDE-NEXT: psubb %xmm2, %xmm3 -; WIDE-NEXT: pextrd $1, %xmm3, 4(%edx,%ecx,8) -; WIDE-NEXT: movd %xmm3, (%edx,%ecx,8) +; WIDE-NEXT: pextrd $1, %xmm3, 4(%edx,%eax,8) +; WIDE-NEXT: movd %xmm3, (%edx,%eax,8) ; WIDE-NEXT: incl (%esp) ; WIDE-NEXT: .LBB0_1: # %forcond ; WIDE-NEXT: # =>This Inner Loop Header: Depth=1 Index: test/CodeGen/XCore/varargs.ll =================================================================== --- test/CodeGen/XCore/varargs.ll +++ test/CodeGen/XCore/varargs.ll @@ -26,10 +26,10 @@ ; CHECK-LABEL: test_vararg ; CHECK: extsp 6 ; CHECK: stw lr, sp[1] -; CHECK: stw r3, sp[6] -; CHECK: stw r0, sp[3] -; CHECK: stw r1, sp[4] -; CHECK: stw r2, sp[5] +; CHECK-DAG: stw r3, sp[6] +; CHECK-DAG: stw r0, sp[3] +; CHECK-DAG: stw r1, sp[4] +; CHECK-DAG: stw r2, sp[5] ; CHECK: ldaw r0, sp[3] ; CHECK: stw r0, sp[2] %list = alloca i8*, align 4
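
A note on the DAGCombiner change at the top of this patch: the new check concludes "no alias" when both base pointers are frame indexes and at least one of them refers to a non-fixed stack object (an alloca), since distinct locally allocated stack objects occupy disjoint frame slots even when their relative offsets cannot be computed. The standalone C++ sketch below only illustrates that reasoning; it does not use the LLVM API, and the StackObject type and provablyDisjoint helper are hypothetical names introduced for the example.

// Illustrative sketch (not LLVM code) of the no-alias rule added above:
// accesses based on two different frame indexes cannot overlap when at
// least one of them is a local (non-fixed) stack object.
#include <cassert>

struct StackObject {
  int FrameIndex; // frame slot the access is based on
  bool IsFixed;   // fixed objects (e.g. incoming stack arguments) live in the
                  // caller's frame, so we stay conservative about them
};

// Returns true when the two accesses provably do not alias.
bool provablyDisjoint(const StackObject &A, const StackObject &B) {
  if (A.FrameIndex == B.FrameIndex)
    return false; // same object: a constant-offset comparison would be needed
  // Different objects and at least one is an alloca: their slots are disjoint.
  return !A.IsFixed || !B.IsFixed;
}

int main() {
  StackObject Alloca0{0, false}, Alloca1{1, false};
  StackObject Arg0{2, true}, Arg1{3, true};
  assert(provablyDisjoint(Alloca0, Alloca1)); // two allocas never overlap
  assert(provablyDisjoint(Alloca0, Arg0));    // alloca vs. fixed object
  assert(!provablyDisjoint(Arg0, Arg1));      // two fixed objects: stay conservative
  return 0;
}

In the actual hunk, the frame indexes come from FrameIndexSDNode bases, the fixed-object test is MachineFrameInfo::isFixedObjectIndex, and the same-base case is already handled by the equalBaseIndex check earlier in the same function.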