Index: test/CodeGen/X86/MergeConsecutiveStores.ll
===================================================================
--- test/CodeGen/X86/MergeConsecutiveStores.ll
+++ test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -4,6 +4,7 @@
 
 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.C = type { i8, i8, i8, i8, i32, i32, i32, i64 }
 
 ; save 1,2,3 ... as one big integer.
 define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
@@ -848,3 +849,74 @@
   store float %f3, float* %idx3, align 4
   ret void
 }
+
+; Same as @merge_const_store, but with heterogeneous store types: the four i8 constant stores and the one i32 constant store to fields 0-4 of %struct.C are expected to be emitted as a single 64-bit store per loop iteration.
+define void @merge_const_store_heterogeneous(i32 %count, %struct.C* nocapture %p) nounwind uwtable noinline ssp {
+; CHECK-LABEL: merge_const_store_heterogeneous:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jle .LBB20_3
+; CHECK-NEXT:  # %bb.1: # %.lr.ph.preheader
+; CHECK-NEXT:    movabsq $578437695752307201, %rax # imm = 0x807060504030201
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB20_2: # %.lr.ph
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movq %rax, (%rsi)
+; CHECK-NEXT:    addq $24, %rsi
+; CHECK-NEXT:    decl %edi
+; CHECK-NEXT:    jne .LBB20_2
+; CHECK-NEXT:  .LBB20_3: # %._crit_edge
+; CHECK-NEXT:    retq
+  %1 = icmp sgt i32 %count, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+.lr.ph:
+  %i.02 = phi i32 [ %7, %.lr.ph ], [ 0, %0 ]
+  %.01 = phi %struct.C* [ %8, %.lr.ph ], [ %p, %0 ]
+  %2 = getelementptr inbounds %struct.C, %struct.C* %.01, i64 0, i32 0
+  store i8 1, i8* %2, align 1
+  %3 = getelementptr inbounds %struct.C, %struct.C* %.01, i64 0, i32 1
+  store i8 2, i8* %3, align 1
+  %4 = getelementptr inbounds %struct.C, %struct.C* %.01, i64 0, i32 2
+  store i8 3, i8* %4, align 1
+  %5 = getelementptr inbounds %struct.C, %struct.C* %.01, i64 0, i32 3
+  store i8 4, i8* %5, align 1
+  %6 = getelementptr inbounds %struct.C, %struct.C* %.01, i64 0, i32 4
+  store i32 134678021, i32* %6, align 1
+  %7 = add nsw i32 %i.02, 1
+  %8 = getelementptr inbounds %struct.C, %struct.C* %.01, i64 1
+  %exitcond = icmp eq i32 %7, %count
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+._crit_edge:
+  ret void
+}
+
+; Merging heterogeneous integer types: fields 0-4 of %struct.C (four i8 and one i32) are copied from %p to %q with individual loads and stores, which are expected to be emitted as one 64-bit load and one 64-bit store.
+define void @merge_heterogeneous(%struct.C* nocapture %p, %struct.C* nocapture %q) {
+; CHECK-LABEL: merge_heterogeneous:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    movq %rax, (%rsi)
+; CHECK-NEXT:    retq
+  %s0 = getelementptr inbounds %struct.C, %struct.C* %p, i64 0, i32 0
+  %s1 = getelementptr inbounds %struct.C, %struct.C* %p, i64 0, i32 1
+  %s2 = getelementptr inbounds %struct.C, %struct.C* %p, i64 0, i32 2
+  %s3 = getelementptr inbounds %struct.C, %struct.C* %p, i64 0, i32 3
+  %s4 = getelementptr inbounds %struct.C, %struct.C* %p, i64 0, i32 4
+  %d0 = getelementptr inbounds %struct.C, %struct.C* %q, i64 0, i32 0
+  %d1 = getelementptr inbounds %struct.C, %struct.C* %q, i64 0, i32 1
+  %d2 = getelementptr inbounds %struct.C, %struct.C* %q, i64 0, i32 2
+  %d3 = getelementptr inbounds %struct.C, %struct.C* %q, i64 0, i32 3
+  %d4 = getelementptr inbounds %struct.C, %struct.C* %q, i64 0, i32 4
+  %v0 = load i8, i8* %s0, align 1
+  %v1 = load i8, i8* %s1, align 1
+  %v2 = load i8, i8* %s2, align 1
+  %v3 = load i8, i8* %s3, align 1
+  %v4 = load i32, i32* %s4, align 1
+  store i8 %v0, i8* %d0, align 1
+  store i8 %v1, i8* %d1, align 1
+  store i8 %v2, i8* %d2, align 1
+  store i8 %v3, i8* %d3, align 1
+  store i32 %v4, i32* %d4, align 4
+  ret void
+}
+