Diff 178020

llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,485 Lines • ▼ Show 20 Lines	while (VTSize > Size) {
if (NewVT == MVT::i8)		if (NewVT == MVT::i8)
break;		break;
} while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));		} while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
}		}
NewVTSize = NewVT.getSizeInBits() / 8;		NewVTSize = NewVT.getSizeInBits() / 8;

// If the new VT cannot cover all of the remaining bits, then consider		// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.		// issuing a (or a pair of) unaligned and overlapping load / store.
// FIXME: Only does this for 64-bit or more since we don't have proper
// cost model for unaligned load / store.
bool Fast;		bool Fast;
if (NumMemOps && AllowOverlap &&		if (NumMemOps && AllowOverlap && NewVTSize < Size &&
VTSize >= 8 && NewVTSize < Size &&		TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast)		Fast)
VTSize = Size;		VTSize = Size;
else {		else {
VT = NewVT;		VT = NewVT;
VTSize = NewVTSize;		VTSize = NewVTSize;
}		}
}		}

if (++NumMemOps > Limit)		if (++NumMemOps > Limit)
▲ Show 20 Lines • Show All 3,691 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AArch64/arm64-memcpy-inline.ll

	Show All 10 Lines
	@.str4 = private unnamed_addr constant [18 x i8] c"DHRYSTONE PROGR \00", align 1			@.str4 = private unnamed_addr constant [18 x i8] c"DHRYSTONE PROGR \00", align 1
	@.str5 = private unnamed_addr constant [7 x i8] c"DHRYST\00", align 1			@.str5 = private unnamed_addr constant [7 x i8] c"DHRYST\00", align 1
	@.str6 = private unnamed_addr constant [14 x i8] c"/tmp/rmXXXXXX\00", align 1			@.str6 = private unnamed_addr constant [14 x i8] c"/tmp/rmXXXXXX\00", align 1
	@spool.splbuf = internal global [512 x i8] zeroinitializer, align 16			@spool.splbuf = internal global [512 x i8] zeroinitializer, align 16

	define i32 @t0() {			define i32 @t0() {
	entry:			entry:
	; CHECK-LABEL: t0:			; CHECK-LABEL: t0:
	; CHECK: ldrb [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #10]			; CHECK: ldur [[REG0:w[0-9]+]], [x[[BASEREG:[0-9]+]], #7]
	; CHECK: strb [[REG0]], [x[[BASEREG2:[0-9]+]], #10]			; CHECK: stur [[REG0]], [x[[BASEREG2:[0-9]+]], #7]
	; CHECK: ldrh [[REG1:w[0-9]+]], [x[[BASEREG]], #8]
	; CHECK: strh [[REG1]], [x[[BASEREG2]], #8]
	; CHECK: ldr [[REG2:x[0-9]+]],			; CHECK: ldr [[REG2:x[0-9]+]],
	; CHECK: str [[REG2]],			; CHECK: str [[REG2]],
	call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false)			call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false)
	ret i32 0			ret i32 0
	}			}

	define void @t1(i8* nocapture %C) nounwind {			define void @t1(i8* nocapture %C) nounwind {
	entry:			entry:
	Show All 38 Lines
	; CHECK: str [[REG6]], [x0]			; CHECK: str [[REG6]], [x0]
	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i1 false)			tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i1 false)
	ret void			ret void
	}			}

	define void @t5(i8* nocapture %C) nounwind {			define void @t5(i8* nocapture %C) nounwind {
	entry:			entry:
	; CHECK-LABEL: t5:			; CHECK-LABEL: t5:
	; CHECK: strb wzr, [x0, #6]			; CHECK: mov [[REG7:w[0-9]+]], #21337
	; CHECK: mov [[REG7:w[0-9]+]], #21587			; CHECK: movk [[REG7]],
	; CHECK: strh [[REG7]], [x0, #4]			; CHECK: stur [[REG7]], [x0, #3]
	; CHECK: mov [[REG8:w[0-9]+]],			; CHECK: mov [[REG8:w[0-9]+]],
	; CHECK: movk [[REG8]],			; CHECK: movk [[REG8]],
	; CHECK: str [[REG8]], [x0]			; CHECK: str [[REG8]], [x0]
	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i1 false)			tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i1 false)
	ret void			ret void
	}			}

	define void @t6() nounwind {			define void @t6() nounwind {
	Show All 25 Lines

llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll

	Show First 20 Lines • Show All 80 Lines • ▼ Show 20 Lines
	; CHECK-T1: bl _memcpy			; CHECK-T1: bl _memcpy
	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i1 false)			tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i1 false)
	ret void			ret void
	}			}

	define void @t5(i8* nocapture %C) nounwind {			define void @t5(i8* nocapture %C) nounwind {
	entry:			entry:
	; CHECK-LABEL: t5:			; CHECK-LABEL: t5:
	; CHECK: movs [[REG5:r[0-9]+]], #0			; CHECK: movw [[REG5:r[0-9]+]], #21337
	; CHECK: strb [[REG5]], [r0, #6]			; CHECK: movt [[REG5]], #84
	; CHECK: movw [[REG6:r[0-9]+]], #21587			; CHECK: str.w [[REG5]], [r0, #3]
	; CHECK: strh [[REG6]], [r0, #4]
	; CHECK: movw [[REG7:r[0-9]+]], #18500			; CHECK: movw [[REG7:r[0-9]+]], #18500
	; CHECK: movt [[REG7:r[0-9]+]], #22866			; CHECK: movt [[REG7:r[0-9]+]], #22866
	; CHECK: str [[REG7]]			; CHECK: str [[REG7]]
	; CHECK-T1-LABEL: t5:			; CHECK-T1-LABEL: t5:
	; CHECK-T1: bl _memcpy			; CHECK-T1: bl _memcpy
	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i1 false)			tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i1 false)
	ret void			ret void
	}			}
	Show All 37 Lines

llvm/trunk/test/CodeGen/ARM/memcpy-ldm-stm.ll

	Show All 28 Lines
	}			}

	; Function Attrs: nounwind			; Function Attrs: nounwind
	define void @t2() #0 {			define void @t2() #0 {
	entry:			entry:
	; CHECK-LABEL: t2:			; CHECK-LABEL: t2:
	; CHECKV6: ldr [[LB:r[0-7]]],			; CHECKV6: ldr [[LB:r[0-7]]],
	; CHECKV6-NEXT: ldr [[SB:r[0-7]]],			; CHECKV6-NEXT: ldr [[SB:r[0-7]]],
				; CHECKV6-NEXT: ldm{{(\.w)?}} [[LB]]!,
				; CHECKV6-NEXT: stm{{(\.w)?}} [[SB]]!,
				; CHECKV6-NEXT: ldrh{{(\.w)?}} {{.*}}, {{\[}}[[LB]]]
				; CHECKV6-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #2]
				; CHECKV6-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #2]
				; CHECKV6-NEXT: strh{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
	; CHECKV7: movt [[LB:[rl0-9]+]], :upper16:d			; CHECKV7: movt [[LB:[rl0-9]+]], :upper16:d
	; CHECKV7-NEXT: movt [[SB:[rl0-9]+]], :upper16:s			; CHECKV7-NEXT: movt [[SB:[rl0-9]+]], :upper16:s
	; CHECK-NEXT: ldm{{(\.w)?}} [[LB]]!,			; CHECKV7: ldr{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #11]
	; CHECK-NEXT: stm{{(\.w)?}} [[SB]]!,			; CHECKV7-NEXT: str{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #11]
	; CHECK-NEXT: ldrh{{(\.w)?}} {{.*}}, {{\[}}[[LB]]]
	; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #2]
	; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #2]
	; CHECK-NEXT: strh{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
	tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast ([64 x i32]* @s to i8*), i8* align 4 bitcast ([64 x i32]* @d to i8*), i32 15, i1 false)			tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast ([64 x i32]* @s to i8*), i8* align 4 bitcast ([64 x i32]* @d to i8*), i32 15, i1 false)
	ret void			ret void
	}			}

	; PR23768			; PR23768
	%struct.T = type { i8, i64, i8 }			%struct.T = type { i8, i64, i8 }

	@copy = external global %struct.T, align 8			@copy = external global %struct.T, align 8
	▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/PowerPC/jaggedstructs.ll

	Show All 28 Lines
	; CHECK-DAG: lbz {{[0-9]+}}, 175(1)			; CHECK-DAG: lbz {{[0-9]+}}, 175(1)
	; CHECK-DAG: lwz {{[0-9]+}}, 171(1)			; CHECK-DAG: lwz {{[0-9]+}}, 171(1)
	; CHECK-DAG: stb {{[0-9]+}}, 63(1)			; CHECK-DAG: stb {{[0-9]+}}, 63(1)
	; CHECK-DAG: stw {{[0-9]+}}, 59(1)			; CHECK-DAG: stw {{[0-9]+}}, 59(1)
	; CHECK-DAG: lhz {{[0-9]+}}, 182(1)			; CHECK-DAG: lhz {{[0-9]+}}, 182(1)
	; CHECK-DAG: lwz {{[0-9]+}}, 178(1)			; CHECK-DAG: lwz {{[0-9]+}}, 178(1)
	; CHECK-DAG: sth {{[0-9]+}}, 70(1)			; CHECK-DAG: sth {{[0-9]+}}, 70(1)
	; CHECK-DAG: stw {{[0-9]+}}, 66(1)			; CHECK-DAG: stw {{[0-9]+}}, 66(1)
	; CHECK-DAG: lbz {{[0-9]+}}, 191(1)			; CHECK-DAG: lwz {{[0-9]+}}, 188(1)
	; CHECK-DAG: lhz {{[0-9]+}}, 189(1)
	; CHECK-DAG: lwz {{[0-9]+}}, 185(1)			; CHECK-DAG: lwz {{[0-9]+}}, 185(1)
	; CHECK-DAG: stb {{[0-9]+}}, 79(1)			; CHECK-DAG: stw {{[0-9]+}}, 76(1)
	; CHECK-DAG: sth {{[0-9]+}}, 77(1)
	; CHECK-DAG: stw {{[0-9]+}}, 73(1)			; CHECK-DAG: stw {{[0-9]+}}, 73(1)
	; CHECK-DAG: ld 6, 72(1)			; CHECK-DAG: ld 6, 72(1)
	; CHECK-DAG: ld 5, 64(1)			; CHECK-DAG: ld 5, 64(1)
	; CHECK-DAG: ld 4, 56(1)			; CHECK-DAG: ld 4, 56(1)
	; CHECK-DAG: ld 3, 48(1)			; CHECK-DAG: ld 3, 48(1)

	declare void @check(%struct.S3* byval, %struct.S5* byval, %struct.S6* byval, %struct.S7* byval)			declare void @check(%struct.S3* byval, %struct.S5* byval, %struct.S6* byval, %struct.S7* byval)

llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll

	Show First 20 Lines • Show All 151 Lines • ▼ Show 20 Lines
	; CHECK: sth {{[0-9]+}}, 126(1)			; CHECK: sth {{[0-9]+}}, 126(1)
	; CHECK: stb {{[0-9]+}}, 135(1)			; CHECK: stb {{[0-9]+}}, 135(1)
	; CHECK: sth {{[0-9]+}}, 133(1)			; CHECK: sth {{[0-9]+}}, 133(1)
	; CHECK: stw {{[0-9]+}}, 140(1)			; CHECK: stw {{[0-9]+}}, 140(1)
	; CHECK: stb {{[0-9]+}}, 151(1)			; CHECK: stb {{[0-9]+}}, 151(1)
	; CHECK: stw {{[0-9]+}}, 147(1)			; CHECK: stw {{[0-9]+}}, 147(1)
	; CHECK: sth {{[0-9]+}}, 158(1)			; CHECK: sth {{[0-9]+}}, 158(1)
	; CHECK: stw {{[0-9]+}}, 154(1)			; CHECK: stw {{[0-9]+}}, 154(1)
	; CHECK: stb {{[0-9]+}}, 167(1)			; CHECK: stw {{[0-9]+}}, 164(1)
	; CHECK: sth {{[0-9]+}}, 165(1)
	; CHECK: stw {{[0-9]+}}, 161(1)			; CHECK: stw {{[0-9]+}}, 161(1)
	}			}

	define internal i32 @callee2(i32 %z1, i32 %z2, i32 %z3, i32 %z4, i32 %z5, i32 %z6, i32 %z7, i32 %z8, %struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind {			define internal i32 @callee2(i32 %z1, i32 %z2, i32 %z3, i32 %z4, i32 %z5, i32 %z6, i32 %z7, i32 %z8, %struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind {
	entry:			entry:
	%z1.addr = alloca i32, align 4			%z1.addr = alloca i32, align 4
	%z2.addr = alloca i32, align 4			%z2.addr = alloca i32, align 4
	%z3.addr = alloca i32, align 4			%z3.addr = alloca i32, align 4
	▲ Show 20 Lines • Show All 46 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll

	Show First 20 Lines • Show All 142 Lines • ▼ Show 20 Lines

	; CHECK-LABEL: caller2			; CHECK-LABEL: caller2
	; CHECK: stb {{[0-9]+}}, 71(1)			; CHECK: stb {{[0-9]+}}, 71(1)
	; CHECK: sth {{[0-9]+}}, 69(1)			; CHECK: sth {{[0-9]+}}, 69(1)
	; CHECK: stb {{[0-9]+}}, 87(1)			; CHECK: stb {{[0-9]+}}, 87(1)
	; CHECK: stw {{[0-9]+}}, 83(1)			; CHECK: stw {{[0-9]+}}, 83(1)
	; CHECK: sth {{[0-9]+}}, 94(1)			; CHECK: sth {{[0-9]+}}, 94(1)
	; CHECK: stw {{[0-9]+}}, 90(1)			; CHECK: stw {{[0-9]+}}, 90(1)
	; CHECK: stb {{[0-9]+}}, 103(1)			; CHECK: stw {{[0-9]+}}, 100(1)
	; CHECK: sth {{[0-9]+}}, 101(1)
	; CHECK: stw {{[0-9]+}}, 97(1)			; CHECK: stw {{[0-9]+}}, 97(1)
	; CHECK: ld 9, 96(1)			; CHECK: ld 9, 96(1)
	; CHECK: ld 8, 88(1)			; CHECK: ld 8, 88(1)
	; CHECK: ld 7, 80(1)			; CHECK: ld 7, 80(1)
	; CHECK: lwz 6, 136(31)			; CHECK: lwz 6, 136(31)
	; CHECK: ld 5, 64(1)			; CHECK: ld 5, 64(1)
	; CHECK: lhz 4, 152(31)			; CHECK: lhz 4, 152(31)
	; CHECK: lbz 3, 160(31)			; CHECK: lbz 3, 160(31)
	▲ Show 20 Lines • Show All 45 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/memcpy-from-string.ll

	Show All 10 Lines
	@0 = internal constant [10 x i8] c"asdf jkl;\00", align 1			@0 = internal constant [10 x i8] c"asdf jkl;\00", align 1

	; Memcpy lowering should emit stores of immediates containing string data from			; Memcpy lowering should emit stores of immediates containing string data from
	; the correct offsets.			; the correct offsets.

	define void @foo(i8* %tmp2) {			define void @foo(i8* %tmp2) {
	; X86-LABEL: foo:			; X86-LABEL: foo:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: movb $0, 6(%rdi)			; X86-NEXT: movl $3894379, 3(%rdi) # imm = 0x3B6C6B
	; X86-NEXT: movw $15212, 4(%rdi) # imm = 0x3B6C
	; X86-NEXT: movl $1802117222, (%rdi) # imm = 0x6B6A2066			; X86-NEXT: movl $1802117222, (%rdi) # imm = 0x6B6A2066
	; X86-NEXT: retq			; X86-NEXT: retq
	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @0, i64 0, i64 3), i64 7, i1 false)			call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @0, i64 0, i64 3), i64 7, i1 false)
	ret void			ret void
	}			}

	declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)			declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)

llvm/trunk/test/CodeGen/X86/memset-2.ll

	Show First 20 Lines • Show All 45 Lines • ▼ Show 20 Lines
	}			}

	define void @t4(i8* nocapture %s, i8 %a) nounwind {			define void @t4(i8* nocapture %s, i8 %a) nounwind {
	; CHECK-LABEL: t4:			; CHECK-LABEL: t4:
	; CHECK: ## %bb.0: ## %entry			; CHECK: ## %bb.0: ## %entry
	; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax			; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx			; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
	; CHECK-NEXT: imull $16843009, %ecx, %ecx ## imm = 0x1010101			; CHECK-NEXT: imull $16843009, %ecx, %ecx ## imm = 0x1010101
				; CHECK-NEXT: movl %ecx, 11(%eax)
	; CHECK-NEXT: movl %ecx, 8(%eax)			; CHECK-NEXT: movl %ecx, 8(%eax)
	; CHECK-NEXT: movl %ecx, 4(%eax)			; CHECK-NEXT: movl %ecx, 4(%eax)
	; CHECK-NEXT: movl %ecx, (%eax)			; CHECK-NEXT: movl %ecx, (%eax)
	; CHECK-NEXT: movw %cx, 12(%eax)
	; CHECK-NEXT: movb %cl, 14(%eax)
	; CHECK-NEXT: retl			; CHECK-NEXT: retl
	entry:			entry:
	tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i1 false)			tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i1 false)
	ret void			ret void
	}			}

llvm/trunk/test/CodeGen/X86/memset-zero.ll

Show First 20 Lines • Show All 65 Lines • ▼ Show 20 Lines	entry:
call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 5, i1 false)		call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 5, i1 false)
ret void		ret void
}		}

define void @memset_7(i8* %a) nounwind {		define void @memset_7(i8* %a) nounwind {
; X86-LABEL: memset_7:		; X86-LABEL: memset_7:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb $0, 6(%eax)		; X86-NEXT: movl $0, 3(%eax)
; X86-NEXT: movw $0, 4(%eax)
; X86-NEXT: movl $0, (%eax)		; X86-NEXT: movl $0, (%eax)
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; CORE2-LABEL: memset_7:		; CORE2-LABEL: memset_7:
; CORE2: # %bb.0: # %entry		; CORE2: # %bb.0: # %entry
; CORE2-NEXT: movb $0, 6(%rdi)		; CORE2-NEXT: movl $0, 3(%rdi)
; CORE2-NEXT: movw $0, 4(%rdi)
; CORE2-NEXT: movl $0, (%rdi)		; CORE2-NEXT: movl $0, (%rdi)
; CORE2-NEXT: retq		; CORE2-NEXT: retq
;		;
; NEHALEM-LABEL: memset_7:		; NEHALEM-LABEL: memset_7:
; NEHALEM: # %bb.0: # %entry		; NEHALEM: # %bb.0: # %entry
; NEHALEM-NEXT: movb $0, 6(%rdi)		; NEHALEM-NEXT: movl $0, 3(%rdi)
; NEHALEM-NEXT: movw $0, 4(%rdi)
; NEHALEM-NEXT: movl $0, (%rdi)		; NEHALEM-NEXT: movl $0, (%rdi)
; NEHALEM-NEXT: retq		; NEHALEM-NEXT: retq
entry:		entry:
call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 7, i1 false)		call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 7, i1 false)
ret void		ret void
}		}

define void @memset_8(i8* %a) nounwind {		define void @memset_8(i8* %a) nounwind {
Show All 17 Lines	entry:
call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 8, i1 false)		call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 8, i1 false)
ret void		ret void
}		}

define void @memset_11(i8* %a) nounwind {		define void @memset_11(i8* %a) nounwind {
; X86-LABEL: memset_11:		; X86-LABEL: memset_11:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb $0, 10(%eax)		; X86-NEXT: movl $0, 7(%eax)
; X86-NEXT: movw $0, 8(%eax)
; X86-NEXT: movl $0, 4(%eax)		; X86-NEXT: movl $0, 4(%eax)
; X86-NEXT: movl $0, (%eax)		; X86-NEXT: movl $0, (%eax)
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; CORE2-LABEL: memset_11:		; CORE2-LABEL: memset_11:
; CORE2: # %bb.0: # %entry		; CORE2: # %bb.0: # %entry
; CORE2-NEXT: movb $0, 10(%rdi)		; CORE2-NEXT: movl $0, 7(%rdi)
; CORE2-NEXT: movw $0, 8(%rdi)
; CORE2-NEXT: movq $0, (%rdi)		; CORE2-NEXT: movq $0, (%rdi)
; CORE2-NEXT: retq		; CORE2-NEXT: retq
;		;
; NEHALEM-LABEL: memset_11:		; NEHALEM-LABEL: memset_11:
; NEHALEM: # %bb.0: # %entry		; NEHALEM: # %bb.0: # %entry
; NEHALEM-NEXT: movb $0, 10(%rdi)		; NEHALEM-NEXT: movl $0, 7(%rdi)
; NEHALEM-NEXT: movw $0, 8(%rdi)
; NEHALEM-NEXT: movq $0, (%rdi)		; NEHALEM-NEXT: movq $0, (%rdi)
; NEHALEM-NEXT: retq		; NEHALEM-NEXT: retq
entry:		entry:
call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 11, i1 false)		call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 11, i1 false)
ret void		ret void
}		}

define void @memset_13(i8* %a) nounwind {		define void @memset_13(i8* %a) nounwind {
Show All 21 Lines	entry:
call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 13, i1 false)		call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 13, i1 false)
ret void		ret void
}		}

define void @memset_15(i8* %a) nounwind {		define void @memset_15(i8* %a) nounwind {
; X86-LABEL: memset_15:		; X86-LABEL: memset_15:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb $0, 14(%eax)		; X86-NEXT: movl $0, 11(%eax)
; X86-NEXT: movw $0, 12(%eax)
; X86-NEXT: movl $0, 8(%eax)		; X86-NEXT: movl $0, 8(%eax)
; X86-NEXT: movl $0, 4(%eax)		; X86-NEXT: movl $0, 4(%eax)
; X86-NEXT: movl $0, (%eax)		; X86-NEXT: movl $0, (%eax)
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; CORE2-LABEL: memset_15:		; CORE2-LABEL: memset_15:
; CORE2: # %bb.0: # %entry		; CORE2: # %bb.0: # %entry
; CORE2-NEXT: movq $0, 7(%rdi)		; CORE2-NEXT: movq $0, 7(%rdi)
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines	entry:
call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 17, i1 false)		call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 17, i1 false)
ret void		ret void
}		}

define void @memset_19(i8* %a) nounwind {		define void @memset_19(i8* %a) nounwind {
; X86-LABEL: memset_19:		; X86-LABEL: memset_19:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb $0, 18(%eax)		; X86-NEXT: movl $0, 15(%eax)
; X86-NEXT: movw $0, 16(%eax)
; X86-NEXT: movl $0, 12(%eax)		; X86-NEXT: movl $0, 12(%eax)
; X86-NEXT: movl $0, 8(%eax)		; X86-NEXT: movl $0, 8(%eax)
; X86-NEXT: movl $0, 4(%eax)		; X86-NEXT: movl $0, 4(%eax)
; X86-NEXT: movl $0, (%eax)		; X86-NEXT: movl $0, (%eax)
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; CORE2-LABEL: memset_19:		; CORE2-LABEL: memset_19:
; CORE2: # %bb.0: # %entry		; CORE2: # %bb.0: # %entry
; CORE2-NEXT: movb $0, 18(%rdi)		; CORE2-NEXT: movl $0, 15(%rdi)
; CORE2-NEXT: movw $0, 16(%rdi)
; CORE2-NEXT: movq $0, 8(%rdi)		; CORE2-NEXT: movq $0, 8(%rdi)
; CORE2-NEXT: movq $0, (%rdi)		; CORE2-NEXT: movq $0, (%rdi)
; CORE2-NEXT: retq		; CORE2-NEXT: retq
;		;
; NEHALEM-LABEL: memset_19:		; NEHALEM-LABEL: memset_19:
; NEHALEM: # %bb.0: # %entry		; NEHALEM: # %bb.0: # %entry
; NEHALEM-NEXT: xorps %xmm0, %xmm0		; NEHALEM-NEXT: xorps %xmm0, %xmm0
; NEHALEM-NEXT: movups %xmm0, (%rdi)		; NEHALEM-NEXT: movups %xmm0, (%rdi)
; NEHALEM-NEXT: movb $0, 18(%rdi)		; NEHALEM-NEXT: movl $0, 15(%rdi)
; NEHALEM-NEXT: movw $0, 16(%rdi)
; NEHALEM-NEXT: retq		; NEHALEM-NEXT: retq
entry:		entry:
call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 19, i1 false)		call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 19, i1 false)
ret void		ret void
}		}

define void @memset_31(i8* %a) nounwind {		define void @memset_31(i8* %a) nounwind {
; X86-LABEL: memset_31:		; X86-LABEL: memset_31:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb $0, 30(%eax)		; X86-NEXT: movl $0, 27(%eax)
; X86-NEXT: movw $0, 28(%eax)
; X86-NEXT: movl $0, 24(%eax)		; X86-NEXT: movl $0, 24(%eax)
; X86-NEXT: movl $0, 20(%eax)		; X86-NEXT: movl $0, 20(%eax)
; X86-NEXT: movl $0, 16(%eax)		; X86-NEXT: movl $0, 16(%eax)
; X86-NEXT: movl $0, 12(%eax)		; X86-NEXT: movl $0, 12(%eax)
; X86-NEXT: movl $0, 8(%eax)		; X86-NEXT: movl $0, 8(%eax)
; X86-NEXT: movl $0, 4(%eax)		; X86-NEXT: movl $0, 4(%eax)
; X86-NEXT: movl $0, (%eax)		; X86-NEXT: movl $0, (%eax)
; X86-NEXT: retl		; X86-NEXT: retl
Show All 16 Lines	entry:
call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 31, i1 false)		call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 31, i1 false)
ret void		ret void
}		}

define void @memset_35(i8* %a) nounwind {		define void @memset_35(i8* %a) nounwind {
; X86-LABEL: memset_35:		; X86-LABEL: memset_35:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb $0, 34(%eax)		; X86-NEXT: movl $0, 31(%eax)
; X86-NEXT: movw $0, 32(%eax)
; X86-NEXT: movl $0, 28(%eax)		; X86-NEXT: movl $0, 28(%eax)
; X86-NEXT: movl $0, 24(%eax)		; X86-NEXT: movl $0, 24(%eax)
; X86-NEXT: movl $0, 20(%eax)		; X86-NEXT: movl $0, 20(%eax)
; X86-NEXT: movl $0, 16(%eax)		; X86-NEXT: movl $0, 16(%eax)
; X86-NEXT: movl $0, 12(%eax)		; X86-NEXT: movl $0, 12(%eax)
; X86-NEXT: movl $0, 8(%eax)		; X86-NEXT: movl $0, 8(%eax)
; X86-NEXT: movl $0, 4(%eax)		; X86-NEXT: movl $0, 4(%eax)
; X86-NEXT: movl $0, (%eax)		; X86-NEXT: movl $0, (%eax)
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; CORE2-LABEL: memset_35:		; CORE2-LABEL: memset_35:
; CORE2: # %bb.0: # %entry		; CORE2: # %bb.0: # %entry
; CORE2-NEXT: movb $0, 34(%rdi)		; CORE2-NEXT: movl $0, 31(%rdi)
; CORE2-NEXT: movw $0, 32(%rdi)
; CORE2-NEXT: movq $0, 24(%rdi)		; CORE2-NEXT: movq $0, 24(%rdi)
; CORE2-NEXT: movq $0, 16(%rdi)		; CORE2-NEXT: movq $0, 16(%rdi)
; CORE2-NEXT: movq $0, 8(%rdi)		; CORE2-NEXT: movq $0, 8(%rdi)
; CORE2-NEXT: movq $0, (%rdi)		; CORE2-NEXT: movq $0, (%rdi)
; CORE2-NEXT: retq		; CORE2-NEXT: retq
;		;
; NEHALEM-LABEL: memset_35:		; NEHALEM-LABEL: memset_35:
; NEHALEM: # %bb.0: # %entry		; NEHALEM: # %bb.0: # %entry
; NEHALEM-NEXT: xorps %xmm0, %xmm0		; NEHALEM-NEXT: xorps %xmm0, %xmm0
; NEHALEM-NEXT: movups %xmm0, 16(%rdi)		; NEHALEM-NEXT: movups %xmm0, 16(%rdi)
; NEHALEM-NEXT: movups %xmm0, (%rdi)		; NEHALEM-NEXT: movups %xmm0, (%rdi)
; NEHALEM-NEXT: movb $0, 34(%rdi)		; NEHALEM-NEXT: movl $0, 31(%rdi)
; NEHALEM-NEXT: movw $0, 32(%rdi)
; NEHALEM-NEXT: retq		; NEHALEM-NEXT: retq
entry:		entry:
call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 35, i1 false)		call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 35, i1 false)
ret void		ret void
}		}

llvm/trunk/test/CodeGen/X86/unaligned-load.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=I386 %s		; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=I386 %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=CORE2 %s		; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=CORE2 %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=corei7 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=COREI7 %s		; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=corei7 -relocation-model=dynamic-no-pic | FileCheck -check-prefix=COREI7 %s

@.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8		@.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8
@.str3 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, 2'ND STRING\00", align 8		@.str3 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, 2'ND STRING\00", align 8

		; This can be improved; see PR39952.

define void @func() nounwind ssp {		define void @func() nounwind ssp {
; I386-LABEL: func:		; I386-LABEL: func:
; I386: ## %bb.0: ## %entry		; I386: ## %bb.0: ## %entry
; I386-NEXT: pushl %esi		; I386-NEXT: subl $32, %esp
; I386-NEXT: subl $40, %esp
; I386-NEXT: leal {{[0-9]+}}(%esp), %esi
; I386-NEXT: .p2align 4, 0x90		; I386-NEXT: .p2align 4, 0x90
; I386-NEXT: LBB0_1: ## %bb		; I386-NEXT: LBB0_1: ## %bb
; I386-NEXT: ## =>This Inner Loop Header: Depth=1		; I386-NEXT: ## =>This Inner Loop Header: Depth=1
; I386-NEXT: subl $4, %esp		; I386-NEXT: movl $4673097, {{[0-9]+}}(%esp) ## imm = 0x474E49
; I386-NEXT: pushl $31		; I386-NEXT: movl $1230132307, {{[0-9]+}}(%esp) ## imm = 0x49525453
; I386-NEXT: pushl $_.str3		; I386-NEXT: movl $541347367, {{[0-9]+}}(%esp) ## imm = 0x20444E27
; I386-NEXT: pushl %esi		; I386-NEXT: movl $840969293, {{[0-9]+}}(%esp) ## imm = 0x32202C4D
; I386-NEXT: calll _memcpy		; I386-NEXT: movl $1095911247, {{[0-9]+}}(%esp) ## imm = 0x4152474F
; I386-NEXT: addl $16, %esp		; I386-NEXT: movl $1380982853, {{[0-9]+}}(%esp) ## imm = 0x52502045
		; I386-NEXT: movl $1313821779, {{[0-9]+}}(%esp) ## imm = 0x4E4F5453
		; I386-NEXT: movl $1498564676, (%esp) ## imm = 0x59524844
; I386-NEXT: jmp LBB0_1		; I386-NEXT: jmp LBB0_1
;		;
; CORE2-LABEL: func:		; CORE2-LABEL: func:
; CORE2: ## %bb.0: ## %entry		; CORE2: ## %bb.0: ## %entry
; CORE2-NEXT: movabsq $20070800167293728, %rax ## imm = 0x474E4952545320		; CORE2-NEXT: movabsq $20070800167293728, %rax ## imm = 0x474E4952545320
; CORE2-NEXT: movabsq $2325069237881678925, %rcx ## imm = 0x20444E2732202C4D		; CORE2-NEXT: movabsq $2325069237881678925, %rcx ## imm = 0x20444E2732202C4D
; CORE2-NEXT: movabsq $4706902966564560965, %rdx ## imm = 0x4152474F52502045		; CORE2-NEXT: movabsq $4706902966564560965, %rdx ## imm = 0x4152474F52502045
; CORE2-NEXT: movabsq $5642821575076104260, %rsi ## imm = 0x4E4F545359524844		; CORE2-NEXT: movabsq $5642821575076104260, %rsi ## imm = 0x4E4F545359524844
Show All 24 Lines	bb: ; preds = %bb, %entry
%String2Loc9 = getelementptr inbounds [31 x i8], [31 x i8]* %String2Loc, i64 0, i64 0		%String2Loc9 = getelementptr inbounds [31 x i8], [31 x i8]* %String2Loc, i64 0, i64 0
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str3, i64 0, i64 0), i64 31, i1 false)		call void @llvm.memcpy.p0i8.p0i8.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str3, i64 0, i64 0), i64 31, i1 false)
br label %bb		br label %bb

return: ; No predecessors!		return: ; No predecessors!
ret void		ret void
}		}

		define void @func_aligned() nounwind ssp {
		; I386-LABEL: func_aligned:
		; I386: ## %bb.0: ## %entry
		; I386-NEXT: subl $44, %esp
		; I386-NEXT: movaps {{.*#+}} xmm0 = [1498564676,1313821779,1380982853,1095911247]
		; I386-NEXT: .p2align 4, 0x90
		; I386-NEXT: LBB1_1: ## %bb
		; I386-NEXT: ## =>This Inner Loop Header: Depth=1
		; I386-NEXT: movaps %xmm0, (%esp)
		; I386-NEXT: movl $4673097, {{[0-9]+}}(%esp) ## imm = 0x474E49
		; I386-NEXT: movl $1230132307, {{[0-9]+}}(%esp) ## imm = 0x49525453
		; I386-NEXT: movl $541347367, {{[0-9]+}}(%esp) ## imm = 0x20444E27
		; I386-NEXT: movl $840969293, {{[0-9]+}}(%esp) ## imm = 0x32202C4D
		; I386-NEXT: jmp LBB1_1
		;
		; CORE2-LABEL: func_aligned:
		; CORE2: ## %bb.0: ## %entry
		; CORE2-NEXT: movabsq $20070800167293728, %rax ## imm = 0x474E4952545320
		; CORE2-NEXT: movabsq $2325069237881678925, %rcx ## imm = 0x20444E2732202C4D
		; CORE2-NEXT: movabsq $4706902966564560965, %rdx ## imm = 0x4152474F52502045
		; CORE2-NEXT: movabsq $5642821575076104260, %rsi ## imm = 0x4E4F545359524844
		; CORE2-NEXT: .p2align 4, 0x90
		; CORE2-NEXT: LBB1_1: ## %bb
		; CORE2-NEXT: ## =>This Inner Loop Header: Depth=1
		; CORE2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
		; CORE2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
		; CORE2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
		; CORE2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
		; CORE2-NEXT: jmp LBB1_1
		;
		; COREI7-LABEL: func_aligned:
		; COREI7: ## %bb.0: ## %entry
		; COREI7-NEXT: movups _.str3+{{.*}}(%rip), %xmm0
		; COREI7-NEXT: movups {{.*}}(%rip), %xmm1
		; COREI7-NEXT: .p2align 4, 0x90
		; COREI7-NEXT: LBB1_1: ## %bb
		; COREI7-NEXT: ## =>This Inner Loop Header: Depth=1
		; COREI7-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
		; COREI7-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
		; COREI7-NEXT: jmp LBB1_1
		entry:
		%String2Loc = alloca [31 x i8], align 16
		br label %bb

		bb: ; preds = %bb, %entry
		%String2Loc9 = getelementptr inbounds [31 x i8], [31 x i8]* %String2Loc, i64 0, i64 0
		call void @llvm.memcpy.p0i8.p0i8.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str3, i64 0, i64 0), i64 31, i1 false)
		br label %bb

		return: ; No predecessors!
		ret void
		}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind		declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind

This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen] Allow memcpy/memset to generate small overlapping stores.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 178020

llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/trunk/test/CodeGen/AArch64/arm64-memcpy-inline.ll

llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll

llvm/trunk/test/CodeGen/ARM/memcpy-ldm-stm.ll

llvm/trunk/test/CodeGen/PowerPC/jaggedstructs.ll

llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll

llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll

llvm/trunk/test/CodeGen/X86/memcpy-from-string.ll

llvm/trunk/test/CodeGen/X86/memset-2.ll

llvm/trunk/test/CodeGen/X86/memset-zero.ll

llvm/trunk/test/CodeGen/X86/unaligned-load.ll

This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen] Allow memcpy/memset to generate small overlapping stores. ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 178020

llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/trunk/test/CodeGen/AArch64/arm64-memcpy-inline.ll

llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll

llvm/trunk/test/CodeGen/ARM/memcpy-ldm-stm.ll

llvm/trunk/test/CodeGen/PowerPC/jaggedstructs.ll

llvm/trunk/test/CodeGen/PowerPC/structsinmem.ll

llvm/trunk/test/CodeGen/PowerPC/structsinregs.ll

llvm/trunk/test/CodeGen/X86/memcpy-from-string.ll

llvm/trunk/test/CodeGen/X86/memset-2.ll

llvm/trunk/test/CodeGen/X86/memset-zero.ll

llvm/trunk/test/CodeGen/X86/unaligned-load.ll

[CodeGen] Allow memcpy/memset to generate small overlapping stores.
ClosedPublic