Index: llvm/lib/Target/AArch64/AArch64.td =================================================================== --- llvm/lib/Target/AArch64/AArch64.td +++ llvm/lib/Target/AArch64/AArch64.td @@ -460,7 +460,8 @@ FeatureCrypto, FeatureFPARMv8, FeatureNEON, - FeaturePerfMon + FeaturePerfMon, + FeaturePostRAScheduler, ]>; def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", @@ -487,7 +488,8 @@ FeatureFullFP16, FeatureDotProd, FeatureRCPC, - FeaturePerfMon + FeaturePerfMon, + FeaturePostRAScheduler, ]>; def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", @@ -516,6 +518,7 @@ FeatureRAS, FeatureRCPC, FeatureSSBS, + FeaturePostRAScheduler, ]>; def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", @@ -524,8 +527,10 @@ FeatureCrypto, FeatureFPARMv8, FeatureFuseAES, + FeatureFuseLiterals, FeatureNEON, - FeaturePerfMon + FeaturePerfMon, + FeaturePostRAScheduler, ]>; def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", @@ -534,8 +539,10 @@ FeatureCrypto, FeatureFPARMv8, FeatureFuseAES, + FeatureFuseLiterals, FeatureNEON, - FeaturePerfMon + FeaturePerfMon, + FeaturePostRAScheduler, ]>; def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", @@ -548,7 +555,8 @@ FeatureFullFP16, FeatureDotProd, FeatureRCPC, - FeaturePerfMon + FeaturePerfMon, + FeaturePostRAScheduler, ]>; def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", @@ -560,7 +568,8 @@ FeatureCrypto, FeatureFullFP16, FeatureDotProd, - FeatureSSBS + FeatureSSBS, + FeaturePostRAScheduler, ]>; // Note that cyclone does not fuse AES instructions, but newer apple chips do Index: llvm/test/CodeGen/AArch64/postra-mi-sched.ll =================================================================== --- llvm/test/CodeGen/AArch64/postra-mi-sched.ll +++ llvm/test/CodeGen/AArch64/postra-mi-sched.ll @@ -1,4 +1,7 @@ +; RUN: llc < %s -O3 -mtriple=aarch64-eabi -mcpu=cortex-a35 | FileCheck %s ; RUN: llc < %s -O3 -mtriple=aarch64-eabi -mcpu=cortex-a53 | FileCheck %s +; RUN: llc < %s -O3 -mtriple=aarch64-eabi -mcpu=cortex-a55 | FileCheck %s +; RUN: llc < %s -O3 -mtriple=aarch64-eabi -mcpu=cortex-a65 | FileCheck %s ; With cortex-a53, each of fmul and fcvt have latency of 6 cycles. After the ; pre-RA MI scheduler, fmul, fcvt and fdiv will be consecutive. The top-down Index: llvm/test/CodeGen/AArch64/postra-misched-a53-model.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/postra-misched-a53-model.mir @@ -0,0 +1,560 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a35 -run-pass=postmisched %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a53 -run-pass=postmisched %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 -run-pass=postmisched %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a65 -run-pass=postmisched %s -o - | FileCheck %s + +--- | + @main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4 + @main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4 + ; Function Attrs: nounwind + define i32 @main() #0 { + entry: + %retval = alloca i32, align 4 + %x = alloca [8 x i32], align 4 + %y = alloca [8 x i32], align 4 + %i = alloca i32, align 4 + %xx = alloca i32, align 4 + %yy = alloca i32, align 4 + store i32 0, i32* %retval + %0 = bitcast [8 x i32]* %x to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ([8 x i32]* @main.x to i8*), i64 32, i1 false) + %1 = bitcast [8 x i32]* %y to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast ([8 x i32]* @main.y to i8*), i64 32, i1 false) + store i32 0, i32* %xx, align 4 + store i32 0, i32* %yy, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + + for.cond: ; preds = %for.body, %entry + %2 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %2, 8 + br i1 %cmp, label %for.body, label %for.end + + for.body: ; preds = %for.cond + %3 = load i32, i32* %i, align 4 + %idxprom = sext i32 %3 to i64 + %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %x, i32 0, i64 %idxprom + %4 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %4, 1 + store i32 %add, i32* %xx, align 4 + %5 = load i32, i32* %xx, align 4 + %add1 = add nsw i32 %5, 12 + store i32 %add1, i32* %xx, align 4 + %6 = load i32, i32* %xx, align 4 + %add2 = add nsw i32 %6, 23 + store i32 %add2, i32* %xx, align 4 + %7 = load i32, i32* %xx, align 4 + %add3 = add nsw i32 %7, 34 + store i32 %add3, i32* %xx, align 4 + %8 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %8 to i64 + %arrayidx5 = getelementptr inbounds [8 x i32], [8 x i32]* %y, i32 0, i64 %idxprom4 + %9 = load i32, i32* %arrayidx5, align 4 + %10 = load i32, i32* %yy, align 4 + %mul = mul nsw i32 %10, %9 + store i32 %mul, i32* %yy, align 4 + %11 = load i32, i32* %i, align 4 + %inc = add nsw i32 %11, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + + for.end: ; preds = %for.cond + %12 = load i32, i32* %xx, align 4 + %13 = load i32, i32* %yy, align 4 + %add6 = add nsw i32 %12, %13 + ret i32 %add6 + } + define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) #1 { + %tmp1 = fadd <4 x float> %A, %B + %tmp2 = fadd <4 x float> %A, %tmp1 + %tmp3 = fadd <4 x float> %A, %tmp2 + %tmp4 = fadd <4 x float> %A, %tmp3 + %tmp5 = fadd <4 x float> %A, %tmp4 + %tmp6 = fadd <4 x float> %A, %tmp5 + %tmp7 = fadd <4 x float> %A, %tmp6 + %tmp8 = fadd <4 x float> %A, %tmp7 + %tmp9 = fdiv <4 x float> %A, %B + %tmp10 = fadd <4 x float> %tmp8, %tmp9 + ret <4 x float> %tmp10 + } + ; Function Attrs: argmemonly nounwind willreturn + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #2 + define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) #1 { + %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A) + %tmp = getelementptr i8, i8* %A, i32 32 + store i8* %tmp, i8** %ptr + ret { <16 x i8>, <16 x i8> } %ld2 + } + ; Function Attrs: argmemonly nounwind readonly + declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) #3 + define void @testResourceConflict(float* %ptr) #1 { + entry: + %add1 = fadd float undef, undef + %mul2 = fmul float undef, undef + %add3 = fadd float %mul2, undef + %mul4 = fmul float undef, %add3 + %add5 = fadd float %mul4, undef + %sub6 = fsub float 0.000000e+00, undef + %sub7 = fsub float %add5, undef + %div8 = fdiv float 1.000000e+00, undef + %mul9 = fmul float %div8, %sub7 + %mul14 = fmul float %sub6, %div8 + %mul10 = fsub float -0.000000e+00, %mul14 + %mul15 = fmul float undef, %div8 + %mul11 = fsub float -0.000000e+00, %mul15 + %mul12 = fmul float 0.000000e+00, %div8 + %mul13 = fmul float %add1, %mul9 + %mul21 = fmul float %add5, %mul11 + %add22 = fadd float %mul13, %mul21 + store float %add22, float* %ptr, align 4 + %mul28 = fmul float %add1, %mul10 + %mul33 = fmul float %add5, %mul12 + %add34 = fadd float %mul33, %mul28 + store float %add34, float* %ptr, align 4 + %mul240 = fmul float undef, %mul9 + %add246 = fadd float %mul240, undef + store float %add246, float* %ptr, align 4 + %mul52 = fmul float undef, %mul10 + %mul57 = fmul float undef, %mul12 + %add58 = fadd float %mul57, %mul52 + store float %add58, float* %ptr, align 4 + %mul27 = fmul float 0.000000e+00, %mul9 + %mul81 = fmul float undef, %mul10 + %add82 = fadd float %mul27, %mul81 + store float %add82, float* %ptr, align 4 + call void @llvm.trap() + unreachable + } + ; Function Attrs: cold noreturn nounwind + declare void @llvm.trap() #4 + define void @testLdStConflict(<2 x i64> %v) #1 { + entry: + br label %loop + + loop: ; preds = %loop, %entry + %0 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i8(i8* null) + %ptr = bitcast i8* undef to <2 x i64>* + store <2 x i64> %v, <2 x i64>* %ptr, align 4 + %ptr1 = bitcast i8* undef to <2 x i64>* + store <2 x i64> %v, <2 x i64>* %ptr1, align 4 + %ptr2 = bitcast i8* undef to <2 x i64>* + store <2 x i64> %v, <2 x i64>* %ptr2, align 4 + %ptr3 = bitcast i8* undef to <2 x i64>* + store <2 x i64> %v, <2 x i64>* %ptr3, align 4 + %ptr4 = bitcast i8* undef to <2 x i64>* + store <2 x i64> %v, <2 x i64>* %ptr4, align 4 + br label %loop + } + ; Function Attrs: argmemonly nounwind readonly + declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i8(i8*) #3 + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #5 + + attributes #0 = { nounwind "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a53" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { "target-cpu"="cortex-a53" } + attributes #2 = { argmemonly nounwind willreturn "target-cpu"="cortex-a53" } + attributes #3 = { argmemonly nounwind readonly "target-cpu"="cortex-a53" } + attributes #4 = { cold noreturn nounwind "target-cpu"="cortex-a53" } + attributes #5 = { nounwind } + +... +--- +name: main +alignment: 8 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 112 + offsetAdjustment: 0 + maxAlignment: 16 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 92 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: retval, type: default, offset: -20, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -4, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 1, name: x, type: default, offset: -64, size: 32, alignment: 16, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -48, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 2, name: y, type: default, offset: -96, size: 32, alignment: 16, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -80, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 3, name: i, type: default, offset: -100, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -84, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 4, name: xx, type: default, offset: -104, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -88, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 5, name: yy, type: default, offset: -108, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -92, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 6, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: main + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr + ; CHECK: $sp = frame-setup SUBXri $sp, 112, 0 + ; CHECK: $x8 = ADRP target-flags(aarch64-page) @main.x + ; CHECK: frame-setup STPXi $fp, killed $lr, $sp, 12 :: (store 8 into %stack.7), (store 8 into %stack.6) + ; CHECK: renamable $x8 = ADDXri killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @main.x, 0 + ; CHECK: $fp = frame-setup ADDXri $sp, 96, 0 + ; CHECK: $x9 = ADDXri $sp, 16, 0 + ; CHECK: STRXui $xzr, $sp, 1 :: (store 4 into %ir.xx), (store 4 into %ir.i) + ; CHECK: STRWui $wzr, $sp, 1 :: (store 4 into %ir.yy) + ; CHECK: renamable $q0, renamable $q1 = LDPQi killed renamable $x8, 0 :: (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.x to i8*)`, align 4), (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.x to i8*)` + 16, align 4) + ; CHECK: $x8 = ADRP target-flags(aarch64-page) @main.y + ; CHECK: renamable $x8 = ADDXri killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @main.y, 0 + ; CHECK: STURWi $wzr, $fp, -4 :: (store 4 into %ir.retval) + ; CHECK: STRQui killed renamable $q0, $sp, 3 :: (store 16 into %ir.0) + ; CHECK: renamable $q2, renamable $q0 = LDPQi killed renamable $x8, 0 :: (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.y to i8*)`, align 4), (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.y to i8*)` + 16, align 4) + ; CHECK: $x8 = ADDXri $sp, 48, 0 + ; CHECK: STRQui killed renamable $q1, $sp, 4 :: (store 16 into %ir.0 + 16) + ; CHECK: STPQi killed renamable $q2, killed renamable $q0, $sp, 1 :: (store 16 into %ir.1), (store 16 into %ir.1 + 16) + ; CHECK: bb.1.for.cond: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK: liveins: $x8, $x9 + ; CHECK: renamable $w10 = LDRWui $sp, 3 :: (dereferenceable load 4 from %ir.i) + ; CHECK: dead $wzr = SUBSWri killed renamable $w10, 7, 0, implicit-def $nzcv + ; CHECK: Bcc 12, %bb.3, implicit killed $nzcv + ; CHECK: bb.2.for.body: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $x8, $x9 + ; CHECK: renamable $x10 = LDRSWui $sp, 3 :: (dereferenceable load 4 from %ir.i) + ; CHECK: renamable $w12 = LDRWui $sp, 1 :: (dereferenceable load 4 from %ir.yy) + ; CHECK: renamable $x11 = UBFMXri renamable $x10, 62, 61 + ; CHECK: renamable $w10 = nsw ADDWri killed renamable $w10, 1, 0, implicit $x10 + ; CHECK: renamable $w13 = LDRWroX renamable $x8, renamable $x11, 0, 0 :: (load 4 from %ir.arrayidx) + ; CHECK: renamable $w11 = LDRWroX renamable $x9, killed renamable $x11, 0, 0 :: (load 4 from %ir.arrayidx5) + ; CHECK: STRWui killed renamable $w10, $sp, 3 :: (store 4 into %ir.i) + ; CHECK: renamable $w13 = ADDWri killed renamable $w13, 70, 0 + ; CHECK: renamable $w11 = nsw MADDWrrr killed renamable $w12, killed renamable $w11, $wzr + ; CHECK: STPWi killed renamable $w11, killed renamable $w13, $sp, 1 :: (store 4 into %ir.xx), (store 4 into %ir.yy) + ; CHECK: B %bb.1 + ; CHECK: bb.3.for.end: + ; CHECK: renamable $w9, renamable $w8 = LDPWi $sp, 1 :: (dereferenceable load 4 from %ir.xx), (dereferenceable load 4 from %ir.yy) + ; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.7), (load 8 from %stack.6) + ; CHECK: $w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0 + ; CHECK: $sp = frame-destroy ADDXri $sp, 112, 0 + ; CHECK: RET undef $lr, implicit killed $w0 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $lr + + $sp = frame-setup SUBXri $sp, 112, 0 + frame-setup STPXi killed $fp, killed $lr, $sp, 12 :: (store 8 into %stack.7), (store 8 into %stack.6) + $fp = frame-setup ADDXri $sp, 96, 0 + $x8 = ADRP target-flags(aarch64-page) @main.x + renamable $x8 = ADDXri $x8, target-flags(aarch64-pageoff, aarch64-nc) @main.x, 0 + renamable $q0, renamable $q1 = LDPQi renamable $x8, 0 :: (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.x to i8*)`, align 4), (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.x to i8*)` + 16, align 4) + $x8 = ADRP target-flags(aarch64-page) @main.y + renamable $x8 = ADDXri $x8, target-flags(aarch64-pageoff, aarch64-nc) @main.y, 0 + STRQui killed renamable $q0, $sp, 3 :: (store 16 into %ir.0) + renamable $q2, renamable $q0 = LDPQi killed renamable $x8, 0 :: (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.y to i8*)`, align 4), (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.y to i8*)` + 16, align 4) + STRQui killed renamable $q1, $sp, 4 :: (store 16 into %ir.0 + 16) + STPQi renamable $q2, renamable $q0, $sp, 1 :: (store 16 into %ir.1), (store 16 into %ir.1 + 16) + STURWi $wzr, $fp, -4 :: (store 4 into %ir.retval) + STRXui $xzr, $sp, 1 :: (store 4 into %ir.xx), (store 4 into %ir.i) + $x8 = ADDXri $sp, 48, 0 + $x9 = ADDXri $sp, 16, 0 + STRWui $wzr, $sp, 1 :: (store 4 into %ir.yy) + + bb.1.for.cond: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + liveins: $x8, $x9 + + renamable $w10 = LDRWui $sp, 3 :: (dereferenceable load 4 from %ir.i) + dead $wzr = SUBSWri killed renamable $w10, 7, 0, implicit-def $nzcv + Bcc 12, %bb.3, implicit $nzcv + + bb.2.for.body: + successors: %bb.1(0x80000000) + liveins: $x8, $x9 + + renamable $x10 = LDRSWui $sp, 3 :: (dereferenceable load 4 from %ir.i) + renamable $x11 = UBFMXri renamable $x10, 62, 61 + renamable $w12 = LDRWui $sp, 1 :: (dereferenceable load 4 from %ir.yy) + renamable $w13 = LDRWroX renamable $x8, renamable $x11, 0, 0 :: (load 4 from %ir.arrayidx) + renamable $w11 = LDRWroX renamable $x9, killed renamable $x11, 0, 0 :: (load 4 from %ir.arrayidx5) + renamable $w13 = ADDWri killed renamable $w13, 70, 0 + renamable $w11 = nsw MADDWrrr killed renamable $w12, killed renamable $w11, $wzr + renamable $w10 = nsw ADDWri renamable $w10, 1, 0, implicit killed $x10 + STPWi renamable $w11, renamable $w13, $sp, 1 :: (store 4 into %ir.xx), (store 4 into %ir.yy) + STRWui killed renamable $w10, $sp, 3 :: (store 4 into %ir.i) + B %bb.1 + + bb.3.for.end: + renamable $w9, renamable $w8 = LDPWi $sp, 1 :: (dereferenceable load 4 from %ir.xx), (dereferenceable load 4 from %ir.yy) + $w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0 + $fp, $lr = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.7), (load 8 from %stack.6) + $sp = frame-destroy ADDXri $sp, 112, 0 + RET undef $lr, implicit $w0 + +... +--- +name: neon4xfloat +alignment: 8 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$q0', virtual-reg: '' } + - { reg: '$q1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $q0, $q1 + + ; CHECK-LABEL: name: neon4xfloat + ; CHECK: liveins: $q0, $q1 + ; CHECK: renamable $q2 = FADDv4f32 renamable $q0, renamable $q1 + ; CHECK: renamable $q1 = FDIVv4f32 renamable $q0, killed renamable $q1 + ; CHECK: renamable $q2 = FADDv4f32 renamable $q0, killed renamable $q2 + ; CHECK: renamable $q2 = FADDv4f32 renamable $q0, killed renamable $q2 + ; CHECK: renamable $q2 = FADDv4f32 renamable $q0, killed renamable $q2 + ; CHECK: renamable $q2 = FADDv4f32 renamable $q0, killed renamable $q2 + ; CHECK: renamable $q2 = FADDv4f32 renamable $q0, killed renamable $q2 + ; CHECK: renamable $q2 = FADDv4f32 renamable $q0, killed renamable $q2 + ; CHECK: renamable $q0 = FADDv4f32 killed renamable $q0, killed renamable $q2 + ; CHECK: renamable $q0 = FADDv4f32 killed renamable $q0, killed renamable $q1 + ; CHECK: RET undef $lr, implicit killed $q0 + renamable $q2 = FADDv4f32 renamable $q0, renamable $q1 + renamable $q2 = FADDv4f32 renamable $q0, killed renamable $q2 + renamable $q2 = FADDv4f32 renamable $q0, killed renamable $q2 + renamable $q2 = FADDv4f32 renamable $q0, killed renamable $q2 + renamable $q2 = FADDv4f32 renamable $q0, killed renamable $q2 + renamable $q2 = FADDv4f32 renamable $q0, killed renamable $q2 + renamable $q1 = FDIVv4f32 renamable $q0, killed renamable $q1 + renamable $q2 = FADDv4f32 renamable $q0, killed renamable $q2 + renamable $q0 = FADDv4f32 killed renamable $q0, killed renamable $q2 + renamable $q0 = FADDv4f32 killed renamable $q0, killed renamable $q1 + RET undef $lr, implicit $q0 + +... +--- +name: test_v16i8_post_imm_ld2 +alignment: 8 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$x0', virtual-reg: '' } + - { reg: '$x1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_v16i8_post_imm_ld2 + ; CHECK: liveins: $x0, $x1 + ; CHECK: renamable $x0, renamable $q0_q1 = LD2Twov16b_POST killed renamable $x0, $xzr + ; CHECK: STRXui killed renamable $x0, killed renamable $x1, 0 :: (store 8 into %ir.ptr) + ; CHECK: RET undef $lr, implicit killed $q0, implicit killed $q1 + renamable $x0, renamable $q0_q1 = LD2Twov16b_POST killed renamable $x0, $xzr + STRXui killed renamable $x0, killed renamable $x1, 0 :: (store 8 into %ir.ptr) + RET undef $lr, implicit $q0, implicit $q1 + +... +--- +name: testResourceConflict +alignment: 8 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$x0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x0 + + ; CHECK-LABEL: name: testResourceConflict + ; CHECK: liveins: $x0 + ; CHECK: renamable $w8 = MOVZWi 32704, 16 + ; CHECK: STRWui killed renamable $w8, killed renamable $x0, 0 :: (store 4 into %ir.ptr) + ; CHECK: BRK 1 + renamable $w8 = MOVZWi 32704, 16 + STRWui killed renamable $w8, killed renamable $x0, 0 :: (store 4 into %ir.ptr) + BRK 1 + +... +--- +name: testLdStConflict +alignment: 8 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$q0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: testLdStConflict + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $q0 + ; CHECK: bb.1.loop: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $q0 + ; CHECK: STRQui renamable $q0, undef renamable $x8, 0 :: (store 16 into %ir.ptr, align 4) + ; CHECK: B %bb.1 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $q0 + + + bb.1.loop: + successors: %bb.1(0x80000000) + liveins: $q0 + + STRQui renamable $q0, undef renamable $x8, 0 :: (store 16 into %ir.ptr, align 4) + B %bb.1 + +... Index: llvm/test/CodeGen/AArch64/postra-misched-a57-model.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/postra-misched-a57-model.mir @@ -0,0 +1,394 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 -run-pass=postmisched %s -o - | FileCheck %s --check-prefix=FUSE-LITERAL +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a72 -run-pass=postmisched %s -o - | FileCheck %s --check-prefix=FUSE-LITERAL +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a73 -run-pass=postmisched %s -o - | FileCheck %s --check-prefix=FUSE-LITERAL +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a75 -run-pass=postmisched %s -o - | FileCheck %s --check-prefix=DONT-FUSE +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a76 -run-pass=postmisched %s -o - | FileCheck %s --check-prefix=DONT-FUSE + +--- | + + @main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4 + @main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4 + ; Function Attrs: nounwind + define i32 @main() #0 { + entry: + %retval = alloca i32, align 4 + %x = alloca [8 x i32], align 4 + %y = alloca [8 x i32], align 4 + %i = alloca i32, align 4 + %xx = alloca i32, align 4 + %yy = alloca i32, align 4 + store i32 0, i32* %retval + %0 = bitcast [8 x i32]* %x to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ([8 x i32]* @main.x to i8*), i64 32, i1 false) + %1 = bitcast [8 x i32]* %y to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast ([8 x i32]* @main.y to i8*), i64 32, i1 false) + store i32 0, i32* %xx, align 4 + store i32 0, i32* %yy, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + + for.cond: ; preds = %for.body, %entry + %2 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %2, 8 + br i1 %cmp, label %for.body, label %for.end + + for.body: ; preds = %for.cond + %3 = load i32, i32* %yy, align 4 + %4 = load i32, i32* %i, align 4 + %idxprom = sext i32 %4 to i64 + %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %x, i32 0, i64 %idxprom + %5 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %5, 1 + store i32 %add, i32* %xx, align 4 + %6 = load i32, i32* %xx, align 4 + %add1 = add nsw i32 %6, 12 + store i32 %add1, i32* %xx, align 4 + %7 = load i32, i32* %xx, align 4 + %add2 = add nsw i32 %7, 23 + store i32 %add2, i32* %xx, align 4 + %8 = load i32, i32* %xx, align 4 + %add3 = add nsw i32 %8, 34 + store i32 %add3, i32* %xx, align 4 + %9 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %9 to i64 + %arrayidx5 = getelementptr inbounds [8 x i32], [8 x i32]* %y, i32 0, i64 %idxprom4 + %10 = load i32, i32* %arrayidx5, align 4 + %add4 = add nsw i32 %9, %add + %add5 = add nsw i32 %10, %add1 + %add6 = add nsw i32 %add4, %add5 + %add7 = add nsw i32 %9, %add3 + %add8 = add nsw i32 %10, %add4 + %add9 = add nsw i32 %add7, %add8 + %add10 = add nsw i32 %9, %add6 + %add11 = add nsw i32 %10, %add7 + %add12 = add nsw i32 %add10, %add11 + %add13 = add nsw i32 %9, %add9 + %add14 = add nsw i32 %10, %add10 + %add15 = add nsw i32 %add13, %add14 + store i32 %add15, i32* %xx, align 4 + %div = sdiv i32 %4, %5 + store i32 %div, i32* %yy, align 4 + %11 = load i32, i32* %i, align 4 + %inc = add nsw i32 %11, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + + for.end: ; preds = %for.cond + %12 = load i32, i32* %xx, align 4 + %13 = load i32, i32* %yy, align 4 + %add67 = add nsw i32 %12, %13 + ret i32 %add67 + } + ; Function Attrs: argmemonly nounwind willreturn + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #2 + + attributes #0 = { nounwind "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { argmemonly nounwind willreturn "target-cpu"="cortex-a57" } + attributes #2 = { nounwind } + +... +--- +name: main +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 112 + offsetAdjustment: 0 + maxAlignment: 16 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 92 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: retval, type: default, offset: -20, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -4, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 1, name: x, type: default, offset: -64, size: 32, alignment: 16, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -48, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 2, name: y, type: default, offset: -96, size: 32, alignment: 16, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -80, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 3, name: i, type: default, offset: -100, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -84, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 4, name: xx, type: default, offset: -104, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -88, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 5, name: yy, type: default, offset: -108, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -92, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 6, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '$fp', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; FUSE-LITERAL-LABEL: name: main + ; FUSE-LITERAL: bb.0.entry: + ; FUSE-LITERAL: successors: %bb.1(0x80000000) + ; FUSE-LITERAL: liveins: $lr + ; FUSE-LITERAL: $sp = frame-setup SUBXri $sp, 112, 0 + ; FUSE-LITERAL: $x8 = ADRP target-flags(aarch64-page) @main.x + ; FUSE-LITERAL: renamable $x8 = ADDXri killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @main.x, 0 + ; FUSE-LITERAL: STRXui $xzr, $sp, 1 :: (store 4 into %ir.xx), (store 4 into %ir.i) + ; FUSE-LITERAL: renamable $q0, renamable $q1 = LDPQi killed renamable $x8, 0 :: (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.x to i8*)`, align 4), (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.x to i8*)` + 16, align 4) + ; FUSE-LITERAL: $x8 = ADRP target-flags(aarch64-page) @main.y + ; FUSE-LITERAL: renamable $x8 = ADDXri killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @main.y, 0 + ; FUSE-LITERAL: $x9 = ADDXri $sp, 16, 0 + ; FUSE-LITERAL: frame-setup STPXi $fp, killed $lr, $sp, 12 :: (store 8 into %stack.7), (store 8 into %stack.6) + ; FUSE-LITERAL: $fp = frame-setup ADDXri $sp, 96, 0 + ; FUSE-LITERAL: STPQi killed renamable $q0, killed renamable $q1, $sp, 3 :: (store 16 into %ir.0), (store 16 into %ir.0 + 16) + ; FUSE-LITERAL: STURWi $wzr, $fp, -4 :: (store 4 into %ir.retval) + ; FUSE-LITERAL: renamable $q0, renamable $q1 = LDPQi killed renamable $x8, 0 :: (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.y to i8*)`, align 4), (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.y to i8*)` + 16, align 4) + ; FUSE-LITERAL: $x8 = ADDXri $sp, 48, 0 + ; FUSE-LITERAL: STRWui $wzr, $sp, 1 :: (store 4 into %ir.yy) + ; FUSE-LITERAL: STPQi killed renamable $q0, killed renamable $q1, $sp, 1 :: (store 16 into %ir.1), (store 16 into %ir.1 + 16) + ; FUSE-LITERAL: bb.1.for.cond: + ; FUSE-LITERAL: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; FUSE-LITERAL: liveins: $x8, $x9 + ; FUSE-LITERAL: renamable $w10 = LDRWui $sp, 3 :: (dereferenceable load 4 from %ir.i) + ; FUSE-LITERAL: dead $wzr = SUBSWri killed renamable $w10, 7, 0, implicit-def $nzcv + ; FUSE-LITERAL: Bcc 12, %bb.3, implicit killed $nzcv + ; FUSE-LITERAL: bb.2.for.body: + ; FUSE-LITERAL: successors: %bb.1(0x80000000) + ; FUSE-LITERAL: liveins: $x8, $x9 + ; FUSE-LITERAL: renamable $x10 = LDRSWui $sp, 3 :: (dereferenceable load 4 from %ir.i) + ; FUSE-LITERAL: renamable $x11 = UBFMXri renamable $x10, 62, 61 + ; FUSE-LITERAL: renamable $w12 = LDRWroX renamable $x8, renamable $x11, 0, 0 :: (load 4 from %ir.arrayidx) + ; FUSE-LITERAL: $w13 = ADDWrs renamable $w12, renamable $w10, 0 + ; FUSE-LITERAL: renamable $w11 = LDRWroX renamable $x9, killed renamable $x11, 0, 0 :: (load 4 from %ir.arrayidx5) + ; FUSE-LITERAL: renamable $w14 = ADDWri renamable $w12, 13, 0 + ; FUSE-LITERAL: $w15 = ADDWrs renamable $w14, renamable $w10, 0 + ; FUSE-LITERAL: renamable $w13 = ADDWri killed renamable $w13, 1, 0 + ; FUSE-LITERAL: $w14 = ADDWrs renamable $w11, killed renamable $w14, 0 + ; FUSE-LITERAL: $w14 = ADDWrs renamable $w13, killed renamable $w14, 0 + ; FUSE-LITERAL: $w13 = ADDWrs renamable $w11, killed renamable $w13, 0 + ; FUSE-LITERAL: $w13 = ADDWrs killed renamable $w15, killed renamable $w13, 0 + ; FUSE-LITERAL: $w14 = ADDWrs renamable $w10, killed renamable $w14, 0 + ; FUSE-LITERAL: $w13 = ADDWrs killed renamable $w13, renamable $w10, 0 + ; FUSE-LITERAL: $w11 = ADDWrs killed renamable $w11, killed renamable $w14, 0 + ; FUSE-LITERAL: $w11 = ADDWrs killed renamable $w13, killed renamable $w11, 0 + ; FUSE-LITERAL: $w13 = ADDWri killed renamable $w11, 57, 0 + ; FUSE-LITERAL: renamable $w11 = SDIVWr renamable $w10, killed renamable $w12 + ; FUSE-LITERAL: renamable $w10 = nsw ADDWri killed renamable $w10, 1, 0, implicit $x10 + ; FUSE-LITERAL: STRWui killed renamable $w10, $sp, 3 :: (store 4 into %ir.i) + ; FUSE-LITERAL: STPWi killed renamable $w11, killed $w13, $sp, 1 :: (store 4 into %ir.xx), (store 4 into %ir.yy) + ; FUSE-LITERAL: B %bb.1 + ; FUSE-LITERAL: bb.3.for.end: + ; FUSE-LITERAL: renamable $w9, renamable $w8 = LDPWi $sp, 1 :: (dereferenceable load 4 from %ir.xx), (dereferenceable load 4 from %ir.yy) + ; FUSE-LITERAL: $w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0 + ; FUSE-LITERAL: $fp, $lr = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.7), (load 8 from %stack.6) + ; FUSE-LITERAL: $sp = frame-destroy ADDXri $sp, 112, 0 + ; FUSE-LITERAL: RET undef $lr, implicit killed $w0 + ; DONT-FUSE-LABEL: name: main + ; DONT-FUSE: bb.0.entry: + ; DONT-FUSE: successors: %bb.1(0x80000000) + ; DONT-FUSE: liveins: $lr + ; DONT-FUSE: $sp = frame-setup SUBXri $sp, 112, 0 + ; DONT-FUSE: $x8 = ADRP target-flags(aarch64-page) @main.x + ; DONT-FUSE: renamable $x8 = ADDXri killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @main.x, 0 + ; DONT-FUSE: $x9 = ADDXri $sp, 16, 0 + ; DONT-FUSE: frame-setup STPXi $fp, killed $lr, $sp, 12 :: (store 8 into %stack.7), (store 8 into %stack.6) + ; DONT-FUSE: $fp = frame-setup ADDXri $sp, 96, 0 + ; DONT-FUSE: renamable $q0, renamable $q1 = LDPQi killed renamable $x8, 0 :: (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.x to i8*)`, align 4), (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.x to i8*)` + 16, align 4) + ; DONT-FUSE: $x8 = ADRP target-flags(aarch64-page) @main.y + ; DONT-FUSE: renamable $x8 = ADDXri killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @main.y, 0 + ; DONT-FUSE: STURWi $wzr, $fp, -4 :: (store 4 into %ir.retval) + ; DONT-FUSE: STPQi killed renamable $q0, killed renamable $q1, $sp, 3 :: (store 16 into %ir.0), (store 16 into %ir.0 + 16) + ; DONT-FUSE: STRXui $xzr, $sp, 1 :: (store 4 into %ir.xx), (store 4 into %ir.i) + ; DONT-FUSE: renamable $q0, renamable $q1 = LDPQi killed renamable $x8, 0 :: (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.y to i8*)`, align 4), (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.y to i8*)` + 16, align 4) + ; DONT-FUSE: $x8 = ADDXri $sp, 48, 0 + ; DONT-FUSE: STRWui $wzr, $sp, 1 :: (store 4 into %ir.yy) + ; DONT-FUSE: STPQi killed renamable $q0, killed renamable $q1, $sp, 1 :: (store 16 into %ir.1), (store 16 into %ir.1 + 16) + ; DONT-FUSE: bb.1.for.cond: + ; DONT-FUSE: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; DONT-FUSE: liveins: $x8, $x9 + ; DONT-FUSE: renamable $w10 = LDRWui $sp, 3 :: (dereferenceable load 4 from %ir.i) + ; DONT-FUSE: dead $wzr = SUBSWri killed renamable $w10, 7, 0, implicit-def $nzcv + ; DONT-FUSE: Bcc 12, %bb.3, implicit killed $nzcv + ; DONT-FUSE: bb.2.for.body: + ; DONT-FUSE: successors: %bb.1(0x80000000) + ; DONT-FUSE: liveins: $x8, $x9 + ; DONT-FUSE: renamable $x10 = LDRSWui $sp, 3 :: (dereferenceable load 4 from %ir.i) + ; DONT-FUSE: renamable $x11 = UBFMXri renamable $x10, 62, 61 + ; DONT-FUSE: renamable $w12 = LDRWroX renamable $x8, renamable $x11, 0, 0 :: (load 4 from %ir.arrayidx) + ; DONT-FUSE: $w13 = ADDWrs renamable $w12, renamable $w10, 0 + ; DONT-FUSE: renamable $w11 = LDRWroX renamable $x9, killed renamable $x11, 0, 0 :: (load 4 from %ir.arrayidx5) + ; DONT-FUSE: renamable $w14 = ADDWri renamable $w12, 13, 0 + ; DONT-FUSE: $w15 = ADDWrs renamable $w14, renamable $w10, 0 + ; DONT-FUSE: renamable $w13 = ADDWri killed renamable $w13, 1, 0 + ; DONT-FUSE: $w14 = ADDWrs renamable $w11, killed renamable $w14, 0 + ; DONT-FUSE: $w14 = ADDWrs renamable $w13, killed renamable $w14, 0 + ; DONT-FUSE: $w13 = ADDWrs renamable $w11, killed renamable $w13, 0 + ; DONT-FUSE: $w13 = ADDWrs killed renamable $w15, killed renamable $w13, 0 + ; DONT-FUSE: $w14 = ADDWrs renamable $w10, killed renamable $w14, 0 + ; DONT-FUSE: $w13 = ADDWrs killed renamable $w13, renamable $w10, 0 + ; DONT-FUSE: $w11 = ADDWrs killed renamable $w11, killed renamable $w14, 0 + ; DONT-FUSE: $w11 = ADDWrs killed renamable $w13, killed renamable $w11, 0 + ; DONT-FUSE: $w13 = ADDWri killed renamable $w11, 57, 0 + ; DONT-FUSE: renamable $w11 = SDIVWr renamable $w10, killed renamable $w12 + ; DONT-FUSE: renamable $w10 = nsw ADDWri killed renamable $w10, 1, 0, implicit $x10 + ; DONT-FUSE: STRWui killed renamable $w10, $sp, 3 :: (store 4 into %ir.i) + ; DONT-FUSE: STPWi killed renamable $w11, killed $w13, $sp, 1 :: (store 4 into %ir.xx), (store 4 into %ir.yy) + ; DONT-FUSE: B %bb.1 + ; DONT-FUSE: bb.3.for.end: + ; DONT-FUSE: renamable $w9, renamable $w8 = LDPWi $sp, 1 :: (dereferenceable load 4 from %ir.xx), (dereferenceable load 4 from %ir.yy) + ; DONT-FUSE: $w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0 + ; DONT-FUSE: $fp, $lr = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.7), (load 8 from %stack.6) + ; DONT-FUSE: $sp = frame-destroy ADDXri $sp, 112, 0 + ; DONT-FUSE: RET undef $lr, implicit killed $w0 + ; CHECK-LABEL: name: main + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr + ; CHECK: $sp = frame-setup SUBXri $sp, 112, 0 + ; CHECK: $x8 = ADRP target-flags(aarch64-page) @main.x + ; CHECK: renamable $x8 = ADDXri killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @main.x, 0 + ; CHECK: STRXui $xzr, $sp, 1 :: (store 4 into %ir.xx), (store 4 into %ir.i) + ; CHECK: renamable $q0, renamable $q1 = LDPQi killed renamable $x8, 0 :: (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.x to i8*)`, align 4), (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.x to i8*)` + 16, align 4) + ; CHECK: $x8 = ADRP target-flags(aarch64-page) @main.y + ; CHECK: renamable $x8 = ADDXri killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @main.y, 0 + ; CHECK: $x9 = ADDXri $sp, 16, 0 + ; CHECK: frame-setup STPXi $fp, killed $lr, $sp, 12 :: (store 8 into %stack.7), (store 8 into %stack.6) + ; CHECK: $fp = frame-setup ADDXri $sp, 96, 0 + ; CHECK: STPQi killed renamable $q0, killed renamable $q1, $sp, 3 :: (store 16 into %ir.0), (store 16 into %ir.0 + 16) + ; CHECK: STURWi $wzr, $fp, -4 :: (store 4 into %ir.retval) + ; CHECK: renamable $q0, renamable $q1 = LDPQi killed renamable $x8, 0 :: (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.y to i8*)`, align 4), (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.y to i8*)` + 16, align 4) + ; CHECK: $x8 = ADDXri $sp, 48, 0 + ; CHECK: STRWui $wzr, $sp, 1 :: (store 4 into %ir.yy) + ; CHECK: STPQi killed renamable $q0, killed renamable $q1, $sp, 1 :: (store 16 into %ir.1), (store 16 into %ir.1 + 16) + ; CHECK: bb.1.for.cond: + ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK: liveins: $x8, $x9 + ; CHECK: renamable $w10 = LDRWui $sp, 3 :: (dereferenceable load 4 from %ir.i) + ; CHECK: dead $wzr = SUBSWri killed renamable $w10, 7, 0, implicit-def $nzcv + ; CHECK: Bcc 12, %bb.3, implicit killed $nzcv + ; CHECK: bb.2.for.body: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $x8, $x9 + ; CHECK: renamable $x10 = LDRSWui $sp, 3 :: (dereferenceable load 4 from %ir.i) + ; CHECK: renamable $x11 = UBFMXri renamable $x10, 62, 61 + ; CHECK: renamable $w12 = LDRWroX renamable $x8, renamable $x11, 0, 0 :: (load 4 from %ir.arrayidx) + ; CHECK: $w13 = ADDWrs renamable $w12, renamable $w10, 0 + ; CHECK: renamable $w11 = LDRWroX renamable $x9, killed renamable $x11, 0, 0 :: (load 4 from %ir.arrayidx5) + ; CHECK: renamable $w14 = ADDWri renamable $w12, 13, 0 + ; CHECK: $w15 = ADDWrs renamable $w14, renamable $w10, 0 + ; CHECK: renamable $w13 = ADDWri killed renamable $w13, 1, 0 + ; CHECK: $w14 = ADDWrs renamable $w11, killed renamable $w14, 0 + ; CHECK: $w14 = ADDWrs renamable $w13, killed renamable $w14, 0 + ; CHECK: $w13 = ADDWrs renamable $w11, killed renamable $w13, 0 + ; CHECK: $w13 = ADDWrs killed renamable $w15, killed renamable $w13, 0 + ; CHECK: $w14 = ADDWrs renamable $w10, killed renamable $w14, 0 + ; CHECK: $w13 = ADDWrs killed renamable $w13, renamable $w10, 0 + ; CHECK: $w11 = ADDWrs killed renamable $w11, killed renamable $w14, 0 + ; CHECK: $w11 = ADDWrs killed renamable $w13, killed renamable $w11, 0 + ; CHECK: $w13 = ADDWri killed renamable $w11, 57, 0 + ; CHECK: renamable $w11 = SDIVWr renamable $w10, killed renamable $w12 + ; CHECK: renamable $w10 = nsw ADDWri killed renamable $w10, 1, 0, implicit $x10 + ; CHECK: STRWui killed renamable $w10, $sp, 3 :: (store 4 into %ir.i) + ; CHECK: STPWi killed renamable $w11, killed $w13, $sp, 1 :: (store 4 into %ir.xx), (store 4 into %ir.yy) + ; CHECK: B %bb.1 + ; CHECK: bb.3.for.end: + ; CHECK: renamable $w9, renamable $w8 = LDPWi $sp, 1 :: (dereferenceable load 4 from %ir.xx), (dereferenceable load 4 from %ir.yy) + ; CHECK: $w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0 + ; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.7), (load 8 from %stack.6) + ; CHECK: $sp = frame-destroy ADDXri $sp, 112, 0 + ; CHECK: RET undef $lr, implicit killed $w0 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $lr + + $sp = frame-setup SUBXri $sp, 112, 0 + frame-setup STPXi killed $fp, killed $lr, $sp, 12 :: (store 8 into %stack.7), (store 8 into %stack.6) + $fp = frame-setup ADDXri $sp, 96, 0 + $x8 = ADRP target-flags(aarch64-page) @main.x + renamable $x8 = ADDXri $x8, target-flags(aarch64-pageoff, aarch64-nc) @main.x, 0 + renamable $q0, renamable $q1 = LDPQi renamable $x8, 0 :: (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.x to i8*)`, align 4), (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.x to i8*)` + 16, align 4) + STPQi renamable $q0, renamable $q1, $sp, 3 :: (store 16 into %ir.0), (store 16 into %ir.0 + 16) + $x8 = ADRP target-flags(aarch64-page) @main.y + renamable $x8 = ADDXri $x8, target-flags(aarch64-pageoff, aarch64-nc) @main.y, 0 + renamable $q0, renamable $q1 = LDPQi renamable $x8, 0 :: (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.y to i8*)`, align 4), (dereferenceable load 16 from `i8* bitcast ([8 x i32]* @main.y to i8*)` + 16, align 4) + STPQi renamable $q0, renamable $q1, $sp, 1 :: (store 16 into %ir.1), (store 16 into %ir.1 + 16) + STURWi $wzr, $fp, -4 :: (store 4 into %ir.retval) + STRXui $xzr, $sp, 1 :: (store 4 into %ir.xx), (store 4 into %ir.i) + STRWui $wzr, $sp, 1 :: (store 4 into %ir.yy) + $x8 = ADDXri $sp, 48, 0 + $x9 = ADDXri $sp, 16, 0 + + bb.1.for.cond: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + liveins: $x8, $x9 + + renamable $w10 = LDRWui $sp, 3 :: (dereferenceable load 4 from %ir.i) + dead $wzr = SUBSWri killed renamable $w10, 7, 0, implicit-def $nzcv + Bcc 12, %bb.3, implicit $nzcv + + bb.2.for.body: + successors: %bb.1(0x80000000) + liveins: $x8, $x9 + + renamable $x10 = LDRSWui $sp, 3 :: (dereferenceable load 4 from %ir.i) + renamable $x11 = UBFMXri renamable $x10, 62, 61 + renamable $w12 = LDRWroX renamable $x8, renamable $x11, 0, 0 :: (load 4 from %ir.arrayidx) + $w13 = ADDWrs renamable $w12, renamable $w10, 0 + renamable $w14 = ADDWri renamable $w12, 13, 0 + $w15 = ADDWrs renamable $w14, renamable $w10, 0 + renamable $w11 = LDRWroX renamable $x9, killed renamable $x11, 0, 0 :: (load 4 from %ir.arrayidx5) + renamable $w13 = ADDWri killed renamable $w13, 1, 0 + $w14 = ADDWrs renamable $w11, killed renamable $w14, 0 + $w14 = ADDWrs renamable $w13, killed renamable $w14, 0 + $w13 = ADDWrs renamable $w11, killed renamable $w13, 0 + $w13 = ADDWrs killed renamable $w15, killed renamable $w13, 0 + $w13 = ADDWrs killed renamable $w13, renamable $w10, 0 + $w14 = ADDWrs renamable $w10, killed renamable $w14, 0 + $w11 = ADDWrs killed renamable $w11, killed renamable $w14, 0 + $w11 = ADDWrs killed renamable $w13, killed renamable $w11, 0 + $w13 = ADDWri killed renamable $w11, 57, 0 + renamable $w11 = SDIVWr renamable $w10, killed renamable $w12 + STPWi killed renamable $w11, killed $w13, $sp, 1 :: (store 4 into %ir.xx), (store 4 into %ir.yy) + renamable $w10 = nsw ADDWri renamable $w10, 1, 0, implicit killed $x10 + STRWui killed renamable $w10, $sp, 3 :: (store 4 into %ir.i) + B %bb.1 + + bb.3.for.end: + renamable $w9, renamable $w8 = LDPWi $sp, 1 :: (dereferenceable load 4 from %ir.xx), (dereferenceable load 4 from %ir.yy) + $w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0 + $fp, $lr = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.7), (load 8 from %stack.6) + $sp = frame-destroy ADDXri $sp, 112, 0 + RET undef $lr, implicit $w0 + +...