Index: lib/Target/AArch64/AArch64FalkorHWPFFix.cpp =================================================================== --- lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -240,27 +240,27 @@ default: return None; + case AArch64::LD1i64: + case AArch64::LD2i64: + DestRegIdx = 0; + BaseRegIdx = 3; + OffsetIdx = -1; + IsPrePost = false; + break; + case AArch64::LD1i8: case AArch64::LD1i16: case AArch64::LD1i32: - case AArch64::LD1i64: case AArch64::LD2i8: case AArch64::LD2i16: case AArch64::LD2i32: - case AArch64::LD2i64: case AArch64::LD3i8: case AArch64::LD3i16: case AArch64::LD3i32: + case AArch64::LD3i64: case AArch64::LD4i8: case AArch64::LD4i16: case AArch64::LD4i32: - DestRegIdx = 0; - BaseRegIdx = 3; - OffsetIdx = -1; - IsPrePost = false; - break; - - case AArch64::LD3i64: case AArch64::LD4i64: DestRegIdx = -1; BaseRegIdx = 3; @@ -284,23 +284,16 @@ case AArch64::LD1Rv4s: case AArch64::LD1Rv8h: case AArch64::LD1Rv16b: - case AArch64::LD1Twov1d: - case AArch64::LD1Twov2s: - case AArch64::LD1Twov4h: - case AArch64::LD1Twov8b: - case AArch64::LD2Twov2s: - case AArch64::LD2Twov4s: - case AArch64::LD2Twov8b: - case AArch64::LD2Rv1d: - case AArch64::LD2Rv2s: - case AArch64::LD2Rv4s: - case AArch64::LD2Rv8b: DestRegIdx = 0; BaseRegIdx = 1; OffsetIdx = -1; IsPrePost = false; break; + case AArch64::LD1Twov1d: + case AArch64::LD1Twov2s: + case AArch64::LD1Twov4h: + case AArch64::LD1Twov8b: case AArch64::LD1Twov2d: case AArch64::LD1Twov4s: case AArch64::LD1Twov8h: @@ -321,10 +314,17 @@ case AArch64::LD1Fourv4s: case AArch64::LD1Fourv8h: case AArch64::LD1Fourv16b: + case AArch64::LD2Twov2s: + case AArch64::LD2Twov4s: + case AArch64::LD2Twov8b: case AArch64::LD2Twov2d: case AArch64::LD2Twov4h: case AArch64::LD2Twov8h: case AArch64::LD2Twov16b: + case AArch64::LD2Rv1d: + case AArch64::LD2Rv2s: + case AArch64::LD2Rv4s: + case AArch64::LD2Rv8b: case AArch64::LD2Rv2d: case AArch64::LD2Rv4h: case AArch64::LD2Rv8h: @@ -365,32 +365,32 @@ IsPrePost = false; break; + case AArch64::LD1i64_POST: + case AArch64::LD2i64_POST: + DestRegIdx = 1; + BaseRegIdx = 4; + OffsetIdx = 5; + IsPrePost = true; + break; + case AArch64::LD1i8_POST: case AArch64::LD1i16_POST: case AArch64::LD1i32_POST: - case AArch64::LD1i64_POST: case AArch64::LD2i8_POST: case AArch64::LD2i16_POST: case AArch64::LD2i32_POST: - case AArch64::LD2i64_POST: case AArch64::LD3i8_POST: case AArch64::LD3i16_POST: case AArch64::LD3i32_POST: + case AArch64::LD3i64_POST: case AArch64::LD4i8_POST: case AArch64::LD4i16_POST: case AArch64::LD4i32_POST: - DestRegIdx = 1; - BaseRegIdx = 4; - OffsetIdx = 5; - IsPrePost = false; - break; - - case AArch64::LD3i64_POST: case AArch64::LD4i64_POST: DestRegIdx = -1; BaseRegIdx = 4; OffsetIdx = 5; - IsPrePost = false; + IsPrePost = true; break; case AArch64::LD1Onev1d_POST: @@ -409,23 +409,16 @@ case AArch64::LD1Rv4s_POST: case AArch64::LD1Rv8h_POST: case AArch64::LD1Rv16b_POST: - case AArch64::LD1Twov1d_POST: - case AArch64::LD1Twov2s_POST: - case AArch64::LD1Twov4h_POST: - case AArch64::LD1Twov8b_POST: - case AArch64::LD2Twov2s_POST: - case AArch64::LD2Twov4s_POST: - case AArch64::LD2Twov8b_POST: - case AArch64::LD2Rv1d_POST: - case AArch64::LD2Rv2s_POST: - case AArch64::LD2Rv4s_POST: - case AArch64::LD2Rv8b_POST: DestRegIdx = 1; BaseRegIdx = 2; OffsetIdx = 3; - IsPrePost = false; + IsPrePost = true; break; + case AArch64::LD1Twov1d_POST: + case AArch64::LD1Twov2s_POST: + case AArch64::LD1Twov4h_POST: + case AArch64::LD1Twov8b_POST: case AArch64::LD1Twov2d_POST: case AArch64::LD1Twov4s_POST: case AArch64::LD1Twov8h_POST: @@ -446,10 +439,17 @@ case AArch64::LD1Fourv4s_POST: case AArch64::LD1Fourv8h_POST: case AArch64::LD1Fourv16b_POST: + case AArch64::LD2Twov2s_POST: + case AArch64::LD2Twov4s_POST: + case AArch64::LD2Twov8b_POST: case AArch64::LD2Twov2d_POST: case AArch64::LD2Twov4h_POST: case AArch64::LD2Twov8h_POST: case AArch64::LD2Twov16b_POST: + case AArch64::LD2Rv1d_POST: + case AArch64::LD2Rv2s_POST: + case AArch64::LD2Rv4s_POST: + case AArch64::LD2Rv8b_POST: case AArch64::LD2Rv2d_POST: case AArch64::LD2Rv4h_POST: case AArch64::LD2Rv8h_POST: @@ -487,7 +487,7 @@ DestRegIdx = -1; BaseRegIdx = 2; OffsetIdx = 3; - IsPrePost = false; + IsPrePost = true; break; case AArch64::LDRBBroW: @@ -592,16 +592,19 @@ IsPrePost = true; break; + case AArch64::LDNPDi: + case AArch64::LDNPQi: + case AArch64::LDNPSi: case AArch64::LDPQi: + case AArch64::LDPDi: + case AArch64::LDPSi: DestRegIdx = -1; BaseRegIdx = 2; OffsetIdx = 3; IsPrePost = false; break; - case AArch64::LDPDi: case AArch64::LDPSWi: - case AArch64::LDPSi: case AArch64::LDPWi: case AArch64::LDPXi: DestRegIdx = 0; @@ -612,18 +615,18 @@ case AArch64::LDPQpost: case AArch64::LDPQpre: + case AArch64::LDPDpost: + case AArch64::LDPDpre: + case AArch64::LDPSpost: + case AArch64::LDPSpre: DestRegIdx = -1; BaseRegIdx = 3; OffsetIdx = 4; IsPrePost = true; break; - case AArch64::LDPDpost: - case AArch64::LDPDpre: case AArch64::LDPSWpost: case AArch64::LDPSWpre: - case AArch64::LDPSpost: - case AArch64::LDPSpre: case AArch64::LDPWpost: case AArch64::LDPWpre: case AArch64::LDPXpost: Index: test/CodeGen/AArch64/falkor-hwpf-fix.mir =================================================================== --- test/CodeGen/AArch64/falkor-hwpf-fix.mir +++ test/CodeGen/AArch64/falkor-hwpf-fix.mir @@ -1,12 +1,7 @@ # RUN: llc -mtriple=aarch64-linux-gnu -mcpu=falkor -run-pass falkor-hwpf-fix-late -o - %s | FileCheck %s ---- | - @g = external global i32 - - define void @hwpf1() { ret void } - define void @hwpf2() { ret void } -... --- -# Verify that the tag collision between the loads is resolved. +# Verify that the tag collision between the loads is resolved for various load opcodes. + # CHECK-LABEL: name: hwpf1 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 # CHECK: LDRWui %[[BASE]], 0 @@ -17,7 +12,7 @@ bb.0: liveins: %w0, %x1 - %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4 from @g) + %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4) %w2 = LDRWui %x1, 1 %w0 = SUBWri %w0, 1, 0 @@ -28,19 +23,147 @@ RET_ReallyLR ... --- -# Verify that the tag collision between the loads is resolved and written back for post increment addressing. # CHECK-LABEL: name: hwpf2 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1i64 %q2, 0, %[[BASE]] +# CHECK: LDRWui %x1, 0 +name: hwpf2 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %q2 + + %q2 = LD1i64 %q2, 0, %x1 :: ("aarch64-strided-access" load 4) + %w2 = LDRWui %x1, 0 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpf3 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1i8 %q2, 0, %[[BASE]] +# CHECK: LDRWui %x1, 0 +name: hwpf3 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %q2 + + %q2 = LD1i8 %q2, 0, %x1 :: ("aarch64-strided-access" load 4) + %w0 = LDRWui %x1, 0 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpf4 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1Onev1d %[[BASE]] +# CHECK: LDRWui %x1, 0 +name: hwpf4 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1 + + %d2 = LD1Onev1d %x1 :: ("aarch64-strided-access" load 4) + %w2 = LDRWui %x1, 0 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpf5 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1Twov1d %[[BASE]] +# CHECK: LDRWui %x1, 0 +name: hwpf5 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1 + + %d2_d3 = LD1Twov1d %x1 :: ("aarch64-strided-access" load 4) + %w0 = LDRWui %x1, 0 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpf6 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LDPQi %[[BASE]] +# CHECK: LDRWui %x1, 3 +name: hwpf6 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1 + + %q2, %q3 = LDPQi %x1, 3 :: ("aarch64-strided-access" load 4) + %w0 = LDRWui %x1, 3 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpf7 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LDPXi %[[BASE]] +# CHECK: LDRWui %x1, 2 +name: hwpf7 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1 + + %x2, %x3 = LDPXi %x1, 3 :: ("aarch64-strided-access" load 4) + %w2 = LDRWui %x1, 2 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# Verify that the tag collision between the loads is resolved and written back +# for post increment addressing for various load opcodes. + +# CHECK-LABEL: name: hwpfinc1 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 # CHECK: LDRWpost %[[BASE]], 0 # CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 # CHECK: LDRWui %x1, 1 -name: hwpf2 +name: hwpfinc1 tracksRegLiveness: true body: | bb.0: liveins: %w0, %x1 - %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4 from @g) + %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4) %w2 = LDRWui %x1, 1 %w0 = SUBWri %w0, 1, 0 @@ -50,3 +173,135 @@ bb.1: RET_ReallyLR ... +--- +# CHECK-LABEL: name: hwpfinc2 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1i64_POST %q2, 0, %[[BASE]] +# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 +# CHECK: LDRWui %x1, 1 +name: hwpfinc2 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %q2 + + %x1, %q2 = LD1i64_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4) + %w2 = LDRWui %x1, 132 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpfinc3 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1i8_POST %q2, 0, %[[BASE]] +# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 +# CHECK: LDRWui %x1, 132 +name: hwpfinc3 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %q2 + + %x1, %q2 = LD1i8_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4) + %w0 = LDRWui %x1, 132 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpfinc4 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD1Rv1d_POST %[[BASE]] +# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 +# CHECK: LDRWui %x1, 252 +name: hwpfinc4 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %q2 + + %x1, %d2 = LD1Rv1d_POST %x1, %xzr :: ("aarch64-strided-access" load 4) + %w2 = LDRWui %x1, 252 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpfinc5 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LD3Threev2s_POST %[[BASE]] +# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 +# CHECK: LDRWroX %x17, %x0 +name: hwpfinc5 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %x17, %q2 + + %x1, %d2_d3_d4 = LD3Threev2s_POST %x1, %x0 :: ("aarch64-strided-access" load 4) + %w0 = LDRWroX %x17, %x0, 0, 0 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpfinc6 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LDPDpost %[[BASE]] +# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 +# CHECK: LDRWui %x17, 2 +name: hwpfinc6 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %x17, %q2 + + %x1, %d2, %d3 = LDPDpost %x1, 3 :: ("aarch64-strided-access" load 4) + %w16 = LDRWui %x17, 2 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +... +--- +# CHECK-LABEL: name: hwpfinc7 +# CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0 +# CHECK: LDPXpost %[[BASE]] +# CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0 +# CHECK: LDRWui %x17, 2 +name: hwpfinc7 +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %x1, %x17, %q2 + + %x1, %x2, %x3 = LDPXpost %x1, 3 :: ("aarch64-strided-access" load 4) + %w18 = LDRWui %x17, 2 + + %w0 = SUBWri %w0, 1, 0 + %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv + Bcc 9, %bb.0, implicit %nzcv + + bb.1: + RET_ReallyLR +...