Index: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4185,6 +4185,11 @@
 
 defm EXT : SIMDBitwiseExtract<"ext">;
 
+// Rebase an EXT byte index that is relative to the high 64-bit half of a
+// 128-bit register so that it is relative to the whole register (adds 8).
+def AdjustExtImm : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
 multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
   def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
             (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
@@ -4194,6 +4199,22 @@
   // 128-bit vector.
   def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
             (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
+  // single 128-bit EXT.
+  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
+                              (extract_subvector V128:$Rn, (i64 N)),
+                              (i32 imm:$imm))),
+            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
+  // A 64-bit EXT of the high half of a 128-bit register can be done using a
+  // 128-bit EXT of the whole register with an adjustment to the immediate. The
+  // top half of the other operand will be unset, but that doesn't matter as it
+  // will not be used.
+  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
+                              V64:$Rm,
+                              (i32 imm:$imm))),
+            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
+                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
+                                      (AdjustExtImm imm:$imm)), dsub)>;
 }
 
 defm : ExtPat<v8i8, v16i8, 8>;
Index: llvm/trunk/test/CodeGen/AArch64/ext-narrow-index.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/ext-narrow-index.ll
+++ llvm/trunk/test/CodeGen/AArch64/ext-narrow-index.ll
@@ -0,0 +1,345 @@
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+; Tests of shufflevector where the index operand is half the width of the vector
+; operands. We should get one ext instruction and not two.
+
+; i8 tests
+define <8 x i8> @i8_off0(<16 x i8> %arg1, <16 x i8> %arg2) {
+; CHECK-LABEL: i8_off0:
+; CHECK-NOT: mov
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %shuffle
+}
+
+define <8 x i8> @i8_off1(<16 x i8> %arg1, <16 x i8> %arg2) {
+; CHECK-LABEL: i8_off1:
+; CHECK-NOT: mov
+; CHECK: ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+  ret <8 x i8> %shuffle
+}
+
+define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
+; CHECK-LABEL: i8_off8:
+; CHECK-NOT: mov
+; CHECK: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i8> %shuffle
+}
+
+define <8 x i8> @i8_off15(<16 x i8> %arg1, <16 x i8> %arg2) {
+; CHECK-LABEL: i8_off15:
+; CHECK: ext v0.16b, v0.16b, v1.16b, #15
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
+  ret <8 x i8> %shuffle
+}
+
+define <8 x i8> @i8_off22(<16 x i8> %arg1, <16 x i8> %arg2) {
+; CHECK-LABEL: i8_off22:
+; CHECK: ext v0.16b, v1.16b, v1.16b, #6
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
+  ret <8 x i8> %shuffle
+}
+
+; i16 tests
+define <4 x i16> @i16_off0(<8 x i16> %arg1, <8 x i16> %arg2) {
+; CHECK-LABEL: i16_off0:
+; CHECK-NOT: mov
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %shuffle
+}
+
+define <4 x i16> @i16_off1(<8 x i16> %arg1, <8 x i16> %arg2) {
+; CHECK-LABEL: i16_off1:
+; CHECK-NOT: mov
+; CHECK: ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x i16> %shuffle
+}
+
+define <4 x i16> @i16_off7(<8 x i16> %arg1, <8 x i16> %arg2) {
+; CHECK-LABEL: i16_off7:
+; CHECK: ext v0.16b, v0.16b, v1.16b, #14
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 7, i32 8, i32 9, i32 10>
+  ret <4 x i16> %shuffle
+}
+
+define <4 x i16> @i16_off8(<8 x i16> %arg1, <8 x i16> %arg2) {
+; CHECK-LABEL: i16_off8:
+; CHECK: mov v0.16b, v1.16b
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+  ret <4 x i16> %shuffle
+}
+
+; i32 tests
+define <2 x i32> @i32_off0(<4 x i32> %arg1, <4 x i32> %arg2) {
+; CHECK-LABEL: i32_off0:
+; CHECK-NOT: mov
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 0, i32 1>
+  ret <2 x i32> %shuffle
+}
+
+define <2 x i32> @i32_off1(<4 x i32> %arg1, <4 x i32> %arg2) {
+; CHECK-LABEL: i32_off1:
+; CHECK-NOT: mov
+; CHECK: ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 1, i32 2>
+  ret <2 x i32> %shuffle
+}
+
+define <2 x i32> @i32_off3(<4 x i32> %arg1, <4 x i32> %arg2) {
+; CHECK-LABEL: i32_off3:
+; CHECK: ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 3, i32 4>
+  ret <2 x i32> %shuffle
+}
+
+define <2 x i32> @i32_off4(<4 x i32> %arg1, <4 x i32> %arg2) {
+; CHECK-LABEL: i32_off4:
+; CHECK: mov v0.16b, v1.16b
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 4, i32 5>
+  ret <2 x i32> %shuffle
+}
+
+; i64 tests
+define <1 x i64> @i64_off0(<2 x i64> %arg1, <2 x i64> %arg2) {
+; CHECK-LABEL: i64_off0:
+; CHECK-NOT: mov
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 0>
+  ret <1 x i64> %shuffle
+}
+
+define <1 x i64> @i64_off1(<2 x i64> %arg1, <2 x i64> %arg2) {
+; CHECK-LABEL: i64_off1:
+; CHECK-NOT: mov
+; CHECK: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 1>
+  ret <1 x i64> %shuffle
+}
+
+define <1 x i64> @i64_off2(<2 x i64> %arg1, <2 x i64> %arg2) {
+; CHECK-LABEL: i64_off2:
+; CHECK: mov v0.16b, v1.16b
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 2>
+  ret <1 x i64> %shuffle
+}
+
+; i8 tests with second operand zero
+define <8 x i8> @i8_zero_off0(<16 x i8> %arg1) {
+; CHECK-LABEL: i8_zero_off0:
+; CHECK-NOT: mov
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %shuffle
+}
+
+define <8 x i8> @i8_zero_off1(<16 x i8> %arg1) {
+; CHECK-LABEL: i8_zero_off1:
+; CHECK-NOT: mov
+; CHECK: ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+  ret <8 x i8> %shuffle
+}
+
+define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
+; CHECK-LABEL: i8_zero_off8:
+; CHECK-NOT: mov
+; CHECK: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i8> %shuffle
+}
+
+define <8 x i8> @i8_zero_off15(<16 x i8> %arg1) {
+; CHECK-LABEL: i8_zero_off15:
+; CHECK: movi [[REG:v[0-9]+]].2d, #0
+; CHECK: ext v0.16b, v0.16b, [[REG]].16b, #15
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
+  ret <8 x i8> %shuffle
+}
+
+define <8 x i8> @i8_zero_off22(<16 x i8> %arg1) {
+; CHECK-LABEL: i8_zero_off22:
+; CHECK: movi v0.2d, #0
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
+  ret <8 x i8> %shuffle
+}
+
+; i16 tests with second operand zero
+define <4 x i16> @i16_zero_off0(<8 x i16> %arg1) {
+; CHECK-LABEL: i16_zero_off0:
+; CHECK-NOT: mov
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %shuffle
+}
+
+define <4 x i16> @i16_zero_off1(<8 x i16> %arg1) {
+; CHECK-LABEL: i16_zero_off1:
+; CHECK-NOT: mov
+; CHECK: ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x i16> %shuffle
+}
+
+define <4 x i16> @i16_zero_off7(<8 x i16> %arg1) {
+; CHECK-LABEL: i16_zero_off7:
+; CHECK: movi [[REG:v[0-9]+]].2d, #0
+; CHECK: ext v0.16b, v0.16b, [[REG]].16b, #14
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 7, i32 8, i32 9, i32 10>
+  ret <4 x i16> %shuffle
+}
+
+define <4 x i16> @i16_zero_off8(<8 x i16> %arg1) {
+; CHECK-LABEL: i16_zero_off8:
+; CHECK: movi v0.2d, #0
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+  ret <4 x i16> %shuffle
+}
+
+; i32 tests with second operand zero
+define <2 x i32> @i32_zero_off0(<4 x i32> %arg1) {
+; CHECK-LABEL: i32_zero_off0:
+; CHECK-NOT: mov
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
+  ret <2 x i32> %shuffle
+}
+
+define <2 x i32> @i32_zero_off1(<4 x i32> %arg1) {
+; CHECK-LABEL: i32_zero_off1:
+; CHECK-NOT: mov
+; CHECK: ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 1, i32 2>
+  ret <2 x i32> %shuffle
+}
+
+define <2 x i32> @i32_zero_off3(<4 x i32> %arg1) {
+; CHECK-LABEL: i32_zero_off3:
+; CHECK: movi [[REG:v[0-9]+]].2d, #0
+; CHECK: ext v0.16b, v0.16b, [[REG]].16b, #12
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 3, i32 4>
+  ret <2 x i32> %shuffle
+}
+
+define <2 x i32> @i32_zero_off4(<4 x i32> %arg1) {
+; CHECK-LABEL: i32_zero_off4:
+; CHECK: movi v0.2d, #0
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 4, i32 5>
+  ret <2 x i32> %shuffle
+}
+
+; i64 tests with second operand zero
+define <1 x i64> @i64_zero_off0(<2 x i64> %arg1) {
+; CHECK-LABEL: i64_zero_off0:
+; CHECK-NOT: mov
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> <i32 0>
+  ret <1 x i64> %shuffle
+}
+
+define <1 x i64> @i64_zero_off1(<2 x i64> %arg1) {
+; CHECK-LABEL: i64_zero_off1:
+; CHECK-NOT: mov
+; CHECK: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> <i32 1>
+  ret <1 x i64> %shuffle
+}
+
+define <1 x i64> @i64_zero_off2(<2 x i64> %arg1) {
+; CHECK-LABEL: i64_zero_off2:
+; CHECK: fmov d0, xzr
+; CHECK-NOT: ext
+; CHECK: ret
+entry:
+  %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> <i32 2>
+  ret <1 x i64> %shuffle
+}