Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1102,6 +1102,7 @@ case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: case TargetOpcode::G_FLOG2: + case TargetOpcode::G_FSQRT: assert(TypeIdx == 0); Observer.changingInstr(MI); @@ -1630,6 +1631,7 @@ case G_INTRINSIC_TRUNC: case G_FCOS: case G_FSIN: + case G_FSQRT: return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); case G_ZEXT: case G_SEXT: Index: lib/Target/AArch64/AArch64LegalizerInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -124,7 +124,7 @@ getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64}); - getActionDefinitionsBuilder(G_FCEIL) + getActionDefinitionsBuilder({G_FCEIL, G_FSQRT}) // If we don't have full FP16 support, then scalarize the elements of // vectors containing fp16 types. .fewerElementsIf( Index: lib/Target/AArch64/AArch64RegisterBankInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -398,6 +398,7 @@ case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: case TargetOpcode::G_FLOG2: + case TargetOpcode::G_FSQRT: return true; } return false; Index: test/CodeGen/AArch64/GlobalISel/legalize-sqrt.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/GlobalISel/legalize-sqrt.mir @@ -0,0 +1,86 @@ +# RUN: llc -mtriple=arm64-unknown-unknown -global-isel -O0 -mattr=-fullfp16 -run-pass=legalizer %s -o - | FileCheck %s + +--- | + define <8 x half> @test_v8f16.sqrt(<8 x half> %a) { + ret <8 x half> %a + } + + define <4 x half> @test_v4f16.sqrt(<4 x half> %a) { + ret <4 x half> %a + } + +... 
+--- +name: test_v8f16.sqrt +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: $q0 + ; CHECK-LABEL: name: test_v8f16.sqrt + %0:_(<8 x s16>) = COPY $q0 + ; CHECK: %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<8 x s16>) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(<8 x s16>) = G_BUILD_VECTOR %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16) + 
%1:_(<8 x s16>) = G_FSQRT %0 + $q0 = COPY %1(<8 x s16>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v4f16.sqrt +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: $d0 + ; CHECK-LABEL: name: test_v4f16.sqrt + %0:_(<4 x s16>) = COPY $d0 + ; CHECK: %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FPEXT %{{[0-9]+}}(s16) + ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}} + ; CHECK: %{{[0-9]+}}:_(s16) = G_FPTRUNC %{{[0-9]+}}(s32) + ; CHECK: %{{[0-9]+}}:_(<4 x s16>) = G_BUILD_VECTOR %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16), %{{[0-9]+}}(s16) + %1:_(<4 x s16>) = G_FSQRT %0 + $d0 = COPY %1(<4 x s16>) + RET_ReallyLR implicit $d0 + +... Index: test/CodeGen/AArch64/GlobalISel/select-sqrt.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/GlobalISel/select-sqrt.mir @@ -0,0 +1,130 @@ +# RUN: llc -verify-machineinstrs -mtriple aarch64--- \ +# RUN: -run-pass=instruction-select -mattr=+fullfp16 -global-isel %s -o - \ +# RUN: | FileCheck %s +... 
+--- +name: sqrt_float +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_float + ; CHECK: %{{[0-9]+}}:fpr32 = FSQRTSr %{{[0-9]+}} + liveins: $s0 + %0:fpr(s32) = COPY $s0 + %1:fpr(s32) = G_FSQRT %0 + $s0 = COPY %1(s32) + +... +--- +name: sqrt_double +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_double + ; CHECK: %{{[0-9]+}}:fpr64 = FSQRTDr %{{[0-9]+}} + liveins: $d0 + %0:fpr(s64) = COPY $d0 + %1:fpr(s64) = G_FSQRT %0 + $d0 = COPY %1(s64) + +... +--- +name: sqrt_v2f32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_v2f32 + ; CHECK: %{{[0-9]+}}:fpr64 = FSQRTv2f32 %{{[0-9]+}} + liveins: $d0 + %0:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s32>) = G_FSQRT %0 + $d0 = COPY %1(<2 x s32>) + +... +--- +name: sqrt_v4f32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_v4f32 + ; CHECK: %{{[0-9]+}}:fpr128 = FSQRTv4f32 %{{[0-9]+}} + liveins: $q0 + %0:fpr(<4 x s32>) = COPY $q0 + %1:fpr(<4 x s32>) = G_FSQRT %0 + $q0 = COPY %1(<4 x s32>) + +... +--- +name: sqrt_v2f64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_v2f64 + ; CHECK: %{{[0-9]+}}:fpr128 = FSQRTv2f64 %{{[0-9]+}} + liveins: $q0 + %0:fpr(<2 x s64>) = COPY $q0 + %1:fpr(<2 x s64>) = G_FSQRT %0 + $q0 = COPY %1(<2 x s64>) + +... 
+--- +name: sqrt_v4f16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_v4f16 + ; CHECK: %{{[0-9]+}}:fpr64 = FSQRTv4f16 %{{[0-9]+}} + liveins: $d0 + %0:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s16>) = G_FSQRT %0 + $d0 = COPY %1(<4 x s16>) + +... +--- +name: sqrt_v8f16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } +body: | + bb.0: + ; CHECK-LABEL: name: sqrt_v8f16 + ; CHECK: %{{[0-9]+}}:fpr128 = FSQRTv8f16 %{{[0-9]+}} + liveins: $q0 + %0:fpr(<8 x s16>) = COPY $q0 + %1:fpr(<8 x s16>) = G_FSQRT %0 + $q0 = COPY %1(<8 x s16>) + +... Index: test/CodeGen/AArch64/arm64-vfloatintrinsics.ll =================================================================== --- test/CodeGen/AArch64/arm64-vfloatintrinsics.ll +++ test/CodeGen/AArch64/arm64-vfloatintrinsics.ll @@ -14,12 +14,18 @@ %v4f16 = type <4 x half> +; FALLBACK-NOT: remark{{.*}}test_v4f16.sqrt define %v4f16 @test_v4f16.sqrt(%v4f16 %a) { ; CHECK-LABEL: test_v4f16.sqrt: ; CHECK-NOFP16-COUNT-4: fsqrt s{{[0-9]+}}, s{{[0-9]+}} ; CHECK-FP16-NOT: fcvt ; CHECK-FP16: fsqrt.4h ; CHECK-FP16-NEXT: ret + ; GISEL-LABEL: test_v4f16.sqrt: + ; GISEL-NOFP16-COUNT-4: fsqrt s{{[0-9]+}}, s{{[0-9]+}} + ; GISEL-FP16-NOT: fcvt + ; GISEL-FP16: fsqrt.4h + ; GISEL-FP16-NEXT: ret %1 = call %v4f16 @llvm.sqrt.v4f16(%v4f16 %a) ret %v4f16 %1 } @@ -197,12 +203,18 @@ %v8f16 = type <8 x half> +; FALLBACK-NOT: remark{{.*}}test_v8f16.sqrt define %v8f16 @test_v8f16.sqrt(%v8f16 %a) { ; CHECK-LABEL: test_v8f16.sqrt: ; CHECK-NOFP16-COUNT-8: fsqrt s{{[0-9]+}}, s{{[0-9]+}} ; CHECK-FP16-NOT: fcvt ; CHECK-FP16: fsqrt.8h ; CHECK-FP16-NEXT: ret + ; GISEL-LABEL: test_v8f16.sqrt: + ; GISEL-NOFP16-COUNT-8: fsqrt s{{[0-9]+}}, s{{[0-9]+}} + ; GISEL-FP16-NOT: fcvt + ; GISEL-FP16: fsqrt.8h + ; GISEL-FP16-NEXT: ret %1 = call %v8f16 @llvm.sqrt.v8f16(%v8f16 %a) ret %v8f16 %1 
} @@ -380,9 +392,12 @@ %v2f32 = type <2 x float> +; FALLBACK-NOT: remark{{.*}}test_v2f32.sqrt ; CHECK-LABEL: test_v2f32.sqrt: +; GISEL-LABEL: test_v2f32.sqrt: define %v2f32 @test_v2f32.sqrt(%v2f32 %a) { ; CHECK: fsqrt.2s + ; GISEL: fsqrt.2s %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a) ret %v2f32 %1 } @@ -525,9 +540,12 @@ %v4f32 = type <4 x float> +; FALLBACK-NOT: remark{{.*}}test_v4f32.sqrt ; CHECK: test_v4f32.sqrt: +; GISEL: test_v4f32.sqrt: define %v4f32 @test_v4f32.sqrt(%v4f32 %a) { ; CHECK: fsqrt.4s + ; GISEL: fsqrt.4s %1 = call %v4f32 @llvm.sqrt.v4f32(%v4f32 %a) ret %v4f32 %1 } @@ -668,9 +686,12 @@ ;;; Double vector %v2f64 = type <2 x double> +; FALLBACK-NOT: remark{{.*}}test_v2f64.sqrt ; CHECK: test_v2f64.sqrt: +; GISEL: test_v2f64.sqrt: define %v2f64 @test_v2f64.sqrt(%v2f64 %a) { ; CHECK: fsqrt.2d + ; GISEL: fsqrt.2d %1 = call %v2f64 @llvm.sqrt.v2f64(%v2f64 %a) ret %v2f64 %1 } Index: test/CodeGen/AArch64/f16-instructions.ll =================================================================== --- test/CodeGen/AArch64/f16-instructions.ll +++ test/CodeGen/AArch64/f16-instructions.ll @@ -781,6 +781,9 @@ declare half @llvm.aarch64.neon.frecpx.f16(half %a) #0 declare half @llvm.aarch64.neon.frsqrte.f16(half %a) #0 +; FALLBACK-NOT: remark:{{.*}}test_sqrt +; FALLBACK-FP16-NOT: remark:{{.*}}test_sqrt + ; CHECK-CVT-LABEL: test_sqrt: ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fsqrt s0, s0 @@ -791,6 +794,16 @@ ; CHECK-FP16-NEXT: fsqrt h0, h0 ; CHECK-FP16-NEXT: ret +; GISEL-CVT-LABEL: test_sqrt: +; GISEL-CVT-NEXT: fcvt s0, h0 +; GISEL-CVT-NEXT: fsqrt s0, s0 +; GISEL-CVT-NEXT: fcvt h0, s0 +; GISEL-CVT-NEXT: ret + +; GISEL-FP16-LABEL: test_sqrt: +; GISEL-FP16-NEXT: fsqrt h0, h0 +; GISEL-FP16-NEXT: ret + define half @test_sqrt(half %a) #0 { %r = call half @llvm.sqrt.f16(half %a) ret half %r