Index: llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -379,6 +379,7 @@ break; } + case Intrinsic::amdgcn_sqrt: case Intrinsic::amdgcn_rsq: { Value *Src = II.getArgOperand(0); Index: llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll =================================================================== --- llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -73,6 +73,119 @@ ret float %val } +; -------------------------------------------------------------------- +; llvm.amdgcn.sqrt +; -------------------------------------------------------------------- + +declare half @llvm.amdgcn.sqrt.f16(half) nounwind readnone +declare float @llvm.amdgcn.sqrt.f32(float) nounwind readnone +declare double @llvm.amdgcn.sqrt.f64(double) nounwind readnone + +define half @test_constant_fold_sqrt_f16_undef() nounwind { +; CHECK-LABEL: @test_constant_fold_sqrt_f16_undef( +; CHECK-NEXT: ret half 0xH7E00 +; + %val = call half @llvm.amdgcn.sqrt.f16(half undef) nounwind readnone + ret half %val +} + +define float @test_constant_fold_sqrt_f32_undef() nounwind { +; CHECK-LABEL: @test_constant_fold_sqrt_f32_undef( +; CHECK-NEXT: ret float 0x7FF8000000000000 +; + %val = call float @llvm.amdgcn.sqrt.f32(float undef) nounwind readnone + ret float %val +} + +define double @test_constant_fold_sqrt_f64_undef() nounwind { +; CHECK-LABEL: @test_constant_fold_sqrt_f64_undef( +; CHECK-NEXT: ret double 0x7FF8000000000000 +; + %val = call double @llvm.amdgcn.sqrt.f64(double undef) nounwind readnone + ret double %val +} + +define half @test_constant_fold_sqrt_f16_0() nounwind { +; CHECK-LABEL: @test_constant_fold_sqrt_f16_0( +; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH0000) #[[ATTR15:[0-9]+]] +; CHECK-NEXT: ret half [[VAL]] +; + %val = call half @llvm.amdgcn.sqrt.f16(half 0.0) nounwind readnone + ret half %val +} + +define float @test_constant_fold_sqrt_f32_0() nounwind { +; CHECK-LABEL: @test_constant_fold_sqrt_f32_0( +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR15]] +; CHECK-NEXT: ret float [[VAL]] +; + %val = call float @llvm.amdgcn.sqrt.f32(float 0.0) nounwind readnone + ret float %val +} + +define double @test_constant_fold_sqrt_f64_0() nounwind { +; CHECK-LABEL: @test_constant_fold_sqrt_f64_0( +; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR15]] +; CHECK-NEXT: ret double [[VAL]] +; + %val = call double @llvm.amdgcn.sqrt.f64(double 0.0) nounwind readnone + ret double %val +} + +define half @test_constant_fold_sqrt_f16_neg0() nounwind { +; CHECK-LABEL: @test_constant_fold_sqrt_f16_neg0( +; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH8000) #[[ATTR15]] +; CHECK-NEXT: ret half [[VAL]] +; + %val = call half @llvm.amdgcn.sqrt.f16(half -0.0) nounwind readnone + ret half %val +} + +define float @test_constant_fold_sqrt_f32_neg0() nounwind { +; CHECK-LABEL: @test_constant_fold_sqrt_f32_neg0( +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR15]] +; CHECK-NEXT: ret float [[VAL]] +; + %val = call float @llvm.amdgcn.sqrt.f32(float -0.0) nounwind readnone + ret float %val +} + +define double @test_constant_fold_sqrt_f64_neg0() nounwind { +; CHECK-LABEL: @test_constant_fold_sqrt_f64_neg0( +; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR15]] +; CHECK-NEXT: ret double [[VAL]] +; + %val = call double @llvm.amdgcn.sqrt.f64(double -0.0) nounwind readnone + ret double %val +} + +define double @test_constant_fold_sqrt_snan_f64() nounwind { +; CHECK-LABEL: @test_constant_fold_sqrt_snan_f64( +; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0x7FF0000000000001) +; CHECK-NEXT: ret double [[VAL]] +; + %val = call double @llvm.amdgcn.sqrt.f64(double 0x7FF0000000000001) + ret double %val +} + +define double @test_constant_fold_sqrt_qnan_f64() nounwind { +; CHECK-LABEL: @test_constant_fold_sqrt_qnan_f64( +; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0x7FF8000000000000) +; CHECK-NEXT: ret double [[VAL]] +; + %val = call double @llvm.amdgcn.sqrt.f64(double 0x7FF8000000000000) + ret double %val +} + +define double @test_constant_fold_sqrt_neg1() nounwind { +; CHECK-LABEL: @test_constant_fold_sqrt_neg1( +; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -1.000000e+00) +; CHECK-NEXT: ret double [[VAL]] +; + %val = call double @llvm.amdgcn.sqrt.f64(double -1.0) + ret double %val +} + ; -------------------------------------------------------------------- ; llvm.amdgcn.rsq ; -------------------------------------------------------------------- @@ -1662,7 +1775,7 @@ define i64 @icmp_constant_inputs_true() { ; CHECK-LABEL: @icmp_constant_inputs_true( -; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR15:[0-9]+]] +; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR16:[0-9]+]] ; CHECK-NEXT: ret i64 [[RESULT]] ; %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34) @@ -2369,7 +2482,7 @@ define i64 @fcmp_constant_inputs_true() { ; CHECK-LABEL: @fcmp_constant_inputs_true( -; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR15]] +; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR16]] ; CHECK-NEXT: ret i64 [[RESULT]] ; %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4) @@ -2411,7 +2524,7 @@ define i64 @ballot_one_64() { ; CHECK-LABEL: @ballot_one_64( -; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR15]] +; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR16]] ; CHECK-NEXT: ret i64 [[B]] ; %b = call i64 @llvm.amdgcn.ballot.i64(i1 1) @@ -2437,7 +2550,7 @@ define i32 @ballot_one_32() { ; CHECK-LABEL: @ballot_one_32( -; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR15]] +; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR16]] ; CHECK-NEXT: ret i32 [[B]] ; %b = call i32 @llvm.amdgcn.ballot.i32(i1 1)