Index: lib/Analysis/ScopDetection.cpp
===================================================================
--- lib/Analysis/ScopDetection.cpp
+++ lib/Analysis/ScopDetection.cpp
@@ -59,6 +59,7 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
@@ -299,7 +300,7 @@
 }
 
 bool ScopDetection::isValidCallInst(CallInst &CI) {
-  if (CI.mayHaveSideEffects() || CI.doesNotReturn())
+  if (CI.doesNotReturn())
     return false;
 
   if (CI.doesNotAccessMemory())
@@ -311,7 +312,29 @@
   if (CalledFunction == 0)
     return false;
 
-  // TODO: Intrinsics.
+  // Check if we can handle the intrinsic call.
+  if (auto *IT = dyn_cast<IntrinsicInst>(&CI)) {
+    switch (IT->getIntrinsicID()) {
+    // Lifetime markers are supported/ignored.
+    case llvm::Intrinsic::lifetime_start:
+    case llvm::Intrinsic::lifetime_end:
+    // Invariant markers are supported/ignored.
+    case llvm::Intrinsic::invariant_start:
+    case llvm::Intrinsic::invariant_end:
+    // Some misc annotations are supported/ignored.
+    case llvm::Intrinsic::var_annotation:
+    case llvm::Intrinsic::ptr_annotation:
+    case llvm::Intrinsic::annotation:
+    case llvm::Intrinsic::donothing:
+    case llvm::Intrinsic::assume:
+    case llvm::Intrinsic::expect:
+      return true;
+    default:
+      // Other intrinsics which may access the memory are not yet supported.
+      break;
+    }
+  }
+
   return false;
 }
 
Index: lib/CodeGen/BlockGenerators.cpp
===================================================================
--- lib/CodeGen/BlockGenerators.cpp
+++ lib/CodeGen/BlockGenerators.cpp
@@ -267,6 +267,30 @@
     return;
   }
 
+  // Skip some special intrinsics for which we do not adjust the semantics to
+  // the new schedule. All others are handled like every other instruction.
+  if (auto *IT = dyn_cast<IntrinsicInst>(Inst)) {
+    switch (IT->getIntrinsicID()) {
+    // Lifetime markers are ignored.
+    case llvm::Intrinsic::lifetime_start:
+    case llvm::Intrinsic::lifetime_end:
+    // Invariant markers are ignored.
+    case llvm::Intrinsic::invariant_start:
+    case llvm::Intrinsic::invariant_end:
+    // Some misc annotations are ignored.
+    case llvm::Intrinsic::var_annotation:
+    case llvm::Intrinsic::ptr_annotation:
+    case llvm::Intrinsic::annotation:
+    case llvm::Intrinsic::donothing:
+    case llvm::Intrinsic::assume:
+    case llvm::Intrinsic::expect:
+      return;
+    default:
+      // Other intrinsics are copied.
+      break;
+    }
+  }
+
   copyInstScalar(Inst, BBMap, GlobalMap, LTS);
 }
 
Index: test/Isl/CodeGen/intrinsics_lifetime.ll
===================================================================
--- /dev/null
+++ test/Isl/CodeGen/intrinsics_lifetime.ll
@@ -0,0 +1,86 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl -polly-codegen-scev -S < %s | FileCheck %s
+;
+; Verify that the lifetime markers are not copied into the optimized SCoP (only the original ones remain).
+;
+; CHECK: for.body:
+; CHECK:   call void @llvm.lifetime.start
+; CHECK: for.end:
+; CHECK:   call void @llvm.lifetime.end
+; CHECK-NOT: call void @llvm.lifetime.start
+; CHECK-NOT: call void @llvm.lifetime.end
+;
+;    int A[1024];
+;    void jd() {
+;      for (int i = 0; i < 1024; i++) {
+;        int tmp[1024];
+;        for (int j = i; j < 1024; j++)
+;          tmp[i] += A[j];
+;        A[i] = tmp[i];
+;      }
+;    }
+;
+; ModuleID = 'test/Isl/CodeGen/lifetime_intrinsics.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x i32] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define void @jd() #0 {
+entry:
+  %tmp = alloca [1024 x i32], align 16
+  %tmp3 = bitcast [1024 x i32]* %tmp to i8*
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc11, %entry
+  %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc11 ], [ 0, %entry ]
+  %exitcond5 = icmp ne i64 %indvars.iv3, 1024
+  br i1 %exitcond5, label %for.body, label %for.end13
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.lifetime.start(i64 4096, i8* %tmp3) #1
+  br label %for.cond2
+
+for.cond2:                                        ; preds = %for.inc, %for.body
+  %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ %indvars.iv3, %for.body ]
+  %lftr.wideiv = trunc i64 %indvars.iv1 to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.body4, label %for.end
+
+for.body4:                                        ; preds = %for.cond2
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv1
+  %tmp6 = load i32* %arrayidx, align 4
+  %arrayidx6 = getelementptr inbounds [1024 x i32]* %tmp, i64 0, i64 %indvars.iv3
+  %tmp7 = load i32* %arrayidx6, align 4
+  %add = add nsw i32 %tmp7, %tmp6
+  store i32 %add, i32* %arrayidx6, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body4
+  %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
+  br label %for.cond2
+
+for.end:                                          ; preds = %for.cond2
+  %arrayidx8 = getelementptr inbounds [1024 x i32]* %tmp, i64 0, i64 %indvars.iv3
+  %tmp8 = load i32* %arrayidx8, align 4
+  %arrayidx10 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv3
+  store i32 %tmp8, i32* %arrayidx10, align 4
+  call void @llvm.lifetime.end(i64 4096, i8* %tmp3) #1
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.end
+  %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
+  br label %for.cond
+
+for.end13:                                        ; preds = %for.cond
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind }
Index: test/Isl/CodeGen/intrinsics_misc.ll
===================================================================
--- /dev/null
+++ test/Isl/CodeGen/intrinsics_misc.ll
@@ -0,0 +1,101 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl -polly-codegen-scev -S < %s | FileCheck %s
+;
+; Verify that the misc intrinsics are not copied into the optimized SCoP (only the original ones remain).
+;
+; CHECK: for.body:
+; CHECK:   call {}* @llvm.invariant.start
+; CHECK: for.body4:
+; CHECK:   call void @llvm.assume
+; CHECK:   call i1 @llvm.expect.i1
+; CHECK:   call void @llvm.donothing
+; CHECK: for.end:
+; CHECK:   call void @llvm.invariant.end
+; CHECK-NOT: call void @llvm.{{.*}}
+;
+;    int A[1024];
+;    void jd() {
+;      for (int i = 0; i < 1024; i++) {
+;        int tmp[1024];
+;        for (int j = i; j < 1024; j++)
+;          tmp[i] += A[j];
+;        A[i] = tmp[i];
+;      }
+;    }
+;
+; ModuleID = 'test/Isl/CodeGen/lifetime_intrinsics.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x i32] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define void @jd() #0 {
+entry:
+  %tmp = alloca [1024 x i32], align 16
+  %tmp3 = bitcast [1024 x i32]* @A to i8*
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc11, %entry
+  %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc11 ], [ 0, %entry ]
+  %exitcond5 = icmp ne i64 %indvars.iv3, 1024
+  br i1 %exitcond5, label %for.body, label %for.end13
+
+for.body:                                         ; preds = %for.cond
+  %lis = call {}* @llvm.invariant.start(i64 4096, i8* %tmp3) #1
+  br label %for.cond2
+
+for.cond2:                                        ; preds = %for.inc, %for.body
+  %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ %indvars.iv3, %for.body ]
+  %lftr.wideiv = trunc i64 %indvars.iv1 to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.body4, label %for.end
+
+for.body4:                                        ; preds = %for.cond2
+  call void @llvm.assume(i1 %exitcond)
+  call i1 @llvm.expect.i1(i1 %exitcond, i1 1)
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv1
+  %tmp6 = load i32* %arrayidx, align 4
+  %arrayidx6 = getelementptr inbounds [1024 x i32]* %tmp, i64 0, i64 %indvars.iv3
+  call void @llvm.donothing()
+  %tmp7 = load i32* %arrayidx6, align 4
+  %add = add nsw i32 %tmp7, %tmp6
+  store i32 %add, i32* %arrayidx6, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body4
+  %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
+  br label %for.cond2
+
+for.end:                                          ; preds = %for.cond2
+  %arrayidx8 = getelementptr inbounds [1024 x i32]* %tmp, i64 0, i64 %indvars.iv3
+  %tmp8 = load i32* %arrayidx8, align 4
+  %arrayidx10 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv3
+  call void @llvm.invariant.end({}* %lis, i64 4096, i8* %tmp3) #1
+  store i32 %tmp8, i32* %arrayidx10, align 4
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.end
+  %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
+  br label %for.cond
+
+for.end13:                                        ; preds = %for.cond
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.donothing() #1
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+; Function Attrs: nounwind
+declare i1 @llvm.expect.i1(i1, i1) #1
+
+; Function Attrs: nounwind
+declare {}* @llvm.invariant.start(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind
+declare void @llvm.invariant.end({}*, i64, i8* nocapture) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind }
Index: test/ScopDetect/intrinsics_1.ll
===================================================================
--- /dev/null
+++ test/ScopDetect/intrinsics_1.ll
@@ -0,0 +1,106 @@
+; RUN: opt %loadPolly -basicaa -polly-detect -analyze < %s | FileCheck %s
+;
+; CHECK: Valid Region for Scop: for.cond => for.end
+;
+;    #include "math.h"
+;
+;    void jd(int *restrict A, float *restrict B) {
+;      for (int i = 0; i < 1024; i++) {
+;        A[i] = pow(ceil(log10(sqrt(i))), floor(log2(i)));
+;        B[i] = fabs(log(sin(i)) + exp2(cos(i))) + exp(i);
+;      }
+;    }
+;
+; ModuleID = '/home/johannes/repos/polly/test/ScopDetect/intrinsics.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define void @jd(i32* noalias %A, float* noalias %B) #0 {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to double
+  %tmp1 = call double @llvm.sqrt.f64(double %conv)
+  %call = call double @__log10_finite(double %tmp1) #2
+  %call1 = call double @ceil(double %call) #2
+  %tmp2 = trunc i64 %indvars.iv to i32
+  %conv2 = sitofp i32 %tmp2 to double
+  %call3 = call double @__log2_finite(double %conv2) #2
+  %call4 = call double @floor(double %call3) #2
+  %tmp3 = call double @llvm.pow.f64(double %call1, double %call4)
+  %conv5 = fptosi double %tmp3 to i32
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  store i32 %conv5, i32* %arrayidx, align 4
+  %tmp4 = trunc i64 %indvars.iv to i32
+  %conv6 = sitofp i32 %tmp4 to double
+  %call7 = call double @sin(double %conv6) #2
+  %call8 = call double @__log_finite(double %call7) #2
+  %tmp5 = trunc i64 %indvars.iv to i32
+  %conv9 = sitofp i32 %tmp5 to double
+  %call10 = call double @cos(double %conv9) #2
+  %call11 = call double @__exp2_finite(double %call10) #2
+  %add = fadd fast double %call8, %call11
+  %call12 = call double @fabs(double %add) #2
+  %tmp6 = trunc i64 %indvars.iv to i32
+  %conv13 = sitofp i32 %tmp6 to double
+  %call14 = call double @__exp_finite(double %conv13) #2
+  %add15 = fadd fast double %call12, %call14
+  %conv16 = fptrunc double %add15 to float
+  %arrayidx18 = getelementptr inbounds float* %B, i64 %indvars.iv
+  store float %conv16, float* %arrayidx18, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @ceil(double) #1
+
+; Function Attrs: nounwind readnone
+declare double @__log10_finite(double) #1
+
+; Function Attrs: nounwind readnone
+declare double @llvm.sqrt.f64(double) #2
+
+; Function Attrs: nounwind readnone
+declare double @floor(double) #1
+
+; Function Attrs: nounwind readnone
+declare double @__log2_finite(double) #1
+
+; Function Attrs: nounwind readnone
+declare double @llvm.pow.f64(double, double) #2
+
+; Function Attrs: nounwind readnone
+declare double @fabs(double) #1
+
+; Function Attrs: nounwind readnone
+declare double @__log_finite(double) #1
+
+; Function Attrs: nounwind readnone
+declare double @sin(double) #1
+
+; Function Attrs: nounwind readnone
+declare double @__exp2_finite(double) #1
+
+; Function Attrs: nounwind readnone
+declare double @cos(double) #1
+
+; Function Attrs: nounwind readnone
+declare double @__exp_finite(double) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
Index: test/ScopDetect/intrinsics_2.ll
===================================================================
--- /dev/null
+++ test/ScopDetect/intrinsics_2.ll
@@ -0,0 +1,80 @@
+; RUN: opt %loadPolly -basicaa -polly-detect -polly-codegen-scev -analyze < %s | FileCheck %s
+;
+; Verify that we allow the lifetime markers for the tmp array.
+;
+; CHECK: Valid Region for Scop: for.cond => for.end13
+;
+;    int A[1024];
+;    void jd() {
+;      for (int i = 0; i < 1024; i++) {
+;        int tmp[1024];
+;        for (int j = i; j < 1024; j++)
+;          tmp[i] += A[j];
+;        A[i] = tmp[i];
+;      }
+;    }
+;
+; ModuleID = 'test/Isl/CodeGen/lifetime_intrinsics.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@A = common global [1024 x i32] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define void @jd() #0 {
+entry:
+  %tmp = alloca [1024 x i32], align 16
+  %tmp3 = bitcast [1024 x i32]* %tmp to i8*
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc11, %entry
+  %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc11 ], [ 0, %entry ]
+  %exitcond5 = icmp ne i64 %indvars.iv3, 1024
+  br i1 %exitcond5, label %for.body, label %for.end13
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.lifetime.start(i64 4096, i8* %tmp3) #1
+  br label %for.cond2
+
+for.cond2:                                        ; preds = %for.inc, %for.body
+  %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ %indvars.iv3, %for.body ]
+  %lftr.wideiv = trunc i64 %indvars.iv1 to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.body4, label %for.end
+
+for.body4:                                        ; preds = %for.cond2
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv1
+  %tmp6 = load i32* %arrayidx, align 4
+  %arrayidx6 = getelementptr inbounds [1024 x i32]* %tmp, i64 0, i64 %indvars.iv3
+  %tmp7 = load i32* %arrayidx6, align 4
+  %add = add nsw i32 %tmp7, %tmp6
+  store i32 %add, i32* %arrayidx6, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body4
+  %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
+  br label %for.cond2
+
+for.end:                                          ; preds = %for.cond2
+  %arrayidx8 = getelementptr inbounds [1024 x i32]* %tmp, i64 0, i64 %indvars.iv3
+  %tmp8 = load i32* %arrayidx8, align 4
+  %arrayidx10 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv3
+  store i32 %tmp8, i32* %arrayidx10, align 4
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.end
+  %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
+  call void @llvm.lifetime.end(i64 4096, i8* %tmp3) #1
+  br label %for.cond
+
+for.end13:                                        ; preds = %for.cond
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind }
Index: test/ScopDetect/intrinsics_3.ll
===================================================================
--- /dev/null
+++ test/ScopDetect/intrinsics_3.ll
@@ -0,0 +1,93 @@
+; RUN: opt %loadPolly -basicaa -polly-detect -polly-codegen-scev -analyze < %s | FileCheck %s
+;
+; Verify that we allow the misc intrinsics.
+;
+; CHECK: Valid Region for Scop: for.cond => for.end13
+;
+;    int A[1024];
+;    void jd() {
+;      for (int i = 0; i < 1024; i++) {
+;        int tmp[1024];
+;        for (int j = i; j < 1024; j++)
+;          tmp[i] += A[j];
+;        A[i] = tmp[i];
+;      }
+;    }
+;
+; ModuleID = 'test/Isl/CodeGen/lifetime_intrinsics.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x i32] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define void @jd() #0 {
+entry:
+  %tmp = alloca [1024 x i32], align 16
+  %tmp3 = bitcast [1024 x i32]* @A to i8*
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc11, %entry
+  %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc11 ], [ 0, %entry ]
+  %exitcond5 = icmp ne i64 %indvars.iv3, 1024
+  br i1 %exitcond5, label %for.body, label %for.end13
+
+for.body:                                         ; preds = %for.cond
+  %lis = call {}* @llvm.invariant.start(i64 4096, i8* %tmp3) #1
+  br label %for.cond2
+
+for.cond2:                                        ; preds = %for.inc, %for.body
+  %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ %indvars.iv3, %for.body ]
+  %lftr.wideiv = trunc i64 %indvars.iv1 to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.body4, label %for.end
+
+for.body4:                                        ; preds = %for.cond2
+  call void @llvm.assume(i1 %exitcond)
+  call i1 @llvm.expect.i1(i1 %exitcond, i1 1)
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv1
+  %tmp6 = load i32* %arrayidx, align 4
+  %arrayidx6 = getelementptr inbounds [1024 x i32]* %tmp, i64 0, i64 %indvars.iv3
+  call void @llvm.donothing()
+  %tmp7 = load i32* %arrayidx6, align 4
+  %add = add nsw i32 %tmp7, %tmp6
+  store i32 %add, i32* %arrayidx6, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body4
+  %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
+  br label %for.cond2
+
+for.end:                                          ; preds = %for.cond2
+  %arrayidx8 = getelementptr inbounds [1024 x i32]* %tmp, i64 0, i64 %indvars.iv3
+  %tmp8 = load i32* %arrayidx8, align 4
+  %arrayidx10 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv3
+  call void @llvm.invariant.end({}* %lis, i64 4096, i8* %tmp3) #1
+  store i32 %tmp8, i32* %arrayidx10, align 4
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.end
+  %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1
+  br label %for.cond
+
+for.end13:                                        ; preds = %for.cond
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.donothing() #1
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+; Function Attrs: nounwind
+declare i1 @llvm.expect.i1(i1, i1) #1
+
+; Function Attrs: nounwind
+declare {}* @llvm.invariant.start(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind
+declare void @llvm.invariant.end({}*, i64, i8* nocapture) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind }