Index: llvm/test/Transforms/Mem2Reg/pr30188-1.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Mem2Reg/pr30188-1.ll
@@ -0,0 +1,84 @@
+; RUN: opt < %s -O2 -S | FileCheck %s --implicit-check-not=store
+
+; Check that the optimized code contains no stores and exactly one load. The
+; only load in this input that does not read an alloca is the one in %cond.end.
+
+; CHECK-LABEL: @_Z4testfPf(
+; CHECK: load{{.*}}
+; CHECK-NOT: load
+
+define i32 @_Z4testfPf(float %exp, float* %array) {
+entry:
+  %exp.addr = alloca float, align 4
+  %array.addr = alloca float*, align 8
+  %hi = alloca i32, align 4
+  %lo = alloca i32, align 4
+  %offset = alloca i32, align 4
+  %delta = alloca i32, align 4
+  store float %exp, float* %exp.addr, align 4
+  store float* %array, float** %array.addr, align 8
+  store i32 255, i32* %hi, align 4  ; hi = 255
+  store i32 0, i32* %lo, align 4  ; lo = 0
+  store i32 0, i32* %offset, align 4  ; offset = 0
+  br label %while.cond
+
+while.cond:                                       ; preds = %if.end, %entry
+  %0 = load i32, i32* %hi, align 4
+  %1 = load i32, i32* %lo, align 4
+  %cmp = icmp sgt i32 %0, %1  ; keep looping while hi > lo (signed)
+  br i1 %cmp, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  %2 = load i32, i32* %hi, align 4
+  %3 = load i32, i32* %lo, align 4
+  %sub = sub nsw i32 %2, %3
+  %div = udiv i32 %sub, 2  ; (hi - lo) / 2, unsigned divide
+  store i32 %div, i32* %delta, align 4
+  %4 = load i32, i32* %delta, align 4
+  %cmp1 = icmp ugt i32 1, %4  ; true when 1 >u delta
+  br i1 %cmp1, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %while.body
+  br label %cond.end
+
+cond.false:                                       ; preds = %while.body
+  %5 = load i32, i32* %delta, align 4
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ 1, %cond.true ], [ %5, %cond.false ]  ; 1 if 1 >u delta, else delta
+  store i32 %cond, i32* %delta, align 4
+  %6 = load i32, i32* %lo, align 4
+  %7 = load i32, i32* %delta, align 4
+  %add = add i32 %6, %7
+  store i32 %add, i32* %offset, align 4  ; offset = lo + delta
+  %8 = load i32, i32* %offset, align 4
+  %idxprom = sext i32 %8 to i64
+  %9 = load float*, float** %array.addr, align 8
+  %arrayidx = getelementptr inbounds float, float* %9, i64 %idxprom
+  %10 = load float, float* %arrayidx, align 4  ; array[offset]: the one non-alloca load
+  %11 = load float, float* %exp.addr, align 4
+  %cmp2 = fcmp ogt float %10, %11  ; array[offset] > exp (ordered compare)
+  br i1 %cmp2, label %if.then, label %if.else
+
+if.then:                                          ; preds = %cond.end
+  %12 = load i32, i32* %hi, align 4
+  %13 = load i32, i32* %delta, align 4
+  %sub3 = sub i32 %12, %13
+  store i32 %sub3, i32* %hi, align 4  ; hi = hi - delta
+  br label %if.end
+
+if.else:                                          ; preds = %cond.end
+  %14 = load i32, i32* %lo, align 4
+  %15 = load i32, i32* %delta, align 4
+  %add4 = add i32 %14, %15
+  store i32 %add4, i32* %lo, align 4  ; lo = lo + delta
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  br label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  %16 = load i32, i32* %offset, align 4
+  ret i32 %16  ; returns the last value stored to offset
+}
Index: llvm/test/Transforms/Mem2Reg/pr30188.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Mem2Reg/pr30188.ll
@@ -0,0 +1,92 @@
+; RUN: opt < %s -O2 -S | FileCheck %s --implicit-check-not=store
+
+; Check that the optimized code contains no stores and exactly three loads.
+; NOTE(review): presumably the survivors are the reads through %l_, %data_ and
+; %x_ (%l_1 addresses the same field as %l_) -- confirm against opt output.
+
+; CHECK-LABEL: @BinarySearch(
+; CHECK: load{{.*}}
+; CHECK: load{{.*}}
+; CHECK: load{{.*}}
+; CHECK-NOT: load
+
+%struct.a = type { i32, %struct.b* }  ; field 0 is read via %l_, field 1 via %data_
+%struct.b = type { i32 }  ; field 0 is read via %x_
+
+define i32 @BinarySearch(%struct.a* %input, i32 %t.coerce) {
+entry:
+  %retval = alloca i32, align 4
+  %t = alloca %struct.b, align 4
+  %input.addr = alloca %struct.a*, align 8
+  %low = alloca i32, align 4
+  %high = alloca i32, align 4
+  %mid = alloca i32, align 4
+  %coerce.dive = getelementptr inbounds %struct.b, %struct.b* %t, i32 0, i32 0
+  store i32 %t.coerce, i32* %coerce.dive, align 4  ; field 0 of %t = %t.coerce
+  store %struct.a* %input, %struct.a** %input.addr, align 8
+  %0 = load %struct.a*, %struct.a** %input.addr, align 8
+  %l_ = getelementptr inbounds %struct.a, %struct.a* %0, i32 0, i32 0
+  %1 = load i32, i32* %l_, align 8  ; field 0 of *%input
+  %cmp = icmp sgt i32 %1, 0  ; search only when that field is > 0 (signed)
+  br i1 %cmp, label %if.then, label %if.end7
+
+if.then:                                          ; preds = %entry
+  store i32 0, i32* %low, align 4  ; low = 0
+  %2 = load %struct.a*, %struct.a** %input.addr, align 8
+  %l_1 = getelementptr inbounds %struct.a, %struct.a* %2, i32 0, i32 0
+  %3 = load i32, i32* %l_1, align 8  ; same field as %l_, read a second time
+  store i32 %3, i32* %high, align 4  ; high = field 0 of *%input
+  br label %while.cond
+
+while.cond:                                       ; preds = %if.end, %if.then
+  %4 = load i32, i32* %high, align 4
+  %5 = load i32, i32* %low, align 4
+  %add = add nsw i32 %5, 1
+  %cmp2 = icmp ne i32 %4, %add  ; keep looping while high != low + 1
+  br i1 %cmp2, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  %6 = load i32, i32* %high, align 4
+  %7 = load i32, i32* %low, align 4
+  %add3 = add nsw i32 %6, %7
+  %div = sdiv i32 %add3, 2  ; (high + low) / 2, signed divide
+  store i32 %div, i32* %mid, align 4
+  %8 = load i32, i32* %mid, align 4
+  %idxprom = sext i32 %8 to i64
+  %9 = load %struct.a*, %struct.a** %input.addr, align 8
+  %data_ = getelementptr inbounds %struct.a, %struct.a* %9, i32 0, i32 1
+  %10 = load %struct.b*, %struct.b** %data_, align 8  ; field 1 of *%input
+  %arrayidx = getelementptr inbounds %struct.b, %struct.b* %10, i64 %idxprom
+  %x_ = getelementptr inbounds %struct.b, %struct.b* %arrayidx, i32 0, i32 0
+  %11 = load i32, i32* %x_, align 4  ; field 0 of element [mid]
+  %x_4 = getelementptr inbounds %struct.b, %struct.b* %t, i32 0, i32 0
+  %12 = load i32, i32* %x_4, align 4  ; field 0 of the %t alloca
+  %cmp5 = icmp sgt i32 %11, %12  ; element value > %t value (signed)
+  br i1 %cmp5, label %if.then6, label %if.else
+
+if.then6:                                         ; preds = %while.body
+  %13 = load i32, i32* %mid, align 4
+  store i32 %13, i32* %high, align 4  ; high = mid
+  br label %if.end
+
+if.else:                                          ; preds = %while.body
+  %14 = load i32, i32* %mid, align 4
+  store i32 %14, i32* %low, align 4  ; low = mid
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then6
+  br label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  %15 = load i32, i32* %low, align 4
+  store i32 %15, i32* %retval, align 4  ; result = low
+  br label %return
+
+if.end7:                                          ; preds = %entry
+  store i32 -1, i32* %retval, align 4  ; result = -1 when the entry check fails
+  br label %return
+
+return:                                           ; preds = %if.end7, %while.end
+  %16 = load i32, i32* %retval, align 4
+  ret i32 %16
+}