Index: llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -211,6 +211,11 @@
                      cl::desc("Verify this pass produces no dead code"),
                      cl::Hidden);
 
+static cl::opt<bool> ForceLowerGEP(
+    "force-lower-gep", cl::init(false),
+    cl::desc("Force to lower GEP during SeparateConstOffsetGEP pass"),
+    cl::Hidden);
+
 namespace {
 
 /// A helper class for separating a constant offset from a GEP index.
@@ -1167,6 +1172,9 @@
   if (DisableSeparateConstOffsetFromGEP)
     return false;
 
+  if (ForceLowerGEP)
+    LowerGEP = true;
+
   DL = &F.getParent()->getDataLayout();
   bool Changed = false;
   for (BasicBlock &B : F) {
Index: llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=separate-const-offset-from-gep -force-lower-gep < %s | FileCheck %s
+
+define void @test(ptr %p) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @foo()
+; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[TMP0]], 5
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[K:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[K]], 5
+; CHECK-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[MUL]], [[REM]]
+; CHECK-NEXT:    [[CMP26:%.*]] = icmp ult i32 [[SUB1]], 512
+; CHECK-NEXT:    br i1 [[CMP26]], label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    [[SUB22:%.*]] = sext i32 [[SUB1]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[SUB22]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], 2044
+; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CHECK-NEXT:    store float 1.000000e+00, ptr [[TMP5]], align 4
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call i32 @foo()
+  %rem = srem i32 %0, 5
+  %add = add nsw i32 %rem , 511
+  br label %for.body
+
+for.body:
+  %k = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+  %mul = mul nuw nsw i32 %k, 5
+  %sub1 = sub nsw i32 %mul, %rem
+  %cmp26 = icmp ult i32 %sub1, 512
+  br i1 %cmp26, label %cond.true, label %cond.end
+
+cond.true:
+  %sub2 = sub nsw i32 %add, %mul
+  %idxprom = sext i32 %sub2 to i64
+  %arryidx = getelementptr inbounds float, ptr %p, i64 %idxprom
+  store float 1.0, ptr %arryidx, align 4
+  br label %cond.end
+
+cond.end:
+  %inc = add nuw nsw i32 %k, 1
+  %exitcond = icmp ne i32 %inc, 100
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+declare i32 @foo()