Index: include/llvm/IR/Operator.h =================================================================== --- include/llvm/IR/Operator.h +++ include/llvm/IR/Operator.h @@ -472,6 +472,16 @@ return true; } + unsigned getNumNonConstantIndices() const { + unsigned NumNonConstantIndices = 0; + for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) { + if (!isa(I)) { + ++NumNonConstantIndices; + } + } + return NumNonConstantIndices; + } + /// \brief Accumulate the constant address offset of this GEP if possible. /// /// This routine accepts an APInt into which it will accumulate the constant Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1465,11 +1465,32 @@ } } - // Avoid duplicating the arithmetic if GEP2 has non-constant indices and - // multiple users. - if (!GEP1 || - (GEP2 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) + if (GEP1 == nullptr) { + // No GEP found. return nullptr; + } + + if (GEP2 != nullptr) { + // (gep X, ...) - (gep X, ...) + // + // Avoid duplicating the arithmetic if there are more than one non-constant + // indices between the two GEPs and either GEP has a non-constant index and + // multiple users. If zero non-constant index, the result is a constant and + // there is no duplication. If one non-constant index, the result is an add + // or sub with a constant, which is no larger than the original code, and + // there's no duplicated arithmetic, even if either GEP has multiple + // users. If more than one non-constant indices combined, as long as the GEP + // with at least one non-constant index doesn't have multiple users, there + // is no duplication. + unsigned NumNonConstantIndices1 = GEP1->getNumNonConstantIndices(); + unsigned NumNonConstantIndices2 = GEP2->getNumNonConstantIndices(); + if (NumNonConstantIndices1 + NumNonConstantIndices2 > 1 && + ((NumNonConstantIndices1 > 0 && !GEP1->hasOneUse()) || + (NumNonConstantIndices2 > 0 && !GEP2->hasOneUse()))) { + DEBUG(dbgs() << "Rejected because ...\n"); + return nullptr; + } + } // Emit the offset of the GEP and an intptr_t. Value *Result = EmitGEPOffset(GEP1); Index: test/Transforms/InstCombine/sub.ll =================================================================== --- test/Transforms/InstCombine/sub.ll +++ test/Transforms/InstCombine/sub.ll @@ -989,3 +989,84 @@ %X = add i32 %B, %A %Y = sub i32 %A, %X ret i32 %Y } + +@dummy_global1 = external global i8* +@dummy_global2 = external global i8* + +define i64 @test58([100 x [100 x i8]]* %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test58( +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[J:%.*]], 4200 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[I:%.*]], 4200 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2:%.*]] [[TMP1:%.*]] +; CHECK-NEXT: ret i64 [[TMP3]] +; +; Note the reassociate pass and another instcombine pass will further optimize this to +; "%sub = i64 %i, %j, ret i64 %sub" +; +; gep1 and gep2 have only one use + %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i + %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %j + %cast1 = ptrtoint i8* %gep1 to i64 + %cast2 = ptrtoint i8* %gep2 to i64 + %sub = sub i64 %cast1, %cast2 + ret i64 %sub +} + +define i64 @test59([100 x [100 x i8]]* %foo, i64 %i) { +; CHECK-LABEL: @test59( +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 0 +; CHECK-NEXT: store i8* [[GEP1]], i8** @dummy_global1, align 8 +; CHECK-NEXT: store i8* [[GEP2]], i8** @dummy_global2, align 8 +; CHECK-NEXT: ret i64 %i +; +; gep1 and gep2 have more than one uses + %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i + %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 0 + %cast1 = ptrtoint i8* %gep1 to i64 + %cast2 = ptrtoint i8* %gep2 to i64 + %sub = sub i64 %cast1, %cast2 + store i8* %gep1, i8** @dummy_global1 + store i8* %gep2, i8** @dummy_global2 + ret i64 %sub +} + +define i64 @test60([100 x [100 x i8]]* %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test60( +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 %j, i64 %i +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 0 +; CHECK-NEXT: [[CAST1:%.*]] = ptrtoint i8* [[GEP1]] to i64 +; CHECK-NEXT: [[CAST2:%.*]] = ptrtoint i8* [[GEP2]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[CAST1]], [[CAST2]] +; CHECK-NEXT: store i8* [[GEP1]], i8** @dummy_global1, align 8 +; CHECK-NEXT: ret i64 [[SUB]] +; +; gep1 has a non-constant index and more than one uses. Shouldn't duplicate the arithmetic. + %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 %j, i64 %i + %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 0 + %cast1 = ptrtoint i8* %gep1 to i64 + %cast2 = ptrtoint i8* %gep2 to i64 + %sub = sub i64 %cast1, %cast2 + store i8* %gep1, i8** @dummy_global1 + ret i64 %sub +} + +define i64 @test61([100 x [100 x i8]]* %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test61( +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 0 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 %j, i64 %i +; CHECK-NEXT: [[CAST1:%.*]] = ptrtoint i8* [[GEP1]] to i64 +; CHECK-NEXT: [[CAST2:%.*]] = ptrtoint i8* [[GEP2]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[CAST1]], [[CAST2]] +; CHECK-NEXT: store i8* [[GEP2]], i8** @dummy_global2, align 8 +; CHECK-NEXT: ret i64 [[SUB]] +; +; gep2 has a non-constant index and more than one uses. Shouldn't duplicate the arithmetic. + %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 0 + %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 %j, i64 %i + %cast1 = ptrtoint i8* %gep1 to i64 + %cast2 = ptrtoint i8* %gep2 to i64 + %sub = sub i64 %cast1, %cast2 + store i8* %gep2, i8** @dummy_global2 + ret i64 %sub +}