diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -211,6 +211,12 @@
     cl::desc("Verify this pass produces no dead code"),
     cl::Hidden);
 
+static cl::opt<bool> ForceEnableLowerGEP(
+    "separate-const-offset-from-gep-force-enable-lower-gep", cl::init(false),
+    cl::desc("Enable the LowerGEP feature even for targets that disable it."
+             " This is mainly for testing purposes."),
+    cl::Hidden);
+
 namespace {
 
 /// A helper class for separating a constant offset from a GEP index.
@@ -348,7 +354,7 @@
   static char ID;
 
   SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
-      : FunctionPass(ID), LowerGEP(LowerGEP) {
+      : FunctionPass(ID), LowerGEP(LowerGEP || ForceEnableLowerGEP) {
     initializeSeparateConstOffsetFromGEPLegacyPassPass(
         *PassRegistry::getPassRegistry());
   }
@@ -377,7 +383,8 @@
       DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI,
       TargetLibraryInfo *TLI,
       function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
-      : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
+      : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI),
+        LowerGEP(LowerGEP || ForceEnableLowerGEP) {}
 
   bool run(Function &F);
 
@@ -635,6 +642,28 @@
                           /* ZeroExtended */ true,
                           /* NonNegative */ false).zext(BitWidth);
   }
 
+  // If SignExtended and ZeroExtended are both true, then the zext and sext
+  // operations appear in this order:
+  //   zext(sext(a op b))
+  // We know this because if the sext had been traversed first, visiting the
+  // zext would have reset SignExtended to false.
+  //
+  // We cannot handle a negative offset that is later zero-extended. For
+  // example:
+  //   %1 = add i32 -15, %0
+  //   %2 = zext i32 %1 to i64
+  // would be transformed to:
+  //   %1 = zext i32 -15 to i64
+  //   %2 = zext i32 %0 to i64
+  //   %3 = add i64 %1, %2
+  // which is not equivalent: instead of -15 + %0, we would compute
+  // 4294967281 + %0.
+  // TODO: It should be possible to teach this pass to handle negative
+  // offsets correctly.
+  if (ZeroExtended && !SignExtended && !isa<SExtInst>(V) &&
+      ConstantOffset.isNegative())
+    ConstantOffset = APInt(BitWidth, 0);
+
   // If we found a non-zero constant offset, add it to the path for
   // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
   // help this optimization.
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/pr62379-zeroext-negative.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/pr62379-zeroext-negative.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/pr62379-zeroext-negative.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=separate-const-offset-from-gep -separate-const-offset-from-gep-force-enable-lower-gep < %s | FileCheck %s
+
+; These tests check that the pass does not incorrectly extract negative constant offsets that are later zero-extended (PR62379).
+
+@c = internal constant [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+
+define i32 @add_negative_rhs(i32 %a, i32 %b, ptr %ptr) {
+; CHECK-LABEL: @add_negative_rhs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 -15, [[B:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [4 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+entry:
+  %0 = add nuw nsw i32 -15, %b
+  %1 = zext i32 %0 to i64
+  %2 = getelementptr inbounds [4 x i32], ptr %ptr, i64 0, i64 %1
+  %3 = load i32, ptr %2
+  ret i32 %3
+}
+
+define i32 @add_negative_lhs(i32 %a, i32 %b, ptr %ptr) {
+; CHECK-LABEL: @add_negative_lhs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[B:%.*]], -15
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [4 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+entry:
+  %0 = add nuw nsw i32 %b, -15
+  %1 = zext i32 %0 to i64
+  %2 = getelementptr inbounds [4 x i32], ptr %ptr, i64 0, i64 %1
+  %3 = load i32, ptr %2
+  ret i32 %3
+}
+
+
+define i32 @sub_positive(i32 %a, i32 %b, ptr %ptr) {
+; CHECK-LABEL: @sub_positive(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub nuw nsw i32 15, [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nuw nsw i32 [[B:%.*]], [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [4 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %0 = sub nuw nsw i32 15, %a
+  %1 = sub nuw nsw i32 %b, %0
+  %2 = zext i32 %1 to i64
+  %3 = getelementptr inbounds [4 x i32], ptr %ptr, i64 0, i64 %2
+  %4 = load i32, ptr %3
+  ret i32 %4
+}
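
Reviewer note (not part of the patch): the miscompile described in the new comment can be reproduced with plain APInt arithmetic, independent of the pass. The following is a minimal standalone sketch, assuming an LLVM development setup; the concrete value 20 chosen for %0 is arbitrary.

// Standalone sketch: shows why extracting a negative constant from under a
// zext is unsound. Build against LLVM's Support library.
#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  APInt C(32, -15, /*isSigned=*/true); // the constant operand: i32 -15
  APInt X(32, 20);                     // an arbitrary value standing in for %0
  // Original program: %2 = zext i32 (add i32 -15, %0) to i64  ->  5
  APInt Before = (C + X).zext(64);
  // After the (incorrect) transform: add i64 (zext i32 -15), (zext i32 %0)
  APInt After = C.zext(64) + X.zext(64);
  assert(Before == 5);                 // -15 + 20 == 5
  assert(After == 4294967281ULL + 20); // zext(i32 -15) == 4294967281
  assert(Before != After);             // the transform changed the result
  return 0;
}

The key step is that zext does not distribute over add when the operand can wrap in the narrow type: truncating the sum to 32 bits before extending (Before) and extending each operand before summing (After) disagree exactly when the narrow addition wraps, which is what a negative constant offset guarantees for small %0.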