diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4666,9 +4666,40 @@
   }
 
   // bitcast x -> x
-  if (CastOpc == Instruction::BitCast)
+  if (CastOpc == Instruction::BitCast) {
     if (Op->getType() == Ty)
       return Op;
+  } else if (CastOpc == Instruction::PtrToInt) {
+    // Fold (ptrtoint (gep i8 null, x)) -> x for non-inbounds GEPs.
+    // The GEP simplification will transform all inbounds GEPs with known
+    // non-zero offsets to poison, so we don't have to handle the inbounds
+    // case here. We don't perform this fold for non-integral pointers
+    // since the bitwise representation is undefined (even for GEPs on
+    // null).
+    // FIXME: Or is this also valid for non-integral pointers?
+    if (auto *GEP = dyn_cast<GEPOperator>(Op)) {
+      if (isa<ConstantPointerNull>(GEP->getOperand(0)) &&
+          !Q.DL.isNonIntegralPointerType(GEP->getPointerOperandType())) {
+        // FIXME: We could fold all ptrtoint(inbounds GEP) to zero here
+        // since zero is the only valid offset for an inbounds GEP on null.
+        // If the offset is not zero, that GEP is poison, so returning 0 is
+        // also valid (https://alive2.llvm.org/ce/z/Gzb5iH). However, Clang
+        // currently generates inbounds GEPs on NULL for hand-written
+        // offsetof expressions, so this risks miscompilation.
+        constexpr bool CanFoldInboundsNullGEPToZero = false;
+        if (CanFoldInboundsNullGEPToZero && GEP->isInBounds()) {
+          return ConstantInt::get(Ty, 0);
+        }
+        // We can't create any zext/trunc instructions here, so this fold is
+        // limited to cases where the result type matches the GEP index type.
+        // We also can't perform the fold if the GEP source element type is
+        // not i8. More complex cases are handled in InstCombineCasts.cpp.
+        if (GEP->getNumIndices() == 1 && GEP->getOperand(1)->getType() == Ty &&
+            Q.DL.getTypeSizeInBits(GEP->getSourceElementType()) == 8)
+          return GEP->getOperand(1);
+      }
+    }
+  }
 
   return nullptr;
 }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2071,6 +2071,11 @@
     return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
   }
 
+  // TODO: Should we call SimplifyCastInst() in commonCastTransforms()?
+  if (Value *V = SimplifyCastInst(CI.getOpcode(), SrcOp, Ty,
+                                  SQ.getWithInstruction(&CI)))
+    return replaceInstUsesWith(CI, V);
+
   Value *Vec, *Scalar, *Index;
   if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
                                         m_Value(Scalar), m_Value(Index)))) &&
diff --git a/llvm/test/Transforms/InstCombine/ptrtoint-nullgep.ll b/llvm/test/Transforms/InstCombine/ptrtoint-nullgep.ll
--- a/llvm/test/Transforms/InstCombine/ptrtoint-nullgep.ll
+++ b/llvm/test/Transforms/InstCombine/ptrtoint-nullgep.ll
@@ -191,11 +191,21 @@
 
 ; We should be able to fold ptrtoint(gep null, x) to x
 define i64 @fold_ptrtoint_nullgep_variable(i64 %val) {
-; ALL-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable
-; ALL-SAME: (i64 [[VAL:%.*]]) {
-; ALL-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[VAL]]
-; ALL-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
-; ALL-NEXT: ret i64 [[RET]]
+; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable
+; LLPARSER-SAME: (i64 [[VAL:%.*]]) {
+; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[VAL]]
+; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
+; LLPARSER-NEXT: ret i64 [[RET]]
+;
+; INTEGRAL-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable
+; INTEGRAL-SAME: (i64 [[VAL:%.*]]) {
+; INTEGRAL-NEXT: ret i64 [[VAL]]
+;
+; NONINTEGRAL-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable
+; NONINTEGRAL-SAME: (i64 [[VAL:%.*]]) {
+; NONINTEGRAL-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[VAL]]
+; NONINTEGRAL-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
+; NONINTEGRAL-NEXT: ret i64 [[RET]]
 ;
   %ptr = getelementptr i8, i8 addrspace(1)* null, i64 %val
   %ret = ptrtoint i8 addrspace(1)* %ptr to i64
@@ -204,12 +214,24 @@
 
 ; Inbounds null-GEP with non-zero offset can be folded to poison.
 define i64 @fold_ptrtoint_nullgep_variable_known_nonzero(i64 %val) {
-; ALL-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero
-; ALL-SAME: (i64 [[VAL:%.*]]) {
-; ALL-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1
-; ALL-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[NON_ZERO_OFFSET]]
-; ALL-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
-; ALL-NEXT: ret i64 [[RET]]
+; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero
+; LLPARSER-SAME: (i64 [[VAL:%.*]]) {
+; LLPARSER-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1
+; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[NON_ZERO_OFFSET]]
+; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
+; LLPARSER-NEXT: ret i64 [[RET]]
+;
+; INTEGRAL-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero
+; INTEGRAL-SAME: (i64 [[VAL:%.*]]) {
+; INTEGRAL-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1
+; INTEGRAL-NEXT: ret i64 [[NON_ZERO_OFFSET]]
+;
+; NONINTEGRAL-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero
+; NONINTEGRAL-SAME: (i64 [[VAL:%.*]]) {
+; NONINTEGRAL-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1
+; NONINTEGRAL-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[NON_ZERO_OFFSET]]
+; NONINTEGRAL-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
+; NONINTEGRAL-NEXT: ret i64 [[RET]]
 ;
   %non_zero_offset = or i64 %val, 1
   %ptr = getelementptr i8, i8 addrspace(1)* null, i64 %non_zero_offset
@@ -219,11 +241,21 @@
 
 ; This is only valid if %val is zero so we can fold the result to 0.
 define i64 @fold_ptrtoint_nullgep_variable_inbounds(i64 %val) {
-; ALL-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_inbounds
-; ALL-SAME: (i64 [[VAL:%.*]]) {
-; ALL-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[VAL]]
-; ALL-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
-; ALL-NEXT: ret i64 [[RET]]
+; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_inbounds
+; LLPARSER-SAME: (i64 [[VAL:%.*]]) {
+; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[VAL]]
+; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
+; LLPARSER-NEXT: ret i64 [[RET]]
+;
+; INTEGRAL-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_inbounds
+; INTEGRAL-SAME: (i64 [[VAL:%.*]]) {
+; INTEGRAL-NEXT: ret i64 [[VAL]]
+;
+; NONINTEGRAL-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_inbounds
+; NONINTEGRAL-SAME: (i64 [[VAL:%.*]]) {
+; NONINTEGRAL-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[VAL]]
+; NONINTEGRAL-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
+; NONINTEGRAL-NEXT: ret i64 [[RET]]
 ;
   %ptr = getelementptr inbounds i8, i8 addrspace(1)* null, i64 %val
   %ret = ptrtoint i8 addrspace(1)* %ptr to i64
@@ -232,12 +264,24 @@
 
 ; A non-constant but known-non-zero GEP should fold to poison
 define i64 @fold_ptrtoint_nullgep_variable_known_nonzero_inbounds(i64 %val) {
-; ALL-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero_inbounds
-; ALL-SAME: (i64 [[VAL:%.*]]) {
-; ALL-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1
-; ALL-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[NON_ZERO_OFFSET]]
-; ALL-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
-; ALL-NEXT: ret i64 [[RET]]
+; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero_inbounds
+; LLPARSER-SAME: (i64 [[VAL:%.*]]) {
+; LLPARSER-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1
+; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[NON_ZERO_OFFSET]]
+; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
+; LLPARSER-NEXT: ret i64 [[RET]]
+;
+; INTEGRAL-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero_inbounds
+; INTEGRAL-SAME: (i64 [[VAL:%.*]]) {
+; INTEGRAL-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1
+; INTEGRAL-NEXT: ret i64 [[NON_ZERO_OFFSET]]
+;
+; NONINTEGRAL-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero_inbounds
+; NONINTEGRAL-SAME: (i64 [[VAL:%.*]]) {
+; NONINTEGRAL-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1
+; NONINTEGRAL-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[NON_ZERO_OFFSET]]
+; NONINTEGRAL-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
+; NONINTEGRAL-NEXT: ret i64 [[RET]]
 ;
   %non_zero_offset = or i64 %val, 1
   %ptr = getelementptr inbounds i8, i8 addrspace(1)* null, i64 %non_zero_offset
@@ -303,12 +347,17 @@
 ; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i32
 ; INSTSIMPLIFY-NEXT: ret i32 [[RET]]
 ;
-; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_trunc
-; INSTCOMBINE-SAME: (i64 [[VAL:%.*]]) {
-; INSTCOMBINE-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[VAL]]
-; INSTCOMBINE-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
-; INSTCOMBINE-NEXT: [[RET:%.*]] = trunc i64 [[TMP1]] to i32
-; INSTCOMBINE-NEXT: ret i32 [[RET]]
+; INTEGRAL-INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_trunc
+; INTEGRAL-INSTCOMBINE-SAME: (i64 [[VAL:%.*]]) {
+; INTEGRAL-INSTCOMBINE-NEXT: [[RET:%.*]] = trunc i64 [[VAL]] to i32
+; INTEGRAL-INSTCOMBINE-NEXT: ret i32 [[RET]]
+;
+; NONINTEGRAL-INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_trunc
+; NONINTEGRAL-INSTCOMBINE-SAME: (i64 [[VAL:%.*]]) {
+; NONINTEGRAL-INSTCOMBINE-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[VAL]]
+; NONINTEGRAL-INSTCOMBINE-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64
+; NONINTEGRAL-INSTCOMBINE-NEXT: [[RET:%.*]] = trunc i64 [[TMP1]] to i32
+; NONINTEGRAL-INSTCOMBINE-NEXT: ret i32 [[RET]]
 ;
   %ptr = getelementptr i8, i8 addrspace(1)* null, i64 %val
   %ret = ptrtoint i8 addrspace(1)* %ptr to i32
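Note on the disabled CanFoldInboundsNullGEPToZero path: the IR below is a minimal sketch (the %struct.S layout and @offsetof_b are hypothetical, not taken from this patch) of the Clang-generated offsetof pattern the FIXME warns about. Folding ptrtoint of an inbounds null GEP to 0 would miscompile this idiom, which is why the fold stays off even though it is justified for the variable-offset case by the alive2 link above.

; Hand-written offsetof, e.g. #define my_offsetof(T, m) ((size_t)&((T *)0)->m),
; is lowered by Clang to an inbounds GEP on null:
%struct.S = type { i32, i64 }

define i64 @offsetof_b() {
  ; inbounds GEP on null with a non-zero, but well-known, offset
  %p = getelementptr inbounds %struct.S, %struct.S* null, i64 0, i32 1
  ; expected to fold to the byte offset of field 1 (8 with natural
  ; alignment), not to 0
  %ret = ptrtoint i64* %p to i64
  ret i64 %ret
}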