Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6843,9 +6843,13 @@ uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); SDLoc DL(LN0); + // The original load itself didn't wrap, so an offset within it doesn't. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, DL, PtrType)); + DAG.getConstant(PtrOff, DL, PtrType), + &Flags); AddToWorklist(NewPtr.getNode()); SDValue Load; Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1329,12 +1329,18 @@ ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); + // An aggregate return value cannot wrap around the address space, so + // offsets to its parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + SmallVector Chains(NumValues); for (unsigned i = 0; i != NumValues; ++i) { SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), RetPtr.getValueType(), RetPtr, DAG.getIntPtrConstant(Offsets[i], - getCurSDLoc())); + getCurSDLoc()), + &Flags); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), @@ -2994,8 +3000,15 @@ if (Field) { // N = N + Offset uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); + + // In an inbouds GEP with an offset that is nonnegative even when + // interpreted as signed, assume there is no unsigned overflow. + SDNodeFlags Flags; + if (int64_t(Offset) >= 0 && cast(I).isInBounds()) + Flags.setNoUnsignedWrap(true); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, - DAG.getConstant(Offset, dl, N.getValueType())); + DAG.getConstant(Offset, dl, N.getValueType()), &Flags); } Ty = StTy->getElementType(Field); @@ -3020,7 +3033,14 @@ SDValue OffsVal = VectorWidth ? DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : DAG.getConstant(Offs, dl, PtrTy); - N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); + + // In an inbouds GEP with an offset that is nonnegative even when + // interpreted as signed, assume there is no unsigned overflow. + SDNodeFlags Flags; + if (Offs.isNonNegative() && cast(I).isInBounds()) + Flags.setNoUnsignedWrap(true); + + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags); continue; } @@ -3092,10 +3112,13 @@ Align = 0; // Round the size of the allocation up to the stack alignment size - // by add SA-1 to the size. + // by add SA-1 to the size. This doesn't overflow because we're computing + // an address inside an alloca. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign - 1, dl)); + DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags); // Mask out the low bits for alignment purposes. AllocSize = DAG.getNode(ISD::AND, dl, @@ -3168,6 +3191,11 @@ if (isVolatile) Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); + // An aggregate load cannot wrap around the address space, so offsets to its + // parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + SmallVector Values(NumValues); SmallVector Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); @@ -3188,7 +3216,8 @@ } SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT)); + DAG.getConstant(Offsets[i], dl, PtrVT), + &Flags); SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, isNonTemporal, isInvariant, Alignment, AAInfo, @@ -3243,6 +3272,11 @@ AAMDNodes AAInfo; I.getAAMetadata(AAInfo); + // An aggregate load cannot wrap around the address space, so offsets to its + // parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. @@ -3253,7 +3287,7 @@ ChainI = 0; } SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT)); + DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); SDValue St = DAG.getStore(Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), @@ -7202,10 +7236,15 @@ ReturnValues.resize(NumValues); SmallVector Chains(NumValues); + // An aggregate return value cannot wrap around the address space, so + // offsets to its parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, - PtrVT)); + PtrVT), &Flags); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), Index: llvm/trunk/test/CodeGen/WebAssembly/offset.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/offset.ll +++ llvm/trunk/test/CodeGen/WebAssembly/offset.ll @@ -17,6 +17,28 @@ ret i32 %t } +; With an inbounds gep, we can fold an offset. + +; CHECK-LABEL: load_i32_with_folded_gep_offset: +; CHECK: i32.load $push0=, 24($0){{$}} +define i32 @load_i32_with_folded_gep_offset(i32* %p) { + %s = getelementptr inbounds i32, i32* %p, i32 6 + %t = load i32, i32* %s + ret i32 %t +} + +; We can't fold a negative offset though, even with an inbounds gep. + +; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.load $push2=, 0($pop1){{$}} +define i32 @load_i32_with_unfolded_gep_negative_offset(i32* %p) { + %s = getelementptr inbounds i32, i32* %p, i32 -6 + %t = load i32, i32* %s + ret i32 %t +} + ; Without nuw, and even with nsw, we can't fold an offset. ; CHECK-LABEL: load_i32_with_unfolded_offset: @@ -31,6 +53,18 @@ ret i32 %t } +; Without inbounds, we can't fold a gep offset. + +; CHECK-LABEL: load_i32_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.load $push2=, 0($pop1){{$}} +define i32 @load_i32_with_unfolded_gep_offset(i32* %p) { + %s = getelementptr i32, i32* %p, i32 6 + %t = load i32, i32* %s + ret i32 %t +} + ; Same as above but with i64. ; CHECK-LABEL: load_i64_with_folded_offset: @@ -45,6 +79,28 @@ ; Same as above but with i64. +; CHECK-LABEL: load_i64_with_folded_gep_offset: +; CHECK: i64.load $push0=, 24($0){{$}} +define i64 @load_i64_with_folded_gep_offset(i64* %p) { + %s = getelementptr inbounds i64, i64* %p, i32 3 + %t = load i64, i64* %s + ret i64 %t +} + +; Same as above but with i64. + +; CHECK-LABEL: load_i64_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.load $push2=, 0($pop1){{$}} +define i64 @load_i64_with_unfolded_gep_negative_offset(i64* %p) { + %s = getelementptr inbounds i64, i64* %p, i32 -3 + %t = load i64, i64* %s + ret i64 %t +} + +; Same as above but with i64. + ; CHECK-LABEL: load_i64_with_unfolded_offset: ; CHECK: i32.const $push0=, 24{{$}} ; CHECK: i32.add $push1=, $0, $pop0{{$}} @@ -57,6 +113,18 @@ ret i64 %t } +; Same as above but with i64. + +; CHECK-LABEL: load_i64_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.load $push2=, 0($pop1){{$}} +define i64 @load_i64_with_unfolded_gep_offset(i64* %p) { + %s = getelementptr i64, i64* %p, i32 3 + %t = load i64, i64* %s + ret i64 %t +} + ; Same as above but with store. ; CHECK-LABEL: store_i32_with_folded_offset: @@ -71,6 +139,28 @@ ; Same as above but with store. +; CHECK-LABEL: store_i32_with_folded_gep_offset: +; CHECK: i32.store $discard=, 24($0), $pop0{{$}} +define void @store_i32_with_folded_gep_offset(i32* %p) { + %s = getelementptr inbounds i32, i32* %p, i32 6 + store i32 0, i32* %s + ret void +} + +; Same as above but with store. + +; CHECK-LABEL: store_i32_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.store $discard=, 0($pop1), $pop2{{$}} +define void @store_i32_with_unfolded_gep_negative_offset(i32* %p) { + %s = getelementptr inbounds i32, i32* %p, i32 -6 + store i32 0, i32* %s + ret void +} + +; Same as above but with store. + ; CHECK-LABEL: store_i32_with_unfolded_offset: ; CHECK: i32.const $push0=, 24{{$}} ; CHECK: i32.add $push1=, $0, $pop0{{$}} @@ -83,6 +173,18 @@ ret void } +; Same as above but with store. + +; CHECK-LABEL: store_i32_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.store $discard=, 0($pop1), $pop2{{$}} +define void @store_i32_with_unfolded_gep_offset(i32* %p) { + %s = getelementptr i32, i32* %p, i32 6 + store i32 0, i32* %s + ret void +} + ; Same as above but with store with i64. ; CHECK-LABEL: store_i64_with_folded_offset: @@ -97,6 +199,28 @@ ; Same as above but with store with i64. +; CHECK-LABEL: store_i64_with_folded_gep_offset: +; CHECK: i64.store $discard=, 24($0), $pop0{{$}} +define void @store_i64_with_folded_gep_offset(i64* %p) { + %s = getelementptr inbounds i64, i64* %p, i32 3 + store i64 0, i64* %s + ret void +} + +; Same as above but with store with i64. + +; CHECK-LABEL: store_i64_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.store $discard=, 0($pop1), $pop2{{$}} +define void @store_i64_with_unfolded_gep_negative_offset(i64* %p) { + %s = getelementptr inbounds i64, i64* %p, i32 -3 + store i64 0, i64* %s + ret void +} + +; Same as above but with store with i64. + ; CHECK-LABEL: store_i64_with_unfolded_offset: ; CHECK: i32.const $push0=, 24{{$}} ; CHECK: i32.add $push1=, $0, $pop0{{$}} @@ -109,6 +233,18 @@ ret void } +; Same as above but with store with i64. + +; CHECK-LABEL: store_i64_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.store $discard=, 0($pop1), $pop2{{$}} +define void @store_i64_with_unfolded_gep_offset(i64* %p) { + %s = getelementptr i64, i64* %p, i32 3 + store i64 0, i64* %s + ret void +} + ; When loading from a fixed address, materialize a zero. ; CHECK-LABEL: load_i32_from_numeric_address @@ -159,6 +295,17 @@ ret i32 %u } +; Fold a gep offset into a sign-extending load. + +; CHECK-LABEL: load_i8_s_with_folded_gep_offset: +; CHECK: i32.load8_s $push0=, 24($0){{$}} +define i32 @load_i8_s_with_folded_gep_offset(i8* %p) { + %s = getelementptr inbounds i8, i8* %p, i32 24 + %t = load i8, i8* %s + %u = sext i8 %t to i32 + ret i32 %u +} + ; Fold an offset into a zero-extending load. ; CHECK-LABEL: load_i8_u_with_folded_offset: @@ -172,6 +319,17 @@ ret i32 %u } +; Fold a gep offset into a zero-extending load. + +; CHECK-LABEL: load_i8_u_with_folded_gep_offset: +; CHECK: i32.load8_u $push0=, 24($0){{$}} +define i32 @load_i8_u_with_folded_gep_offset(i8* %p) { + %s = getelementptr inbounds i8, i8* %p, i32 24 + %t = load i8, i8* %s + %u = zext i8 %t to i32 + ret i32 %u +} + ; Fold an offset into a truncating store. ; CHECK-LABEL: store_i8_with_folded_offset: @@ -183,3 +341,43 @@ store i8 0, i8* %s ret void } + +; Fold a gep offset into a truncating store. + +; CHECK-LABEL: store_i8_with_folded_gep_offset: +; CHECK: i32.store8 $discard=, 24($0), $pop0{{$}} +define void @store_i8_with_folded_gep_offset(i8* %p) { + %s = getelementptr inbounds i8, i8* %p, i32 24 + store i8 0, i8* %s + ret void +} + +; Fold the offsets when lowering aggregate loads and stores. + +; CHECK-LABEL: aggregate_load_store: +; CHECK: i32.load $2=, 0($0){{$}} +; CHECK: i32.load $3=, 4($0){{$}} +; CHECK: i32.load $4=, 8($0){{$}} +; CHECK: i32.load $push0=, 12($0){{$}} +; CHECK: i32.store $discard=, 12($1), $pop0{{$}} +; CHECK: i32.store $discard=, 8($1), $4{{$}} +; CHECK: i32.store $discard=, 4($1), $3{{$}} +; CHECK: i32.store $discard=, 0($1), $2{{$}} +define void @aggregate_load_store({i32,i32,i32,i32}* %p, {i32,i32,i32,i32}* %q) { + ; volatile so that things stay in order for the tests above + %t = load volatile {i32,i32,i32,i32}, {i32, i32,i32,i32}* %p + store volatile {i32,i32,i32,i32} %t, {i32, i32,i32,i32}* %q + ret void +} + +; Fold the offsets when lowering aggregate return values. + +; CHECK-LABEL: aggregate_return: +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.store $push1=, 12($0), $pop0{{$}} +; CHECK: i32.store $push2=, 8($0), $pop1{{$}} +; CHECK: i32.store $push3=, 4($0), $pop2{{$}} +; CHECK: i32.store $discard=, 0($0), $pop3{{$}} +define {i32,i32,i32,i32} @aggregate_return() { + ret {i32,i32,i32,i32} zeroinitializer +}