Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3456,8 +3456,13 @@ SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend - // it. - IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); + // it. Prefer a zext vs a sext if we can. + bool KnownZero, KnownOne; + ComputeSignBit(const_cast<Value *>(Idx), KnownZero, KnownOne, DL); + if (KnownZero) + IdxN = DAG.getZExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); + else + IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. Index: lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCasts.cpp +++ lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1064,6 +1064,16 @@ Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); + // If we know that the value being extended is non-negative, we can use a zext + // instead. + bool KnownZero, KnownOne; + ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI); + if (KnownZero) { + Value *ZExt = Builder->CreateZExt(Src, DestTy); + return ReplaceInstUsesWith(CI, ZExt); + } + + // Attempt to extend the entire input expression tree to the destination // type.
Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1323,9 +1323,12 @@ Type *IndexTy = (*I)->getType(); if (IndexTy != IntPtrTy) { // If we are using a wider index than needed for this platform, shrink - // it to what we need. If narrower, sign-extend it to what we need. + // it to what we need. If narrower, sign-extend it to what we need + // (unless we can tell it's non-negative, in which case zero-extend.) // This explicit cast can make subsequent optimizations more obvious. - *I = Builder->CreateIntCast(*I, IntPtrTy, true); + bool KnownZero, KnownOne; + ComputeSignBit(*I, KnownZero, KnownOne, 0, &GEP); + *I = Builder->CreateIntCast(*I, IntPtrTy, !KnownZero); MadeChange = true; } } Index: test/CodeGen/X86/gep-sext.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/gep-sext.ll @@ -0,0 +1,49 @@ +; RUN: llc < %s 2>&1 | FileCheck %s + +target datalayout = "e-p:32:32" +target triple = "x86_64-linux-unknown" + + +declare void @use(i32) + +; Sign extension is mandatory for the gep argument +define void @test(i32* %p, i32 %index) { +; CHECK-LABEL: test: +; CHECK: movslq %esi, %rax +; CHECK: movl (%rdi,%rax,4), %edi + %addr = getelementptr i32* %p, i32 %index + %val = load i32* %addr + call void @use(i32 %val) + ret void +} + +; Zero extension is mandatory since the high bits of %esi +; aren't specified +define void @test2(i32* %p, i32 %index) { +; CHECK-LABEL: test2 +; CHECK: movl %esi, %eax +; CHECK: movl (%rdi,%rax,4), %edi + %i = zext i32 %index to i64 + %addr = getelementptr i32* %p, i64 %i + %val = load i32* %addr + call void @use(i32 %val) + ret void +} +define void @test3(i32* %p, i32
%index) { +; CHECK-LABEL: test3 +; CHECK-NOT: movslq +; CHECK: movl 352(%rdi), %eax +; CHECK: movl 160(%rdi,%rax,4), %edi + %addr_begin = getelementptr i32* %p, i64 40 + %addr_fixed = getelementptr i32* %addr_begin, i64 48 + %val_fixed = load i32* %addr_fixed, !range !0 + %addr = getelementptr i32* %addr_begin, i32 %val_fixed + %val = load i32* %addr + call void @use(i32 %val) + ret void +} +;; !range !0 +!0 = !{i32 0, i32 2147483647} + + + Index: test/Transforms/InstCombine/gep-sext.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/gep-sext.ll @@ -0,0 +1,61 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-win32" + +declare void @use(i32) readonly + +; We prefer to canonicalize the machine width gep indices early +define void @test(i32* %p, i32 %index) { +; CHECK-LABEL: @test +; CHECK-NEXT: %1 = sext i32 %index to i64 +; CHECK-NEXT: %addr = getelementptr i32* %p, i64 %1 + %addr = getelementptr i32* %p, i32 %index + %val = load i32* %addr + call void @use(i32 %val) + ret void +} +; If they've already been canonicalized via zext, that's fine +define void @test2(i32* %p, i32 %index) { +; CHECK-LABEL: @test2 +; CHECK-NEXT: %i = zext i32 %index to i64 +; CHECK-NEXT: %addr = getelementptr i32* %p, i64 %i + %i = zext i32 %index to i64 + %addr = getelementptr i32* %p, i64 %i + %val = load i32* %addr + call void @use(i32 %val) + ret void +} +; If we can use a zext, we prefer that. This requires +; knowing that the index is non-negative.
+define void @test3(i32* %p, i32 %index) { +; CHECK-LABEL: @test3 +; CHECK: zext +; CHECK-NOT: sext + %addr_begin = getelementptr i32* %p, i64 40 + %addr_fixed = getelementptr i32* %addr_begin, i64 48 + %val_fixed = load i32* %addr_fixed, !range !0 + %addr = getelementptr i32* %addr_begin, i32 %val_fixed + %val = load i32* %addr + call void @use(i32 %val) + ret void +} +; Replace sext with zext where possible +define void @test4(i32* %p, i32 %index) { +; CHECK-LABEL: @test4 +; CHECK: zext +; CHECK-NOT: sext + %addr_begin = getelementptr i32* %p, i64 40 + %addr_fixed = getelementptr i32* %addr_begin, i64 48 + %val_fixed = load i32* %addr_fixed, !range !0 + %i = sext i32 %val_fixed to i64 + %addr = getelementptr i32* %addr_begin, i64 %i + %val = load i32* %addr + call void @use(i32 %val) + ret void +} + +;; !range !0 +!0 = !{i32 0, i32 2147483647} + + +