Index: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -109,7 +109,6 @@ LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() || - GV->getValueType() != GEP->getSourceElementType() || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return nullptr; @@ -122,23 +121,36 @@ if (ArrayElementCount > MaxArraySizeForCombine) return nullptr; + uint64_t IndexPosInOperands; // There are many forms of this optimization we can handle, for now, just do // the simple index into a single-dimensional array. // - // Require: GEP GV, 0, i {{, constant indices}} - if (GEP->getNumOperands() < 3 || - !isa<ConstantInt>(GEP->getOperand(1)) || - !cast<ConstantInt>(GEP->getOperand(1))->isZero() || - isa<Constant>(GEP->getOperand(2))) + // Require: GEP ArrayTy GV, 0, i {{, constant indices}} + // Or: GEP ElementTy GV, i {{, constant indices}} + if (GV->getValueType() && + GV->getValueType()->getArrayElementType() == + GEP->getSourceElementType() && + GEP->getNumOperands() >= 2) { + IndexPosInOperands = 1; + } + + else if (GV->getValueType() == GEP->getSourceElementType() && + GEP->getNumOperands() >= 3 && isa<ConstantInt>(GEP->getOperand(1)) && + cast<ConstantInt>(GEP->getOperand(1))->isZero() && + !isa<Constant>(GEP->getOperand(2))) { + IndexPosInOperands = 2; + } else { return nullptr; + } // Check that indices after the variable are constants and in-range for the // type they index. Collect the indices. This is typically for arrays of // structs. 
SmallVector<unsigned, 4> LaterIndices; - Type *EltTy = Init->getType()->getArrayElementType(); - for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { + + for (unsigned i = IndexPosInOperands + 1, e = GEP->getNumOperands(); i != e; + ++i) { ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); if (!Idx) return nullptr; // Variable index. @@ -279,7 +291,7 @@ // Now that we've scanned the entire array, emit our new comparison(s). We // order the state machines in complexity of the generated code. - Value *Idx = GEP->getOperand(2); + Value *Idx = GEP->getOperand(IndexPosInOperands); // If the index is larger than the pointer offset size of the target, truncate // the index down like the GEP would do implicitly. We don't have to do this Index: llvm/test/Transforms/InstCombine/load-cmp.ll =================================================================== --- llvm/test/Transforms/InstCombine/load-cmp.ll +++ llvm/test/Transforms/InstCombine/load-cmp.ll @@ -338,6 +338,7 @@ @CG = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4] +; TODO: Fold it globally. 
define i1 @cmp_load_constant_array0(i64 %x){ ; CHECK-LABEL: @cmp_load_constant_array0( ; CHECK-NEXT: entry: @@ -346,10 +347,8 @@ ; CHECK: case2: ; CHECK-NEXT: ret i1 false ; CHECK: case1: -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32 -; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]] -; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4 -; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ult i32 [[ISOK]], 3 +; CHECK-NEXT: [[TMP0:%.*]] = and i64 [[X]], 4294967294 +; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp eq i64 [[TMP0]], 0 ; CHECK-NEXT: ret i1 [[COND_INFERRED]] ; entry: @@ -374,11 +373,7 @@ ; CHECK: case2: ; CHECK-NEXT: ret i1 false ; CHECK: case1: -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32 -; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]] -; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4 -; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ugt i32 [[ISOK]], 10 -; CHECK-NEXT: ret i1 [[COND_INFERRED]] +; CHECK-NEXT: ret i1 false ; entry: %cond = icmp ult i64 %x, 2 @@ -405,9 +400,10 @@ ; CHECK-NEXT: ret i1 false ; CHECK: case1: ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32 -; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i32, ptr @CG_MESSY, i32 [[TMP0]] -; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4 -; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i32 [[ISOK]], 5 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1073741823 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 373, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1 +; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-NEXT: ret i1 [[COND_INFERRED]] ; entry: @@ -451,4 +447,3 @@ %cond_inferred = icmp ult i32 %isOK, %y ret i1 %cond_inferred } -