Index: lib/Target/SystemZ/SystemZTargetTransformInfo.h =================================================================== --- lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -85,6 +85,7 @@ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I = nullptr); Index: lib/Target/SystemZ/SystemZTargetTransformInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -845,54 +845,102 @@ return BaseT::getVectorInstrCost(Opcode, Val, Index); } +// Check if a load may be folded as a memory operand in its user. +bool SystemZTTIImpl:: +isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) { + if (!Ld->hasOneUse()) + return false; + FoldedValue = Ld; + const Instruction *UserI = cast<Instruction>(*Ld->user_begin()); + unsigned LoadedBits = getScalarSizeInBits(Ld->getType()); + unsigned TruncBits = 0; + unsigned SExtBits = 0; + unsigned ZExtBits = 0; + if (UserI->hasOneUse()) { + unsigned UserBits = UserI->getType()->getScalarSizeInBits(); + if (isa<TruncInst>(UserI)) + TruncBits = UserBits; + else if (isa<SExtInst>(UserI)) + SExtBits = UserBits; + else if (isa<ZExtInst>(UserI)) + ZExtBits = UserBits; + } + if (TruncBits || SExtBits || ZExtBits) { + FoldedValue = UserI; + UserI = cast<Instruction>(*UserI->user_begin()); + // Load (single use) -> trunc/extend (single use) -> UserI + } + switch (UserI->getOpcode()) { + case Instruction::Add: // SE: 16->32, 16/32->64, z14:16->64. 
ZE: 32->64 + case Instruction::Sub: + if (LoadedBits == 32 && ZExtBits == 64) + return true; + LLVM_FALLTHROUGH; + case Instruction::Mul: // SE: 16->32, 32->64, z14:16->64 + if (LoadedBits == 16 && + (SExtBits == 32 || + (SExtBits == 64 && ST->hasMiscellaneousExtensions2()))) + return true; + LLVM_FALLTHROUGH; + case Instruction::SDiv:// SE: 32->64 + if (LoadedBits == 32 && SExtBits == 64) + return true; + LLVM_FALLTHROUGH; + case Instruction::UDiv: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + // This also makes sense for float operations, but disabled for now due + // to regressions. + // case Instruction::FCmp: + // case Instruction::FAdd: + // case Instruction::FSub: + // case Instruction::FMul: + // case Instruction::FDiv: + + // All possible extensions of memory checked above. + if (SExtBits || ZExtBits) + return false; + + unsigned LoadOrTruncBits = (TruncBits ? TruncBits : LoadedBits); + return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64); + break; + } + return false; +} + int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I) { assert(!Src->isVoidTy() && "Invalid type"); - if (!Src->isVectorTy() && Opcode == Instruction::Load && - I != nullptr && I->hasOneUse()) { - const Instruction *UserI = cast<Instruction>(*I->user_begin()); - unsigned Bits = getScalarSizeInBits(Src); - bool FoldsLoad = false; - switch (UserI->getOpcode()) { - case Instruction::ICmp: - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - case Instruction::SDiv: - case Instruction::UDiv: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - // This also makes sense for float operations, but disabled for now due - // to regressions. 
- // case Instruction::FCmp: - // case Instruction::FAdd: - // case Instruction::FSub: - // case Instruction::FMul: - // case Instruction::FDiv: - FoldsLoad = (Bits == 32 || Bits == 64); - break; - } - - if (FoldsLoad) { - assert (UserI->getNumOperands() == 2 && - "Expected to only handle binops."); - - // UserI can't fold two loads, so in that case return 0 cost only - // half of the time. - for (unsigned i = 0; i < 2; ++i) { - if (UserI->getOperand(i) == I) - continue; - if (LoadInst *LI = dyn_cast<LoadInst>(UserI->getOperand(i))) { - if (LI->hasOneUse()) - return i == 0; - } + if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) { + // Store the load or its truncated or extended value in FoldedValue. + const Instruction *FoldedValue = nullptr; + if (isFoldableLoad(cast<LoadInst>(I), FoldedValue)) { + const Instruction *UserI = cast<Instruction>(*FoldedValue->user_begin()); + assert (UserI->getNumOperands() == 2 && "Expected a binop."); + + // UserI can't fold two loads, so in that case return 0 cost only + // half of the time. + for (unsigned i = 0; i < 2; ++i) { + if (UserI->getOperand(i) == FoldedValue) + continue; + + if (Instruction *OtherOp = dyn_cast<Instruction>(UserI->getOperand(i))){ + LoadInst *OtherLoad = dyn_cast<LoadInst>(OtherOp); + if (!OtherLoad && + (isa<TruncInst>(OtherOp) || isa<SExtInst>(OtherOp) || + isa<ZExtInst>(OtherOp))) + OtherLoad = dyn_cast<LoadInst>(OtherOp->getOperand(0)); + if (OtherLoad && isFoldableLoad(OtherLoad, FoldedValue/*dummy*/)) + return i == 0; // Both operands foldable. } - - return 0; } + + return 0; // Only I is foldable in user. 
+ } } unsigned NumOps = Index: test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll =================================================================== --- test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll +++ test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll @@ -1,4 +1,7 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,Z13 +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z14 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,Z14 ; ; Test that loads into operations that can fold one memory operand get zero ; cost. In the case that both operands are loaded, one load should get a cost @@ -19,6 +22,35 @@ %li64_1 = load i64, i64* undef add i64 %li64_0, %li64_1 + ; Truncated load + %li64_2 = load i64, i64* undef + %tr = trunc i64 %li64_2 to i32 + add i32 %tr, undef + + ; Sign-extended loads + %li16_0 = load i16, i16* undef + %sext_0 = sext i16 %li16_0 to i32 + add i32 %sext_0, undef + + %li16_1 = load i16, i16* undef + %sext_1 = sext i16 %li16_1 to i64 + add i64 %sext_1, undef + + %li32_2 = load i32, i32* undef + %sext_2 = sext i32 %li32_2 to i64 + add i64 %sext_2, undef + + ; Zero-extended loads + %li32_3 = load i32, i32* undef + %zext_0 = zext i32 %li32_3 to i64 + add i64 %zext_0, undef + + ; Loads with multiple uses are *not* folded + %li16_3 = load i16, i16* undef + %sext_3 = sext i16 %li16_3 to i32 + %sext_4 = sext i16 %li16_3 to i32 + add i32 %sext_3, undef + ret void; ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef @@ -31,6 +63,26 @@ ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = add i64 %li64_0, 
%li64_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = add i32 %tr, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li16_0 = load i16, i16* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_0 = sext i16 %li16_0 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = add i32 %sext_0, undef +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, i16* undef +; Z14: Cost Model: Found an estimated cost of 0 for instruction: %li16_1 = load i16, i16* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i16 %li16_1 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = add i64 %sext_1, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_2 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = add i64 %sext_2, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_3 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %zext_0 = zext i32 %li32_3 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = add i64 %zext_0, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, i16* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_3 = sext i16 %li16_3 to i32 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_4 = sext i16 %li16_3 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = add i32 %sext_3, undef } define void 
@sub() { @@ -48,6 +100,35 @@ %li64_1 = load i64, i64* undef sub i64 %li64_0, %li64_1 + ; Truncated load + %li64_2 = load i64, i64* undef + %tr = trunc i64 %li64_2 to i32 + sub i32 %tr, undef + + ; Sign-extended loads + %li16_0 = load i16, i16* undef + %sext_0 = sext i16 %li16_0 to i32 + sub i32 %sext_0, undef + + %li16_1 = load i16, i16* undef + %sext_1 = sext i16 %li16_1 to i64 + sub i64 %sext_1, undef + + %li32_2 = load i32, i32* undef + %sext_2 = sext i32 %li32_2 to i64 + sub i64 %sext_2, undef + + ; Zero-extended loads + %li32_3 = load i32, i32* undef + %zext_0 = zext i32 %li32_3 to i64 + sub i64 %zext_0, undef + + ; Loads with multiple uses are *not* folded + %li16_3 = load i16, i16* undef + %sext_3 = sext i16 %li16_3 to i32 + %sext_4 = sext i16 %li16_3 to i32 + sub i32 %sext_3, undef + ret void; ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef @@ -60,6 +141,26 @@ ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = sub i64 %li64_0, %li64_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = sub i32 %tr, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li16_0 = load i16, i16* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_0 = sext i16 %li16_0 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = sub i32 %sext_0, undef +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, i16* undef +; Z14: Cost Model: Found an estimated cost of 0 for instruction: %li16_1 = load i16, i16* 
undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i16 %li16_1 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = sub i64 %sext_1, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_2 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = sub i64 %sext_2, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_3 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %zext_0 = zext i32 %li32_3 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = sub i64 %zext_0, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, i16* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_3 = sext i16 %li16_3 to i32 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_4 = sext i16 %li16_3 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = sub i32 %sext_3, undef } define void @mul() { @@ -77,6 +178,35 @@ %li64_1 = load i64, i64* undef mul i64 %li64_0, %li64_1 + ; Truncated load + %li64_2 = load i64, i64* undef + %tr = trunc i64 %li64_2 to i32 + mul i32 %tr, undef + + ; Sign-extended loads + %li16_0 = load i16, i16* undef + %sext_0 = sext i16 %li16_0 to i32 + mul i32 %sext_0, undef + + %li16_1 = load i16, i16* undef + %sext_1 = sext i16 %li16_1 to i64 + mul i64 %sext_1, undef + + %li32_2 = load i32, i32* undef + %sext_2 = sext i32 %li32_2 to i64 + mul i64 %sext_2, undef + + ; Zero-extended loads are *not* folded + %li16_2 = load i16, i16* undef + %zext_0 = zext i16 %li16_2 to i32 + mul i32 %zext_0, undef + + ; Loads with multiple uses are *not* folded + %li16_3 = load i16, i16* undef + %sext_3 = sext i16 %li16_3 to i32 + %sext_4 = sext i16 %li16_3 
to i32 + mul i32 %sext_3, undef + ret void; ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = mul i32 %li32, undef @@ -88,6 +218,26 @@ ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = mul i64 %li64_0, %li64_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = mul i32 %tr, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li16_0 = load i16, i16* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_0 = sext i16 %li16_0 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = mul i32 %sext_0, undef +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, i16* undef +; Z14: Cost Model: Found an estimated cost of 0 for instruction: %li16_1 = load i16, i16* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i16 %li16_1 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = mul i64 %sext_1, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_2 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = mul i64 %sext_2, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_2 = load i16, i16* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %zext_0 = 
zext i16 %li16_2 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = mul i32 %zext_0, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, i16* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_3 = sext i16 %li16_3 to i32 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_4 = sext i16 %li16_3 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = mul i32 %sext_3, undef } define void @sdiv() { @@ -105,6 +255,22 @@ %li64_1 = load i64, i64* undef sdiv i64 %li64_0, %li64_1 + ; Truncated load + %li64_2 = load i64, i64* undef + %tr = trunc i64 %li64_2 to i32 + sdiv i32 %tr, undef + + ; Sign-extended loads + %li32_2 = load i32, i32* undef + %sext_0 = sext i32 %li32_2 to i64 + sdiv i64 %sext_0, undef + + ; Loads with multiple uses are *not* folded + %li32_3 = load i32, i32* undef + %sext_1 = sext i32 %li32_3 to i64 + %sext_2 = sext i32 %li32_3 to i64 + sdiv i64 %sext_1, undef + ret void; ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i32 %li32, undef @@ -116,6 +282,16 @@ ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = sdiv i64 %li64_0, %li64_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %5 = sdiv i32 %tr, undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 0 for 
instruction: %sext_0 = sext i32 %li32_2 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = sdiv i64 %sext_0, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_3 = load i32, i32* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i32 %li32_3 to i64 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_3 to i64 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = sdiv i64 %sext_1, undef } define void @udiv() { @@ -133,6 +309,16 @@ %li64_1 = load i64, i64* undef udiv i64 %li64_0, %li64_1 + ; Truncated load + %li64_2 = load i64, i64* undef + %tr_0 = trunc i64 %li64_2 to i32 + udiv i32 %tr_0, undef + + ; Loads with multiple uses are *not* folded + %li64_3 = load i64, i64* undef + %tr_1 = trunc i64 %li64_3 to i32 + udiv i64 %li64_3, undef + ret void; ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %1 = udiv i32 %li32, undef @@ -144,6 +330,12 @@ ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %4 = udiv i64 %li64_0, %li64_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %5 = udiv i32 %tr_0, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32 +; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i64 
%li64_3, undef } define void @and() { @@ -161,6 +353,16 @@ %li64_1 = load i64, i64* undef and i64 %li64_0, %li64_1 + ; Truncated load + %li64_2 = load i64, i64* undef + %tr_0 = trunc i64 %li64_2 to i32 + and i32 %tr_0, undef + + ; Loads with multiple uses are *not* folded + %li64_3 = load i64, i64* undef + %tr_1 = trunc i64 %li64_3 to i32 + and i64 %li64_3, undef + ret void; ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = and i32 %li32, undef @@ -172,6 +374,12 @@ ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = and i64 %li64_0, %li64_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = and i32 %tr_0, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = and i64 %li64_3, undef } define void @or() { @@ -189,6 +397,16 @@ %li64_1 = load i64, i64* undef or i64 %li64_0, %li64_1 + ; Truncated load + %li64_2 = load i64, i64* undef + %tr_0 = trunc i64 %li64_2 to i32 + or i32 %tr_0, undef + + ; Loads with multiple uses are *not* folded + %li64_3 = load i64, i64* undef + %tr_1 = trunc i64 %li64_3 to i32 + or i64 %li64_3, undef + ret void; ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = 
or i32 %li32, undef @@ -200,6 +418,12 @@ ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = or i64 %li64_0, %li64_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = or i32 %tr_0, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = or i64 %li64_3, undef } define void @xor() { @@ -217,6 +441,16 @@ %li64_1 = load i64, i64* undef xor i64 %li64_0, %li64_1 + ; Truncated load + %li64_2 = load i64, i64* undef + %tr_0 = trunc i64 %li64_2 to i32 + xor i32 %tr_0, undef + + ; Loads with multiple uses are *not* folded + %li64_3 = load i64, i64* undef + %tr_1 = trunc i64 %li64_3 to i32 + xor i64 %li64_3, undef + ret void; ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = xor i32 %li32, undef @@ -228,6 +462,12 @@ ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = xor i64 %li64_0, %li64_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32 +; CHECK: 
Cost Model: Found an estimated cost of 1 for instruction: %5 = xor i32 %tr_0, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = xor i64 %li64_3, undef } define void @icmp() { @@ -245,6 +485,16 @@ %li64_1 = load i64, i64* undef icmp eq i64 %li64_0, %li64_1 + ; Truncated load + %li64_2 = load i64, i64* undef + %tr_0 = trunc i64 %li64_2 to i32 + icmp eq i32 %tr_0, undef + + ; Loads with multiple uses are *not* folded + %li64_3 = load i64, i64* undef + %tr_1 = trunc i64 %li64_3 to i32 + icmp eq i64 %li64_3, undef + ret void; ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = icmp eq i32 %li32, undef @@ -256,4 +506,10 @@ ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = icmp eq i64 %li64_0, %li64_1 +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = icmp eq i32 %tr_0, undef +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef +; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = icmp eq i64 %li64_3, undef }