Index: lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -899,6 +899,10 @@
        UserI->getOpcode() == Instruction::UDiv) &&
       UserI->getOperand(1) != FoldedValue)
     return false; // Not commutative, only RHS foldable.
+  // Bits used from the load: TruncBits when nonzero, otherwise LoadedBits;
+  // forced to 0 when SExtBits or ZExtBits is nonzero (the load is extended).
+  unsigned LoadOrTruncBits =
+      ((SExtBits || ZExtBits) ? 0 : (TruncBits ? TruncBits : LoadedBits));
   switch (UserI->getOpcode()) {
   case Instruction::Add: // SE: 16->32, 16/32->64, z14:16->64. ZE: 32->64
   case Instruction::Sub:
@@ -910,6 +914,8 @@
         (SExtBits == 32 ||
          (SExtBits == 64 && ST->hasMiscellaneousExtensions2())))
       return true;
+    if (LoadOrTruncBits == 16)
+      return true;
     LLVM_FALLTHROUGH;
   case Instruction::SDiv:// SE: 32->64
     if (LoadedBits == 32 && SExtBits == 64)
@@ -929,10 +935,6 @@
     // case Instruction::FDiv:
 
     // All possible extensions of memory checked above.
-    if (SExtBits || ZExtBits)
-      return false;
-
-    unsigned LoadOrTruncBits = (TruncBits ? TruncBits : LoadedBits);
     return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
     break;
   }
Index: test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll
===================================================================
--- test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll
+++ test/Analysis/CostModel/SystemZ/memop-folding-int-arith.ll
@@ -85,6 +85,37 @@
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = add i32 %sext_3, undef
 }
 
+define void @add_i16_mem16(i16 %Arg, i16* %Src1, i16* %Src2, i16* %Dst, i32* %Src32) {
+  %L1 = load i16, i16* %Src1
+  %S0 = add i16 %L1, %Arg
+  store volatile i16 %S0, i16* %Dst
+
+  %L2 = load i16, i16* %Src1
+  %L3 = load i16, i16* %Src2
+  %S1 = add i16 %L2, %L3
+  store volatile i16 %S1, i16* %Dst
+
+  ; i32 load truncated to i16, used as the first add operand
+  %L32 = load i32, i32* %Src32
+  %tr = trunc i32 %L32 to i16
+  %S2 = add i16 %tr, %Arg
+  store volatile i16 %S2, i16* %Dst
+
+  ret void
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'add_i16_mem16':
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L1 = load i16, i16* %Src1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %S0 = add i16 %L1, %Arg
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %S0, i16* %Dst
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L2 = load i16, i16* %Src1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %L3 = load i16, i16* %Src2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %S1 = add i16 %L2, %L3
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %S1, i16* %Dst
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L32 = load i32, i32* %Src32
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i32 %L32 to i16
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %S2 = add i16 %tr, %Arg
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %S2, i16* %Dst
+}
+
 define void @sub_lhs_mem() {
   %li32 = load i32, i32* undef
   sub i32 %li32, undef
@@ -228,6 +259,37 @@
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = sub i32 undef, %sext_3
 }
 
+define void @sub_i16_mem16(i16 %Arg, i16* %Src1, i16* %Src2, i16* %Dst, i32* %Src32) {
+  %L1 = load i16, i16* %Src1
+  %D0 = sub i16 %Arg, %L1
+  store volatile i16 %D0, i16* %Dst
+
+  %L2 = load i16, i16* %Src1
+  %L3 = load i16, i16* %Src2
+  %D1 = sub i16 %L2, %L3
+  store volatile i16 %D1, i16* %Dst
+
+  ; i32 load truncated to i16, used as the RHS of the sub
+  %L32 = load i32, i32* %Src32
+  %tr = trunc i32 %L32 to i16
+  %D2 = sub i16 %Arg, %tr
+  store volatile i16 %D2, i16* %Dst
+
+  ret void
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'sub_i16_mem16':
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L1 = load i16, i16* %Src1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %D0 = sub i16 %Arg, %L1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %D0, i16* %Dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %L2 = load i16, i16* %Src1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L3 = load i16, i16* %Src2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sub i16 %L2, %L3
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %D1, i16* %Dst
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L32 = load i32, i32* %Src32
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i32 %L32 to i16
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sub i16 %Arg, %tr
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %D2, i16* %Dst
+}
+
 define void @mul() {
   %li32 = load i32, i32* undef
   mul i32 %li32, undef
@@ -305,6 +367,37 @@
 ; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = mul i32 %sext_3, undef
 }
 
+define void @mul_i16_mem16(i16 %Arg, i16* %Src1, i16* %Src2, i16* %Dst, i32* %Src32) {
+  %L1 = load i16, i16* %Src1
+  %P0 = mul i16 %Arg, %L1
+  store volatile i16 %P0, i16* %Dst
+
+  %L2 = load i16, i16* %Src1
+  %L3 = load i16, i16* %Src2
+  %P1 = mul i16 %L2, %L3
+  store volatile i16 %P1, i16* %Dst
+
+  ; i32 load truncated to i16, used as the second mul operand
+  %L32 = load i32, i32* %Src32
+  %tr = trunc i32 %L32 to i16
+  %P2 = mul i16 %Arg, %tr
+  store volatile i16 %P2, i16* %Dst
+
+  ret void
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'mul_i16_mem16':
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L1 = load i16, i16* %Src1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %P0 = mul i16 %Arg, %L1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %P0, i16* %Dst
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L2 = load i16, i16* %Src1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %L3 = load i16, i16* %Src2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %P1 = mul i16 %L2, %L3
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %P1, i16* %Dst
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L32 = load i32, i32* %Src32
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i32 %L32 to i16
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %P2 = mul i16 %Arg, %tr
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %P2, i16* %Dst
+}
+
 define void @sdiv_lhs(i32 %arg32, i64 %arg64) {
   %li32 = load i32, i32* undef
   sdiv i32 %li32, %arg32