Index: lib/Target/Hexagon/HexagonPatterns.td
===================================================================
--- lib/Target/Hexagon/HexagonPatterns.td
+++ lib/Target/Hexagon/HexagonPatterns.td
@@ -257,6 +257,24 @@
 class Not2<PatFrag P>
   : PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>;
 
+// If there is a constant operand that feeds the and/or instruction,
+// do not generate the compound instructions (S4_andi_asl_ri, S4_ori_asl_ri,
+// S4_andi_lsr_ri, S4_ori_lsr_ri).
+// It is not always profitable, as sometimes we end up with a transfer.
+// Check the example below.
+// ra = #65280; rb = lsr(rb, #8); rc ^= and(rb, ra)
+// Instead, this is preferable:
+// ra = and(#65280, lsr(ra, #8)); rb = xor(rb, ra)
+class Su_NonImm<PatFrag Op>
+  : PatFrag<Op.Operands, !head(Op.Fragments), [{
+            if (hasOneUse(N)) {
+              // Check if Op1 is an immediate operand.
+              SDValue Op1 = N->getOperand(1);
+              return !dyn_cast<ConstantSDNode>(Op1);
+            }
+            return false;}],
+            Op.OperandTransform>;
+
 class Su<PatFrag Op>
   : PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }],
             Op.OperandTransform>;
@@ -1336,14 +1354,14 @@
 def: Pat<(add Sext64:$Rs, I64:$Rt), (A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>;
 
-def: AccRRR_pat<M4_and_and,   And, Su<And>,       I32,  I32,  I32>;
-def: AccRRR_pat<M4_and_or,    And, Su<Or>,        I32,  I32,  I32>;
+def: AccRRR_pat<M4_and_and,   And, Su_NonImm<And>,I32,  I32,  I32>;
+def: AccRRR_pat<M4_and_or,    And, Su_NonImm<Or>, I32,  I32,  I32>;
 def: AccRRR_pat<M4_and_xor,   And, Su<Xor>,       I32,  I32,  I32>;
-def: AccRRR_pat<M4_or_and,    Or,  Su<And>,       I32,  I32,  I32>;
-def: AccRRR_pat<M4_or_or,     Or,  Su<Or>,        I32,  I32,  I32>;
+def: AccRRR_pat<M4_or_and,    Or,  Su_NonImm<And>,I32,  I32,  I32>;
+def: AccRRR_pat<M4_or_or,     Or,  Su_NonImm<Or>, I32,  I32,  I32>;
 def: AccRRR_pat<M4_or_xor,    Or,  Su<Xor>,       I32,  I32,  I32>;
-def: AccRRR_pat<M4_xor_and,   Xor, Su<And>,       I32,  I32,  I32>;
-def: AccRRR_pat<M4_xor_or,    Xor, Su<Or>,        I32,  I32,  I32>;
+def: AccRRR_pat<M4_xor_and,   Xor, Su_NonImm<And>,I32,  I32,  I32>;
+def: AccRRR_pat<M4_xor_or,    Xor, Su_NonImm<Or>, I32,  I32,  I32>;
 def: AccRRR_pat<M4_xor_andn,  Xor, Su<Not2<And>>, I32,  I32,  I32>;
 def: AccRRR_pat<M2_xor_xacc,  Xor, Su<Xor>,       I64,  I64,  I64>;
Index: test/CodeGen/Hexagon/constant_compound.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Hexagon/constant_compound.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=hexagon < %s 2>&1 | FileCheck %s
+
+; Generating a compound instruction with a constant is not profitable.
+; The constant needs to be kept in a register before it is fed to the
+; compound instruction.
+; Before, we were generating:
+;   ra = #65280;
+;   rb = lsr(rb, #8);
+;   rc ^= and(rb, ra)
+; Now, we generate:
+;   ra = and(#65280, lsr(ra, #8));
+;   rb = xor(rb, ra)
+
+; CHECK: and(##65280,lsr(r
+; CHECK-NOT: ^= and
+
+define dso_local zeroext i16 @test_compound(i16 zeroext %varA, i16 zeroext %varB) local_unnamed_addr #0 {
+entry:
+  %0 = zext i16 %varB to i32
+  %1 = and i16 %varA, 255
+  %2 = zext i16 %1 to i32
+  %.masked.i = and i32 %0, 255
+  %3 = xor i32 %.masked.i, %2
+  %4 = tail call i64 @llvm.hexagon.M4.pmpyw(i32 %3, i32 255) #2
+  %5 = trunc i64 %4 to i32
+  %6 = and i32 %5, 255
+  %7 = tail call i64 @llvm.hexagon.M4.pmpyw(i32 %6, i32 81922) #2
+  %8 = trunc i64 %7 to i32
+  %9 = xor i32 %8, %0
+  %10 = lshr i32 %9, 8
+  %11 = lshr i16 %varA, 8
+  %conv2 = zext i16 %11 to i32
+  %12 = and i32 %10, 65280
+  %.masked.i7 = and i32 %10, 255
+  %13 = xor i32 %.masked.i7, %conv2
+  %14 = tail call i64 @llvm.hexagon.M4.pmpyw(i32 %13, i32 255) #2
+  %15 = trunc i64 %14 to i32
+  %16 = and i32 %15, 255
+  %17 = tail call i64 @llvm.hexagon.M4.pmpyw(i32 %16, i32 81922) #2
+  %18 = trunc i64 %17 to i32
+  %19 = xor i32 %12, %18
+  %20 = lshr i32 %19, 8
+  %21 = trunc i32 %20 to i16
+  ret i16 %21
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hexagon.M4.pmpyw(i32, i32) #1
+
+attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv65" "target-features"="-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
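
Note: the shape the new Su_NonImm predicate rejects is an and/or with a constant operand feeding an accumulating logical op. The sketch below is a hypothetical reduced input (not part of the patch or its test, function and value names invented for illustration); with the change, the and with the immediate is selected on its own (and(##65280,lsr(r,#8))) followed by a plain xor, instead of a compound rc ^= and(rb, ra) that needs the constant transferred into a register first.

; Hypothetical reduced case, assuming the same hexagonv65 target as the test.
define i32 @reduced(i32 %x, i32 %acc) {
entry:
  %s = lshr i32 %x, 8      ; shifted value
  %m = and i32 %s, 65280   ; and with a constant operand
  %r = xor i32 %acc, %m    ; accumulating xor
  ret i32 %r
}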