diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -101,6 +101,8 @@ // Block s/udiv lowering for now bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; } + + bool hasAndNot(SDValue Y) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -531,6 +531,30 @@ return true; } +bool VETargetLowering::hasAndNot(SDValue Y) const { + EVT VT = Y.getValueType(); + + // VE doesn't have vector and not instruction. + if (VT.isVector()) + return false; + + // VE allows different immediate values for X and Y where ~X & Y. + // Only simm7 works for X, and only mimm works for Y on VE. However, this + // function is used to check whether an immediate value is OK for and-not + // instruction as both X and Y. Generating additional instruction to + // retrieve an immediate value is no good since the purpose of this + // function is to convert a series of 3 instructions to another series of + // 3 instructions with better parallelism. Therefore, we return false + // for all immediate values now. + // FIXME: Change hasAndNot function to have two operands to make it work + // correctly with Aurora VE. + if (isa<ConstantSDNode>(Y)) + return false; + + // It's ok for generic registers. + return true; +} + VETargetLowering::VETargetLowering(const TargetMachine &TM, const VESubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -576,6 +576,27 @@ [(set Ty:$sx, (OpNode (Ty mimm:$sz)))]>; } +// Special RR multiclass for NND instruction. +// e.g. 
NND +let hasSideEffects = 0 in +multiclass RRNNDmopc, RegisterClass RC, ValueType Ty> { + def rr : RR; + let cy = 0 in + def ir : RR; + let cz = 0 in + def rm : RR; + let cy = 0, cz = 0 in + def im : RR; +} + // Special RR multiclass for MRG instruction. // e.g. MRG let Constraints = "$sx = $sd", DisableEncoding = "$sd", hasSideEffects = 0 in @@ -1022,7 +1043,7 @@ defm EQV : RRm<"eqv", 0x47, I64, i64>; // Section 8.5.5 - NND (Negate AND) -defm NND : RRNCm<"nnd", 0x54, I64, i64>; +defm NND : RRNNDm<"nnd", 0x54, I64, i64>; // Section 8.5.6 - MRG (Merge) defm MRG : RRMRGm<"mrg", 0x56, I64, i64>; diff --git a/llvm/test/CodeGen/VE/cttz.ll b/llvm/test/CodeGen/VE/cttz.ll --- a/llvm/test/CodeGen/VE/cttz.ll +++ b/llvm/test/CodeGen/VE/cttz.ll @@ -4,8 +4,7 @@ ; CHECK-LABEL: func1: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: lea %s1, -1(, %s0) -; CHECK-NEXT: xor %s0, -1, %s0 -; CHECK-NEXT: and %s0, %s0, %s1 +; CHECK-NEXT: nnd %s0, %s0, %s1 ; CHECK-NEXT: pcnt %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i64 @llvm.cttz.i64(i64 %p, i1 true) diff --git a/llvm/test/CodeGen/VE/nnd.ll b/llvm/test/CodeGen/VE/nnd.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/nnd.ll @@ -0,0 +1,215 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +define signext i8 @func8s(i8 signext %a, i8 signext %b) { +; CHECK-LABEL: func8s: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: and %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i8 %a, -1 + %res = and i8 %not, %b + ret i8 %res +} + +define zeroext i8 @func8z(i8 zeroext %a, i8 zeroext %b) { +; CHECK-LABEL: func8z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: and %s0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i8 %a, -1 + %res = and i8 %b, %not + ret i8 %res +} + +define signext i8 @funci8s(i8 signext %a) { +; CHECK-LABEL: funci8s: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: and %s0, 5, %s0 +; CHECK-NEXT: or 
%s11, 0, %s9 + %not = xor i8 %a, -1 + %res = and i8 %not, 5 + ret i8 %res +} + +define zeroext i8 @funci8z(i8 zeroext %a) { +; CHECK-LABEL: funci8z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: lea %s1, 251 +; CHECK-NEXT: and %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i8 %a, -1 + %res = and i8 -5, %not + ret i8 %res +} + +define signext i16 @func16s(i16 signext %a, i16 signext %b) { +; CHECK-LABEL: func16s: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: and %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i16 %a, -1 + %res = and i16 %not, %b + ret i16 %res +} + +define zeroext i16 @func16z(i16 zeroext %a, i16 zeroext %b) { +; CHECK-LABEL: func16z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: and %s0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i16 %a, -1 + %res = and i16 %b, %not + ret i16 %res +} + +define signext i16 @funci16s(i16 signext %a) { +; CHECK-LABEL: funci16s: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i16 %a, -1 + %res = and i16 %not, 65535 + ret i16 %res +} + +define zeroext i16 @funci16z(i16 zeroext %a) { +; CHECK-LABEL: funci16z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: and %s0, %s0, (52)0 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i16 %a, -1 + %res = and i16 4095, %not + ret i16 %res +} + +define signext i32 @func32s(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: func32s: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: and %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i32 %a, -1 + %res = and i32 %not, %b + ret i32 %res +} + +define zeroext i32 @func32z(i32 zeroext %a, i32 zeroext %b) { +; CHECK-LABEL: func32z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: and %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i32 %a, -1 + %res = and i32 %not, %b + ret i32 
%res +} + +define signext i32 @funci32s(i32 signext %a) { +; CHECK-LABEL: funci32s: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: and %s0, %s0, (36)0 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i32 %a, -1 + %res = and i32 %not, 268435455 + ret i32 %res +} + +define zeroext i32 @funci32z(i32 zeroext %a) { +; CHECK-LABEL: funci32z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: and %s0, %s0, (36)0 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i32 %a, -1 + %res = and i32 %not, 268435455 + ret i32 %res +} + +define i64 @func64(i64 %a, i64 %b) { +; CHECK-LABEL: func64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: nnd %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i64 %a, -1 + %res = and i64 %not, %b + ret i64 %res +} + +define i64 @func64i(i64 %a) { +; CHECK-LABEL: func64i: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: nnd %s0, %s0, (24)0 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i64 %a, -1 + %res = and i64 %not, 1099511627775 + ret i64 %res +} + +define i128 @func128(i128 %a, i128 %b) { +; CHECK-LABEL: func128: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: nnd %s0, %s0, %s2 +; CHECK-NEXT: nnd %s1, %s1, %s3 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i128 %a, -1 + %res = and i128 %b, %not + ret i128 %res +} + +define i128 @funci128(i128 %a) { +; CHECK-LABEL: funci128: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s1, 5, (0)1 +; CHECK-NEXT: nnd %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %not = xor i128 %a, -1 + %res = and i128 %not, 5 + ret i128 %res +} + +define i64 @func64_nnd_fold(i64 %x, i64 %y, i64 %m) { +; CHECK-LABEL: func64_nnd_fold: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: nnd %s1, %s2, %s1 +; CHECK-NEXT: and %s0, %s0, %s2 +; CHECK-NEXT: or %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %D = xor i64 %x, %y + %A = and i64 %D, %m + %res = xor i64 %A, %y + ret i64 %res +} + +define i64 @func64iy_nnd_fold(i64 %x, i64 %m) { +; CHECK-LABEL: func64iy_nnd_fold: +; CHECK: 
.LBB{{[0-9]+}}_2: +; CHECK-NEXT: nnd %s0, %s0, %s1 +; CHECK-NEXT: or %s1, -64, %s1 +; CHECK-NEXT: nnd %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %D = xor i64 %x, -64 + %A = and i64 %D, %m + %res = xor i64 %A, -64 + ret i64 %res +} + +define i64 @func64im_nnd_fold(i64 %x, i64 %y) { +; CHECK-LABEL: func64im_nnd_fold: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: xor %s0, %s0, %s1 +; CHECK-NEXT: and %s0, 30, %s0 +; CHECK-NEXT: xor %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %D = xor i64 %x, %y + %A = and i64 %D, 30 + %res = xor i64 %A, %y + ret i64 %res +}