Index: lib/Target/X86/X86CallingConv.h =================================================================== --- lib/Target/X86/X86CallingConv.h +++ lib/Target/X86/X86CallingConv.h @@ -41,7 +41,6 @@ return false; // Continue the search, but now for i32. } - inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &, CCValAssign::LocInfo &, ISD::ArgFlagsTy &, CCState &) { @@ -51,13 +50,6 @@ return false; } -inline bool CC_X86_RegCall_Error(unsigned &, MVT &, MVT &, - CCValAssign::LocInfo &, ISD::ArgFlagsTy &, - CCState &) { - report_fatal_error("LLVM x86 RegCall calling convention implementation" \ - " doesn't support long double and mask types yet."); -} - inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, Index: lib/Target/X86/X86CallingConv.td =================================================================== --- lib/Target/X86/X86CallingConv.td +++ lib/Target/X86/X86CallingConv.td @@ -76,6 +76,9 @@ // Promote i1/i8/i16 arguments to i32. CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, + // Promote v8i1/v16i1/v32i1 arguments to i32. + CCIfType<[v8i1, v16i1, v32i1], CCPromoteToType<i32>>, + // bool, char, int, enum, long, pointer --> GPR CCIfType<[i32], CCAssignToReg>, @@ -89,9 +92,6 @@ CCIfSubtarget<"is32Bit()", CCIfType<[i64], CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>, - // TODO: Handle the case of mask types (v*i1) - CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>, - // float, double, float128 --> XMM // In the case of SSE disabled --> save to stack CCIfType<[f32, f64, f128], @@ -146,8 +146,14 @@ ]>; def RetCC_#NAME : CallingConv<[ - // Promote i1 arguments to i8. - CCIfType<[i1], CCPromoteToType<i8>>, + // Promote i1, v8i1 arguments to i8. + CCIfType<[i1, v8i1], CCPromoteToType<i8>>, + + // Promote v16i1 arguments to i16. + CCIfType<[v16i1], CCPromoteToType<i16>>, + + // Promote v32i1 arguments to i32. 
+ CCIfType<[v32i1], CCPromoteToType<i32>>, // bool, char, int, enum, long, pointer --> GPR CCIfType<[i8], CCAssignToReg>, @@ -164,9 +170,6 @@ CCIfSubtarget<"is32Bit()", CCIfType<[i64], CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>, - // TODO: Handle the case of mask types (v*i1) - CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>, - // long double --> FP CCIfType<[f80], CCAssignToReg>, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -2096,14 +2096,26 @@ } /// Lowers masks values (v*i1) to the local register values +/// \returns DAG node after lowering to register type static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, const SDLoc &Dl, SelectionDAG &DAG) { EVT ValVT = ValArg.getValueType(); - if (ValVT == MVT::v64i1 && ValLoc == MVT::i64) { + if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) || + (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) { + // Two stage lowering might be required + // bitcast: v8i1 -> i8 / v16i1 -> i16 + // anyextend: i8 -> i32 / i16 -> i32 + EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16; + SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg); + if (ValLoc == MVT::i32) + ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy); + return ValToCopy; + } else if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) || + (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) { // One stage lowering is required - // bitcast: v64i1 -> i64 - return DAG.getBitcast(MVT::i64, ValArg); + // bitcast: v32i1 -> i32 / v64i1 -> i64 + return DAG.getBitcast(ValLoc, ValArg); } else return DAG.getNode(ISD::SIGN_EXTEND, Dl, ValLoc, ValArg); } @@ -2375,14 +2387,14 @@ } /// Reads two 32 bit registers and creates a 64 bit mask value. -/// @param VA The current 32 bit value that need to be assigned. 
-/// @param NextVA The next 32 bit value that need to be assigned. -/// @param Root The parent DAG node. -/// @param [in,out] InFlag Represents SDvalue in the parent DAG node for +/// \param VA The current 32 bit value that need to be assigned. +/// \param NextVA The next 32 bit value that need to be assigned. +/// \param Root The parent DAG node. +/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for /// glue purposes. In the case the DAG is already using /// physical register instead of virtual, we should glue /// our new SDValue to InFlag SDvalue. -/// @return a new SDvalue of size 64bit. +/// \return a new SDvalue of size 64bit. static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, const SDLoc &Dl, const X86Subtarget &Subtarget, @@ -2432,23 +2444,35 @@ return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi); } +/// The function will lower a register of various sizes (8/16/32/64) +/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1) +/// \returns a DAG node contains the operand after lowering to mask type. static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, const EVT &ValLoc, const SDLoc &Dl, SelectionDAG &DAG) { - assert((ValLoc == MVT::i64 || ValLoc == MVT::i32) && - "Expecting register location of size 32/64 bit"); + SDValue ValReturned = ValArg; - // Currently not referenced - will be used in other mask lowering - (void)Dl; - - // In the case of v64i1 no special handling is required due to two reasons: - // In 32 bit machine, this case is handled by getv64i1Argument - // In 64 bit machine, There is no need to truncate the value only bitcast - if (ValVT == MVT::v64i1 && ValLoc == MVT::i32) { - llvm_unreachable("Expecting only i64 locations"); + if (ValVT == MVT::v8i1) { + // Currently the operands can reside in two register width 8b or 32b. + // In case the register is 8b TRUNCATE will return the same operand. 
+ ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, MVT::i8, ValReturned); + } else if (ValVT == MVT::v16i1) { + // Currently the operands can reside in two register width 16b or 32b. + // In case the register is 16b TRUNCATE will return the same operand. + ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, MVT::i16, ValReturned); + } else if (ValVT == MVT::v32i1) { + if (ValLoc == MVT::i64) + ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, MVT::i32, ValReturned); + } else if (ValVT == MVT::v64i1) { + // In 32 bit machine, this case is handled by getv64i1Argument + if (ValLoc == MVT::i32) + llvm_unreachable("Expecting only i64 locations"); + // In 64 bit machine, There is no need to truncate the value only bitcast + } else { + llvm_unreachable("Expecting a vector of i1 types"); } - return DAG.getBitcast(ValVT, ValArg); + return DAG.getBitcast(ValVT, ValReturned); } /// Lower the result values of a call into the @@ -2509,8 +2533,9 @@ if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) { if (VA.getValVT().isVector() && - (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::i64)) { - // promoting a mask type (v*i1) into a register of type i64/i32 + ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || + (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { + // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG); } else Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); @@ -2863,8 +2888,9 @@ ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); else if (VA.getValVT().isVector() && VA.getValVT().getScalarType() == MVT::i1 && - ((RegVT == MVT::i32) || (RegVT == MVT::i64))) { - // Promoting a mask type (v*i1) into a register of type i64/i32 + ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || + (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { + // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 ArgValue = 
lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG); } else ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); Index: test/CodeGen/X86/avx512-regcall-Mask.ll =================================================================== --- test/CodeGen/X86/avx512-regcall-Mask.ll +++ test/CodeGen/X86/avx512-regcall-Mask.ll @@ -193,3 +193,199 @@ %call = call x86_regcallcc <64 x i1> @test_retv64i1() ret <64 x i1> %call } + +; X32-LABEL: test_argv32i1: +; X32: addl %ecx, %eax +; X32: addl %edx, %eax +; X32: retl + +; WIN64-LABEL: test_argv32i1: +; WIN64: addl %ecx, %eax +; WIN64: addl %edx, %eax +; WIN64: retq + +; Test regcall when receiving arguments of v32i1 type +define x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) { + %a = bitcast <32 x i1> %x0 to i32 + %b = bitcast <32 x i1> %x1 to i32 + %c = bitcast <32 x i1> %x2 to i32 + %add1 = add i32 %a, %b + %add2 = add i32 %add1, %c + ret i32 %add2 +} + +; X32-LABEL: caller_argv32i1: +; X32: mov{{.*}} $1, %eax +; X32: mov{{.*}} $1, %ecx +; X32: call{{.*}} _test_argv32i1 + +; WIN64-LABEL: caller_argv32i1: +; WIN64: mov{{.*}} $1, %eax +; WIN64: mov{{.*}} $1, %ecx +; WIN64: mov{{.*}} $1, %edx +; WIN64: call{{.*}} test_argv32i1 + +; Test regcall when passing arguments of v32i1 type +define x86_regcallcc i32 @caller_argv32i1() #0 { +entry: + %v0 = bitcast i32 1 to <32 x i1> + %call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0) + ret i32 %call +} + +; X32-LABEL: test_retv32i1: +; X32: movl $1, %eax +; X32: retl + +; WIN64-LABEL: test_retv32i1: +; WIN64: movl $1, %eax +; WIN64: retq + +; Test regcall when returning v32i1 type +define x86_regcallcc <32 x i1> @test_retv32i1() { + %a = bitcast i32 1 to <32 x i1> + ret <32 x i1> %a +} + +; X32-LABEL: caller_retv32i1: +; X32: call{{.*}} _test_retv32i1 +; X32: incl %eax + +; Test regcall when processing result of v32i1 type +define x86_regcallcc i32 @caller_retv32i1() #0 { +entry: + %call = call 
x86_regcallcc <32 x i1> @test_retv32i1() + %c = bitcast <32 x i1> %call to i32 + %add = add i32 %c, 1 + ret i32 %add +} + +; X32-LABEL: test_argv16i1: +; X32: addl %ecx, %eax +; X32: addl %edx, %eax +; X32: retl + +; WIN64-LABEL: test_argv16i1: +; WIN64: addl %ecx, %eax +; WIN64: addl %edx, %eax +; WIN64: retq + +; Test regcall when receiving arguments of v16i1 type +define x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) { + %a = bitcast <16 x i1> %x0 to i16 + %b = bitcast <16 x i1> %x1 to i16 + %c = bitcast <16 x i1> %x2 to i16 + %add1 = add i16 %a, %b + %add2 = add i16 %add1, %c + ret i16 %add2 +} + +; X32-LABEL: caller_argv16i1: +; X32: movl $1, %eax +; X32: movl $1, %ecx +; X32: calll _test_argv16i1 + +; WIN64-LABEL: caller_argv16i1: +; WIN64: movl $1, %eax +; WIN64: movl $1, %ecx +; WIN64: movl $1, %edx +; WIN64: callq test_argv16i1 + +; Test regcall when passing arguments of v16i1 type +define x86_regcallcc i16 @caller_argv16i1() #0 { +entry: + %v0 = bitcast i16 1 to <16 x i1> + %call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0) + ret i16 %call +} + +; X32-LABEL: test_retv16i1: +; X32: movw $1, %ax +; X32: retl + +; WIN64-LABEL: test_retv16i1: +; WIN64: movw $1, %ax +; WIN64: retq + +; Test regcall when returning v16i1 type +define x86_regcallcc <16 x i1> @test_retv16i1() { + %a = bitcast i16 1 to <16 x i1> + ret <16 x i1> %a +} + +; X32-LABEL: caller_retv16i1: +; X32: calll _test_retv16i1 +; X32: incl %eax + +; Test regcall when processing result of v16i1 type +define x86_regcallcc i16 @caller_retv16i1() #0 { +entry: + %call = call x86_regcallcc <16 x i1> @test_retv16i1() + %c = bitcast <16 x i1> %call to i16 + %add = add i16 %c, 1 + ret i16 %add +} + +; X32-LABEL: test_argv8i1: +; X32: addb %cl, %al +; X32: addb %dl, %al +; X32: retl + +; WIN64-LABEL: test_argv8i1: +; WIN64: addb %cl, %al +; WIN64: addb %dl, %al +; WIN64: retq + +; Test regcall when receiving arguments of v8i1 type 
+define x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) { + %a = bitcast <8 x i1> %x0 to i8 + %b = bitcast <8 x i1> %x1 to i8 + %c = bitcast <8 x i1> %x2 to i8 + %add1 = add i8 %a, %b + %add2 = add i8 %add1, %c + ret i8 %add2 +} + +; X32-LABEL: caller_argv8i1: +; X32: movl $1, %eax +; X32: movl $1, %ecx +; X32: calll _test_argv8i1 + +; WIN64-LABEL: caller_argv8i1: +; WIN64: movl $1, %eax +; WIN64: movl $1, %ecx +; WIN64: movl $1, %edx +; WIN64: callq test_argv8i1 + +; Test regcall when passing arguments of v8i1 type +define x86_regcallcc i8 @caller_argv8i1() #0 { +entry: + %v0 = bitcast i8 1 to <8 x i1> + %call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0) + ret i8 %call +} + +; X32-LABEL: test_retv8i1: +; X32: movb $1, %al +; X32: retl + +; WIN64-LABEL: test_retv8i1: +; WIN64: movb $1, %al +; WIN64: retq + +; Test regcall when returning v8i1 type +define x86_regcallcc <8 x i1> @test_retv8i1() { + %a = bitcast i8 1 to <8 x i1> + ret <8 x i1> %a +} + +; X32-LABEL: caller_retv8i1: +; X32: calll _test_retv8i1 +; X32: retl + +; Test regcall when processing result of v8i1 type +define x86_regcallcc <8 x i1> @caller_retv8i1() #0 { +entry: + %call = call x86_regcallcc <8 x i1> @test_retv8i1() + ret <8 x i1> %call +}