Index: lib/Target/X86/X86CallingConv.td =================================================================== --- lib/Target/X86/X86CallingConv.td +++ lib/Target/X86/X86CallingConv.td @@ -76,6 +76,9 @@ // Promote i1/i8/i16 arguments to i32. CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote v8i1/v16i1/v32i1 arguments to i32. + CCIfType<[v8i1, v16i1, v32i1], CCPromoteToType>, + // bool, char, int, enum, long, pointer --> GPR CCIfType<[i32], CCAssignToReg>, @@ -89,9 +92,6 @@ CCIfSubtarget<"is32Bit()", CCIfType<[i64], CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>, - // TODO: Handle the case of mask types (v*i1) - CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>, - // float, double, float128 --> XMM // In the case of SSE disabled --> save to stack CCIfType<[f32, f64, f128], @@ -146,9 +146,15 @@ ]>; def RetCC_#NAME : CallingConv<[ - // Promote i1 arguments to i8. - CCIfType<[i1], CCPromoteToType>, + // Promote i1, v8i1 return values to i8. + CCIfType<[i1, v8i1], CCPromoteToType>, + // Promote v16i1 return values to i16. + CCIfType<[v16i1], CCPromoteToType>, + + // Promote v32i1 return values to i32. 
+ CCIfType<[v32i1], CCPromoteToType>, + // bool, char, int, enum, long, pointer --> GPR CCIfType<[i8], CCAssignToReg>, CCIfType<[i16], CCAssignToReg>, @@ -164,9 +170,6 @@ CCIfSubtarget<"is32Bit()", CCIfType<[i64], CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>, - // TODO: Handle the case of mask types (v*i1) - CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>, - // long double --> FP CCIfType<[f80], CCAssignToReg>, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -2100,8 +2100,29 @@ const SDLoc &Dl, SelectionDAG &DAG) { EVT ValVT = ValArg.getValueType(); - if (ValVT == MVT::v64i1 && ValLoc == MVT::i64) { + if (ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) { + // Two stage lowering might be required + // bitcast: v8i1 -> i8 + // anyextend: i8 -> i32 + SDValue ValToCopy = DAG.getBitcast(MVT::i8, ValArg); + return (ValLoc != MVT::i8) + ? DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy) + : ValToCopy; + } else if (ValVT == MVT::v16i1 && + (ValLoc == MVT::i16 || ValLoc == MVT::i32)) { + // Two stage lowering might be required + // bitcast: v16i1 -> i16 + // anyextend: i16 -> i32 + SDValue ValToCopy = DAG.getBitcast(MVT::i16, ValArg); + return (ValLoc != MVT::i16) + ? 
DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy) + : ValToCopy; + } else if (ValVT == MVT::v32i1 && ValLoc == MVT::i32) { // One stage lowering is required + // bitcast: v32i1 -> i32 + return DAG.getBitcast(MVT::i32, ValArg); + } else if (ValVT == MVT::v64i1 && ValLoc == MVT::i64) { + // One stage lowering is required // bitcast: v64i1 -> i64 return DAG.getBitcast(MVT::i64, ValArg); } else @@ -2435,20 +2456,26 @@ static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, const EVT &ValLoc, const SDLoc &Dl, SelectionDAG &DAG) { - assert((ValLoc == MVT::i64 || ValLoc == MVT::i32) && - "Expecting register location of size 32/64 bit"); + SDValue ValReturned = ValArg; - // Currently not referenced - will be used in other mask lowering - (void)Dl; - - // In the case of v64i1 no special handling is required due to two reasons: - // In 32 bit machine, this case is handled by getv64i1Argument - // In 64 bit machine, There is no need to truncate the value only bitcast - if (ValVT == MVT::v64i1 && ValLoc == MVT::i32) { - llvm_unreachable("Expecting only i64 locations"); + if (ValVT == MVT::v8i1) { + ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, MVT::i8, ValReturned); + } else if (ValVT == MVT::v16i1) { + ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, MVT::i16, ValReturned); + } else if (ValVT == MVT::v32i1) { + if (ValLoc == MVT::i64) + ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, MVT::i32, ValReturned); + } else if (ValVT == MVT::v64i1) { + // In the case of v64i1 no special handling is required, for two reasons: + // On a 32-bit machine, this case is handled by getv64i1Argument + // On a 64-bit machine, the value only needs a bitcast, not a truncation + if (ValLoc == MVT::i32) + llvm_unreachable("Expecting only i64 locations"); + } else { + llvm_unreachable("Expecting a vector of i1 types"); } - return DAG.getBitcast(ValVT, ValArg); + return DAG.getBitcast(ValVT, ValReturned); } /// Lower the result values of a call into the @@ -2509,8 +2536,9 @@ if 
(VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) { if (VA.getValVT().isVector() && - (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::i64)) { - // promoting a mask type (v*i1) into a register of type i64/i32 + ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || + (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { + // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG); } else Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); @@ -2863,8 +2891,9 @@ ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); else if (VA.getValVT().isVector() && VA.getValVT().getScalarType() == MVT::i1 && - ((RegVT == MVT::i32) || (RegVT == MVT::i64))) { - // Promoting a mask type (v*i1) into a register of type i64/i32 + ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || + (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { + // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG); } else ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); Index: test/CodeGen/X86/avx512-regcall-Mask.ll =================================================================== --- test/CodeGen/X86/avx512-regcall-Mask.ll +++ test/CodeGen/X86/avx512-regcall-Mask.ll @@ -193,3 +193,199 @@ %call = call x86_regcallcc <64 x i1> @test_retv64i1() ret <64 x i1> %call } + +; X32-LABEL: test_argv32i1: +; X32: addl %ecx, %eax +; X32: addl %edx, %eax +; X32: retl + +; WIN64-LABEL: test_argv32i1: +; WIN64: addl %ecx, %eax +; WIN64: addl %edx, %eax +; WIN64: retq + +; Test regcall when receiving arguments of v32i1 type +define x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) { + %a = bitcast <32 x i1> %x0 to i32 + %b = bitcast <32 x i1> %x1 to i32 + %c = bitcast <32 x i1> %x2 to i32 + %add1 = add i32 %a, %b + %add2 = add i32 %add1, %c + ret 
i32 %add2 +} + +; X32-LABEL: caller_argv32i1: +; X32: mov{{.*}} $1, %eax +; X32: mov{{.*}} $1, %ecx +; X32: call{{.*}} _test_argv32i1 + +; WIN64-LABEL: caller_argv32i1: +; WIN64: mov{{.*}} $1, %eax +; WIN64: mov{{.*}} $1, %ecx +; WIN64: mov{{.*}} $1, %edx +; WIN64: call{{.*}} test_argv32i1 + +; Test regcall when passing arguments of v32i1 type +define x86_regcallcc i32 @caller_argv32i1() #0 { +entry: + %v0 = bitcast i32 1 to <32 x i1> + %call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0) + ret i32 %call +} + +; X32-LABEL: test_retv32i1: +; X32: movl $1, %eax +; X32: retl + +; WIN64-LABEL: test_retv32i1: +; WIN64: movl $1, %eax +; WIN64: retq + +; Test regcall when returning v32i1 type +define x86_regcallcc <32 x i1> @test_retv32i1() { + %a = bitcast i32 1 to <32 x i1> + ret <32 x i1> %a +} + +; X32-LABEL: caller_retv32i1: +; X32: call{{.*}} _test_retv32i1 +; X32: incl %eax + +; Test regcall when processing result of v32i1 type +define x86_regcallcc i32 @caller_retv32i1() #0 { +entry: + %call = call x86_regcallcc <32 x i1> @test_retv32i1() + %c = bitcast <32 x i1> %call to i32 + %add = add i32 %c, 1 + ret i32 %add +} + +; X32-LABEL: test_argv16i1: +; X32: addl %ecx, %eax +; X32: addl %edx, %eax +; X32: retl + +; WIN64-LABEL: test_argv16i1: +; WIN64: addl %ecx, %eax +; WIN64: addl %edx, %eax +; WIN64: retq + +; Test regcall when receiving arguments of v16i1 type +define x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) { + %a = bitcast <16 x i1> %x0 to i16 + %b = bitcast <16 x i1> %x1 to i16 + %c = bitcast <16 x i1> %x2 to i16 + %add1 = add i16 %a, %b + %add2 = add i16 %add1, %c + ret i16 %add2 +} + +; X32-LABEL: caller_argv16i1: +; X32: movl $1, %eax +; X32: movl $1, %ecx +; X32: calll _test_argv16i1 + +; WIN64-LABEL: caller_argv16i1: +; WIN64: movl $1, %eax +; WIN64: movl $1, %ecx +; WIN64: movl $1, %edx +; WIN64: callq test_argv16i1 + +; Test regcall when passing arguments of v16i1 type +define 
x86_regcallcc i16 @caller_argv16i1() #0 { +entry: + %v0 = bitcast i16 1 to <16 x i1> + %call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0) + ret i16 %call +} + +; X32-LABEL: test_retv16i1: +; X32: movw $1, %ax +; X32: retl + +; WIN64-LABEL: test_retv16i1: +; WIN64: movw $1, %ax +; WIN64: retq + +; Test regcall when returning v16i1 type +define x86_regcallcc <16 x i1> @test_retv16i1() { + %a = bitcast i16 1 to <16 x i1> + ret <16 x i1> %a +} + +; X32-LABEL: caller_retv16i1: +; X32: calll _test_retv16i1 +; X32: incl %eax + +; Test regcall when processing result of v16i1 type +define x86_regcallcc i16 @caller_retv16i1() #0 { +entry: + %call = call x86_regcallcc <16 x i1> @test_retv16i1() + %c = bitcast <16 x i1> %call to i16 + %add = add i16 %c, 1 + ret i16 %add +} + +; X32-LABEL: test_argv8i1: +; X32: addb %cl, %al +; X32: addb %dl, %al +; X32: retl + +; WIN64-LABEL: test_argv8i1: +; WIN64: addb %cl, %al +; WIN64: addb %dl, %al +; WIN64: retq + +; Test regcall when receiving arguments of v8i1 type +define x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) { + %a = bitcast <8 x i1> %x0 to i8 + %b = bitcast <8 x i1> %x1 to i8 + %c = bitcast <8 x i1> %x2 to i8 + %add1 = add i8 %a, %b + %add2 = add i8 %add1, %c + ret i8 %add2 +} + +; X32-LABEL: caller_argv8i1: +; X32: movl $1, %eax +; X32: movl $1, %ecx +; X32: calll _test_argv8i1 + +; WIN64-LABEL: caller_argv8i1: +; WIN64: movl $1, %eax +; WIN64: movl $1, %ecx +; WIN64: movl $1, %edx +; WIN64: callq test_argv8i1 + +; Test regcall when passing arguments of v8i1 type +define x86_regcallcc i8 @caller_argv8i1() #0 { +entry: + %v0 = bitcast i8 1 to <8 x i1> + %call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0) + ret i8 %call +} + +; X32-LABEL: test_retv8i1: +; X32: movb $1, %al +; X32: retl + +; WIN64-LABEL: test_retv8i1: +; WIN64: movb $1, %al +; WIN64: retq + +; Test regcall when returning v8i1 type +define x86_regcallcc <8 x i1> 
@test_retv8i1() { + %a = bitcast i8 1 to <8 x i1> + ret <8 x i1> %a +} + +; X32-LABEL: caller_retv8i1: +; X32: calll _test_retv8i1 +; X32: retl + +; Test regcall when processing result of v8i1 type +define x86_regcallcc <8 x i1> @caller_retv8i1() #0 { +entry: + %call = call x86_regcallcc <8 x i1> @test_retv8i1() + ret <8 x i1> %call +}