Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -823,6 +823,7 @@
   case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
   case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
   case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
+  case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
   case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
   case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
   case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -4986,6 +4986,7 @@
 def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
 def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
 def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
 
 // Natural vector casts (128 bit)
 def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
Index: test/CodeGen/AArch64/aarch64-be-bv.ll
===================================================================
--- test/CodeGen/AArch64/aarch64-be-bv.ll
+++ test/CodeGen/AArch64/aarch64-be-bv.ll
@@ -377,9 +377,11 @@
 declare i8 @f_v8i8(<8 x i8> %arg)
 declare i16 @f_v4i16(<4 x i16> %arg)
 declare i32 @f_v2i32(<2 x i32> %arg)
+declare i64 @f_v1i64(<1 x i64> %arg)
 declare i8 @f_v16i8(<16 x i8> %arg)
 declare i16 @f_v8i16(<8 x i16> %arg)
 declare i32 @f_v4i32(<4 x i32> %arg)
+declare i64 @f_v2i64(<2 x i64> %arg)
 
 ; CHECK-LABEL: modimm_t1_call:
 define void @modimm_t1_call() {
@@ -395,6 +397,9 @@
   ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT: bl f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK: movi v{{[0-9]+}}.2s, #0x5
+  ; CHECK-NEXT: bl f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5
   ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -410,6 +415,10 @@
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT: bl f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2
+  ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT: bl f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -428,6 +437,9 @@
   ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT: bl f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK: movi v{{[0-9]+}}.2s, #0x5, lsl #8
+  ; CHECK-NEXT: bl f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #8
   ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -443,6 +455,10 @@
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT: bl f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, lsl #8
+  ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT: bl f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -461,6 +477,9 @@
   ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT: bl f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK: movi v{{[0-9]+}}.2s, #0x5, lsl #16
+  ; CHECK-NEXT: bl f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #16
   ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -476,6 +495,10 @@
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT: bl f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, lsl #16
+  ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT: bl f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -494,6 +517,9 @@
   ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT: bl f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK: movi v{{[0-9]+}}.2s, #0x5, lsl #24
+  ; CHECK-NEXT: bl f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #24
   ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -509,6 +535,10 @@
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT: bl f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, lsl #24
+  ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT: bl f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -527,6 +557,9 @@
   ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT: bl f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK: movi v{{[0-9]+}}.4h, #0x5
+  ; CHECK-NEXT: bl f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x5
   ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -542,6 +575,10 @@
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT: bl f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK: movi v[[REG:[0-9]+]].8h, #0x2
+  ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT: bl f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -560,6 +597,9 @@
   ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT: bl f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK: movi v{{[0-9]+}}.4h, #0x5, lsl #8
+  ; CHECK-NEXT: bl f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK: movi v[[REG1:[0-9]+]].8h, #0x5, lsl #8
   ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -575,6 +615,10 @@
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT: bl f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK: movi v[[REG:[0-9]+]].8h, #0x2, lsl #8
+  ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT: bl f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -593,6 +637,9 @@
   ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT: bl f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK: movi v{{[0-9]+}}.2s, #0x5, msl #8
+  ; CHECK-NEXT: bl f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, msl #8
   ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -608,6 +655,10 @@
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT: bl f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, msl #8
+  ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT: bl f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -626,6 +677,9 @@
   ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT: bl f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK: movi v{{[0-9]+}}.2s, #0x5, msl #16
+  ; CHECK-NEXT: bl f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, msl #16
   ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -641,6 +695,10 @@
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT: bl f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK: movi v[[REG:[0-9]+]].4s, #0x2, msl #16
+  ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT: bl f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -725,6 +783,9 @@
   ; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT: bl f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK: fmov v{{[0-9]+}}.2s, #0.39062500
+  ; CHECK-NEXT: bl f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK: fmov v[[REG1:[0-9]+]].4s, #3.25000000
   ; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -740,6 +801,10 @@
   ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT: bl f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK: fmov v[[REG:[0-9]+]].4s, #2.5000000
+  ; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT: bl f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
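Note: a minimal sketch, not part of the patch, of the kind of IR the new f_v1i64 calls exercise on a big-endian target. The constant vector arguments above are omitted in this copy of the diff, so the function name and constant here are illustrative assumptions only:

  ; Sketch only: a <1 x i64> constant whose two i32 halves are both 5 can be
  ; materialized with "movi v0.2s, #0x5" and then reinterpreted in place
  ; (a natural vector cast) as v1i64 rather than being loaded from memory.
  define <1 x i64> @sketch_nvcast_v1i64() {
    ret <1 x i64> <i64 21474836485>   ; 0x0000000500000005 (assumed value)
  }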