Index: llvm/lib/Target/ARM/ARMInstrNEON.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrNEON.td +++ llvm/lib/Target/ARM/ARMInstrNEON.td @@ -4197,10 +4197,10 @@ defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, "vaddl", "s", add, sext, 1>; defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, - "vaddl", "u", add, zext, 1>; + "vaddl", "u", add, zanyext, 1>; // VADDW : Vector Add Wide (Q = Q + D) defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; -defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>; +defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>; // VHADD : Vector Halving Add defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, @@ -5045,10 +5045,10 @@ defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, "vsubl", "s", sub, sext, 0>; defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, - "vsubl", "u", sub, zext, 0>; + "vsubl", "u", sub, zanyext, 0>; // VSUBW : Vector Subtract Wide (Q = Q - D) defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; -defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>; +defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>; // VHSUB : Vector Halving Subtract defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, Index: llvm/test/CodeGen/ARM/vadd.ll =================================================================== --- llvm/test/CodeGen/ARM/vadd.ll +++ llvm/test/CodeGen/ARM/vadd.ll @@ -224,9 +224,7 @@ define <8 x i16> @vaddla8(<8 x i8> %A, <8 x i8> %B) { ; CHECK-LABEL: vaddla8: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmovl.u8 q8, d1 -; CHECK-NEXT: vmovl.u8 q9, d0 -; CHECK-NEXT: vadd.i16 q0, q9, q8 +; CHECK-NEXT: vaddl.u8 q0, d0, d1 ; CHECK-NEXT: vbic.i16 q0, #0xff00 ; CHECK-NEXT: bx lr %tmp3 = zext <8 x i8> %A to <8 x i16> @@ -239,11 +237,9 @@ define <4 x i32> @vaddla16(<4 x i16> %A, <4 x i16> %B) { ; CHECK-LABEL: vaddla16: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmovl.u16 q8, d1 -; CHECK-NEXT: vmovl.u16 q9, d0 -; CHECK-NEXT: vmov.i32 q10, #0xffff -; CHECK-NEXT: vadd.i32 q8, q9, q8 -; CHECK-NEXT: vand q0, q8, q10 +; CHECK-NEXT: vmov.i32 q8, #0xffff +; CHECK-NEXT: vaddl.u16 q9, d0, d1 +; CHECK-NEXT: vand q0, q9, q8 ; CHECK-NEXT: bx lr %tmp3 = zext <4 x i16> %A to <4 x i32> %tmp4 = zext <4 x i16> %B to <4 x i32> @@ -255,11 +251,9 @@ define <2 x i64> @vaddla32(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: vaddla32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmovl.u32 q8, d1 -; CHECK-NEXT: vmovl.u32 q9, d0 -; CHECK-NEXT: vmov.i64 q10, #0xffffffff -; CHECK-NEXT: vadd.i64 q8, q9, q8 -; CHECK-NEXT: vand q0, q8, q10 +; CHECK-NEXT: vmov.i64 q8, #0xffffffff +; CHECK-NEXT: vaddl.u32 q9, d0, d1 +; CHECK-NEXT: vand q0, q9, q8 ; CHECK-NEXT: bx lr %tmp3 = zext <2 x i32> %A to <2 x i64> %tmp4 = zext <2 x i32> %B to <2 x i64> @@ -331,8 +325,7 @@ define <8 x i16> @vaddwa8(<8 x i16> %A, <8 x i8> %B) { ; CHECK-LABEL: vaddwa8: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmovl.u8 q8, d2 -; CHECK-NEXT: vadd.i16 q0, q0, q8 +; CHECK-NEXT: vaddw.u8 q0, q0, d2 ; CHECK-NEXT: vbic.i16 q0, #0xff00 ; CHECK-NEXT: bx lr %tmp3 = zext <8 x i8> %B to <8 x i16> @@ -344,10 +337,9 @@ define <4 x i32> @vaddwa16(<4 x i32> %A, <4 x i16> %B) { ; CHECK-LABEL: vaddwa16: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmovl.u16 q8, d2 -; CHECK-NEXT: vmov.i32 q9, #0xffff -; CHECK-NEXT: vadd.i32 q8, q0, q8 -; CHECK-NEXT: vand q0, q8, q9 +; CHECK-NEXT: vmov.i32 q8, #0xffff +; CHECK-NEXT: vaddw.u16 q9, q0, d2 +; CHECK-NEXT: vand q0, q9, q8 ; CHECK-NEXT: bx lr %tmp3 = zext <4 x i16> %B to <4 x i32> %tmp4 = add <4 x i32> %A, %tmp3 @@ -358,10 +350,9 @@ define <2 x i64> @vaddwa32(<2 x i64> %A, <2 x i32> %B) { ; CHECK-LABEL: vaddwa32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmovl.u32 q8, d2 -; CHECK-NEXT: vmov.i64 q9, #0xffffffff -; CHECK-NEXT: vadd.i64 q8, q0, q8 -; CHECK-NEXT: vand q0, q8, q9 +; CHECK-NEXT: vmov.i64 q8, #0xffffffff +; CHECK-NEXT: vaddw.u32 q9, q0, d2 +; CHECK-NEXT: vand q0, q9, q8 ; CHECK-NEXT: bx lr %tmp3 = zext <2 x i32> %B to <2 x i64> %tmp4 = add <2 x i64> %A, %tmp3 Index: llvm/test/CodeGen/ARM/vsub.ll =================================================================== --- llvm/test/CodeGen/ARM/vsub.ll +++ llvm/test/CodeGen/ARM/vsub.ll @@ -224,9 +224,7 @@ define <8 x i16> @vsubla8(<8 x i8> %A, <8 x i8> %B) { ; CHECK-LABEL: vsubla8: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmovl.u8 q8, d1 -; CHECK-NEXT: vmovl.u8 q9, d0 -; CHECK-NEXT: vsub.i16 q0, q9, q8 +; CHECK-NEXT: vsubl.u8 q0, d0, d1 ; CHECK-NEXT: vbic.i16 q0, #0xff00 ; CHECK-NEXT: bx lr %tmp3 = zext <8 x i8> %A to <8 x i16> @@ -239,11 +237,9 @@ define <4 x i32> @vsubla16(<4 x i16> %A, <4 x i16> %B) { ; CHECK-LABEL: vsubla16: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmovl.u16 q8, d1 -; CHECK-NEXT: vmovl.u16 q9, d0 -; CHECK-NEXT: vmov.i32 q10, #0xffff -; CHECK-NEXT: vsub.i32 q8, q9, q8 -; CHECK-NEXT: vand q0, q8, q10 +; CHECK-NEXT: vmov.i32 q8, #0xffff +; CHECK-NEXT: vsubl.u16 q9, d0, d1 +; CHECK-NEXT: vand q0, q9, q8 ; CHECK-NEXT: bx lr %tmp3 = zext <4 x i16> %A to <4 x i32> %tmp4 = zext <4 x i16> %B to <4 x i32> @@ -255,11 +251,9 @@ define <2 x i64> @vsubla32(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: vsubla32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmovl.u32 q8, d1 -; CHECK-NEXT: vmovl.u32 q9, d0 -; CHECK-NEXT: vmov.i64 q10, #0xffffffff -; CHECK-NEXT: vsub.i64 q8, q9, q8 -; CHECK-NEXT: vand q0, q8, q10 +; CHECK-NEXT: vmov.i64 q8, #0xffffffff +; CHECK-NEXT: vsubl.u32 q9, d0, d1 +; CHECK-NEXT: vand q0, q9, q8 ; CHECK-NEXT: bx lr %tmp3 = zext <2 x i32> %A to <2 x i64> %tmp4 = zext <2 x i32> %B to <2 x i64> @@ -331,8 +325,7 @@ define <8 x i16> @vsubwa8(<8 x i16> %A, <8 x i8> %B) { ; CHECK-LABEL: vsubwa8: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmovl.u8 q8, d2 -; CHECK-NEXT: vsub.i16 q0, q0, q8 +; CHECK-NEXT: vsubw.u8 q0, q0, d2 ; CHECK-NEXT: vbic.i16 q0, #0xff00 ; CHECK-NEXT: bx lr %tmp3 = zext <8 x i8> %B to <8 x i16> @@ -344,10 +337,9 @@ define <4 x i32> @vsubwa16(<4 x i32> %A, <4 x i16> %B) { ; CHECK-LABEL: vsubwa16: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmovl.u16 q8, d2 -; CHECK-NEXT: vmov.i32 q9, #0xffff -; CHECK-NEXT: vsub.i32 q8, q0, q8 -; CHECK-NEXT: vand q0, q8, q9 +; CHECK-NEXT: vmov.i32 q8, #0xffff +; CHECK-NEXT: vsubw.u16 q9, q0, d2 +; CHECK-NEXT: vand q0, q9, q8 ; CHECK-NEXT: bx lr %tmp3 = zext <4 x i16> %B to <4 x i32> %tmp4 = sub <4 x i32> %A, %tmp3 @@ -358,10 +350,9 @@ define <2 x i64> @vsubwa32(<2 x i64> %A, <2 x i32> %B) { ; CHECK-LABEL: vsubwa32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmovl.u32 q8, d2 -; CHECK-NEXT: vmov.i64 q9, #0xffffffff -; CHECK-NEXT: vsub.i64 q8, q0, q8 -; CHECK-NEXT: vand q0, q8, q9 +; CHECK-NEXT: vmov.i64 q8, #0xffffffff +; CHECK-NEXT: vsubw.u32 q9, q0, d2 +; CHECK-NEXT: vand q0, q9, q8 ; CHECK-NEXT: bx lr %tmp3 = zext <2 x i32> %B to <2 x i64> %tmp4 = sub <2 x i64> %A, %tmp3