Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -99,6 +99,7 @@ SDValue LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; }; Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -209,6 +209,20 @@ } } + // Custom lower lane accesses to expand out variable indices + if (Subtarget->hasSIMD128()) { + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) { + setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); + } + if (EnableUnimplementedWasmSIMDInstrs) { + for (auto T : {MVT::v2i64, MVT::v2f64}) { + setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); + } + } + } + // Trap lowers to wasm unreachable setOperationAction(ISD::TRAP, MVT::Other, Legal); @@ -859,6 +873,9 @@ return LowerCopyToReg(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + case ISD::INSERT_VECTOR_ELT: + return LowerAccessVectorElement(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::SHL: @@ -1050,6 +1067,18 @@ return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops); } +SDValue +WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op, + SelectionDAG &DAG) const { + 
// Allow constant lane indices, expand variable lane indices + SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode(); + if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef()) + return Op; + else + // Perform default expansion + return SDValue(); +} + SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); Index: llvm/trunk/test/CodeGen/WebAssembly/simd.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/simd.ll +++ llvm/trunk/test/CodeGen/WebAssembly/simd.ll @@ -54,6 +54,26 @@ ret i32 %a } +; CHECK-LABEL: extract_var_v16i8_s: +; NO-SIMD128-NOT: i8x16 +; SIMD128-NEXT: .param v128, i32{{$}} +; SIMD128-NEXT: .result i32{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16 +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $2=, $pop[[L2]] +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0 +; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 15 +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]] +; SIMD128-NEXT: i32.or $push[[L6:[0-9]+]]=, $2, $pop[[L5]] +; SIMD128-NEXT: i32.load8_s $push[[R:[0-9]+]]=, 0($pop[[L6]]) +; SIMD128-NEXT: return $pop[[R]] +define i32 @extract_var_v16i8_s(<16 x i8> %v, i32 %i) { + %elem = extractelement <16 x i8> %v, i32 %i + %a = sext i8 %elem to i32 + ret i32 %a +} + ; CHECK-LABEL: extract_undef_v16i8_s: ; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .param v128{{$}} @@ -78,6 +98,26 @@ ret i32 %a } +; CHECK-LABEL: extract_var_v16i8_u: +; NO-SIMD128-NOT: i8x16 +; SIMD128-NEXT: .param v128, i32{{$}} +; SIMD128-NEXT: .result i32{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} +; 
SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 15{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} +; SIMD128-NEXT: i32.or $push[[L6:[0-9]+]]=, $2, $pop[[L5]]{{$}} +; SIMD128-NEXT: i32.load8_u $push[[R:[0-9]+]]=, 0($pop[[L6]]){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define i32 @extract_var_v16i8_u(<16 x i8> %v, i32 %i) { + %elem = extractelement <16 x i8> %v, i32 %i + %a = zext i8 %elem to i32 + ret i32 %a +} + ; CHECK-LABEL: extract_undef_v16i8_u: ; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .param v128{{$}} @@ -101,6 +141,25 @@ ret i8 %elem } +; CHECK-LABEL: extract_var_v16i8: +; NO-SIMD128-NOT: i8x16 +; SIMD128-NEXT: .param v128, i32{{$}} +; SIMD128-NEXT: .result i32{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 15{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} +; SIMD128-NEXT: i32.or $push[[L6:[0-9]+]]=, $2, $pop[[L5]]{{$}} +; SIMD128-NEXT: i32.load8_u $push[[R:[0-9]+]]=, 0($pop[[L6]]){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define i8 @extract_var_v16i8(<16 x i8> %v, i32 %i) { + %elem = extractelement <16 x i8> %v, i32 %i + ret i8 %elem +} + ; CHECK-LABEL: extract_undef_v16i8: ; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .param v128{{$}} @@ -123,6 +182,26 @@ ret <16 x i8> %res } +; CHECK-LABEL: replace_var_v16i8: +; NO-SIMD128-NOT: i8x16 +; SIMD128-NEXT: .param v128, i32, i32{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: 
tee_local $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 15{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} +; SIMD128-NEXT: i32.or $push[[L6:[0-9]+]]=, $3, $pop[[L5]]{{$}} +; SIMD128-NEXT: i32.store8 0($pop[[L6]]), $2{{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define <16 x i8> @replace_var_v16i8(<16 x i8> %v, i32 %i, i8 %x) { + %res = insertelement <16 x i8> %v, i8 %x, i32 %i + ret <16 x i8> %res +} + ; CHECK-LABEL: replace_undef_v16i8: ; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .param v128, i32{{$}} @@ -252,6 +331,28 @@ ret i32 %a } +; CHECK-LABEL: extract_var_v8i16_s: +; NO-SIMD128-NOT: i16x8 +; SIMD128-NEXT: .param v128, i32{{$}} +; SIMD128-NEXT: .result i32{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 7{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} +; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 1{{$}} +; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} +; SIMD128-NEXT: i32.or $push[[L8:[0-9]+]]=, $2, $pop[[L7]]{{$}} +; SIMD128-NEXT: i32.load16_s $push[[R:[0-9]+]]=, 0($pop[[L8]]){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define i32 @extract_var_v8i16_s(<8 x i16> %v, i32 %i) { + %elem = extractelement <8 x i16> %v, i32 %i + %a = sext i16 %elem to i32 + ret i32 %a +} + ; CHECK-LABEL: extract_undef_v8i16_s: ; NO-SIMD128-NOT: i16x8 ; SIMD128-NEXT: .param v128{{$}} @@ -276,6 +377,28 @@ ret i32 %a } +; CHECK-LABEL: extract_var_v8i16_u: +; NO-SIMD128-NOT: i16x8 +; SIMD128-NEXT: .param v128, i32{{$}} +; SIMD128-NEXT: 
.result i32{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 7{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} +; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 1{{$}} +; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} +; SIMD128-NEXT: i32.or $push[[L8:[0-9]+]]=, $2, $pop[[L7]]{{$}} +; SIMD128-NEXT: i32.load16_u $push[[R:[0-9]+]]=, 0($pop[[L8]]){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define i32 @extract_var_v8i16_u(<8 x i16> %v, i32 %i) { + %elem = extractelement <8 x i16> %v, i32 %i + %a = zext i16 %elem to i32 + ret i32 %a +} + ; CHECK-LABEL: extract_undef_v8i16_u: ; NO-SIMD128-NOT: i16x8 ; SIMD128-NEXT: .param v128{{$}} @@ -299,6 +422,27 @@ ret i16 %elem } +; CHECK-LABEL: extract_var_v8i16: +; NO-SIMD128-NOT: i16x8 +; SIMD128-NEXT: .param v128, i32{{$}} +; SIMD128-NEXT: .result i32{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 7{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} +; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 1{{$}} +; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} +; SIMD128-NEXT: i32.or $push[[L8:[0-9]+]]=, $2, $pop[[L7]]{{$}} +; SIMD128-NEXT: i32.load16_u $push[[R:[0-9]+]]=, 0($pop[[L8]]){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define i16 @extract_var_v8i16(<8 x i16> %v, i32 %i) { + %elem = 
extractelement <8 x i16> %v, i32 %i + ret i16 %elem +} + ; CHECK-LABEL: extract_undef_v8i16: ; NO-SIMD128-NOT: i16x8 ; SIMD128-NEXT: .param v128{{$}} @@ -321,6 +465,28 @@ ret <8 x i16> %res } +; CHECK-LABEL: replace_var_v8i16: +; NO-SIMD128-NOT: i16x8 +; SIMD128-NEXT: .param v128, i32, i32{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 7{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} +; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 1{{$}} +; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} +; SIMD128-NEXT: i32.or $push[[L8:[0-9]+]]=, $3, $pop[[L7]]{{$}} +; SIMD128-NEXT: i32.store16 0($pop[[L8]]), $2{{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define <8 x i16> @replace_var_v8i16(<8 x i16> %v, i32 %i, i16 %x) { + %res = insertelement <8 x i16> %v, i16 %x, i32 %i + ret <8 x i16> %res +} + ; CHECK-LABEL: replace_undef_v8i16: ; NO-SIMD128-NOT: i16x8 ; SIMD128-NEXT: .param v128, i32{{$}} @@ -427,6 +593,27 @@ ret i32 %elem } +; CHECK-LABEL: extract_var_v4i32: +; NO-SIMD128-NOT: i32x4 +; SIMD128-NEXT: .param v128, i32{{$}} +; SIMD128-NEXT: .result i32{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 3{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} +; 
SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 2{{$}} +; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} +; SIMD128-NEXT: i32.or $push[[L4:[0-9]+]]=, $2, $pop[[L7]]{{$}} +; SIMD128-NEXT: i32.load $push[[R:[0-9]+]]=, 0($pop[[L4]]){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define i32 @extract_var_v4i32(<4 x i32> %v, i32 %i) { + %elem = extractelement <4 x i32> %v, i32 %i + ret i32 %elem +} + ; CHECK-LABEL: extract_undef_v4i32: ; NO-SIMD128-NOT: i32x4 ; SIMD128-NEXT: .param v128{{$}} @@ -449,6 +636,28 @@ ret <4 x i32> %res } +; CHECK-LABEL: replace_var_v4i32: +; NO-SIMD128-NOT: i32x4 +; SIMD128-NEXT: .param v128, i32, i32{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 3{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} +; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 2{{$}} +; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} +; SIMD128-NEXT: i32.or $push[[L4:[0-9]+]]=, $3, $pop[[L7]]{{$}} +; SIMD128-NEXT: i32.store 0($pop[[L4]]), $2{{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define <4 x i32> @replace_var_v4i32(<4 x i32> %v, i32 %i, i32 %x) { + %res = insertelement <4 x i32> %v, i32 %x, i32 %i + ret <4 x i32> %res +} + ; CHECK-LABEL: replace_undef_v4i32: ; NO-SIMD128-NOT: i32x4 ; SIMD128-NEXT: .param v128, i32{{$}} @@ -547,6 +756,27 @@ ret i64 %elem } +; CHECK-LABEL: extract_var_v2i64: +; NO-SIMD128-NOT: i64x2 +; SIMD128-NEXT: .param v128, i32{{$}} +; SIMD128-NEXT: .result i64{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const 
$push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 1{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} +; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 3{{$}} +; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} +; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $2, $pop[[L7]]{{$}} +; SIMD128-NEXT: i64.load $push[[R:[0-9]+]]=, 0($pop[[L2]]){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define i64 @extract_var_v2i64(<2 x i64> %v, i32 %i) { + %elem = extractelement <2 x i64> %v, i32 %i + ret i64 %elem +} + ; CHECK-LABEL: extract_undef_v2i64: ; NO-SIMD128-NOT: i64x2 ; SIMD128-VM-NOT: i64x2 @@ -571,6 +801,29 @@ ret <2 x i64> %res } +; CHECK-LABEL: replace_var_v2i64: +; NO-SIMD128-NOT: i64x2 +; SIMD128-VM-NOT: i64x2 +; SIMD128-NEXT: .param v128, i32, i64{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 1{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} +; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 3{{$}} +; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} +; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $3, $pop[[L7]]{{$}} +; SIMD128-NEXT: i64.store 0($pop[[L2]]), $2{{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define <2 x i64> @replace_var_v2i64(<2 x i64> %v, i32 %i, i64 %x) { + %res = insertelement <2 x i64> %v, i64 %x, i32 %i + ret <2 x i64> %res +} + ; 
CHECK-LABEL: replace_undef_v2i64: ; NO-SIMD128-NOT: i64x2 ; SIMD128-VM-NOT: i64x2 @@ -666,6 +919,27 @@ ret float %elem } +; CHECK-LABEL: extract_var_v4f32: +; NO-SIMD128-NOT: f32x4 +; SIMD128-NEXT: .param v128, i32{{$}} +; SIMD128-NEXT: .result f32{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 3{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} +; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 2{{$}} +; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} +; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $2, $pop[[L7]]{{$}} +; SIMD128-NEXT: f32.load $push[[R:[0-9]+]]=, 0($pop[[L2]]){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define float @extract_var_v4f32(<4 x float> %v, i32 %i) { + %elem = extractelement <4 x float> %v, i32 %i + ret float %elem +} + ; CHECK-LABEL: extract_undef_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .param v128{{$}} @@ -688,6 +962,28 @@ ret <4 x float> %res } +; CHECK-LABEL: replace_var_v4f32: +; NO-SIMD128-NOT: f32x4 +; SIMD128-NEXT: .param v128, i32, f32{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 3{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} +; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 2{{$}} +; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} +; SIMD128-NEXT: 
i32.or $push[[L2:[0-9]+]]=, $3, $pop[[L7]]{{$}} +; SIMD128-NEXT: f32.store 0($pop[[L2]]), $2{{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define <4 x float> @replace_var_v4f32(<4 x float> %v, i32 %i, float %x) { + %res = insertelement <4 x float> %v, float %x, i32 %i + ret <4 x float> %res +} + ; CHECK-LABEL: replace_undef_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .param v128, f32{{$}} @@ -785,6 +1081,27 @@ ret double %elem } +; CHECK-LABEL: extract_var_v2f64: +; NO-SIMD128-NOT: f64x2 +; SIMD128-NEXT: .param v128, i32{{$}} +; SIMD128-NEXT: .result f64{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 1{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} +; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 3{{$}} +; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} +; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $2, $pop[[L7]]{{$}} +; SIMD128-NEXT: f64.load $push[[R:[0-9]+]]=, 0($pop[[L2]]){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define double @extract_var_v2f64(<2 x double> %v, i32 %i) { + %elem = extractelement <2 x double> %v, i32 %i + ret double %elem +} + ; CHECK-LABEL: extract_undef_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-VM-NOT: f64x2 @@ -809,6 +1126,29 @@ ret <2 x double> %res } +; CHECK-LABEL: replace_var_v2f64: +; NO-SIMD128-NOT: f64x2 +; SIMD128-VM-NOT: f64x2 +; SIMD128-NEXT: .param v128, i32, f64{{$}} +; SIMD128-NEXT: .result v128{{$}} +; SIMD128-NEXT: get_global $push[[L0:[0-9]+]]=, __stack_pointer@GLOBAL{{$}} +; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} +; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], 
$pop[[L1]]{{$}} +; SIMD128-NEXT: tee_local $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} +; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} +; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 1{{$}} +; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} +; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 3{{$}} +; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} +; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $3, $pop[[L7]]{{$}} +; SIMD128-NEXT: f64.store 0($pop[[L2]]), $2{{$}} +; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define <2 x double> @replace_var_v2f64(<2 x double> %v, i32 %i, double %x) { + %res = insertelement <2 x double> %v, double %x, i32 %i + ret <2 x double> %res +} + ; CHECK-LABEL: replace_undef_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-VM-NOT: f64x2