diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1812,6 +1812,7 @@ } static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); if (Op.getValueType() != MVT::v2f64) return SDValue(); @@ -1849,10 +1850,7 @@ !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex)) return SDValue(); - if (LHSOpcode != RHSOpcode || LHSSrcVec != RHSSrcVec) - return SDValue(); - - if (LHSIndex != 0 || RHSIndex != 1) + if (LHSOpcode != RHSOpcode) return SDValue(); MVT ExpectedSrcVT; @@ -1868,7 +1866,13 @@ if (LHSSrcVec.getValueType() != ExpectedSrcVT) return SDValue(); - return DAG.getNode(LHSOpcode, SDLoc(Op), MVT::v2f64, LHSSrcVec); + auto Src = LHSSrcVec; + if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) { + // Shuffle the source vector so that the converted lanes are the low lanes. 
+ Src = DAG.getVectorShuffle(ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec, + {LHSIndex, RHSIndex + 4, -1, -1}); + } + return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src); } SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll --- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll @@ -317,13 +317,9 @@ ; CHECK-NEXT: v128.store 0 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32x4.extract_lane 2 -; CHECK-NEXT: f64.convert_i32_s -; CHECK-NEXT: f64x2.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32x4.extract_lane 3 -; CHECK-NEXT: f64.convert_i32_s -; CHECK-NEXT: f64x2.replace_lane 1 +; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: f64x2.convert_low_i32x4_s ; CHECK-NEXT: v128.store 16 ; CHECK-NEXT: # fallthrough-return %v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> @@ -341,13 +337,9 @@ ; CHECK-NEXT: v128.store 0 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32x4.extract_lane 2 -; CHECK-NEXT: f64.convert_i32_u -; CHECK-NEXT: f64x2.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32x4.extract_lane 3 -; CHECK-NEXT: f64.convert_i32_u -; CHECK-NEXT: f64x2.replace_lane 1 +; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: f64x2.convert_low_i32x4_u ; CHECK-NEXT: v128.store 16 ; CHECK-NEXT: # fallthrough-return %v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> @@ -366,13 +358,9 @@ ; CHECK-NEXT: v128.store 0 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32x4.extract_lane 2 -; CHECK-NEXT: f64.convert_i32_s -; CHECK-NEXT: f64x2.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32x4.extract_lane 3 -; CHECK-NEXT: f64.convert_i32_s -; CHECK-NEXT: f64x2.replace_lane 1 +; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 
0, 0, 0, 0, 0 +; CHECK-NEXT: f64x2.convert_low_i32x4_s ; CHECK-NEXT: v128.store 16 ; CHECK-NEXT: # fallthrough-return %v = sitofp <8 x i32> %x to <8 x double> @@ -390,13 +378,9 @@ ; CHECK-NEXT: v128.store 0 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32x4.extract_lane 2 -; CHECK-NEXT: f64.convert_i32_u -; CHECK-NEXT: f64x2.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32x4.extract_lane 3 -; CHECK-NEXT: f64.convert_i32_u -; CHECK-NEXT: f64x2.replace_lane 1 +; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: f64x2.convert_low_i32x4_u ; CHECK-NEXT: v128.store 16 ; CHECK-NEXT: # fallthrough-return %v = uitofp <8 x i32> %x to <8 x double> @@ -414,13 +398,9 @@ ; CHECK-NEXT: v128.store 0 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: f32x4.extract_lane 2 -; CHECK-NEXT: f64.promote_f32 -; CHECK-NEXT: f64x2.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: f32x4.extract_lane 3 -; CHECK-NEXT: f64.promote_f32 -; CHECK-NEXT: f64x2.replace_lane 1 +; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: f64x2.promote_low_f32x4 ; CHECK-NEXT: v128.store 16 ; CHECK-NEXT: # fallthrough-return %v = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> @@ -438,16 +418,26 @@ ; CHECK-NEXT: v128.store 0 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: f32x4.extract_lane 2 -; CHECK-NEXT: f64.promote_f32 -; CHECK-NEXT: f64x2.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: f32x4.extract_lane 3 -; CHECK-NEXT: f64.promote_f32 -; CHECK-NEXT: f64x2.replace_lane 1 +; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: f64x2.promote_low_f32x4 ; CHECK-NEXT: v128.store 16 ; CHECK-NEXT: # fallthrough-return %v = fpext <8 x float> %x to <8 x double> %a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> ret <4 x double> %a } + +define <2 x double> @promote_mixed_v2f64(<4 x float> %x, <4 x float> %y) { 
+; CHECK-LABEL: promote_mixed_v2f64: +; CHECK: .functype promote_mixed_v2f64 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: f64x2.promote_low_f32x4 +; CHECK-NEXT: # fallthrough-return + %v = shufflevector <4 x float> %x, <4 x float> %y, <2 x i32> <i32 2, i32 7> + %a = fpext <2 x float> %v to <2 x double> + ret <2 x double> %a +}