Index: include/clang/Basic/arm_neon.td
===================================================================
--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -1499,11 +1499,10 @@
   def VMULX_LANEH   : IOpInst<"vmulx_lane", "ddgi", "hQh", OP_MULX_LN>;
   def VMULX_LANEQH  : IOpInst<"vmulx_laneq", "ddji", "hQh", OP_MULX_LN>;
   def VMULX_NH      : IOpInst<"vmulx_n", "dds", "hQh", OP_MULX_N>;
-  // TODO: Scalar floating point multiply extended (scalar, by element)
-  // Below ones are commented out because they need vmulx_f16(float16_t, float16_t)
-  // which will be implemented later with fp16 scalar intrinsic (arm_fp16.h)
-  //def SCALAR_FMULX_LANEH : IOpInst<"vmulx_lane", "ssdi", "Sh", OP_SCALAR_MUL_LN>;
-  //def SCALAR_FMULX_LANEQH : IOpInst<"vmulx_laneq", "ssji", "Sh", OP_SCALAR_MUL_LN>;
+
+  // Scalar floating point multiply extended (scalar, by element)
+  def SCALAR_FMULX_LANEH : IOpInst<"vmulx_lane", "ssdi", "Sh", OP_SCALAR_MUL_LN>;
+  def SCALAR_FMULX_LANEQH : IOpInst<"vmulx_laneq", "ssji", "Sh", OP_SCALAR_MUL_LN>;
 
   // ARMv8.2-A FP16 reduction vector intrinsics.
   def VMAXVH   : SInst<"vmaxv", "sd", "hQh">;
Index: test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
===================================================================
--- test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -1223,27 +1223,25 @@
   return vmulxq_n_f16(a, b);
 }
 
-/* TODO: Not implemented yet (needs scalar intrinsic from arm_fp16.h)
-// CCHECK-LABEL: test_vmulxh_lane_f16
-// CCHECK: [[CONV0:%.*]] = fpext half %a to float
-// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
-// CCHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
-// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half
-// CCHECK: ret half [[CONV3:%.*]]
+// CHECK-LABEL: test_vmulxh_lane_f16
+// CHECK: [[CONV0:%.*]] = fpext half %a to float
+// CHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
+// CHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
+// CHECK: [[CONV3:%.*]] = fptrunc float %mul to half
+// CHECK: ret half [[CONV3:%.*]]
 float16_t test_vmulxh_lane_f16(float16_t a, float16x4_t b) {
   return vmulxh_lane_f16(a, b, 3);
 }
 
-// CCHECK-LABEL: test_vmulxh_laneq_f16
-// CCHECK: [[CONV0:%.*]] = fpext half %a to float
-// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
-// CCHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
-// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half
-// CCHECK: ret half [[CONV3:%.*]]
+// CHECK-LABEL: test_vmulxh_laneq_f16
+// CHECK: [[CONV0:%.*]] = fpext half %a to float
+// CHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
+// CHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
+// CHECK: [[CONV3:%.*]] = fptrunc float %mul to half
+// CHECK: ret half [[CONV3:%.*]]
 float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) {
   return vmulxh_laneq_f16(a, b, 7);
 }
-*/
 
 // CHECK-LABEL: test_vmaxv_f16
 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>