diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3487,6 +3487,28 @@
     setOriginForNaryOp(I);
   }
 
+  void handleVpermilvarIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    // Apply the same operation to the shadow of the first argument.
+    Value *ShadowIn =
+        IRB.CreateBitCast(getShadow(&I, 0), I.getArgOperand(0)->getType());
+    IntrinsicInst *ShadowInst = cast<IntrinsicInst>(IRB.Insert(I.clone()));
+    ShadowInst->setArgOperand(0, ShadowIn);
+    Value *Shadow = IRB.CreateBitCast(ShadowInst, getShadowTy(&I));
+
+    // sext(icmp ne 0) of the second arg; this is stricter than necessary, as
+    // only a few low bits of each element are actually used.
+    Value *Shadow1 = getShadow(&I, 1);
+    Shadow1 = IRB.CreateSExt(
+        IRB.CreateICmpNE(Shadow1, Constant::getNullValue(Shadow1->getType())),
+        Shadow->getType());
+    Shadow = IRB.CreateOr(Shadow, Shadow1);
+
+    setShadow(&I, Shadow);
+    setOriginForNaryOp(I);
+  }
+
   void handleBinarySdSsIntrinsic(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
     unsigned Width =
@@ -3789,6 +3811,15 @@
       handleBinarySdSsIntrinsic(I);
       break;
 
+    case Intrinsic::x86_avx_vpermilvar_pd:
+    case Intrinsic::x86_avx_vpermilvar_pd_256:
+    case Intrinsic::x86_avx_vpermilvar_ps:
+    case Intrinsic::x86_avx_vpermilvar_ps_256:
+    case Intrinsic::x86_avx512_vpermilvar_pd_512:
+    case Intrinsic::x86_avx512_vpermilvar_ps_512:
+      handleVpermilvarIntrinsic(I);
+      break;
+
     case Intrinsic::fshl:
     case Intrinsic::fshr:
       handleFunnelShift(I);
diff --git a/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll
--- a/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll
@@ -956,18 +956,14 @@
 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <2 x i64>*), align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <2 x i64>*), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0:%.*]], <2 x i64> [[A1:%.*]])
-; CHECK-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <2 x double>
+; CHECK-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[TMP3]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x double> [[TMP4]] to <2 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = sext <2 x i1> [[TMP6]] to <2 x i64>
+; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <2 x i64>*), align 8
 ; CHECK-NEXT: ret <2 x double> [[RES]]
 ;
 %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
@@ -981,18 +977,14 @@
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <4 x i64>*), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> [[A1:%.*]])
-; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[TMP3]], <4 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x double> [[TMP4]] to <4 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i1> [[TMP6]] to <4 x i64>
+; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i64> [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> [[A1]])
+; CHECK-NEXT: store <4 x i64> [[TMP8]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8
 ; CHECK-NEXT: ret <4 x double> [[RES]]
 ;
 %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
@@ -1004,15 +996,11 @@
 ; CHECK-LABEL: @test_x86_avx_vpermilvar_pd_256_2(
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i64>*), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[TMP2]], <4 x i64> )
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[TMP3]] to <4 x i64>
 ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> )
-; CHECK-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8
+; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i64>*), align 8
 ; CHECK-NEXT: ret <4 x double> [[RES]]
 ;
 %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> ) ; <<4 x double>> [#uses=1]
@@ -1024,18 +1012,14 @@
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <4 x i32>*), align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 16) to <4 x i32>*), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A1:%.*]])
-; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float>
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[TMP3]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <4 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i1> [[TMP6]] to <4 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
 ; CHECK-NEXT: ret <4 x float> [[RES]]
 ;
 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
@@ -1057,18 +1041,14 @@
 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to <4 x i32>*
 ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 16
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
+; CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[TMP8]], <4 x i32> [[A2]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <4 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <4 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i1> [[TMP11]] to <4 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i32> [[TMP10]], [[TMP12]]
 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A2]])
-; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
+; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <4 x i32>*), align 8
 ; CHECK-NEXT: ret <4 x float> [[RES]]
 ;
 %a2 = load <4 x i32>, <4 x i32>* %a1
@@ -1083,18 +1063,14 @@
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([100 x i64]* @__msan_param_tls to <8 x i32>*), align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__msan_param_tls to i64), i64 32) to <8 x i32>*), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]])
-; CHECK-NEXT: store <8 x i32> zeroinitializer, <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[TMP3]], <8 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x float> [[TMP4]] to <8 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = sext <8 x i1> [[TMP6]] to <8 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i32> [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0:%.*]], <8 x i32> [[A1]])
+; CHECK-NEXT: store <8 x i32> [[TMP8]], <8 x i32>* bitcast ([100 x i64]* @__msan_retval_tls to <8 x i32>*), align 8
 ; CHECK-NEXT: ret <8 x float> [[RES]]
 ;
 %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]