diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3000,6 +3000,44 @@
     setOriginForNaryOp(I);
   }
 
+  // Build the (X, X, X+2, X+2, ...) mask; X starts at 1 when Imm is non-zero.
+  Constant *getPclmulMask(IRBuilder<> &IRB, unsigned Width, unsigned Imm) {
+    SmallVector<Constant *, 8> Mask;
+    for (unsigned X = Imm ? 1 : 0; X < Width; X += 2) {
+      Constant *C = ConstantInt::get(IRB.getInt32Ty(), X);
+      Mask.push_back(C);
+      Mask.push_back(C);
+    }
+    return ConstantVector::get(Mask);
+  }
+
+  // Instrument pclmul intrinsics.
+  // These intrinsics operate either on odd or on even elements of the input
+  // vectors, depending on the constant in the 3rd argument, ignoring the rest.
+  // Replace the unused elements with copies of the used ones, ex:
+  //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
+  // or
+  //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
+  // and then apply the usual shadow combining logic.
+  void handlePclmulIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *Undef = UndefValue::get(getShadowTy(&I));
+    unsigned Width = I.getArgOperand(0)->getType()->getVectorNumElements();
+    assert(isa<ConstantInt>(I.getArgOperand(2)) &&
+           "pclmul 3rd operand must be a constant");
+    unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+    // A set Imm bit 0 (operand 0) / bit 4 (operand 1) picks the odd elements;
+    // a clear bit picks the even ones.
+    Value *Shuf0 = IRB.CreateShuffleVector(
+        getShadow(&I, 0), Undef, getPclmulMask(IRB, Width, Imm & 0x01));
+    Value *Shuf1 = IRB.CreateShuffleVector(
+        getShadow(&I, 1), Undef, getPclmulMask(IRB, Width, Imm & 0x10));
+    ShadowAndOriginCombiner OC(this, IRB);
+    OC.Add(Shuf0, getOrigin(&I, 0));
+    OC.Add(Shuf1, getOrigin(&I, 1));
+    OC.Done(&I);
+  }
+
   void visitIntrinsicInst(IntrinsicInst &I) {
     switch (I.getIntrinsicID()) {
     case Intrinsic::lifetime_start:
@@ -3233,6 +3271,12 @@
       handleBmiIntrinsic(I);
       break;
 
+    case Intrinsic::x86_pclmulqdq:
+    case Intrinsic::x86_pclmulqdq_256:
+    case Intrinsic::x86_pclmulqdq_512:
+      handlePclmulIntrinsic(I);
+      break;
+
     case Intrinsic::is_constant:
       // The result of llvm.is.constant() is always defined.
setShadow(&I, getCleanShadow(&I));
diff --git a/llvm/test/Instrumentation/MemorySanitizer/clmul.ll b/llvm/test/Instrumentation/MemorySanitizer/clmul.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/clmul.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -msan-check-access-address=0 -S -passes=msan 2>&1 | FileCheck \
+; RUN: %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck %s --check-prefixes=CHECK,ORIGIN
+; REQUIRES: x86-registered-target
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8 immarg) nounwind readnone
+declare <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64>, <4 x i64>, i8 immarg) nounwind readnone
+declare <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64>, <8 x i64>, i8 immarg) nounwind readnone
+
+; imm 0: bits 0 and 4 are clear, so both operand shadows use the even mask.
+define <2 x i64> @clmul00(<2 x i64> %a, <2 x i64> %b) sanitize_memory {
+entry:
+  %0 = tail call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a, <2 x i64> %b, i8 0)
+  ret <2 x i64> %0
+}
+
+; CHECK-LABEL: @clmul00
+; CHECK: %[[S0:.*]] = load <2 x i64>, <2 x i64>* {{.*}}@__msan_param_tls
+; CHECK: %[[S1:.*]] = load <2 x i64>, <2 x i64>* {{.*}}@__msan_param_tls
+; CHECK: %[[SHUF1:.*]] = shufflevector <2 x i64> %[[S1]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK: %[[SHUF0:.*]] = shufflevector <2 x i64> %[[S0]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK: %[[SRET:.*]] = or <2 x i64> %[[SHUF1]], %[[SHUF0]]
+; CHECK: store <2 x i64> %[[SRET]], <2 x i64>* {{.*}}@__msan_retval_tls
+
+; imm 16 (0x10): only bit 4 is set, so one shadow uses the even mask and the other the odd mask.
+define <2 x i64> @clmul10(<2 x i64> %a, <2 x i64> %b) sanitize_memory {
+entry:
+  %0 = tail call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a, <2 x i64> %b, i8 16)
+  ret <2 x i64> %0
+}
+
+; CHECK-LABEL: @clmul10
+; CHECK: %[[S0:.*]] = load <2 x i64>, <2 x i64>* {{.*}}@__msan_param_tls
+; CHECK: %[[S1:.*]] = load <2 x i64>, <2 x i64>* {{.*}}@__msan_param_tls
+; CHECK: %[[SHUF1:.*]] = shufflevector <2 x i64> %[[S1]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK: %[[SHUF0:.*]] = shufflevector <2 x i64> %[[S0]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+; CHECK: %[[SRET:.*]] = or <2 x i64> %[[SHUF1]], %[[SHUF0]]
+; CHECK: store <2 x i64> %[[SRET]], <2 x i64>* {{.*}}@__msan_retval_tls
+
+; 512-bit variant with imm 16; the origin-tracking run also verifies the stored origin.
+define <8 x i64> @clmul01_512(<8 x i64> %a, <8 x i64> %b) sanitize_memory {
+entry:
+  %0 = tail call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a, <8 x i64> %b, i8 16)
+  ret <8 x i64> %0
+}
+
+; CHECK-LABEL: @clmul01_512
+; CHECK: %[[S0:.*]] = load <8 x i64>, <8 x i64>* {{.*}}@__msan_param_tls
+; CHECK: %[[S1:.*]] = load <8 x i64>, <8 x i64>* {{.*}}@__msan_param_tls
+; CHECK: %[[SHUF1:.*]] = shufflevector <8 x i64> %[[S1]], <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK: %[[SHUF0:.*]] = shufflevector <8 x i64> %[[S0]], <8 x i64> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+; CHECK: %[[SRET:.*]] = or <8 x i64> %[[SHUF1]], %[[SHUF0]]
+; ORIGIN: %[[FLAT:.*]] = bitcast <8 x i64> %[[SHUF0]] to i512
+; ORIGIN: %[[I:.*]] = icmp ne i512 %[[FLAT]], 0
+; ORIGIN: %[[O:.*]] = select i1 %[[I]],
+; CHECK: store <8 x i64> %[[SRET]], <8 x i64>* {{.*}}@__msan_retval_tls
+; ORIGIN: store i32 %[[O]], i32* @__msan_retval_origin_tls