diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -60,10 +60,12 @@ #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" +#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" @@ -1112,7 +1114,9 @@ // Build a minimal pipeline based on the semantics required by Clang, // which is just that always inlining occurs. - MPM.addPass(AlwaysInlinerPass()); + // We always pass false here since, according to the legacy PM logic for + // enabling lifetime intrinsics, they are not required at O0. + MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false)); // At -O0 we directly run necessary sanitizer passes. if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) @@ -1210,10 +1214,25 @@ MPM = PB.buildPerModuleDefaultPipeline(Level, CodeGenOpts.DebugPassManager); } + + // There is a test that checks that the PruneEH pass is run to + // remove unused exception handling info when a PGO sample profile file is + // provided. There currently does not seem to be a new PM port for PruneEH, + // but many opt tests instead substitute the 'function-attrs' and + // 'function(simplify-cfg)' passes to produce the same IR. The + // PostOrderFunctionAttrsPass is already in the pipeline, but + // SimplifyCFGPass is not, so we add it here when PGO is requested. + if (PGOOpt) + MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass())); } if (CodeGenOpts.OptimizationLevel == 0) addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts); + + if (CodeGenOpts.hasProfileIRInstr()) { + // The instrumentation profile file name is stored in PGOOpt->ProfileFile. + MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->ProfileFile)); + } } // FIXME: We still use the legacy pass manager to do code generation.
We @@ -1267,7 +1286,8 @@ NeedCodeGen = true; CodeGenPasses.add( createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - if (!CodeGenOpts.SplitDwarfFile.empty()) { + if (!CodeGenOpts.SplitDwarfFile.empty() && + CodeGenOpts.getSplitDwarfMode() == CodeGenOptions::SplitFileFission) { DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile); if (!DwoOS) return; diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -23,6 +23,7 @@ CLANG_ENABLE_ARCMT CLANG_ENABLE_STATIC_ANALYZER ENABLE_BACKTRACES + ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER HAVE_LIBZ LLVM_ENABLE_PER_TARGET_RUNTIME_DIR LLVM_ENABLE_PLUGINS) diff --git a/clang/test/CodeGen/aarch64-neon-across.c b/clang/test/CodeGen/aarch64-neon-across.c --- a/clang/test/CodeGen/aarch64-neon-across.c +++ b/clang/test/CodeGen/aarch64-neon-across.c @@ -6,7 +6,7 @@ #include <arm_neon.h> // CHECK-LABEL: define i16 @test_vaddlv_s8(<8 x i8> %a) #0 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR:#[0-9]+]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16 // CHECK: ret i16 [[TMP0]] int16_t test_vaddlv_s8(int8x8_t a) { @@ -14,14 +14,14 @@ } // CHECK-LABEL: define i32 @test_vaddlv_s16(<4 x i16> %a) #0 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VADDLV_I]] int32_t test_vaddlv_s16(int16x4_t a) { return vaddlv_s16(a); } // CHECK-LABEL: define i16 @test_vaddlv_u8(<8 x i8> %a) #0 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16 // CHECK: ret i16 [[TMP0]] uint16_t test_vaddlv_u8(uint8x8_t a) { @@ -29,14 +29,14 @@ } // CHECK-LABEL: define i32 @test_vaddlv_u16(<4 x i16> %a) #0 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VADDLV_I]] uint32_t test_vaddlv_u16(uint16x4_t a) { return vaddlv_u16(a); } // CHECK-LABEL: define i16 @test_vaddlvq_s8(<16 x i8> %a) #1 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16 // CHECK: ret i16 [[TMP0]] int16_t test_vaddlvq_s8(int8x16_t a) { @@ -44,21 +44,21 @@ } // CHECK-LABEL: define i32 @test_vaddlvq_s16(<8 x i16> %a) #1 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VADDLV_I]] int32_t test_vaddlvq_s16(int16x8_t a) { return vaddlvq_s16(a); } // CHECK-LABEL: define i64 @test_vaddlvq_s32(<4 x i32> %a) #1 { -// CHECK: [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) #3 +// CHECK: [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VADDLVQ_S32_I]] int64_t test_vaddlvq_s32(int32x4_t a) { return vaddlvq_s32(a); }
// CHECK-LABEL: define i16 @test_vaddlvq_u8(<16 x i8> %a) #1 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16 // CHECK: ret i16 [[TMP0]] uint16_t test_vaddlvq_u8(uint8x16_t a) { @@ -66,21 +66,21 @@ } // CHECK-LABEL: define i32 @test_vaddlvq_u16(<8 x i16> %a) #1 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VADDLV_I]] uint32_t test_vaddlvq_u16(uint16x8_t a) { return vaddlvq_u16(a); } // CHECK-LABEL: define i64 @test_vaddlvq_u32(<4 x i32> %a) #1 { -// CHECK: [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) #3 +// CHECK: [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VADDLVQ_U32_I]] uint64_t test_vaddlvq_u32(uint32x4_t a) { return vaddlvq_u32(a); } // CHECK-LABEL: define i8 @test_vmaxv_s8(<8 x i8> %a) #0 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 // CHECK: ret i8 [[TMP0]] int8_t test_vmaxv_s8(int8x8_t a) { @@ -88,7 +88,7 @@ } // CHECK-LABEL: define i16 @test_vmaxv_s16(<4 x i16> %a) #0 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16 // CHECK: ret i16 [[TMP2]] int16_t test_vmaxv_s16(int16x4_t a) { @@ -96,7 +96,7 @@ } // CHECK-LABEL: define i8 @test_vmaxv_u8(<8 x i8> %a) #0 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 // CHECK: ret i8 [[TMP0]] uint8_t test_vmaxv_u8(uint8x8_t a) { @@ -104,7 +104,7 @@ } // CHECK-LABEL: define i16 @test_vmaxv_u16(<4 x i16> %a) #0 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16 // CHECK: ret i16 [[TMP2]] uint16_t test_vmaxv_u16(uint16x4_t a) { @@ -112,7 +112,7 @@ } // CHECK-LABEL: define i8 @test_vmaxvq_s8(<16 x i8> %a) #1 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 // CHECK: ret i8 [[TMP0]] int8_t test_vmaxvq_s8(int8x16_t a) { @@ -120,7 +120,7 @@ } // CHECK-LABEL: define i16 @test_vmaxvq_s16(<8 x i16> %a) #1 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16 // CHECK: ret i16 [[TMP2]] int16_t test_vmaxvq_s16(int16x8_t a) { @@ -128,14 +128,14 @@ } // CHECK-LABEL: define i32 @test_vmaxvq_s32(<4 
x i32> %a) #1 { -// CHECK: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) #3 +// CHECK: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VMAXVQ_S32_I]] int32_t test_vmaxvq_s32(int32x4_t a) { return vmaxvq_s32(a); } // CHECK-LABEL: define i8 @test_vmaxvq_u8(<16 x i8> %a) #1 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 // CHECK: ret i8 [[TMP0]] uint8_t test_vmaxvq_u8(uint8x16_t a) { @@ -143,7 +143,7 @@ } // CHECK-LABEL: define i16 @test_vmaxvq_u16(<8 x i16> %a) #1 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16 // CHECK: ret i16 [[TMP2]] uint16_t test_vmaxvq_u16(uint16x8_t a) { @@ -151,14 +151,14 @@ } // CHECK-LABEL: define i32 @test_vmaxvq_u32(<4 x i32> %a) #1 { -// CHECK: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) #3 +// CHECK: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VMAXVQ_U32_I]] uint32_t test_vmaxvq_u32(uint32x4_t a) { return vmaxvq_u32(a); } // CHECK-LABEL: define i8 @test_vminv_s8(<8 x i8> %a) #0 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 // CHECK: ret i8 [[TMP0]] int8_t test_vminv_s8(int8x8_t a) { @@ -166,7 +166,7 @@ } // CHECK-LABEL: define i16 @test_vminv_s16(<4 x i16> %a) #0 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16 // CHECK: ret i16 [[TMP2]] int16_t test_vminv_s16(int16x4_t a) { @@ -174,7 +174,7 @@ } // CHECK-LABEL: define i8 @test_vminv_u8(<8 x i8> %a) #0 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 // CHECK: ret i8 [[TMP0]] uint8_t test_vminv_u8(uint8x8_t a) { @@ -182,7 +182,7 @@ } // CHECK-LABEL: define i16 @test_vminv_u16(<4 x i16> %a) #0 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16 // CHECK: ret i16 [[TMP2]] uint16_t test_vminv_u16(uint16x4_t a) { @@ -190,7 +190,7 @@ } // CHECK-LABEL: define i8 @test_vminvq_s8(<16 x i8> %a) #1 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 // CHECK: ret i8 [[TMP0]] int8_t test_vminvq_s8(int8x16_t a) { @@ -198,7 +198,7 @@ } // CHECK-LABEL: define i16 @test_vminvq_s16(<8 x i16> %a) #1 { -// CHECK: [[VMINV_I:%.*]] = call i32 
@llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16 // CHECK: ret i16 [[TMP2]] int16_t test_vminvq_s16(int16x8_t a) { @@ -206,14 +206,14 @@ } // CHECK-LABEL: define i32 @test_vminvq_s32(<4 x i32> %a) #1 { -// CHECK: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) #3 +// CHECK: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VMINVQ_S32_I]] int32_t test_vminvq_s32(int32x4_t a) { return vminvq_s32(a); } // CHECK-LABEL: define i8 @test_vminvq_u8(<16 x i8> %a) #1 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 // CHECK: ret i8 [[TMP0]] uint8_t test_vminvq_u8(uint8x16_t a) { @@ -221,7 +221,7 @@ } // CHECK-LABEL: define i16 @test_vminvq_u16(<8 x i16> %a) #1 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16 // CHECK: ret i16 [[TMP2]] uint16_t test_vminvq_u16(uint16x8_t a) { @@ -229,14 +229,14 @@ } // CHECK-LABEL: define i32 @test_vminvq_u32(<4 x i32> %a) #1 { -// CHECK: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) #3 +// CHECK: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VMINVQ_U32_I]] uint32_t test_vminvq_u32(uint32x4_t a) { return vminvq_u32(a); } // CHECK-LABEL: define i8 @test_vaddv_s8(<8 x i8> %a) #0 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 // CHECK: ret i8 [[TMP0]] int8_t test_vaddv_s8(int8x8_t a) { @@ -244,7 +244,7 @@ } // CHECK-LABEL: define i16 @test_vaddv_s16(<4 x i16> %a) #0 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16 // CHECK: ret i16 [[TMP2]] int16_t test_vaddv_s16(int16x4_t a) { @@ -252,7 +252,7 @@ } // CHECK-LABEL: define i8 @test_vaddv_u8(<8 x i8> %a) #0 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 // CHECK: ret i8 [[TMP0]] uint8_t test_vaddv_u8(uint8x8_t a) { @@ -260,7 +260,7 @@ } // CHECK-LABEL: define i16 @test_vaddv_u16(<4 x i16> %a) #0 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16 // CHECK: ret i16 [[TMP2]] uint16_t test_vaddv_u16(uint16x4_t a) { @@ -268,7 +268,7 @@ } // CHECK-LABEL: define i8 @test_vaddvq_s8(<16 x i8> %a) #1 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) 
#3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 // CHECK: ret i8 [[TMP0]] int8_t test_vaddvq_s8(int8x16_t a) { @@ -276,7 +276,7 @@ } // CHECK-LABEL: define i16 @test_vaddvq_s16(<8 x i16> %a) #1 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16 // CHECK: ret i16 [[TMP2]] int16_t test_vaddvq_s16(int16x8_t a) { @@ -284,14 +284,14 @@ } // CHECK-LABEL: define i32 @test_vaddvq_s32(<4 x i32> %a) #1 { -// CHECK: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) #3 +// CHECK: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VADDVQ_S32_I]] int32_t test_vaddvq_s32(int32x4_t a) { return vaddvq_s32(a); } // CHECK-LABEL: define i8 @test_vaddvq_u8(<16 x i8> %a) #1 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 // CHECK: ret i8 [[TMP0]] uint8_t test_vaddvq_u8(uint8x16_t a) { @@ -299,7 +299,7 @@ } // CHECK-LABEL: define i16 @test_vaddvq_u16(<8 x i16> %a) #1 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16 // CHECK: ret i16 [[TMP2]] uint16_t test_vaddvq_u16(uint16x8_t a) { @@ -307,35 +307,35 @@ } // CHECK-LABEL: define i32 @test_vaddvq_u32(<4 x i32> %a) #1 { -// CHECK: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a) #3 +// CHECK: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VADDVQ_U32_I]] uint32_t test_vaddvq_u32(uint32x4_t a) { return vaddvq_u32(a); } // CHECK-LABEL: define float @test_vmaxvq_f32(<4 x float> %a) #1 { -// CHECK: [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) #3 +// CHECK: [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) [[NOUNWIND_ATTR]] // CHECK: ret float [[VMAXVQ_F32_I]] float32_t test_vmaxvq_f32(float32x4_t a) { return vmaxvq_f32(a); } // CHECK-LABEL: define float @test_vminvq_f32(<4 x float> %a) #1 { -// CHECK: [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) #3 +// CHECK: [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) [[NOUNWIND_ATTR]] // CHECK: ret float [[VMINVQ_F32_I]] float32_t test_vminvq_f32(float32x4_t a) { return vminvq_f32(a); } // CHECK-LABEL: define float @test_vmaxnmvq_f32(<4 x float> %a) #1 { -// CHECK: [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) #3 +// CHECK: [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) [[NOUNWIND_ATTR]] // CHECK: ret float [[VMAXNMVQ_F32_I]] float32_t test_vmaxnmvq_f32(float32x4_t a) { return vmaxnmvq_f32(a); } // CHECK-LABEL: define float @test_vminnmvq_f32(<4 x float> %a) #1 { -// CHECK: [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) #3 +// CHECK: 
[[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) [[NOUNWIND_ATTR]] // CHECK: ret float [[VMINNMVQ_F32_I]] float32_t test_vminnmvq_f32(float32x4_t a) { return vminnmvq_f32(a); @@ -343,3 +343,4 @@ // CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64" // CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128" +// CHECK: attributes [[NOUNWIND_ATTR]] = { nounwind } diff --git a/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c b/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c --- a/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c @@ -6,119 +6,119 @@ #include <arm_neon.h> // CHECK-LABEL: define float @test_vcvtxd_f32_f64(double %a) #0 { -// CHECK: [[VCVTXD_F32_F64_I:%.*]] = call float @llvm.aarch64.sisd.fcvtxn(double %a) #2 +// CHECK: [[VCVTXD_F32_F64_I:%.*]] = call float @llvm.aarch64.sisd.fcvtxn(double %a) [[NOUNWIND_ATTR:#[0-9]+]] // CHECK: ret float [[VCVTXD_F32_F64_I]] float32_t test_vcvtxd_f32_f64(float64_t a) { return (float32_t)vcvtxd_f32_f64(a); } // CHECK-LABEL: define i32 @test_vcvtas_s32_f32(float %a) #0 { -// CHECK: [[VCVTAS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %a) #2 +// CHECK: [[VCVTAS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTAS_S32_F32_I]] int32_t test_vcvtas_s32_f32(float32_t a) { return (int32_t)vcvtas_s32_f32(a); } // CHECK-LABEL: define i64 @test_test_vcvtad_s64_f64(double %a) #0 { -// CHECK: [[VCVTAD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a) #2 +// CHECK: [[VCVTAD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTAD_S64_F64_I]] int64_t test_test_vcvtad_s64_f64(float64_t a) { return (int64_t)vcvtad_s64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtas_u32_f32(float %a) #0 { -// CHECK: [[VCVTAS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %a) #2 +// CHECK: [[VCVTAS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTAS_U32_F32_I]] uint32_t test_vcvtas_u32_f32(float32_t a) { return (uint32_t)vcvtas_u32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtad_u64_f64(double %a) #0 { -// CHECK: [[VCVTAD_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a) #2 +// CHECK: [[VCVTAD_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTAD_U64_F64_I]] uint64_t test_vcvtad_u64_f64(float64_t a) { return (uint64_t)vcvtad_u64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtms_s32_f32(float %a) #0 { -// CHECK: [[VCVTMS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %a) #2 +// CHECK: [[VCVTMS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTMS_S32_F32_I]] int32_t test_vcvtms_s32_f32(float32_t a) { return (int32_t)vcvtms_s32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtmd_s64_f64(double %a) #0 { -// CHECK: [[VCVTMD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a) #2 +// CHECK: [[VCVTMD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTMD_S64_F64_I]] int64_t test_vcvtmd_s64_f64(float64_t a) { return (int64_t)vcvtmd_s64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtms_u32_f32(float %a) #0 { -// CHECK: [[VCVTMS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %a) #2 +//
CHECK: [[VCVTMS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTMS_U32_F32_I]] uint32_t test_vcvtms_u32_f32(float32_t a) { return (uint32_t)vcvtms_u32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtmd_u64_f64(double %a) #0 { -// CHECK: [[VCVTMD_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a) #2 +// CHECK: [[VCVTMD_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTMD_U64_F64_I]] uint64_t test_vcvtmd_u64_f64(float64_t a) { return (uint64_t)vcvtmd_u64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtns_s32_f32(float %a) #0 { -// CHECK: [[VCVTNS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %a) #2 +// CHECK: [[VCVTNS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTNS_S32_F32_I]] int32_t test_vcvtns_s32_f32(float32_t a) { return (int32_t)vcvtns_s32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtnd_s64_f64(double %a) #0 { -// CHECK: [[VCVTND_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a) #2 +// CHECK: [[VCVTND_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTND_S64_F64_I]] int64_t test_vcvtnd_s64_f64(float64_t a) { return (int64_t)vcvtnd_s64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtns_u32_f32(float %a) #0 { -// CHECK: [[VCVTNS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %a) #2 +// CHECK: [[VCVTNS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTNS_U32_F32_I]] uint32_t test_vcvtns_u32_f32(float32_t a) { return (uint32_t)vcvtns_u32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtnd_u64_f64(double %a) #0 { -// CHECK: [[VCVTND_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a) #2 +// CHECK: [[VCVTND_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTND_U64_F64_I]] uint64_t test_vcvtnd_u64_f64(float64_t a) { return (uint64_t)vcvtnd_u64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtps_s32_f32(float %a) #0 { -// CHECK: [[VCVTPS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %a) #2 +// CHECK: [[VCVTPS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTPS_S32_F32_I]] int32_t test_vcvtps_s32_f32(float32_t a) { return (int32_t)vcvtps_s32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtpd_s64_f64(double %a) #0 { -// CHECK: [[VCVTPD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a) #2 +// CHECK: [[VCVTPD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTPD_S64_F64_I]] int64_t test_vcvtpd_s64_f64(float64_t a) { return (int64_t)vcvtpd_s64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtps_u32_f32(float %a) #0 { -// CHECK: [[VCVTPS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %a) #2 +// CHECK: [[VCVTPS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTPS_U32_F32_I]] uint32_t test_vcvtps_u32_f32(float32_t a) { return (uint32_t)vcvtps_u32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtpd_u64_f64(double %a) #0 { -// CHECK: [[VCVTPD_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %a) #2 +// CHECK: [[VCVTPD_U64_F64_I:%.*]] = call i64 
@llvm.aarch64.neon.fcvtpu.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTPD_U64_F64_I]] uint64_t test_vcvtpd_u64_f64(float64_t a) { return (uint64_t)vcvtpd_u64_f64(a); @@ -151,3 +151,5 @@ uint64_t test_vcvtd_u64_f64(float64_t a) { return (uint64_t)vcvtd_u64_f64(a); } + +// CHECK: attributes [[NOUNWIND_ATTR]] = { nounwind } diff --git a/clang/test/CodeGen/aarch64-neon-fma.c b/clang/test/CodeGen/aarch64-neon-fma.c --- a/clang/test/CodeGen/aarch64-neon-fma.c +++ b/clang/test/CodeGen/aarch64-neon-fma.c @@ -224,7 +224,7 @@ // CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1 -// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) #3 +// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) [[NOUNWIND_ATTR:#[0-9]+]] // CHECK: ret <2 x double> [[TMP6]] float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { return vfmsq_n_f64(a, b, c); @@ -232,3 +232,4 @@ } // CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64" // CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128" +// CHECK: attributes [[NOUNWIND_ATTR]] = { nounwind } diff --git a/clang/test/CodeGen/aggregate-assign-call.c b/clang/test/CodeGen/aggregate-assign-call.c --- a/clang/test/CodeGen/aggregate-assign-call.c +++ b/clang/test/CodeGen/aggregate-assign-call.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=O1 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -fno-experimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s --check-prefix=O1 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=O0 // // Ensure that we place appropriate lifetime markers around indirectly returned diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c --- a/clang/test/CodeGen/arm_acle.c +++ b/clang/test/CodeGen/arm_acle.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -ffreestanding -triple armv8-eabi -target-cpu cortex-a57 -O -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch32 -// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +neon -target-feature +crc -target-feature +crypto -O -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch64 +// RUN: %clang_cc1 -ffreestanding -triple armv8-eabi -target-cpu cortex-a57 -O -fno-experimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch32 +// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +neon -target-feature +crc -target-feature +crypto -O -fno-experimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch64 #include <arm_acle.h> diff --git a/clang/test/CodeGen/available-externally-suppress.c b/clang/test/CodeGen/available-externally-suppress.c --- a/clang/test/CodeGen/available-externally-suppress.c +++ b/clang/test/CodeGen/available-externally-suppress.c @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s -// RUN: %clang_cc1 -O2 -fno-inline -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s -// RUN: %clang_cc1 -flto -O2 -fno-inline -emit-llvm -o - 
-triple x86_64-apple-darwin10 %s | FileCheck %s -check-prefix=LTO +// RUN: %clang_cc1 -O2 -fno-experimental-new-pass-manager -fno-inline -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s +// RUN: %clang_cc1 -flto -O2 -fno-experimental-new-pass-manager -fno-inline -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s -check-prefix=LTO // Ensure that we don't emit available_externally functions at -O0. // Also should not emit them at -O2, unless -flto is present in which case diff --git a/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c b/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c --- a/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c +++ b/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c @@ -27,10 +27,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <8 x i64> [[TMP5]], [[TMP6]] // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 @@ -40,10 +40,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <8 x i64> [[TMP13]], [[TMP14]] // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 @@ -53,10 +53,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], 
align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP23:%.*]] = icmp sgt <8 x i64> [[TMP21]], [[TMP22]] // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 @@ -92,10 +92,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <8 x i64> [[TMP5]], [[TMP6]] // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 @@ -105,10 +105,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP15:%.*]] = icmp ugt <8 x i64> [[TMP13]], [[TMP14]] // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 @@ -118,10 +118,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: 
[[TMP21:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP23:%.*]] = icmp ugt <8 x i64> [[TMP21]], [[TMP22]] // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 @@ -134,22 +134,22 @@ // CHECK-LABEL: define double @test_mm512_reduce_max_pd(<8 x double> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 // CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <8 x double> [[TMP0]], <8 x double>* [[__V_ADDR_I]], align 64 @@ -161,10 +161,10 @@ // CHECK-NEXT: store <4 x double> [[EXTRACT2_I]], <4 x double>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[__B_ADDR_I11_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I11_I]], align 32 +// CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* 
{{.*}}, align 32 +// CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 // CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> [[TMP5]], <4 x double> [[TMP6]]) #2 // CHECK-NEXT: store <4 x double> [[TMP7]], <4 x double>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 @@ -175,10 +175,10 @@ // CHECK-NEXT: store <2 x double> [[EXTRACT5_I]], <2 x double>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[__B_ADDR_I9_I]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I9_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP14:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP12]], <2 x double> [[TMP13]]) #2 // CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 @@ -187,10 +187,10 @@ // CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 // CHECK-NEXT: [[TMP17:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP18:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP19]], <2 x double> [[TMP20]]) #2 // CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP22:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 @@ -225,10 +225,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: 
[[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i64> [[TMP5]], [[TMP6]] // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 @@ -238,10 +238,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP15:%.*]] = icmp slt <8 x i64> [[TMP13]], [[TMP14]] // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 @@ -251,10 +251,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP23:%.*]] = icmp slt <8 x i64> [[TMP21]], [[TMP22]] // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 @@ -290,10 +290,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x 
i64> [[TMP4]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP7:%.*]] = icmp ult <8 x i64> [[TMP5]], [[TMP6]] // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 @@ -303,10 +303,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP15:%.*]] = icmp ult <8 x i64> [[TMP13]], [[TMP14]] // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 @@ -316,10 +316,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP23:%.*]] = icmp ult <8 x i64> [[TMP21]], [[TMP22]] // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 @@ -332,22 +332,22 @@ // CHECK-LABEL: define double @test_mm512_reduce_min_pd(<8 x double> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, 
align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 // CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <8 x double> [[TMP0]], <8 x double>* [[__V_ADDR_I]], align 64 @@ -359,10 +359,10 @@ // CHECK-NEXT: store <4 x double> [[EXTRACT2_I]], <4 x double>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[__B_ADDR_I11_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I11_I]], align 32 +// CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 // CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> [[TMP5]], <4 x double> [[TMP6]]) #2 // CHECK-NEXT: store <4 x double> [[TMP7]], <4 x double>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 @@ -373,10 +373,10 @@ // CHECK-NEXT: store <2 x double> [[EXTRACT5_I]], <2 x double>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[__B_ADDR_I9_I]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I9_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* 
{{.*}}, align 16 +// CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP14:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP12]], <2 x double> [[TMP13]]) #2 // CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 @@ -385,10 +385,10 @@ // CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 // CHECK-NEXT: [[TMP17:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP18:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP19]], <2 x double> [[TMP20]]) #2 // CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP22:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 @@ -400,27 +400,27 @@ // CHECK-LABEL: define i64 @test_mm512_mask_reduce_max_epi64(i8 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I10_I:%.*]] = alloca 
<8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 @@ -444,16 +444,16 @@ // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6 // CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]] // CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -463,10 +463,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = 
load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = icmp sgt <8 x i64> [[TMP22]], [[TMP23]] // CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]] // CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64 @@ -476,10 +476,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP32:%.*]] = icmp sgt <8 x i64> [[TMP30]], [[TMP31]] // CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]] // CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64 @@ -489,10 +489,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP40:%.*]] = icmp sgt <8 x i64> [[TMP38]], [[TMP39]] // CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]] // CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64 @@ -505,25 +505,25 @@ // CHECK-LABEL: define i64 @test_mm512_mask_reduce_max_epu64(i8 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// 
CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 @@ -533,11 +533,11 @@ // CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: store i8 [[TMP2]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -547,10 +547,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x 
i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP15:%.*]] = icmp ugt <8 x i64> [[TMP13]], [[TMP14]] // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T2_I]], align 64 @@ -560,10 +560,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE2_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP23:%.*]] = icmp ugt <8 x i64> [[TMP21]], [[TMP22]] // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T4_I]], align 64 @@ -573,10 +573,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE4_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP27]], <8 x i64>* [[__A_ADDR_I6_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP27]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP31:%.*]] = icmp ugt <8 x i64> [[TMP29]], [[TMP30]] // CHECK-NEXT: [[TMP32:%.*]] = select <8 x i1> [[TMP31]], <8 x i64> [[TMP29]], <8 x i64> [[TMP30]] // CHECK-NEXT: store <8 x i64> [[TMP32]], <8 x i64>* [[__T6_I]], align 64 @@ -589,62 +589,62 @@ // CHECK-LABEL: define double @test_mm512_mask_reduce_max_pd(i8 zeroext %__M, <8 x double> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = 
alloca <4 x double>, align 32 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__B_ADDR_I13_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca double, align 8 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 // CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store double 0xFFF0000000000000, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: store double 0xFFF0000000000000, double* {{.*}}, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = 
load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x double> [[VECINIT_I_I]], double [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x double> [[VECINIT1_I_I]], double [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x double> [[VECINIT2_I_I]], double [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x double> [[VECINIT3_I_I]], double [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x double> [[VECINIT4_I_I]], double [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x double> [[VECINIT5_I_I]], double [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x double> [[VECINIT6_I_I]], double [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* {{.*}}, align 64 // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* {{.*}}, align 64 // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* {{.*}}, align 64 // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x double>, <8 x double>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* {{.*}}, align 64 // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x double> [[TMP14]], <8 x double> [[TMP15]] // CHECK-NEXT: store <8 x double> [[TMP17]], <8 x double>* [[__V_ADDR_I]], align 64 @@ -656,10 +656,10 @@ // CHECK-NEXT: store <4 x double> [[EXTRACT4_I]], <4 x double>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP20:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP21:%.*]] = load 
<4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* [[__A_ADDR_I12_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[__B_ADDR_I13_I]], align 32 -// CHECK-NEXT: [[TMP22:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I12_I]], align 32 -// CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I13_I]], align 32 +// CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP22:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 // CHECK-NEXT: [[TMP24:%.*]] = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> [[TMP22]], <4 x double> [[TMP23]]) #2 // CHECK-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP25:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 @@ -670,10 +670,10 @@ // CHECK-NEXT: store <2 x double> [[EXTRACT7_I]], <2 x double>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP27:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP28:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I11_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP31:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP29]], <2 x double> [[TMP30]]) #2 // CHECK-NEXT: store <2 x double> [[TMP31]], <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP32:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 @@ -683,9 +683,9 @@ // CHECK-NEXT: [[TMP34:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP35:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 // CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP36:%.*]] = load <2 x double>, <2 x double>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP38:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP36]], <2 x double> [[TMP37]]) #2 // CHECK-NEXT: store <2 x double> [[TMP38]], <2 x double>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP39:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 @@ -697,27 +697,27 @@ // CHECK-LABEL: define i64 @test_mm512_mask_reduce_min_epi64(i8 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: 
[[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 @@ -741,16 +741,16 @@ // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6 // CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* 
[[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]] // CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -760,10 +760,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = icmp slt <8 x i64> [[TMP22]], [[TMP23]] // CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]] // CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64 @@ -773,10 +773,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP32:%.*]] = icmp slt <8 x i64> [[TMP30]], [[TMP31]] // CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]] // CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64 @@ -786,10 +786,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* 
[[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP40:%.*]] = icmp slt <8 x i64> [[TMP38]], [[TMP39]] // CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]] // CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64 @@ -802,27 +802,27 @@ // CHECK-LABEL: define i64 @test_mm512_mask_reduce_min_epu64(i8 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-DAG: 
[[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 @@ -846,16 +846,16 @@ // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6 // CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]] // CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -865,10 +865,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = icmp ult <8 x i64> [[TMP22]], [[TMP23]] // CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]] // CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64 @@ -878,10 +878,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store 
<8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP32:%.*]] = icmp ult <8 x i64> [[TMP30]], [[TMP31]] // CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]] // CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64 @@ -891,10 +891,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP40:%.*]] = icmp ult <8 x i64> [[TMP38]], [[TMP39]] // CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]] // CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64 @@ -907,62 +907,62 @@ // CHECK-LABEL: define double @test_mm512_mask_reduce_min_pd(i8 zeroext %__M, <8 x double> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 
64 +// CHECK-DAG: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__B_ADDR_I13_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca double, align 8 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 // CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store double 0x7FF0000000000000, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: store double 0x7FF0000000000000, double* {{.*}}, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x double> [[VECINIT_I_I]], double [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x double> [[VECINIT1_I_I]], double [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x double> [[VECINIT2_I_I]], double [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x double> [[VECINIT3_I_I]], double [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x double> 
[[VECINIT4_I_I]], double [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x double> [[VECINIT5_I_I]], double [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x double> [[VECINIT6_I_I]], double [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* {{.*}}, align 64 // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* {{.*}}, align 64 // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* {{.*}}, align 64 // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x double>, <8 x double>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* {{.*}}, align 64 // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x double> [[TMP14]], <8 x double> [[TMP15]] // CHECK-NEXT: store <8 x double> [[TMP17]], <8 x double>* [[__V_ADDR_I]], align 64 @@ -974,10 +974,10 @@ // CHECK-NEXT: store <4 x double> [[EXTRACT4_I]], <4 x double>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP20:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP21:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* [[__A_ADDR_I12_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[__B_ADDR_I13_I]], align 32 -// CHECK-NEXT: [[TMP22:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I12_I]], align 32 -// CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I13_I]], align 32 +// CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP22:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 // CHECK-NEXT: [[TMP24:%.*]] = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> [[TMP22]], <4 x double> [[TMP23]]) #2 // CHECK-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP25:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 @@ -988,10 +988,10 @@ // CHECK-NEXT: store <2 x double> [[EXTRACT7_I]], <2 x double>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP27:%.*]] = load <2 x double>, <2 x 
double>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP28:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I11_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP31:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP29]], <2 x double> [[TMP30]]) #2 // CHECK-NEXT: store <2 x double> [[TMP31]], <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP32:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 @@ -1001,9 +1001,9 @@ // CHECK-NEXT: [[TMP34:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP35:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 // CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP36:%.*]] = load <2 x double>, <2 x double>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP38:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP36]], <2 x double> [[TMP37]]) #2 // CHECK-NEXT: store <2 x double> [[TMP38]], <2 x double>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP39:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 @@ -1015,26 +1015,26 @@ // CHECK-LABEL: define i32 @test_mm512_reduce_max_epi32(<8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: 
[[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -1046,11 +1046,11 @@ // CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> // CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <8 x i32> [[TMP6]], [[TMP8]] // CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] @@ -1111,7 +1111,7 @@ // CHECK-NEXT: [[TMP48:%.*]] = icmp sgt <4 x i32> [[TMP45]], [[TMP47]] // CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] // CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -1121,26 +1121,26 @@ // CHECK-LABEL: define i32 @test_mm512_reduce_max_epu32(<8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = 
alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -1152,11 +1152,11 @@ // CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> // CHECK-NEXT: 
[[TMP9:%.*]] = icmp ugt <8 x i32> [[TMP6]], [[TMP8]] // CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] @@ -1217,7 +1217,7 @@ // CHECK-NEXT: [[TMP48:%.*]] = icmp ugt <4 x i32> [[TMP45]], [[TMP47]] // CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] // CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -1227,26 +1227,26 @@ // CHECK-LABEL: define float @test_mm512_reduce_max_ps(<16 x float> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__A_ADDR_I14_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__B_ADDR_I15_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A_ADDR_I10_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I11_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 
64 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <16 x float> [[TMP0]], <16 x float>* [[__V_ADDR_I]], align 64 @@ -1262,10 +1262,10 @@ // CHECK-NEXT: store <8 x float> [[TMP6]], <8 x float>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP7:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP8:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP8]], <8 x float>* [[__B_ADDR_I15_I]], align 32 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I15_I]], align 32 +// CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: store <8 x float> [[TMP8]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 // CHECK-NEXT: [[TMP11:%.*]] = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> [[TMP9]], <8 x float> [[TMP10]]) #2 // CHECK-NEXT: store <8 x float> [[TMP11]], <8 x float>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 @@ -1276,10 +1276,10 @@ // CHECK-NEXT: store <4 x float> [[EXTRACT5_I]], <4 x float>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP15:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP15]], <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP15]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP16:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP17:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP18:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP16]], <4 x float> [[TMP17]]) #2 // CHECK-NEXT: store <4 x float> [[TMP18]], <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP19:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 @@ -1288,10 +1288,10 @@ // CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 // CHECK-NEXT: [[TMP21:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP22:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP22]], <4 x float>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP23:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP24:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I11_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP22]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP23:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP24:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // 
CHECK-NEXT: [[TMP25:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP23]], <4 x float> [[TMP24]]) #2 // CHECK-NEXT: store <4 x float> [[TMP25]], <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP26:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 @@ -1300,10 +1300,10 @@ // CHECK-NEXT: store <4 x float> [[SHUFFLE8_I]], <4 x float>* [[__T9_I]], align 16 // CHECK-NEXT: [[TMP28:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP29:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP28]], <4 x float>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP29]], <4 x float>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP31:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP28]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP29]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP30:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP31:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP32:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP30]], <4 x float> [[TMP31]]) #2 // CHECK-NEXT: store <4 x float> [[TMP32]], <4 x float>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP33:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 @@ -1315,26 +1315,26 @@ // CHECK-LABEL: define i32 @test_mm512_reduce_min_epi32(<8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: 
[[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -1346,11 +1346,11 @@ // CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> // CHECK-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[TMP6]], [[TMP8]] // CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] @@ -1411,7 +1411,7 @@ // CHECK-NEXT: [[TMP48:%.*]] = icmp slt <4 x i32> [[TMP45]], [[TMP47]] // CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] // CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -1421,26 +1421,26 @@ // CHECK-LABEL: define i32 @test_mm512_reduce_min_epu32(<8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x 
i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -1452,11 +1452,11 @@ // CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> // CHECK-NEXT: [[TMP9:%.*]] = icmp ult <8 x i32> [[TMP6]], [[TMP8]] // CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] @@ -1517,7 +1517,7 @@ // CHECK-NEXT: [[TMP48:%.*]] = icmp ult <4 x i32> [[TMP45]], [[TMP47]] // CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] // CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP51:%.*]] = load <4 x 
i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -1527,26 +1527,26 @@ // CHECK-LABEL: define float @test_mm512_reduce_min_ps(<16 x float> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__A_ADDR_I14_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__B_ADDR_I15_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A_ADDR_I10_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I11_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <16 x float> [[TMP0]], <16 x float>* [[__V_ADDR_I]], align 64 @@ -1562,10 +1562,10 @@ // CHECK-NEXT: store <8 x float> [[TMP6]], <8 x float>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP7:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP8:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP8]], <8 x float>* [[__B_ADDR_I15_I]], 
align 32 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I15_I]], align 32 +// CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: store <8 x float> [[TMP8]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 // CHECK-NEXT: [[TMP11:%.*]] = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> [[TMP9]], <8 x float> [[TMP10]]) #2 // CHECK-NEXT: store <8 x float> [[TMP11]], <8 x float>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 @@ -1576,10 +1576,10 @@ // CHECK-NEXT: store <4 x float> [[EXTRACT5_I]], <4 x float>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP15:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP15]], <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP15]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP16:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP17:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP18:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP16]], <4 x float> [[TMP17]]) #2 // CHECK-NEXT: store <4 x float> [[TMP18]], <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP19:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 @@ -1588,10 +1588,10 @@ // CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 // CHECK-NEXT: [[TMP21:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP22:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP22]], <4 x float>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP23:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP24:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I11_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP22]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP23:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP24:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP25:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP23]], <4 x float> [[TMP24]]) #2 // CHECK-NEXT: store <4 x float> [[TMP25]], <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP26:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 @@ -1600,10 +1600,10 @@ // CHECK-NEXT: store <4 x float> [[SHUFFLE8_I]], <4 x float>* [[__T9_I]], align 16 // CHECK-NEXT: [[TMP28:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP29:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP28]], <4 x float>* 
[[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP29]], <4 x float>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP31:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP28]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP29]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP30:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP31:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP32:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP30]], <4 x float> [[TMP31]]) #2 // CHECK-NEXT: store <4 x float> [[TMP32]], <4 x float>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP33:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 @@ -1615,33 +1615,33 @@ // CHECK-LABEL: define i32 @test_mm512_mask_reduce_max_epi32(i16 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 +// CHECK-DAG: 
[[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 @@ -1681,18 +1681,18 @@ // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14 // CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* {{.*}}, align 64 // CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64> // CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2 // CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A2_ADDR_I_I]], align 64 // CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 // CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A2_ADDR_I_I]], align 64 // CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32> // CHECK-NEXT: [[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1> // CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]] @@ -1706,11 +1706,11 @@ // CHECK-NEXT: store <4 x i64> [[EXTRACT4_I]], <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* 
{{.*}}, align 32 +// CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP35:%.*]] = bitcast <4 x i64> [[TMP34]] to <8 x i32> -// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32> // CHECK-NEXT: [[TMP38:%.*]] = icmp sgt <8 x i32> [[TMP35]], [[TMP37]] // CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]] @@ -1771,7 +1771,7 @@ // CHECK-NEXT: [[TMP77:%.*]] = icmp sgt <4 x i32> [[TMP74]], [[TMP76]] // CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]] // CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP78]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP80:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP80]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -1781,31 +1781,31 @@ // CHECK-LABEL: define i32 @test_mm512_mask_reduce_max_epu32(i16 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = 
alloca i16, align 2 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 @@ -1815,12 +1815,12 @@ // CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: store i16 [[TMP2]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP5]] to <16 x i32> -// CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 -// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP7]] to <16 x i32> // CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> // CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP6]], <16 x i32> [[TMP8]] @@ -1835,10 +1835,10 @@ // CHECK-NEXT: [[TMP14:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP15:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: store <4 x i64> [[TMP14]], <4 x i64>* [[__A2_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP15]], <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP15]], <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP16:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32 // CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i64> [[TMP16]] to <8 x i32> -// CHECK-NEXT: [[TMP18:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP18:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i64> [[TMP18]] to <8 x i32> // CHECK-NEXT: [[TMP20:%.*]] = icmp ugt <8 x i32> [[TMP17]], [[TMP19]] // CHECK-NEXT: [[TMP21:%.*]] = select <8 x i1> [[TMP20]], <8 x i32> [[TMP17]], <8 x i32> [[TMP19]] @@ -1899,7 +1899,7 @@ // CHECK-NEXT: [[TMP59:%.*]] = icmp ugt <4 x i32> [[TMP56]], [[TMP58]] // CHECK-NEXT: [[TMP60:%.*]] = select <4 x i1> [[TMP59]], <4 x i32> [[TMP56]], <4 x i32> 
[[TMP58]] // CHECK-NEXT: [[TMP61:%.*]] = bitcast <4 x i32> [[TMP60]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP62:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP62]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -1909,82 +1909,82 @@ // CHECK-LABEL: define float @test_mm512_mask_reduce_max_ps(i16 zeroext %__M, <16 x float> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I15_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca float, align 4 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <4 
x float>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 // CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store float 0xFFF0000000000000, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: store float 0xFFF0000000000000, float* {{.*}}, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x float> [[VECINIT_I_I]], float [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x float> [[VECINIT1_I_I]], float [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x float> [[VECINIT2_I_I]], float [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x float> [[VECINIT3_I_I]], float [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x float> [[VECINIT4_I_I]], float [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x float> [[VECINIT5_I_I]], float [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x float> [[VECINIT6_I_I]], float [[TMP9]], i32 7 -// CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x float> [[VECINIT7_I_I]], float [[TMP10]], i32 8 -// CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x float> [[VECINIT8_I_I]], float 
[[TMP11]], i32 9 -// CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x float> [[VECINIT9_I_I]], float [[TMP12]], i32 10 -// CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x float> [[VECINIT10_I_I]], float [[TMP13]], i32 11 -// CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x float> [[VECINIT11_I_I]], float [[TMP14]], i32 12 -// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x float> [[VECINIT12_I_I]], float [[TMP15]], i32 13 -// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x float> [[VECINIT13_I_I]], float [[TMP16]], i32 14 -// CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x float> [[VECINIT14_I_I]], float [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 64 // CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* {{.*}}, align 64 // CHECK-NEXT: store i16 [[TMP19]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* {{.*}}, align 64 // CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = bitcast i16 [[TMP21]] to <16 x i1> // CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x float> [[TMP22]], <16 x float> [[TMP23]] // CHECK-NEXT: store <16 x float> [[TMP25]], <16 x float>* [[__V_ADDR_I]], align 64 @@ -2000,10 +2000,10 @@ // CHECK-NEXT: store <8 x float> [[TMP31]], <8 x float>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP32:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP33:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* [[__A_ADDR_I16_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* 
[[__B_ADDR_I17_I]], align 32 -// CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I16_I]], align 32 -// CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I17_I]], align 32 +// CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 // CHECK-NEXT: [[TMP36:%.*]] = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> [[TMP34]], <8 x float> [[TMP35]]) #2 // CHECK-NEXT: store <8 x float> [[TMP36]], <8 x float>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP37:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 @@ -2014,10 +2014,10 @@ // CHECK-NEXT: store <4 x float> [[EXTRACT7_I]], <4 x float>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP39:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP40:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* [[__A_ADDR_I14_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* [[__B_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I14_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I15_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP43:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP41]], <4 x float> [[TMP42]]) #2 // CHECK-NEXT: store <4 x float> [[TMP43]], <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP44:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 @@ -2026,10 +2026,10 @@ // CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 // CHECK-NEXT: [[TMP46:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP47:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP47]], <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP49:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP47]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP49:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP50:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP48]], <4 x float> [[TMP49]]) #2 // CHECK-NEXT: store <4 x float> [[TMP50]], <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP51:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 @@ -2039,9 +2039,9 @@ // CHECK-NEXT: [[TMP53:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP54:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 // CHECK-NEXT: store <4 x float> [[TMP53]], <4 x float>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: store <4 x float> 
[[TMP54]], <4 x float>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP54]], <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP55:%.*]] = load <4 x float>, <4 x float>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP57:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP55]], <4 x float> [[TMP56]]) #2 // CHECK-NEXT: store <4 x float> [[TMP57]], <4 x float>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP58:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 @@ -2053,33 +2053,33 @@ // CHECK-LABEL: define i32 @test_mm512_mask_reduce_min_epi32(i16 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 
32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 @@ -2119,18 +2119,18 @@ // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14 // CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* {{.*}}, align 64 // CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64> // CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32> // CHECK-NEXT: [[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1> // CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]] @@ -2145,10 +2145,10 @@ // CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A2_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32 // CHECK-NEXT: [[TMP35:%.*]] = bitcast <4 x i64> [[TMP34]] to <8 x i32> -// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* 
{{.*}}, align 32 // CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32> // CHECK-NEXT: [[TMP38:%.*]] = icmp slt <8 x i32> [[TMP35]], [[TMP37]] // CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]] @@ -2209,7 +2209,7 @@ // CHECK-NEXT: [[TMP77:%.*]] = icmp slt <4 x i32> [[TMP74]], [[TMP76]] // CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]] // CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP78]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP80:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP80]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -2219,33 +2219,33 @@ // CHECK-LABEL: define i32 @test_mm512_mask_reduce_min_epu32(i16 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 
x i32>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 @@ -2285,18 +2285,18 @@ // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14 // CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* {{.*}}, align 64 // CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64> // CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32> // CHECK-NEXT: [[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1> // CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]] @@ -2311,10 +2311,10 @@ // CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A2_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32 // CHECK-NEXT: [[TMP35:%.*]] = 
bitcast <4 x i64> [[TMP34]] to <8 x i32> -// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32> // CHECK-NEXT: [[TMP38:%.*]] = icmp ult <8 x i32> [[TMP35]], [[TMP37]] // CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]] @@ -2375,7 +2375,7 @@ // CHECK-NEXT: [[TMP77:%.*]] = icmp ult <4 x i32> [[TMP74]], [[TMP76]] // CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]] // CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP78]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP80:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP80]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -2385,82 +2385,82 @@ // CHECK-LABEL: define float @test_mm512_mask_reduce_min_ps(i16 zeroext %__M, <16 x float> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I15_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, 
align 16 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca float, align 4 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 // CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store float 0x7FF0000000000000, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: store float 0x7FF0000000000000, float* {{.*}}, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x float> [[VECINIT_I_I]], float [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x float> [[VECINIT1_I_I]], float [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x float> [[VECINIT2_I_I]], float [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x float> [[VECINIT3_I_I]], float [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x float> [[VECINIT4_I_I]], float [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x float> [[VECINIT5_I_I]], float [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] 
= load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x float> [[VECINIT6_I_I]], float [[TMP9]], i32 7 -// CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x float> [[VECINIT7_I_I]], float [[TMP10]], i32 8 -// CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x float> [[VECINIT8_I_I]], float [[TMP11]], i32 9 -// CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x float> [[VECINIT9_I_I]], float [[TMP12]], i32 10 -// CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x float> [[VECINIT10_I_I]], float [[TMP13]], i32 11 -// CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x float> [[VECINIT11_I_I]], float [[TMP14]], i32 12 -// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x float> [[VECINIT12_I_I]], float [[TMP15]], i32 13 -// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x float> [[VECINIT13_I_I]], float [[TMP16]], i32 14 -// CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x float> [[VECINIT14_I_I]], float [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 64 // CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* {{.*}}, align 64 // CHECK-NEXT: store i16 [[TMP19]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* {{.*}}, align 64 // CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = bitcast i16 
[[TMP21]] to <16 x i1> // CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x float> [[TMP22]], <16 x float> [[TMP23]] // CHECK-NEXT: store <16 x float> [[TMP25]], <16 x float>* [[__V_ADDR_I]], align 64 @@ -2476,10 +2476,10 @@ // CHECK-NEXT: store <8 x float> [[TMP31]], <8 x float>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP32:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP33:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* [[__A_ADDR_I16_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* [[__B_ADDR_I17_I]], align 32 -// CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I16_I]], align 32 -// CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I17_I]], align 32 +// CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 // CHECK-NEXT: [[TMP36:%.*]] = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> [[TMP34]], <8 x float> [[TMP35]]) #2 // CHECK-NEXT: store <8 x float> [[TMP36]], <8 x float>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP37:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 @@ -2490,10 +2490,10 @@ // CHECK-NEXT: store <4 x float> [[EXTRACT7_I]], <4 x float>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP39:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP40:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* [[__A_ADDR_I14_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* [[__B_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I14_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I15_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP43:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP41]], <4 x float> [[TMP42]]) #2 // CHECK-NEXT: store <4 x float> [[TMP43]], <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP44:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 @@ -2502,10 +2502,10 @@ // CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 // CHECK-NEXT: [[TMP46:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP47:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP47]], <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP49:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP47]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP49:%.*]] = load <4 x 
float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP50:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP48]], <4 x float> [[TMP49]]) #2 // CHECK-NEXT: store <4 x float> [[TMP50]], <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP51:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 @@ -2515,9 +2515,9 @@ // CHECK-NEXT: [[TMP53:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP54:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 // CHECK-NEXT: store <4 x float> [[TMP53]], <4 x float>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP54]], <4 x float>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP54]], <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP55:%.*]] = load <4 x float>, <4 x float>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP57:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP55]], <4 x float> [[TMP56]]) #2 // CHECK-NEXT: store <4 x float> [[TMP57]], <4 x float>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP58:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -1,5 +1,10 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s + +// There are a few cases where, instead of accepting the result of an instruction +// directly as an argument to a select, it goes through some bitcasts.
+// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM +// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM #include @@ -10480,20 +10485,24 @@ __m512i test_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_abs_epi32 + // CHECK-LABEL: @test_mm512_mask_abs_epi32 // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <16 x i32> [[SEL]] to <8 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} return _mm512_mask_abs_epi32 (__W,__U,__A); } __m512i test_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_abs_epi32 + // CHECK-LABEL: @test_mm512_maskz_abs_epi32 // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <16 x i32> [[SEL]] to <8 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} return _mm512_maskz_abs_epi32 (__U,__A); } diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c --- a/clang/test/CodeGen/avx512vl-builtins.c +++ b/clang/test/CodeGen/avx512vl-builtins.c @@ -1,5 +1,8 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// There are a few cases where, instead of accepting the result of an instruction +// directly as an argument to a select, it goes through some bitcasts.
+// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM #include @@ -4589,6 +4592,8 @@ // CHECK: [[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[SEL]] to <2 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} return _mm_mask_abs_epi32(__W,__U,__A); } @@ -4597,6 +4602,8 @@ // CHECK: [[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[SEL]] to <2 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} return _mm_maskz_abs_epi32(__U,__A); } @@ -4605,6 +4612,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[SEL]] to <4 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} return _mm256_mask_abs_epi32(__W,__U,__A); } @@ -4613,6 +4622,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[SEL]] to <4 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} return _mm256_maskz_abs_epi32(__U,__A); } @@ -4668,6 +4679,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_max_epi32(__M,__A,__B); } @@ -4675,6 +4688,8 @@ // CHECK-LABEL: @test_mm_mask_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_max_epi32(__W,__M,__A,__B); } @@ -4682,6 +4697,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> 
[[RES]], <8 x i32> {{.*}} return _mm256_maskz_max_epi32(__M,__A,__B); } @@ -4689,6 +4706,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_max_epi32(__W,__M,__A,__B); } @@ -4736,6 +4755,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_max_epu32(__M,__A,__B); } @@ -4743,6 +4764,8 @@ // CHECK-LABEL: @test_mm_mask_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_max_epu32(__W,__M,__A,__B); } @@ -4750,6 +4773,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_max_epu32(__M,__A,__B); } @@ -4757,6 +4782,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_max_epu32(__W,__M,__A,__B); } @@ -4804,6 +4831,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_min_epi32(__M,__A,__B); } @@ -4811,6 +4840,8 @@ // CHECK-LABEL: @test_mm_mask_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_min_epi32(__W,__M,__A,__B); } @@ -4818,6 +4849,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x 
i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_min_epi32(__M,__A,__B); } @@ -4825,6 +4858,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_min_epi32(__W,__M,__A,__B); } @@ -4872,6 +4907,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_min_epu32(__M,__A,__B); } @@ -4879,6 +4916,8 @@ // CHECK-LABEL: @test_mm_mask_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_min_epu32(__W,__M,__A,__B); } @@ -4886,6 +4925,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_min_epu32(__M,__A,__B); } @@ -4893,6 +4934,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_min_epu32(__W,__M,__A,__B); } diff --git a/clang/test/CodeGen/avx512vlbw-builtins.c b/clang/test/CodeGen/avx512vlbw-builtins.c --- a/clang/test/CodeGen/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/avx512vlbw-builtins.c @@ -1,6 +1,10 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw 
-target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s +// There are a few cases where, instead of accepting the result of an instruction +// directly as an argument to a select, it goes through some bitcasts. +// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM +// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM #include @@ -901,6 +905,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} return _mm_mask_abs_epi8(__W,__U,__A); } @@ -910,6 +916,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} return _mm_maskz_abs_epi8(__U,__A); } @@ -919,6 +927,8 @@ // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[A]], <32 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} return _mm256_mask_abs_epi8(__W,__U,__A); } @@ -928,6 +938,8 @@ // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[A]], <32 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} return _mm256_maskz_abs_epi8(__U,__A); } @@ -937,6 +949,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[SEL]], <8 x i16> %{{.*}} return _mm_mask_abs_epi16(__W,__U,__A); } @@ -946,6 +960,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to
[[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[SEL]], <8 x i16> %{{.*}} return _mm_maskz_abs_epi16(__U,__A); } @@ -955,6 +971,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} return _mm256_mask_abs_epi16(__W,__U,__A); } @@ -964,6 +982,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} return _mm256_maskz_abs_epi16(__U,__A); } @@ -1229,6 +1249,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_max_epi8(__M,__A,__B); } @@ -1236,6 +1258,8 @@ // CHECK-LABEL: @test_mm_mask_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_max_epi8(__W,__M,__A,__B); } @@ -1243,6 +1267,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_max_epi8(__M,__A,__B); } @@ -1250,6 +1276,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_max_epi8(__W,__M,__A,__B); } @@ -1257,6 +1285,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x 
i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_max_epi16(__M,__A,__B); } @@ -1264,6 +1294,8 @@ // CHECK-LABEL: @test_mm_mask_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_max_epi16(__W,__M,__A,__B); } @@ -1271,6 +1303,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_max_epi16(__M,__A,__B); } @@ -1278,6 +1312,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_max_epi16(__W,__M,__A,__B); } @@ -1285,6 +1321,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_max_epu8(__M,__A,__B); } @@ -1292,6 +1330,8 @@ // CHECK-LABEL: @test_mm_mask_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_max_epu8(__W,__M,__A,__B); } @@ -1299,6 +1339,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_max_epu8(__M,__A,__B); } @@ -1306,6 +1348,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_max_epu8(__W,__M,__A,__B); } @@ -1313,6 +1357,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epu16 // CHECK: 
[[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_max_epu16(__M,__A,__B); } @@ -1320,6 +1366,8 @@ // CHECK-LABEL: @test_mm_mask_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_max_epu16(__W,__M,__A,__B); } @@ -1327,6 +1375,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_max_epu16(__M,__A,__B); } @@ -1334,6 +1384,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_max_epu16(__W,__M,__A,__B); } @@ -1341,6 +1393,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_min_epi8(__M,__A,__B); } @@ -1348,6 +1402,8 @@ // CHECK-LABEL: @test_mm_mask_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_min_epi8(__W,__M,__A,__B); } @@ -1355,6 +1411,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_min_epi8(__M,__A,__B); } @@ -1362,6 +1420,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: 
[[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_min_epi8(__W,__M,__A,__B); } @@ -1369,6 +1429,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_min_epi16(__M,__A,__B); } @@ -1376,6 +1438,8 @@ // CHECK-LABEL: @test_mm_mask_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_min_epi16(__W,__M,__A,__B); } @@ -1383,6 +1447,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_min_epi16(__M,__A,__B); } @@ -1390,6 +1456,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_min_epi16(__W,__M,__A,__B); } @@ -1397,6 +1465,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_min_epu8(__M,__A,__B); } @@ -1404,6 +1474,8 @@ // CHECK-LABEL: @test_mm_mask_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_min_epu8(__W,__M,__A,__B); } @@ -1411,6 +1483,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> 
{{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_min_epu8(__M,__A,__B); } @@ -1418,6 +1492,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_min_epu8(__W,__M,__A,__B); } @@ -1425,6 +1501,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_min_epu16(__M,__A,__B); } @@ -1432,6 +1510,8 @@ // CHECK-LABEL: @test_mm_mask_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_min_epu16(__W,__M,__A,__B); } @@ -1439,6 +1519,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_min_epu16(__M,__A,__B); } @@ -1446,6 +1528,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_min_epu16(__W,__M,__A,__B); } diff --git a/clang/test/CodeGen/cspgo-instrumentation.c b/clang/test/CodeGen/cspgo-instrumentation.c --- a/clang/test/CodeGen/cspgo-instrumentation.c +++ b/clang/test/CodeGen/cspgo-instrumentation.c @@ -1,7 +1,7 @@ // Test if CSPGO instrumentation and use pass are invoked. // // Ensure Pass PGOInstrumentationGenPass is invoked. 
-// RUN: %clang_cc1 -O2 -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN +// RUN: %clang_cc1 -O2 -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw %s -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN // RUN: %clang_cc1 -O2 -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-NEWPM // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN: PGOInstrumentationGenCreateVarPass // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN: PGOInstrumentationGenPass @@ -12,7 +12,7 @@ // RUN: llvm-profdata merge -o %t/noncs.profdata %S/Inputs/pgotestir.proftext // // Ensure Pass PGOInstrumentationUsePass and PGOInstrumentationGenPass are invoked. -// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2 +// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw %s -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2 // RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2-NEWPM // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2: PGOInstrumentationUsePass // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2: PGOInstrumentationGenCreateVarPass @@ -22,7 +22,7 @@ // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN2-NEWPM: Running pass: PGOInstrumentationGen on // Ensure Pass PGOInstrumentationUsePass is invoked only once. -// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-USE +// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata %s -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-USE // RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-USE-NEWPM // CHECK-PGOUSEPASS-INVOKED-USE: PGOInstrumentationUsePass // CHECK-PGOUSEPASS-INVOKED-USE-NOT: PGOInstrumentationGenCreateVarPass @@ -33,7 +33,7 @@ // // Ensure Pass PGOInstrumentationUsePass is invoked twice. 
// RUN: llvm-profdata merge -o %t/cs.profdata %S/Inputs/pgotestir_cs.proftext -// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-USE2 +// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-USE2 // RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-USE2-NEWPM // CHECK-PGOUSEPASS-INVOKED-USE2: PGOInstrumentationUsePass // CHECK-PGOUSEPASS-INVOKED-USE2: PGOInstrumentationUsePass diff --git a/clang/test/CodeGen/cspgo-instrumentation_lto.c b/clang/test/CodeGen/cspgo-instrumentation_lto.c --- a/clang/test/CodeGen/cspgo-instrumentation_lto.c +++ b/clang/test/CodeGen/cspgo-instrumentation_lto.c @@ -4,7 +4,7 @@ // RUN: llvm-profdata merge -o %t/noncs.profdata %S/Inputs/pgotestir.proftext // // Ensure Pass PGOInstrumentationGenPass is not invoked in PreLink. -// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm %s -flto -mllvm -debug-pass=Structure -emit-llvm-bc -o %t/foo_fe.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE +// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm %s -flto -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm-bc -o %t/foo_fe.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE // RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm %s -flto -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm-bc -o %t/foo_fe_pm.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE-NEWPM // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE: PGOInstrumentationUsePass // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE: PGOInstrumentationGenCreateVarPass @@ -14,7 +14,7 @@ // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE-NEWPM-NOT: Running pass: PGOInstrumentationGen on // // Ensure Pass PGOInstrumentationGenPass is invoked in PostLink. -// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fprofile-instrument=csllvm -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST +// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fno-experimental-new-pass-manager -fprofile-instrument=csllvm -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST // RUN: %clang_cc1 -O2 -x ir %t/foo_fe_pm.bc -fexperimental-new-pass-manager -fdebug-pass-manager -fprofile-instrument=csllvm -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NEWPM // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NOT: PGOInstrumentationUsePass // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST: PGOInstrumentationGenPass @@ -26,7 +26,7 @@ // RUN: llvm-profdata merge -o %t/cs.profdata %S/Inputs/pgotestir_cs.proftext // // Ensure Pass PGOInstrumentationUsePass is invoked Once in PreLink. 
-// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -flto -mllvm -debug-pass=Structure -emit-llvm-bc -o %t/foo_fe.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE +// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -flto -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm-bc -o %t/foo_fe.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE // RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -flto -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm-bc -o %t/foo_fe_pm.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NEWPM // CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE: PGOInstrumentationUsePass // CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NOT: PGOInstrumentationGenCreateVarPass @@ -36,7 +36,7 @@ // CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NEWPM-NOT: Running pass: PGOInstrumentationUse // // Ensure Pass PGOInstrumentationUSEPass is invoked in PostLink. -// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fprofile-instrument-use-path=%t/cs.profdata -flto -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST +// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fno-experimental-new-pass-manager -fprofile-instrument-use-path=%t/cs.profdata -flto -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST // RUN: %clang_cc1 -O2 -x ir %t/foo_fe_pm.bc -fexperimental-new-pass-manager -fdebug-pass-manager -fprofile-instrument-use-path=%t/cs.profdata -flto -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST-NEWPM // CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST: PGOInstrumentationUsePass // CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST-NOT: PGOInstrumentationUsePass diff --git a/clang/test/CodeGen/cspgo-instrumentation_thinlto.c b/clang/test/CodeGen/cspgo-instrumentation_thinlto.c --- a/clang/test/CodeGen/cspgo-instrumentation_thinlto.c +++ b/clang/test/CodeGen/cspgo-instrumentation_thinlto.c @@ -4,7 +4,7 @@ // RUN: llvm-profdata merge -o %t/noncs.profdata %S/Inputs/pgotestir.proftext // // Ensure Pass PGOInstrumentationGenPass is not invoked in PreLink. 
-// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm %s -fprofile-instrument-path=default.profraw -flto=thin -mllvm -debug-pass=Structure -emit-llvm-bc -o %t/foo_fe.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE +// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm %s -fprofile-instrument-path=default.profraw -flto=thin -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm-bc -o %t/foo_fe.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE // RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/noncs.profdata -fprofile-instrument=csllvm %s -fprofile-instrument-path=default.profraw -flto=thin -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm-bc -o %t/foo_fe_pm.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE-NEWPM // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE: PGOInstrumentationUsePass // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-PRE: PGOInstrumentationGenCreateVarPass @@ -16,7 +16,7 @@ // RUN: llvm-lto -thinlto -o %t/foo %t/foo_fe.bc // RUN: llvm-lto -thinlto -o %t/foo_pm %t/foo_fe_pm.bc // Ensure Pass PGOInstrumentationGenPass is invoked in PostLink. -// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fthinlto-index=%t/foo.thinlto.bc -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw -flto=thin -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST +// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fthinlto-index=%t/foo.thinlto.bc -fno-experimental-new-pass-manager -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw -flto=thin -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST // RUN: %clang_cc1 -O2 -x ir %t/foo_fe_pm.bc -fthinlto-index=%t/foo_pm.thinlto.bc -fexperimental-new-pass-manager -fdebug-pass-manager -fprofile-instrument=csllvm -fprofile-instrument-path=default.profraw -flto=thin -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NEWPM // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NOT: PGOInstrumentationUsePass // CHECK-CSPGOGENPASS-INVOKED-INSTR-GEN-POST-NOT: PGOInstrumentationGenCreateVarPass @@ -28,7 +28,7 @@ // RUN: llvm-profdata merge -o %t/cs.profdata %S/Inputs/pgotestir_cs.proftext // // Ensure Pass PGOInstrumentationUsePass is invoked Once in PreLink. 
-// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -flto=thin -mllvm -debug-pass=Structure -emit-llvm-bc -o %t/foo_fe.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE +// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -flto=thin -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm-bc -o %t/foo_fe.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE // RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t/cs.profdata %s -flto=thin -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm-bc -o %t/foo_fe_pm.bc 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NEWPM // CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE: PGOInstrumentationUsePass // CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-PRE-NOT: PGOInstrumentationUsePass @@ -38,7 +38,7 @@ // RUN: llvm-lto -thinlto -o %t/foo %t/foo_fe.bc // RUN: llvm-lto -thinlto -o %t/foo_pm %t/foo_fe_pm.bc // Ensure Pass PGOInstrumentationUSEPass is invoked in PostLink. -// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fthinlto-index=%t/foo.thinlto.bc -fprofile-instrument-use-path=%t/cs.profdata -flto=thin -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST -dump-input=always +// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fthinlto-index=%t/foo.thinlto.bc -fno-experimental-new-pass-manager -fprofile-instrument-use-path=%t/cs.profdata -flto=thin -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST -dump-input=always // RUN: %clang_cc1 -O2 -x ir %t/foo_fe_pm.bc -fthinlto-index=%t/foo_pm.thinlto.bc -fexperimental-new-pass-manager -fdebug-pass-manager -fprofile-instrument-use-path=%t/cs.profdata -flto=thin -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST-NEWPM -dump-input=always // CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST: PGOInstrumentationUsePass // CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST-NOT: PGOInstrumentationUsePass @@ -46,7 +46,7 @@ // CHECK-CSPGOUSEPASS-INVOKED-INSTR-USE-POST-NEWPM-NOT: Running pass: PGOInstrumentationUse // // Finally, test if a non-cs profile is passed to PostLink passes, PGO UsePass is not invoked. 
-// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fthinlto-index=%t/foo.thinlto.bc -fprofile-instrument-use-path=%t/noncs.profdata -flto=thin -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-INSTR-USE-POST +// RUN: %clang_cc1 -O2 -x ir %t/foo_fe.bc -fthinlto-index=%t/foo.thinlto.bc -fno-experimental-new-pass-manager -fprofile-instrument-use-path=%t/noncs.profdata -flto=thin -emit-llvm -mllvm -debug-pass=Structure -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-INSTR-USE-POST // RUN: %clang_cc1 -O2 -x ir %t/foo_fe_pm.bc -fthinlto-index=%t/foo_pm.thinlto.bc -fexperimental-new-pass-manager -fdebug-pass-manager -fprofile-instrument-use-path=%t/noncs.profdata -flto=thin -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-INSTR-USE-POST-NEWPM // CHECK-PGOUSEPASS-INVOKED-INSTR-USE-POST-NOT: PGOInstrumentationUsePass // CHECK-PGOUSEPASS-INVOKED-INSTR-USE-POST-NEWPM-NOT: Running pass: PGOInstrumentationUse diff --git a/clang/test/CodeGen/flatten.c b/clang/test/CodeGen/flatten.c --- a/clang/test/CodeGen/flatten.c +++ b/clang/test/CodeGen/flatten.c @@ -1,3 +1,9 @@ +// UNSUPPORTED: experimental-new-pass-manager +// Currently, different code seems to be intentionally generated under the new +// PM since we alwaysinline functions and not callsites under new PM. +// Under new PM, f() will not be inlined from g() since f is not marked as +// alwaysinline. + // RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o - | FileCheck %s void f(void) {} diff --git a/clang/test/CodeGen/lifetime.c b/clang/test/CodeGen/lifetime.c --- a/clang/test/CodeGen/lifetime.c +++ b/clang/test/CodeGen/lifetime.c @@ -1,7 +1,7 @@ // RUN: %clang -S -emit-llvm -o - -O0 %s | FileCheck %s -check-prefix=O0 -// RUN: %clang -S -emit-llvm -o - -O1 -fno-experimental-new-pass-manager %s | FileCheck %s -check-prefix=O1 -// RUN: %clang -S -emit-llvm -o - -O2 -fno-experimental-new-pass-manager %s | FileCheck %s -check-prefix=O2 -// RUN: %clang -S -emit-llvm -o - -O3 -fno-experimental-new-pass-manager %s | FileCheck %s -check-prefix=O3 +// RUN: %clang -S -emit-llvm -o - -O1 %s | FileCheck %s -check-prefix=O1 +// RUN: %clang -S -emit-llvm -o - -O2 %s | FileCheck %s -check-prefix=O2 +// RUN: %clang -S -emit-llvm -o - -O3 %s | FileCheck %s -check-prefix=O3 extern void use(char *a); diff --git a/clang/test/CodeGen/lto-newpm-pipeline.c b/clang/test/CodeGen/lto-newpm-pipeline.c --- a/clang/test/CodeGen/lto-newpm-pipeline.c +++ b/clang/test/CodeGen/lto-newpm-pipeline.c @@ -27,6 +27,7 @@ // CHECK-FULL-O0: Starting llvm::Module pass manager run. // CHECK-FULL-O0: Running pass: AlwaysInlinerPass +// CHECK-FULL-O0-NEXT: Running analysis: InnerAnalysisManagerProxy // CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-FULL-O0-NEXT: Running pass: BitcodeWriterPass @@ -34,6 +35,7 @@ // CHECK-THIN-O0: Starting llvm::Module pass manager run. // CHECK-THIN-O0: Running pass: AlwaysInlinerPass +// CHECK-THIN-O0-NEXT: Running analysis: InnerAnalysisManagerProxy // CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-THIN-O0-NEXT: Running pass: ThinLTOBitcodeWriterPass diff --git a/clang/test/CodeGen/pgo-instrumentation.c b/clang/test/CodeGen/pgo-instrumentation.c --- a/clang/test/CodeGen/pgo-instrumentation.c +++ b/clang/test/CodeGen/pgo-instrumentation.c @@ -1,8 +1,8 @@ // Test if PGO instrumentation and use pass are invoked. 
// // Ensure Pass PGOInstrumentationGenPass is invoked. -// RUN: %clang_cc1 -O2 -fprofile-instrument=llvm %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOGENPASS-INVOKED-INSTR-GEN --check-prefix=CHECK-INSTRPROF -// RUN: %clang_cc1 -O2 -fprofile-instrument=llvm %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOGENPASS-INVOKED-INSTR-GEN-NEWPM --check-prefix=CHECK-INSTRPROF-NEWPM +// RUN: %clang_cc1 -O2 -fprofile-instrument=llvm %s -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOGENPASS-INVOKED-INSTR-GEN --check-prefix=CHECK-INSTRPROF +// RUN: %clang_cc1 -O2 -fprofile-instrument=llvm %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOGENPASS-INVOKED-INSTR-GEN-NEWPM --check-prefix=CHECK-INSTRPROF-NEWPM // CHECK-PGOGENPASS-INVOKED-INSTR-GEN: PGOInstrumentationGenPass // CHECK-INSTRPROF: Frontend instrumentation-based coverage lowering // CHECK-PGOGENPASS-INVOKED-INSTR-GEN-NEWPM: Running pass: PGOInstrumentationGen on @@ -14,16 +14,16 @@ // CHECK-PGOGENPASS-INVOKED-INSTR-GEN-CLANG-NOT: PGOInstrumentationGenPass // CHECK-PGOGENPASS-INVOKED-INSTR-GEN-CLANG-NEWPM-NOT: Running pass: PGOInstrumentationGen on -// RUN: %clang_cc1 -O2 -fprofile-instrument=clang %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF +// RUN: %clang_cc1 -O2 -fprofile-instrument=clang %s -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF // RUN: %clang_cc1 -O2 -fprofile-instrument=clang %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF-NEWPM -// RUN: %clang_cc1 -O0 -fprofile-instrument=clang %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF +// RUN: %clang_cc1 -O0 -fprofile-instrument=clang %s -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF // RUN: %clang_cc1 -O0 -fprofile-instrument=clang %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF-NEWPM // CHECK-CLANG-INSTRPROF: Frontend instrumentation-based coverage lowering // CHECK-CLANG-INSTRPROF-NEWPM: Running pass: InstrProfiling on // Ensure Pass PGOInstrumentationUsePass is invoked. 
// RUN: llvm-profdata merge -o %t.profdata %S/Inputs/pgotestir.profraw -// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.profdata %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-INSTR-USE +// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.profdata %s -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-INSTR-USE // RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.profdata %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-INSTR-USE-NEWPM // CHECK-PGOUSEPASS-INVOKED-INSTR-USE: PGOInstrumentationUsePass // CHECK-PGOUSEPASS-INVOKED-INSTR-USE-NEWPM: Running pass: PGOInstrumentationUse on diff --git a/clang/test/CodeGen/pgo-sample.c b/clang/test/CodeGen/pgo-sample.c --- a/clang/test/CodeGen/pgo-sample.c +++ b/clang/test/CodeGen/pgo-sample.c @@ -1,6 +1,13 @@ // Test if PGO sample use passes are invoked. // // Ensure Pass PGOInstrumentationGenPass is invoked. -// RUN: %clang_cc1 -O2 -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s +// RUN: %clang_cc1 -O2 -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s -mllvm -debug-pass=Structure -fno-experimental-new-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s +// +// The new PM has a different debug output and uses a different debug pass flag. +// RUN: %clang_cc1 -O2 -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-NEWPM // CHECK: Remove unused exception handling info // CHECK: Sample profile pass +// +// CHECK-NEWPM-DAG: SampleProfileLoaderPass on +// CHECK-NEWPM-DAG: PostOrderFunctionAttrsPass on +// CHECK-NEWPM-DAG: ModuleToFunctionPassAdaptor on diff --git a/clang/test/CodeGen/thinlto-debug-pm.c b/clang/test/CodeGen/thinlto-debug-pm.c --- a/clang/test/CodeGen/thinlto-debug-pm.c +++ b/clang/test/CodeGen/thinlto-debug-pm.c @@ -8,8 +8,8 @@ // O2-NEWPM: Running pass: LoopVectorizePass // O0-NEWPM-NOT: Running pass: LoopVectorizePass -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefix=O2-OLDPM -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefix=O0-OLDPM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fno-experimental-new-pass-manager -emit-obj -O2 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefix=O2-OLDPM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fno-experimental-new-pass-manager -emit-obj -O0 -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -mllvm -debug-pass=Structure 2>&1 | FileCheck %s --check-prefix=O0-OLDPM // O2-OLDPM: Loop Vectorization // O0-OLDPM-NOT: Loop Vectorization diff --git a/clang/test/CodeGen/x86_64-instrument-functions.c b/clang/test/CodeGen/x86_64-instrument-functions.c --- a/clang/test/CodeGen/x86_64-instrument-functions.c +++ b/clang/test/CodeGen/x86_64-instrument-functions.c @@ -1,6 +1,6 @@ // REQUIRES: x86-registered-target -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -S -finstrument-functions -O2 -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -S 
-finstrument-functions-after-inlining -O2 -o - %s | FileCheck -check-prefix=NOINLINE %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -S -finstrument-functions -O2 -fno-experimental-new-pass-manager -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -S -finstrument-functions-after-inlining -O2 -fno-experimental-new-pass-manager -o - %s | FileCheck -check-prefix=NOINLINE %s // It's not so nice having asm tests in Clang, but we need to check that we set // up the pipeline correctly in order to have the instrumentation inserted. diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp --- a/clang/test/CodeGenCXX/auto-var-init.cpp +++ b/clang/test/CodeGenCXX/auto-var-init.cpp @@ -1,3 +1,5 @@ + // UNSUPPORTED: experimental-new-pass-manager + // RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown -fblocks %s -emit-llvm -o - | FileCheck %s -check-prefixes=CHECK,CHECK-O0 // RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown -fblocks -ftrivial-auto-var-init=pattern %s -emit-llvm -o - | FileCheck %s -check-prefixes=CHECK-O0,PATTERN,PATTERN-O0 // RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown -fblocks -ftrivial-auto-var-init=pattern %s -O1 -emit-llvm -o - | FileCheck %s -check-prefixes=CHECK-O1,PATTERN,PATTERN-O1 diff --git a/clang/test/CodeGenCXX/conditional-temporaries.cpp b/clang/test/CodeGenCXX/conditional-temporaries.cpp --- a/clang/test/CodeGenCXX/conditional-temporaries.cpp +++ b/clang/test/CodeGenCXX/conditional-temporaries.cpp @@ -1,7 +1,7 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin9 -O2 -disable-llvm-passes | FileCheck %s --check-prefixes=CHECK,CHECK-NOOPT -// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin9 -O2 | FileCheck %s --check-prefixes=CHECK,CHECK-OPT -// RUN: %clang_cc1 -emit-llvm %s -o - -triple=amdgcn-amd-amdhsa -O2 | FileCheck %s --check-prefixes=CHECK,CHECK-OPT +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin9 -O2 -fno-experimental-new-pass-manager -disable-llvm-passes | FileCheck %s --check-prefixes=CHECK,CHECK-NOOPT +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin9 -O2 -fno-experimental-new-pass-manager | FileCheck %s --check-prefixes=CHECK,CHECK-OPT +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=amdgcn-amd-amdhsa -O2 -fno-experimental-new-pass-manager | FileCheck %s --check-prefixes=CHECK,CHECK-OPT namespace { diff --git a/clang/test/CodeGenCXX/flatten.cpp b/clang/test/CodeGenCXX/flatten.cpp --- a/clang/test/CodeGenCXX/flatten.cpp +++ b/clang/test/CodeGenCXX/flatten.cpp @@ -1,3 +1,7 @@ +// UNSUPPORTED: experimental-new-pass-manager +// See the comment for CodeGen/flatten.c on why this is unsupported with the new +// PM. 
+ // RUN: %clang_cc1 -triple=x86_64-linux-gnu -std=c++11 %s -emit-llvm -o - | FileCheck %s void f(void) {} diff --git a/clang/test/CodeGenCXX/member-function-pointer-calls.cpp b/clang/test/CodeGenCXX/member-function-pointer-calls.cpp --- a/clang/test/CodeGenCXX/member-function-pointer-calls.cpp +++ b/clang/test/CodeGenCXX/member-function-pointer-calls.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin10 -emit-llvm -O3 -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin10 -emit-llvm -O3 -fno-experimental-new-pass-manager -o - | FileCheck %s // RUN: %clang_cc1 %s -triple=x86_64-windows-gnu -emit-llvm -o - | FileCheck %s -check-prefix MINGW64 struct A { virtual int vf1() { return 1; } diff --git a/clang/test/CodeGenObjC/os_log.m b/clang/test/CodeGenObjC/os_log.m --- a/clang/test/CodeGenObjC/os_log.m +++ b/clang/test/CodeGenObjC/os_log.m @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-darwin-apple -fobjc-arc -O2 | FileCheck %s +// RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-darwin-apple -fobjc-arc -O2 -fno-experimental-new-pass-manager | FileCheck %s // RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-darwin-apple -fobjc-arc -O0 | FileCheck %s -check-prefix=CHECK-O0 // Make sure we emit clang.arc.use before calling objc_release as part of the diff --git a/clang/test/CodeGenObjCXX/os_log.mm b/clang/test/CodeGenObjCXX/os_log.mm --- a/clang/test/CodeGenObjCXX/os_log.mm +++ b/clang/test/CodeGenObjCXX/os_log.mm @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-darwin-apple -fobjc-arc \ -// RUN: -fexceptions -fcxx-exceptions -O1 | FileCheck %s +// RUN: -fexceptions -fcxx-exceptions -O1 -fno-experimental-new-pass-manager | FileCheck %s // Check that no EH cleanup is emitted around the call to __os_log_helper. namespace no_eh_cleanup { diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl --- a/clang/test/CodeGenOpenCL/convergent.cl +++ b/clang/test/CodeGenOpenCL/convergent.cl @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fno-experimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-LEGACY +// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fexperimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-NEW // This is initially assumed convergent, but can be deduced to not require it. @@ -117,7 +118,12 @@ // CHECK: [[for_body]]: // CHECK: tail call spir_func void @nodupfun() #[[attr5:[0-9]+]] // CHECK-NOT: call spir_func void @nodupfun() -// CHECK: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]] + +// The new PM produces slightly different IR for the loop from the legacy PM, +// but the test still checks that the loop is not unrolled.
+// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]] +// CHECK-NEW: br i1 %{{.+}}, label %[[for_body_crit_edge:.+]], label %[[for_cond_cleanup]] +// CHECK-NEW: [[for_body_crit_edge]]: void test_not_unroll() { for (int i = 0; i < 10; i++) diff --git a/clang/test/Driver/msan.c b/clang/test/Driver/msan.c --- a/clang/test/Driver/msan.c +++ b/clang/test/Driver/msan.c @@ -1,11 +1,11 @@ // REQUIRES: x86-registered-target -// RUN: %clang -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN +// RUN: %clang -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN // RUN: %clang -O1 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN // RUN: %clang -O2 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN // RUN: %clang -O3 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN -// RUN: %clang -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=kernel-memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KMSAN +// RUN: %clang -target x86_64-unknown-linux -fsanitize=kernel-memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KMSAN // RUN: %clang -O1 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=kernel-memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KMSAN // RUN: %clang -O2 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=kernel-memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KMSAN // RUN: %clang -O3 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=kernel-memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KMSAN diff --git a/clang/test/Driver/tsan.c b/clang/test/Driver/tsan.c --- a/clang/test/Driver/tsan.c +++ b/clang/test/Driver/tsan.c @@ -4,7 +4,7 @@ // RUN: %clang -O1 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=thread %s -S -emit-llvm -o - | FileCheck %s // RUN: %clang -O2 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=thread %s -S -emit-llvm -o - | FileCheck %s // RUN: %clang -O3 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=thread %s -S -emit-llvm -o - | FileCheck %s -// RUN: %clang -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=thread %s -S -emit-llvm -o - | FileCheck %s +// RUN: %clang -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=thread %s -S -emit-llvm -o - | FileCheck %s // Verify that -fsanitize=thread invokes tsan instrumentation. // Also check that this works with the new pass manager with and without diff --git a/clang/test/Frontend/optimization-remark-line-directive.c b/clang/test/Frontend/optimization-remark-line-directive.c --- a/clang/test/Frontend/optimization-remark-line-directive.c +++ b/clang/test/Frontend/optimization-remark-line-directive.c @@ -2,7 +2,11 @@ // directives. We cannot map #line directives back to // a SourceLocation. 
-// RUN: %clang_cc1 %s -Rpass=inline -debug-info-kind=line-tables-only -dwarf-column-info -emit-llvm-only -verify +// RUN: %clang_cc1 %s -Rpass=inline -fno-experimental-new-pass-manager -debug-info-kind=line-tables-only -dwarf-column-info -emit-llvm-only -verify + +// The new PM inliner is not added to the default pipeline at O0, so we add +// some optimizations to trigger it. +// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -debug-info-kind=line-tables-only -dwarf-column-info -emit-llvm-only -verify int foo(int x, int y) __attribute__((always_inline)); int foo(int x, int y) { return x + y; } diff --git a/clang/test/Frontend/optimization-remark-new-pm.c b/clang/test/Frontend/optimization-remark-new-pm.c new file mode 100644 --- /dev/null +++ b/clang/test/Frontend/optimization-remark-new-pm.c @@ -0,0 +1,20 @@ +// Verify that remarks for the inliner appear. The remarks under the new PM will +// be slightly different than those emitted by the legacy PM. The new PM inliner +// also does not appear to be added at O0, so we test at O1. +// RUN: %clang_cc1 %s -Rpass=inline -Rpass-analysis=inline -Rpass-missed=inline -O1 -fexperimental-new-pass-manager -emit-llvm-only -verify +// RUN: %clang_cc1 %s -Rpass=inline -Rpass-analysis=inline -Rpass-missed=inline -O1 -fexperimental-new-pass-manager -emit-llvm-only -debug-info-kind=line-tables-only -verify + +int foo(int x, int y) __attribute__((always_inline)); +int foo(int x, int y) { return x + y; } + +float foz(int x, int y) __attribute__((noinline)); +float foz(int x, int y) { return x * y; } + +// The negative diagnostics are emitted twice because the inliner runs +// twice. +// +int bar(int j) { + // expected-remark@+2 {{foz not inlined into bar because it should never be inlined (cost=never)}} + // expected-remark@+1 {{foo inlined into bar}} + return foo(j, j - 2) * foz(j - 2, j); +} diff --git a/clang/test/Frontend/optimization-remark-with-hotness-new-pm.c b/clang/test/Frontend/optimization-remark-with-hotness-new-pm.c new file mode 100644 --- /dev/null +++ b/clang/test/Frontend/optimization-remark-with-hotness-new-pm.c @@ -0,0 +1,85 @@ +// This test is similar to Frontend/optimization-remark-with-hotness.c but +// testing the output under the new pass manager. The inliner is not added to +// the default new PM pipeline at O0, so we compile with optimizations here. As +// a result, some of the remarks will be different since we turn on inlining, +// but the test is meant to show that remarks get dumped. The remarks are also +// slightly different in text. + +// Generate instrumentation and sampling profile data. +// RUN: llvm-profdata merge \ +// RUN: %S/Inputs/optimization-remark-with-hotness.proftext \ +// RUN: -o %t.profdata +// RUN: llvm-profdata merge -sample \ +// RUN: %S/Inputs/optimization-remark-with-hotness-sample.proftext \ +// RUN: -o %t-sample.profdata +// +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fexperimental-new-pass-manager -O1 \ +// RUN: -Rpass-analysis=inline -Rpass-missed=inline \ +// RUN: -fdiagnostics-show-hotness -verify +// The clang version of the previous test.
+// RUN: %clang -target x86_64-apple-macosx10.9 %s -c -emit-llvm -o /dev/null \ +// RUN: -fprofile-instr-use=%t.profdata -Rpass=inline \ +// RUN: -fexperimental-new-pass-manager -O1 \ +// RUN: -Rpass-analysis=inline -Rpass-missed=inline \ +// RUN: -fdiagnostics-show-hotness -Xclang -verify +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -fprofile-sample-use=%t-sample.profdata -Rpass=inline \ +// RUN: -fexperimental-new-pass-manager -O1 \ +// RUN: -Rpass-analysis=inline -Rpass-missed=inline \ +// RUN: -fdiagnostics-show-hotness -fdiagnostics-hotness-threshold=10 \ +// RUN: -verify +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fexperimental-new-pass-manager -O1 \ +// RUN: -Rpass-analysis=inline -Rpass-missed=inline \ +// RUN: -fdiagnostics-show-hotness -fdiagnostics-hotness-threshold=10 -verify +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fexperimental-new-pass-manager -O1 \ +// RUN: -Rpass-analysis=inline 2>&1 | FileCheck -check-prefix=HOTNESS_OFF %s +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fexperimental-new-pass-manager -O1 \ +// RUN: -Rpass-analysis=inline -Rno-pass-with-hotness 2>&1 | FileCheck \ +// RUN: -check-prefix=HOTNESS_OFF %s +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -Rpass-analysis=inline -fdiagnostics-show-hotness \ +// RUN: -fdiagnostics-hotness-threshold=100 2>&1 \ +// RUN: | FileCheck -allow-empty -check-prefix=THRESHOLD %s +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -Rpass=inline -Rpass-analysis=inline \ +// RUN: -fdiagnostics-show-hotness -fdiagnostics-hotness-threshold=10 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_PGO %s + +int foo(int x, int y) __attribute__((always_inline)); +int foo(int x, int y) { return x + y; } + +int sum = 0; + +void bar(int x) { + // HOTNESS_OFF: foo inlined into bar + // HOTNESS_OFF-NOT: hotness: + // THRESHOLD-NOT: inlined + // THRESHOLD-NOT: hotness + // NO_PGO: '-fdiagnostics-show-hotness' requires profile-guided optimization information + // NO_PGO: '-fdiagnostics-hotness-threshold=' requires profile-guided optimization information + // expected-remark@+1 {{foo inlined into bar with (cost=always): always inline attribute (hotness:}} + sum += foo(x, x - 2); +} + +int main(int argc, const char *argv[]) { + for (int i = 0; i < 30; i++) + // expected-remark@+1 {{bar inlined into main with}} + bar(argc); + return sum; +} diff --git a/clang/test/Frontend/optimization-remark-with-hotness.c b/clang/test/Frontend/optimization-remark-with-hotness.c --- a/clang/test/Frontend/optimization-remark-with-hotness.c +++ b/clang/test/Frontend/optimization-remark-with-hotness.c @@ -9,31 +9,37 @@ // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ // RUN: optimization-remark-with-hotness.c %s 
-emit-llvm-only \ // RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fno-experimental-new-pass-manager \ // RUN: -Rpass-analysis=inline -Rpass-missed=inline \ // RUN: -fdiagnostics-show-hotness -verify // The clang version of the previous test. // RUN: %clang -target x86_64-apple-macosx10.9 %s -c -emit-llvm -o /dev/null \ // RUN: -fprofile-instr-use=%t.profdata -Rpass=inline \ +// RUN: -fno-experimental-new-pass-manager \ // RUN: -Rpass-analysis=inline -Rpass-missed=inline \ // RUN: -fdiagnostics-show-hotness -Xclang -verify // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ // RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ // RUN: -fprofile-sample-use=%t-sample.profdata -Rpass=inline \ +// RUN: -fno-experimental-new-pass-manager \ // RUN: -Rpass-analysis=inline -Rpass-missed=inline \ // RUN: -fdiagnostics-show-hotness -fdiagnostics-hotness-threshold=10 \ // RUN: -verify // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ // RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ // RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fno-experimental-new-pass-manager \ // RUN: -Rpass-analysis=inline -Rpass-missed=inline \ // RUN: -fdiagnostics-show-hotness -fdiagnostics-hotness-threshold=10 -verify // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ // RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ // RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fno-experimental-new-pass-manager \ // RUN: -Rpass-analysis=inline 2>&1 | FileCheck -check-prefix=HOTNESS_OFF %s // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ // RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ // RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fno-experimental-new-pass-manager \ // RUN: -Rpass-analysis=inline -Rno-pass-with-hotness 2>&1 | FileCheck \ // RUN: -check-prefix=HOTNESS_OFF %s // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ diff --git a/clang/test/Frontend/optimization-remark.c b/clang/test/Frontend/optimization-remark.c --- a/clang/test/Frontend/optimization-remark.c +++ b/clang/test/Frontend/optimization-remark.c @@ -1,20 +1,30 @@ // This file tests the -Rpass family of flags (-Rpass, -Rpass-missed // and -Rpass-analysis) with the inliner. The test is designed to // always trigger the inliner, so it should be independent of the -// optimization level. +// optimization level (under the legacy PM). The inliner is not added to the new +// PM pipeline unless optimizations are present. -// RUN: %clang_cc1 %s -Rpass=inline -Rpass-analysis=inline -Rpass-missed=inline -O0 -emit-llvm-only -verify -// RUN: %clang_cc1 %s -Rpass=inline -Rpass-analysis=inline -Rpass-missed=inline -O0 -emit-llvm-only -debug-info-kind=line-tables-only -verify +// The inliner for the new PM does not seem to be enabled at O0, but we still +// get the same remarks with at least O1. The remarks are also slightly +// different and located in another test file. +// RUN: %clang_cc1 %s -Rpass=inline -Rpass-analysis=inline -Rpass-missed=inline -O0 -fno-experimental-new-pass-manager -emit-llvm-only -verify +// RUN: %clang_cc1 %s -Rpass=inline -Rpass-analysis=inline -Rpass-missed=inline -O0 -fno-experimental-new-pass-manager -emit-llvm-only -debug-info-kind=line-tables-only -verify // RUN: %clang_cc1 %s -Rpass=inline -emit-llvm -o - 2>/dev/null | FileCheck %s // // Check that we can override -Rpass= with -Rno-pass. 
-// RUN: %clang_cc1 %s -Rpass=inline -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS +// RUN: %clang_cc1 %s -Rpass=inline -fno-experimental-new-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS // RUN: %clang_cc1 %s -Rpass=inline -Rno-pass -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-NO-REMARKS // RUN: %clang_cc1 %s -Rpass=inline -Rno-everything -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-NO-REMARKS -// RUN: %clang_cc1 %s -Rpass=inline -Rno-everything -Reverything -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS +// RUN: %clang_cc1 %s -Rpass=inline -fno-experimental-new-pass-manager -Rno-everything -Reverything -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS +// +// The inliner for the new PM does not seem to be enabled at O0, but we still +// get the same remarks with at least O1. +// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS +// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -Rno-everything -Reverything -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS // // Check that -w doesn't disable remarks. -// RUN: %clang_cc1 %s -Rpass=inline -w -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS +// RUN: %clang_cc1 %s -Rpass=inline -fno-experimental-new-pass-manager -w -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS +// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -w -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS // // FIXME: -Reverything should imply -Rpass=.*. // RUN: %clang_cc1 %s -Reverything -emit-llvm -o - 2>/dev/null | FileCheck %s --check-prefix=CHECK-NO-REMARKS diff --git a/clang/test/Misc/pr32207.c b/clang/test/Misc/pr32207.c --- a/clang/test/Misc/pr32207.c +++ b/clang/test/Misc/pr32207.c @@ -1,4 +1,4 @@ // test for r305179 -// RUN: %clang_cc1 -emit-llvm -O -mllvm -print-after-all %s -o %t 2>&1 | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -O -fno-experimental-new-pass-manager -mllvm -print-after-all %s -o %t 2>&1 | FileCheck %s // CHECK: *** IR Dump After Function Integration/Inlining *** void foo() {} diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py --- a/clang/test/lit.cfg.py +++ b/clang/test/lit.cfg.py @@ -97,6 +97,10 @@ if platform.system() not in ['FreeBSD']: config.available_features.add('crash-recovery') +# Support for new pass manager. 
+if config.enable_experimental_new_pass_manager: + config.available_features.add('experimental-new-pass-manager') + # ANSI escape sequences in non-dumb terminal if platform.system() not in ['Windows']: config.available_features.add('ansi-escape-sequences') diff --git a/clang/test/lit.site.cfg.py.in b/clang/test/lit.site.cfg.py.in --- a/clang/test/lit.site.cfg.py.in +++ b/clang/test/lit.site.cfg.py.in @@ -24,6 +24,7 @@ config.clang_examples = @CLANG_BUILD_EXAMPLES@ config.enable_shared = @ENABLE_SHARED@ config.enable_backtrace = @ENABLE_BACKTRACES@ +config.enable_experimental_new_pass_manager = @ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER@ config.host_arch = "@HOST_ARCH@" config.python_executable = "@PYTHON_EXECUTABLE@" config.use_z3_solver = lit_config.params.get('USE_Z3_SOLVER', "@USE_Z3_SOLVER@") diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -31,8 +31,17 @@ #define DEBUG_TYPE "inline" -PreservedAnalyses AlwaysInlinerPass::run(Module &M, ModuleAnalysisManager &) { - InlineFunctionInfo IFI; +PreservedAnalyses AlwaysInlinerPass::run(Module &M, ModuleAnalysisManager &AM) { + // Attach an inline assumption cache such that AddAlignmentAssumptions() is + // called to match the codegen for the legacy PM. + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + std::function<AssumptionCache &(Function &)> GetAssumptionCache = + [&](Function &F) -> AssumptionCache & { + return FAM.getResult<AssumptionAnalysis>(F); + }; + InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache); + SmallSetVector<CallSite, 16> Calls; bool Changed = false; SmallVector<Function *, 16> InlinedFunctions;
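For reference, below is a minimal standalone sketch (not part of the patch; the pass name and surrounding boilerplate are made up) of the new-PM pattern the AlwaysInliner change above relies on: a module pass asks the ModuleAnalysisManager for the function-analysis proxy, builds a per-function AssumptionCache getter, and hands it to InlineFunctionInfo so InlineFunction() can add alignment assumptions, matching the legacy PM's codegen.

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <functional>

using namespace llvm;

// Illustrative only: mirrors the wiring added to AlwaysInlinerPass::run above.
struct ExampleInlineAssumptionsPass : PassInfoMixin<ExampleInlineAssumptionsPass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM) {
    // The module-to-function proxy exposes the FunctionAnalysisManager, so a
    // module pass can query function-level analyses lazily.
    FunctionAnalysisManager &FAM =
        MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
    std::function<AssumptionCache &(Function &)> GetAssumptionCache =
        [&](Function &F) -> AssumptionCache & {
      return FAM.getResult<AssumptionAnalysis>(F);
    };
    // Passing the getter lets InlineFunction() insert alignment assumptions.
    InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache);
    (void)IFI; // the actual call-site walk and inlining are omitted in this sketch
    return PreservedAnalyses::all();
  }
};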