diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -60,10 +60,12 @@
 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
 #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
@@ -1101,7 +1103,9 @@
     // Build a minimal pipeline based on the semantics required by Clang,
     // which is just that always inlining occurs.
-    MPM.addPass(AlwaysInlinerPass());
+    // We always pass false here since, according to the legacy PM logic for
+    // enabling lifetime intrinsics, they are not required at O0.
+    MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false));

     // At -O0 we directly run necessary sanitizer passes.
     if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
@@ -1199,10 +1203,25 @@
       MPM = PB.buildPerModuleDefaultPipeline(Level,
                                              CodeGenOpts.DebugPassManager);
     }
+
+    // There is a test that requires checking that the PruneEH pass is run to
+    // remove unused exception handling info when a PGO sample profile file is
+    // provided. There currently does not seem to be a new PM port for PruneEH,
+    // but many opt tests instead substitute the 'function-attrs' and
+    // 'function(simplifycfg)' passes to produce the same IR. The
+    // PostOrderFunctionAttrsPass is already in the pipeline, but
+    // SimplifyCFGPass is not, so we add it here when PGO is requested.
+    if (PGOOpt)
+      MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass()));
   }

   if (CodeGenOpts.OptimizationLevel == 0)
     addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts);
+
+  if (CodeGenOpts.hasProfileIRInstr()) {
+    // This file is stored as the ProfileFile.
+    MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->ProfileFile));
+  }
 }

 // FIXME: We still use the legacy pass manager to do code generation.
We @@ -1256,7 +1275,8 @@ NeedCodeGen = true; CodeGenPasses.add( createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - if (!CodeGenOpts.SplitDwarfFile.empty()) { + if (!CodeGenOpts.SplitDwarfFile.empty() && + CodeGenOpts.getSplitDwarfMode() == CodeGenOptions::SplitFileFission) { DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile); if (!DwoOS) return; diff --git a/clang/test/CodeGen/aarch64-neon-across.c b/clang/test/CodeGen/aarch64-neon-across.c --- a/clang/test/CodeGen/aarch64-neon-across.c +++ b/clang/test/CodeGen/aarch64-neon-across.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s @@ -8,7 +6,7 @@ #include // CHECK-LABEL: define i16 @test_vaddlv_s8(<8 x i8> %a) #0 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR:#[0-9]+]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16 // CHECK: ret i16 [[TMP0]] int16_t test_vaddlv_s8(int8x8_t a) { @@ -16,14 +14,14 @@ } // CHECK-LABEL: define i32 @test_vaddlv_s16(<4 x i16> %a) #0 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VADDLV_I]] int32_t test_vaddlv_s16(int16x4_t a) { return vaddlv_s16(a); } // CHECK-LABEL: define i16 @test_vaddlv_u8(<8 x i8> %a) #0 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16 // CHECK: ret i16 [[TMP0]] uint16_t test_vaddlv_u8(uint8x8_t a) { @@ -31,14 +29,14 @@ } // CHECK-LABEL: define i32 @test_vaddlv_u16(<4 x i16> %a) #0 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VADDLV_I]] uint32_t test_vaddlv_u16(uint16x4_t a) { return vaddlv_u16(a); } // CHECK-LABEL: define i16 @test_vaddlvq_s8(<16 x i8> %a) #1 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16 // CHECK: ret i16 [[TMP0]] int16_t test_vaddlvq_s8(int8x16_t a) { @@ -46,21 +44,21 @@ } // CHECK-LABEL: define i32 @test_vaddlvq_s16(<8 x i16> %a) #1 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VADDLV_I]] int32_t test_vaddlvq_s16(int16x8_t a) { return vaddlvq_s16(a); } // CHECK-LABEL: define i64 @test_vaddlvq_s32(<4 x i32> %a) #1 { -// CHECK: [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) #3 +// CHECK: [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VADDLVQ_S32_I]] int64_t test_vaddlvq_s32(int32x4_t a) { return vaddlvq_s32(a); } // CHECK-LABEL: define i16 @test_vaddlvq_u8(<16 x i8> %a) #1 { -// CHECK: [[VADDLV_I:%.*]] = call i32 
@llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16 // CHECK: ret i16 [[TMP0]] uint16_t test_vaddlvq_u8(uint8x16_t a) { @@ -68,21 +66,21 @@ } // CHECK-LABEL: define i32 @test_vaddlvq_u16(<8 x i16> %a) #1 { -// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VADDLV_I]] uint32_t test_vaddlvq_u16(uint16x8_t a) { return vaddlvq_u16(a); } // CHECK-LABEL: define i64 @test_vaddlvq_u32(<4 x i32> %a) #1 { -// CHECK: [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) #3 +// CHECK: [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VADDLVQ_U32_I]] uint64_t test_vaddlvq_u32(uint32x4_t a) { return vaddlvq_u32(a); } // CHECK-LABEL: define i8 @test_vmaxv_s8(<8 x i8> %a) #0 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 // CHECK: ret i8 [[TMP0]] int8_t test_vmaxv_s8(int8x8_t a) { @@ -90,7 +88,7 @@ } // CHECK-LABEL: define i16 @test_vmaxv_s16(<4 x i16> %a) #0 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16 // CHECK: ret i16 [[TMP2]] int16_t test_vmaxv_s16(int16x4_t a) { @@ -98,7 +96,7 @@ } // CHECK-LABEL: define i8 @test_vmaxv_u8(<8 x i8> %a) #0 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 // CHECK: ret i8 [[TMP0]] uint8_t test_vmaxv_u8(uint8x8_t a) { @@ -106,7 +104,7 @@ } // CHECK-LABEL: define i16 @test_vmaxv_u16(<4 x i16> %a) #0 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16 // CHECK: ret i16 [[TMP2]] uint16_t test_vmaxv_u16(uint16x4_t a) { @@ -114,7 +112,7 @@ } // CHECK-LABEL: define i8 @test_vmaxvq_s8(<16 x i8> %a) #1 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 // CHECK: ret i8 [[TMP0]] int8_t test_vmaxvq_s8(int8x16_t a) { @@ -122,7 +120,7 @@ } // CHECK-LABEL: define i16 @test_vmaxvq_s16(<8 x i16> %a) #1 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16 // CHECK: ret i16 [[TMP2]] int16_t test_vmaxvq_s16(int16x8_t a) { @@ -130,14 +128,14 @@ } // CHECK-LABEL: define i32 @test_vmaxvq_s32(<4 x i32> %a) #1 { -// CHECK: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x 
i32> %a) #3 +// CHECK: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VMAXVQ_S32_I]] int32_t test_vmaxvq_s32(int32x4_t a) { return vmaxvq_s32(a); } // CHECK-LABEL: define i8 @test_vmaxvq_u8(<16 x i8> %a) #1 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8 // CHECK: ret i8 [[TMP0]] uint8_t test_vmaxvq_u8(uint8x16_t a) { @@ -145,7 +143,7 @@ } // CHECK-LABEL: define i16 @test_vmaxvq_u16(<8 x i16> %a) #1 { -// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16 // CHECK: ret i16 [[TMP2]] uint16_t test_vmaxvq_u16(uint16x8_t a) { @@ -153,14 +151,14 @@ } // CHECK-LABEL: define i32 @test_vmaxvq_u32(<4 x i32> %a) #1 { -// CHECK: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) #3 +// CHECK: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VMAXVQ_U32_I]] uint32_t test_vmaxvq_u32(uint32x4_t a) { return vmaxvq_u32(a); } // CHECK-LABEL: define i8 @test_vminv_s8(<8 x i8> %a) #0 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 // CHECK: ret i8 [[TMP0]] int8_t test_vminv_s8(int8x8_t a) { @@ -168,7 +166,7 @@ } // CHECK-LABEL: define i16 @test_vminv_s16(<4 x i16> %a) #0 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16 // CHECK: ret i16 [[TMP2]] int16_t test_vminv_s16(int16x4_t a) { @@ -176,7 +174,7 @@ } // CHECK-LABEL: define i8 @test_vminv_u8(<8 x i8> %a) #0 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 // CHECK: ret i8 [[TMP0]] uint8_t test_vminv_u8(uint8x8_t a) { @@ -184,7 +182,7 @@ } // CHECK-LABEL: define i16 @test_vminv_u16(<4 x i16> %a) #0 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16 // CHECK: ret i16 [[TMP2]] uint16_t test_vminv_u16(uint16x4_t a) { @@ -192,7 +190,7 @@ } // CHECK-LABEL: define i8 @test_vminvq_s8(<16 x i8> %a) #1 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 // CHECK: ret i8 [[TMP0]] int8_t test_vminvq_s8(int8x16_t a) { @@ -200,7 +198,7 @@ } // CHECK-LABEL: define i16 @test_vminvq_s16(<8 x i16> %a) #1 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 
@llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16 // CHECK: ret i16 [[TMP2]] int16_t test_vminvq_s16(int16x8_t a) { @@ -208,14 +206,14 @@ } // CHECK-LABEL: define i32 @test_vminvq_s32(<4 x i32> %a) #1 { -// CHECK: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) #3 +// CHECK: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VMINVQ_S32_I]] int32_t test_vminvq_s32(int32x4_t a) { return vminvq_s32(a); } // CHECK-LABEL: define i8 @test_vminvq_u8(<16 x i8> %a) #1 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8 // CHECK: ret i8 [[TMP0]] uint8_t test_vminvq_u8(uint8x16_t a) { @@ -223,7 +221,7 @@ } // CHECK-LABEL: define i16 @test_vminvq_u16(<8 x i16> %a) #1 { -// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16 // CHECK: ret i16 [[TMP2]] uint16_t test_vminvq_u16(uint16x8_t a) { @@ -231,14 +229,14 @@ } // CHECK-LABEL: define i32 @test_vminvq_u32(<4 x i32> %a) #1 { -// CHECK: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) #3 +// CHECK: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VMINVQ_U32_I]] uint32_t test_vminvq_u32(uint32x4_t a) { return vminvq_u32(a); } // CHECK-LABEL: define i8 @test_vaddv_s8(<8 x i8> %a) #0 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 // CHECK: ret i8 [[TMP0]] int8_t test_vaddv_s8(int8x8_t a) { @@ -246,7 +244,7 @@ } // CHECK-LABEL: define i16 @test_vaddv_s16(<4 x i16> %a) #0 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16 // CHECK: ret i16 [[TMP2]] int16_t test_vaddv_s16(int16x4_t a) { @@ -254,7 +252,7 @@ } // CHECK-LABEL: define i8 @test_vaddv_u8(<8 x i8> %a) #0 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 // CHECK: ret i8 [[TMP0]] uint8_t test_vaddv_u8(uint8x8_t a) { @@ -262,7 +260,7 @@ } // CHECK-LABEL: define i16 @test_vaddv_u16(<4 x i16> %a) #0 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16 // CHECK: ret i16 [[TMP2]] uint16_t test_vaddv_u16(uint16x4_t a) { @@ -270,7 +268,7 @@ } // CHECK-LABEL: define i8 @test_vaddvq_s8(<16 x i8> %a) #1 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) 
[[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 // CHECK: ret i8 [[TMP0]] int8_t test_vaddvq_s8(int8x16_t a) { @@ -278,7 +276,7 @@ } // CHECK-LABEL: define i16 @test_vaddvq_s16(<8 x i16> %a) #1 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16 // CHECK: ret i16 [[TMP2]] int16_t test_vaddvq_s16(int16x8_t a) { @@ -286,14 +284,14 @@ } // CHECK-LABEL: define i32 @test_vaddvq_s32(<4 x i32> %a) #1 { -// CHECK: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) #3 +// CHECK: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VADDVQ_S32_I]] int32_t test_vaddvq_s32(int32x4_t a) { return vaddvq_s32(a); } // CHECK-LABEL: define i8 @test_vaddvq_u8(<16 x i8> %a) #1 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8 // CHECK: ret i8 [[TMP0]] uint8_t test_vaddvq_u8(uint8x16_t a) { @@ -301,7 +299,7 @@ } // CHECK-LABEL: define i16 @test_vaddvq_u16(<8 x i16> %a) #1 { -// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a) #3 +// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a) [[NOUNWIND_ATTR]] // CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16 // CHECK: ret i16 [[TMP2]] uint16_t test_vaddvq_u16(uint16x8_t a) { @@ -309,35 +307,35 @@ } // CHECK-LABEL: define i32 @test_vaddvq_u32(<4 x i32> %a) #1 { -// CHECK: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a) #3 +// CHECK: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VADDVQ_U32_I]] uint32_t test_vaddvq_u32(uint32x4_t a) { return vaddvq_u32(a); } // CHECK-LABEL: define float @test_vmaxvq_f32(<4 x float> %a) #1 { -// CHECK: [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) #3 +// CHECK: [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) [[NOUNWIND_ATTR]] // CHECK: ret float [[VMAXVQ_F32_I]] float32_t test_vmaxvq_f32(float32x4_t a) { return vmaxvq_f32(a); } // CHECK-LABEL: define float @test_vminvq_f32(<4 x float> %a) #1 { -// CHECK: [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) #3 +// CHECK: [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) [[NOUNWIND_ATTR]] // CHECK: ret float [[VMINVQ_F32_I]] float32_t test_vminvq_f32(float32x4_t a) { return vminvq_f32(a); } // CHECK-LABEL: define float @test_vmaxnmvq_f32(<4 x float> %a) #1 { -// CHECK: [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) #3 +// CHECK: [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) [[NOUNWIND_ATTR]] // CHECK: ret float [[VMAXNMVQ_F32_I]] float32_t test_vmaxnmvq_f32(float32x4_t a) { return vmaxnmvq_f32(a); } // CHECK-LABEL: define float @test_vminnmvq_f32(<4 x float> %a) #1 { -// CHECK: [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) #3 +// CHECK: [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) [[NOUNWIND_ATTR]] // 
CHECK: ret float [[VMINNMVQ_F32_I]] float32_t test_vminnmvq_f32(float32x4_t a) { return vminnmvq_f32(a); @@ -345,3 +343,4 @@ // CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64" // CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128" +// CHECK: attributes [[NOUNWIND_ATTR]] = { nounwind } diff --git a/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c b/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c --- a/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s @@ -8,119 +6,119 @@ #include // CHECK-LABEL: define float @test_vcvtxd_f32_f64(double %a) #0 { -// CHECK: [[VCVTXD_F32_F64_I:%.*]] = call float @llvm.aarch64.sisd.fcvtxn(double %a) #2 +// CHECK: [[VCVTXD_F32_F64_I:%.*]] = call float @llvm.aarch64.sisd.fcvtxn(double %a) [[NOUNWIND_ATTR:#[0-9]+]] // CHECK: ret float [[VCVTXD_F32_F64_I]] float32_t test_vcvtxd_f32_f64(float64_t a) { return (float32_t)vcvtxd_f32_f64(a); } // CHECK-LABEL: define i32 @test_vcvtas_s32_f32(float %a) #0 { -// CHECK: [[VCVTAS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %a) #2 +// CHECK: [[VCVTAS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTAS_S32_F32_I]] int32_t test_vcvtas_s32_f32(float32_t a) { return (int32_t)vcvtas_s32_f32(a); } // CHECK-LABEL: define i64 @test_test_vcvtad_s64_f64(double %a) #0 { -// CHECK: [[VCVTAD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a) #2 +// CHECK: [[VCVTAD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTAD_S64_F64_I]] int64_t test_test_vcvtad_s64_f64(float64_t a) { return (int64_t)vcvtad_s64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtas_u32_f32(float %a) #0 { -// CHECK: [[VCVTAS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %a) #2 +// CHECK: [[VCVTAS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTAS_U32_F32_I]] uint32_t test_vcvtas_u32_f32(float32_t a) { return (uint32_t)vcvtas_u32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtad_u64_f64(double %a) #0 { -// CHECK: [[VCVTAD_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a) #2 +// CHECK: [[VCVTAD_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTAD_U64_F64_I]] uint64_t test_vcvtad_u64_f64(float64_t a) { return (uint64_t)vcvtad_u64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtms_s32_f32(float %a) #0 { -// CHECK: [[VCVTMS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %a) #2 +// CHECK: [[VCVTMS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTMS_S32_F32_I]] int32_t test_vcvtms_s32_f32(float32_t a) { return (int32_t)vcvtms_s32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtmd_s64_f64(double %a) #0 { -// CHECK: [[VCVTMD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a) #2 +// CHECK: [[VCVTMD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTMD_S64_F64_I]] int64_t test_vcvtmd_s64_f64(float64_t a) { return (int64_t)vcvtmd_s64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtms_u32_f32(float 
%a) #0 { -// CHECK: [[VCVTMS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %a) #2 +// CHECK: [[VCVTMS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTMS_U32_F32_I]] uint32_t test_vcvtms_u32_f32(float32_t a) { return (uint32_t)vcvtms_u32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtmd_u64_f64(double %a) #0 { -// CHECK: [[VCVTMD_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a) #2 +// CHECK: [[VCVTMD_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTMD_U64_F64_I]] uint64_t test_vcvtmd_u64_f64(float64_t a) { return (uint64_t)vcvtmd_u64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtns_s32_f32(float %a) #0 { -// CHECK: [[VCVTNS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %a) #2 +// CHECK: [[VCVTNS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTNS_S32_F32_I]] int32_t test_vcvtns_s32_f32(float32_t a) { return (int32_t)vcvtns_s32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtnd_s64_f64(double %a) #0 { -// CHECK: [[VCVTND_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a) #2 +// CHECK: [[VCVTND_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTND_S64_F64_I]] int64_t test_vcvtnd_s64_f64(float64_t a) { return (int64_t)vcvtnd_s64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtns_u32_f32(float %a) #0 { -// CHECK: [[VCVTNS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %a) #2 +// CHECK: [[VCVTNS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTNS_U32_F32_I]] uint32_t test_vcvtns_u32_f32(float32_t a) { return (uint32_t)vcvtns_u32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtnd_u64_f64(double %a) #0 { -// CHECK: [[VCVTND_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a) #2 +// CHECK: [[VCVTND_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTND_U64_F64_I]] uint64_t test_vcvtnd_u64_f64(float64_t a) { return (uint64_t)vcvtnd_u64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtps_s32_f32(float %a) #0 { -// CHECK: [[VCVTPS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %a) #2 +// CHECK: [[VCVTPS_S32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTPS_S32_F32_I]] int32_t test_vcvtps_s32_f32(float32_t a) { return (int32_t)vcvtps_s32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtpd_s64_f64(double %a) #0 { -// CHECK: [[VCVTPD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a) #2 +// CHECK: [[VCVTPD_S64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTPD_S64_F64_I]] int64_t test_vcvtpd_s64_f64(float64_t a) { return (int64_t)vcvtpd_s64_f64(a); } // CHECK-LABEL: define i32 @test_vcvtps_u32_f32(float %a) #0 { -// CHECK: [[VCVTPS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %a) #2 +// CHECK: [[VCVTPS_U32_F32_I:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %a) [[NOUNWIND_ATTR]] // CHECK: ret i32 [[VCVTPS_U32_F32_I]] uint32_t test_vcvtps_u32_f32(float32_t a) { return (uint32_t)vcvtps_u32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtpd_u64_f64(double %a) #0 { -// CHECK: [[VCVTPD_U64_F64_I:%.*]] = call i64 
@llvm.aarch64.neon.fcvtpu.i64.f64(double %a) #2 +// CHECK: [[VCVTPD_U64_F64_I:%.*]] = call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %a) [[NOUNWIND_ATTR]] // CHECK: ret i64 [[VCVTPD_U64_F64_I]] uint64_t test_vcvtpd_u64_f64(float64_t a) { return (uint64_t)vcvtpd_u64_f64(a); @@ -153,3 +151,5 @@ uint64_t test_vcvtd_u64_f64(float64_t a) { return (uint64_t)vcvtd_u64_f64(a); } + +// CHECK: attributes [[NOUNWIND_ATTR]] = { nounwind } diff --git a/clang/test/CodeGen/aarch64-neon-fma.c b/clang/test/CodeGen/aarch64-neon-fma.c --- a/clang/test/CodeGen/aarch64-neon-fma.c +++ b/clang/test/CodeGen/aarch64-neon-fma.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s // Test new aarch64 intrinsics and types @@ -226,7 +224,7 @@ // CHECK: [[SUB_I:%.*]] = fsub <2 x double> , %b // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1 -// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) #3 +// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) [[NOUNWIND_ATTR:#[0-9]+]] // CHECK: ret <2 x double> [[TMP6]] float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { return vfmsq_n_f64(a, b, c); @@ -234,3 +232,4 @@ // CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64" // CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128" +// CHECK: attributes [[NOUNWIND_ATTR]] = { nounwind } diff --git a/clang/test/CodeGen/aarch64-neon-perm.c b/clang/test/CodeGen/aarch64-neon-perm.c --- a/clang/test/CodeGen/aarch64-neon-perm.c +++ b/clang/test/CodeGen/aarch64-neon-perm.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s diff --git a/clang/test/CodeGen/aarch64-neon-tbl.c b/clang/test/CodeGen/aarch64-neon-tbl.c --- a/clang/test/CodeGen/aarch64-neon-tbl.c +++ b/clang/test/CodeGen/aarch64-neon-tbl.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s diff --git a/clang/test/CodeGen/aarch64-poly128.c b/clang/test/CodeGen/aarch64-poly128.c --- a/clang/test/CodeGen/aarch64-poly128.c +++ b/clang/test/CodeGen/aarch64-poly128.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ // RUN: -disable-O0-optnone -ffp-contract=fast -emit-llvm -o - %s | opt -S -mem2reg \ diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c --- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\ // RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \ // RUN: | opt -S -mem2reg \ diff --git a/clang/test/CodeGen/aggregate-assign-call.c 
b/clang/test/CodeGen/aggregate-assign-call.c --- a/clang/test/CodeGen/aggregate-assign-call.c +++ b/clang/test/CodeGen/aggregate-assign-call.c @@ -1,6 +1,4 @@ -// UNSUPPORTED: experimental-new-pass-manager - -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=O1 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -fno-experimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s --check-prefix=O1 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=O0 // // Ensure that we place appropriate lifetime markers around indirectly returned diff --git a/clang/test/CodeGen/arm-neon-fma.c b/clang/test/CodeGen/arm-neon-fma.c --- a/clang/test/CodeGen/arm-neon-fma.c +++ b/clang/test/CodeGen/arm-neon-fma.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -triple thumbv7-none-linux-gnueabihf \ // RUN: -target-abi aapcs \ // RUN: -target-cpu cortex-a7 \ diff --git a/clang/test/CodeGen/arm-neon-numeric-maxmin.c b/clang/test/CodeGen/arm-neon-numeric-maxmin.c --- a/clang/test/CodeGen/arm-neon-numeric-maxmin.c +++ b/clang/test/CodeGen/arm-neon-numeric-maxmin.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 -ffreestanding -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck %s #include diff --git a/clang/test/CodeGen/arm-neon-vcvtX.c b/clang/test/CodeGen/arm-neon-vcvtX.c --- a/clang/test/CodeGen/arm-neon-vcvtX.c +++ b/clang/test/CodeGen/arm-neon-vcvtX.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 -ffreestanding -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck %s #include diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c --- a/clang/test/CodeGen/arm_acle.c +++ b/clang/test/CodeGen/arm_acle.c @@ -1,7 +1,5 @@ -// UNSUPPORTED: experimental-new-pass-manager - -// RUN: %clang_cc1 -ffreestanding -triple armv8-eabi -target-cpu cortex-a57 -O -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch32 -// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +neon -target-feature +crc -target-feature +crypto -O -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch64 +// RUN: %clang_cc1 -ffreestanding -triple armv8-eabi -target-cpu cortex-a57 -O -fno-experimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch32 +// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +neon -target-feature +crc -target-feature +crypto -O -fno-experimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch64 #include diff --git a/clang/test/CodeGen/available-externally-suppress.c b/clang/test/CodeGen/available-externally-suppress.c --- a/clang/test/CodeGen/available-externally-suppress.c +++ b/clang/test/CodeGen/available-externally-suppress.c @@ -1,8 +1,6 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s -// RUN: %clang_cc1 -O2 -fno-inline -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s -// RUN: %clang_cc1 -flto -O2 -fno-inline -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s -check-prefix=LTO +// RUN: %clang_cc1 -O2 
-fno-experimental-new-pass-manager -fno-inline -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s +// RUN: %clang_cc1 -flto -O2 -fno-experimental-new-pass-manager -fno-inline -emit-llvm -o - -triple x86_64-apple-darwin10 %s | FileCheck %s -check-prefix=LTO // Ensure that we don't emit available_externally functions at -O0. // Also should not emit them at -O2, unless -flto is present in which case diff --git a/clang/test/CodeGen/avx-builtins.c b/clang/test/CodeGen/avx-builtins.c --- a/clang/test/CodeGen/avx-builtins.c +++ b/clang/test/CodeGen/avx-builtins.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s diff --git a/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c b/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c --- a/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c +++ b/clang/test/CodeGen/avx512-reduceMinMaxIntrin.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s #include @@ -29,10 +27,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <8 x i64> [[TMP5]], [[TMP6]] // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 @@ -42,10 +40,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP15:%.*]] = icmp 
sgt <8 x i64> [[TMP13]], [[TMP14]] // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 @@ -55,10 +53,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP23:%.*]] = icmp sgt <8 x i64> [[TMP21]], [[TMP22]] // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 @@ -94,10 +92,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <8 x i64> [[TMP5]], [[TMP6]] // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 @@ -107,10 +105,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP15:%.*]] = icmp ugt <8 x i64> [[TMP13]], [[TMP14]] // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] // CHECK-NEXT: store <8 x i64> 
[[TMP16]], <8 x i64>* [[__T4_I]], align 64 @@ -120,10 +118,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP23:%.*]] = icmp ugt <8 x i64> [[TMP21]], [[TMP22]] // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 @@ -136,22 +134,22 @@ // CHECK-LABEL: define double @test_mm512_reduce_max_pd(<8 x double> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 // CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* 
[[__W_ADDR]], align 64 // CHECK-NEXT: store <8 x double> [[TMP0]], <8 x double>* [[__V_ADDR_I]], align 64 @@ -163,10 +161,10 @@ // CHECK-NEXT: store <4 x double> [[EXTRACT2_I]], <4 x double>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[__B_ADDR_I11_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I11_I]], align 32 +// CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 // CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> [[TMP5]], <4 x double> [[TMP6]]) #2 // CHECK-NEXT: store <4 x double> [[TMP7]], <4 x double>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 @@ -177,10 +175,10 @@ // CHECK-NEXT: store <2 x double> [[EXTRACT5_I]], <2 x double>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[__B_ADDR_I9_I]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I9_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP14:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP12]], <2 x double> [[TMP13]]) #2 // CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 @@ -189,10 +187,10 @@ // CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 // CHECK-NEXT: [[TMP17:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP18:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: 
[[TMP21:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP19]], <2 x double> [[TMP20]]) #2 // CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP22:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 @@ -227,10 +225,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i64> [[TMP5]], [[TMP6]] // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 @@ -240,10 +238,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP15:%.*]] = icmp slt <8 x i64> [[TMP13]], [[TMP14]] // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 @@ -253,10 +251,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP23:%.*]] = icmp slt <8 x i64> [[TMP21]], [[TMP22]] // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> 
[[TMP21]], <8 x i64> [[TMP22]] // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 @@ -292,10 +290,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP7:%.*]] = icmp ult <8 x i64> [[TMP5]], [[TMP6]] // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 @@ -305,10 +303,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP15:%.*]] = icmp ult <8 x i64> [[TMP13]], [[TMP14]] // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 @@ -318,10 +316,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP23:%.*]] = icmp ult <8 x i64> [[TMP21]], [[TMP22]] // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 @@ -334,22 +332,22 @@ // CHECK-LABEL: define double 
@test_mm512_reduce_min_pd(<8 x double> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 // CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <8 x double> [[TMP0]], <8 x double>* [[__V_ADDR_I]], align 64 @@ -361,10 +359,10 @@ // CHECK-NEXT: store <4 x double> [[EXTRACT2_I]], <4 x double>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[__B_ADDR_I11_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I10_I]], align 32 -// CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I11_I]], align 32 +// CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 // CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> [[TMP5]], <4 x double> [[TMP6]]) #2 // CHECK-NEXT: store <4 x double> [[TMP7]], <4 x double>* 
[[__T3_I]], align 32 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 @@ -375,10 +373,10 @@ // CHECK-NEXT: store <2 x double> [[EXTRACT5_I]], <2 x double>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[__B_ADDR_I9_I]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I8_I]], align 16 -// CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I9_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP14:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP12]], <2 x double> [[TMP13]]) #2 // CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 @@ -387,10 +385,10 @@ // CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 // CHECK-NEXT: [[TMP17:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP18:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP19]], <2 x double> [[TMP20]]) #2 // CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP22:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 @@ -402,27 +400,27 @@ // CHECK-LABEL: define i64 @test_mm512_mask_reduce_max_epi64(i8 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = 
alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 @@ -446,16 +444,16 @@ // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6 // CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* 
{{.*}}, align 64 // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]] // CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -465,10 +463,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = icmp sgt <8 x i64> [[TMP22]], [[TMP23]] // CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]] // CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64 @@ -478,10 +476,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP32:%.*]] = icmp sgt <8 x i64> [[TMP30]], [[TMP31]] // CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]] // CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64 @@ -491,10 +489,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP40:%.*]] = icmp sgt <8 x i64> [[TMP38]], [[TMP39]] // CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x 
i64> [[TMP38]], <8 x i64> [[TMP39]] // CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64 @@ -507,25 +505,25 @@ // CHECK-LABEL: define i64 @test_mm512_mask_reduce_max_epu64(i8 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 @@ -535,11 +533,11 @@ // CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: store i8 [[TMP2]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 -// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* 
[[DOTCOMPOUNDLITERAL_I_I_I]], align 64 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -549,10 +547,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP15:%.*]] = icmp ugt <8 x i64> [[TMP13]], [[TMP14]] // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T2_I]], align 64 @@ -562,10 +560,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE2_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP23:%.*]] = icmp ugt <8 x i64> [[TMP21]], [[TMP22]] // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T4_I]], align 64 @@ -575,10 +573,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE4_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP27]], <8 x i64>* [[__A_ADDR_I6_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I6_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP27]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP29:%.*]] = load <8 x 
i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP31:%.*]] = icmp ugt <8 x i64> [[TMP29]], [[TMP30]] // CHECK-NEXT: [[TMP32:%.*]] = select <8 x i1> [[TMP31]], <8 x i64> [[TMP29]], <8 x i64> [[TMP30]] // CHECK-NEXT: store <8 x i64> [[TMP32]], <8 x i64>* [[__T6_I]], align 64 @@ -591,62 +589,62 @@ // CHECK-LABEL: define double @test_mm512_mask_reduce_max_pd(i8 zeroext %__M, <8 x double> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__B_ADDR_I13_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca double, align 8 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 
// CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store double 0xFFF0000000000000, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: store double 0xFFF0000000000000, double* {{.*}}, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x double> [[VECINIT_I_I]], double [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x double> [[VECINIT1_I_I]], double [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x double> [[VECINIT2_I_I]], double [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x double> [[VECINIT3_I_I]], double [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x double> [[VECINIT4_I_I]], double [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x double> [[VECINIT5_I_I]], double [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x double> [[VECINIT6_I_I]], double [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* {{.*}}, align 64 // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* {{.*}}, align 64 // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* {{.*}}, align 64 // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: 
[[TMP14:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x double>, <8 x double>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* {{.*}}, align 64 // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x double> [[TMP14]], <8 x double> [[TMP15]] // CHECK-NEXT: store <8 x double> [[TMP17]], <8 x double>* [[__V_ADDR_I]], align 64 @@ -658,10 +656,10 @@ // CHECK-NEXT: store <4 x double> [[EXTRACT4_I]], <4 x double>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP20:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP21:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* [[__A_ADDR_I12_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[__B_ADDR_I13_I]], align 32 -// CHECK-NEXT: [[TMP22:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I12_I]], align 32 -// CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I13_I]], align 32 +// CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP22:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 // CHECK-NEXT: [[TMP24:%.*]] = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> [[TMP22]], <4 x double> [[TMP23]]) #2 // CHECK-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP25:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 @@ -672,10 +670,10 @@ // CHECK-NEXT: store <2 x double> [[EXTRACT7_I]], <2 x double>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP27:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP28:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I11_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP31:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP29]], <2 x double> [[TMP30]]) #2 // CHECK-NEXT: store <2 x double> [[TMP31]], <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP32:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 @@ -685,9 +683,9 @@ // CHECK-NEXT: [[TMP34:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP35:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 // CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP36:%.*]] 
= load <2 x double>, <2 x double>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP38:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP36]], <2 x double> [[TMP37]]) #2 // CHECK-NEXT: store <2 x double> [[TMP38]], <2 x double>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP39:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 @@ -699,27 +697,27 @@ // CHECK-LABEL: define i64 @test_mm512_mask_reduce_min_epi64(i8 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 @@ -743,16 +741,16 @@ // CHECK-NEXT: 
[[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6 // CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]] // CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -762,10 +760,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = icmp slt <8 x i64> [[TMP22]], [[TMP23]] // CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]] // CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64 @@ -775,10 +773,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* 
{{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP32:%.*]] = icmp slt <8 x i64> [[TMP30]], [[TMP31]] // CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]] // CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64 @@ -788,10 +786,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP40:%.*]] = icmp slt <8 x i64> [[TMP38]], [[TMP39]] // CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]] // CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64 @@ -804,27 +802,27 @@ // CHECK-LABEL: define i64 @test_mm512_mask_reduce_min_epu64(i8 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: 
[[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 @@ -848,16 +846,16 @@ // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6 // CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]] // CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -867,10 +865,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 -// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> 
[[TMP21]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = icmp ult <8 x i64> [[TMP22]], [[TMP23]] // CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]] // CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64 @@ -880,10 +878,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64 // CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 // CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 -// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 -// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP32:%.*]] = icmp ult <8 x i64> [[TMP30]], [[TMP31]] // CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]] // CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64 @@ -893,10 +891,10 @@ // CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64 // CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 // CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP40:%.*]] = icmp ult <8 x i64> [[TMP38]], [[TMP39]] // CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]] // CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64 @@ -909,62 +907,62 @@ // CHECK-LABEL: define double @test_mm512_mask_reduce_min_pd(i8 zeroext %__M, <8 x double> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca double, align 8 -// CHECK-NEXT: 
[[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__B_ADDR_I13_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca double, align 8 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x double>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x double>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 // CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 // CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 // CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store double 0x7FF0000000000000, double* [[__W_ADDR_I_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: store double 0x7FF0000000000000, double* {{.*}}, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x double> [[VECINIT_I_I]], double [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: 
[[VECINIT2_I_I:%.*]] = insertelement <8 x double> [[VECINIT1_I_I]], double [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x double> [[VECINIT2_I_I]], double [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x double> [[VECINIT3_I_I]], double [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x double> [[VECINIT4_I_I]], double [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x double> [[VECINIT5_I_I]], double [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load double, double* {{.*}}, align 8 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x double> [[VECINIT6_I_I]], double [[TMP9]], i32 7 -// CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* {{.*}}, align 64 // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* {{.*}}, align 64 // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* {{.*}}, align 64 // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 -// CHECK-NEXT: [[TMP14:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP14:%.*]] = load <8 x double>, <8 x double>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* {{.*}}, align 64 // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x double> [[TMP14]], <8 x double> [[TMP15]] // CHECK-NEXT: store <8 x double> [[TMP17]], <8 x double>* [[__V_ADDR_I]], align 64 @@ -976,10 +974,10 @@ // CHECK-NEXT: store <4 x double> [[EXTRACT4_I]], <4 x double>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP20:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP21:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* [[__A_ADDR_I12_I]], align 32 -// CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[__B_ADDR_I13_I]], align 32 -// CHECK-NEXT: [[TMP22:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I12_I]], align 32 -// CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x 
double>* [[__B_ADDR_I13_I]], align 32 +// CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP22:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x double>* {{.*}}, align 32 // CHECK-NEXT: [[TMP24:%.*]] = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> [[TMP22]], <4 x double> [[TMP23]]) #2 // CHECK-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP25:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 @@ -990,10 +988,10 @@ // CHECK-NEXT: store <2 x double> [[EXTRACT7_I]], <2 x double>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP27:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP28:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I11_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP31:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP29]], <2 x double> [[TMP30]]) #2 // CHECK-NEXT: store <2 x double> [[TMP31]], <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP32:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 @@ -1003,9 +1001,9 @@ // CHECK-NEXT: [[TMP34:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP35:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 // CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP36:%.*]] = load <2 x double>, <2 x double>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* {{.*}}, align 16 // CHECK-NEXT: [[TMP38:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP36]], <2 x double> [[TMP37]]) #2 // CHECK-NEXT: store <2 x double> [[TMP38]], <2 x double>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP39:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 @@ -1017,26 +1015,26 @@ // CHECK-LABEL: define i32 @test_mm512_reduce_max_epi32(<8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 
-// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -1048,11 +1046,11 @@ // CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> // CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <8 x i32> [[TMP6]], [[TMP8]] // CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] @@ -1113,7 +1111,7 @@ // CHECK-NEXT: [[TMP48:%.*]] = icmp sgt <4 x i32> [[TMP45]], [[TMP47]] // CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> 
[[TMP45]], <4 x i32> [[TMP47]] // CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -1123,26 +1121,26 @@ // CHECK-LABEL: define i32 @test_mm512_reduce_max_epu32(<8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -1154,11 +1152,11 @@ // CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* 
[[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> // CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <8 x i32> [[TMP6]], [[TMP8]] // CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] @@ -1219,7 +1217,7 @@ // CHECK-NEXT: [[TMP48:%.*]] = icmp ugt <4 x i32> [[TMP45]], [[TMP47]] // CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] // CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -1229,26 +1227,26 @@ // CHECK-LABEL: define float @test_mm512_reduce_max_ps(<16 x float> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__A_ADDR_I14_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__B_ADDR_I15_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A_ADDR_I10_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I11_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: 
[[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <16 x float> [[TMP0]], <16 x float>* [[__V_ADDR_I]], align 64 @@ -1264,10 +1262,10 @@ // CHECK-NEXT: store <8 x float> [[TMP6]], <8 x float>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP7:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP8:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP8]], <8 x float>* [[__B_ADDR_I15_I]], align 32 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I15_I]], align 32 +// CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: store <8 x float> [[TMP8]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 // CHECK-NEXT: [[TMP11:%.*]] = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> [[TMP9]], <8 x float> [[TMP10]]) #2 // CHECK-NEXT: store <8 x float> [[TMP11]], <8 x float>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 @@ -1278,10 +1276,10 @@ // CHECK-NEXT: store <4 x float> [[EXTRACT5_I]], <4 x float>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP15:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP15]], <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP15]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP16:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP17:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP18:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP16]], <4 x float> [[TMP17]]) #2 // CHECK-NEXT: store <4 x float> [[TMP18]], <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP19:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 @@ -1290,10 +1288,10 @@ // CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 // CHECK-NEXT: [[TMP21:%.*]] = load <4 x 
float>, <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP22:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP22]], <4 x float>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP23:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP24:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I11_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP22]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP23:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP24:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP25:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP23]], <4 x float> [[TMP24]]) #2 // CHECK-NEXT: store <4 x float> [[TMP25]], <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP26:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 @@ -1302,10 +1300,10 @@ // CHECK-NEXT: store <4 x float> [[SHUFFLE8_I]], <4 x float>* [[__T9_I]], align 16 // CHECK-NEXT: [[TMP28:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP29:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP28]], <4 x float>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP29]], <4 x float>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP31:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP28]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP29]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP30:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP31:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP32:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP30]], <4 x float> [[TMP31]]) #2 // CHECK-NEXT: store <4 x float> [[TMP32]], <4 x float>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP33:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 @@ -1317,26 +1315,26 @@ // CHECK-LABEL: define i32 @test_mm512_reduce_min_epi32(<8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 
-// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -1348,11 +1346,11 @@ // CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> // CHECK-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[TMP6]], [[TMP8]] // CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] @@ -1413,7 +1411,7 @@ // CHECK-NEXT: [[TMP48:%.*]] = icmp slt <4 x i32> [[TMP45]], [[TMP47]] // CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] // CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -1423,26 +1421,26 @@ // CHECK-LABEL: define i32 @test_mm512_reduce_min_epu32(<8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = 
alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 @@ -1454,11 +1452,11 @@ // CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> -// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x 
i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> // CHECK-NEXT: [[TMP9:%.*]] = icmp ult <8 x i32> [[TMP6]], [[TMP8]] // CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] @@ -1519,7 +1517,7 @@ // CHECK-NEXT: [[TMP48:%.*]] = icmp ult <4 x i32> [[TMP45]], [[TMP47]] // CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] // CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -1529,26 +1527,26 @@ // CHECK-LABEL: define float @test_mm512_reduce_min_ps(<16 x float> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__A_ADDR_I14_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__B_ADDR_I15_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A_ADDR_I10_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I11_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: 
[[__T10_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: store <16 x float> [[TMP0]], <16 x float>* [[__V_ADDR_I]], align 64 @@ -1564,10 +1562,10 @@ // CHECK-NEXT: store <8 x float> [[TMP6]], <8 x float>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP7:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP8:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP8]], <8 x float>* [[__B_ADDR_I15_I]], align 32 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I14_I]], align 32 -// CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I15_I]], align 32 +// CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: store <8 x float> [[TMP8]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 // CHECK-NEXT: [[TMP11:%.*]] = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> [[TMP9]], <8 x float> [[TMP10]]) #2 // CHECK-NEXT: store <8 x float> [[TMP11]], <8 x float>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP12:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 @@ -1578,10 +1576,10 @@ // CHECK-NEXT: store <4 x float> [[EXTRACT5_I]], <4 x float>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP15:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP15]], <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP16:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP17:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP15]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP16:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP17:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP18:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP16]], <4 x float> [[TMP17]]) #2 // CHECK-NEXT: store <4 x float> [[TMP18]], <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP19:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 @@ -1590,10 +1588,10 @@ // CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 // CHECK-NEXT: [[TMP21:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP22:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP22]], <4 x float>* [[__B_ADDR_I11_I]], align 16 -// CHECK-NEXT: [[TMP23:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I10_I]], align 16 -// CHECK-NEXT: [[TMP24:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I11_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP22]], <4 x 
float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP23:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP24:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP25:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP23]], <4 x float> [[TMP24]]) #2 // CHECK-NEXT: store <4 x float> [[TMP25]], <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP26:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 @@ -1602,10 +1600,10 @@ // CHECK-NEXT: store <4 x float> [[SHUFFLE8_I]], <4 x float>* [[__T9_I]], align 16 // CHECK-NEXT: [[TMP28:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP29:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP28]], <4 x float>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP29]], <4 x float>* [[__B_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP30:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP31:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP28]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP29]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP30:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP31:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP32:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP30]], <4 x float> [[TMP31]]) #2 // CHECK-NEXT: store <4 x float> [[TMP32]], <4 x float>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP33:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 @@ -1617,33 +1615,33 @@ // CHECK-LABEL: define i32 @test_mm512_mask_reduce_max_epi32(i16 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: 
[[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 @@ -1683,18 +1681,18 @@ // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14 // CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* {{.*}}, align 64 // CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64> // CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2 // CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A2_ADDR_I_I]], align 64 // CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 // CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A2_ADDR_I_I]], align 64 // CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32> // CHECK-NEXT: 
[[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1> // CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]] @@ -1708,11 +1706,11 @@ // CHECK-NEXT: store <4 x i64> [[EXTRACT4_I]], <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32 -// CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP35:%.*]] = bitcast <4 x i64> [[TMP34]] to <8 x i32> -// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32> // CHECK-NEXT: [[TMP38:%.*]] = icmp sgt <8 x i32> [[TMP35]], [[TMP37]] // CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]] @@ -1773,7 +1771,7 @@ // CHECK-NEXT: [[TMP77:%.*]] = icmp sgt <4 x i32> [[TMP74]], [[TMP76]] // CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]] // CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP78]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP80:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP80]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -1783,31 +1781,31 @@ // CHECK-LABEL: define i32 @test_mm512_mask_reduce_max_epu32(i16 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x 
i32>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 @@ -1817,12 +1815,12 @@ // CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 // CHECK-NEXT: store i16 [[TMP2]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP5]] to <16 x i32> -// CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 -// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP7]] to <16 x i32> // CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> // CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP6]], <16 x i32> [[TMP8]] @@ -1837,10 +1835,10 @@ // CHECK-NEXT: [[TMP14:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP15:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: store <4 x i64> [[TMP14]], <4 x i64>* [[__A2_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP15]], <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> 
[[TMP15]], <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP16:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32 // CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i64> [[TMP16]] to <8 x i32> -// CHECK-NEXT: [[TMP18:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP18:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i64> [[TMP18]] to <8 x i32> // CHECK-NEXT: [[TMP20:%.*]] = icmp ugt <8 x i32> [[TMP17]], [[TMP19]] // CHECK-NEXT: [[TMP21:%.*]] = select <8 x i1> [[TMP20]], <8 x i32> [[TMP17]], <8 x i32> [[TMP19]] @@ -1901,7 +1899,7 @@ // CHECK-NEXT: [[TMP59:%.*]] = icmp ugt <4 x i32> [[TMP56]], [[TMP58]] // CHECK-NEXT: [[TMP60:%.*]] = select <4 x i1> [[TMP59]], <4 x i32> [[TMP56]], <4 x i32> [[TMP58]] // CHECK-NEXT: [[TMP61:%.*]] = bitcast <4 x i32> [[TMP60]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP62:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP62]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -1911,82 +1909,82 @@ // CHECK-LABEL: define float @test_mm512_mask_reduce_max_ps(i16 zeroext %__M, <16 x float> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I15_I:%.*]] = 
alloca <4 x float>, align 16 +// CHECK-DAG: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca float, align 4 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 // CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store float 0xFFF0000000000000, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: store float 0xFFF0000000000000, float* {{.*}}, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x float> [[VECINIT_I_I]], float [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x float> [[VECINIT1_I_I]], float [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x float> [[VECINIT2_I_I]], float [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x float> [[VECINIT3_I_I]], float [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x float> [[VECINIT4_I_I]], float [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement 
<16 x float> [[VECINIT5_I_I]], float [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x float> [[VECINIT6_I_I]], float [[TMP9]], i32 7 -// CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x float> [[VECINIT7_I_I]], float [[TMP10]], i32 8 -// CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x float> [[VECINIT8_I_I]], float [[TMP11]], i32 9 -// CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x float> [[VECINIT9_I_I]], float [[TMP12]], i32 10 -// CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x float> [[VECINIT10_I_I]], float [[TMP13]], i32 11 -// CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x float> [[VECINIT11_I_I]], float [[TMP14]], i32 12 -// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x float> [[VECINIT12_I_I]], float [[TMP15]], i32 13 -// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x float> [[VECINIT13_I_I]], float [[TMP16]], i32 14 -// CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x float> [[VECINIT14_I_I]], float [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 64 // CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* {{.*}}, align 64 // CHECK-NEXT: store i16 [[TMP19]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* {{.*}}, align 64 // CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, 
<16 x float>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = bitcast i16 [[TMP21]] to <16 x i1> // CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x float> [[TMP22]], <16 x float> [[TMP23]] // CHECK-NEXT: store <16 x float> [[TMP25]], <16 x float>* [[__V_ADDR_I]], align 64 @@ -2002,10 +2000,10 @@ // CHECK-NEXT: store <8 x float> [[TMP31]], <8 x float>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP32:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP33:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* [[__A_ADDR_I16_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* [[__B_ADDR_I17_I]], align 32 -// CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I16_I]], align 32 -// CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I17_I]], align 32 +// CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 // CHECK-NEXT: [[TMP36:%.*]] = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> [[TMP34]], <8 x float> [[TMP35]]) #2 // CHECK-NEXT: store <8 x float> [[TMP36]], <8 x float>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP37:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 @@ -2016,10 +2014,10 @@ // CHECK-NEXT: store <4 x float> [[EXTRACT7_I]], <4 x float>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP39:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP40:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* [[__A_ADDR_I14_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* [[__B_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I14_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I15_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP43:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP41]], <4 x float> [[TMP42]]) #2 // CHECK-NEXT: store <4 x float> [[TMP43]], <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP44:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 @@ -2028,10 +2026,10 @@ // CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 // CHECK-NEXT: [[TMP46:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP47:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP47]], <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP49:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> 
[[TMP47]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP49:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP50:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP48]], <4 x float> [[TMP49]]) #2 // CHECK-NEXT: store <4 x float> [[TMP50]], <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP51:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 @@ -2041,9 +2039,9 @@ // CHECK-NEXT: [[TMP53:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP54:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 // CHECK-NEXT: store <4 x float> [[TMP53]], <4 x float>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP54]], <4 x float>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP54]], <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP55:%.*]] = load <4 x float>, <4 x float>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP57:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP55]], <4 x float> [[TMP56]]) #2 // CHECK-NEXT: store <4 x float> [[TMP57]], <4 x float>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP58:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 @@ -2055,33 +2053,33 @@ // CHECK-LABEL: define i32 @test_mm512_mask_reduce_min_epi32(i16 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, 
align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 @@ -2121,18 +2119,18 @@ // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14 // CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* {{.*}}, align 64 // CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64> // CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32> // CHECK-NEXT: [[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1> // CHECK-NEXT: [[TMP28:%.*]] = select 
<16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]] @@ -2147,10 +2145,10 @@ // CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A2_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32 // CHECK-NEXT: [[TMP35:%.*]] = bitcast <4 x i64> [[TMP34]] to <8 x i32> -// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32> // CHECK-NEXT: [[TMP38:%.*]] = icmp slt <8 x i32> [[TMP35]], [[TMP37]] // CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]] @@ -2211,7 +2209,7 @@ // CHECK-NEXT: [[TMP77:%.*]] = icmp slt <4 x i32> [[TMP74]], [[TMP76]] // CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]] // CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP78]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP80:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP80]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -2221,33 +2219,33 @@ // CHECK-LABEL: define i32 @test_mm512_mask_reduce_min_epu32(i16 zeroext %__M, <8 x i64> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, 
align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 @@ -2287,18 +2285,18 @@ // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14 // CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* {{.*}}, align 64 // CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64> // CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32> -// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* 
{{.*}}, align 64 // CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32> // CHECK-NEXT: [[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1> // CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]] @@ -2313,10 +2311,10 @@ // CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 // CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A2_ADDR_I_I]], align 32 -// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32 // CHECK-NEXT: [[TMP35:%.*]] = bitcast <4 x i64> [[TMP34]] to <8 x i32> -// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 +// CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* {{.*}}, align 32 // CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32> // CHECK-NEXT: [[TMP38:%.*]] = icmp ult <8 x i32> [[TMP35]], [[TMP37]] // CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]] @@ -2377,7 +2375,7 @@ // CHECK-NEXT: [[TMP77:%.*]] = icmp ult <4 x i32> [[TMP74]], [[TMP76]] // CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]] // CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64> -// CHECK-NEXT: store <4 x i32> [[TMP78]], <4 x i32>* [[__T10_I]], align 16 +// CHECK: store <4 x i32> {{.*}}, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP80:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP80]], i32 0 // CHECK-NEXT: ret i32 [[VECEXT_I]] @@ -2387,82 +2385,82 @@ // CHECK-LABEL: define float @test_mm512_mask_reduce_min_ps(i16 zeroext %__M, <16 x float> %__W) #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 -// CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 -// CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, 
align 16 -// CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I15_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__W_ADDR_I_I:%.*]] = alloca float, align 4 +// CHECK-DAG: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__M_ADDR_I:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-DAG: [[__T1_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T2_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T3_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-DAG: [[__T4_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T5_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T6_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T7_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T8_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T9_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__T10_I:%.*]] = alloca <4 x float>, align 16 +// CHECK-DAG: [[__M_ADDR:%.*]] = alloca i16, align 2 +// CHECK-DAG: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 // CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 // CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 // CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store float 0x7FF0000000000000, float* [[__W_ADDR_I_I]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: store float 0x7FF0000000000000, float* {{.*}}, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x float> [[VECINIT_I_I]], float [[TMP3]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x float> [[VECINIT1_I_I]], float [[TMP4]], i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x float> [[VECINIT2_I_I]], float [[TMP5]], i32 3 -// CHECK-NEXT: [[TMP6:%.*]] = load float, float* 
[[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x float> [[VECINIT3_I_I]], float [[TMP6]], i32 4 -// CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x float> [[VECINIT4_I_I]], float [[TMP7]], i32 5 -// CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x float> [[VECINIT5_I_I]], float [[TMP8]], i32 6 -// CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x float> [[VECINIT6_I_I]], float [[TMP9]], i32 7 -// CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x float> [[VECINIT7_I_I]], float [[TMP10]], i32 8 -// CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x float> [[VECINIT8_I_I]], float [[TMP11]], i32 9 -// CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x float> [[VECINIT9_I_I]], float [[TMP12]], i32 10 -// CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x float> [[VECINIT10_I_I]], float [[TMP13]], i32 11 -// CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x float> [[VECINIT11_I_I]], float [[TMP14]], i32 12 -// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x float> [[VECINIT12_I_I]], float [[TMP15]], i32 13 -// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x float> [[VECINIT13_I_I]], float [[TMP16]], i32 14 -// CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load float, float* {{.*}}, align 4 // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x float> [[VECINIT14_I_I]], float [[TMP17]], i32 15 -// CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 -// CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 +// CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 64 // CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 // CHECK-NEXT: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* 
[[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* {{.*}}, align 64 // CHECK-NEXT: store i16 [[TMP19]], i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* [[__A_ADDR_I_I]], align 64 +// CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* {{.*}}, align 64 // CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 -// CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64 -// CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[__W2_ADDR_I_I]], align 64 +// CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 64 +// CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* {{.*}}, align 64 // CHECK-NEXT: [[TMP24:%.*]] = bitcast i16 [[TMP21]] to <16 x i1> // CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x float> [[TMP22]], <16 x float> [[TMP23]] // CHECK-NEXT: store <16 x float> [[TMP25]], <16 x float>* [[__V_ADDR_I]], align 64 @@ -2478,10 +2476,10 @@ // CHECK-NEXT: store <8 x float> [[TMP31]], <8 x float>* [[__T2_I]], align 32 // CHECK-NEXT: [[TMP32:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 // CHECK-NEXT: [[TMP33:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* [[__A_ADDR_I16_I]], align 32 -// CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* [[__B_ADDR_I17_I]], align 32 -// CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I16_I]], align 32 -// CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I17_I]], align 32 +// CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 +// CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* {{.*}}, align 32 // CHECK-NEXT: [[TMP36:%.*]] = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> [[TMP34]], <8 x float> [[TMP35]]) #2 // CHECK-NEXT: store <8 x float> [[TMP36]], <8 x float>* [[__T3_I]], align 32 // CHECK-NEXT: [[TMP37:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 @@ -2492,10 +2490,10 @@ // CHECK-NEXT: store <4 x float> [[EXTRACT7_I]], <4 x float>* [[__T5_I]], align 16 // CHECK-NEXT: [[TMP39:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 // CHECK-NEXT: [[TMP40:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* [[__A_ADDR_I14_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* [[__B_ADDR_I15_I]], align 16 -// CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I14_I]], align 16 -// CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I15_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP43:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP41]], <4 x float> [[TMP42]]) #2 // CHECK-NEXT: store <4 x float> [[TMP43]], <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP44:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 @@ -2504,10 +2502,10 @@ // CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 
16 // CHECK-NEXT: [[TMP46:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 // CHECK-NEXT: [[TMP47:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP47]], <4 x float>* [[__B_ADDR_I13_I]], align 16 -// CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 -// CHECK-NEXT: [[TMP49:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: store <4 x float> [[TMP47]], <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 +// CHECK-NEXT: [[TMP49:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP50:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP48]], <4 x float> [[TMP49]]) #2 // CHECK-NEXT: store <4 x float> [[TMP50]], <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP51:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 @@ -2517,9 +2515,9 @@ // CHECK-NEXT: [[TMP53:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 // CHECK-NEXT: [[TMP54:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 // CHECK-NEXT: store <4 x float> [[TMP53]], <4 x float>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: store <4 x float> [[TMP54]], <4 x float>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: store <4 x float> [[TMP54]], <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP55:%.*]] = load <4 x float>, <4 x float>* [[__A2_ADDR_I_I]], align 16 -// CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 +// CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 16 // CHECK-NEXT: [[TMP57:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP55]], <4 x float> [[TMP56]]) #2 // CHECK-NEXT: store <4 x float> [[TMP57]], <4 x float>* [[__T10_I]], align 16 // CHECK-NEXT: [[TMP58:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -1,7 +1,10 @@ -// UNSUPPORTED: experimental-new-pass-manager +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s +// There are a few cases where, instead of accepting the result of an instruction +// directly as an argument to a select, it goes through some bitcasts.
+// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM +// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM #include @@ -10482,20 +10485,24 @@ __m512i test_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_abs_epi32 + // CHECK-LABEL: @test_mm512_mask_abs_epi32 // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <16 x i32> [[SEL]] to <8 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} return _mm512_mask_abs_epi32 (__W,__U,__A); } __m512i test_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_abs_epi32 + // CHECK-LABEL: @test_mm512_maskz_abs_epi32 // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <16 x i32> [[SEL]] to <8 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} return _mm512_maskz_abs_epi32 (__U,__A); } diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c --- a/clang/test/CodeGen/avx512vl-builtins.c +++ b/clang/test/CodeGen/avx512vl-builtins.c @@ -1,7 +1,8 @@ -// UNSUPPORTED: experimental-new-pass-manager - -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// There are a few cases where, instead of accepting the result of an instruction +// directly as an argument to a select, it goes through some bitcasts.
+// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM #include @@ -4591,6 +4592,8 @@ // CHECK: [[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[SEL]] to <2 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} return _mm_mask_abs_epi32(__W,__U,__A); } @@ -4599,6 +4602,8 @@ // CHECK: [[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[SEL]] to <2 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} return _mm_maskz_abs_epi32(__U,__A); } @@ -4607,6 +4612,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[SEL]] to <4 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} return _mm256_mask_abs_epi32(__W,__U,__A); } @@ -4615,6 +4622,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[SEL]] to <4 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} return _mm256_maskz_abs_epi32(__U,__A); } @@ -4670,6 +4679,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_max_epi32(__M,__A,__B); } @@ -4677,6 +4688,8 @@ // CHECK-LABEL: @test_mm_mask_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_max_epi32(__W,__M,__A,__B); } @@ -4684,6 +4697,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> 
[[RES]], <8 x i32> {{.*}} return _mm256_maskz_max_epi32(__M,__A,__B); } @@ -4691,6 +4706,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_max_epi32(__W,__M,__A,__B); } @@ -4738,6 +4755,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_max_epu32(__M,__A,__B); } @@ -4745,6 +4764,8 @@ // CHECK-LABEL: @test_mm_mask_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_max_epu32(__W,__M,__A,__B); } @@ -4752,6 +4773,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_max_epu32(__M,__A,__B); } @@ -4759,6 +4782,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_max_epu32(__W,__M,__A,__B); } @@ -4806,6 +4831,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_min_epi32(__M,__A,__B); } @@ -4813,6 +4840,8 @@ // CHECK-LABEL: @test_mm_mask_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_min_epi32(__W,__M,__A,__B); } @@ -4820,6 +4849,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x 
i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_min_epi32(__M,__A,__B); } @@ -4827,6 +4858,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_min_epi32(__W,__M,__A,__B); } @@ -4874,6 +4907,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_min_epu32(__M,__A,__B); } @@ -4881,6 +4916,8 @@ // CHECK-LABEL: @test_mm_mask_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_min_epu32(__W,__M,__A,__B); } @@ -4888,6 +4925,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_min_epu32(__M,__A,__B); } @@ -4895,6 +4934,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_min_epu32(__W,__M,__A,__B); } diff --git a/clang/test/CodeGen/avx512vlbw-builtins.c b/clang/test/CodeGen/avx512vlbw-builtins.c --- a/clang/test/CodeGen/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/avx512vlbw-builtins.c @@ -1,8 +1,10 @@ -// UNSUPPORTED: experimental-new-pass-manager - -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager 
-triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s +// There are a few cases where, instead of accepting the result of an instruction +// directly as an argument to a select, it goes through some bitcasts. +// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM +// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM #include @@ -903,6 +905,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} return _mm_mask_abs_epi8(__W,__U,__A); } @@ -912,6 +916,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} return _mm_maskz_abs_epi8(__U,__A); } @@ -921,6 +927,8 @@ // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[A]], <32 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} return _mm256_mask_abs_epi8(__W,__U,__A); } @@ -930,6 +938,8 @@ // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[A]], <32 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} return _mm256_maskz_abs_epi8(__U,__A); } @@ -939,6 +949,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[SEL]], <8 x i16> %{{.*}} return _mm_mask_abs_epi16(__W,__U,__A); } @@ -948,6 +960,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] + // CHECK-NEWPM:
[[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[SEL]], <8 x i16> %{{.*}} return _mm_maskz_abs_epi16(__U,__A); } @@ -957,6 +971,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} return _mm256_mask_abs_epi16(__W,__U,__A); } @@ -966,6 +982,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} return _mm256_maskz_abs_epi16(__U,__A); } @@ -1231,6 +1249,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_max_epi8(__M,__A,__B); } @@ -1238,6 +1258,8 @@ // CHECK-LABEL: @test_mm_mask_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_max_epi8(__W,__M,__A,__B); } @@ -1245,6 +1267,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_max_epi8(__M,__A,__B); } @@ -1252,6 +1276,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_max_epi8(__W,__M,__A,__B); } @@ -1259,6 +1285,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to 
[[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_max_epi16(__M,__A,__B); } @@ -1266,6 +1294,8 @@ // CHECK-LABEL: @test_mm_mask_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_max_epi16(__W,__M,__A,__B); } @@ -1273,6 +1303,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_max_epi16(__M,__A,__B); } @@ -1280,6 +1312,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_max_epi16(__W,__M,__A,__B); } @@ -1287,6 +1321,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_max_epu8(__M,__A,__B); } @@ -1294,6 +1330,8 @@ // CHECK-LABEL: @test_mm_mask_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_max_epu8(__W,__M,__A,__B); } @@ -1301,6 +1339,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_max_epu8(__M,__A,__B); } @@ -1308,6 +1348,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_max_epu8(__W,__M,__A,__B); } @@ -1315,6 +1357,8 @@ // 
CHECK-LABEL: @test_mm_maskz_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_max_epu16(__M,__A,__B); } @@ -1322,6 +1366,8 @@ // CHECK-LABEL: @test_mm_mask_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_max_epu16(__W,__M,__A,__B); } @@ -1329,6 +1375,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_max_epu16(__M,__A,__B); } @@ -1336,6 +1384,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_max_epu16(__W,__M,__A,__B); } @@ -1343,6 +1393,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_min_epi8(__M,__A,__B); } @@ -1350,6 +1402,8 @@ // CHECK-LABEL: @test_mm_mask_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_min_epi8(__W,__M,__A,__B); } @@ -1357,6 +1411,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_min_epi8(__M,__A,__B); } @@ -1364,6 +1420,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x 
i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_min_epi8(__W,__M,__A,__B); } @@ -1371,6 +1429,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_min_epi16(__M,__A,__B); } @@ -1378,6 +1438,8 @@ // CHECK-LABEL: @test_mm_mask_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_min_epi16(__W,__M,__A,__B); } @@ -1385,6 +1447,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_min_epi16(__M,__A,__B); } @@ -1392,6 +1456,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_min_epi16(__W,__M,__A,__B); } @@ -1399,6 +1465,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_min_epu8(__M,__A,__B); } @@ -1406,6 +1474,8 @@ // CHECK-LABEL: @test_mm_mask_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_min_epu8(__W,__M,__A,__B); } @@ -1413,6 +1483,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] 
[[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_min_epu8(__M,__A,__B); } @@ -1420,6 +1492,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_min_epu8(__W,__M,__A,__B); } @@ -1427,6 +1501,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_min_epu16(__M,__A,__B); } @@ -1434,6 +1510,8 @@ // CHECK-LABEL: @test_mm_mask_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_min_epu16(__W,__M,__A,__B); } @@ -1441,6 +1519,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_min_epu16(__M,__A,__B); } @@ -1448,6 +1528,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_min_epu16(__W,__M,__A,__B); } diff --git a/clang/test/CodeGen/builtin-movdir.c b/clang/test/CodeGen/builtin-movdir.c --- a/clang/test/CodeGen/builtin-movdir.c +++ b/clang/test/CodeGen/builtin-movdir.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -ffreestanding -Wall -pedantic -triple x86_64-unknown-unknown -target-feature +movdiri -target-feature +movdir64b %s -emit-llvm -o - | FileCheck %s --check-prefix=X86_64 --check-prefix=CHECK // RUN: %clang_cc1 -ffreestanding -Wall -pedantic -triple i386-unknown-unknown -target-feature +movdiri -target-feature +movdir64b %s -emit-llvm -o - | FileCheck %s --check-prefix=X86 --check-prefix=CHECK diff --git a/clang/test/CodeGen/builtins-ppc-p9vector.c b/clang/test/CodeGen/builtins-ppc-p9vector.c --- a/clang/test/CodeGen/builtins-ppc-p9vector.c +++ b/clang/test/CodeGen/builtins-ppc-p9vector.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // REQUIRES: powerpc-registered-target // RUN: %clang_cc1 
-target-feature +altivec -target-feature +power9-vector \ // RUN: -triple powerpc64-unknown-unknown -emit-llvm %s \ diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c --- a/clang/test/CodeGen/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/builtins-ppc-vsx.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // REQUIRES: powerpc-registered-target // RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-LE diff --git a/clang/test/CodeGen/flatten.c b/clang/test/CodeGen/flatten.c --- a/clang/test/CodeGen/flatten.c +++ b/clang/test/CodeGen/flatten.c @@ -1,4 +1,8 @@ // UNSUPPORTED: experimental-new-pass-manager +// Currently, different code seems to be intentionally generated under the new +// PM since we alwaysinline functions and not callsites under new PM. +// Under new PM, f() will not be inlined from g() since f is not marked as +// alwaysinline. // RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o - | FileCheck %s diff --git a/clang/test/CodeGen/lto-newpm-pipeline.c b/clang/test/CodeGen/lto-newpm-pipeline.c --- a/clang/test/CodeGen/lto-newpm-pipeline.c +++ b/clang/test/CodeGen/lto-newpm-pipeline.c @@ -27,6 +27,7 @@ // CHECK-FULL-O0: Starting llvm::Module pass manager run. // CHECK-FULL-O0: Running pass: AlwaysInlinerPass +// CHECK-FULL-O0-NEXT: Running analysis: InnerAnalysisManagerProxy // CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-FULL-O0-NEXT: Running pass: BitcodeWriterPass @@ -34,6 +35,7 @@ // CHECK-THIN-O0: Starting llvm::Module pass manager run. // CHECK-THIN-O0: Running pass: AlwaysInlinerPass +// CHECK-THIN-O0-NEXT: Running analysis: InnerAnalysisManagerProxy // CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-THIN-O0-NEXT: Running pass: ThinLTOBitcodeWriterPass diff --git a/clang/test/CodeGen/pgo-instrumentation.c b/clang/test/CodeGen/pgo-instrumentation.c --- a/clang/test/CodeGen/pgo-instrumentation.c +++ b/clang/test/CodeGen/pgo-instrumentation.c @@ -1,10 +1,8 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Test if PGO instrumentation and use pass are invoked. // // Ensure Pass PGOInstrumentationGenPass is invoked. 
-// RUN: %clang_cc1 -O2 -fprofile-instrument=llvm %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOGENPASS-INVOKED-INSTR-GEN --check-prefix=CHECK-INSTRPROF -// RUN: %clang_cc1 -O2 -fprofile-instrument=llvm %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOGENPASS-INVOKED-INSTR-GEN-NEWPM --check-prefix=CHECK-INSTRPROF-NEWPM +// RUN: %clang_cc1 -O2 -fprofile-instrument=llvm %s -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOGENPASS-INVOKED-INSTR-GEN --check-prefix=CHECK-INSTRPROF +// RUN: %clang_cc1 -O2 -fprofile-instrument=llvm %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOGENPASS-INVOKED-INSTR-GEN-NEWPM --check-prefix=CHECK-INSTRPROF-NEWPM // CHECK-PGOGENPASS-INVOKED-INSTR-GEN: PGOInstrumentationGenPass // CHECK-INSTRPROF: Frontend instrumentation-based coverage lowering // CHECK-PGOGENPASS-INVOKED-INSTR-GEN-NEWPM: Running pass: PGOInstrumentationGen on @@ -16,16 +14,16 @@ // CHECK-PGOGENPASS-INVOKED-INSTR-GEN-CLANG-NOT: PGOInstrumentationGenPass // CHECK-PGOGENPASS-INVOKED-INSTR-GEN-CLANG-NEWPM-NOT: Running pass: PGOInstrumentationGen on -// RUN: %clang_cc1 -O2 -fprofile-instrument=clang %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF +// RUN: %clang_cc1 -O2 -fprofile-instrument=clang %s -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF // RUN: %clang_cc1 -O2 -fprofile-instrument=clang %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF-NEWPM -// RUN: %clang_cc1 -O0 -fprofile-instrument=clang %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF +// RUN: %clang_cc1 -O0 -fprofile-instrument=clang %s -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF // RUN: %clang_cc1 -O0 -fprofile-instrument=clang %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-CLANG-INSTRPROF-NEWPM // CHECK-CLANG-INSTRPROF: Frontend instrumentation-based coverage lowering // CHECK-CLANG-INSTRPROF-NEWPM: Running pass: InstrProfiling on // Ensure Pass PGOInstrumentationUsePass is invoked. 
// RUN: llvm-profdata merge -o %t.profdata %S/Inputs/pgotestir.profraw -// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.profdata %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-INSTR-USE +// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.profdata %s -fno-experimental-new-pass-manager -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-INSTR-USE // RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.profdata %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PGOUSEPASS-INVOKED-INSTR-USE-NEWPM // CHECK-PGOUSEPASS-INVOKED-INSTR-USE: PGOInstrumentationUsePass // CHECK-PGOUSEPASS-INVOKED-INSTR-USE-NEWPM: Running pass: PGOInstrumentationUse on diff --git a/clang/test/CodeGen/pgo-sample.c b/clang/test/CodeGen/pgo-sample.c --- a/clang/test/CodeGen/pgo-sample.c +++ b/clang/test/CodeGen/pgo-sample.c @@ -1,8 +1,13 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Test if PGO sample use passes are invoked. // // Ensure Pass PGOInstrumentationGenPass is invoked. -// RUN: %clang_cc1 -O2 -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s -mllvm -debug-pass=Structure -emit-llvm -o - 2>&1 | FileCheck %s +// RUN: %clang_cc1 -O2 -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s -mllvm -debug-pass=Structure -fno-experimental-new-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s +// +// The new PM has a different debug output and uses a different debug pass flag. +// RUN: %clang_cc1 -O2 -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-NEWPM // CHECK: Remove unused exception handling info // CHECK: Sample profile pass +// +// CHECK-NEWPM-DAG: SampleProfileLoaderPass on +// CHECK-NEWPM-DAG: PostOrderFunctionAttrsPass on +// CHECK-NEWPM-DAG: ModuleToFunctionPassAdaptor on diff --git a/clang/test/CodeGen/split-debug-single-file.c b/clang/test/CodeGen/split-debug-single-file.c --- a/clang/test/CodeGen/split-debug-single-file.c +++ b/clang/test/CodeGen/split-debug-single-file.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // REQUIRES: x86-registered-target // Testing to ensure -enable-split-dwarf=single allows to place .dwo sections into regular output object. 
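To make the IR-PGO checks above concrete, here is a condensed, hypothetical lit-style sketch of the profile-use path under the new PM; the profraw input, the flags, and the CHECK string are taken from the pgo-instrumentation.c RUN lines above, while the file body and check layout are only illustrative:

// RUN: llvm-profdata merge -o %t.profdata %S/Inputs/pgotestir.profraw
// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.profdata %s \
// RUN:   -fexperimental-new-pass-manager -fdebug-pass-manager -emit-llvm -o - 2>&1 \
// RUN:   | FileCheck %s
// The new PM reports the IR-level profile-use pass in its -fdebug-pass-manager output.
// CHECK: Running pass: PGOInstrumentationUse on
int f(int x) { return x + 1; }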
diff --git a/clang/test/CodeGen/sse-builtins.c b/clang/test/CodeGen/sse-builtins.c --- a/clang/test/CodeGen/sse-builtins.c +++ b/clang/test/CodeGen/sse-builtins.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s diff --git a/clang/test/CodeGen/sse2-builtins.c b/clang/test/CodeGen/sse2-builtins.c --- a/clang/test/CodeGen/sse2-builtins.c +++ b/clang/test/CodeGen/sse2-builtins.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s diff --git a/clang/test/CodeGen/x86_64-instrument-functions.c b/clang/test/CodeGen/x86_64-instrument-functions.c --- a/clang/test/CodeGen/x86_64-instrument-functions.c +++ b/clang/test/CodeGen/x86_64-instrument-functions.c @@ -1,8 +1,6 @@ -// UNSUPPORTED: experimental-new-pass-manager - // REQUIRES: x86-registered-target -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -S -finstrument-functions -O2 -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -S -finstrument-functions-after-inlining -O2 -o - %s | FileCheck -check-prefix=NOINLINE %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -S -finstrument-functions -O2 -fno-experimental-new-pass-manager -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -S -finstrument-functions-after-inlining -O2 -fno-experimental-new-pass-manager -o - %s | FileCheck -check-prefix=NOINLINE %s // It's not so nice having asm tests in Clang, but we need to check that we set // up the pipeline correctly in order to have the instrumentation inserted. 
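For context on the x86_64-instrument-functions.c change above: -finstrument-functions brackets each function with calls to the GCC-compatible hooks __cyg_profile_func_enter and __cyg_profile_func_exit, while -finstrument-functions-after-inlining inserts them only after inlining so inlined callees are not reported separately. A minimal, hypothetical sketch (the RUN line mirrors the ones above; the function body and CHECK lines are illustrative only):

// RUN: %clang_cc1 -triple x86_64-unknown-unknown -S -finstrument-functions -O2 \
// RUN:   -fno-experimental-new-pass-manager -o - %s | FileCheck %s
// The emitted assembly for an instrumented function should call both hooks.
// CHECK-LABEL: work:
// CHECK: __cyg_profile_func_enter
// CHECK: __cyg_profile_func_exit
int work(int x) { return x * 2; }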
diff --git a/clang/test/CodeGenCXX/conditional-temporaries.cpp b/clang/test/CodeGenCXX/conditional-temporaries.cpp --- a/clang/test/CodeGenCXX/conditional-temporaries.cpp +++ b/clang/test/CodeGenCXX/conditional-temporaries.cpp @@ -1,9 +1,7 @@ -// UNSUPPORTED: experimental-new-pass-manager - // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin9 -O2 -disable-llvm-passes | FileCheck %s --check-prefixes=CHECK,CHECK-NOOPT -// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin9 -O2 | FileCheck %s --check-prefixes=CHECK,CHECK-OPT -// RUN: %clang_cc1 -emit-llvm %s -o - -triple=amdgcn-amd-amdhsa -O2 | FileCheck %s --check-prefixes=CHECK,CHECK-OPT +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin9 -O2 -fno-experimental-new-pass-manager -disable-llvm-passes | FileCheck %s --check-prefixes=CHECK,CHECK-NOOPT +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin9 -O2 -fno-experimental-new-pass-manager | FileCheck %s --check-prefixes=CHECK,CHECK-OPT +// RUN: %clang_cc1 -emit-llvm %s -o - -triple=amdgcn-amd-amdhsa -O2 -fno-experimental-new-pass-manager | FileCheck %s --check-prefixes=CHECK,CHECK-OPT namespace { diff --git a/clang/test/CodeGenCXX/flatten.cpp b/clang/test/CodeGenCXX/flatten.cpp --- a/clang/test/CodeGenCXX/flatten.cpp +++ b/clang/test/CodeGenCXX/flatten.cpp @@ -1,4 +1,6 @@ // UNSUPPORTED: experimental-new-pass-manager +// See the comment for CodeGen/flatten.c on why this is unsupported with the new +// PM. // RUN: %clang_cc1 -triple=x86_64-linux-gnu -std=c++11 %s -emit-llvm -o - | FileCheck %s diff --git a/clang/test/CodeGenCXX/member-function-pointer-calls.cpp b/clang/test/CodeGenCXX/member-function-pointer-calls.cpp --- a/clang/test/CodeGenCXX/member-function-pointer-calls.cpp +++ b/clang/test/CodeGenCXX/member-function-pointer-calls.cpp @@ -1,6 +1,4 @@ -// UNSUPPORTED: experimental-new-pass-manager - -// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin10 -emit-llvm -O3 -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin10 -emit-llvm -O3 -fno-experimental-new-pass-manager -o - | FileCheck %s // RUN: %clang_cc1 %s -triple=x86_64-windows-gnu -emit-llvm -o - | FileCheck %s -check-prefix MINGW64 struct A { virtual int vf1() { return 1; } diff --git a/clang/test/CodeGenObjC/os_log.m b/clang/test/CodeGenObjC/os_log.m --- a/clang/test/CodeGenObjC/os_log.m +++ b/clang/test/CodeGenObjC/os_log.m @@ -1,6 +1,4 @@ -// UNSUPPORTED: experimental-new-pass-manager - -// RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-darwin-apple -fobjc-arc -O2 | FileCheck %s +// RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-darwin-apple -fobjc-arc -O2 -fno-experimental-new-pass-manager | FileCheck %s // RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-darwin-apple -fobjc-arc -O0 | FileCheck %s -check-prefix=CHECK-O0 // Make sure we emit clang.arc.use before calling objc_release as part of the diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl --- a/clang/test/CodeGenOpenCL/convergent.cl +++ b/clang/test/CodeGenOpenCL/convergent.cl @@ -1,6 +1,5 @@ -// UNSUPPORTED: experimental-new-pass-manager - -// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fno-experimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-LEGACY +// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s 
-o - -fexperimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-NEWPM // This is initially assumed convergent, but can be deduced to not require it. @@ -119,7 +118,12 @@ // CHECK: [[for_body]]: // CHECK: tail call spir_func void @nodupfun() #[[attr5:[0-9]+]] // CHECK-NOT: call spir_func void @nodupfun() -// CHECK: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]] + +// The new PM produces a slightly different IR for the loop from the legacy PM, +// but the test still checks that the loop is not unrolled. +// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]] +// CHECK-NEWPM: br i1 %{{.+}}, label %[[for_body_crit_edge:.+]], label %[[for_cond_cleanup]] +// CHECK-NEWPM: [[for_body_crit_edge]]: void test_not_unroll() { for (int i = 0; i < 10; i++) diff --git a/clang/test/CoverageMapping/unused_names.c b/clang/test/CoverageMapping/unused_names.c --- a/clang/test/CoverageMapping/unused_names.c +++ b/clang/test/CoverageMapping/unused_names.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -emit-llvm -main-file-name unused_names.c -o - %s > %t // RUN: FileCheck -input-file %t %s // RUN: FileCheck -check-prefix=SYSHEADER -input-file %t %s diff --git a/clang/test/Frontend/optimization-remark-line-directive.c b/clang/test/Frontend/optimization-remark-line-directive.c --- a/clang/test/Frontend/optimization-remark-line-directive.c +++ b/clang/test/Frontend/optimization-remark-line-directive.c @@ -1,10 +1,12 @@ -// UNSUPPORTED: experimental-new-pass-manager - // This file tests -Rpass diagnostics together with #line // directives. We cannot map #line directives back to // a SourceLocation. -// RUN: %clang_cc1 %s -Rpass=inline -debug-info-kind=line-tables-only -dwarf-column-info -emit-llvm-only -verify +// RUN: %clang_cc1 %s -Rpass=inline -fno-experimental-new-pass-manager -debug-info-kind=line-tables-only -dwarf-column-info -emit-llvm-only -verify + +// The new PM inliner is not added to the default pipeline at O0, so we add +// some optimizations to trigger it. +// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -debug-info-kind=line-tables-only -dwarf-column-info -emit-llvm-only -verify int foo(int x, int y) __attribute__((always_inline)); int foo(int x, int y) { return x + y; } diff --git a/clang/test/Frontend/optimization-remark-new-pm.c b/clang/test/Frontend/optimization-remark-new-pm.c new file mode 100644 --- /dev/null +++ b/clang/test/Frontend/optimization-remark-new-pm.c @@ -0,0 +1,20 @@ +// Verify that remarks for the inliner appear. The remarks under the new PM will +// be slightly different from those emitted by the legacy PM. The new PM inliner +// also does not appear to be added at O0, so we test at O1. +// RUN: %clang_cc1 %s -Rpass=inline -Rpass-analysis=inline -Rpass-missed=inline -O1 -fexperimental-new-pass-manager -emit-llvm-only -verify +// RUN: %clang_cc1 %s -Rpass=inline -Rpass-analysis=inline -Rpass-missed=inline -O1 -fexperimental-new-pass-manager -emit-llvm-only -debug-info-kind=line-tables-only -verify + +int foo(int x, int y) __attribute__((always_inline)); +int foo(int x, int y) { return x + y; } + +float foz(int x, int y) __attribute__((noinline)); +float foz(int x, int y) { return x * y; } + +// The negative diagnostics are emitted twice because the inliner runs +// twice.
+// +int bar(int j) { + // expected-remark@+2 {{foz not inlined into bar because it should never be inlined (cost=never)}} + // expected-remark@+1 {{foo inlined into bar}} + return foo(j, j - 2) * foz(j - 2, j); +} diff --git a/clang/test/Frontend/optimization-remark-with-hotness-new-pm.c b/clang/test/Frontend/optimization-remark-with-hotness-new-pm.c new file mode 100644 --- /dev/null +++ b/clang/test/Frontend/optimization-remark-with-hotness-new-pm.c @@ -0,0 +1,85 @@ +// This test is similar to Frontend/optimization-remark-with-hotness.c but +// testing the output under the new pass manager. The inliner is not added to +// the default new PM pipeline at O0, so we compile with optimizations here. As +// a result, some of the remarks will be different since we turn on inlining, +// but the test is meant to show that remarks get dumped. The remarks are also +// slightly different in text. + +// Generate instrumentation and sampling profile data. +// RUN: llvm-profdata merge \ +// RUN: %S/Inputs/optimization-remark-with-hotness.proftext \ +// RUN: -o %t.profdata +// RUN: llvm-profdata merge -sample \ +// RUN: %S/Inputs/optimization-remark-with-hotness-sample.proftext \ +// RUN: -o %t-sample.profdata +// +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fexperimental-new-pass-manager -O1 \ +// RUN: -Rpass-analysis=inline -Rpass-missed=inline \ +// RUN: -fdiagnostics-show-hotness -verify +// The clang version of the previous test. +// RUN: %clang -target x86_64-apple-macosx10.9 %s -c -emit-llvm -o /dev/null \ +// RUN: -fprofile-instr-use=%t.profdata -Rpass=inline \ +// RUN: -fexperimental-new-pass-manager -O1 \ +// RUN: -Rpass-analysis=inline -Rpass-missed=inline \ +// RUN: -fdiagnostics-show-hotness -Xclang -verify +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -fprofile-sample-use=%t-sample.profdata -Rpass=inline \ +// RUN: -fexperimental-new-pass-manager -O1 \ +// RUN: -Rpass-analysis=inline -Rpass-missed=inline \ +// RUN: -fdiagnostics-show-hotness -fdiagnostics-hotness-threshold=10 \ +// RUN: -verify +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fexperimental-new-pass-manager -O1 \ +// RUN: -Rpass-analysis=inline -Rpass-missed=inline \ +// RUN: -fdiagnostics-show-hotness -fdiagnostics-hotness-threshold=10 -verify +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fexperimental-new-pass-manager -O1 \ +// RUN: -Rpass-analysis=inline 2>&1 | FileCheck -check-prefix=HOTNESS_OFF %s +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fexperimental-new-pass-manager -O1 \ +// RUN: -Rpass-analysis=inline -Rno-pass-with-hotness 2>&1 | FileCheck \ +// RUN: -check-prefix=HOTNESS_OFF %s +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -fprofile-instrument-use-path=%t.profdata 
-Rpass=inline \ +// RUN: -Rpass-analysis=inline -fdiagnostics-show-hotness \ +// RUN: -fdiagnostics-hotness-threshold=100 2>&1 \ +// RUN: | FileCheck -allow-empty -check-prefix=THRESHOLD %s +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ +// RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ +// RUN: -Rpass=inline -Rpass-analysis=inline \ +// RUN: -fdiagnostics-show-hotness -fdiagnostics-hotness-threshold=10 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_PGO %s + +int foo(int x, int y) __attribute__((always_inline)); +int foo(int x, int y) { return x + y; } + +int sum = 0; + +void bar(int x) { + // HOTNESS_OFF: foo inlined into bar + // HOTNESS_OFF-NOT: hotness: + // THRESHOLD-NOT: inlined + // THRESHOLD-NOT: hotness + // NO_PGO: '-fdiagnostics-show-hotness' requires profile-guided optimization information + // NO_PGO: '-fdiagnostics-hotness-threshold=' requires profile-guided optimization information + // expected-remark@+1 {{foo inlined into bar with (cost=always): always inline attribute (hotness:}} + sum += foo(x, x - 2); +} + +int main(int argc, const char *argv[]) { + for (int i = 0; i < 30; i++) + // expected-remark@+1 {{bar inlined into main with}} + bar(argc); + return sum; +} diff --git a/clang/test/Frontend/optimization-remark-with-hotness.c b/clang/test/Frontend/optimization-remark-with-hotness.c --- a/clang/test/Frontend/optimization-remark-with-hotness.c +++ b/clang/test/Frontend/optimization-remark-with-hotness.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Generate instrumentation and sampling profile data. // RUN: llvm-profdata merge \ // RUN: %S/Inputs/optimization-remark-with-hotness.proftext \ @@ -11,31 +9,37 @@ // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ // RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ // RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fno-experimental-new-pass-manager \ // RUN: -Rpass-analysis=inline -Rpass-missed=inline \ // RUN: -fdiagnostics-show-hotness -verify // The clang version of the previous test. 
// RUN: %clang -target x86_64-apple-macosx10.9 %s -c -emit-llvm -o /dev/null \ // RUN: -fprofile-instr-use=%t.profdata -Rpass=inline \ +// RUN: -fno-experimental-new-pass-manager \ // RUN: -Rpass-analysis=inline -Rpass-missed=inline \ // RUN: -fdiagnostics-show-hotness -Xclang -verify // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ // RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ // RUN: -fprofile-sample-use=%t-sample.profdata -Rpass=inline \ +// RUN: -fno-experimental-new-pass-manager \ // RUN: -Rpass-analysis=inline -Rpass-missed=inline \ // RUN: -fdiagnostics-show-hotness -fdiagnostics-hotness-threshold=10 \ // RUN: -verify // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ // RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ // RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fno-experimental-new-pass-manager \ // RUN: -Rpass-analysis=inline -Rpass-missed=inline \ // RUN: -fdiagnostics-show-hotness -fdiagnostics-hotness-threshold=10 -verify // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ // RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ // RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fno-experimental-new-pass-manager \ // RUN: -Rpass-analysis=inline 2>&1 | FileCheck -check-prefix=HOTNESS_OFF %s // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ // RUN: optimization-remark-with-hotness.c %s -emit-llvm-only \ // RUN: -fprofile-instrument-use-path=%t.profdata -Rpass=inline \ +// RUN: -fno-experimental-new-pass-manager \ // RUN: -Rpass-analysis=inline -Rno-pass-with-hotness 2>&1 | FileCheck \ // RUN: -check-prefix=HOTNESS_OFF %s // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name \ diff --git a/clang/test/Frontend/optimization-remark.c b/clang/test/Frontend/optimization-remark.c --- a/clang/test/Frontend/optimization-remark.c +++ b/clang/test/Frontend/optimization-remark.c @@ -1,22 +1,30 @@ -// UNSUPPORTED: experimental-new-pass-manager - // This file tests the -Rpass family of flags (-Rpass, -Rpass-missed // and -Rpass-analysis) with the inliner. The test is designed to // always trigger the inliner, so it should be independent of the -// optimization level. - -// RUN: %clang_cc1 %s -Rpass=inline -Rpass-analysis=inline -Rpass-missed=inline -O0 -emit-llvm-only -verify -// RUN: %clang_cc1 %s -Rpass=inline -Rpass-analysis=inline -Rpass-missed=inline -O0 -emit-llvm-only -debug-info-kind=line-tables-only -verify +// optimization level (under the legacy PM). The inliner is not added to the new +// PM pipeline unless optimizations are present. + +// The inliner for the new PM does not seem to be enabled at O0, but we still +// get the same remarks with at least O1. The remarks are also slightly +// different and located in another test file. +// RUN: %clang_cc1 %s -Rpass=inline -Rpass-analysis=inline -Rpass-missed=inline -O0 -fno-experimental-new-pass-manager -emit-llvm-only -verify +// RUN: %clang_cc1 %s -Rpass=inline -Rpass-analysis=inline -Rpass-missed=inline -O0 -fno-experimental-new-pass-manager -emit-llvm-only -debug-info-kind=line-tables-only -verify // RUN: %clang_cc1 %s -Rpass=inline -emit-llvm -o - 2>/dev/null | FileCheck %s // // Check that we can override -Rpass= with -Rno-pass. 
-// RUN: %clang_cc1 %s -Rpass=inline -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS +// RUN: %clang_cc1 %s -Rpass=inline -fno-experimental-new-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS // RUN: %clang_cc1 %s -Rpass=inline -Rno-pass -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-NO-REMARKS // RUN: %clang_cc1 %s -Rpass=inline -Rno-everything -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-NO-REMARKS -// RUN: %clang_cc1 %s -Rpass=inline -Rno-everything -Reverything -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS +// RUN: %clang_cc1 %s -Rpass=inline -fno-experimental-new-pass-manager -Rno-everything -Reverything -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS +// +// The inliner for the new PM does not seem to be enabled at O0, but we still +// get the same remarks with at least O1. +// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS +// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -Rno-everything -Reverything -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS // // Check that -w doesn't disable remarks. -// RUN: %clang_cc1 %s -Rpass=inline -w -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS +// RUN: %clang_cc1 %s -Rpass=inline -fno-experimental-new-pass-manager -w -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS +// RUN: %clang_cc1 %s -Rpass=inline -fexperimental-new-pass-manager -O1 -w -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS // // FIXME: -Reverything should imply -Rpass=.*. // RUN: %clang_cc1 %s -Reverything -emit-llvm -o - 2>/dev/null | FileCheck %s --check-prefix=CHECK-NO-REMARKS diff --git a/clang/test/Misc/pr32207.c b/clang/test/Misc/pr32207.c --- a/clang/test/Misc/pr32207.c +++ b/clang/test/Misc/pr32207.c @@ -1,6 +1,4 @@ -// UNSUPPORTED: experimental-new-pass-manager - // test for r305179 -// RUN: %clang_cc1 -emit-llvm -O -mllvm -print-after-all %s -o %t 2>&1 | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -O -fno-experimental-new-pass-manager -mllvm -print-after-all %s -o %t 2>&1 | FileCheck %s // CHECK: *** IR Dump After Function Integration/Inlining *** void foo() {} diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp --- a/clang/test/OpenMP/for_codegen.cpp +++ b/clang/test/OpenMP/for_codegen.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s --check-prefix=CHECK --check-prefix=LIFETIME // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s diff --git a/clang/test/Profile/c-captured.c b/clang/test/Profile/c-captured.c --- a/clang/test/Profile/c-captured.c +++ b/clang/test/Profile/c-captured.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-captured.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck -allow-deprecated-dag-overlap -check-prefix=PGOGEN -check-prefix=PGOALL %s // RUN: llvm-profdata merge %S/Inputs/c-captured.proftext -o %t.profdata diff --git 
a/clang/test/Profile/c-general.c b/clang/test/Profile/c-general.c --- a/clang/test/Profile/c-general.c +++ b/clang/test/Profile/c-general.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Test instrumentation of general constructs in C. // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck -allow-deprecated-dag-overlap -check-prefix=PGOGEN %s diff --git a/clang/test/Profile/c-generate.c b/clang/test/Profile/c-generate.c --- a/clang/test/Profile/c-generate.c +++ b/clang/test/Profile/c-generate.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Check that the -fprofile-instrument-path= form works. // RUN: %clang_cc1 -main-file-name c-generate.c %s -o - -emit-llvm -fprofile-instrument=clang -fprofile-instrument-path=c-generate-test.profraw | FileCheck %s --check-prefix=PROF-INSTR-PATH // RUN: %clang_cc1 %s -o - -emit-llvm -fprofile-instrument=none | FileCheck %s --check-prefix=PROF-INSTR-NONE diff --git a/clang/test/Profile/c-indirect-call.c b/clang/test/Profile/c-indirect-call.c --- a/clang/test/Profile/c-indirect-call.c +++ b/clang/test/Profile/c-indirect-call.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Check the value profiling instrinsics emitted by instrumentation. // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-indirect-call.c %s -o - -emit-llvm -fprofile-instrument=clang -mllvm -enable-value-profiling | FileCheck --check-prefix=NOEXT %s diff --git a/clang/test/Profile/c-linkage-available_externally.c b/clang/test/Profile/c-linkage-available_externally.c --- a/clang/test/Profile/c-linkage-available_externally.c +++ b/clang/test/Profile/c-linkage-available_externally.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Make sure instrumentation data from available_externally functions doesn't // get thrown out and are emitted with the expected linkage. 
// RUN: %clang_cc1 -O2 -triple x86_64-apple-macosx10.9 -main-file-name c-linkage-available_externally.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s diff --git a/clang/test/Profile/c-linkage.c b/clang/test/Profile/c-linkage.c --- a/clang/test/Profile/c-linkage.c +++ b/clang/test/Profile/c-linkage.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Check that the profiling counters and data we create have the linkage we expect // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-linkage.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s diff --git a/clang/test/Profile/c-ternary.c b/clang/test/Profile/c-ternary.c --- a/clang/test/Profile/c-ternary.c +++ b/clang/test/Profile/c-ternary.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -triple x86_64-apple-macosx10.11.0 -x c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s // PR32019: Clang can lower some ternary operator expressions to select diff --git a/clang/test/Profile/c-unreachable-after-switch.c b/clang/test/Profile/c-unreachable-after-switch.c --- a/clang/test/Profile/c-unreachable-after-switch.c +++ b/clang/test/Profile/c-unreachable-after-switch.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -O3 -triple x86_64-apple-macosx10.10 -main-file-name c-unreachable-after-switch.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s // CHECK: @[[C:__profc_foo]] = private global [3 x i64] zeroinitializer diff --git a/clang/test/Profile/cxx-class.cpp b/clang/test/Profile/cxx-class.cpp --- a/clang/test/Profile/cxx-class.cpp +++ b/clang/test/Profile/cxx-class.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Tests for instrumentation of C++ methods, constructors, and destructors. // RUN: %clang_cc1 %s -o - -emit-llvm -fprofile-instrument=clang -triple %itanium_abi_triple > %tgen diff --git a/clang/test/Profile/cxx-implicit.cpp b/clang/test/Profile/cxx-implicit.cpp --- a/clang/test/Profile/cxx-implicit.cpp +++ b/clang/test/Profile/cxx-implicit.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Ensure that implicit methods aren't instrumented. // RUN: %clang_cc1 -x c++ -std=c++11 %s -triple %itanium_abi_triple -main-file-name cxx-implicit.cpp -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s diff --git a/clang/test/Profile/cxx-indirect-call.cpp b/clang/test/Profile/cxx-indirect-call.cpp --- a/clang/test/Profile/cxx-indirect-call.cpp +++ b/clang/test/Profile/cxx-indirect-call.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Check the value profiling instrinsics emitted by instrumentation. 
// RUN: %clang_cc1 %s -o - -emit-llvm -fprofile-instrument=clang -mllvm -enable-value-profiling -fexceptions -fcxx-exceptions -triple x86_64-apple-macosx10.9 | FileCheck %s diff --git a/clang/test/Profile/cxx-lambda.cpp b/clang/test/Profile/cxx-lambda.cpp --- a/clang/test/Profile/cxx-lambda.cpp +++ b/clang/test/Profile/cxx-lambda.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Tests for instrumentation of C++11 lambdas // RUN: %clang_cc1 -x c++ %s -triple %itanium_abi_triple -main-file-name cxx-lambda.cpp -std=c++11 -o - -emit-llvm -fprofile-instrument=clang > %tgen diff --git a/clang/test/Profile/cxx-linkage.cpp b/clang/test/Profile/cxx-linkage.cpp --- a/clang/test/Profile/cxx-linkage.cpp +++ b/clang/test/Profile/cxx-linkage.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -triple x86_64-apple-macosx10.9.0 -emit-llvm -main-file-name cxx-linkage.cpp %s -o - -fprofile-instrument=clang | FileCheck %s // CHECK: @__profc__Z3foov = private global diff --git a/clang/test/Profile/cxx-rangefor.cpp b/clang/test/Profile/cxx-rangefor.cpp --- a/clang/test/Profile/cxx-rangefor.cpp +++ b/clang/test/Profile/cxx-rangefor.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Tests for instrumentation of C++11 range-for // RUN: %clang_cc1 -x c++ %s -triple %itanium_abi_triple -main-file-name cxx-rangefor.cpp -std=c++11 -o - -emit-llvm -fprofile-instrument=clang > %tgen diff --git a/clang/test/Profile/cxx-stmt-initializers.cpp b/clang/test/Profile/cxx-stmt-initializers.cpp --- a/clang/test/Profile/cxx-stmt-initializers.cpp +++ b/clang/test/Profile/cxx-stmt-initializers.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Tests for instrumentation of C++17 statement initializers // RUN: %clang_cc1 -x c++ %s -triple %itanium_abi_triple -main-file-name cxx-stmt-initializers.cpp -std=c++1z -o - -emit-llvm -fprofile-instrument=clang > %tgen diff --git a/clang/test/Profile/cxx-structors.cpp b/clang/test/Profile/cxx-structors.cpp --- a/clang/test/Profile/cxx-structors.cpp +++ b/clang/test/Profile/cxx-structors.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Tests for instrumentation of C++ constructors and destructors. // // RUN: %clang_cc1 -triple x86_64-apple-macosx10.11.0 -x c++ %s -o %t -emit-llvm -fprofile-instrument=clang diff --git a/clang/test/Profile/cxx-templates.cpp b/clang/test/Profile/cxx-templates.cpp --- a/clang/test/Profile/cxx-templates.cpp +++ b/clang/test/Profile/cxx-templates.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Tests for instrumentation of templated code. Each instantiation of a template // should be instrumented separately. diff --git a/clang/test/Profile/cxx-throws.cpp b/clang/test/Profile/cxx-throws.cpp --- a/clang/test/Profile/cxx-throws.cpp +++ b/clang/test/Profile/cxx-throws.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Test instrumentation of C++ exception handling constructs. 
// FIXME: Don't seek bb labels, like "if.else" diff --git a/clang/test/Profile/cxx-virtual-destructor-calls.cpp b/clang/test/Profile/cxx-virtual-destructor-calls.cpp --- a/clang/test/Profile/cxx-virtual-destructor-calls.cpp +++ b/clang/test/Profile/cxx-virtual-destructor-calls.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -main-file-name cxx-virtual-destructor-calls.cpp %s -o - -fprofile-instrument=clang | FileCheck %s struct Member { diff --git a/clang/test/Profile/def-assignop.cpp b/clang/test/Profile/def-assignop.cpp --- a/clang/test/Profile/def-assignop.cpp +++ b/clang/test/Profile/def-assignop.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -x c++ -std=c++11 %s -triple x86_64-unknown-linux-gnu -main-file-name def-assignop.cpp -o - -emit-llvm -fprofile-instrument=clang | FileCheck --check-prefix=PGOGEN %s // RUN: %clang_cc1 -x c++ -std=c++11 %s -triple x86_64-unknown-linux-gnu -main-file-name def-assignop.cpp -o - -emit-llvm -fprofile-instrument=clang -fcoverage-mapping | FileCheck --check-prefix=COVMAP %s diff --git a/clang/test/Profile/def-ctors.cpp b/clang/test/Profile/def-ctors.cpp --- a/clang/test/Profile/def-ctors.cpp +++ b/clang/test/Profile/def-ctors.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -x c++ -std=c++11 %s -triple x86_64-unknown-linux-gnu -main-file-name def-ctors.cpp -o - -emit-llvm -fprofile-instrument=clang | FileCheck --check-prefix=PGOGEN %s // RUN: %clang_cc1 -x c++ -std=c++11 %s -triple x86_64-unknown-linux-gnu -main-file-name def-ctors.cpp -o - -emit-llvm -fprofile-instrument=clang -fcoverage-mapping | FileCheck --check-prefix=COVMAP %s diff --git a/clang/test/Profile/def-dtors.cpp b/clang/test/Profile/def-dtors.cpp --- a/clang/test/Profile/def-dtors.cpp +++ b/clang/test/Profile/def-dtors.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // RUN: %clang_cc1 -x c++ -std=c++11 %s -triple x86_64-unknown-linux-gnu -main-file-name def-dtors.cpp -o - -emit-llvm -fprofile-instrument=clang | FileCheck --check-prefix=PGOGEN %s // RUN: %clang_cc1 -x c++ -std=c++11 %s -triple x86_64-unknown-linux-gnu -main-file-name def-dtors.cpp -o - -emit-llvm -fprofile-instrument=clang -fcoverage-mapping | FileCheck --check-prefix=COVMAP %s diff --git a/clang/test/Profile/gcc-flag-compatibility.c b/clang/test/Profile/gcc-flag-compatibility.c --- a/clang/test/Profile/gcc-flag-compatibility.c +++ b/clang/test/Profile/gcc-flag-compatibility.c @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Tests for -fprofile-generate and -fprofile-use flag compatibility. These two // flags behave similarly to their GCC counterparts: // diff --git a/clang/test/Profile/objc-general.m b/clang/test/Profile/objc-general.m --- a/clang/test/Profile/objc-general.m +++ b/clang/test/Profile/objc-general.m @@ -1,5 +1,3 @@ -// UNSUPPORTED: experimental-new-pass-manager - // Test instrumentation of general constructs in objective C. 
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name objc-general.m %s -o - -emit-llvm -fblocks -fprofile-instrument=clang | FileCheck -check-prefix=PGOGEN %s diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -31,8 +31,17 @@ #define DEBUG_TYPE "inline" -PreservedAnalyses AlwaysInlinerPass::run(Module &M, ModuleAnalysisManager &) { - InlineFunctionInfo IFI; +PreservedAnalyses AlwaysInlinerPass::run(Module &M, ModuleAnalysisManager &AM) { + // Attach an inline assumption cache such that AddAlignmentAssumptions() is + // called to match the codegen for the legacy PM. + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + std::function<AssumptionCache &(Function &)> GetAssumptionCache = + [&](Function &F) -> AssumptionCache & { + return FAM.getResult<AssumptionAnalysis>(F); + }; + InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache); + SmallSetVector<CallSite, 16> Calls; bool Changed = false; SmallVector<Function *, 16> InlinedFunctions;
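As a usage-level illustration of what the AlwaysInlinerPass change above has to keep working: always_inline calls are still expected to disappear even at -O0 under the new PM (the lto-newpm-pipeline.c checks above show AlwaysInlinerPass running at -O0). A hypothetical sketch, not part of the patch; the function names and CHECK lines are illustrative only:

// RUN: %clang_cc1 -O0 -fexperimental-new-pass-manager -emit-llvm -o - %s | FileCheck %s
__attribute__((always_inline)) static inline int add_one(int x) { return x + 1; }
// After the new-PM always inliner runs, no call to add_one should remain in caller.
// CHECK-LABEL: define {{.*}}@caller(
// CHECK-NOT: call {{.*}}@add_one
int caller(int v) { return add_one(v); }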