Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2636,6 +2636,7 @@ PrePostActionTy &) { // Use the OpenMPIRBuilder if enabled. if (UseOMPIRBuilder) { + llvm::DenseMap AlignedVars; // Emit the associated statement and get its loop representation. const Stmt *Inner = S.getRawStmt(); llvm::CanonicalLoopInfo *CLI = @@ -2662,7 +2663,8 @@ } // Add simd metadata to the collapsed loop. Do not generate // another loop for if clause. Support for if clause is done earlier. - OMPBuilder.applySimd(CLI, /*IfCond*/ nullptr, Simdlen, Safelen); + OMPBuilder.applySimd(CLI, AlignedVars, + /*IfCond*/ nullptr, Simdlen, Safelen); return; } }; Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -617,12 +617,16 @@ /// to the cloned loop. The cloned loop is executed when ifCond is evaluated /// to false. /// - /// \param Loop The loop to simd-ize. - /// \param IfCond The value which corresponds to the if clause condition. + /// \param Loop The loop to simd-ize. + /// \param AlignedVars The map of the variables which need to be aligned with + /// corresponding alignment values + /// \param IfCond The value which corresponds to the if clause + /// condition. /// \param Simdlen The Simdlen length to apply to the simd loop. /// \param Safelen The Safelen length to apply to the simd loop.
- void applySimd(CanonicalLoopInfo *Loop, Value *IfCond, ConstantInt *Simdlen, - ConstantInt *Safelen); + void applySimd(CanonicalLoopInfo *Loop, + llvm::DenseMap AlignedVars, + Value *IfCond, ConstantInt *Simdlen, ConstantInt *Safelen); /// Generator for '#omp flush' /// Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2965,11 +2965,17 @@ Builder.CreateBr(NewBlocks.front()); } -void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, Value *IfCond, - ConstantInt *Simdlen, ConstantInt *Safelen) { +void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, + DenseMap AlignedVars, + Value *IfCond, ConstantInt *Simdlen, + ConstantInt *Safelen) { LLVMContext &Ctx = Builder.getContext(); Function *F = CanonicalLoop->getFunction(); + const DataLayout &DL = F->getParent()->getDataLayout(); + const int DefaultAddressSpace = 0; + const int DefaultAlignment = + DL.getPointerABIAlignment(DefaultAddressSpace).value(); // TODO: We should not rely on pass manager. 
Currently we use pass manager // only for getting llvm::Loop which corresponds to given CanonicalLoopInfo @@ -2985,6 +2991,31 @@ Loop *L = LI.getLoopFor(CanonicalLoop->getHeader()); + for (auto AlignedItem : AlignedVars) { + assert(isa(AlignedItem.first) && + "Value which needs to be aligned must represented by alloca " + "instruction"); + Value *AlignPtrInstruction = nullptr; + AllocaInst *AllocaInstruction = dyn_cast(AlignedItem.first); + Builder.SetInsertPoint(CanonicalLoop->getPreheader()->getTerminator()); + Type *AllocatedVarType = AllocaInstruction->getAllocatedType(); + if (isa(AllocatedVarType)) + AlignPtrInstruction = Builder.CreateInBoundsGEP( + AllocaInstruction->getAllocatedType(), AllocaInstruction, + SmallVector{Builder.getInt64(0), Builder.getInt64(0)}, + "arraydecay"); + else if (isa(AllocatedVarType)) + AlignPtrInstruction = Builder.CreateLoad( + AllocaInstruction->getAllocatedType(), AllocaInstruction); + assert(AlignPtrInstruction && + "Aligned variables must be either pointer or array type"); + + Builder.CreateAlignmentAssumption( + F->getParent()->getDataLayout(), AlignPtrInstruction, + AlignedItem.second ? AlignedItem.second + : Builder.getInt64(DefaultAlignment)); + } + if (IfCond) { ValueToValueMapTy VMap; createIfVersion(CanonicalLoop, IfCond, VMap, "simd"); Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp =================================================================== --- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -1767,11 +1767,59 @@ TEST_F(OpenMPIRBuilderTest, ApplySimd) { OpenMPIRBuilder OMPBuilder(*M); + DenseMap AlignedVars; + CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); + + // Simd-ize the loop. 
+ OMPBuilder.applySimd(CLI, AlignedVars, + /* IfCond */ nullptr, /* Simdlen */ nullptr, + /* Safelen */ nullptr); + + OMPBuilder.finalize(); + EXPECT_FALSE(verifyModule(*M, &errs())); + + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + LoopInfo &LI = FAM.getResult(*F); + + const std::vector &TopLvl = LI.getTopLevelLoops(); + EXPECT_EQ(TopLvl.size(), 1u); + + Loop *L = TopLvl.front(); + EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); + EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); + + // Check for llvm.access.group metadata attached to the printf + // function in the loop body. + BasicBlock *LoopBody = CLI->getBody(); + EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { + return I.getMetadata("llvm.access.group") != nullptr; + })); +} + +TEST_F(OpenMPIRBuilderTest, ApplySimdiDefaultAligned) { + OpenMPIRBuilder OMPBuilder(*M); + IRBuilder<> Builder(BB); + AllocaInst *Alloc1 = + Builder.CreateAlloca(Builder.getInt8PtrTy(), Builder.getInt64(1)); + AllocaInst *Alloc2 = Builder.CreateAlloca( + ArrayType::get(Builder.getInt32Ty(), 10), Builder.getInt64(1)); + DenseMap AlignedVars; + auto Int8Ty = Builder.getInt8Ty(); + Instruction *MallocInstr = CallInst::CreateMalloc( + Alloc2, Builder.getInt64Ty(), Int8Ty, ConstantExpr::getSizeOf(Int8Ty), + Builder.getInt64(400), nullptr, ""); + Builder.CreateStore(MallocInstr, Alloc1); + + AlignedVars.insert({Alloc1, nullptr}); + AlignedVars.insert({Alloc2, nullptr}); CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. 
- OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, /* Simdlen */ nullptr, + OMPBuilder.applySimd(CLI, AlignedVars, + /* IfCond */ nullptr, /* Simdlen */ nullptr, /* Safelen */ nullptr); OMPBuilder.finalize(); @@ -1795,15 +1843,92 @@ EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { return I.getMetadata("llvm.access.group") != nullptr; })); + + // Check if number of assumption instructions is equal to number of aligned + // variables + BasicBlock *LoopPreheader = CLI->getPreheader(); + size_t NumAssummptionCallsInPreheader = count_if( + *LoopPreheader, [](Instruction &I) { return isa(I); }); + EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size()); } -TEST_F(OpenMPIRBuilderTest, ApplySimdlen) { +TEST_F(OpenMPIRBuilderTest, ApplySimdiCustomAligned) { OpenMPIRBuilder OMPBuilder(*M); + IRBuilder<> Builder(BB); + const int AlignmentValue = 32; + AllocaInst *Alloc1 = + Builder.CreateAlloca(Builder.getInt8PtrTy(), Builder.getInt64(1)); + AllocaInst *Alloc2 = Builder.CreateAlloca( + ArrayType::get(Builder.getInt32Ty(), 10), Builder.getInt64(1)); + DenseMap AlignedVars; + auto Int8Ty = Builder.getInt8Ty(); + Instruction *MallocInstr = CallInst::CreateMalloc( + Alloc2, Builder.getInt64Ty(), Int8Ty, ConstantExpr::getSizeOf(Int8Ty), + Builder.getInt64(400), nullptr, ""); + Builder.CreateStore(MallocInstr, Alloc1); + + AlignedVars.insert({Alloc1, Builder.getInt64(AlignmentValue)}); + AlignedVars.insert({Alloc2, Builder.getInt64(AlignmentValue)}); CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. 
- OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, + OMPBuilder.applySimd(CLI, AlignedVars, + /* IfCond */ nullptr, /* Simdlen */ nullptr, + /* Safelen */ nullptr); + + OMPBuilder.finalize(); + EXPECT_FALSE(verifyModule(*M, &errs())); + + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + LoopInfo &LI = FAM.getResult(*F); + + const std::vector &TopLvl = LI.getTopLevelLoops(); + EXPECT_EQ(TopLvl.size(), 1u); + + Loop *L = TopLvl.front(); + EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); + EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); + + // Check for llvm.access.group metadata attached to the printf + // function in the loop body. + BasicBlock *LoopBody = CLI->getBody(); + EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { + return I.getMetadata("llvm.access.group") != nullptr; + })); + + // Check if number of assumption instructions is equal to number of aligned + // variables + BasicBlock *LoopPreheader = CLI->getPreheader(); + size_t NumAssummptionCallsInPreheader = count_if( + *LoopPreheader, [](Instruction &I) { return isa(I); }); + EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size()); + + // Check if variables are correctly aligned + for (auto &Instr : LoopPreheader->getInstList()) { + if (isa(Instr)) { + auto AssumeInstruction = dyn_cast(&Instr); + if (AssumeInstruction->getNumTotalBundleOperands()) { + auto Bundle = AssumeInstruction->getOperandBundleAt(0); + if (Bundle.getTagName() == "align") { + EXPECT_TRUE(isa(Bundle.Inputs[1])); + auto ConstIntVal = dyn_cast(Bundle.Inputs[1]); + EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue); + } + } + } + } +} +TEST_F(OpenMPIRBuilderTest, ApplySimdlen) { + OpenMPIRBuilder OMPBuilder(*M); + DenseMap AlignedVars; + CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); + + // Simd-ize the loop. 
+ OMPBuilder.applySimd(CLI, AlignedVars, + /* IfCond */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3), /* Safelen */ nullptr); @@ -1833,11 +1958,12 @@ TEST_F(OpenMPIRBuilderTest, ApplySafelen) { OpenMPIRBuilder OMPBuilder(*M); + DenseMap AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, + OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3)); @@ -1867,11 +1993,13 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) { OpenMPIRBuilder OMPBuilder(*M); + DenseMap AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, + OMPBuilder.applySimd(CLI, AlignedVars, + /* IfCond */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 2), ConstantInt::get(Type::getInt32Ty(Ctx), 3)); @@ -1902,6 +2030,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdLoopIf) { OpenMPIRBuilder OMPBuilder(*M); IRBuilder<> Builder(BB); + DenseMap AlignedVars; AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty()); AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty()); @@ -1916,7 +2045,8 @@ CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop with if condition - OMPBuilder.applySimd(CLI, IfCmp, ConstantInt::get(Type::getInt32Ty(Ctx), 3), + OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, + ConstantInt::get(Type::getInt32Ty(Ctx), 3), /* Safelen */ nullptr); OMPBuilder.finalize(); Index: mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp =================================================================== --- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -994,9 +994,9 @@ llvm::ConstantInt *safelen = nullptr; if (llvm::Optional safelenVar = loop.safelen()) safelen = 
builder.getInt64(safelenVar.value()); - + llvm::DenseMap AlignedVars; ompBuilder->applySimd( - loopInfo, + loopInfo, AlignedVars, loop.if_expr() ? moduleTranslation.lookupValue(loop.if_expr()) : nullptr, simdlen, safelen);