Index: llvm/lib/Transforms/Scalar/LoopUnswitch.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -51,6 +51,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -697,6 +698,11 @@ continue; if (CB->isConvergent()) return Changed; + // Volatile inline asm may contain convergent instructions + // e.g., PTX shfl.sync, so rejecting them for correctness + if (auto *Asm = dyn_cast<InlineAsm>(CB->getCalledOperand())) + if (Asm->hasSideEffects()) + return Changed; if (auto *II = dyn_cast<InvokeInst>(&I)) if (!II->getUnwindDest()->canSplitPredecessors()) return Changed; Index: llvm/test/Transforms/LoopUnswitch/convergent-hoist-modified.ll =================================================================== --- llvm/test/Transforms/LoopUnswitch/convergent-hoist-modified.ll +++ llvm/test/Transforms/LoopUnswitch/convergent-hoist-modified.ll @@ -5,17 +5,21 @@ ; Modified status. This was caught by the pass return status check that is ; hidden under EXPENSIVE_CHECKS. 
-; CHECK-LABEL: entry: -; CHECK-NEXT: %0 = call i32 @llvm.objectsize.i32.p0i8(i8* bitcast (%struct.anon* @b to i8*), i1 false, i1 false, i1 false) -; CHECK-NEXT: %1 = icmp uge i32 %0, 1 -; CHECK-NEXT: br label %for.cond - %struct.anon = type { i16 } @b = global %struct.anon zeroinitializer, align 1 +; Function Attrs: nounwind readnone speculatable willreturn +declare i32 @llvm.objectsize.i32.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #1 + +declare void @conv() convergent + ; Function Attrs: nounwind define i16 @c() #0 { +; CHECK-LABEL: entry: +; CHECK-NEXT: %0 = call i32 @llvm.objectsize.i32.p0i8(i8* bitcast (%struct.anon* @b to i8*), i1 false, i1 false, i1 false) +; CHECK-NEXT: %1 = icmp uge i32 %0, 1 +; CHECK-NEXT: br label %for.cond entry: br label %for.cond @@ -33,10 +37,28 @@ br label %for.cond } -; Function Attrs: nounwind readnone speculatable willreturn -declare i32 @llvm.objectsize.i32.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #1 +; Function Attrs: nounwind +define i16 @d() #0 { +; CHECK-LABEL: entry: +; CHECK-NEXT: %0 = call i32 @llvm.objectsize.i32.p0i8(i8* bitcast (%struct.anon* @b to i8*), i1 false, i1 false, i1 false) +; CHECK-NEXT: %1 = icmp uge i32 %0, 1 +; CHECK-NEXT: br label %for.cond +entry: + br label %for.cond -declare void @conv() convergent +for.cond: ; preds = %cont, %entry + br label %for.inc + +for.inc: ; preds = %for.cond + %0 = call i32 @llvm.objectsize.i32.p0i8(i8* bitcast (%struct.anon* @b to i8*), i1 false, i1 false, i1 false) + %1 = icmp uge i32 %0, 1 + br i1 %1, label %cont, label %cont + +cont: ; preds = %for.inc + call void asm sideeffect "; some convergent instructions here", ""() + %2 = load i16, i16* getelementptr inbounds (%struct.anon, %struct.anon* @b, i32 0, i32 0), align 1 + br label %for.cond +} attributes #0 = { nounwind } attributes #1 = { nounwind readnone speculatable willreturn }