diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -485,8 +485,8 @@ auto scalarOperands = llvm::map_to_vector(operands, extractElement); Operation *scalarOp = rewriter.create(loc, name, scalarOperands, elementType, op->getAttrs()); - rewriter.create<LLVM::InsertElementOp>(loc, result, scalarOp->getResult(0), - index); + result = rewriter.create<LLVM::InsertElementOp>( + loc, result, scalarOp->getResult(0), index); } rewriter.replaceOp(op, result); diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -516,10 +516,16 @@ // CHECK-LABEL: func @gpu_unroll func.func @gpu_unroll(%arg0 : vector<4xf32>) -> vector<4xf32> { %result = math.exp %arg0 : vector<4xf32> - // CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32 - // CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32 - // CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32 - // CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32 + // CHECK: %[[V0:.+]] = llvm.mlir.undef : vector<4xf32> + // CHECK: %[[CL:.+]] = llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32 + // CHECK: %[[V1:.+]] = llvm.insertelement %[[CL]], %[[V0]] + // CHECK: %[[CL:.+]] = llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32 + // CHECK: %[[V2:.+]] = llvm.insertelement %[[CL]], %[[V1]] + // CHECK: %[[CL:.+]] = llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32 + // CHECK: %[[V3:.+]] = llvm.insertelement %[[CL]], %[[V2]] + // CHECK: %[[CL:.+]] = llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32 + // CHECK: %[[V4:.+]] = llvm.insertelement %[[CL]], %[[V3]] + // CHECK: return %[[V4]] func.return %result : vector<4xf32> } } diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir --- 
a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -456,10 +456,16 @@ // CHECK-LABEL: func @gpu_unroll func.func @gpu_unroll(%arg0 : vector<4xf32>) -> vector<4xf32> { %result = math.exp %arg0 : vector<4xf32> - // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32 - // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32 - // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32 - // CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32 + // CHECK: %[[V0:.+]] = llvm.mlir.undef : vector<4xf32> + // CHECK: %[[CL:.+]] = llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32 + // CHECK: %[[V1:.+]] = llvm.insertelement %[[CL]], %[[V0]] + // CHECK: %[[CL:.+]] = llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32 + // CHECK: %[[V2:.+]] = llvm.insertelement %[[CL]], %[[V1]] + // CHECK: %[[CL:.+]] = llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32 + // CHECK: %[[V3:.+]] = llvm.insertelement %[[CL]], %[[V2]] + // CHECK: %[[CL:.+]] = llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32 + // CHECK: %[[V4:.+]] = llvm.insertelement %[[CL]], %[[V3]] + // CHECK: return %[[V4]] func.return %result : vector<4xf32> } }