diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaFoldConstantTranspose.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaFoldConstantTranspose.cpp
--- a/mlir/lib/Dialect/Tosa/Transforms/TosaFoldConstantTranspose.cpp
+++ b/mlir/lib/Dialect/Tosa/Transforms/TosaFoldConstantTranspose.cpp
@@ -20,6 +20,87 @@
 
 namespace {
 
+// Maps a row-major linear index into the input tensor to the linear index of
+// the same element in the output, whose dim `i` is input dim `permValues[i]`.
+uint64_t permuteLinearIndex(uint64_t srcLinearIndex,
+                            llvm::ArrayRef<uint64_t> permValues,
+                            llvm::ArrayRef<int64_t> inputShape,
+                            llvm::ArrayRef<int64_t> outputShape) {
+  // Convert the source linear index to its corresponding multi-dimensional
+  // index into the source tensor, peeling off the innermost dimension first.
+  SmallVector<uint64_t, 6> srcIndices(inputShape.size(), 0);
+  for (size_t dim = inputShape.size(); dim-- > 0;) {
+    srcIndices[dim] = srcLinearIndex % inputShape[dim];
+    srcLinearIndex /= inputShape[dim];
+  }
+
+  // Permute the source indices into the destination indices.
+  SmallVector<uint64_t, 6> dstIndices(outputShape.size(), 0);
+  for (const auto &it : llvm::enumerate(permValues))
+    dstIndices[it.index()] = srcIndices[it.value()];
+
+  // Flatten the destination indices to a linear index. Accumulating from zero
+  // avoids calling front() on an empty vector when the tensors have rank 0.
+  uint64_t dstLinearIndex = 0;
+  for (size_t dim = 0; dim < outputShape.size(); ++dim)
+    dstLinearIndex = dstLinearIndex * outputShape[dim] + dstIndices[dim];
+  return dstLinearIndex;
+}
+
+// Transposes `attr` by reading its elements as `BaseType` and scattering each
+// one to its permuted position in a buffer used to build the result attribute.
+template <typename BaseType>
+DenseElementsAttr transposeType(ElementsAttr attr, ShapedType inputType,
+                                ShapedType outputType,
+                                llvm::ArrayRef<uint64_t> permValues) {
+  llvm::ArrayRef<int64_t> srcShape = inputType.getShape();
+  llvm::ArrayRef<int64_t> dstShape = outputType.getShape();
+  SmallVector<BaseType> transposed(inputType.getNumElements());
+
+  uint64_t srcLinearIndex = 0;
+  for (const auto &element : attr.getValues<BaseType>()) {
+    uint64_t dstLinearIndex =
+        permuteLinearIndex(srcLinearIndex++, permValues, srcShape, dstShape);
+    transposed[dstLinearIndex] = element;
+  }
+
+  llvm::ArrayRef<BaseType> transposedRef(transposed);
+  return DenseElementsAttr::get(outputType, transposedRef);
+}
+
+// A type specialized transposition of an ElementsAttr. Operates on the raw
+// underlying data when possible to avoid allocating a large number of
+// Attribute objects.
+DenseElementsAttr transpose(ElementsAttr attr, ShapedType inputType,
+                            ShapedType outputType,
+                            llvm::ArrayRef<uint64_t> permValues) {
+  auto baseType = inputType.getElementType();
+  auto intType = baseType.dyn_cast<IntegerType>();
+
+  // Handle possible integer types. Unsigned types are skipped because their
+  // values cannot be accessed through the signed C++ types used below.
+  if (intType && !intType.isUnsigned()) {
+    switch (intType.getWidth()) {
+    case 1:
+      return transposeType<bool>(attr, inputType, outputType, permValues);
+    case 8:
+      return transposeType<int8_t>(attr, inputType, outputType, permValues);
+    case 16:
+      return transposeType<int16_t>(attr, inputType, outputType, permValues);
+    case 32:
+      return transposeType<int32_t>(attr, inputType, outputType, permValues);
+    case 64:
+      return transposeType<int64_t>(attr, inputType, outputType, permValues);
+    }
+  }
+
+  // Handle possible float types
+  if (baseType.isF32())
+    return transposeType<float>(attr, inputType, outputType, permValues);
+
+  return transposeType<Attribute>(attr, inputType, outputType, permValues);
+}
+
 struct TosaFoldConstantTranspose : public OpRewritePattern<tosa::TransposeOp> {
   using OpRewritePattern::OpRewritePattern;
 
@@ -46,38 +127,9 @@
         [](const APInt &val) { return val.getZExtValue(); }));
 
     auto inputType = op.getInput1().getType().cast<ShapedType>();
-    ArrayRef<int64_t> inputShape = inputType.getShape();
-    int64_t numElements = inputType.getNumElements();
-
-    SmallVector<Attribute, 4> outputValues;
-    outputValues.resize(numElements);
-
-    // Transpose the input constant. Because we don't know its rank in advance,
-    // we need to loop over the range [0, element count) and delinearize the
-    // index.
-    auto attrValues = inputValues.getValues<Attribute>();
-    ArrayRef<int64_t> outputShape = outputType.getShape();
-    for (const auto &it : llvm::enumerate(attrValues)) {
-      SmallVector<uint64_t, 6> srcIndices(inputType.getRank(), 0);
-      int totalCount = it.index();
-      for (int dim = inputType.getRank() - 1; dim >= 0; --dim) {
-        srcIndices[dim] = totalCount % inputShape[dim];
-        totalCount /= inputShape[dim];
-      }
-
-      SmallVector<uint64_t, 6> dstIndices(outputType.getRank(), 0);
-      for (int dim = outputType.getRank() - 1; dim >= 0; --dim)
-        dstIndices[dim] = srcIndices[permValues[dim]];
-
-      uint64_t dstLinearIndex = dstIndices.front();
-      for (int dim = 1; dim < outputType.getRank(); ++dim)
-        dstLinearIndex = dstLinearIndex * outputShape[dim] + dstIndices[dim];
-
-      outputValues[dstLinearIndex] = it.value();
-    }
 
-    rewriter.replaceOpWithNewOp<tosa::ConstOp>(
-        op, outputType, DenseElementsAttr::get(outputType, outputValues));
+    auto resultAttr = transpose(inputValues, inputType, outputType, permValues);
+    rewriter.replaceOpWithNewOp<tosa::ConstOp>(op, outputType, resultAttr);
     return success();
   }
 };
diff --git a/mlir/test/Dialect/Tosa/constant-op-fold.mlir b/mlir/test/Dialect/Tosa/constant-op-fold.mlir
--- a/mlir/test/Dialect/Tosa/constant-op-fold.mlir
+++ b/mlir/test/Dialect/Tosa/constant-op-fold.mlir
@@ -46,6 +46,17 @@
   return %1 : tensor<3x2xf32>
 }
 
+// CHECK-LABEL: @transpose_fold_2d_bool
+func.func @transpose_fold_2d_bool() -> tensor<3x2xi1> {
+  %input = "tosa.const"() {value = dense<[[true, false, false], [false, false, true]]> : tensor<2x3xi1>} : () -> tensor<2x3xi1>
+  %perms = "tosa.const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32>
+  //               CHECK: %[[CST:.+]] = "tosa.const"()
+  // CHECK-SAME{LITERAL}: value = dense<[[true, false], [false, false], [false, true]]> : tensor<3x2xi1>
+  %1 = "tosa.transpose"(%input, %perms) : (tensor<2x3xi1>, tensor<2xi32>) -> tensor<3x2xi1>
+  // CHECK: return %[[CST]]
+  return %1 : tensor<3x2xi1>
+}
+
 // CHECK-LABEL: @transpose_fold_4d_int
 func.func @transpose_fold_4d_int() -> tensor<3x1x4x2xi32> {
   %input = "tosa.const"() {value = dense<[[