[mlir][sparse] introduce MapRef, unify conversion/codegen for reader (#…

…68360) This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g., block sparsity). This revision also unifies the conversion/codegen paths for the sparse_tensor.new operation from file (e.g., the readers). Note that more unification is planned, as well as support for general affine dim2lvl and lvl2dim maps (all marked with TODOs).
llvm · Oct 6, 2023 · d3af653 · d3af653
1 parent f045f2c
commit d3af653
Show file tree

Hide file tree

Showing 14 changed files with 437 additions and 483 deletions.
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
@@ -20,6 +20,7 @@
 #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
 #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
 
+#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"
 #include "mlir/ExecutionEngine/SparseTensor/Storage.h"
 
 #include <fstream>
@@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) {
 
 } // namespace detail
 
+//===----------------------------------------------------------------------===//
+//
+//  Reader class.
+//
 //===----------------------------------------------------------------------===//
 
 /// This class abstracts over the information stored in file headers,
@@ -132,6 +137,7 @@ class SparseTensorReader final {
   /// Reads and parses the file's header.
   void readHeader();
 
+  /// Returns the stored value kind.
   ValueKind getValueKind() const { return valueKind_; }
 
   /// Checks if a header has been successfully read.
@@ -185,58 +191,37 @@ class SparseTensorReader final {
   /// valid after parsing the header.
   void assertMatchesShape(uint64_t rank, const uint64_t *shape) const;
 
-  /// Reads a sparse tensor element from the next line in the input file and
-  /// returns the value of the element. Stores the coordinates of the element
-  /// to the `dimCoords` array.
-  template <typename V>
-  V readElement(uint64_t dimRank, uint64_t *dimCoords) {
-    assert(dimRank == getRank() && "rank mismatch");
-    char *linePtr = readCoords(dimCoords);
-    return detail::readValue<V>(&linePtr, isPattern());
-  }
-
-  /// Allocates a new COO object for `lvlSizes`, initializes it by reading
-  /// all the elements from the file and applying `dim2lvl` to their
-  /// dim-coordinates, and then closes the file. Templated on V only.
-  template <typename V>
-  SparseTensorCOO<V> *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes,
-                              const uint64_t *dim2lvl);
-
   /// Allocates a new sparse-tensor storage object with the given encoding,
   /// initializes it by reading all the elements from the file, and then
   /// closes the file. Templated on P, I, and V.
   template <typename P, typename I, typename V>
   SparseTensorStorage<P, I, V> *
   readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes,
-                   const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
-                   const uint64_t *dim2lvl) {
-    auto *lvlCOO = readCOO<V>(lvlRank, lvlSizes, dim2lvl);
+                   const DimLevelType *lvlTypes, const uint64_t *dim2lvl,
+                   const uint64_t *lvl2dim) {
+    const uint64_t dimRank = getRank();
+    MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim);
+    auto *coo = readCOO<V>(map, lvlSizes);
     auto *tensor = SparseTensorStorage<P, I, V>::newFromCOO(
-        getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO);
-    delete lvlCOO;
+        dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo);
+    delete coo;
     return tensor;
   }
 
   /// Reads the COO tensor from the file, stores the coordinates and values to
   /// the given buffers, returns a boolean value to indicate whether the COO
   /// elements are sorted.
-  /// Precondition: the buffers should have enough space to hold the elements.
   template <typename C, typename V>
   bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl,
-                     C *lvlCoordinates, V *values);
+                     const uint64_t *lvl2dim, C *lvlCoordinates, V *values);
 
 private:
-  /// Attempts to read a line from the file.  Is private because there's
-  /// no reason for client code to call it.
+  /// Attempts to read a line from the file.
   void readLine();
 
   /// Reads the next line of the input file and parses the coordinates
   /// into the `dimCoords` argument.  Returns the position in the `line`
-  /// buffer where the element's value should be parsed from.  This method
-  /// has been factored out from `readElement` to minimize code bloat
-  /// for the generated library.
-  ///
-  /// Precondition: `dimCoords` is valid for `getRank()`.
+  /// buffer where the element's value should be parsed from.
   template <typename C>
   char *readCoords(C *dimCoords) {
     readLine();
@@ -251,24 +236,20 @@ class SparseTensorReader final {
     return linePtr;
   }
 
-  /// The internal implementation of `readCOO`.  We template over
-  /// `IsPattern` in order to perform LICM without needing to duplicate the
-  /// source code.
-  //
-  // TODO: We currently take the `dim2lvl` argument as a `PermutationRef`
-  // since that's what `readCOO` creates.  Once we update `readCOO` to
-  // functionalize the mapping, then this helper will just take that
-  // same function.
+  /// Reads all the elements from the file while applying the given map.
+  template <typename V>
+  SparseTensorCOO<V> *readCOO(const MapRef &map, const uint64_t *lvlSizes);
+
+  /// The implementation of `readCOO` that is templated on `IsPattern` in
+  /// order to perform LICM without needing to duplicate the source code.
   template <typename V, bool IsPattern>
-  void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl,
-                   SparseTensorCOO<V> *lvlCOO);
+  void readCOOLoop(const MapRef &map, SparseTensorCOO<V> *coo);
 
-  /// The internal implementation of `readToBuffers`.  We template over
-  /// `IsPattern` in order to perform LICM without needing to duplicate the
-  /// source code.
+  /// The internal implementation of `readToBuffers`. We template over
+  /// `IsPattern` in order to perform LICM without needing to duplicate
+  /// the source code.
   template <typename C, typename V, bool IsPattern>
-  bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl,
-                         C *lvlCoordinates, V *values);
+  bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values);
 
   /// Reads the MME header of a general sparse matrix of type real.
   void readMMEHeader();
@@ -288,96 +269,76 @@ class SparseTensorReader final {
   char line[kColWidth];
 };
 
+//===----------------------------------------------------------------------===//
+//
+//  Reader class methods.
+//
 //===----------------------------------------------------------------------===//
 
 template <typename V>
-SparseTensorCOO<V> *SparseTensorReader::readCOO(uint64_t lvlRank,
-                                                const uint64_t *lvlSizes,
-                                                const uint64_t *dim2lvl) {
+SparseTensorCOO<V> *SparseTensorReader::readCOO(const MapRef &map,
+                                                const uint64_t *lvlSizes) {
   assert(isValid() && "Attempt to readCOO() before readHeader()");
-  const uint64_t dimRank = getRank();
-  assert(lvlRank == dimRank && "Rank mismatch");
-  detail::PermutationRef d2l(dimRank, dim2lvl);
   // Prepare a COO object with the number of stored elems as initial capacity.
-  auto *lvlCOO = new SparseTensorCOO<V>(lvlRank, lvlSizes, getNSE());
-  // Do some manual LICM, to avoid assertions in the for-loop.
-  const bool IsPattern = isPattern();
-  if (IsPattern)
-    readCOOLoop<V, true>(lvlRank, d2l, lvlCOO);
+  auto *coo = new SparseTensorCOO<V>(map.getLvlRank(), lvlSizes, getNSE());
+  // Enter the reading loop.
+  if (isPattern())
+    readCOOLoop<V, true>(map, coo);
   else
-    readCOOLoop<V, false>(lvlRank, d2l, lvlCOO);
+    readCOOLoop<V, false>(map, coo);
   // Close the file and return the COO.
   closeFile();
-  return lvlCOO;
+  return coo;
 }
 
 template <typename V, bool IsPattern>
-void SparseTensorReader::readCOOLoop(uint64_t lvlRank,
-                                     detail::PermutationRef dim2lvl,
-                                     SparseTensorCOO<V> *lvlCOO) {
-  const uint64_t dimRank = getRank();
+void SparseTensorReader::readCOOLoop(const MapRef &map,
+                                     SparseTensorCOO<V> *coo) {
+  const uint64_t dimRank = map.getDimRank();
+  const uint64_t lvlRank = map.getLvlRank();
+  assert(dimRank == getRank());
   std::vector<uint64_t> dimCoords(dimRank);
   std::vector<uint64_t> lvlCoords(lvlRank);
-  for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) {
-    // We inline `readElement` here in order to avoid redundant
-    // assertions, since they're guaranteed by the call to `isValid()`
-    // and the construction of `dimCoords` above.
+  for (uint64_t k = 0, nse = getNSE(); k < nse; k++) {
     char *linePtr = readCoords(dimCoords.data());
     const V value = detail::readValue<V, IsPattern>(&linePtr);
-    dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data());
-    // TODO: <https://github.com/llvm/llvm-project/issues/54179>
-    lvlCOO->add(lvlCoords, value);
+    map.pushforward(dimCoords.data(), lvlCoords.data());
+    coo->add(lvlCoords, value);
   }
 }
 
 template <typename C, typename V>
 bool SparseTensorReader::readToBuffers(uint64_t lvlRank,
                                        const uint64_t *dim2lvl,
+                                       const uint64_t *lvl2dim,
                                        C *lvlCoordinates, V *values) {
   assert(isValid() && "Attempt to readCOO() before readHeader()");
-  // Construct a `PermutationRef` for the `pushforward` below.
-  // TODO: This specific implementation does not generalize to arbitrary
-  // mappings, but once we functionalize the `dim2lvl` argument we can
-  // simply use that function instead.
-  const uint64_t dimRank = getRank();
-  assert(lvlRank == dimRank && "Rank mismatch");
-  detail::PermutationRef d2l(dimRank, dim2lvl);
-  // Do some manual LICM, to avoid assertions in the for-loop.
+  MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim);
   bool isSorted =
-      isPattern()
-          ? readToBuffersLoop<C, V, true>(lvlRank, d2l, lvlCoordinates, values)
-          : readToBuffersLoop<C, V, false>(lvlRank, d2l, lvlCoordinates,
-                                           values);
-
-  // Close the file and return isSorted.
+      isPattern() ? readToBuffersLoop<C, V, true>(map, lvlCoordinates, values)
+                  : readToBuffersLoop<C, V, false>(map, lvlCoordinates, values);
   closeFile();
   return isSorted;
 }
 
 template <typename C, typename V, bool IsPattern>
-bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank,
-                                           detail::PermutationRef dim2lvl,
-                                           C *lvlCoordinates, V *values) {
-  const uint64_t dimRank = getRank();
+bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates,
+                                           V *values) {
+  const uint64_t dimRank = map.getDimRank();
+  const uint64_t lvlRank = map.getLvlRank();
   const uint64_t nse = getNSE();
+  assert(dimRank == getRank());
   std::vector<C> dimCoords(dimRank);
-  // Read the first element with isSorted=false as a way to avoid accessing its
-  // previous element.
   bool isSorted = false;
   char *linePtr;
-  // We inline `readElement` here in order to avoid redundant assertions,
-  // since they're guaranteed by the call to `isValid()` and the construction
-  // of `dimCoords` above.
   const auto readNextElement = [&]() {
     linePtr = readCoords<C>(dimCoords.data());
-    dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates);
+    map.pushforward(dimCoords.data(), lvlCoordinates);
     *values = detail::readValue<V, IsPattern>(&linePtr);
     if (isSorted) {
-      // Note that isSorted was set to false while reading the first element,
+      // Note that isSorted is set to false when reading the first element,
       // to guarantee the safeness of using prevLvlCoords.
       C *prevLvlCoords = lvlCoordinates - lvlRank;
-      // TODO: define a new CoordsLT which is like ElementLT but doesn't have
-      // the V parameter, and use it here.
       for (uint64_t l = 0; l < lvlRank; ++l) {
         if (prevLvlCoords[l] != lvlCoordinates[l]) {
           if (prevLvlCoords[l] > lvlCoordinates[l])
@@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank,
   isSorted = true;
   for (uint64_t n = 1; n < nse; ++n)
     readNextElement();
-
   return isSorted;
 }
 

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -0,0 +1,81 @@
+//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A dim2lvl/lvl2dim map encoding class, with utility methods.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H
+#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H
+
+#include <cinttypes>
+
+#include <cassert>
+#include <vector>
+
+namespace mlir {
+namespace sparse_tensor {
+
+/// A class for capturing the sparse tensor type map with a compact encoding.
+///
+/// Currently, the following situations are distinguished:
+///   (1) map is a permutation
+///   (2) map has affine ops (restricted set)
+///
+/// The pushforward/backward operations are fast for (1). For situation (2),
+/// which will incur some overhead, they currently just assert (coming soon).
+///
+class MapRef final {
+public:
+  MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d);
+
+  //
+  // Push forward maps from dimensions to levels.
+  //
+
+  template <typename T>
+  inline void pushforward(const T *in, T *out) const {
+    if (isPermutation) {
+      for (uint64_t i = 0; i < lvlRank; ++i)
+        out[i] = in[lvl2dim[i]];
+    } else {
+      assert(0 && "coming soon");
+    }
+  }
+
+  //
+  // Push backward maps from levels to dimensions.
+  //
+
+  template <typename T>
+  inline void pushbackward(const T *in, T *out) const {
+    if (isPermutation) {
+      for (uint64_t i = 0; i < dimRank; ++i)
+        out[i] = in[dim2lvl[i]];
+    } else {
+      assert(0 && "coming soon");
+    }
+  }
+
+  uint64_t getDimRank() const { return dimRank; }
+  uint64_t getLvlRank() const { return lvlRank; }
+
+private:
+  bool isPermutationMap() const;
+
+  const uint64_t dimRank;
+  const uint64_t lvlRank;
+  const uint64_t *const dim2lvl; // non-owning pointer
+  const uint64_t *const lvl2dim; // non-owning pointer
+  const bool isPermutation;
+};
+
+} // namespace sparse_tensor
+} // namespace mlir
+
+#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H