Skip to content

Commit

Permalink
[mlir][sparse] introduce MapRef, unify conversion/codegen for reader (#…
Browse files Browse the repository at this point in the history
…68360)

This revision introduces a MapRef, which will support a future
generalization beyond permutations (e.g. block sparsity). This revision
also unifies the conversion/codegen paths for the sparse_tensor.new
operation from file (e.g. the readers). Note that more unification is
planned, as well as general affine dim2lvl and lvl2dim (all marked with
TODOs).
  • Loading branch information
aartbik authored Oct 6, 2023
1 parent f045f2c commit d3af653
Show file tree
Hide file tree
Showing 14 changed files with 437 additions and 483 deletions.
156 changes: 58 additions & 98 deletions mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
#define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H

#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"
#include "mlir/ExecutionEngine/SparseTensor/Storage.h"

#include <fstream>
Expand Down Expand Up @@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) {

} // namespace detail

//===----------------------------------------------------------------------===//
//
// Reader class.
//
//===----------------------------------------------------------------------===//

/// This class abstracts over the information stored in file headers,
Expand Down Expand Up @@ -132,6 +137,7 @@ class SparseTensorReader final {
/// Reads and parses the file's header.
void readHeader();

/// Returns the stored value kind.
ValueKind getValueKind() const { return valueKind_; }

/// Checks if a header has been successfully read.
Expand Down Expand Up @@ -185,58 +191,37 @@ class SparseTensorReader final {
/// valid after parsing the header.
void assertMatchesShape(uint64_t rank, const uint64_t *shape) const;

/// Reads a sparse tensor element from the next line in the input file and
/// returns the value of the element. Stores the coordinates of the element
/// to the `dimCoords` array.
template <typename V>
V readElement(uint64_t dimRank, uint64_t *dimCoords) {
assert(dimRank == getRank() && "rank mismatch");
char *linePtr = readCoords(dimCoords);
return detail::readValue<V>(&linePtr, isPattern());
}

/// Allocates a new COO object for `lvlSizes`, initializes it by reading
/// all the elements from the file and applying `dim2lvl` to their
/// dim-coordinates, and then closes the file. Templated on V only.
template <typename V>
SparseTensorCOO<V> *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes,
const uint64_t *dim2lvl);

/// Allocates a new sparse-tensor storage object with the given encoding,
/// initializes it by reading all the elements from the file, and then
/// closes the file. Templated on P, I, and V.
template <typename P, typename I, typename V>
SparseTensorStorage<P, I, V> *
readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes,
const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
const uint64_t *dim2lvl) {
auto *lvlCOO = readCOO<V>(lvlRank, lvlSizes, dim2lvl);
const DimLevelType *lvlTypes, const uint64_t *dim2lvl,
const uint64_t *lvl2dim) {
const uint64_t dimRank = getRank();
MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim);
auto *coo = readCOO<V>(map, lvlSizes);
auto *tensor = SparseTensorStorage<P, I, V>::newFromCOO(
getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO);
delete lvlCOO;
dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo);
delete coo;
return tensor;
}

/// Reads the COO tensor from the file, stores the coordinates and values to
/// the given buffers, returns a boolean value to indicate whether the COO
/// elements are sorted.
/// Precondition: the buffers should have enough space to hold the elements.
template <typename C, typename V>
bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl,
C *lvlCoordinates, V *values);
const uint64_t *lvl2dim, C *lvlCoordinates, V *values);

private:
/// Attempts to read a line from the file. Is private because there's
/// no reason for client code to call it.
/// Attempts to read a line from the file.
void readLine();

/// Reads the next line of the input file and parses the coordinates
/// into the `dimCoords` argument. Returns the position in the `line`
/// buffer where the element's value should be parsed from. This method
/// has been factored out from `readElement` to minimize code bloat
/// for the generated library.
///
/// Precondition: `dimCoords` is valid for `getRank()`.
/// buffer where the element's value should be parsed from.
template <typename C>
char *readCoords(C *dimCoords) {
readLine();
Expand All @@ -251,24 +236,20 @@ class SparseTensorReader final {
return linePtr;
}

/// The internal implementation of `readCOO`. We template over
/// `IsPattern` in order to perform LICM without needing to duplicate the
/// source code.
//
// TODO: We currently take the `dim2lvl` argument as a `PermutationRef`
// since that's what `readCOO` creates. Once we update `readCOO` to
// functionalize the mapping, then this helper will just take that
// same function.
/// Reads all the elements from the file while applying the given map.
template <typename V>
SparseTensorCOO<V> *readCOO(const MapRef &map, const uint64_t *lvlSizes);

/// The implementation of `readCOO` that is templated over `IsPattern` in
/// order to perform LICM without needing to duplicate the source code.
template <typename V, bool IsPattern>
void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl,
SparseTensorCOO<V> *lvlCOO);
void readCOOLoop(const MapRef &map, SparseTensorCOO<V> *coo);

/// The internal implementation of `readToBuffers`. We template over
/// `IsPattern` in order to perform LICM without needing to duplicate the
/// source code.
/// The internal implementation of `readToBuffers`. We template over
/// `IsPattern` in order to perform LICM without needing to duplicate
/// the source code.
template <typename C, typename V, bool IsPattern>
bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl,
C *lvlCoordinates, V *values);
bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values);

/// Reads the MME header of a general sparse matrix of type real.
void readMMEHeader();
Expand All @@ -288,96 +269,76 @@ class SparseTensorReader final {
char line[kColWidth];
};

//===----------------------------------------------------------------------===//
//
// Reader class methods.
//
//===----------------------------------------------------------------------===//

template <typename V>
SparseTensorCOO<V> *SparseTensorReader::readCOO(uint64_t lvlRank,
const uint64_t *lvlSizes,
const uint64_t *dim2lvl) {
SparseTensorCOO<V> *SparseTensorReader::readCOO(const MapRef &map,
const uint64_t *lvlSizes) {
assert(isValid() && "Attempt to readCOO() before readHeader()");
const uint64_t dimRank = getRank();
assert(lvlRank == dimRank && "Rank mismatch");
detail::PermutationRef d2l(dimRank, dim2lvl);
// Prepare a COO object with the number of stored elems as initial capacity.
auto *lvlCOO = new SparseTensorCOO<V>(lvlRank, lvlSizes, getNSE());
// Do some manual LICM, to avoid assertions in the for-loop.
const bool IsPattern = isPattern();
if (IsPattern)
readCOOLoop<V, true>(lvlRank, d2l, lvlCOO);
auto *coo = new SparseTensorCOO<V>(map.getLvlRank(), lvlSizes, getNSE());
// Enter the reading loop.
if (isPattern())
readCOOLoop<V, true>(map, coo);
else
readCOOLoop<V, false>(lvlRank, d2l, lvlCOO);
readCOOLoop<V, false>(map, coo);
// Close the file and return the COO.
closeFile();
return lvlCOO;
return coo;
}

template <typename V, bool IsPattern>
void SparseTensorReader::readCOOLoop(uint64_t lvlRank,
detail::PermutationRef dim2lvl,
SparseTensorCOO<V> *lvlCOO) {
const uint64_t dimRank = getRank();
void SparseTensorReader::readCOOLoop(const MapRef &map,
SparseTensorCOO<V> *coo) {
const uint64_t dimRank = map.getDimRank();
const uint64_t lvlRank = map.getLvlRank();
assert(dimRank == getRank());
std::vector<uint64_t> dimCoords(dimRank);
std::vector<uint64_t> lvlCoords(lvlRank);
for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) {
// We inline `readElement` here in order to avoid redundant
// assertions, since they're guaranteed by the call to `isValid()`
// and the construction of `dimCoords` above.
for (uint64_t k = 0, nse = getNSE(); k < nse; k++) {
char *linePtr = readCoords(dimCoords.data());
const V value = detail::readValue<V, IsPattern>(&linePtr);
dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data());
// TODO: <https://github.com/llvm/llvm-project/issues/54179>
lvlCOO->add(lvlCoords, value);
map.pushforward(dimCoords.data(), lvlCoords.data());
coo->add(lvlCoords, value);
}
}

template <typename C, typename V>
bool SparseTensorReader::readToBuffers(uint64_t lvlRank,
const uint64_t *dim2lvl,
const uint64_t *lvl2dim,
C *lvlCoordinates, V *values) {
assert(isValid() && "Attempt to readCOO() before readHeader()");
// Construct a `PermutationRef` for the `pushforward` below.
// TODO: This specific implementation does not generalize to arbitrary
// mappings, but once we functionalize the `dim2lvl` argument we can
// simply use that function instead.
const uint64_t dimRank = getRank();
assert(lvlRank == dimRank && "Rank mismatch");
detail::PermutationRef d2l(dimRank, dim2lvl);
// Do some manual LICM, to avoid assertions in the for-loop.
MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim);
bool isSorted =
isPattern()
? readToBuffersLoop<C, V, true>(lvlRank, d2l, lvlCoordinates, values)
: readToBuffersLoop<C, V, false>(lvlRank, d2l, lvlCoordinates,
values);

// Close the file and return isSorted.
isPattern() ? readToBuffersLoop<C, V, true>(map, lvlCoordinates, values)
: readToBuffersLoop<C, V, false>(map, lvlCoordinates, values);
closeFile();
return isSorted;
}

template <typename C, typename V, bool IsPattern>
bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank,
detail::PermutationRef dim2lvl,
C *lvlCoordinates, V *values) {
const uint64_t dimRank = getRank();
bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates,
V *values) {
const uint64_t dimRank = map.getDimRank();
const uint64_t lvlRank = map.getLvlRank();
const uint64_t nse = getNSE();
assert(dimRank == getRank());
std::vector<C> dimCoords(dimRank);
// Read the first element with isSorted=false as a way to avoid accessing its
// previous element.
bool isSorted = false;
char *linePtr;
// We inline `readElement` here in order to avoid redundant assertions,
// since they're guaranteed by the call to `isValid()` and the construction
// of `dimCoords` above.
const auto readNextElement = [&]() {
linePtr = readCoords<C>(dimCoords.data());
dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates);
map.pushforward(dimCoords.data(), lvlCoordinates);
*values = detail::readValue<V, IsPattern>(&linePtr);
if (isSorted) {
// Note that isSorted was set to false while reading the first element,
// Note that isSorted is set to false when reading the first element,
// to guarantee the safeness of using prevLvlCoords.
C *prevLvlCoords = lvlCoordinates - lvlRank;
// TODO: define a new CoordsLT which is like ElementLT but doesn't have
// the V parameter, and use it here.
for (uint64_t l = 0; l < lvlRank; ++l) {
if (prevLvlCoords[l] != lvlCoordinates[l]) {
if (prevLvlCoords[l] > lvlCoordinates[l])
Expand All @@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank,
isSorted = true;
for (uint64_t n = 1; n < nse; ++n)
readNextElement();

return isSorted;
}

Expand Down
81 changes: 81 additions & 0 deletions mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// A dim2lvl/lvl2dim map encoding class, with utility methods.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H
#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H

#include <cinttypes>

#include <cassert>
#include <vector>

namespace mlir {
namespace sparse_tensor {

/// Compact, non-owning encoding of a sparse tensor type's dim2lvl/lvl2dim map.
///
/// Two kinds of maps are anticipated:
///   (1) permutations;
///   (2) maps built from a restricted set of affine ops.
///
/// Translating coordinates is fast for (1) and is expected to incur some
/// obvious overhead for (2). In the methods defined in this header only the
/// permutation case is implemented; the other path asserts "coming soon".
///
class MapRef final {
public:
  /// Creates a map over `d` dimensions and `l` levels described by the
  /// `d2l` and `l2d` arrays. The definition lives out of line.
  /// NOTE(review): the arrays are presumably kept by pointer (the members
  /// below are non-owning), so they would have to outlive this object --
  /// confirm against the constructor's definition.
  MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d);

  //
  // Push forward maps from dimensions to levels.
  //

  /// Converts the dimension-indexed array `in` into the level-indexed array
  /// `out`: for a permutation, `out[l] = in[lvl2dim[l]]` for every level `l`.
  /// `out` must have room for `getLvlRank()` entries. For a non-permutation
  /// map this asserts; if assertions are compiled out, `out` is not written.
  template <typename T>
  inline void pushforward(const T *in, T *out) const {
    if (!isPermutation) {
      assert(0 && "coming soon");
      return;
    }
    for (uint64_t l = 0; l < lvlRank; ++l)
      out[l] = in[lvl2dim[l]];
  }

  //
  // Push backward maps from levels to dimensions.
  //

  /// Converts the level-indexed array `in` into the dimension-indexed array
  /// `out`: for a permutation, `out[d] = in[dim2lvl[d]]` for every dimension
  /// `d`. `out` must have room for `getDimRank()` entries. For a
  /// non-permutation map this asserts; if assertions are compiled out, `out`
  /// is not written.
  template <typename T>
  inline void pushbackward(const T *in, T *out) const {
    if (!isPermutation) {
      assert(0 && "coming soon");
      return;
    }
    for (uint64_t d = 0; d < dimRank; ++d)
      out[d] = in[dim2lvl[d]];
  }

  /// Number of dimensions covered by the map.
  uint64_t getDimRank() const { return dimRank; }

  /// Number of levels covered by the map.
  uint64_t getLvlRank() const { return lvlRank; }

private:
  /// Defined out of line. NOTE(review): presumably computes the value cached
  /// in `isPermutation` -- confirm against the definition.
  bool isPermutationMap() const;

  // The declaration order below is kept as-is: members are initialized in
  // this order, and the out-of-line constructor may rely on it.
  const uint64_t dimRank;
  const uint64_t lvlRank;
  const uint64_t *const dim2lvl; // non-owning pointer
  const uint64_t *const lvl2dim; // non-owning pointer
  const bool isPermutation;      // selects the fast permutation path
};

} // namespace sparse_tensor
} // namespace mlir

#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H
Loading

0 comments on commit d3af653

Please sign in to comment.