browsermt · jerinphilip · Oct 5, 2021 · Oct 5, 2021 · Oct 5, 2021 · Oct 6, 2021
diff --git a/src/common/intrusive_ptr.h b/src/common/intrusive_ptr.h
@@ -71,7 +71,7 @@ class IntrusivePtr {
     rhs.ptr_ = 0;
   }
 
-  inline size_t useCount() {
+  inline size_t useCount() const {  // const-qualified so it can be called through a const IntrusivePtr
     return references(ptr_);
   }
 

diff --git a/src/graph/expression_graph.cpp b/src/graph/expression_graph.cpp
@@ -1,6 +1,5 @@
 #include "graph/expression_graph.h"
 #include "tensors/tensor_operators.h"
-
 #include <sstream>
 
 namespace marian {

diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h
@@ -26,28 +26,33 @@ class Tensors {
 
   typedef std::unordered_map<size_t, std::vector<WExpr>> WeakMemory;
   typedef std::unordered_map<size_t, std::vector<Expr>> Memory;
-  //typedef std::unordered_map<std::string, Expr> ShortlistMemory; //Because... yeah
 
   Ptr<WeakMemory> shortterm_;
   Ptr<Memory> longterm_;
-  //Ptr<ShortlistMemory> midterm_;
 
 public:
   Tensors(Ptr<Backend> backend)
       : tensors_(New<TensorAllocator>(backend)),
         cache_(New<TensorAllocator>(backend)),
         shortterm_(New<WeakMemory>()),
-        longterm_(New<Memory>())/*,
-        midterm_(New<ShortlistMemory>())*/ {}
+        longterm_(New<Memory>())
+       {}  // both allocators are created from `backend`; the two memo tables are freshly constructed
 
   Tensors(Ptr<Backend> backend, Ptr<Device> device)
       : tensors_(New<TensorAllocator>(backend, device)),
         cache_(New<TensorAllocator>(backend)),
         shortterm_(New<WeakMemory>()),
-        longterm_(New<Memory>())/*,
-        midterm_(New<ShortlistMemory>())*/ {}
+        longterm_(New<Memory>()){}  // only tensors_ is given `device`; cache_ is created from `backend` alone
 
-  void reserve(size_t bytes) { tensors_->reserve(bytes); }
+  // Third constructor: adopts allocators created elsewhere (e.g. a worker's
+  // preallocated workspace) instead of constructing new ones from a backend.
+  Tensors(Ptr<TensorAllocator> tensors, Ptr<TensorAllocator> cache)
+      : tensors_(tensors),
+        cache_(cache),
+        shortterm_(New<WeakMemory>()),
+        longterm_(New<Memory>()) {}
+
+  void reserve(size_t bytes) { tensors_->reserve(bytes); }  // forwards the request to the tensors_ allocator
 
   void throwAtReallocation(bool throwAtRealloc) {
     tensors_->throwAtReallocation(throwAtRealloc);
@@ -76,43 +81,6 @@ class Tensors {
     size_t hash = node->hash();
     // memoize constant nodes that are not parameters
     // parameters are already memoized in the graph itself
-
-    // int size1 = 0;
-    // for (auto it = longterm_->begin(); it != longterm_->end(); it++) {
-    //   size1 += it->second.size();
-    // }
-
-    // int size2 = 0;
-    // for (auto it = shortterm_->begin(); it != shortterm_->end(); it++) {
-    //   size2 += it->second.size();
-    // }
-    // std::cerr << "Longterm: " << size1 << " shortterm: " << size2 << std::endl;
-
-    // When we have a shortlist, we're getting screwed by the constantly changing shortlist
-    // Which is necessary for this batch, but not for anything else. The current cache mechanism has no notion of
-    // "Keep those tensors cached but delete them once it is over". Conveniently, they all have different hashes
-    // making it difficult to isolate them inside the longterm memory.
-    // Somewhat less important, the same thing happens with:
-    // F0::none_QuantMultA Type: alphaNodeOp shape: shape=1 size=1  and
-    // none_QuantMultB Type: intgemmQuantMultB shape: shape=1 size=1
-    // But as their sizes are very small, they are less of an issue.
-    // Those are actually constant, but as they have different parents, marian cache doesn't match them.
-    // To fix those, in intgemm_interface we're hashing the name() string and comparing its equality of the equals method.
-    /*if (node->type() == "intgemmSelectColumnsB") {
-      auto it = midterm_->find("intgemmSelectColumnsB");
-      //std::cerr << "Midterm size: " << midterm_->size() << std::endl;
-      if (it != midterm_->end()) {
-        if (it->second->hash() == hash) {
-          return it->second;
-        } else {
-          it->second->free();
-          midterm_->clear();
-        }
-      }
-      (*midterm_)["intgemmSelectColumnsB"] = node;
-      return nullptr;
-
-    } else */
     if(node->type() != "param" && node->memoize()) {
       auto it = longterm_->find(hash);
       if(it != longterm_->end()) {
@@ -152,8 +120,11 @@ class Tensors {
   void clearShorttermMemory() { shortterm_->clear(); }
 
   void clearLongtermMemory() { longterm_->clear(); }
+
 };
 
+
+
 typedef std::map<Type, Ptr<Parameters>> ElementTypeParamsMap; // keep it sorted, hence map not unordered map
 
 class ExpressionGraph : public std::enable_shared_from_this<ExpressionGraph> {
@@ -165,8 +136,6 @@ class ExpressionGraph : public std::enable_shared_from_this<ExpressionGraph> {
   std::list<Expr> nodesForward_;
   std::list<Expr> nodesBackward_;
 
-  // Holds memory and expressions that correspond to temporary expressions.
-  // This gets cleared before a new graph is built.
   Ptr<Tensors> tensors_;
 private:
 
@@ -214,6 +183,10 @@ class ExpressionGraph : public std::enable_shared_from_this<ExpressionGraph> {
   virtual void setDevice(DeviceId deviceId = {0, DeviceType::gpu},
                          Ptr<Device> device = nullptr);
 
+  // Swaps in a Tensors that uses the caller-provided workspace and cache allocators.
+  void setWorkspaces(Ptr<TensorAllocator> tensors, Ptr<TensorAllocator> cache)
+  { tensors_ = New<Tensors>(tensors, cache); }
+
   DeviceId getDeviceId() { return backend_->getDeviceId(); }
 
   Ptr<Backend> getBackend() { return backend_; }
@@ -523,7 +496,9 @@ class ExpressionGraph : public std::enable_shared_from_this<ExpressionGraph> {
 
     topNodes_.clear();
 
-    tensors_->clear();
+    if(tensors_){
+        tensors_->clear();
+    }
   }
 
   void setReloaded(bool reloaded) { reloaded_ = reloaded; }