Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TensorOps kernels refactoring #3346

Merged
merged 40 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
a2972c0
initial changes and support for 1d generic kernel
novakovicdj Oct 30, 2024
75cecb2
1d solver file name change
novakovicdj Oct 30, 2024
035989c
solver name change in cmakelists.txt
novakovicdj Oct 30, 2024
cf91070
more changes, 2d generic and 2d lite kernel
novakovicdj Oct 31, 2024
f2a11d6
some changes suggested in the comments
novakovicdj Nov 1, 2024
ac13ff3
additional changes
novakovicdj Nov 4, 2024
cadb264
initial switch to solver structure for all kernels, still need to sep…
novakovicdj Nov 5, 2024
63603f0
fix for two kernels in one solver
novakovicdj Nov 6, 2024
976bd84
additional changes
novakovicdj Nov 7, 2024
6be98d0
clang format
novakovicdj Nov 7, 2024
d6ffea5
fwd_conv_bias changed
novakovicdj Nov 7, 2024
89dd24c
tidy some part of the code
novakovicdj Nov 8, 2024
9ba8810
Merge branch 'develop' into tensor_refactoring
novakovicdj Nov 8, 2024
5a9b5ed
fix typos
novakovicdj Nov 8, 2024
c9f310a
implementing suggestions, updating network_config and changes to po…
novakovicdj Nov 15, 2024
496b414
clang format
novakovicdj Nov 15, 2024
22c6c48
Merge branch 'develop' into tensor_refactoring
novakovicdj Nov 18, 2024
cb6fd6e
change for new Op3dTensorGeneric kernel usage
novakovicdj Nov 18, 2024
6c3d0c2
remove unused variable
novakovicdj Nov 18, 2024
bd0bd61
clang format
novakovicdj Nov 18, 2024
3f14d3a
support for half data type for CL kernels
novakovicdj Nov 19, 2024
042129e
additional changes for support for half type
novakovicdj Nov 19, 2024
371d43c
initial removal of tensorocl.cpp
novakovicdj Nov 20, 2024
155b35f
code tidying
novakovicdj Nov 20, 2024
0b3454c
unit test for tensorOp PD + additional changes requested
novakovicdj Nov 25, 2024
2bef739
Merge branch 'develop' into tensor_refactoring
BrianHarrisonAMD Nov 27, 2024
146070a
fix windows build issue
novakovicdj Nov 29, 2024
a83ac16
Merge branch 'develop' into tensor_refactoring
novakovicdj Nov 29, 2024
b8d9ab0
Merge branch 'tensor_refactoring' of github.com:novakovicdj/MIOpen in…
novakovicdj Nov 29, 2024
6258109
Merge branch 'develop' into tensor_refactoring
BrianHarrisonAMD Dec 3, 2024
0eb63fc
resolved conflict
novakovicdj Dec 5, 2024
3dc0f66
kept changes in CastTensor but in tensor.cpp file
novakovicdj Dec 5, 2024
6e37785
Merge branch 'develop' into tensor_refactoring
BrianHarrisonAMD Dec 17, 2024
edaa59a
Merge branch 'develop' into tensor_refactoring
Jan 2, 2025
c10ac52
Merge branch 'develop' into tensor_refactoring
novakovicdj Jan 8, 2025
8d0c414
Merge branch 'develop' into tensor_refactoring
novakovicdj Jan 10, 2025
b864063
reverted back to using const Handle
novakovicdj Jan 10, 2025
492ce35
change for accepting double as tensor data type
novakovicdj Jan 10, 2025
0006033
Merge branch 'tensor_refactoring' of github.com:novakovicdj/MIOpen in…
novakovicdj Jan 10, 2025
a7325b2
Merge branch 'develop' into tensor_refactoring
BrianHarrisonAMD Jan 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -337,11 +337,22 @@ set( MIOpen_Source
solver/softmarginloss/forward_softmarginloss.cpp
solver/softmax/attn_softmax.cpp
solver/softmax/softmax.cpp
solver/tensorOp/Op1dTensorGeneric.cpp
solver/tensorOp/Op2dTensorGeneric.cpp
solver/tensorOp/Op2dTensorLite.cpp
solver/tensorOp/Op2dTensorSquash.cpp
solver/tensorOp/Op3dTensorGeneric.cpp
solver/tensorOp/Op4dTensorGeneric.cpp
solver/tensorOp/Op4dTensorLite.cpp
solver/tensorOp/Op5dTensorGeneric.cpp
solver/tensorOp/OpTensorFwdBias.cpp
solver/tensorOp/OpTensorLeadingOnes.cpp
subbuffers.cpp
t5layernorm_api.cpp
target_properties.cpp
temp_file.cpp
tensor.cpp
tensorOp/problem_description.cpp
tensor_api.cpp
transformers_adam_w_api.cpp
seq_tensor.cpp
Expand Down Expand Up @@ -685,7 +696,6 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN
ocl/lrn_ocl.cpp
ocl/mloNorm.cpp
ocl/pooling_ocl.cpp
ocl/tensorocl.cpp
ocl/rnnocl.cpp
ocl/utilocl.cpp
ocl/ctcocl.cpp
Expand Down
1 change: 1 addition & 0 deletions src/include/miopen/names.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ struct NetworkConfig
{
NetworkConfig() = default;
explicit NetworkConfig(const std::string& value_) : value(value_) {}
explicit NetworkConfig(std::string&& value_) noexcept : value(std::move(value_)) {}
operator std::string() const { return value; }
const std::string& ToString() const { return value; }

Expand Down
23 changes: 10 additions & 13 deletions src/include/miopen/rnn/solvers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,9 @@ class RNNForwardDataModularAlgo : RNNModuleAlgoBase
// base API
void PrepareWriteBuffers(const Handle& handle, const runtimeArgsFwd& runtimeArgs) const;

void PropX(const Handle& handle, const runtimeArgsFwd& runtimeArgs) const;
void PropX(Handle& handle, const runtimeArgsFwd& runtimeArgs) const;
randyspauldingamd marked this conversation as resolved.
Show resolved Hide resolved

void AddBias(const Handle& handle, const runtimeArgsFwd& runtimeArgs) const;
void AddBias(Handle& handle, const runtimeArgsFwd& runtimeArgs) const;
void PropHxCx(const Handle& handle,
const runtimeArgsFwd& runtimeArgs,
unsigned int layer,
Expand Down Expand Up @@ -206,7 +206,7 @@ class RNNForwardDataModularAlgo : RNNModuleAlgoBase
void PropY(const Handle& handle, const runtimeArgsFwd& runtimeArgs) const;

// ext API
void PropX(const Handle& handle,
void PropX(Handle& handle,
const runtimeArgsFwd& runtimeArgs,
size_t gemm_batch_offset,
size_t gemm_batch_size) const;
Expand Down Expand Up @@ -340,7 +340,7 @@ class RNNBackwardDataModularAlgo : RNNModuleAlgoBase
public:
void PrepareWriteBuffers(const Handle& handle, Data_t dhx, Data_t dcx, Data_t workSpace) const;

void PropDhy(const Handle& handle,
void PropDhy(Handle& handle,
ConstData_t dhy,
Data_t workSpace,
unsigned int layer,
Expand All @@ -364,7 +364,7 @@ class RNNBackwardDataModularAlgo : RNNModuleAlgoBase
const SequenceIterator& seq,
SequenceDirection direction) const;

void PropDhxDcx(const Handle& handle,
void PropDhxDcx(Handle& handle,
ConstData_t w,
Data_t dhx,
Data_t dcx,
Expand Down Expand Up @@ -625,7 +625,7 @@ class RNNModularMultiStreamBWD

struct runtimeArgsBwd
{
const Handle* handle;
Handle* handle;
ConstData_t dy;
ConstData_t dhy;
Data_t dhx;
Expand Down Expand Up @@ -728,11 +728,8 @@ class RNNBackwardWeightsModularAlgo
ConstData_t reserveSpace,
size_t layer) const;

void BiasUpdate(const Handle& handle,
Data_t dw,
Data_t workSpace,
size_t layer,
size_t workSpaceSize) const;
void BiasUpdate(
Handle& handle, Data_t dw, Data_t workSpace, size_t layer, size_t workSpaceSize) const;

void HiddenHStateWeights(const Handle& handle,
Data_t dw,
Expand Down Expand Up @@ -1027,7 +1024,7 @@ class RNNModularSingleStreamBWWeights
// TODO
static size_t GetWsSize() { return 0; };

void Compute(const Handle& handle,
void Compute(Handle& handle,
ConstData_t x,
ConstData_t hx,
Data_t dw,
Expand Down Expand Up @@ -1076,7 +1073,7 @@ class RNNModularMultiStreamBWWeights
ConstData_t reserveSpace;
};

void Compute(const Handle& handle,
void Compute(Handle& handle,
ConstData_t x,
ConstData_t hx,
Data_t dw,
Expand Down
78 changes: 78 additions & 0 deletions src/include/miopen/tensorOp/invoke_params.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2024 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/

#pragma once

#include <miopen/invoke_params.hpp>
#include <miopen/tensor.hpp>

namespace miopen {

namespace tensorOp {

/// Argument bundle handed to the tensorOp solver invokers.
/// Carries the three scaling-factor pointers (alpha0, alpha1, beta), the
/// raw buffers for tensors A, B and C, and the element offsets applied to
/// each buffer. All pointers are borrowed: the caller keeps ownership of
/// both the scalar values and the buffers for the duration of the call.
struct InvokeParams : public miopen::InvokeParams
{
    InvokeParams(const void* alpha0_in,
                 ConstData_t a_tensor,
                 const void* alpha1_in,
                 ConstData_t b_tensor,
                 const void* beta_in,
                 Data_t c_tensor,
                 const size_t a_offset,
                 const size_t b_offset,
                 const size_t c_offset)
        : alpha0(alpha0_in),
          alpha1(alpha1_in),
          beta(beta_in),
          ATensor(a_tensor),
          BTensor(b_tensor),
          CTensor(c_tensor),
          Aoffset(a_offset),
          Boffset(b_offset),
          Coffset(c_offset)
    {
    }

    // Tensor operations do not use a workspace.
    size_t GetWorkspaceSize() const { return 0; }
    Data_t GetWorkspace() const { return nullptr; }

    // Pointers to the scaling factors; how they are interpreted
    // (data type) is up to the individual solvers.
    const void* alpha0;
    const void* alpha1;
    const void* beta;

    // A and B are read-only inputs, C is the output buffer.
    ConstData_t ATensor;
    ConstData_t BTensor;
    Data_t CTensor;

    // Element offsets into the corresponding buffers.
    size_t Aoffset;
    size_t Boffset;
    size_t Coffset;
};

} // namespace tensorOp

} // namespace miopen
130 changes: 130 additions & 0 deletions src/include/miopen/tensorOp/problem_description.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2024 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/

#pragma once

#include <miopen/problem_description_base.hpp>
#include <miopen/tensor.hpp>

namespace miopen {

struct NetworkConfig;

namespace tensorOp {

/// Validated description of a tensorOp problem over tensors A, B and C.
/// All argument checking happens in the constructor, which throws on any
/// inconsistency — a successfully constructed ProblemDescription is known
/// to describe a well-formed operation.
struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase
{
    /// \param tensorOp_          the tensor operation to perform
    /// \param beta_              host pointer to the beta scaling factor;
    ///                           must not be null, read as a float
    /// \param aTensorDesc_       descriptor of input tensor A
    /// \param bTensorDesc_       descriptor of input tensor B
    /// \param cTensorDesc_       descriptor of output tensor C
    /// \param nonStandardSquash_ selects the special-cased 3d "squash"
    ///                           shape check instead of the standard
    ///                           B-against-C broadcast check
    /// \throws miopenStatusBadParm if beta_ is null; MIOPEN_THROW on any
    ///         shape/type mismatch (see individual checks below)
    ProblemDescription(const miopenTensorOp_t tensorOp_,
                       const void* beta_,
                       const TensorDescriptor& aTensorDesc_,
                       const TensorDescriptor& bTensorDesc_,
                       const TensorDescriptor& cTensorDesc_,
                       const bool nonStandardSquash_)
        : tensorOp(tensorOp_),
          aTensorDesc(aTensorDesc_),
          bTensorDesc(bTensorDesc_),
          cTensorDesc(cTensorDesc_),
          nonStandardSquash(nonStandardSquash_)
    {
        if(beta_ == nullptr)
        {
            MIOPEN_THROW(miopenStatusBadParm, "Beta value is nullptr");
        }

        // beta is always dereferenced as a float, regardless of the
        // tensors' data type.
        beta = *(static_cast<const float*>(beta_));

        // A and C must contain the same number of elements.
        if(aTensorDesc.GetElementSize() != cTensorDesc.GetElementSize())
        {
            MIOPEN_THROW("A and C Tensors do not match");
        }

        // B and C must share a data type (A's type is not checked here —
        // presumably constrained elsewhere; NOTE(review): confirm).
        if(bTensorDesc.GetType() != cTensorDesc.GetType())
        {
            MIOPEN_THROW("Datatypes for B and C tensors do not match !");
        }

        const auto& blens = bTensorDesc.GetLengths();
        const auto& clens = cTensorDesc.GetLengths();

        // At most 5 dimensions are supported.
        if(clens.size() > 5)
        {
            MIOPEN_THROW("Tensor dimension larger than 5: " + std::to_string(clens.size()));
        }

        if(blens.size() != clens.size())
        {
            MIOPEN_THROW("Number of dims in B and C Tensors do not match: " +
                         std::to_string(blens.size()) + ", " + std::to_string(clens.size()));
        }

        if(!nonStandardSquash)
        {
            // Standard broadcast rule: every B dimension must be either 1
            // or equal to the corresponding C dimension. std::mismatch
            // returns the first position where the rule is violated.
            constexpr auto comparator = [](size_t c, size_t b) { return b == 1 || b == c; };
            const auto [c_diff, b_diff] =
                std::mismatch(clens.begin(), clens.end(), blens.begin(), comparator);
            if(c_diff != clens.end())
                MIOPEN_THROW("BTensor dim != 1 && BTensor dim != CTensor dim:" +
                             std::to_string(std::distance(clens.begin(), c_diff)));
        }
        else
        {
            // Non-standard "squash" mode: blens[1] is allowed to differ from
            // clens[1]; only this exact 3d configuration is supported.
            if(!(clens.size() == 3 && blens[0] == 1 && clens[0] == 1 && blens[2] == clens[2]))
            {
                MIOPEN_THROW(
                    "Non standard squashed operation supported only for 3d tensors and for "
                    "the specific configuration");
            }
        }
    }

    miopenTensorOp_t GetTensorOp() const { return tensorOp; }

    float GetBeta() const { return beta; }

    const TensorDescriptor& GetATensorDesc() const { return aTensorDesc; }
    const TensorDescriptor& GetBTensorDesc() const { return bTensorDesc; }
    const TensorDescriptor& GetCTensorDesc() const { return cTensorDesc; }

    bool GetNonStandardSquash() const { return nonStandardSquash; }

    // Builds the cache/config key for this problem (defined in
    // tensorOp/problem_description.cpp).
    NetworkConfig MakeNetworkConfig() const override;

private:
    const miopenTensorOp_t tensorOp;

    // Stored by value after dereferencing the constructor's beta_ pointer.
    float beta;

    TensorDescriptor aTensorDesc;
    TensorDescriptor bTensorDesc;
    TensorDescriptor cTensorDesc;

    const bool nonStandardSquash;
};

} // namespace tensorOp

} // namespace miopen
Loading