Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TensorOps kernels refactoring #3346

Merged
merged 40 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
a2972c0
initial changes and support for 1d generic kernel
novakovicdj Oct 30, 2024
75cecb2
1d solver file name change
novakovicdj Oct 30, 2024
035989c
solver name change in cmakelists.txt
novakovicdj Oct 30, 2024
cf91070
more changes, 2d generic and 2d lite kernel
novakovicdj Oct 31, 2024
f2a11d6
some changes suggested in the comments
novakovicdj Nov 1, 2024
ac13ff3
additional changes
novakovicdj Nov 4, 2024
cadb264
initial switch to solver structure for all kernels, still need to sep…
novakovicdj Nov 5, 2024
63603f0
fix for two kernels in one solver
novakovicdj Nov 6, 2024
976bd84
additional changes
novakovicdj Nov 7, 2024
6be98d0
clang format
novakovicdj Nov 7, 2024
d6ffea5
fwd_conv_bias changed
novakovicdj Nov 7, 2024
89dd24c
tidy some part of the code
novakovicdj Nov 8, 2024
9ba8810
Merge branch 'develop' into tensor_refactoring
novakovicdj Nov 8, 2024
5a9b5ed
fix typos
novakovicdj Nov 8, 2024
c9f310a
implementing suggestions, updating network_config and changes to po…
novakovicdj Nov 15, 2024
496b414
clang format
novakovicdj Nov 15, 2024
22c6c48
Merge branch 'develop' into tensor_refactoring
novakovicdj Nov 18, 2024
cb6fd6e
change for new Op3dTensorGeneric kernel usage
novakovicdj Nov 18, 2024
6c3d0c2
remove unused variable
novakovicdj Nov 18, 2024
bd0bd61
clang format
novakovicdj Nov 18, 2024
3f14d3a
support for half data type for CL kernels
novakovicdj Nov 19, 2024
042129e
additional changes for support for half type
novakovicdj Nov 19, 2024
371d43c
initial removal of tensorocl.cpp
novakovicdj Nov 20, 2024
155b35f
code tidying
novakovicdj Nov 20, 2024
0b3454c
unit test for tensorOp PD + additional changes requested
novakovicdj Nov 25, 2024
2bef739
Merge branch 'develop' into tensor_refactoring
BrianHarrisonAMD Nov 27, 2024
146070a
fix windows build issue
novakovicdj Nov 29, 2024
a83ac16
Merge branch 'develop' into tensor_refactoring
novakovicdj Nov 29, 2024
b8d9ab0
Merge branch 'tensor_refactoring' of github.com:novakovicdj/MIOpen in…
novakovicdj Nov 29, 2024
6258109
Merge branch 'develop' into tensor_refactoring
BrianHarrisonAMD Dec 3, 2024
0eb63fc
resolved conflict
novakovicdj Dec 5, 2024
3dc0f66
kept changes in CastTensor but in tensor.cpp file
novakovicdj Dec 5, 2024
6e37785
Merge branch 'develop' into tensor_refactoring
BrianHarrisonAMD Dec 17, 2024
edaa59a
Merge branch 'develop' into tensor_refactoring
Jan 2, 2025
c10ac52
Merge branch 'develop' into tensor_refactoring
novakovicdj Jan 8, 2025
8d0c414
Merge branch 'develop' into tensor_refactoring
novakovicdj Jan 10, 2025
b864063
reverted back to using const Handle
novakovicdj Jan 10, 2025
492ce35
change for accepting double as tensor data type
novakovicdj Jan 10, 2025
0006033
Merge branch 'tensor_refactoring' of github.com:novakovicdj/MIOpen in…
novakovicdj Jan 10, 2025
a7325b2
Merge branch 'develop' into tensor_refactoring
BrianHarrisonAMD Jan 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -337,11 +337,22 @@ set( MIOpen_Source
solver/softmarginloss/forward_softmarginloss.cpp
solver/softmax/attn_softmax.cpp
solver/softmax/softmax.cpp
solver/tensorOp/Op1dTensorGeneric.cpp
solver/tensorOp/Op2dTensorGeneric.cpp
solver/tensorOp/Op2dTensorLite.cpp
solver/tensorOp/Op2dTensorSquash.cpp
solver/tensorOp/Op3dTensorGeneric.cpp
solver/tensorOp/Op4dTensorGeneric.cpp
solver/tensorOp/Op4dTensorLite.cpp
solver/tensorOp/Op5dTensorGeneric.cpp
solver/tensorOp/OpTensorFwdBias.cpp
solver/tensorOp/OpTensorLeadingOnes.cpp
subbuffers.cpp
t5layernorm_api.cpp
target_properties.cpp
temp_file.cpp
tensor.cpp
tensorOp/problem_description.cpp
tensor_api.cpp
transformers_adam_w_api.cpp
seq_tensor.cpp
Expand Down Expand Up @@ -685,7 +696,6 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN
ocl/lrn_ocl.cpp
ocl/mloNorm.cpp
ocl/pooling_ocl.cpp
ocl/tensorocl.cpp
ocl/rnnocl.cpp
ocl/utilocl.cpp
ocl/ctcocl.cpp
Expand Down
1 change: 1 addition & 0 deletions src/include/miopen/names.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ struct NetworkConfig
{
NetworkConfig() = default;
explicit NetworkConfig(const std::string& value_) : value(value_) {}
explicit NetworkConfig(std::string&& value_) noexcept : value(std::move(value_)) {}
operator std::string() const { return value; }
const std::string& ToString() const { return value; }

Expand Down
23 changes: 10 additions & 13 deletions src/include/miopen/rnn/solvers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,9 @@ class RNNForwardDataModularAlgo : RNNModuleAlgoBase
// base API
void PrepareWriteBuffers(const Handle& handle, const runtimeArgsFwd& runtimeArgs) const;

void PropX(const Handle& handle, const runtimeArgsFwd& runtimeArgs) const;
void PropX(Handle& handle, const runtimeArgsFwd& runtimeArgs) const;
randyspauldingamd marked this conversation as resolved.
Show resolved Hide resolved

void AddBias(const Handle& handle, const runtimeArgsFwd& runtimeArgs) const;
void AddBias(Handle& handle, const runtimeArgsFwd& runtimeArgs) const;
void PropHxCx(const Handle& handle,
const runtimeArgsFwd& runtimeArgs,
unsigned int layer,
Expand Down Expand Up @@ -206,7 +206,7 @@ class RNNForwardDataModularAlgo : RNNModuleAlgoBase
void PropY(const Handle& handle, const runtimeArgsFwd& runtimeArgs) const;

// ext API
void PropX(const Handle& handle,
void PropX(Handle& handle,
const runtimeArgsFwd& runtimeArgs,
size_t gemm_batch_offset,
size_t gemm_batch_size) const;
Expand Down Expand Up @@ -340,7 +340,7 @@ class RNNBackwardDataModularAlgo : RNNModuleAlgoBase
public:
void PrepareWriteBuffers(const Handle& handle, Data_t dhx, Data_t dcx, Data_t workSpace) const;

void PropDhy(const Handle& handle,
void PropDhy(Handle& handle,
ConstData_t dhy,
Data_t workSpace,
unsigned int layer,
Expand All @@ -364,7 +364,7 @@ class RNNBackwardDataModularAlgo : RNNModuleAlgoBase
const SequenceIterator& seq,
SequenceDirection direction) const;

void PropDhxDcx(const Handle& handle,
void PropDhxDcx(Handle& handle,
ConstData_t w,
Data_t dhx,
Data_t dcx,
Expand Down Expand Up @@ -625,7 +625,7 @@ class RNNModularMultiStreamBWD

struct runtimeArgsBwd
{
const Handle* handle;
Handle* handle;
ConstData_t dy;
ConstData_t dhy;
Data_t dhx;
Expand Down Expand Up @@ -728,11 +728,8 @@ class RNNBackwardWeightsModularAlgo
ConstData_t reserveSpace,
size_t layer) const;

void BiasUpdate(const Handle& handle,
Data_t dw,
Data_t workSpace,
size_t layer,
size_t workSpaceSize) const;
void BiasUpdate(
Handle& handle, Data_t dw, Data_t workSpace, size_t layer, size_t workSpaceSize) const;

void HiddenHStateWeights(const Handle& handle,
Data_t dw,
Expand Down Expand Up @@ -1027,7 +1024,7 @@ class RNNModularSingleStreamBWWeights
// TODO
static size_t GetWsSize() { return 0; };

void Compute(const Handle& handle,
void Compute(Handle& handle,
ConstData_t x,
ConstData_t hx,
Data_t dw,
Expand Down Expand Up @@ -1076,7 +1073,7 @@ class RNNModularMultiStreamBWWeights
ConstData_t reserveSpace;
};

void Compute(const Handle& handle,
void Compute(Handle& handle,
ConstData_t x,
ConstData_t hx,
Data_t dw,
Expand Down
78 changes: 78 additions & 0 deletions src/include/miopen/tensorOp/invoke_params.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2024 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/

#pragma once

#include <miopen/invoke_params.hpp>
#include <miopen/tensor.hpp>

namespace miopen {

namespace tensorOp {

/// Argument bundle handed to the tensorOp solver invokers.
/// Carries the three scaling-factor pointers (alpha0, alpha1, beta), the
/// raw buffers for tensors A, B and C, and the element offsets applied to
/// each buffer. All pointers are borrowed: the caller keeps ownership of
/// both the scalar values and the buffers for the duration of the call.
struct InvokeParams : public miopen::InvokeParams
{
    InvokeParams(const void* alpha0_in,
                 ConstData_t a_tensor,
                 const void* alpha1_in,
                 ConstData_t b_tensor,
                 const void* beta_in,
                 Data_t c_tensor,
                 const size_t a_offset,
                 const size_t b_offset,
                 const size_t c_offset)
        : alpha0(alpha0_in),
          alpha1(alpha1_in),
          beta(beta_in),
          ATensor(a_tensor),
          BTensor(b_tensor),
          CTensor(c_tensor),
          Aoffset(a_offset),
          Boffset(b_offset),
          Coffset(c_offset)
    {
    }

    // Tensor operations do not use a workspace.
    size_t GetWorkspaceSize() const { return 0; }
    Data_t GetWorkspace() const { return nullptr; }

    // Pointers to the scaling factors; how they are interpreted
    // (data type) is up to the individual solvers.
    const void* alpha0;
    const void* alpha1;
    const void* beta;

    // A and B are read-only inputs, C is the output buffer.
    ConstData_t ATensor;
    ConstData_t BTensor;
    Data_t CTensor;

    // Element offsets into the corresponding buffers.
    size_t Aoffset;
    size_t Boffset;
    size_t Coffset;
};

} // namespace tensorOp

} // namespace miopen
130 changes: 130 additions & 0 deletions src/include/miopen/tensorOp/problem_description.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2024 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/

#pragma once

#include <miopen/problem_description_base.hpp>
#include <miopen/tensor.hpp>

namespace miopen {

struct NetworkConfig;

namespace tensorOp {

/// Validated description of a tensorOp problem over tensors A, B and C.
/// All argument checking happens in the constructor, which throws on any
/// inconsistency — a successfully constructed ProblemDescription is known
/// to describe a well-formed operation.
struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase
{
    /// \param tensorOp_          the tensor operation to perform
    /// \param beta_              host pointer to the beta scaling factor;
    ///                           must not be null, read as a float
    /// \param aTensorDesc_       descriptor of input tensor A
    /// \param bTensorDesc_       descriptor of input tensor B
    /// \param cTensorDesc_       descriptor of output tensor C
    /// \param nonStandardSquash_ selects the special-cased 3d "squash"
    ///                           shape check instead of the standard
    ///                           B-against-C broadcast check
    /// \throws miopenStatusBadParm if beta_ is null; MIOPEN_THROW on any
    ///         shape/type mismatch (see individual checks below)
    ProblemDescription(const miopenTensorOp_t tensorOp_,
                       const void* beta_,
                       const TensorDescriptor& aTensorDesc_,
                       const TensorDescriptor& bTensorDesc_,
                       const TensorDescriptor& cTensorDesc_,
                       const bool nonStandardSquash_)
        : tensorOp(tensorOp_),
          aTensorDesc(aTensorDesc_),
          bTensorDesc(bTensorDesc_),
          cTensorDesc(cTensorDesc_),
          nonStandardSquash(nonStandardSquash_)
    {
        if(beta_ == nullptr)
        {
            MIOPEN_THROW(miopenStatusBadParm, "Beta value is nullptr");
        }

        // beta is always dereferenced as a float, regardless of the
        // tensors' data type.
        beta = *(static_cast<const float*>(beta_));

        // A and C must contain the same number of elements.
        if(aTensorDesc.GetElementSize() != cTensorDesc.GetElementSize())
        {
            MIOPEN_THROW("A and C Tensors do not match");
        }

        // B and C must share a data type (A's type is not checked here —
        // presumably constrained elsewhere; NOTE(review): confirm).
        if(bTensorDesc.GetType() != cTensorDesc.GetType())
        {
            MIOPEN_THROW("Datatypes for B and C tensors do not match !");
        }

        const auto& blens = bTensorDesc.GetLengths();
        const auto& clens = cTensorDesc.GetLengths();

        // At most 5 dimensions are supported.
        if(clens.size() > 5)
        {
            MIOPEN_THROW("Tensor dimension larger than 5: " + std::to_string(clens.size()));
        }

        if(blens.size() != clens.size())
        {
            MIOPEN_THROW("Number of dims in B and C Tensors do not match: " +
                         std::to_string(blens.size()) + ", " + std::to_string(clens.size()));
        }

        if(!nonStandardSquash)
        {
            // Standard broadcast rule: every B dimension must be either 1
            // or equal to the corresponding C dimension. std::mismatch
            // returns the first position where the rule is violated.
            constexpr auto comparator = [](size_t c, size_t b) { return b == 1 || b == c; };
            const auto [c_diff, b_diff] =
                std::mismatch(clens.begin(), clens.end(), blens.begin(), comparator);
            if(c_diff != clens.end())
                MIOPEN_THROW("BTensor dim != 1 && BTensor dim != CTensor dim:" +
                             std::to_string(std::distance(clens.begin(), c_diff)));
        }
        else
        {
            // Non-standard "squash" mode: blens[1] is allowed to differ from
            // clens[1]; only this exact 3d configuration is supported.
            if(!(clens.size() == 3 && blens[0] == 1 && clens[0] == 1 && blens[2] == clens[2]))
            {
                MIOPEN_THROW(
                    "Non standard squashed operation supported only for 3d tensors and for "
                    "the specific configuration");
            }
        }
    }

    miopenTensorOp_t GetTensorOp() const { return tensorOp; }

    float GetBeta() const { return beta; }

    const TensorDescriptor& GetATensorDesc() const { return aTensorDesc; }
    const TensorDescriptor& GetBTensorDesc() const { return bTensorDesc; }
    const TensorDescriptor& GetCTensorDesc() const { return cTensorDesc; }

    bool GetNonStandardSquash() const { return nonStandardSquash; }

    // Builds the cache/config key for this problem (defined in
    // tensorOp/problem_description.cpp).
    NetworkConfig MakeNetworkConfig() const override;

private:
    const miopenTensorOp_t tensorOp;

    // Stored by value after dereferencing the constructor's beta_ pointer.
    float beta;

    TensorDescriptor aTensorDesc;
    TensorDescriptor bTensorDesc;
    TensorDescriptor cTensorDesc;

    const bool nonStandardSquash;
};

} // namespace tensorOp

} // namespace miopen
Loading