Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some fixes in tilemul tester #320

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 4 additions & 17 deletions src/library/blas/gens/blas_kgen.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ typedef enum TileMulFlags {
itself */

/**
* Deprecated. Use the repsective mode being a part of FetchAddr mode.
* Deprecated. Use the respective mode being a part of FetchAddr mode.
* It is kept only for backward compatibility, so that working code does not
* break, and will be removed soon
*/
Expand All @@ -140,24 +140,15 @@ typedef enum TileMulFlags {
TILEMUL_GLOBAL_CYCLIC = TILEMUL_GLOBAL_CYCLIC_A |
TILEMUL_GLOBAL_CYCLIC_B |
TILEMUL_GLOBAL_CYCLIC_K,
// Deprecated
TILEMUL_SKEWS = TILEMUL_SKEW_A | TILEMUL_SKEW_B | TILEMUL_SKEW_K,
/** Optimize coordinates calculations by storing coordinates values */
// Deprecated
TILEMUL_OPTIMIZE_COORD_CALC = 0x4000,
/** Use bwidth0 stride */
TILEMUL_BW_STRIDE = 0x8000,
/** Optimize coordinates calculations by using vectors
* and pointer increments */
// Deprecated
TILEMUL_OPTIMIZE_VEC_COORDS = 0x10000,
TILEMUL_BW_STRIDE = 0x4000,
/** Do not increment K */
TILEMUL_NOT_INC_K = 0x20000,
TILEMUL_NOT_INC_K = 0x8000,
/**
* Use variants with explicit vectorization. Useful on platforms with
* true SIMD.
*/
TILEMUL_FORCE_VECTORIZATION = 0x40000
TILEMUL_FORCE_VECTORIZATION = 0x10000
} TileMulFlags;


Expand Down Expand Up @@ -258,10 +249,6 @@ typedef struct KernelVarNames {
const char *lda; /**< Leading dimension of matrix A */
const char *ldb; /**< Leading dimension of matrix B */
const char *ldc; /**< Leading dimension of matrix C, in vectors */
const char *vectCoordA; /**< Vector containing indexes of tile a elements
in matrix A */
const char *vectCoordB; /**< Vector containing indexes of tile b elements
in matrix B*/
const char *startM;
const char *startN;
const char *startK;
Expand Down
2 changes: 0 additions & 2 deletions src/library/blas/gens/gemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -746,8 +746,6 @@ subgGen(
vnames->alpha = "alpha";
vnames->beta = "beta";

vnames->vectCoordA = "vca";
vnames->vectCoordB = "vcb";
vnames->k = exprK.buf;

subgroupsA = (unsigned int)(gset.subdims[0].y/gset.subdims[1].y);
Expand Down
1 change: 0 additions & 1 deletion src/library/blas/gens/gemv.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,6 @@ generator(
kgenAddBlankLine(ctx);
}

mulOpts.flags |= TILEMUL_OPTIMIZE_COORD_CALC;
if (tailM) {
mulOpts.flags |= TILEMUL_GLOBAL_CYCLIC_A;
}
Expand Down
1 change: 0 additions & 1 deletion src/library/blas/gens/symv.c
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,6 @@ generator(
kgenAddBlankLine(ctx);
}

mulOpts.flags |= TILEMUL_OPTIMIZE_COORD_CALC;
if (tailM) {
vnames->sizeM = "N";
}
Expand Down
97 changes: 21 additions & 76 deletions src/library/blas/gens/tests/t_tilemul.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ typedef union FType {
static void
printUsage(const char *programName, int exitCode)
{
printf( "USAGE: %s [options] <M N K>\n"
printf( "%s - tiles multiplier generator testing and debugging tool.\n"
"USAGE: %s [options] <M N K>\n"
" --help, -h Print this help message.\n"
" --device, -d <device> OpenCL device used. <device> can "
"be \"gpu\" or \"cpu\". Default is \"gpu\".\n"
Expand Down Expand Up @@ -102,8 +103,8 @@ printUsage(const char *programName, int exitCode)
"with one generator function call for both fetching and "
"multiplication. Separate generators functions are used by "
"default.\n"
" M N K Size of block.\n",
programName);
" M N K Sizes of blocks multiplied, 4 4 4 for example.\n",
programName, programName);
exit(exitCode);
}

Expand Down Expand Up @@ -158,14 +159,15 @@ genTest(
{
char s[1024];
Kstring kstr;
char *tName, tVect[64], *ptrName;
char *tName, *ptrName;
char nameA[64], nameB[64];
KernelVarNames *vnames = &gset->varNames;
DataType dtype = gset->kextra->dtype;
const SubproblemDim *subdims = gset->subdims;
unsigned int vecLen = gset->kextra->vecLen;
size_t m, n, k;
unsigned int i, j;
bool tra, trb, localA, localB, vecCoords;
bool tra, trb, localA, localB;
int ret;
TileMulFlags flags = mulOpts->flags;
FetchOpts fetchOpts;
Expand All @@ -179,14 +181,6 @@ genTest(
localA = (mulOpts->memA == CLMEM_LOCAL_MEMORY);
localB = (mulOpts->memB == CLMEM_LOCAL_MEMORY);

vecCoords = ((flags & TILEMUL_OPTIMIZE_VEC_COORDS) != 0);

tVect[0] = '\0';

if (vecCoords && vecLen != 1) {
sprintf(tVect, "%u", vecLen);
}

switch (dtype) {
case TYPE_FLOAT:
tName = "float";
Expand All @@ -208,15 +202,14 @@ genTest(
return;
}

if (vecCoords) {
//Do not use GPtrs in fetching
vnames->A = "A";
vnames->B = "B";
}
else {
vnames->A = localA ? "LAptr" : "((GPtr)A)";
vnames->B = localB ? "LBptr" : "((GPtr)B)";
{
const char *typePtrName;
getVectorTypeName(dtype, vecLen, NULL, &typePtrName);
sprintf(nameA, localA ? "LAptr.%s" : "((GPtr)A).%s", typePtrName);
sprintf(nameB, localB ? "LBptr.%s" : "((GPtr)B).%s", typePtrName);
}
vnames->A = nameA;
vnames->B = nameB;
if (!localA) {
vnames->lda = "lda";

Expand All @@ -243,9 +236,9 @@ genTest(
kgenAddStmt(ctx, s);
sprintf(s," %s alpha,\n", tName);
kgenAddStmt(ctx, s);
sprintf(s," __global %s%s *A,\n", tName, tVect);
sprintf(s," __global %s *A,\n", tName);
kgenAddStmt(ctx, s);
sprintf(s," __global %s%s *B,\n", tName, tVect);
sprintf(s," __global %s *B,\n", tName);
kgenAddStmt(ctx, s);
kgenAddStmt(ctx, " uint M,\n"
" uint N,\n"
Expand Down Expand Up @@ -291,67 +284,15 @@ genTest(
initDefaultTiles(gset, CLBLAS_GEMM, TILE_PACKED, PRIV_STORAGE_ARRAY);
declareTileStorages(ctx, gset);

if (vecCoords) {
size_t ha, hb;
char *str;

ha = tra ? k : m;
hb = trb ? n : k;

if (ha > 1) {
str = s;
str += sprintf(str, "uint%lu ca = {0", ha);
for (i = 1; i < ha; i++) {
str += sprintf(str, ", %s * %u / %u", vnames->lda, i, vecLen);
}
str += sprintf(str, "};\n");
kgenAddStmt(ctx, s);
}
else {
kgenAddStmt(ctx, "uint ca = 0;\n");
}
vnames->vectCoordA = "ca";

if (hb > 1) {
str = s;
str += sprintf(str, "uint%lu cb = {0", hb);
for (i = 1; i < hb; i++) {
str += sprintf(str, ", %s * %u / %u", vnames->ldb, i, vecLen);
}
str += sprintf(str, "};\n");
kgenAddStmt(ctx, s);
}
else {
kgenAddStmt(ctx, "uint cb = 0;\n");
}
vnames->vectCoordB = "cb";

// uint4 ca = {0, vecLDA, vecLDA * 2, vecLDA * 3};
// uint4 cb = {0, vecLDB, vecLDB * 2, vecLDB * 3};
}

kgenAddBlankLine(ctx);

sprintf(s, "for (int it = 0; it < iter; it++)");
kgenBeginBranch(ctx, s);

if (!(localA && localB)) {
kgenAddStmt(ctx, "uint k = 0;\n");
}
kgenAddStmt(ctx, "uint k = 0;\n");

genZeroTile(ctx, &gset->tileCY);

if (vecCoords) {
char *coordsA[2] = {"workItemM", "k"};
char *coordsB[2] = {"k", "workItemN"};
sprintf(s, "A += %s * (lda / %u) + %s / %u;\n",
coordsA[tra], vecLen, coordsA[1 - tra], vecLen);
kgenAddStmt(ctx, s);
sprintf(s, "B += %s * (ldb / %u) + %s / %u;\n",
coordsB[trb], vecLen, coordsB[1 - trb], vecLen);
kgenAddStmt(ctx, s);
}

sprintf(s, "for (int k0 = 0; k0 < K; k0 += %lu)", subdims[0].bwidth);
kgenBeginBranch(ctx, s);

Expand Down Expand Up @@ -1062,6 +1003,10 @@ int main(int argc, char *argv[])
blockN = atoi(argv[optind + 1]);
blockK = atoi(argv[optind + 2]);

kextra.vecLenA = kextra.vecLen;
kextra.vecLenB = kextra.vecLen;
kextra.vecLenC = kextra.vecLen;

if ((mulOpts.memA == CLMEM_LOCAL_MEMORY ||
mulOpts.memB == CLMEM_LOCAL_MEMORY) &&
((mulOpts.flags & TILEMUL_GLOBAL_CYCLIC) != 0)) {
Expand Down